src/depfile_parser.in.cc - third_party/ninja - Git at Google

 // Copyright 2011 Google Inc. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.

 #include "depfile_parser.h"
 #include "util.h"

 #include <algorithm>

 using namespace std;

 // Builds a parser whose options_ member is initialized from |options|.
 DepfileParser::DepfileParser(DepfileParserOptions options) : options_(options) {}

 // A note on backslashes in Makefiles, from reading the docs:
 // Backslash-newline is the line continuation character.
 // Backslash-# escapes a # (otherwise meaningful as a comment start).
 // Backslash-% escapes a % (otherwise meaningful as a special).
 // Finally, quoting the GNU manual, "Backslashes that are not in danger
 // of quoting ‘%’ characters go unmolested."
 // How do you end a line with a backslash?  The netbsd Make docs suggest
 // reading the result of a shell command echoing a backslash!
 //
 // Rather than implement all of the above, we follow what GCC/Clang produces:
 // Backslashes escape a space or hash sign.
 // When a space is preceded by 2N+1 backslashes, it represents N backslashes
 // followed by space.
 // When a space is preceded by 2N backslashes, it represents 2N backslashes at
 // the end of a filename.
 // A hash sign is escaped by a single backslash. All other backslashes remain
 // unchanged.
 //
 // If anyone actually has depfiles that rely on the more complicated
 // behavior we can adjust this.
 //
 // Parse() scans |*content| and rewrites it in place: de-escaped file names
 // are written back into the same buffer through |out|, and the StringPieces
 // appended to outs_ and ins_ are built from pointers into that buffer.
 // NOTE(review): StringPiece is presumably a non-owning view, so |*content|
 // would have to outlive outs_/ins_ -- confirm against its definition.
 //
 // Besides the escapes described above, the scanner also turns "$$" into "$"
 // and de-escapes a backslash-escaped ':' unless the colon is followed by
 // NUL, space, tab, CR or LF.
 //
 // Returns true on success.  Returns false and sets |*err| when
 //  - a rule with a target that was already recorded as an input names an
 //    input that has not been seen before, or
 //  - at least one file name was parsed but no token ended in ':'.
 bool DepfileParser::Parse(string* content, string* err) {
   // in: current parser input point.
   // end: end of input.
   // parsing_targets: whether we are parsing targets or dependencies.
   char* in = &(*content)[0];
   char* end = in + content->size();
   // have_target: set once any token ending in ':' has been seen.
   bool have_target = false;
   bool parsing_targets = true;
   // poisoned_input: a target of the current rule is already present in ins_.
   bool poisoned_input = false;
   // is_empty: stays true until a non-empty file name has been parsed.
   bool is_empty = true;
   // Each pass of this loop scans one token (a file name, possibly empty).
   while (in < end) {
     bool have_newline = false;
     // out: current output point (typically same as in, but can fall behind
     // as we de-escape backslashes).
     char* out = in;
     // filename: start of the current parsed filename.
     char* filename = out;
     // Inside the scanner actions below, `continue` keeps extending the current
     // file name with the next span, while `break` ends the file name.
     // NOTE(review): yyfill is disabled, so the generated scanner presumably
     // relies on the NUL that terminates the string buffer (the `nul` rule)
     // rather than on checking |end| -- confirm against the re2c manual.
     for (;;) {
       // start: beginning of the current parsed span.
       const char* start = in;
       char* yymarker = NULL;
       /*!re2c
       re2c:define:YYCTYPE = "unsigned char";
       re2c:define:YYCURSOR = in;
       re2c:define:YYLIMIT = end;
       re2c:define:YYMARKER = yymarker;

       re2c:yyfill:enable = 0;

       re2c:indent:top = 2;
       re2c:indent:string = "  ";

       nul = "\000";
       newline = '\r'?'\n';

       '\\\\'* '\\ ' {
         // 2N+1 backslashes plus space -> N backslashes plus space.
         int len = (int)(in - start);
         int n = len / 2 - 1;
         if (out < start)
           memset(out, '\\', n);
         out += n;
         *out++ = ' ';
         continue;
       }
       '\\\\'+ ' ' {
         // 2N backslashes plus space -> 2N backslashes, end of filename.
         int len = (int)(in - start);
         if (out < start)
           memset(out, '\\', len - 1);
         out += len - 1;
         break;
       }
       '\\'+ '#' {
         // De-escape hash sign, but preserve other leading backslashes.
         int len = (int)(in - start);
         if (len > 2 && out < start)
           memset(out, '\\', len - 2);
         out += len - 2;
         *out++ = '#';
         continue;
       }
       '\\'+ ':' [\x00\x20\r\n\t] {
         // Backslash followed by : and whitespace.
         // It is therefore normal text and not an escaped colon
         int len = (int)(in - start - 1);
         // Need to shift it over if we're overwriting backslashes.
         if (out < start)
           memmove(out, start, len);
         out += len;
         if (*(in - 1) == '\n')
           have_newline = true;
         break;
       }
       '\\'+ ':' {
         // De-escape colon sign, but preserve other leading backslashes.
         // Regular expression uses lookahead to make sure that no whitespace
         // nor EOF follows. In that case it'd be the : at the end of a target
         int len = (int)(in - start);
         if (len > 2 && out < start)
           memset(out, '\\', len - 2);
         out += len - 2;
         *out++ = ':';
         continue;
       }
       '$$' {
         // De-escape dollar character.
         *out++ = '$';
         continue;
       }
       '\\'+ [^\000\r\n] | [a-zA-Z0-9+,/_:.~()}{%=@\x5B\x5D!\x80-\xFF-]+ {
         // Got a span of plain text.
         int len = (int)(in - start);
         // Need to shift it over if we're overwriting backslashes.
         if (out < start)
           memmove(out, start, len);
         out += len;
         continue;
       }
       nul {
         break;
       }
       '\\' newline {
         // A line continuation ends the current file name.
         break;
       }
       newline {
         // A newline ends the current file name and the current rule.
         have_newline = true;
         break;
       }
       [^] {
         // For any other character (e.g. whitespace), swallow it here,
         // allowing the outer logic to loop around again.
         break;
       }
       */
     }

     // The de-escaped token now occupies [filename, out).
     int len = (int)(out - filename);
     // Classify the token with the state from before it was scanned, so that
     // the token carrying the trailing ':' is itself still treated as a target.
     const bool is_dependency = !parsing_targets;
     if (len > 0 && filename[len - 1] == ':') {
       len--;  // Strip off trailing colon, if any.
       parsing_targets = false;
       have_target = true;
     }

     if (len > 0) {
       is_empty = false;
       StringPiece piece = StringPiece(filename, len);
       // If we've seen this as an input before, skip it.
       std::vector<StringPiece>::iterator pos = std::find(ins_.begin(), ins_.end(), piece);
       if (pos == ins_.end()) {
         if (is_dependency) {
           if (poisoned_input) {
             *err = "inputs may not also have inputs";
             return false;
           }
           // New input.
           ins_.push_back(piece);
         } else {
           // Check for a new output.
           if (std::find(outs_.begin(), outs_.end(), piece) == outs_.end())
             outs_.push_back(piece);
         }
       } else if (!is_dependency) {
         // We've passed an input on the left side; reject new inputs.
         poisoned_input = true;
       }
     }

     if (have_newline) {
       // A newline ends a rule so the next filename will be a new target.
       parsing_targets = true;
       poisoned_input = false;
     }
   }
   // File names were found, but none of the tokens ended in ':'.
   if (!have_target && !is_empty) {
     *err = "expected ':' in depfile";
     return false;
   }
   return true;
 }
	// Copyright 2011 Google Inc. All Rights Reserved.
	//
	// Licensed under the Apache License, Version 2.0 (the "License");
	// you may not use this file except in compliance with the License.
	// You may obtain a copy of the License at
	//
	// http://www.apache.org/licenses/LICENSE-2.0
	//
	// Unless required by applicable law or agreed to in writing, software
	// distributed under the License is distributed on an "AS IS" BASIS,
	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	// See the License for the specific language governing permissions and
	// limitations under the License.

	#include "depfile_parser.h"
	#include "util.h"

	#include <algorithm>

	using namespace std;

	// Builds a parser whose options_ member is initialized from |options|.
	DepfileParser::DepfileParser(DepfileParserOptions options) : options_(options) {}

	// A note on backslashes in Makefiles, from reading the docs:
	// Backslash-newline is the line continuation character.
	// Backslash-# escapes a # (otherwise meaningful as a comment start).
	// Backslash-% escapes a % (otherwise meaningful as a special).
	// Finally, quoting the GNU manual, "Backslashes that are not in danger
	// of quoting ‘%’ characters go unmolested."
	// How do you end a line with a backslash? The netbsd Make docs suggest
	// reading the result of a shell command echoing a backslash!
	//
	// Rather than implement all of the above, we follow what GCC/Clang produces:
	// Backslashes escape a space or hash sign.
	// When a space is preceded by 2N+1 backslashes, it represents N backslashes
	// followed by space.
	// When a space is preceded by 2N backslashes, it represents 2N backslashes at
	// the end of a filename.
	// A hash sign is escaped by a single backslash. All other backslashes remain
	// unchanged.
	//
	// If anyone actually has depfiles that rely on the more complicated
	// behavior we can adjust this.
	//
	// Parse() scans |*content| and rewrites it in place: de-escaped file names
	// are written back into the same buffer through |out|, and the StringPieces
	// appended to outs_ and ins_ are built from pointers into that buffer.
	// NOTE(review): StringPiece is presumably a non-owning view, so |*content|
	// would have to outlive outs_/ins_ -- confirm against its definition.
	//
	// Besides the escapes described above, the scanner also turns "$$" into "$"
	// and de-escapes a backslash-escaped ':' unless the colon is followed by
	// NUL, space, tab, CR or LF.
	//
	// Returns true on success.  Returns false and sets |*err| when
	//  - a rule with a target that was already recorded as an input names an
	//    input that has not been seen before, or
	//  - at least one file name was parsed but no token ended in ':'.
	bool DepfileParser::Parse(string* content, string* err) {
	  // in: current parser input point.
	  // end: end of input.
	  // parsing_targets: whether we are parsing targets or dependencies.
	  char* in = &(*content)[0];
	  char* end = in + content->size();
	  // have_target: set once any token ending in ':' has been seen.
	  bool have_target = false;
	  bool parsing_targets = true;
	  // poisoned_input: a target of the current rule is already present in ins_.
	  bool poisoned_input = false;
	  // is_empty: stays true until a non-empty file name has been parsed.
	  bool is_empty = true;
	  // Each pass of this loop scans one token (a file name, possibly empty).
	  while (in < end) {
	    bool have_newline = false;
	    // out: current output point (typically same as in, but can fall behind
	    // as we de-escape backslashes).
	    char* out = in;
	    // filename: start of the current parsed filename.
	    char* filename = out;
	    // Inside the scanner actions below, `continue` keeps extending the current
	    // file name with the next span, while `break` ends the file name.
	    // NOTE(review): yyfill is disabled, so the generated scanner presumably
	    // relies on the NUL that terminates the string buffer (the `nul` rule)
	    // rather than on checking |end| -- confirm against the re2c manual.
	    for (;;) {
	      // start: beginning of the current parsed span.
	      const char* start = in;
	      char* yymarker = NULL;
	      /*!re2c
	      re2c:define:YYCTYPE = "unsigned char";
	      re2c:define:YYCURSOR = in;
	      re2c:define:YYLIMIT = end;
	      re2c:define:YYMARKER = yymarker;

	      re2c:yyfill:enable = 0;

	      re2c:indent:top = 2;
	      re2c:indent:string = "  ";

	      nul = "\000";
	      newline = '\r'?'\n';

	      '\\\\'* '\\ ' {
	        // 2N+1 backslashes plus space -> N backslashes plus space.
	        int len = (int)(in - start);
	        int n = len / 2 - 1;
	        if (out < start)
	          memset(out, '\\', n);
	        out += n;
	        *out++ = ' ';
	        continue;
	      }
	      '\\\\'+ ' ' {
	        // 2N backslashes plus space -> 2N backslashes, end of filename.
	        int len = (int)(in - start);
	        if (out < start)
	          memset(out, '\\', len - 1);
	        out += len - 1;
	        break;
	      }
	      '\\'+ '#' {
	        // De-escape hash sign, but preserve other leading backslashes.
	        int len = (int)(in - start);
	        if (len > 2 && out < start)
	          memset(out, '\\', len - 2);
	        out += len - 2;
	        *out++ = '#';
	        continue;
	      }
	      '\\'+ ':' [\x00\x20\r\n\t] {
	        // Backslash followed by : and whitespace.
	        // It is therefore normal text and not an escaped colon
	        int len = (int)(in - start - 1);
	        // Need to shift it over if we're overwriting backslashes.
	        if (out < start)
	          memmove(out, start, len);
	        out += len;
	        if (*(in - 1) == '\n')
	          have_newline = true;
	        break;
	      }
	      '\\'+ ':' {
	        // De-escape colon sign, but preserve other leading backslashes.
	        // Regular expression uses lookahead to make sure that no whitespace
	        // nor EOF follows. In that case it'd be the : at the end of a target
	        int len = (int)(in - start);
	        if (len > 2 && out < start)
	          memset(out, '\\', len - 2);
	        out += len - 2;
	        *out++ = ':';
	        continue;
	      }
	      '$$' {
	        // De-escape dollar character.
	        *out++ = '$';
	        continue;
	      }
	      '\\'+ [^\000\r\n] | [a-zA-Z0-9+,/_:.~()}{%=@\x5B\x5D!\x80-\xFF-]+ {
	        // Got a span of plain text.
	        int len = (int)(in - start);
	        // Need to shift it over if we're overwriting backslashes.
	        if (out < start)
	          memmove(out, start, len);
	        out += len;
	        continue;
	      }
	      nul {
	        break;
	      }
	      '\\' newline {
	        // A line continuation ends the current file name.
	        break;
	      }
	      newline {
	        // A newline ends the current file name and the current rule.
	        have_newline = true;
	        break;
	      }
	      [^] {
	        // For any other character (e.g. whitespace), swallow it here,
	        // allowing the outer logic to loop around again.
	        break;
	      }
	      */
	    }

	    // The de-escaped token now occupies [filename, out).
	    int len = (int)(out - filename);
	    // Classify the token with the state from before it was scanned, so that
	    // the token carrying the trailing ':' is itself still treated as a target.
	    const bool is_dependency = !parsing_targets;
	    if (len > 0 && filename[len - 1] == ':') {
	      len--;  // Strip off trailing colon, if any.
	      parsing_targets = false;
	      have_target = true;
	    }

	    if (len > 0) {
	      is_empty = false;
	      StringPiece piece = StringPiece(filename, len);
	      // If we've seen this as an input before, skip it.
	      std::vector<StringPiece>::iterator pos = std::find(ins_.begin(), ins_.end(), piece);
	      if (pos == ins_.end()) {
	        if (is_dependency) {
	          if (poisoned_input) {
	            *err = "inputs may not also have inputs";
	            return false;
	          }
	          // New input.
	          ins_.push_back(piece);
	        } else {
	          // Check for a new output.
	          if (std::find(outs_.begin(), outs_.end(), piece) == outs_.end())
	            outs_.push_back(piece);
	        }
	      } else if (!is_dependency) {
	        // We've passed an input on the left side; reject new inputs.
	        poisoned_input = true;
	      }
	    }

	    if (have_newline) {
	      // A newline ends a rule so the next filename will be a new target.
	      parsing_targets = true;
	      poisoned_input = false;
	    }
	  }
	  // File names were found, but none of the tokens ended in ':'.
	  if (!have_target && !is_empty) {
	    *err = "expected ':' in depfile";
	    return false;
	  }
	  return true;
	}