| // Copyright 2017 The Fuchsia Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
#include <stdarg.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#include "tokens.h"
| |
| //#define PRINT_TOKENS 1 |
| |
// Lookup table mapping reserved-word spellings to their token types.
// The table is terminated by a { TOKEN_INVALID, nullptr } sentinel so
// scans can stop at the first null word pointer.
struct ReservedWord {
    TokenType token;        // token type produced when the word matches
    const char* word;       // literal spelling in source text
} reserved_words [] = {
    { TOKEN_TRUE, "true" },
    { TOKEN_FALSE, "false" },
    { TOKEN_CONST, "const" },
    { TOKEN_INCLUDE, "include" },
    { TOKEN_UINT8_TYPE, "uint8" },
    { TOKEN_INT32_TYPE, "int32" },
    { TOKEN_UINT32_TYPE, "uint32" },
    { TOKEN_UINT64_TYPE, "uint64" },
    { TOKEN_BOOLEAN_TYPE, "boolean" },
    { TOKEN_STRING_TYPE, "string" },
    { TOKEN_ARRAY_TYPE, "array" },
    { TOKEN_LIST_TYPE, "list" },
    { TOKEN_INVALID, nullptr },     // sentinel: terminates lookup loops
};
| |
| TokenType find_reserved_word(std::string& string) { |
| const char* str = string.c_str(); |
| ReservedWord* test = reserved_words; |
| |
| while (test->word) { |
| if (!strcmp(str, test->word)) { |
| return test->token; |
| } |
| test++; |
| } |
| return TOKEN_IDENTIFIER; |
| } |
| |
| mdi_type_t Token::get_type_name() { |
| switch (type) { |
| case TOKEN_UINT8_TYPE: |
| return MDI_UINT8; |
| case TOKEN_INT32_TYPE: |
| return MDI_INT32; |
| case TOKEN_UINT32_TYPE: |
| return MDI_UINT32; |
| case TOKEN_UINT64_TYPE: |
| return MDI_UINT64; |
| case TOKEN_BOOLEAN_TYPE: |
| return MDI_BOOLEAN; |
| case TOKEN_STRING_TYPE: |
| return MDI_STRING; |
| case TOKEN_ARRAY_TYPE: |
| return MDI_ARRAY; |
| case TOKEN_LIST_TYPE: |
| return MDI_LIST; |
| default: |
| return MDI_INVALID_TYPE; |
| } |
| } |
| |
| // returns precedence for binary operators |
| int Token::get_precedence() { |
| switch (type) { |
| case TOKEN_OR: |
| return 1; |
| case TOKEN_XOR: |
| return 2; |
| case TOKEN_AND: |
| return 3; |
| case TOKEN_LSHIFT: |
| case TOKEN_RSHIFT: |
| return 4; |
| case TOKEN_PLUS: |
| case TOKEN_MINUS: |
| return 5; |
| case TOKEN_TIMES: |
| case TOKEN_DIV: |
| case TOKEN_MOD: |
| return 6; |
| default: |
| // not a binary operator |
| return -1; |
| } |
| } |
| |
| void Token::print() { |
| switch (type) { |
| case TOKEN_INVALID: |
| printf("TOKEN_INVALID\n"); |
| break; |
| case TOKEN_EOF: |
| printf("TOKEN_EOF\n"); |
| break; |
| case TOKEN_INT_LITERAL: |
| printf("TOKEN_INT_LITERAL %" PRId64 "\n", int_value); |
| break; |
| case TOKEN_STRING_LITERAL: |
| printf("TOKEN_STRING_LITERAL %s\n", string_value.c_str()); |
| break; |
| case TOKEN_IDENTIFIER: |
| printf("TOKEN_IDENTIFIER %s\n", string_value.c_str()); |
| break; |
| case TOKEN_LIST_START: |
| printf("TOKEN_LIST_START\n"); |
| break; |
| case TOKEN_LIST_END: |
| printf("TOKEN_LIST_END\n"); |
| break; |
| case TOKEN_ARRAY_START: |
| printf("TOKEN_ARRAY_START\n"); |
| break; |
| case TOKEN_ARRAY_END: |
| printf("TOKEN_ARRAY_END\n"); |
| break; |
| case TOKEN_EQUALS: |
| printf("TOKEN_EQUALS\n"); |
| break; |
| case TOKEN_COMMA: |
| printf("TOKEN_COMMA\n"); |
| break; |
| case TOKEN_DOT: |
| printf("TOKEN_DOT\n"); |
| break; |
| case TOKEN_LPAREN: |
| printf("TOKEN_LPAREN\n"); |
| break; |
| case TOKEN_RPAREN: |
| printf("TOKEN_RPAREN\n"); |
| break; |
| case TOKEN_PLUS: |
| printf("TOKEN_PLUS\n"); |
| break; |
| case TOKEN_MINUS: |
| printf("TOKEN_MINUS\n"); |
| break; |
| case TOKEN_TIMES: |
| printf("TOKEN_TIMES\n"); |
| break; |
| case TOKEN_DIV: |
| printf("TOKEN_DIV\n"); |
| break; |
| case TOKEN_MOD: |
| printf("TOKEN_MOD\n"); |
| break; |
| case TOKEN_NOT: |
| printf("TOKEN_NOT\n"); |
| break; |
| case TOKEN_AND: |
| printf("TOKEN_AND\n"); |
| break; |
| case TOKEN_OR: |
| printf("TOKEN_OR\n"); |
| break; |
| case TOKEN_XOR: |
| printf("TOKEN_XOR\n"); |
| break; |
| case TOKEN_LSHIFT: |
| printf("TOKEN_LSHIFT\n"); |
| break; |
| case TOKEN_RSHIFT: |
| printf("TOKEN_RSHIFT\n"); |
| break; |
| case TOKEN_TRUE: |
| printf("TOKEN_TRUE\n"); |
| break; |
| case TOKEN_FALSE: |
| printf("TOKEN_FALSE\n"); |
| break; |
| case TOKEN_CONST: |
| printf("TOKEN_CONST\n"); |
| break; |
| case TOKEN_INCLUDE: |
| printf("TOKEN_INCLUDE\n"); |
| break; |
| case TOKEN_UINT8_TYPE: |
| printf("TOKEN_UINT8_TYPE\n"); |
| break; |
| case TOKEN_INT32_TYPE: |
| printf("TOKEN_INT32_TYPE\n"); |
| break; |
| case TOKEN_UINT32_TYPE: |
| printf("TOKEN_UINT32_TYPE\n"); |
| break; |
| case TOKEN_UINT64_TYPE: |
| printf("TOKEN_UINT64_TYPE\n"); |
| break; |
| case TOKEN_BOOLEAN_TYPE: |
| printf("TOKEN_BOOLEAN_TYPE\n"); |
| break; |
| case TOKEN_STRING_TYPE: |
| printf("TOKEN_STRING_TYPE\n"); |
| break; |
| case TOKEN_ARRAY_TYPE: |
| printf("TOKEN_ARRAY_TYPE\n"); |
| break; |
| case TOKEN_LIST_TYPE: |
| printf("TOKEN_LIST_TYPE\n"); |
| break; |
| default: |
| printf("unknown token %d\n", type); |
| break; |
| } |
| } |
| |
| Tokenizer::Tokenizer() { |
| } |
| |
| Tokenizer::~Tokenizer() { |
| } |
| |
| bool Tokenizer::open_file(Tokenizer* container, const char* path) { |
| in_file.open(path, std::ifstream::in); |
| |
| if (!in_file.good()) { |
| if (container) { |
| container->print_err("unable to open %s\n", path); |
| } else { |
| fprintf(stderr, "error: unable to open %s\n", path); |
| } |
| return false; |
| } |
| |
| current_file = path; |
| getline(in_file, current_line); |
| line_number = 1; |
| line_offset = 0; |
| memset(peek, 0, sizeof(peek)); |
| return true; |
| } |
| |
| int Tokenizer::get_char() { |
| if (line_offset < current_line.length()) { |
| return current_line[line_offset++]; |
| } else if (in_file.eof()) { |
| return EOF; |
| } else { |
| getline(in_file, current_line); |
| line_number++; |
| line_offset = 0; |
| return '\n'; |
| } |
| } |
| |
| int Tokenizer::next_char() { |
| if (peek[0]) { |
| int ch = peek[0]; |
| peek[0] = peek[1]; |
| peek[1] = 0; |
| return ch; |
| } else { |
| return get_char(); |
| } |
| } |
| |
| int Tokenizer::peek_char() { |
| if (!peek[0]) { |
| peek[0] = next_char(); |
| } |
| return peek[0]; |
| } |
| |
// Skips whitespace and both comment styles ("// ..." and "/* ... */")
// so the next character delivered by next_char()/peek_char() begins a
// real token. Returns silently at EOF, including inside an unterminated
// block comment.
void Tokenizer::eat_whitespace() {
    while (1) {
        while (isspace(peek_char())) {
            next_char();
        }
        // handle C style comments
        if (peek_char() == '/') {
            // consume the '/'
            next_char();
            int ch = peek_char();
            if (ch == '/') {
                // read until end of line
                while ((ch = next_char()) != EOF && ch != '\n' && ch != '\r') {}
                if (ch == EOF) {
                    break;
                }
                // otherwise loop again: more whitespace or comments may follow
            } else if (ch == '*') {
                next_char(); // consume '*'

                // look for "*/"
                while (1) {
                    while ((ch = next_char()) != EOF && ch != '*') {}
                    if (ch == EOF) {
                        // unterminated block comment: give up at end of input
                        return;
                    }
                    if (peek_char() == '/') {
                        // consume '/'
                        next_char();
                        break;
                    }
                }
            } else {
                // end of whitespace
                // put characters we read into peek
                // (peek[0] currently holds ch from the peek_char() above;
                // overwrite it with '/' and queue ch behind it, so callers
                // see '/' followed by ch)
                peek[0] = '/';
                peek[1] = ch;
                return;
            }
        } else {
            break;
        }
    }
}
| |
| bool Tokenizer::parse_identifier(Token& token, int ch) { |
| std::string string; |
| string.append(1, ch); |
| |
| ch = peek_char(); |
| while (isalnum(ch) || ch == '-' || ch == '_') { |
| next_char(); |
| string.append(1, ch); |
| ch = peek_char(); |
| } |
| |
| token.type = find_reserved_word(string); |
| token.string_value = string; |
| return true; |
| } |
| |
| bool Tokenizer::parse_integer(Token& token, int ch) { |
| int base = 10; |
| uint64_t value = 0; |
| |
| token.string_value.clear(); |
| token.string_value.append(1, ch); |
| |
| if (ch == '0') { |
| base = 8; |
| int peek = peek_char(); |
| if (peek == 'x' || peek == 'X') { |
| base = 16; |
| next_char(); |
| ch = next_char(); |
| token.string_value.append(1, ch); |
| } |
| } |
| |
| // ch now contains highest order digit to parse |
| int digit_count = 0; |
| while (1) { |
| int digit = -1; |
| |
| if (ch >= '0' && ch <= '9') { |
| digit = ch - '0'; |
| } else if (base == 16) { |
| if (ch >= 'A' && ch <= 'F') { |
| digit = ch - 'A' + 10; |
| } else if (ch >= 'a' && ch <= 'f') { |
| digit = ch - 'a' + 10; |
| } |
| } |
| |
| if (digit < 0) { |
| break; |
| } |
| |
| value = base * value + digit; |
| |
| if (++digit_count > 16) { |
| print_err("integer value too large\n"); |
| return false; |
| } |
| |
| ch = peek_char(); |
| if (!isdigit(ch) && !(base == 16 && |
| ((ch >= 'A' && ch <= 'F') || |
| (ch >= 'a' && ch <= 'f')))) { |
| break; |
| } |
| token.string_value.append(1, ch); |
| next_char(); |
| } |
| |
| token.type = TOKEN_INT_LITERAL; |
| token.int_value = value; |
| return true; |
| } |
| |
| bool Tokenizer::parse_string(Token& token) { |
| std::string string; |
| int ch = next_char(); |
| |
| while (ch != EOF) { |
| if (ch == '\\') { |
| ch = next_char(); |
| if (ch == EOF) { |
| break; |
| } |
| switch (ch) { |
| case 'a': |
| ch = '\a'; |
| break; |
| case 'b': |
| ch = '\b'; |
| break; |
| case 'f': |
| ch = '\f'; |
| break; |
| case 'n': |
| ch = '\n'; |
| break; |
| case 'r': |
| ch = '\r'; |
| break; |
| case 't': |
| ch = '\t'; |
| break; |
| case 'v': |
| ch = '\v'; |
| break; |
| case '\\': |
| ch = '\\'; |
| break; |
| case '\'': |
| ch = '\''; |
| break; |
| case '\"': |
| ch = '\"'; |
| break; |
| case '?': |
| ch = '?'; |
| break; |
| default: |
| print_err("unsupported escape sequence \\%c in string literal\n", ch); |
| return false; |
| } |
| } else if (ch == '\"') { |
| token.type = TOKEN_STRING_LITERAL; |
| token.string_value = string; |
| return true; |
| } |
| string.append(1, ch); |
| ch = next_char(); |
| } |
| |
| print_err("end of file during unterminated string\n"); |
| return false; |
| } |
| |
| // returns false if we cannot parse the next token |
| // EOF is not considered an error |
bool Tokenizer::next_token(Token& token) {
    // Serve the lookahead token first if peek_token() stashed one.
    if (have_token_peek) {
        token = token_peek;
        have_token_peek = false;
        return true;
    }

    eat_whitespace();
    int ch = next_char();
    bool result = true;

    if (isalpha(ch)) {
        // Identifiers and reserved words start with a letter.
        result = parse_identifier(token, ch);
    } else if (isdigit(ch)) {
        result = parse_integer(token, ch);
    } else if (ch == '\"') {
        result = parse_string(token);
    } else {
        // Punctuation and operators: single characters plus the
        // two-character shift operators.
        switch (ch) {
        case EOF:
            token.type = TOKEN_EOF;
            break;
        case '{':
            token.type = TOKEN_LIST_START;
            break;
        case '}':
            token.type = TOKEN_LIST_END;
            break;
        case '[':
            token.type = TOKEN_ARRAY_START;
            break;
        case ']':
            token.type = TOKEN_ARRAY_END;
            break;
        case '=':
            token.type = TOKEN_EQUALS;
            break;
        case ',':
            token.type = TOKEN_COMMA;
            break;
        case '.':
            token.type = TOKEN_DOT;
            break;
        case '(':
            token.type = TOKEN_LPAREN;
            break;
        case ')':
            token.type = TOKEN_RPAREN;
            break;
        case '+':
            token.type = TOKEN_PLUS;
            break;
        case '-':
            token.type = TOKEN_MINUS;
            break;
        case '*':
            token.type = TOKEN_TIMES;
            break;
        case '/':
            // eat_whitespace() has already consumed comments, so a '/'
            // reaching here is the division operator.
            token.type = TOKEN_DIV;
            break;
        case '%':
            token.type = TOKEN_MOD;
            break;
        case '~':
            token.type = TOKEN_NOT;
            break;
        case '&':
            token.type = TOKEN_AND;
            break;
        case '|':
            token.type = TOKEN_OR;
            break;
        case '^':
            token.type = TOKEN_XOR;
            break;
        case '<':
            // NOTE: on failure the character after '<' has already been
            // consumed by this next_char() call.
            if (next_char() == '<') {
                token.type = TOKEN_LSHIFT;
            } else {
                print_err("unexpected token '<'\n");
                result = false;
            }
            break;
        case '>':
            // Same consumption caveat as '<' above.
            if (next_char() == '>') {
                token.type = TOKEN_RSHIFT;
            } else {
                print_err("unexpected token '>'\n");
                result = false;
            }
            break;
        default:
            print_err("invalid token \'%c\'\n", ch);
            result = false;
        }
        // Record the character that introduced the token. For the shift
        // operators only the first character is stored; for EOF this
        // appends (char)EOF.
        token.string_value.clear();
        token.string_value.append(1, ch);
    }

#if PRINT_TOKENS
    if (result) {
        token.print();
    }
#endif

    return result;
}
| |
| bool Tokenizer::peek_token(Token& token) { |
| if (!have_token_peek && !next_token(token_peek)) { |
| return false; |
| } |
| token = token_peek; |
| have_token_peek = true; |
| return true; |
| } |
| |
| void Tokenizer::print_err(const char* fmt, ...) { |
| fprintf(stderr, "%s:%d:%d: error: ", current_file.c_str(), line_number, line_offset); |
| va_list ap; |
| va_start(ap, fmt); |
| vfprintf(stderr, fmt, ap); |
| va_end(ap); |
| } |