|  | # Copyright (C) 2011 Apple Inc. All rights reserved. | 
|  | # Copyright (C) 2012 Sony Network Entertainment. All rights reserved. | 
|  | # | 
|  | # Redistribution and use in source and binary forms, with or without | 
|  | # modification, are permitted provided that the following conditions | 
|  | # are met: | 
|  | # 1. Redistributions of source code must retain the above copyright | 
|  | #    notice, this list of conditions and the following disclaimer. | 
|  | # 2. Redistributions in binary form must reproduce the above copyright | 
|  | #    notice, this list of conditions and the following disclaimer in the | 
|  | #    documentation and/or other materials provided with the distribution. | 
|  | # | 
|  | # THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY | 
|  | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | 
|  | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | 
|  | # PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR | 
|  | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | 
|  | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | 
|  | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | 
|  | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY | 
|  | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | 
|  | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | 
|  | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 
|  |  | 
|  | import sys | 
|  | import string | 
|  | import operator | 
|  |  | 
|  | keywordsText = open(sys.argv[1]).read() | 
|  |  | 
|  | # A second argument signifies that the output | 
|  | # should be redirected to a file | 
|  | redirect_to_file = len(sys.argv) > 2 | 
|  |  | 
|  | # Change stdout to point to the file if requested | 
|  | if redirect_to_file: | 
|  | file_output = open(sys.argv[-1], "w") | 
|  | sys.stdout = file_output | 
|  |  | 
|  | # Observed weights of the most common keywords, rounded to 2.s.d | 
|  | keyWordWeights = { | 
|  | "catch": 0.01, | 
|  | "try": 0.01, | 
|  | "while": 0.01, | 
|  | "case": 0.01, | 
|  | "break": 0.01, | 
|  | "new": 0.01, | 
|  | "in": 0.01, | 
|  | "typeof": 0.02, | 
|  | "true": 0.02, | 
|  | "false": 0.02, | 
|  | "for": 0.03, | 
|  | "null": 0.03, | 
|  | "else": 0.03, | 
|  | "return": 0.13, | 
|  | "var": 0.13, | 
|  | "if": 0.16, | 
|  | "function": 0.18, | 
|  | "this": 0.18, | 
|  | } | 
|  |  | 
|  |  | 
|  | def allWhitespace(str): | 
|  | for c in str: | 
|  | if not(c in string.whitespace): | 
|  | return False | 
|  | return True | 
|  |  | 
|  |  | 
|  | def parseKeywords(keywordsText): | 
|  |  | 
|  | if sys.platform == "cygwin": | 
|  | keywordsText = keywordsText.replace("\r\n", "\n") | 
|  |  | 
|  | lines = keywordsText.split("\n") | 
|  | lines = [line.split("#")[0] for line in lines] | 
|  | lines = [line for line in lines if (not allWhitespace(line))] | 
|  | name = lines[0].split() | 
|  | terminator = lines[-1] | 
|  |  | 
|  | if not name[0] == "@begin": | 
|  | raise Exception("expected description beginning with @begin") | 
|  | if not terminator == "@end": | 
|  | raise Exception("expected description ending with @end") | 
|  |  | 
|  | lines = lines[1:-1]  # trim off the old heading | 
|  | return [line.split() for line in lines] | 
|  |  | 
|  |  | 
|  | def makePadding(size): | 
|  | str = "" | 
|  | for i in range(size): | 
|  | str = str + " " | 
|  | return str | 
|  |  | 
|  |  | 
|  | class Trie: | 
|  | def __init__(self, prefix): | 
|  | self.prefix = prefix | 
|  | self.keys = {} | 
|  | self.value = None | 
|  |  | 
|  | def insert(self, key, value): | 
|  | if len(key) == 0: | 
|  | self.value = value | 
|  | return | 
|  | if not (key[0] in self.keys): | 
|  | self.keys[key[0]] = Trie(key[0]) | 
|  | self.keys[key[0]].insert(key[1:], value) | 
|  |  | 
|  | def coalesce(self): | 
|  | keys = {} | 
|  | for k, v in self.keys.items(): | 
|  | t = v.coalesce() | 
|  | keys[t.prefix] = t | 
|  | self.keys = keys | 
|  | if self.value != None: | 
|  | return self | 
|  | if len(self.keys) != 1: | 
|  | return self | 
|  | # Python 3: for() loop for compatibility. Use next() when Python 2.6 is the baseline. | 
|  | for (prefix, suffix) in self.keys.items(): | 
|  | res = Trie(self.prefix + prefix) | 
|  | res.value = suffix.value | 
|  | res.keys = suffix.keys | 
|  | return res | 
|  |  | 
|  | def fillOut(self, prefix=""): | 
|  | self.fullPrefix = prefix + self.prefix | 
|  | weight = 0 | 
|  | if self.fullPrefix in keyWordWeights: | 
|  | weight = weight + keyWordWeights[self.fullPrefix] | 
|  | self.selfWeight = weight | 
|  | for trie in self.keys.values(): | 
|  | trie.fillOut(self.fullPrefix) | 
|  | weight = weight + trie.weight | 
|  | self.keys = [(trie.prefix, trie) for trie in sorted(self.keys.values(), key=operator.attrgetter('weight'), reverse=True)] | 
|  | self.weight = weight | 
|  |  | 
|  | def printSubTreeAsC(self, typeName, indent): | 
|  | str = makePadding(indent) | 
|  |  | 
|  | if self.value != None: | 
|  | print(str + "if (!isIdentPartIncludingEscape(code+%d, m_codeEnd)) {" % (len(self.fullPrefix))) | 
|  | print(str + "    internalShift<%d>();" % len(self.fullPrefix)) | 
|  | print(str + "    if (shouldCreateIdentifier)") | 
|  | print(str + ("        data->ident = &m_vm->propertyNames->%sKeyword;" % self.fullPrefix)) | 
|  | print(str + "    return " + self.value + ";") | 
|  | print(str + "}") | 
|  | rootIndex = len(self.fullPrefix) | 
|  | itemCount = 0 | 
|  | for k, trie in self.keys: | 
|  | baseIndex = rootIndex | 
|  | if (baseIndex > 0) and (len(k) == 3): | 
|  | baseIndex = baseIndex - 1 | 
|  | k = trie.fullPrefix[baseIndex] + k | 
|  | test = [("'%s'" % c) for c in k] | 
|  | if len(test) == 1: | 
|  | comparison = "code[%d] == %s" % (baseIndex, test[0]) | 
|  | else: | 
|  | base = "code" | 
|  | if baseIndex > 0: | 
|  | base = "code + %d" % baseIndex | 
|  | comparison = ("COMPARE_%d%sS(%s, " % (len(test), typeName, base)) + ", ".join(test) + ")" | 
|  | if itemCount == 0: | 
|  | print(str + "if (" + comparison + ") {") | 
|  | else: | 
|  | print(str + "} else if (" + comparison + ") {") | 
|  |  | 
|  | trie.printSubTreeAsC(typeName, indent + 4) | 
|  | itemCount = itemCount + 1 | 
|  |  | 
|  | if itemCount == len(self.keys): | 
|  | print(str + "}") | 
|  |  | 
|  | def maxLength(self): | 
|  | max = len(self.fullPrefix) | 
|  | for (_, trie) in self.keys: | 
|  | l = trie.maxLength() | 
|  | if l > max: | 
|  | max = l | 
|  | return max | 
|  |  | 
|  | def printAsC(self): | 
|  | print("namespace JSC {") | 
|  | print("") | 
|  | print("static ALWAYS_INLINE bool isIdentPartIncludingEscape(const LChar* code, const LChar* codeEnd);") | 
|  | print("static ALWAYS_INLINE bool isIdentPartIncludingEscape(const UChar* code, const UChar* codeEnd);") | 
|  | # max length + 1 so we don't need to do any bounds checking at all | 
|  | print("static const int maxTokenLength = %d;" % (self.maxLength() + 1)) | 
|  | print("") | 
|  | print("template <>") | 
|  | print("template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType Lexer<UChar>::parseKeyword(JSTokenData* data)") | 
|  | print("{") | 
|  | print("    ASSERT(m_codeEnd - m_code >= maxTokenLength);") | 
|  | print("") | 
|  | print("    const UChar* code = m_code;") | 
|  | self.printSubTreeAsC("UCHAR", 4) | 
|  | print("    return IDENT;") | 
|  | print("}") | 
|  | print("") | 
|  | print("template <>") | 
|  | print("template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType Lexer<LChar>::parseKeyword(JSTokenData* data)") | 
|  | print("{") | 
|  | print("    ASSERT(m_codeEnd - m_code >= maxTokenLength);") | 
|  | print("") | 
|  | print("    const LChar* code = m_code;") | 
|  | self.printSubTreeAsC("CHAR", 4) | 
|  | print("    return IDENT;") | 
|  | print("}") | 
|  | print("") | 
|  | print("} // namespace JSC") | 
|  |  | 
|  | keywords = parseKeywords(keywordsText) | 
|  | trie = Trie("") | 
|  | for k, v in keywords: | 
|  | trie.insert(k, v) | 
|  | trie.coalesce() | 
|  | trie.fillOut() | 
|  | print("// This file was generated by KeywordLookupGenerator.py.  Do not edit.") | 
|  | print(""" | 
|  | #if CPU(NEEDS_ALIGNED_ACCESS) | 
|  |  | 
|  | #define COMPARE_2CHARS(address, char1, char2) \\ | 
|  | (((address)[0] == char1) && ((address)[1] == char2)) | 
|  | #define COMPARE_4CHARS(address, char1, char2, char3, char4) \\ | 
|  | (COMPARE_2CHARS(address, char1, char2) && COMPARE_2CHARS((address) + 2, char3, char4)) | 
|  | #define COMPARE_2UCHARS(address, char1, char2) \\ | 
|  | (((address)[0] == char1) && ((address)[1] == char2)) | 
|  | #define COMPARE_4UCHARS(address, char1, char2, char3, char4) \\ | 
|  | (COMPARE_2UCHARS(address, char1, char2) && COMPARE_2UCHARS((address) + 2, char3, char4)) | 
|  |  | 
|  | #else // CPU(NEEDS_ALIGNED_ACCESS) | 
|  |  | 
|  | #if CPU(BIG_ENDIAN) | 
|  |  | 
|  | #define CHARPAIR_TOUINT16(a, b) \\ | 
|  | ((((uint16_t)(a)) << 8) + (uint16_t)(b)) | 
|  | #define CHARQUAD_TOUINT32(a, b, c, d) \\ | 
|  | ((((uint32_t)(CHARPAIR_TOUINT16(a, b))) << 16) + CHARPAIR_TOUINT16(c, d)) | 
|  | #define UCHARPAIR_TOUINT32(a, b) \\ | 
|  | ((((uint32_t)(a)) << 16) + (uint32_t)(b)) | 
|  | #define UCHARQUAD_TOUINT64(a, b, c, d) \\ | 
|  | ((((uint64_t)(UCHARQUAD_TOUINT64(a, b))) << 32) + UCHARPAIR_TOUINT32(c, d)) | 
|  |  | 
|  | #else // CPU(BIG_ENDIAN) | 
|  |  | 
|  | #define CHARPAIR_TOUINT16(a, b) \\ | 
|  | ((((uint16_t)(b)) << 8) + (uint16_t)(a)) | 
|  | #define CHARQUAD_TOUINT32(a, b, c, d) \\ | 
|  | ((((uint32_t)(CHARPAIR_TOUINT16(c, d))) << 16) + CHARPAIR_TOUINT16(a, b)) | 
|  | #define UCHARPAIR_TOUINT32(a, b) \\ | 
|  | ((((uint32_t)(b)) << 16) + (uint32_t)(a)) | 
|  | #define UCHARQUAD_TOUINT64(a, b, c, d) \\ | 
|  | ((((uint64_t)(UCHARPAIR_TOUINT32(c, d))) << 32) + UCHARPAIR_TOUINT32(a, b)) | 
|  |  | 
|  | #endif // CPU(BIG_ENDIAN) | 
|  |  | 
|  |  | 
|  | #define COMPARE_2CHARS(address, char1, char2) \\ | 
|  | ((reinterpret_cast<const uint16_t*>(address))[0] == CHARPAIR_TOUINT16(char1, char2)) | 
|  | #define COMPARE_2UCHARS(address, char1, char2) \\ | 
|  | ((reinterpret_cast<const uint32_t*>(address))[0] == UCHARPAIR_TOUINT32(char1, char2)) | 
|  |  | 
|  | #if CPU(X86_64) | 
|  |  | 
|  | #define COMPARE_4CHARS(address, char1, char2, char3, char4) \\ | 
|  | ((reinterpret_cast<const uint32_t*>(address))[0] == CHARQUAD_TOUINT32(char1, char2, char3, char4)) | 
|  | #define COMPARE_4UCHARS(address, char1, char2, char3, char4) \\ | 
|  | ((reinterpret_cast<const uint64_t*>(address))[0] == UCHARQUAD_TOUINT64(char1, char2, char3, char4)) | 
|  |  | 
|  | #else // CPU(X86_64) | 
|  |  | 
|  | #define COMPARE_4CHARS(address, char1, char2, char3, char4) \\ | 
|  | (COMPARE_2CHARS(address, char1, char2) && COMPARE_2CHARS((address) + 2, char3, char4)) | 
|  | #define COMPARE_4UCHARS(address, char1, char2, char3, char4) \\ | 
|  | (COMPARE_2UCHARS(address, char1, char2) && COMPARE_2UCHARS((address) + 2, char3, char4)) | 
|  |  | 
|  | #endif // CPU(X86_64) | 
|  |  | 
|  | #endif // CPU(NEEDS_ALIGNED_ACCESS) | 
|  |  | 
|  | #define COMPARE_3CHARS(address, char1, char2, char3) \\ | 
|  | (COMPARE_2CHARS(address, char1, char2) && ((address)[2] == (char3))) | 
|  | #define COMPARE_3UCHARS(address, char1, char2, char3) \\ | 
|  | (COMPARE_2UCHARS(address, char1, char2) && ((address)[2] == (char3))) | 
|  | #define COMPARE_5CHARS(address, char1, char2, char3, char4, char5) \\ | 
|  | (COMPARE_4CHARS(address, char1, char2, char3, char4) && ((address)[4] == (char5))) | 
|  | #define COMPARE_5UCHARS(address, char1, char2, char3, char4, char5) \\ | 
|  | (COMPARE_4UCHARS(address, char1, char2, char3, char4) && ((address)[4] == (char5))) | 
|  | #define COMPARE_6CHARS(address, char1, char2, char3, char4, char5, char6) \\ | 
|  | (COMPARE_4CHARS(address, char1, char2, char3, char4) && COMPARE_2CHARS(address + 4, char5, char6)) | 
|  | #define COMPARE_6UCHARS(address, char1, char2, char3, char4, char5, char6) \\ | 
|  | (COMPARE_4UCHARS(address, char1, char2, char3, char4) && COMPARE_2UCHARS(address + 4, char5, char6)) | 
|  | #define COMPARE_7CHARS(address, char1, char2, char3, char4, char5, char6, char7) \\ | 
|  | (COMPARE_4CHARS(address, char1, char2, char3, char4) && COMPARE_4CHARS(address + 3, char4, char5, char6, char7)) | 
|  | #define COMPARE_7UCHARS(address, char1, char2, char3, char4, char5, char6, char7) \\ | 
|  | (COMPARE_4UCHARS(address, char1, char2, char3, char4) && COMPARE_4UCHARS(address + 3, char4, char5, char6, char7)) | 
|  | #define COMPARE_8CHARS(address, char1, char2, char3, char4, char5, char6, char7, char8) \\ | 
|  | (COMPARE_4CHARS(address, char1, char2, char3, char4) && COMPARE_4CHARS(address + 4, char5, char6, char7, char8)) | 
|  | #define COMPARE_8UCHARS(address, char1, char2, char3, char4, char5, char6, char7, char8) \\ | 
|  | (COMPARE_4UCHARS(address, char1, char2, char3, char4) && COMPARE_4UCHARS(address + 4, char5, char6, char7, char8)) | 
|  | """) | 
|  |  | 
|  | trie.printAsC() | 
|  |  | 
|  | # Close the redirected file if requested | 
|  | if (redirect_to_file): | 
|  | file_output.close() | 
|  | sys.stdout = sys.__stdout__ |