src/etc/extract_grammar.py - third_party/rust - Git at Google

 #!/usr/bin/env python
 # xfail-license

 # This script is for extracting the grammar from the rust docs.

 import fileinput

 # Buckets filled while scanning the input, keyed by section name.
 # "gram" keeps raw lines; the other buckets keep unique words.
 collections = {
     "gram": [],
     "keyword": [],
     "reserved": [],
     "binop": [],
     "unop": [],
 }

 # Scanner state: whether we are inside a recognised "~~~~" fenced section,
 # and which bucket that section feeds.
 in_coll = False
 coll = ""

 for line in fileinput.input(openhook=fileinput.hook_encoded("utf-8")):
     is_fence = line.startswith("~~~~")

     if in_coll and is_fence:
         # A fence line while collecting closes the current section.
         in_coll = False
     elif in_coll:
         bucket = collections[coll]
         if coll in ("keyword", "reserved", "binop", "unop"):
             # Word sections: keep each whitespace-separated word once,
             # in order of first appearance.
             for entry in line.split():
                 if entry not in bucket:
                     bucket.append(entry)
         else:
             # Any other section (i.e. "gram") is stored line by line.
             bucket.append(line)
     elif is_fence:
         # Opening fence: start collecting if the line mentions ".<name>"
         # for one of the known buckets; the first match wins.
         for name in collections:
             if ("." + name) in line:
                 coll = name
                 in_coll = True
                 break

 # Define operator symbol-names here

 # Token names that are always declared, before any keyword or operator
 # names are appended by the code further down.
 tokens = [
     "non_star",
     "non_slash",
     "non_eol",
     "non_single_quote",
     "non_double_quote",
     "ident",
 ]

 # Maps each operator/punctuation spelling to the identifier used for it
 # in the generated grammar.
 symnames = {
     # arithmetic and member access
     ".": "dot",
     "+": "plus",
     "-": "minus",
     "/": "slash",
     "*": "star",
     "%": "percent",

     # sigils
     "~": "tilde",
     "@": "at",

     # logical / bitwise
     "!": "not",
     "&": "and",
     "|": "or",
     "^": "xor",

     # shifts
     "<<": "lsl",
     ">>": "lsr",
     ">>>": "asr",

     # short-circuit boolean
     "&&": "andand",
     "||": "oror",

     # comparison
     "<": "lt",
     "<=": "le",
     "==": "eqeq",
     ">=": "ge",
     ">": "gt",

     # assignment
     "=": "eq",

     # compound arithmetic assignment
     "+=": "plusequal",
     "-=": "minusequal",
     "/=": "divequal",
     "*=": "starequal",
     "%=": "percentequal",

     # compound bitwise assignment
     "&=": "andequal",
     "|=": "orequal",
     "^=": "xorequal",

     # compound shift assignment
     ">>=": "lsrequal",
     ">>>=": "asrequal",
     "<<=": "lslequal",

     # paths
     "::": "coloncolon",

     # arrows
     "->": "rightarrow",
     "<-": "leftarrow",
     "<->": "swaparrow",

     # comments, macros and remaining punctuation
     "//": "linecomment",
     "/*": "openblockcomment",
     "*/": "closeblockcomment",
     "macro_rules": "macro_rules",
     "=>": "eg",
     "..": "dotdot",
     ",": "comma",
 }

 # Grammar lines with every quoted literal replaced by its token name.
 lines = []

 for line in collections["gram"]:
     pieces = []
     for word in line.split():
         # Words that open with a double quote are literals: strip the first
         # and last character, then map to a symbol name or a keyword token.
         if word.startswith("\""):
             word = word[1:-1]
             if word in symnames:
                 word = symnames[word]
             else:
                 # Not a known symbol, so it must be purely alphabetic ...
                 if any(not ch.isalpha() for ch in word):
                     raise Exception("non-alpha apparent keyword: " + word)
                 # ... and, unless already a token, a documented keyword or
                 # reserved word, which is then registered as a token.
                 if word not in tokens:
                     documented = (word in collections["keyword"]
                                   or word in collections["reserved"])
                     if not documented:
                         raise Exception("unknown keyword/reserved word: "
                                         + word)
                     tokens.append(word)

         # Each word is emitted with a single leading space.
         pieces.append(" " + word)
     lines.append("".join(pieces))


 # Make sure every documented keyword and reserved word is declared as a
 # token, keeping first-seen order and skipping ones already present.
 for word in collections["keyword"] + collections["reserved"]:
     if word not in tokens:
         tokens.append(word)

 # Declare the symbolic name of every operator: those listed in the docs
 # first, then everything in the symbol table.  list(symnames) is used
 # instead of symnames.keys() because on Python 3 keys() returns a view and
 # "list + view" raises TypeError; list(...) works on Python 2 and 3 alike.
 # An operator from the docs that is missing from symnames raises KeyError.
 for sym in collections["unop"] + collections["binop"] + list(symnames):
     word = symnames[sym]
     if word not in tokens:
         tokens.append(word)


 # Emit the result on stdout: the %start header, the %token declaration,
 # one rule per word collection, one rule per operator collection (using
 # symbolic names), and finally the rewritten grammar lines.
 print("%start parser, token;")

 token_decl = "\n\t, ".join(tokens)
 print("%%token %s ;" % token_decl)

 for coll in ["keyword", "reserved"]:
     alternatives = "\n\t| ".join(collections[coll])
     print("%s: %s ; " % (coll, alternatives))

 for coll in ["binop", "unop"]:
     alternatives = "\n\t| ".join(symnames[op] for op in collections[coll])
     print("%s: %s ; " % (coll, alternatives))

 print("\n".join(lines))
	#!/usr/bin/env python
	# xfail-license

	# This script is for extracting the grammar from the rust docs.

	import fileinput

	# Buckets filled while scanning the input, keyed by section name.
	# "gram" keeps raw lines; the other buckets keep unique words.
	collections = { "gram": [],
	                "keyword": [],
	                "reserved": [],
	                "binop": [],
	                "unop": [] }


	# Scanner state: inside a recognised "~~~~" section, and which bucket.
	in_coll = False
	coll = ""

	# Block indentation restored; the flattened form could not be parsed.
	for line in fileinput.input(openhook=fileinput.hook_encoded("utf-8")):
	    if in_coll:
	        if line.startswith("~~~~"):
	            # A fence line while collecting closes the section.
	            in_coll = False
	        else:
	            if coll in ["keyword", "reserved", "binop", "unop"]:
	                # Word sections keep each word once, in first-seen order.
	                for word in line.split():
	                    if word not in collections[coll]:
	                        collections[coll].append(word)
	            else:
	                # Remaining section ("gram") is stored line by line.
	                collections[coll].append(line)

	    else:
	        if line.startswith("~~~~"):
	            # Opening fence: collect if the line mentions ".<name>" for a
	            # known bucket; the first match wins.
	            for cname in collections:
	                if ("." + cname) in line:
	                    coll = cname
	                    in_coll = True
	                    break

	# Define operator symbol-names here

	# Token names that are always declared, before any keyword or operator
	# names are appended by later code.
	tokens = ["non_star", "non_slash", "non_eol",
	          "non_single_quote", "non_double_quote", "ident" ]

	# Maps each operator/punctuation spelling to the identifier used for it
	# in the generated grammar.  The pipe-based keys are spelled "|", "||"
	# and "|=": the backslash-escaped forms were not the real operators and
	# would never match a lookup.
	symnames = {
	    ".": "dot",
	    "+": "plus",
	    "-": "minus",
	    "/": "slash",
	    "*": "star",
	    "%": "percent",

	    "~": "tilde",
	    "@": "at",

	    "!": "not",
	    "&": "and",
	    "|": "or",
	    "^": "xor",

	    "<<": "lsl",
	    ">>": "lsr",
	    ">>>": "asr",

	    "&&": "andand",
	    "||": "oror",

	    "<" : "lt",
	    "<=" : "le",
	    "==" : "eqeq",
	    ">=" : "ge",
	    ">" : "gt",

	    "=": "eq",

	    "+=": "plusequal",
	    "-=": "minusequal",
	    "/=": "divequal",
	    "*=": "starequal",
	    "%=": "percentequal",

	    "&=": "andequal",
	    "|=": "orequal",
	    "^=": "xorequal",

	    ">>=": "lsrequal",
	    ">>>=": "asrequal",
	    "<<=": "lslequal",

	    "::": "coloncolon",

	    "->": "rightarrow",
	    "<-": "leftarrow",
	    "<->": "swaparrow",

	    "//": "linecomment",
	    "/*": "openblockcomment",
	    "*/": "closeblockcomment",
	    "macro_rules": "macro_rules",
	    "=>" : "eg",
	    ".." : "dotdot",
	    "," : "comma"
	}

	# Grammar lines with every quoted literal replaced by its token name.
	lines = []

	# Block indentation restored; the flattened form could not be parsed.
	for line in collections["gram"]:
	    line2 = ""
	    for word in line.split():
	        # replace strings with keyword-names or symbol-names from table
	        if word.startswith("\""):
	            # Drop the first and last character (the quotes).
	            word = word[1:-1]
	            if word in symnames:
	                word = symnames[word]
	            else:
	                # Not a known symbol: must be purely alphabetic.
	                for ch in word:
	                    if not ch.isalpha():
	                        raise Exception("non-alpha apparent keyword: "
	                                        + word)
	                # Register it as a token only if the docs list it as a
	                # keyword or reserved word.
	                if word not in tokens:
	                    if (word in collections["keyword"] or
	                        word in collections["reserved"]):
	                        tokens.append(word)
	                    else:
	                        raise Exception("unknown keyword/reserved word: "
	                                        + word)

	        # Each word is emitted with a single leading space.
	        line2 += " " + word
	    lines.append(line2)


	# Make sure every documented keyword and reserved word is declared as a
	# token, keeping first-seen order and skipping ones already present.
	for word in collections["keyword"] + collections["reserved"]:
	    if word not in tokens:
	        tokens.append(word)

	# Declare the symbolic name of every operator: those listed in the docs
	# first, then everything in the symbol table.  list(symnames) replaces
	# symnames.keys() because on Python 3 keys() is a view and "list + view"
	# raises TypeError.  A documented operator missing from symnames raises
	# KeyError.
	for sym in collections["unop"] + collections["binop"] + list(symnames):
	    word = symnames[sym]
	    if word not in tokens:
	        tokens.append(word)


	# Emit the result on stdout: the %start header, the %token declaration,
	# one rule per word collection, one rule per operator collection (using
	# symbolic names), and finally the rewritten grammar lines.
	print("%start parser, token;")
	print("%%token %s ;" % ("\n\t, ".join(tokens)))
	for coll in ["keyword", "reserved"]:
	    # Alternatives are separated by a plain "|"; the escaped form would
	    # have written a literal backslash into the output.
	    print("%s: %s ; " % (coll, "\n\t| ".join(collections[coll])))
	for coll in ["binop", "unop"]:
	    print("%s: %s ; " % (coll, "\n\t| ".join([symnames[x]
	                                              for x in collections[coll]])))
	print("\n".join(lines))