week9
目前已经开始了build a computer
系列的最后几章了,后面的章节感觉越来越难,build a computer
的part II
部分基本上全是软件,需要编码,耗费大量的时间和精力来应付这些代码,不过能够最终通过测试的话还是成就感满满。特别是只剩下后面三章了,主要是完成一个jack
语言编译器和一个最基本的微型操作系统,并提供基本的系统库函数。project10
主要完成jack
语言编译器的语法分析,感觉还是非常有难度和挑战的,花了差不多两周时间,利用自己的业余时间终于把蹩脚的语法解析器完成了,主要是关于jack
语言的语法分析。收获满满,最终的实现结果是将jack
语言的语法分析结果结构化为固定的xml
文档,供后面的代码生成器和链接器使用。当然总体来说jack
语言的语法还是非常简单的语法,我们直接用常见的top down
递归下降分析法即可实现,实际是非常简单的语法,至少感觉总体语法来说比c
语言的语法简单多了,非常喜欢这类有挑战性的小project
,感觉对基本的递归方法有了更深层次的了解。
Lexical elements
: 词的定义,在jack
语言中词的定义非常简单,只有5类基本类型,如下所示:
关键字、符号、整数常数、字符串常数、普通标识符。
- 关键字即语法中的保留字,不可用作标识符
- 特殊符号即为过程标志位和运算符
- 整数常数 (0~32767)
- 字符串常数 (“xxx”)
- 普通标识符
Program structure
:结构语法,包含基本的程序结构定义
Statements
:语句定义,还是非常常见的语句定义
Expressions
:表达式定义,包含常见的表达式定义:
整个project
基本围绕着上述几个语法展开,实际还是非常有意思的,需要对递归非常熟悉才可以对整个代码比较好的掌握
project
本周的project
还是非常有难度的project
,刚开始准备用c++
来实现,后来发现c++
太恶心了点,最后用蹩脚的python
花了大概一个星期终于实现了整个project
,整个程序的流程也分为两部分,先将整个程序进行token
格式化,后续的分析引擎对整个语法树进行分析,生成树状结构的xml
文件,当然总的来说本章的project
有难度,但是不够有趣,感觉csapp
的project
难度非常大,不是一般人能够坚持完成的,目前的打算是准备把build a computer
这个系列的课程的project
代码完整的实现一遍,终于磨蹭了这么长时间只剩下最后两个project
了。源代码放在
poj10.
token parser
:整个程序非常简单,其实大概核心的代码100
行即可完成token
解析,非常简单的代码逻辑即可实现。
import fileinput
import sys, getopt
from enum import Enum, unique
@unique
class TOKEN_TYPE(Enum):
    """Lexical category assigned to a single Jack token."""

    # The first five categories are the ones the Jack language defines.
    TOKEN_KEYWORD = 0
    TOKEN_SYMBOL = 1
    TOKEN_IDENTIFIER = 2
    TOKEN_INT_CONST = 3
    TOKEN_STRING_CONST = 4
    # Fallback for text that fits none of the categories above.
    TOKEN_INVALID = 5
@unique
class KEYWORD_TYPE(Enum):
    """One member per Jack reserved word, plus the KEYWORD_VALID marker."""

    KEYWORD_CLASS = 101
    KEYWORD_METHOD = 102
    KEYWORD_FUNCTION = 103
    KEYWORD_CONSTRUCTOR = 104
    KEYWORD_BOOLEAN = 105
    KEYWORD_CHAR = 106
    KEYWORD_VOID = 107
    KEYWORD_VAR = 108
    KEYWORD_STATIC = 109
    KEYWORD_FIELD = 110
    KEYWORD_LET = 111
    KEYWORD_DO = 112
    KEYWORD_IF = 113
    KEYWORD_WHILE = 114
    KEYWORD_RETURN = 115
    KEYWORD_TRUE = 116
    KEYWORD_FALSE = 117
    KEYWORD_NULL = 118
    KEYWORD_THIS = 119
    KEYWORD_VALID = 120
    # `int` and `else` are Jack keywords too.  They are appended after the
    # existing members so that every previous value and the member order
    # (which other code may iterate positionally) stay exactly as before.
    KEYWORD_INT = 121
    KEYWORD_ELSE = 122
# Spellings of the Jack reserved words.  "int" was missing, which made the
# tokenizer classify it as an identifier; it is appended at the end so the
# positions of the existing entries do not move.
keywordname = ["class", "method", "function", "constructor", "boolean", "char", "void", "var", "static",
               "field", "let", "do", "if", "else", "while", "return", "true", "false", "null", "this",
               "int"]
# Names of the KEYWORD_TYPE members, in definition order.
keywordtype = [name for name in KEYWORD_TYPE.__members__]
# Keyword spelling -> "KEYWORD_<SPELLING>" label.  The label is derived from
# the spelling instead of zipping the two lists by position: the lists do not
# line up (the spellings contain "else", the member list did not), so a
# positional zip paired every keyword from "else" onward with the wrong label.
# The tokenizer code visible here only tests membership in this dict.
keworddict = {word: "KEYWORD_" + word.upper() for word in keywordname}
# XML tag name for each token category; TOKEN_INVALID deliberately has none
# because zip() stops at the shorter list.
tokenname = ["keyword", "symbol", "identifier", "integerConstant", "stringConstant"]
tokentype = [name for name in TOKEN_TYPE.__members__]
tokendict = dict(zip(tokentype, tokenname))
# Single-character Jack symbols (a set, despite the historical name).
symboldict = {'{', '}', '(', ')', '[', ']', ',', ';', '=', '.', '+', '-', '*', '/', '&', '|', '~', '<', '>'}
def validNum(val):
    """Return True when *val* spells a Jack integer constant.

    A constant is a non-empty run of ASCII digits without a leading zero
    (a lone "0" is fine) whose value lies in the Jack range 0..32767.
    """
    if not val:
        return False
    if val[0] == '0' and len(val) > 1:
        return False
    # Compare against the ASCII digits explicitly; str.isdigit() would also
    # accept non-ASCII digit characters.
    if any(c < '0' or c > '9' for c in val):
        return False
    # Jack integers are 15-bit non-negative values.
    return int(val) <= 32767
def isAlpha(val):
    """Return True when the single character *val* is an ASCII letter."""
    code = ord(val)
    is_lower = ord('a') <= code <= ord('z')
    is_upper = ord('A') <= code <= ord('Z')
    return is_lower or is_upper
def isDigit(val):
    """Return True when the single character *val* is an ASCII digit."""
    return ord('0') <= ord(val) <= ord('9')
def validIdentifiers(val):
    """Return True when *val* is a well-formed Jack identifier.

    An identifier is non-empty, starts with an ASCII letter or underscore,
    and continues with ASCII letters, digits or underscores.  The empty
    string is rejected instead of raising IndexError.
    """
    if not val:
        return False

    def is_letter(c):
        return 'a' <= c <= 'z' or 'A' <= c <= 'Z'

    first = val[0]
    if not (first == '_' or is_letter(first)):
        return False
    return all(c == '_' or is_letter(c) or '0' <= c <= '9' for c in val[1:])
class JackTokenizer:
    """Split a Jack source file into tokens and dump them as XML.

    The constructor reads the whole file, tokenizes it, and writes the token
    list to ``<name>T.xml`` next to the input.  Afterwards the instance acts
    as a cursor over ``self.tokens``: ``self.curr`` is the index of the
    current token and ``advance()`` moves it forward by one.
    """

    # Characters that separate tokens and are never part of one.
    _WHITESPACE = " \t\r\n"
    # Replacements for characters that are markup in XML text.  "&" must be
    # handled first so already-produced entities are not escaped again.
    _XML_ENTITIES = (("&", "&amp;"), ("<", "&lt;"), (">", "&gt;"), ('"', "&quot;"))

    def __init__(self, infile):
        """Tokenize the file at path *infile* and write its token XML."""
        self.curr = 0
        with open(infile, "r") as ifs:
            self.buffer = ifs.read()
        self.end = len(self.buffer)
        self.tokens = []
        self.linenum = []
        self.parseToken()
        # Strip only a trailing ".jack"; a path without that suffix is used
        # unchanged rather than losing its last character.
        stem = infile[:-len(".jack")] if infile.endswith(".jack") else infile
        self.outfile = stem + "T.xml"
        self.genToken()

    @staticmethod
    def _escape(text):
        """Return *text* with XML markup characters replaced by entities."""
        for raw, entity in JackTokenizer._XML_ENTITIES:
            text = text.replace(raw, entity)
        return text

    def parseToken(self):
        """Scan ``self.buffer`` and append every token to ``self.tokens``.

        Whitespace and both comment forms are skipped.  String constants are
        stored with their surrounding quotes; an unterminated string or
        block comment extends to the end of the buffer.  Always returns True.
        """
        buf = self.buffer
        end = self.end
        pos = 0
        while pos < end:
            ch = buf[pos]
            if ch in self._WHITESPACE:
                pos += 1
                continue
            # Line comment: stop at the newline, which the whitespace branch
            # consumes on the next iteration.
            if buf.startswith("//", pos):
                newline = buf.find("\n", pos)
                pos = end if newline == -1 else newline
                continue
            # Block comment: search after the opener so "/*/" is not taken
            # for a complete comment.
            if buf.startswith("/*", pos):
                close = buf.find("*/", pos + 2)
                pos = end if close == -1 else close + 2
                continue
            if ch in symboldict:
                self.tokens.append(ch)
                pos += 1
                continue
            if ch == '"':
                close = buf.find('"', pos + 1)
                stop = end if close == -1 else close + 1
                self.tokens.append(buf[pos:stop])
                pos = stop
                continue
            # Keyword, identifier or number: runs until whitespace (tabs
            # included), a symbol, or the start of a string constant.
            start = pos
            while pos < end and buf[pos] not in self._WHITESPACE \
                    and buf[pos] not in symboldict and buf[pos] != '"':
                pos += 1
            self.tokens.append(buf[start:pos])
        return True

    def hasMoreTokens(self):
        """Return True when a token follows the current one."""
        return self.curr < len(self.tokens) - 1

    def advance(self):
        """Move the cursor to the next token (no bounds check)."""
        self.curr += 1

    def currToken(self):
        """Return the raw text of the current token."""
        return self.tokens[self.curr]

    def tokenType(self):
        """Return the TOKEN_TYPE of the current token."""
        return self.getType(self.tokens[self.curr])

    def getType(self, token):
        """Classify the raw text *token* and return its TOKEN_TYPE."""
        if token in keworddict:
            return TOKEN_TYPE.TOKEN_KEYWORD
        if len(token) == 1 and token[0] in symboldict:
            return TOKEN_TYPE.TOKEN_SYMBOL
        if validNum(token):
            return TOKEN_TYPE.TOKEN_INT_CONST
        if len(token) > 1 and token[0] == '"' and token[-1] == '"':
            return TOKEN_TYPE.TOKEN_STRING_CONST
        if validIdentifiers(token):
            return TOKEN_TYPE.TOKEN_IDENTIFIER
        return TOKEN_TYPE.TOKEN_INVALID

    def keyWord(self):
        """Return the current token; meant for keyword tokens."""
        return self.tokens[self.curr]

    def symbol(self):
        """Return the current token; meant for symbol tokens."""
        return self.tokens[self.curr]

    def identifier(self):
        """Return the current token; meant for identifier tokens."""
        return self.tokens[self.curr]

    def intVal(self):
        """Return the current token as text; meant for integer constants."""
        return self.tokens[self.curr]

    def stringVal(self):
        """Return the current string constant without its quotes."""
        return self.tokens[self.curr][1:-1]

    def genToken(self):
        """Write all tokens to ``self.outfile`` as a flat ``<tokens>`` list.

        Tokens classified as TOKEN_INVALID have no tag and are left out.
        """
        with open(self.outfile, "w") as out:
            out.write("<tokens>\n")
            for token in self.tokens:
                kind = self.getType(token)
                # tokendict has no entry for TOKEN_INVALID.
                tag = tokendict.get(kind.name)
                if tag is None:
                    continue
                text = token[1:-1] if kind == TOKEN_TYPE.TOKEN_STRING_CONST else token
                out.write("<" + tag + "> " + self._escape(text) + " </" + tag + ">\n")
            out.write("</tokens>\n")
def main(input):
    """Tokenize the Jack file at path *input*.

    Constructing JackTokenizer already parses the file and writes the token
    XML, so no further call is needed; the earlier extra genToken() call
    only rewrote the same file a second time.
    """
    JackTokenizer(input)
if __name__ == "__main__":
main(sys.argv[1])
compile engine
:整个的语法解析器还是非常复杂的,但是实际实现的project
可以根据配套的《The Elements of Computing Systems》
中的提示,可以很快地将代码分解为几个关键函数来实现。
from JackTokenizer import JackTokenizer,TOKEN_TYPE,KEYWORD_TYPE,tokentype,tokendict
import fileinput
import sys, getopt
from enum import Enum, unique
import sys
class CompilationEngine:
    """Recursive-descent parser that writes a Jack class as an XML parse tree.

    The constructor tokenizes *infile* with JackTokenizer, parses one class
    and writes the tree to the matching ``.xml`` file, indenting each nesting
    level by one space.  Token text is XML-escaped on output.  On a syntax
    error (including running out of tokens) a diagnostic is printed to stderr
    and the process exits with status 1.  Input that violates the Jack
    grammar is rejected even where earlier versions let it through (e.g. a
    trailing comma in a parameter list).  Every ``compile*`` method returns
    True on success.
    """

    _BINARY_OPS = ("+", "-", "*", "/", "&", "|", ">", "<", "=")
    _UNARY_OPS = ("-", "~")
    _KEYWORD_CONSTANTS = ("true", "false", "null", "this")
    # "&" must come first so produced entities are not escaped again.
    _XML_ENTITIES = (("&", "&amp;"), ("<", "&lt;"), (">", "&gt;"), ('"', "&quot;"))

    def __init__(self, infile):
        """Parse the Jack file at path *infile* and write its XML tree."""
        self.parser = JackTokenizer(infile)
        # Strip only a trailing ".jack" suffix.
        stem = infile[:-len(".jack")] if infile.endswith(".jack") else infile
        self.out = open(stem + ".xml", "w")
        self.depth = 0
        try:
            self.compileClass()
        finally:
            # Also runs when a syntax error raises SystemExit.
            self.out.close()

    # ------------------------------------------------------------------
    # output helpers
    # ------------------------------------------------------------------
    def lowerLevel(self):
        """Increase the output nesting depth by one."""
        self.depth += 1

    def upperLevel(self):
        """Decrease the output nesting depth by one."""
        self.depth -= 1

    def compileOut(self, str):
        """Write *str* verbatim, prefixed by the current indentation."""
        self.out.write(" " * self.depth)
        self.out.write(str)

    def compileOutElement(self, tkType, tkStr):
        """Write one terminal element ``<tag> text </tag>`` for a token.

        *tkType* selects the tag name, *tkStr* is the token text, which is
        XML-escaped before being written.
        """
        tags = {
            TOKEN_TYPE.TOKEN_KEYWORD: "keyword",
            TOKEN_TYPE.TOKEN_SYMBOL: "symbol",
            TOKEN_TYPE.TOKEN_IDENTIFIER: "identifier",
            TOKEN_TYPE.TOKEN_INT_CONST: "integerConstant",
            TOKEN_TYPE.TOKEN_STRING_CONST: "stringConstant",
            TOKEN_TYPE.TOKEN_INVALID: "invalid",
        }
        tag = tags.get(tkType, "")
        for raw, entity in self._XML_ENTITIES:
            tkStr = tkStr.replace(raw, entity)
        self.out.write(" " * self.depth)
        self.out.write("<" + tag + "> " + tkStr + " </" + tag + ">\n")

    def _open(self, tag):
        """Write the opening tag of a non-terminal and indent its children."""
        self.compileOut("<" + tag + ">\n")
        self.lowerLevel()

    def _close(self, tag):
        """Dedent and write the closing tag of a non-terminal."""
        self.upperLevel()
        self.compileOut("</" + tag + ">\n")

    # ------------------------------------------------------------------
    # token helpers
    # ------------------------------------------------------------------
    def _at_end(self):
        """Return True when the cursor has moved past the last token."""
        return self.parser.curr >= len(self.parser.tokens)

    def _is_symbol(self, *symbols):
        """Return True when the current token is one of the given symbols."""
        return (not self._at_end()
                and self.parser.tokenType() == TOKEN_TYPE.TOKEN_SYMBOL
                and self.parser.symbol() in symbols)

    def _is_keyword(self, *words):
        """Return True when the current token is one of the given keywords."""
        return (not self._at_end()
                and self.parser.tokenType() == TOKEN_TYPE.TOKEN_KEYWORD
                and self.parser.keyWord() in words)

    def _is_identifier(self):
        """Return True when the current token is an identifier."""
        return (not self._at_end()
                and self.parser.tokenType() == TOKEN_TYPE.TOKEN_IDENTIFIER)

    def _is_type(self, allow_void=False):
        """Return True when the current token can name a type.

        Identifiers are accepted as class names; this also covers a
        tokenizer that reports ``int`` as an identifier.
        """
        words = ("int", "char", "boolean")
        if allow_void:
            words += ("void",)
        return self._is_identifier() or self._is_keyword(*words)

    def _fail(self, message):
        """Report a syntax error on stderr and exit with status 1."""
        found = "end of input" if self._at_end() else repr(self.parser.currToken())
        print("%s (found %s)" % (message, found), file=sys.stderr)
        sys.exit(1)

    def _emit(self):
        """Write the current token as a terminal element and advance."""
        kind = self.parser.tokenType()
        if kind == TOKEN_TYPE.TOKEN_STRING_CONST:
            # String constants are written without their quotes.
            text = self.parser.stringVal()
        else:
            text = self.parser.currToken()
        self.compileOutElement(kind, text)
        self.parser.advance()

    def _expect_symbol(self, symbol, context):
        """Emit *symbol* or fail with a message mentioning *context*."""
        if not self._is_symbol(symbol):
            self._fail("%s: expected %r" % (context, symbol))
        self._emit()

    def _expect_keyword(self, words, context):
        """Emit one of the keywords in *words* or fail."""
        if not self._is_keyword(*words):
            self._fail("%s: expected %s" % (context, " or ".join(repr(w) for w in words)))
        self._emit()

    def _expect_identifier(self, context):
        """Emit an identifier or fail."""
        if not self._is_identifier():
            self._fail("%s: expected an identifier" % context)
        self._emit()

    def _expect_type(self, context, allow_void=False):
        """Emit a type name or fail."""
        if not self._is_type(allow_void):
            self._fail("%s: expected a type" % context)
        self._emit()

    def _compile_name_list(self, context):
        """Parse ``varName (',' varName)* ';'``."""
        self._expect_identifier(context)
        while self._is_symbol(","):
            self._emit()
            self._expect_identifier(context)
        self._expect_symbol(";", context)

    def _compile_block(self, context):
        """Parse ``'{' statements '}'``."""
        self._expect_symbol("{", context)
        self.compileStatements()
        self._expect_symbol("}", context)

    def _compile_call_arguments(self, context):
        """Parse ``'(' expressionList ')'``."""
        self._expect_symbol("(", context)
        self.compileExpressionList()
        self._expect_symbol(")", context)

    # ------------------------------------------------------------------
    # program structure
    # ------------------------------------------------------------------
    def compileClass(self):
        """Parse ``'class' name '{' classVarDec* subroutineDec* '}'``."""
        context = "invalid class definition"
        self._open("class")
        self._expect_keyword(("class",), context)
        self._expect_identifier(context)
        self._expect_symbol("{", context)
        while self._is_keyword("static", "field"):
            self.compileClassVarDec()
        while self._is_keyword("method", "constructor", "function"):
            self.compileSubroutine()
        self._expect_symbol("}", context)
        self._close("class")
        return True

    def compileClassVarDec(self):
        """Parse ``('static'|'field') type varName (',' varName)* ';'``."""
        context = "invalid class variable declaration"
        self._open("classVarDec")
        self._expect_keyword(("static", "field"), context)
        self._expect_type(context)
        self._compile_name_list(context)
        self._close("classVarDec")
        return True

    def compileSubroutine(self):
        """Parse one constructor, function or method declaration."""
        context = "invalid subroutine declaration"
        self._open("subroutineDec")
        self._expect_keyword(("constructor", "function", "method"), context)
        self._expect_type(context, allow_void=True)
        self._expect_identifier(context)
        self._expect_symbol("(", context)
        self.compileParameterList()
        self._expect_symbol(")", context)
        self.compileSubroutineBody()
        self._close("subroutineDec")
        return True

    def compileSubroutineBody(self):
        """Parse ``'{' varDec* statements '}'``."""
        context = "invalid subroutine body"
        self._open("subroutineBody")
        self._expect_symbol("{", context)
        while self._is_keyword("var"):
            self.compileVarDec()
        self.compileStatements()
        self._expect_symbol("}", context)
        self._close("subroutineBody")
        return True

    def compileParameterList(self):
        """Parse a possibly empty ``type varName (',' type varName)*``.

        The enclosing parentheses are handled by the caller.
        """
        context = "invalid parameter list"
        self._open("parameterList")
        if not self._is_symbol(")"):
            self._expect_type(context)
            self._expect_identifier(context)
            while self._is_symbol(","):
                self._emit()
                self._expect_type(context)
                self._expect_identifier(context)
        self._close("parameterList")
        return True

    def compileVarDec(self):
        """Parse ``'var' type varName (',' varName)* ';'``."""
        context = "invalid variable declaration"
        self._open("varDec")
        self._expect_keyword(("var",), context)
        self._expect_type(context)
        self._compile_name_list(context)
        self._close("varDec")
        return True

    # ------------------------------------------------------------------
    # statements
    # ------------------------------------------------------------------
    def compileStatements(self):
        """Parse a possibly empty sequence of statements."""
        handlers = {
            "do": self.compileDo,
            "if": self.compileIf,
            "while": self.compileWhile,
            "let": self.compileLet,
            "return": self.compileReturn,
        }
        self._open("statements")
        while self._is_keyword(*handlers):
            handlers[self.parser.keyWord()]()
        self._close("statements")
        return True

    def compileDo(self):
        """Parse ``'do' (name '.')? name '(' expressionList ')' ';'``.

        The call is written directly inside the statement, without a
        surrounding term element.
        """
        context = "invalid do statement"
        self._open("doStatement")
        self._expect_keyword(("do",), context)
        self._expect_identifier(context)
        if self._is_symbol("."):
            self._emit()
            self._expect_identifier(context)
        self._compile_call_arguments(context)
        self._expect_symbol(";", context)
        self._close("doStatement")
        return True

    def compileLet(self):
        """Parse ``'let' varName ('[' expression ']')? '=' expression ';'``."""
        context = "invalid let statement"
        self._open("letStatement")
        self._expect_keyword(("let",), context)
        self._expect_identifier(context)
        if self._is_symbol("["):
            self._emit()
            self.compileExpression()
            self._expect_symbol("]", context)
        self._expect_symbol("=", context)
        self.compileExpression()
        self._expect_symbol(";", context)
        self._close("letStatement")
        return True

    def compileWhile(self):
        """Parse ``'while' '(' expression ')' '{' statements '}'``."""
        context = "invalid while statement"
        self._open("whileStatement")
        self._expect_keyword(("while",), context)
        self._expect_symbol("(", context)
        self.compileExpression()
        self._expect_symbol(")", context)
        self._compile_block(context)
        self._close("whileStatement")
        return True

    def compileReturn(self):
        """Parse ``'return' expression? ';'``."""
        context = "invalid return statement"
        self._open("returnStatement")
        self._expect_keyword(("return",), context)
        if not self._is_symbol(";"):
            self.compileExpression()
        self._expect_symbol(";", context)
        self._close("returnStatement")
        return True

    def compileIf(self):
        """Parse an ``if`` statement with an optional ``else`` block."""
        context = "invalid if statement"
        self._open("ifStatement")
        self._expect_keyword(("if",), context)
        self._expect_symbol("(", context)
        self.compileExpression()
        self._expect_symbol(")", context)
        self._compile_block(context)
        if self._is_keyword("else"):
            self._emit()
            self._compile_block(context)
        self._close("ifStatement")
        return True

    # ------------------------------------------------------------------
    # expressions
    # ------------------------------------------------------------------
    def compileExpression(self):
        """Parse ``term (op term)*``."""
        self._open("expression")
        self.compileTerm()
        while self._is_symbol(*self._BINARY_OPS):
            self._emit()
            self.compileTerm()
        self._close("expression")
        return True

    def compileTerm(self):
        """Parse one term.

        A term is a constant, a keyword constant, a parenthesised
        expression, a unary operator applied to a term, or an identifier
        optionally followed by an index, a ``.name(...)`` call or a direct
        ``(...)`` call.
        """
        context = "invalid term"
        self._open("term")
        if self._at_end():
            self._fail(context + ": expected a term")
        kind = self.parser.tokenType()
        if kind in (TOKEN_TYPE.TOKEN_INT_CONST, TOKEN_TYPE.TOKEN_STRING_CONST):
            self._emit()
        elif kind == TOKEN_TYPE.TOKEN_KEYWORD:
            if not self._is_keyword(*self._KEYWORD_CONSTANTS):
                self._fail("invalid expression: expected true, false, null or this")
            self._emit()
        elif self._is_symbol("("):
            self._emit()
            self.compileExpression()
            self._expect_symbol(")", context)
        elif self._is_symbol(*self._UNARY_OPS):
            self._emit()
            self.compileTerm()
        elif kind == TOKEN_TYPE.TOKEN_IDENTIFIER:
            self._emit()
            # The token after the name decides what kind of term this is.
            if self._is_symbol("["):
                self._emit()
                self.compileExpression()
                self._expect_symbol("]", context)
            elif self._is_symbol("."):
                self._emit()
                self._expect_identifier(context)
                self._compile_call_arguments(context)
            elif self._is_symbol("("):
                self._compile_call_arguments(context)
        else:
            # Any other symbol, or a token the tokenizer could not classify.
            self._fail(context + ": expected a term")
        self._close("term")
        return True

    def compileExpressionList(self):
        """Parse a possibly empty ``expression (',' expression)*``.

        The list is taken to be empty when the current token is ``)``.
        """
        self._open("expressionList")
        if not self._is_symbol(")"):
            self.compileExpression()
            while self._is_symbol(","):
                self._emit()
                self.compileExpression()
        self._close("expressionList")
        return True
def main(input):
    """Run the compilation engine on the Jack file at path *input*.

    All of the work happens while the CompilationEngine is constructed.
    """
    CompilationEngine(input)
# Script entry point: the first command-line argument is the .jack file.
if __name__ == "__main__":
    if len(sys.argv) < 2:
        # Give a usage hint instead of an IndexError traceback.
        sys.exit("usage: " + sys.argv[0] + " <file.jack>")
    main(sys.argv[1])
欢迎关注和打赏,感谢支持!
- 关注我的博客: http://mikemeng.org/
- 关注我的知乎:https://www.zhihu.com/people/da-hua-niu
- 关注我的微信公众号: 公务程序猿
扫描二维码,分享此文章