@memoize
装饰器,以实现packrat 解析。statement: assignment | expr | if_statement
expr: expr '+' term | expr '-' term | term
term: term '*' atom | term '/' atom | atom
atom: NAME | NUMBER | '(' expr ')'
assignment: target '=' expr
target: NAME
if_statement: 'if' expr ':' statement
grammar: rule+ ENDMARKER
rule: NAME ':' alternative ('|' alternative)* NEWLINE
alternative: item+
item: NAME | STRING
NAME
和STRING
)__repr__
与__eq__
:class Rule:
def __init__(self, name, alts):
self.name = name
self.alts = alts
GrammarParser
类(关于基类Parser
,请参阅我之前的帖子):class GrammarParser(Parser):
def grammar(self):
pos = self.mark()
if rule := self.rule():
rules = [rule]
while rule := self.rule():
rules.append(rule)
if self.expect(ENDMARKER):
return rules # <------------- final result
self.reset(pos)
return None
def rule(self):
pos = self.mark()
if name := self.expect(NAME):
if self.expect(":"):
if alt := self.alternative():
alts = [alt]
apos = self.mark()
while (self.expect("|")
and (alt := self.alternative())):
alts.append(alt)
apos = self.mark()
self.reset(apos)
if self.expect(NEWLINE):
return Rule(name.string, alts)
self.reset(pos)
return None
def alternative(self):
items = []
while item := self.item():
items.append(item)
return items
def item(self):
if name := self.expect(NAME):
return name.string
if string := self.expect(STRING):
return string.string
return None
ENDMARKER
,它用来确保在最后一条规则后没有遗漏任何东西(如果语法中出现拼写错误,可能会导致这种情况)。ToyParser
类很相似,所以我不作解释。[
Rule('statement', [['assignment'], ['expr'], ['if_statement']]),
Rule('expr', [['term', "'+'", 'expr'],
['term', "'-'", 'term'],
['term']]),
Rule('term', [['atom', "'*'", 'term'],
['atom', "'/'", 'atom'],
['atom']]),
Rule('atom', [['NAME'], ['NUMBER'], ["'('", 'expr', "')'"]]),
Rule('assignment', [['target', "'='", 'expr']]),
Rule('target', [['NAME']]),
Rule('if_statement', [["'if'", 'expr', "':'", 'statement']]),
]
def generate_parser_class(rules):
print(f"class ToyParser(Parser):")
for rule in rules:
print()
print(f" @memoize")
print(f" def {rule.name}(self):")
print(f" pos = self.mark()")
for alt in rule.alts:
items = []
print(f" if (True")
for item in alt:
if item[0] in ('"', "'"):
print(f" and self.expect({item})")
else:
var = item.lower()
if var in items:
var += str(len(items))
items.append(var)
if item.isupper():
print(" " +
f"and ({var} := self.expect({item}))")
else:
print(f" " +
f"and ({var} := self.{item}())")
print(f" ):")
print(f" " +
f"return Node({rule.name!r}, [{', '.join(items)}])")
print(f" self.reset(pos)")
print(f" return None")
'+'
,我们生成self.expect('+')
NAME
,我们生成(name := self.expect(NAME))
expr
,我们生成 (expr := self.expr())
term '-' term
),我们会在第二个条目后附加一个数字。这里还有个小小的 bug,我会在以后的内容中修复。if (True and … )
语句,我使用它们,以便每个生成的条件都能够以and
开头。Python 的字节码编译器会优化它。class ToyParser(Parser):
@memoize
def statement(self):
pos = self.mark()
if (True
and (assignment := self.assignment())
):
return Node('statement', [assignment])
self.reset(pos)
if (True
and (expr := self.expr())
):
return Node('statement', [expr])
self.reset(pos)
if (True
and (if_statement := self.if_statement())
):
return Node('statement', [if_statement])
self.reset(pos)
return None
...
@memoize
装饰器:我“偷运”(smuggle)它进来,以便转向另一个主题:使用记忆法(memoization)来加速生成的解析器。def memoize(func):
def memoize_wrapper(self, *args):
pos = self.mark()
memo = self.memos.get(pos)
if memo is None:
memo = self.memos[pos] = {}
key = (func, args)
if key in memo:
res, endpos = memo[key]
self.reset(endpos)
else:
res = func(self, *args)
endpos = self.mark()
memo[key] = res, endpos
return res
return memoize_wrapper
self
,指向 ToyParser 实例,后者会调用被装饰的函数。self.memos = {}
添加到 Parser.__init__()
,以初始化它。self.mark()
中获得的一个新的输入位置。self.reset()
来向前移动输入位置,最后返回那缓存中的返回值。assert
断言来检查它。(pos, func, args)
作为 key,以摆脱嵌套字典的设计。