# -*- coding: utf-8 -*-
# lexer.py
# Copyright (C) 2006, 2007, 2008 Michael Bayer mike_mp@zzzcomputing.com
#
# This module is part of Mako and is released under
# the MIT License: http://www.opensource.org/licenses/mit-license.php

"""provides the Lexer class for parsing template strings into parse trees."""

import re
import codecs
from mako import parsetree, exceptions
from mako.pygen import adjust_whitespace

# compiled-pattern cache shared by every Lexer, keyed on (regexp, flags)
_regexp_cache = {}


class Lexer(object):
    """Tokenizes template text into a ``parsetree.TemplateNode`` tree.

    NOTE(review): the copy of this file under review had been collapsed
    onto a few physical lines and had every ``<...>``-shaped span removed.
    Sections marked "reconstructed" below were rebuilt from the surviving
    call sites and must be confirmed against the upstream Mako sources.
    """

    def __init__(self, text, filename=None, disable_unicode=False,
                 input_encoding=None, preprocessor=None):
        """Store the template source and reset all lexer state.

        ``preprocessor`` may be None, a single object, or an iterable;
        it is always normalized to something iterable.
        """
        self.text = text
        self.filename = filename
        self.template = parsetree.TemplateNode(self.filename)
        # line / column at which the most recent successful match started
        self.matched_lineno = 1
        self.matched_charpos = 0
        # line number and offset of the current scan position
        self.lineno = 1
        self.match_position = 0
        # stacks of currently open tags and control lines
        self.tag = []
        self.control_line = []
        self.disable_unicode = disable_unicode
        self.encoding = input_encoding
        if preprocessor is None:
            self.preprocessor = []
        elif not hasattr(preprocessor, '__iter__'):
            self.preprocessor = [preprocessor]
        else:
            self.preprocessor = preprocessor

    @property
    def exception_kwargs(self):
        """Keyword arguments locating the last match, for exceptions."""
        return {
            'source': self.text,
            'lineno': self.matched_lineno,
            'pos': self.matched_charpos,
            'filename': self.filename,
        }

    def match(self, regexp, flags=None):
        """match the given regular expression string and flags to the
        current text position.

        if a match occurs, update the current text and line position.

        Returns the match object, or None when nothing matched.
        """
        mp = self.match_position
        try:
            reg = _regexp_cache[(regexp, flags)]
        except KeyError:
            if flags:
                reg = re.compile(regexp, flags)
            else:
                reg = re.compile(regexp)
            _regexp_cache[(regexp, flags)] = reg

        match = reg.match(self.text, self.match_position)
        if match:
            (start, end) = match.span()
            if end == start:
                # zero-width match: step one character so the scan advances
                self.match_position = end + 1
            else:
                self.match_position = end
            self.matched_lineno = self.lineno
            lines = re.findall(r"\n", self.text[mp:self.match_position])
            # walk back to the preceding newline to get the column
            cp = mp - 1
            # NOTE(review): reconstructed -- everything after "cp >= 0 and
            # cp" up to the end of this method was stripped from the copy
            # under review; confirm against upstream.
            while (cp >= 0 and cp < self.textlength
                    and self.text[cp] != '\n'):
                cp -= 1
            self.matched_charpos = mp - cp
            self.lineno += len(lines)
        return match

    def parse_until_text(self, *text):
        """Consume source up to the first of the given terminator patterns.

        Returns ``(consumed_text, terminator)``; callers in this file
        unpack the result that way.

        NOTE(review): reconstructed -- this method was entirely missing
        from the copy under review; the skipping of comments and quoted
        strings is taken from upstream and must be confirmed.
        """
        startpos = self.match_position
        while True:
            # skip comments
            match = self.match(r'#.*\n')
            if match:
                continue
            # skip quoted strings so terminators inside them are ignored
            match = self.match(r'(\"\"\"|\'\'\'|\"|\')')
            if match:
                m = self.match(r'.*?%s' % match.group(1), re.S)
                if not m:
                    raise exceptions.SyntaxException(
                        "Unmatched '%s'" % match.group(1),
                        **self.exception_kwargs)
            else:
                match = self.match(r'(%s)' % r'|'.join(text))
                if match:
                    end = self.match_position - len(match.group(1))
                    return (self.text[startpos:end], match.group(1))
                else:
                    match = self.match(
                        r".*?(?=\"|\'|#|%s)" % r'|'.join(text), re.S)
                    if not match:
                        raise exceptions.SyntaxException(
                            "Expected: %s" % ','.join(text),
                            **self.exception_kwargs)

    def append_node(self, nodecls, *args, **kwargs):
        """Instantiate ``nodecls`` and attach it to the tree being built.

        NOTE(review): reconstructed -- this method was entirely missing
        from the copy under review. Callers rely on it to push Tag nodes
        onto ``self.tag`` and to maintain ``self.control_line``; the
        remaining details are taken from upstream and must be confirmed.
        """
        kwargs.setdefault('source', self.text)
        kwargs.setdefault('lineno', self.matched_lineno)
        kwargs.setdefault('pos', self.matched_charpos)
        kwargs['filename'] = self.filename
        node = nodecls(*args, **kwargs)
        if len(self.tag):
            self.tag[-1].nodes.append(node)
        else:
            self.template.nodes.append(node)
        if isinstance(node, parsetree.Tag):
            if len(self.tag):
                node.parent = self.tag[-1]
            self.tag.append(node)
        elif isinstance(node, parsetree.ControlLine):
            if node.isend:
                self.control_line.pop()
            elif node.is_primary:
                self.control_line.append(node)
            elif len(self.control_line) and \
                    not self.control_line[-1].is_ternary(node.keyword):
                raise exceptions.SyntaxException(
                    "Keyword '%s' not a legal ternary for keyword '%s'" %
                    (node.keyword, self.control_line[-1].keyword),
                    **self.exception_kwargs)

    def escape_code(self, text):
        """Return ``text`` prepared for embedding in generated code.

        NOTE(review): reconstructed -- this method was entirely missing
        from the copy under review; confirm against upstream.
        """
        if not self.disable_unicode and self.encoding:
            return text.encode('ascii', 'backslashreplace')
        else:
            return text

    def parse(self):
        """Lex ``self.text`` completely and return the TemplateNode.

        Raises ``exceptions.SyntaxException`` for unclosed tags or
        unterminated control keywords, and ``exceptions.CompileException``
        if no rule can consume the input.
        """
        # NOTE(review): reconstructed -- everything from here down to the
        # assignment of self.textlength was missing from the copy under
        # review. ``unicode`` is the Python 2 builtin; confirm the target
        # runtime and the exact upstream logic.
        for preproc in self.preprocessor:
            self.text = preproc(self.text)
        if not isinstance(self.text, unicode) and \
                self.text.startswith(codecs.BOM_UTF8):
            self.text = self.text[len(codecs.BOM_UTF8):]
            parsed_encoding = 'utf-8'
            me = self.match_encoding()
            if me is not None and me != 'utf-8':
                raise exceptions.CompileException(
                    "Found utf-8 BOM in file, with conflicting magic "
                    "encoding comment of '%s'" % me,
                    self.text.decode('utf-8', 'ignore'),
                    0, 0, self.filename)
        else:
            parsed_encoding = self.match_encoding()
        if parsed_encoding:
            self.encoding = parsed_encoding
        if not self.disable_unicode and not isinstance(self.text, unicode):
            if self.encoding:
                try:
                    self.text = self.text.decode(self.encoding)
                except UnicodeDecodeError:
                    raise exceptions.CompileException(
                        "Unicode decode operation of encoding '%s' failed" %
                        self.encoding,
                        self.text.decode('utf-8', 'ignore'),
                        0, 0, self.filename)
            else:
                try:
                    self.text = self.text.decode()
                except UnicodeDecodeError:
                    raise exceptions.CompileException(
                        "Could not read template using encoding of 'ascii'."
                        "  Did you forget a magic encoding comment?",
                        self.text.decode('utf-8', 'ignore'),
                        0, 0, self.filename)

        self.textlength = len(self.text)

        # try each rule in priority order; the first that consumes input
        # restarts the loop
        while True:
            if self.match_position > self.textlength:
                break
            if self.match_end():
                break
            if self.match_expression():
                continue
            if self.match_control_line():
                continue
            if self.match_comment():
                continue
            if self.match_tag_start():
                continue
            if self.match_tag_end():
                continue
            if self.match_python_block():
                continue
            if self.match_text():
                continue
            if self.match_position > self.textlength:
                break
            raise exceptions.CompileException("assertion failed")

        if len(self.tag):
            raise exceptions.SyntaxException(
                "Unclosed tag: <%%%s>" % self.tag[-1].keyword,
                **self.exception_kwargs)
        if len(self.control_line):
            raise exceptions.SyntaxException(
                "Unterminated control keyword: '%s'" %
                self.control_line[-1].keyword,
                self.text,
                self.control_line[-1].lineno,
                self.control_line[-1].pos,
                self.filename)
        return self.template

    def match_encoding(self):
        """Return the encoding named by a magic coding comment, or None."""
        match = self.match(r'#.*coding[:=]\s*([-\w.]+).*\r?\n')
        if match:
            return match.group(1)
        else:
            return None

    def match_tag_start(self):
        """Match an opening (or self-closing) ``<%keyword ...>`` tag.

        Appends a ``parsetree.Tag`` node. The body of a ``text`` tag is
        taken verbatim as a single Text node. Returns True when a tag
        was consumed, else False.
        """
        match = self.match(r'''
            \<%     # opening tag

            ([\w\.\:]+)   # keyword

            ((?:\s+\w+|=|".*?"|'.*?')*)  # attrname, = sign, string expression

            \s*     # more whitespace

            (/)?>   # closing

            ''', re.I | re.S | re.X)

        if match:
            (keyword, attr, isend) = (match.group(1).lower(),
                                      match.group(2), match.group(3))
            self.keyword = keyword
            attributes = {}
            if attr:
                for att in re.findall(
                        r"\s*(\w+)\s*=\s*(?:'([^']*)'|\"([^\"]*)\")", attr):
                    (key, val1, val2) = att
                    text = val1 or val2
                    text = text.replace('\r\n', '\n')
                    attributes[key] = self.escape_code(text)
            self.append_node(parsetree.Tag, keyword, attributes)
            if isend:
                # self-closing tag: close it immediately
                self.tag.pop()
            else:
                if keyword == 'text':
                    # NOTE(review): the closing-tag literal in this lookahead
                    # was stripped from the copy under review; restored from
                    # the tag name.
                    match = self.match(r'(.*?)(?=\</%text>)', re.S)
                    if not match:
                        raise exceptions.SyntaxException(
                            "Unclosed tag: <%%%s>" % self.tag[-1].keyword,
                            **self.exception_kwargs)
                    self.append_node(parsetree.Text, match.group(1))
                    return self.match_tag_end()
            return True
        else:
            return False

    def match_tag_end(self):
        """Match a closing ``</%keyword>`` tag and pop the tag stack.

        Raises ``exceptions.SyntaxException`` when no tag is open or the
        keyword differs from the innermost open tag.
        """
        # NOTE(review): this pattern and the "</%%%s>" parts of the two
        # messages below were stripped from the copy under review and have
        # been restored; confirm against upstream.
        match = self.match(r'\</%[\t ]*(.+?)[\t ]*>')
        if match:
            if not len(self.tag):
                raise exceptions.SyntaxException(
                    "Closing tag without opening tag: </%%%s>" %
                    match.group(1),
                    **self.exception_kwargs)
            elif self.tag[-1].keyword != match.group(1):
                raise exceptions.SyntaxException(
                    "Closing tag </%%%s> does not match tag: <%%%s>" %
                    (match.group(1), self.tag[-1].keyword),
                    **self.exception_kwargs)
            self.tag.pop()
            return True
        else:
            return False

    def match_end(self):
        """Return a true value when the scan position is at end of text."""
        match = self.match(r'\Z', re.S)
        if match:
            string = match.group()
            if string:
                return string
            else:
                return True
        else:
            return False

    def match_text(self):
        """Match plain text up to the next template construct.

        NOTE(review): the last three alternatives of the pattern and the
        code following it were stripped from the copy under review and are
        reconstructed; confirm against upstream.
        """
        match = self.match(r"""
                (.*?)         # anything, followed by:
                (
                 (?<=\n)(?=[ \t]*(?=%|\#\#)) # an eval or line-based comment preceded by a consumed \n and whitespace
                 |
                 (?=\${)   # an expression
                 |
                 (?=\#\*) # multiline comment
                 |
                 (?=</?[%&])  # a substitution or block or call start or end - don't consume
                 |
                 (\\\r?\n)         # an escaped newline  - throw away
                 |
                 \Z           # end of string
                )""", re.X | re.S)

        if match:
            text = match.group(1)
            self.append_node(parsetree.Text, text)
            return True
        else:
            return False

    def match_python_block(self):
        """Match a ``<% ... %>`` / ``<%! ... %>`` block of Python code.

        NOTE(review): the opening pattern and the ``parse_until_text``
        call were stripped from the copy under review and are
        reconstructed; confirm against upstream.
        """
        match = self.match(r"<%(!)?")
        if match:
            (line, pos) = (self.matched_lineno, self.matched_charpos)
            (text, end) = self.parse_until_text(r'%>')
            # the trailing newline helps compiler.parse() not complain
            # about indentation
            text = adjust_whitespace(text) + "\n"
            self.append_node(parsetree.Code, self.escape_code(text),
                             match.group(1) == '!', lineno=line, pos=pos)
            return True
        else:
            return False

    def match_expression(self):
        """Match a ``${expr}`` or ``${expr | escapes}`` substitution."""
        match = self.match(r"\${")
        if match:
            (line, pos) = (self.matched_lineno, self.matched_charpos)
            (text, end) = self.parse_until_text(r'\|', r'}')
            if end == '|':
                # everything after the pipe, up to '}', is the escape list
                (escapes, end) = self.parse_until_text(r'}')
            else:
                escapes = ""
            text = text.replace('\r\n', '\n')
            self.append_node(parsetree.Expression, self.escape_code(text),
                             escapes.strip(), lineno=line, pos=pos)
            return True
        else:
            return False

    def match_control_line(self):
        """Match a ``%`` control line or a ``##`` comment line.

        Raises ``exceptions.SyntaxException`` for a malformed control
        line or an ``end<keyword>`` that does not pair with the innermost
        open control keyword.
        """
        match = self.match(
            r"(?<=^)[\t ]*(%|##)[\t ]*((?:(?:\\r?\n)|[^\r\n])*)(?:\r?\n|\Z)",
            re.M)
        if match:
            operator = match.group(1)
            text = match.group(2)
            if operator == '%':
                m2 = re.match(r'(end)?(\w+)\s*(.*)', text)
                if not m2:
                    raise exceptions.SyntaxException(
                        "Invalid control line: '%s'" % text,
                        **self.exception_kwargs)
                (isend, keyword) = m2.group(1, 2)
                isend = (isend is not None)

                if isend:
                    if not len(self.control_line):
                        raise exceptions.SyntaxException(
                            "No starting keyword '%s' for '%s'" %
                            (keyword, text),
                            **self.exception_kwargs)
                    elif self.control_line[-1].keyword != keyword:
                        raise exceptions.SyntaxException(
                            "Keyword '%s' doesn't match keyword '%s'" %
                            (text, self.control_line[-1].keyword),
                            **self.exception_kwargs)
                self.append_node(parsetree.ControlLine, keyword, isend,
                                 self.escape_code(text))
            else:
                self.append_node(parsetree.Comment, text)
            return True
        else:
            return False

    def match_comment(self):
        """matches the multiline version of a comment"""
        # NOTE(review): the closing "</%doc>" was stripped from the copy
        # under review and has been restored.
        match = self.match(r"<%doc>(.*?)</%doc>", re.S)
        if match:
            self.append_node(parsetree.Comment, match.group(1))
            return True
        else:
            return False