# -*- coding: utf-8 -*- # pygen.py # Copyright (C) 2006, 2007, 2008 Michael Bayer mike_mp@zzzcomputing.com # # This module is part of Mako and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php """utilities for generating and formatting literal Python code.""" import re, string from StringIO import StringIO class PythonPrinter(object): def __init__(self, stream): # indentation counter self.indent = 0 # a stack storing information about why we incremented # the indentation counter, to help us determine if we # should decrement it self.indent_detail = [] # the string of whitespace multiplied by the indent # counter to produce a line self.indentstring = " " # the stream we are writing to self.stream = stream # a list of lines that represents a buffered "block" of code, # which can be later printed relative to an indent level self.line_buffer = [] self.in_indent_lines = False self._reset_multi_line_flags() def write(self, text): self.stream.write(text) def write_indented_block(self, block): """print a line or lines of python which already contain indentation. The indentation of the total block of lines will be adjusted to that of the current indent level.""" self.in_indent_lines = False for l in re.split(r'\r?\n', block): self.line_buffer.append(l) def writelines(self, *lines): """print a series of lines of python.""" for line in lines: self.writeline(line) def writeline(self, line): """print a line of python, indenting it according to the current indent level. this also adjusts the indentation counter according to the content of the line.""" if not self.in_indent_lines: self._flush_adjusted_lines() self.in_indent_lines = True decreased_indent = False if (line is None or re.match(r"^\s*#",line) or re.match(r"^\s*$", line) ): hastext = False else: hastext = True is_comment = line and len(line) and line[0] == '#' # see if this line should decrease the indentation level if (not decreased_indent and not is_comment and (not hastext or self._is_unindentor(line)) ): if self.indent > 0: self.indent -=1 # if the indent_detail stack is empty, the user # probably put extra closures - the resulting # module wont compile. if len(self.indent_detail) == 0: raise "Too many whitespace closures" self.indent_detail.pop() if line is None: return # write the line self.stream.write(self._indent_line(line) + "\n") # see if this line should increase the indentation level. # note that a line can both decrase (before printing) and # then increase (after printing) the indentation level. if re.search(r":[ \t]*(?:#.*)?$", line): # increment indentation count, and also # keep track of what the keyword was that indented us, # if it is a python compound statement keyword # where we might have to look for an "unindent" keyword match = re.match(r"^\s*(if|try|elif|while|for)", line) if match: # its a "compound" keyword, so we will check for "unindentors" indentor = match.group(1) self.indent +=1 self.indent_detail.append(indentor) else: indentor = None # its not a "compound" keyword. but lets also # test for valid Python keywords that might be indenting us, # else assume its a non-indenting line m2 = re.match(r"^\s*(def|class|else|elif|except|finally)", line) if m2: self.indent += 1 self.indent_detail.append(indentor) def close(self): """close this printer, flushing any remaining lines.""" self._flush_adjusted_lines() def _is_unindentor(self, line): """return true if the given line is an 'unindentor', relative to the last 'indent' event received.""" # no indentation detail has been pushed on; return False if len(self.indent_detail) == 0: return False indentor = self.indent_detail[-1] # the last indent keyword we grabbed is not a # compound statement keyword; return False if indentor is None: return False # if the current line doesnt have one of the "unindentor" keywords, # return False match = re.match(r"^\s*(else|elif|except|finally).*\:", line) if not match: return False # whitespace matches up, we have a compound indentor, # and this line has an unindentor, this # is probably good enough return True # should we decide that its not good enough, heres # more stuff to check. #keyword = match.group(1) # match the original indent keyword #for crit in [ # (r'if|elif', r'else|elif'), # (r'try', r'except|finally|else'), # (r'while|for', r'else'), #]: # if re.match(crit[0], indentor) and re.match(crit[1], keyword): return True #return False def _indent_line(self, line, stripspace = ''): """indent the given line according to the current indent level. stripspace is a string of space that will be truncated from the start of the line before indenting.""" return re.sub(r"^%s" % stripspace, self.indentstring * self.indent, line) def _reset_multi_line_flags(self): """reset the flags which would indicate we are in a backslashed or triple-quoted section.""" (self.backslashed, self.triplequoted) = (False, False) def _in_multi_line(self, line): """return true if the given line is part of a multi-line block, via backslash or triple-quote.""" # we are only looking for explicitly joined lines here, # not implicit ones (i.e. brackets, braces etc.). this is just # to guard against the possibility of modifying the space inside # of a literal multiline string with unfortunately placed whitespace current_state = (self.backslashed or self.triplequoted) if re.search(r"\\$", line): self.backslashed = True else: self.backslashed = False triples = len(re.findall(r"\"\"\"|\'\'\'", line)) if triples == 1 or triples % 2 != 0: self.triplequoted = not self.triplequoted return current_state def _flush_adjusted_lines(self): stripspace = None self._reset_multi_line_flags() for entry in self.line_buffer: if self._in_multi_line(entry): self.stream.write(entry + "\n") else: entry = string.expandtabs(entry) if stripspace is None and re.search(r"^[ \t]*[^# \t]", entry): stripspace = re.match(r"^([ \t]*)", entry).group(1) self.stream.write(self._indent_line(entry, stripspace) + "\n") self.line_buffer = [] self._reset_multi_line_flags() def adjust_whitespace(text): """remove the left-whitespace margin of a block of Python code.""" state = [False, False] (backslashed, triplequoted) = (0, 1) def in_multi_line(line): start_state = (state[backslashed] or state[triplequoted]) if re.search(r"\\$", line): state[backslashed] = True else: state[backslashed] = False def match(reg, t): m = re.match(reg, t) if m: return m, t[len(m.group(0)):] else: return None, t while line: if state[triplequoted]: m, line = match(r"%s" % state[triplequoted], line) if m: state[triplequoted] = False else: m, line = match(r".*?(?=%s|$)" % state[triplequoted], line) else: m, line = match(r'#', line) if m: return start_state m, line = match(r"\"\"\"|\'\'\'", line) if m: state[triplequoted] = m.group(0) continue m, line = match(r".*?(?=\"\"\"|\'\'\'|#|$)", line) return start_state def _indent_line(line, stripspace = ''): return re.sub(r"^%s" % stripspace, '', line) lines = [] stripspace = None for line in re.split(r'\r?\n', text): if in_multi_line(line): lines.append(line) else: line = string.expandtabs(line) if stripspace is None and re.search(r"^[ \t]*[^# \t]", line): stripspace = re.match(r"^([ \t]*)", line).group(1) lines.append(_indent_line(line, stripspace)) return "\n".join(lines)