From 8c31b4585b45d274d339412f72ba77b944337dbd Mon Sep 17 00:00:00 2001 From: Bernie Innocenti Date: Wed, 8 Apr 2009 19:20:20 +0200 Subject: [PATCH] Convert wiki parser to verbose regexes --- geekigeeki.py | 61 +++++++++++++++++++++++++-------------------------- 1 file changed, 30 insertions(+), 31 deletions(-) diff --git a/geekigeeki.py b/geekigeeki.py index 651d98d..106250b 100755 --- a/geekigeeki.py +++ b/geekigeeki.py @@ -490,51 +490,50 @@ class WikiFormatter: def print_html(self): print '

' - # For each line, we scan through looking for magic - # strings, outputting verbatim any intervening text - # TODO: highlight search words (look at referrer) - scan_re = re.compile( - r"(?:" - # Formatting - + r"(?P\*\*|'''|//|''|##|``|__|\^\^|,,)" - + r"|(?P\={2,6})" - + r"|(?P
\\\\)" - + r"|(?P^-{3,})" - + r"|(?P\b(FIXME|TODO|DONE)\b)" + scan_re = re.compile(r"""(?: + # Styles and formatting + (?P \*\*|'''|//|''|\#\#|``|__|\^\^|,,) + | (?P \={2,6}) + | (?P
\\\\) + | (?P ^-{3,}) + | (?P \b(FIXME|TODO|DONE)\b) # Links - + r"|(?P\<\<([^\s\|\>]+)(?:\s*\|\s*([^\>]+)|)\>\>)" - + r"|(?P\[\[([^\s\|]+)(?:\s*\|\s*([^\]]+)|)\]\])" + | (?P \<\<([^\s\|\>]+)(?:\s*\|\s*([^\>]+)|)\>\>) + | (?P \[\[([^\s\|]+)(?:\s*\|\s*([^\]]+)|)\]\]) # Inline HTML - + r"|(?P<(/|)(br|hr|div|span|form|iframe|input|textarea|a|img|h[1-5])[^>]*>)" - + r"|(?P[<>&])" + | (?P <(/|)(br|hr|div|span|form|iframe|input|textarea|a|img|h[1-5])[^>]*>) + | (?P [<>&]) # Auto links (LEGACY) - + r"|(?P\b[a-zA-Z0-9_/-]+\.(png|gif|jpg|jpeg|bmp|ico|ogm|ogg|mkv|mpg|mpeg|mp4|avi|asf|flv|wmv|qt))" - + r"|(?P\b(?:[A-Z][a-z]+){2,}\b)" - + r"|(?P(http|https|ftp|mailto)\:[^\s'\"]+\S)" - + r"|(?P[-\w._+]+\@[\w.-]+)" + | (?P \b[a-zA-Z0-9_/-]+\.(png|gif|jpg|jpeg|bmp|ico|ogm|ogg|mkv|mpg|mpeg|mp4|avi|asf|flv|wmv|qt)) + | (?P \b(?:[A-Z][a-z]+){2,}\b) + | (?P (http|https|ftp|mailto)\:[^\s'\"]+\S) + | (?P [-\w._+]+\@[\w.-]+) # Lists, divs, spans - + r"|(?P

  • ^\s+[\*#] +)" - + r"|(?P
    \{\{\{|\s*\}\}\})"
    -            + r"|(?P\{\{([^\s\|]+)(?:\s*\|\s*([^\]]+)|)\}\})"
    +            | (?P
  • ^\s+[\*\#]\s+) + | (?P
       \{\{\{|\s*\}\}\})
    +            | (?P   \{\{([^\s\|]+)(?:\s*\|\s*([^\]]+)|)\}\})
     
                 # Tables
    -            + r"|(?P^\s*\|\|(=|)\s*)"
    -            + r"|(?P\s*\|\|(=|)\s*$)"
    -            + r"|(?P\s*\|\|(=|)\s*)"
    -            + r")")
    -        pre_re = re.compile(
    -            r"(?:"
    -            + r"(?P
    \s*\}\}\})"
    -            + r"|(?P[<>&])"
    -            + r")")
    +            | (?P    ^\s*\|\|(=|)\s*)
    +            | (?P   \s*\|\|(=|)\s*$)
    +            | (?P    \s*\|\|(=|)\s*)
    +
    +            # TODO: highlight search words (look at referrer)
    +          )""", re.VERBOSE)
    +        pre_re = re.compile("""(?:
    +              (?P
    \s*\}\}\})
    +            | (?P[<>&])"
    +            )""", re.VERBOSE)
             blank_re = re.compile(r"^\s*$")
             indent_re = re.compile(r"^\s*")
             tr_re = re.compile(r"^\s*\|\|")
             eol_re = re.compile(r"\r?\n")
    +
    +        # For each line, we scan through looking for magic strings, outputting verbatim any intervening text
             for self.line in eol_re.split(self.raw.expandtabs()):
                 # Skip pragmas
                 if self.in_header:
    -- 
    2.25.1