Convert wiki parser to verbose regexes
authorBernie Innocenti <bernie@codewiz.org>
Wed, 8 Apr 2009 17:20:20 +0000 (19:20 +0200)
committerBernie Innocenti <bernie@codewiz.org>
Wed, 8 Apr 2009 17:20:20 +0000 (19:20 +0200)
geekigeeki.py

index 651d98db20e984506af833ebf7c810e20a8ae635..106250b10c63a627109a333dc7ffbfa9b85bea60 100755 (executable)
@@ -490,51 +490,50 @@ class WikiFormatter:
     def print_html(self):
         print '<div class="wiki"><p>'
 
-        # For each line, we scan through looking for magic
-        # strings, outputting verbatim any intervening text
-        # TODO: highlight search words (look at referrer)
-        scan_re = re.compile(
-            r"(?:"
-            # Formatting
-            + r"(?P<b>\*\*|'''|//|''|##|``|__|\^\^|,,)"
-            + r"|(?P<tit>\={2,6})"
-            + r"|(?P<br>\\\\)"
-            + r"|(?P<rule>^-{3,})"
-            + r"|(?P<hi>\b(FIXME|TODO|DONE)\b)"
+        scan_re = re.compile(r"""(?:
+            # Styles and formatting
+              (?P<b>     \*\*|'''|//|''|\#\#|``|__|\^\^|,,)
+            | (?P<tit>   \={2,6})
+            | (?P<br>    \\\\)
+            | (?P<rule>  ^-{3,})
+            | (?P<hi>    \b(FIXME|TODO|DONE)\b)
 
             # Links
-            + r"|(?P<macro>\<\<([^\s\|\>]+)(?:\s*\|\s*([^\>]+)|)\>\>)"
-            + r"|(?P<hurl>\[\[([^\s\|]+)(?:\s*\|\s*([^\]]+)|)\]\])"
+            | (?P<macro> \<\<([^\s\|\>]+)(?:\s*\|\s*([^\>]+)|)\>\>)
+            | (?P<hurl>  \[\[([^\s\|]+)(?:\s*\|\s*([^\]]+)|)\]\])
 
             # Inline HTML
-            + r"|(?P<html><(/|)(br|hr|div|span|form|iframe|input|textarea|a|img|h[1-5])[^>]*>)"
-            + r"|(?P<ent>[<>&])"
+            | (?P<html>  <(/|)(br|hr|div|span|form|iframe|input|textarea|a|img|h[1-5])[^>]*>)
+            | (?P<ent>   [<>&])
 
             # Auto links (LEGACY)
-            + r"|(?P<img>\b[a-zA-Z0-9_/-]+\.(png|gif|jpg|jpeg|bmp|ico|ogm|ogg|mkv|mpg|mpeg|mp4|avi|asf|flv|wmv|qt))"
-            + r"|(?P<word>\b(?:[A-Z][a-z]+){2,}\b)"
-            + r"|(?P<url>(http|https|ftp|mailto)\:[^\s'\"]+\S)"
-            + r"|(?P<email>[-\w._+]+\@[\w.-]+)"
+            | (?P<img>   \b[a-zA-Z0-9_/-]+\.(png|gif|jpg|jpeg|bmp|ico|ogm|ogg|mkv|mpg|mpeg|mp4|avi|asf|flv|wmv|qt))
+            | (?P<word>  \b(?:[A-Z][a-z]+){2,}\b)
+            | (?P<url>   (http|https|ftp|mailto)\:[^\s'\"]+\S)
+            | (?P<email> [-\w._+]+\@[\w.-]+)
 
             # Lists, divs, spans
-            + r"|(?P<li>^\s+[\*#] +)"
-            + r"|(?P<pre>\{\{\{|\s*\}\}\})"
-            + r"|(?P<inl>\{\{([^\s\|]+)(?:\s*\|\s*([^\]]+)|)\}\})"
+            | (?P<li>    ^\s+[\*\#]\s+)
+            | (?P<pre>   \{\{\{|\s*\}\}\})
+            | (?P<inl>   \{\{([^\s\|]+)(?:\s*\|\s*([^\]]+)|)\}\})
 
             # Tables
-            + r"|(?P<tr>^\s*\|\|(=|)\s*)"
-            + r"|(?P<tre>\s*\|\|(=|)\s*$)"
-            + r"|(?P<td>\s*\|\|(=|)\s*)"
-            + r")")
-        pre_re = re.compile(
-            r"(?:"
-            + r"(?P<pre>\s*\}\}\})"
-            + r"|(?P<ent>[<>&])"
-            + r")")
+            | (?P<tr>    ^\s*\|\|(=|)\s*)
+            | (?P<tre>   \s*\|\|(=|)\s*$)
+            | (?P<td>    \s*\|\|(=|)\s*)
+
+            # TODO: highlight search words (look at referrer)
+          )""", re.VERBOSE)
+        pre_re = re.compile("""(?:
+              (?P<pre>\s*\}\}\})
+            | (?P<ent>[<>&])"
+            )""", re.VERBOSE)
         blank_re = re.compile(r"^\s*$")
         indent_re = re.compile(r"^\s*")
         tr_re = re.compile(r"^\s*\|\|")
         eol_re = re.compile(r"\r?\n")
+
+        # For each line, we scan through looking for magic strings, outputting verbatim any intervening text
         for self.line in eol_re.split(self.raw.expandtabs()):
             # Skip pragmas
             if self.in_header: