Factor out code to generate inline links

[geekigeeki.git] / geekigeeki.py
diff --git a/geekigeeki.py b/geekigeeki.py

index 0ff2e7a007527f0d2fa2ce9e9212ceffd839e88e..f3ce1dcb3268d452e29c997c5a698040a15b58cf 100755 (executable)
--- a/geekigeeki.py
+++ b/geekigeeki.py
@@ -3,7 +3,7 @@
  #
  # Copyright 1999, 2000 Martin Pool <mbp@humbug.org.au>
  # Copyright 2002 Gerardo Poggiali
-# Copyright 2007, 2008 Bernie Innocenti <bernie@codewiz.org>
+# Copyright 2007, 2008, 2009 Bernie Innocenti <bernie@codewiz.org>
  #
  # This program is free software: you can redistribute it and/or modify
  # it under the terms of the GNU General Public License as published by
@@ -30,18 +30,24 @@ import cgi, sys, os, re, errno, stat
  word_re = re.compile(r"^\b((([A-Z][a-z0-9]+){2,}/)*([A-Z][a-z0-9]+){2,})\b$")
  # FIXME: we accept stuff like foo/../bar and we shouldn't
  file_re = re.compile(r"^\b([A-Za-z0-9_\-][A-Za-z0-9_\.\-/]*)\b$")
-img_re = re.compile(r"^.*\.(png|gif|jpg|jpeg|bmp|ico)$", re.IGNORECASE)
+img_re = re.compile(r"^.*\.(png|gif|jpg|jpeg|bmp|ico|ogm|ogg|mkv|mpg|mpeg|mp4|avi|asf|flv|wmv|qt)$", re.IGNORECASE)
+video_re = re.compile(r"^.*\.(ogm|ogg|mkv|mpg|mpeg|mp4|avi|asf|flv|wmv|qt)$", re.IGNORECASE)
  url_re = re.compile(r"^[a-z]{3,8}://[^\s'\"]+\S$")
  link_re = re.compile("(?:\[\[|{{)([^\s\|]+)(?:\s*\|\s*([^\]]+)|)(?:\]\]|}})")
  
  title_done = False
  
-
  # CGI stuff ---------------------------------------------------------
-
  def script_name():
      return os.environ.get('SCRIPT_NAME', '')
  
+def query_string():
+    """Return the requested page: PATH_INFO minus its leading '/', else QUERY_STRING, else 'FrontPage'"""
+    name = os.environ.get('PATH_INFO', '')
+    if not name.startswith('/'):
+        return os.environ.get('QUERY_STRING', '') or 'FrontPage'
+    return name[1:] or 'FrontPage'
+
  def privileged_path():
      return privileged_url or script_name()
  
@@ -61,8 +67,11 @@ def get_hostname(addr):
      except Exception:
          return addr
  
+def is_external_url(pathname): # truthy when pathname matches url_re (scheme://...) or starts with '/'
+    return (url_re.match(pathname) or pathname.startswith('/'))
+
  def relative_url(pathname, privileged=False):
-    if not (url_re.match(pathname) or pathname.startswith('/')):
+    if not is_external_url(pathname):
          if privileged:
              url = privileged_path()
          else:
@@ -77,6 +86,13 @@ def permalink(s):
  def emit_header(mime_type="text/html"):
      print "Content-type: " + mime_type + "; charset=utf-8\n"
  
+def sendfile(dest_file, src_file):
+    """Copy src_file into dest_file (both file objects), 64 KiB per read, until read() returns nothing"""
+    chunk = src_file.read(65536)
+    while chunk:
+        dest_file.write(chunk)
+        chunk = src_file.read(65536)
+
  def send_guru(msg_text, msg_type):
      if not msg_text: return
      print '<pre id="guru" onclick="this.style.display = \'none\'" class="' + msg_type + '">'
@@ -86,28 +102,10 @@ def send_guru(msg_text, msg_type):
      if msg_type == 'error':
          print '\n      Guru Meditation #DEADBEEF.ABADC0DE'
      print '</pre>'
-    # FIXME: This little JS snippet is harder to pass than ACID 3.0 
-    print """
-    <script language="JavaScript" type="text/javascript">
-        var guru = document.getElementById('guru');
-        // Firefox 2.0 doesn't take border-color, but returns border-top-color fine
-        var color = document.defaultView.getComputedStyle(guru,null).getPropertyValue('border-top-color');
-
-        function guruOn() {
-            guru.style.setProperty('border-color', color, '');
-            setTimeout('guruOff()', 1000);
-        }
-        function guruOff() {
-            guru.style.setProperty('border-color', '#000000', '');
-            setTimeout('guruOn()', 1000);
-        }
-        // Safari 2.0 returns this rgba crap
-        // Konqueror 3.5.6 doesn't seem to support computed properties
-        if (color && color != 'rgba(0, 0, 0, 0)') {
-            //window.alert("enabled! color='" + color + "'");
-            guruOn();
-        }
-    </script>"""
+    try:
+        sendfile(sys.stdout, open('gurumeditation.js', 'rb'))
+    except IOError, err:
+        pass
  
  def send_title(name, text="Limbo", msg_text=None, msg_type='error', writable=False):
      global title_done
@@ -153,11 +151,11 @@ def send_title(name, text="Limbo", msg_text=None, msg_type='error', writable=Fal
  
      # Navbar
      print '<div class="nav">'
+    print link_tag('FrontPage', relative_url(site_icon or 'Home'), 'navlink')
      if name:
          print '  <b>' + link_tag('?fullsearch=' + name, text, 'navlink') + '</b> '
      else:
          print '  <b>' + text + '</b> '
-    print ' | ' + link_tag('FrontPage', 'Home', 'navlink')
      print ' | ' + link_tag('FindPage', 'Find Page', 'navlink')
      if 'history_url' in globals():
          print ' | <a href="' + relative_url(history_url) + '" class="navlink">Recent Changes</a>'
@@ -177,7 +175,7 @@ def send_title(name, text="Limbo", msg_text=None, msg_type='error', writable=Fal
  
      user = remote_user()
      if user != 'AnonymousCoward':
-        print ' | <span class="login"><i>logged in as <b>' + cgi.escape(user) + '</b></i></span>'
+        print ' | <span class="login"><i><b>' + link_tag('User/' + user, user) + '</b></i></span>'
  
      print '<hr /></div>'
  
@@ -186,19 +184,35 @@ def send_httperror(status="403 Not Found", query=""):
      send_title(None, msg_text=("%s: on query '%s'" % (status, query)))
      send_footer()
  
-def link_tag(params, text=None, ss_class=None, privileged=False):
+def link_tag(params, text=None, link_class=None, privileged=False):
      if text is None:
          text = params # default
-    classattr = ''
-    if ss_class:
-        classattr += 'class="%s" ' % ss_class
-        # Prevent crawlers from following links potentially added by spammers or to generated pages
-        if ss_class == 'external' or ss_class == 'navlink':
-            classattr += 'rel="nofollow" '
-    elif url_re.match(params):
-        classattr += 'rel="nofollow" '
+    elif img_re.match(text):
+        text = '<img border="0" src="' + text + '" />'
+
+    if not link_class:
+        if is_external_url(params):
+            link_class = 'external'
+        elif file_re.match(params) and Page(params).exists():
+            link_class = 'wikilink'
+        else:
+            text = nonexist_pfx + text # mark the label, not the target, exactly as Page.link_to() did
+            link_class = 'nonexistent'
+
+    classattr = 'class="%s" ' % link_class
+    # Prevent crawlers from following links potentially added by spammers or to generated pages
+    if link_class == 'external' or link_class == 'navlink':
+        classattr += 'rel="nofollow" ' # trailing space keeps the attribute apart from href=
+
      return '<a %shref="%s">%s</a>' % (classattr, relative_url(params, privileged=privileged), text)
  
+def link_inline(name, descr=None, args=''):
+    """Inline media markup: a <video> element when name matches video_re, otherwise a self-linked <img>"""
+    if video_re.match(name):
+        return '<video src="%s">Your browser does not support the HTML5 video tag</video>' % name
+    alt_text = descr or name
+    return '<a href="%s"><img border="0" src="%s" alt="%s" /></a>' % (name, name + args, alt_text)
+
  # Search ---------------------------------------------------
  
  def handle_fullsearch(needle):
@@ -220,7 +234,7 @@ def handle_fullsearch(needle):
  
      print "<ul>"
      for (count, page_name) in hits:
-        print '<li><p>' + Page(page_name).link_to()
+        print '<li><p>' + link_tag(page_name)
          print ' . . . . ' + `count`
          print ['match', 'matches'][count != 1]
          print '</p></li>'
@@ -238,7 +252,7 @@ def handle_titlesearch(needle):
  
      print "<ul>"
      for filename in hits:
-        print '<li><p>' + Page(filename).link_to() + "</p></li>"
+        print '<li><p>' + link_tag(filename) + "</p></li>"
      print "</ul>"
  
      print_search_stats(len(hits), len(all_pages))
@@ -290,10 +304,10 @@ def send_footer(mod_string=None):
      print '''
  <div id="footer"><hr />
  <p class="copyright">
-<a rel="license" href="http://creativecommons.org/licenses/by-sa/3.0/"><img class="license" alt="Creative Commons License" src="http://i.creativecommons.org/l/by-sa/3.0/80x15.png" /></a>
+<a rel="license" href="http://creativecommons.org/licenses/by-sa/3.0/"><img class="license" alt="Creative Commons License" src="%s" /></a>
  <span class="benchmark">generated in %0.3fs</span> by <a href="http://www.codewiz.org/wiki/GeekiGeeki">GeekiGeeki</a> version %s
  </p>
-''' % (clock() - start_time, __version__)
+''' % (relative_url('cc-by-sa.png'), clock() - start_time, __version__)
      if mod_string:
          print '<p class="modified">last modified %s</p>' % mod_string
      print '</div></body></html>'
@@ -314,14 +328,14 @@ class WikiFormatter:
          self.styles = {
              #wiki   html   enabled?
              "//":  ["em",  False],
-            "''":  ["em",  False],
              "**":  ["b",   False],
-            "'''": ["b",   False],
              "##":  ["tt",  False],
-            "``":  ["tt",  False],
              "__":  ["u",   False],
              "^^":  ["sup", False],
-            ",,":  ["sub", False]
+            ",,":  ["sub", False],
+            "''":  ["em",  False], # LEGACY
+            "'''": ["b",   False], # LEGACY
+            "``":  ["tt",  False], # LEGACY
          }
  
      def _b_repl(self, word):
@@ -345,19 +359,6 @@ class WikiFormatter:
      def _rule_repl(self, word):
          return self._undent() + '\n<hr size="%d" noshade="noshade" />\n' % (len(word) - 2)
  
-    def _word_repl(self, word):
-        return Page(word).link_to()
-
-    def _img_repl(self, word):
-        pathname = relative_url(word)
-        return '<a href="%s"><img border="0" src="%s" /></a>' % (pathname, pathname)
-
-    def _url_repl(self, word):
-        if img_re.match(word):
-            return '<a href="%s"><img border="0" src="%s" /></a>' % (word, word)
-        else:
-            return '<a href="%s" rel="nofollow" class="external">%s</a>' % (word, word)
-
      def _macro_repl(self, word):
          m = re.compile("\<\<([^\s\|\>]+)(?:\s*\|\s*([^\>]+)|)\>\>").match(word)
          name = m.group(1)
@@ -376,54 +377,64 @@ class WikiFormatter:
          if macro:
              return macro(argv)
          else:
-            return '<strong class="error">&lt;&lt;' + '|'.join(argv) + '&gt;&gt;</strong>'
+            msg = '&lt;&lt;' + '|'.join(argv) + '&gt;&gt;'
+            if not self.in_html:
+                msg = '<strong class="error">' + msg + '</strong>'
+            return msg
  
      def _hurl_repl(self, word):
          m = link_re.match(word)
-        name = m.group(1)
-        descr = m.group(2)
-        if descr is None:
-            descr = name
-        elif img_re.match(m.group(2)):
-            descr = '<img border="0" src="' + descr + '" />'
-
-        return link_tag(name, descr, 'wikilink')
+        return link_tag(m.group(1), m.group(2))
  
      def _inl_repl(self, word):
          m = link_re.match(word)
-        name = m.group(1)
-        descr = m.group(2) or name
-        name = relative_url(name)
-        argv = descr.split('|')
-        descr = argv.pop(0)
+        name = relative_url(m.group(1))
+        descr = m.group(2)
  
-        if argv:
-            args = '?' + '&amp;'.join(argv)
-        else:
+        if descr:
+            argv = descr.split('|')
+            descr = argv.pop(0)
              args = ''
+            if argv:
+                args = '?' + '&amp;'.join(argv)
  
-        if descr:
              # The "extthumb" nonsense works around a limitation of the HTML block model
-            return '<div class="extthumb"><div class="thumb"><a href="%s"><img border="0" src="%s" alt="%s" /></a><div class="caption">%s</div></div></div>' \
-                    % (name, name + args, descr, descr)
+            return '<div class="extthumb"><div class="thumb">' \
+                + link_inline(name, descr, args) \
+                + '<div class="caption">' + descr + '</div></div></div>'
          else:
-            return '<a href="%s"><img border="0" src="%s" /></a>' % (name, name + args)
-
-    def _email_repl(self, word):
-        return '<a href="mailto:%s">%s</a>' % (word, word)
+            return link_inline(name, name)
  
      def _html_repl(self, word):
          self.in_html += 1
          return word; # Pass through
  
+    def _htmle_repl(self, word):
+        if self.in_html > 0: self.in_html -= 1 # clamp at 0: a negative depth is truthy and would disable escaping
+        return word # Pass through
+
      def _ent_repl(self, s):
-        if self.in_html and s == '>':
-            self.in_html -= 1
-            return '>'
+        if self.in_html:
+            return s; # Pass through
          return {'&': '&amp;',
                  '<': '&lt;',
                  '>': '&gt;'}[s]
  
+    def _img_repl(self, word): # LEGACY
+        return self._inl_repl('{{' + word + '}}')
+
+    def _word_repl(self, word): # LEGACY
+        if self.in_html: return word # pass through
+        return link_tag(word)
+
+    def _url_repl(self, word): # LEGACY
+        if self.in_html: return word # pass through
+        return link_tag(word)
+
+    def _email_repl(self, word): # LEGACY
+        if self.in_html: return word # pass through
+        return '<a href="mailto:%s">%s</a>' % (word, word)
+
      def _li_repl(self, match):
          if self.in_li:
              return '</li><li>'
@@ -499,51 +510,51 @@ class WikiFormatter:
      def print_html(self):
          print '<div class="wiki"><p>'
  
-        # For each line, we scan through looking for magic
-        # strings, outputting verbatim any intervening text
-        # TODO: highlight search words (look at referrer)
-        scan_re = re.compile(
-            r"(?:"
-            # Formatting
-            + r"(?P<b>\*\*|'''|//|''|##|``|__|\^\^|,,)"
-            + r"|(?P<tit>\={2,6})"
-            + r"|(?P<br>\\\\)"
-            + r"|(?P<rule>^-{3,})"
-            + r"|(?P<hi>\b(FIXME|TODO|DONE)\b)"
+        scan_re = re.compile(r"""(?:
+            # Styles and formatting
+              (?P<b>     \*\*|'''|//|''|\#\#|``|__|\^\^|,,)
+            | (?P<tit>   \={2,6})
+            | (?P<br>    \\\\)
+            | (?P<rule>  ^-{3,})
+            | (?P<hi>    \b( FIXME | TODO | DONE )\b )
  
              # Links
-            + r"|(?P<macro>\<\<([^\s\|\>]+)(?:\s*\|\s*([^\>]+)|)\>\>)"
-            + r"|(?P<hurl>\[\[([^\s\|]+)(?:\s*\|\s*([^\]]+)|)\]\])"
+            | (?P<macro> \<\<([^\s\|\>]+)(?:\s*\|\s*([^\>]+)|)\>\>)
+            | (?P<hurl>  \[\[([^\s\|]+)(?:\s*\|\s*([^\]]+)|)\]\])
  
              # Inline HTML
-            + r"|(?P<html><(/|)(br|hr|div|span|form|iframe|input|textarea|a|img|h[1-5])[^>]*>)"
-            + r"|(?P<ent>[<>&])"
+            | (?P<html>  <(br|hr|div|span|form|iframe|input|textarea|a|img|h[1-5])\b )
+            | (?P<htmle> ( /\s*> | </(br|hr|div|span|form|iframe|input|textarea|a|img|h[1-5])> ) )
+            | (?P<ent>   [<>&] )
  
              # Auto links (LEGACY)
-            + r"|(?P<img>\b[a-zA-Z0-9_/-]+\.(png|gif|jpg|jpeg|bmp|ico))"
-            + r"|(?P<word>\b(?:[A-Z][a-z]+){2,}\b)"
-            + r"|(?P<url>(http|https|ftp|mailto)\:[^\s'\"]+\S)"
-            + r"|(?P<email>[-\w._+]+\@[\w.-]+)"
+            | (?P<img>   \b[a-zA-Z0-9_/-]+\.(png|gif|jpg|jpeg|bmp|ico|ogm|ogg|mkv|mpg|mpeg|mp4|avi|asf|flv|wmv|qt))
+            | (?P<word>  \b(?:[A-Z][a-z]+){2,}\b)
+            | (?P<url>   (http|https|ftp|mailto)\:[^\s'\"]+\S)
+            | (?P<email> [-\w._+]+\@[\w.-]+)
  
              # Lists, divs, spans
-            + r"|(?P<li>^\s+[\*#] +)"
-            + r"|(?P<pre>\{\{\{|\s*\}\}\})"
-            + r"|(?P<inl>\{\{([^\s\|]+)(?:\s*\|\s*([^\]]+)|)\}\})"
+            | (?P<li>    ^\s+[\*\#]\s+)
+            | (?P<pre>   \{\{\{|\s*\}\}\})
+            | (?P<inl>   \{\{([^\s\|]+)(?:\s*\|\s*([^\]]+)|)\}\})
  
              # Tables
-            + r"|(?P<tr>^\s*\|\|(=|)\s*)"
-            + r"|(?P<tre>\s*\|\|(=|)\s*$)"
-            + r"|(?P<td>\s*\|\|(=|)\s*)"
-            + r")")
-        pre_re = re.compile(
-            r"(?:"
-            + r"(?P<pre>\s*\}\}\})"
-            + r"|(?P<ent>[<>&])"
-            + r")")
+            | (?P<tr>    ^\s*\|\|(=|)\s*)
+            | (?P<tre>   \s*\|\|(=|)\s*$)
+            | (?P<td>    \s*\|\|(=|)\s*)
+
+            # TODO: highlight search words (look at referrer)
+          )""", re.VERBOSE)
+        pre_re = re.compile(r"""(?:
+              (?P<pre>\s*\}\}\})   # closing }}} with any leading whitespace
+            | (?P<ent>[<>&])       # a single <, > or & character
+            )""", re.VERBOSE)
          blank_re = re.compile(r"^\s*$")
          indent_re = re.compile(r"^\s*")
          tr_re = re.compile(r"^\s*\|\|")
          eol_re = re.compile(r"\r?\n")
+
+        # For each line, we scan through looking for magic strings, outputting verbatim any intervening text
          for self.line in eol_re.split(self.raw.expandtabs()):
              # Skip pragmas
              if self.in_header:
@@ -595,13 +606,6 @@ class Page:
                  return False
              raise err
  
-    def link_to(self):
-        word = self.page_name
-        if self.exists():
-            return link_tag(word, word, 'wikilink')
-        else:
-            return link_tag(word, nonexist_pfx + word, 'nonexistent')
-
      def get_raw_body(self):
          try:
              return open(self._filename(), 'rb').read()
@@ -731,7 +735,7 @@ class Page:
              //-->
              </script>
              """
-        print "<p>" + Page('EditingTips').link_to() + "</p>"
+        print "<p>" + link_tag('EditingTips') + "</p>"
          if preview:
              print "<div class='preview'>"
              WikiFormatter(preview).print_html()
@@ -803,12 +807,7 @@ def main():
              handler(form[cmd].value)
              break
      else:
-        path_info = os.environ.get('PATH_INFO', '')
-        if len(path_info) and path_info[0] == '/':
-            query = path_info[1:] or 'FrontPage'
-        else:
-            query = os.environ.get('QUERY_STRING', '') or 'FrontPage'
-
+        query = query_string()
          if file_re.match(query):
              if word_re.match(query):
                  Page(query).format()