Bugfix hurl handling of image links

[geekigeeki.git] / geekigeeki.py
diff --git a/geekigeeki.py b/geekigeeki.py

index 76888601650f52e137b24a9278cbf9c5c12aad04..8de46ab91cbf4d2b14e4ce25753a0848545c311c 100755 (executable)
--- a/geekigeeki.py
+++ b/geekigeeki.py
@@ -3,7 +3,7 @@
  #
  # Copyright 1999, 2000 Martin Pool <mbp@humbug.org.au>
  # Copyright 2002 Gerardo Poggiali
-# Copyright 2007, 2008 Bernardo Innocenti <bernie@codewiz.org>
+# Copyright 2007, 2008 Bernie Innocenti <bernie@codewiz.org>
  #
  # This program is free software: you can redistribute it and/or modify
  # it under the terms of the GNU General Public License as published by
@@ -28,11 +28,12 @@ from os import path, environ
  
  # Regular expression defining a WikiWord
  # (but this definition is also assumed in other places)
-word_re = re.compile(r"^\b((([A-Z][a-z]+){2,}/)*([A-Z][a-z]+){2,})\b$")
+word_re = re.compile(r"^\b((([A-Z][a-z0-9]+){2,}/)*([A-Z][a-z0-9]+){2,})\b$")
  # FIXME: we accept stuff like foo/../bar and we shouldn't
  file_re = re.compile(r"^\b([A-Za-z0-9_\-][A-Za-z0-9_\.\-/]*)\b$")
-img_re = re.compile(r"^.*\.(png|gif|jpg|jpeg)$", re.IGNORECASE)
+img_re = re.compile(r"^.*\.(png|gif|jpg|jpeg|bmp|ico)$", re.IGNORECASE)
  url_re = re.compile(r"^[a-z]{3,8}://[^\s'\"]+\S$")
+link_re = re.compile("(?:\[\[|{{)([^\s\|]+)(?:\s*\|\s*([^\]]+)|)(?:\]\]|}})")
  
  title_done = False
  
@@ -70,8 +71,10 @@ def relative_url(pathname, privileged=False):
          pathname = url + '/' + pathname
      return pathname
  
-# Formatting stuff --------------------------------------------------
+def permalink(s):
+    """Return s as an anchor id: lowercased, reduced to [a-z0-9_ ], spaces turned into dashes."""
+    kept = re.sub('[^a-z0-9_ ]', '', s.lower())
+    return re.sub(' ', '-', kept.strip())
  
+# Formatting stuff --------------------------------------------------
  def emit_header(mime_type="text/html"):
      print "Content-type: " + mime_type + "; charset=utf-8\n"
  
@@ -107,7 +110,7 @@ def send_guru(msg_text, msg_type):
          }
      </script>"""
  
-def send_title(name, text="Limbo", msg_text=None, msg_type='error'):
+def send_title(name, text="Limbo", msg_text=None, msg_type='error', writable=False):
      global title_done
      if title_done: return
  
@@ -122,12 +125,23 @@ def send_title(name, text="Limbo", msg_text=None, msg_type='error'):
      print ' <meta http-equiv="Content-Type" content="application/xhtml+xml; charset=utf-8" />'
      if not name:
          print ' <meta name="robots" content="noindex,nofollow" />'
-    for css in css_url:
-        print ' <link rel="stylesheet" type="text/css" href="%s" />' % relative_url(css)
+
+    for link in link_urls:
+        rel, href = link
+        print ' <link rel="%s" href="%s" />' % (rel, relative_url(href))
+
+    if name and writable and privileged_url is not None:
+        print ' <link rel="alternate" type="application/x-wiki" title="Edit this page" href="%s" />' \
+            % (privileged_path() + '?edit=' + name)
+
+    if history_url is not None:
+        print ' <link rel="alternate" type="application/rss+xml" title="RSS" href="%s" />' \
+            % relative_url(history_url + '?a=rss')
+
      print '</head>'
  
      # Body
-    if name and privileged_url is not None:
+    if name and writable and privileged_url is not None:
          print '<body ondblclick="location.href=\'' + privileged_path() + '?edit=' + name + '\'">'
      else:
          print '<body>'
@@ -141,17 +155,21 @@ def send_title(name, text="Limbo", msg_text=None, msg_type='error'):
          print '  <b>' + link_tag('?fullsearch=' + name, text, 'navlink') + '</b> '
      else:
          print '  <b>' + text + '</b> '
-    print ' | ' + link_tag('FrontPage', 'Front Page', 'navlink')
+    print ' | ' + link_tag('FrontPage', 'Home', 'navlink')
      print ' | ' + link_tag('FindPage', 'Find Page', 'navlink')
      if 'history_url' in globals():
-        print ' | <a href="' + history_url + '" class="navlink">Recent Changes</a>'
+        print ' | <a href="' + relative_url(history_url) + '" class="navlink">Recent Changes</a>'
          if name:
-            print ' | <a href="' + history_url + '?a=history;f=' + name + '" class="navlink">Page History</a>'
+            print ' | <a href="' + relative_url(history_url + '?a=history;f=' + name) + '" class="navlink">Page History</a>'
  
      if name:
          print ' | ' + link_tag('?raw=' + name, 'Raw Text', 'navlink')
          if privileged_url is not None:
-            print ' | ' + link_tag('?edit=' + name, 'Edit Page', 'navlink', privileged=True)
+            if writable:
+                print ' | ' + link_tag('?edit=' + name, 'Edit', 'navlink', privileged=True)
+            else:
+                print ' | ' + link_tag(name, 'Login', 'navlink', privileged=True)
+
      else:
          print ' | <i>Immutable Page</i>'
  
@@ -231,11 +249,11 @@ def handle_edit(pagename):
              pg.save(form['file'].file.read())
          else:
              pg.save(form['savetext'].value.replace('\r\n', '\n'))
-        pg.send_page()
+        pg.format()
      elif 'cancel' in form:
          pg.msg_text = 'Editing canceled'
          pg.msg_type = 'notice'
-        pg.send_page()
+        pg.format()
      else: # preview or edit
          text = None
          if 'preview' in form:
@@ -246,8 +264,8 @@ def make_index_key():
      links = map(lambda ch: '<a href="#%s">%s</a>' % (ch, ch), 'abcdefghijklmnopqrstuvwxyz')
      return '<p><center>'+ ' | '.join(links) + '</center></p>'
  
-def page_list():
-    return filter(word_re.match, os.listdir(data_dir))
+def page_list(dir = None, re = word_re):
+    return filter(re.match, os.listdir(dir or data_dir))
  
  def send_footer(name, mod_string=None):
      if globals().get('debug_cgi', False):
@@ -262,68 +280,7 @@ def send_footer(name, mod_string=None):
          print '<p class="modified">last modified %s</p>' % mod_string
      print '</div></body></html>'
  
-# Macros ------------------------------------------------------------
-def _macro_TitleSearch(*vargs):
-    return _macro_search("titlesearch")
-
-def _macro_FullSearch(*vargs):
-    return _macro_search("fullsearch")
-
-def _macro_search(type):
-    default = ''
-    if 'value' in form:
-        default = form['value'].value
-    return """<form method="get"><input name="%s" size="30" value="%s" /><input type="submit" value="Search" /></form>""" % (type, default)
-
-def _macro_WordIndex(*vargs):
-    s = make_index_key()
-    pages = list(page_list())
-    map = {}
-    word_re = re.compile('[A-Z][a-z]+')
-    for name in pages:
-        for word in word_re.findall(name):
-            try:
-                map[word].append(name)
-            except KeyError:
-                map[word] = [name]
-
-    all_words = map.keys()
-    all_words.sort()
-    last_letter = None
-    # set title
-    for word in all_words:
-        letter = word[0].lower()
-        if letter != last_letter:
-            s = s + '; <a name="%s"><h3>%s</h3></a>' % (letter, letter)
-            last_letter = letter
-
-        s = s + '<b>%s</b><ul>' % word
-        links = map[word]
-        links.sort()
-        last_page = None
-        for name in links:
-            if name == last_page: continue
-            s = s + '<li>' + Page(name).link_to()
-        s = s + '</ul>'
-    return s
-
-
-def _macro_TitleIndex(*vargs):
-    s = make_index_key()
-    pages = list(page_list())
-    pages.sort()
-    current_letter = None
-    for name in pages:
-        letter = name[0].lower()
-        if letter != current_letter:
-            s += '<a name="%s"><h3>%s</h3></a>' % (letter, letter)
-            current_letter = letter
-        else:
-            s += '<br />'
-        s += Page(name).link_to()
-    return s
-
-class PageFormatter:
+class WikiFormatter:
      """Object that turns Wiki markup into HTML.
  
      All formatting commands can be parsed one line at a time, though
@@ -332,7 +289,7 @@ class PageFormatter:
      def __init__(self, raw):
          self.raw = raw
          self.h_level = 0
-        self.in_pre = self.in_table = self.in_li = False
+        self.in_pre = self.in_html = self.in_table = self.in_li = False
          self.in_header = True
          self.list_indents = []
          self.tr_cnt = 0
@@ -360,8 +317,8 @@ class PageFormatter:
              self.h_level = 0
          else:
              self.h_level = len(word) - 1
-            abridged = re.sub('[^A-Za-z0-9_]', '', self.line).lower()
-            result = '\n</p><h%d id="%s"><a class="heading" href="#%s">¶</a> ' % (self.h_level, abridged, abridged)
+            link = permalink(self.line)
+            result = '\n</p><h%d id="%s"><a class="heading" href="#%s">¶</a> ' % (self.h_level, link, link)
          return result
  
      def _br_repl(self, word):
@@ -383,31 +340,65 @@ class PageFormatter:
          else:
              return '<a href="%s" rel="nofollow" class="external">%s</a>' % (word, word)
  
-    def _hurl_repl(self, word):
-        m = re.compile("\[\[([^ \t\n\r\f\v\|]+)(?:\s*\|\s*([^\]]+)|)\]\]").match(word)
+    def _macro_repl(self, word):
+        """Expand a <<Name|arg|...>> macro call into the macro's output.
+
+        The macro is the global function _macro_<Name>; when it is not
+        defined yet, macros/<Name>.py is executed to define it.  The
+        function receives argv: the name followed by the stripped arguments.
+        A macro that cannot be found is rendered as an inline error.
+        """
+        m = re.compile("\<\<([^\s\|\>]+)(?:\s*\|\s*([^\>]+)|)\>\>").match(word)
          name = m.group(1)
-        descr = m.group(2) or name
+        argv = [name]
+        if m.group(2):
+            argv.extend(m.group(2).split('|'))
+        argv = map(str.strip, argv)
  
          macro = globals().get('_macro_' + name)
+        if not macro and re.match(r"^\w+$", name):
+            # The name comes from page text: only plain identifiers may be
+            # turned into a path, so "../x" cannot escape macros/.
+            try:
+                execfile("macros/" + name + ".py", globals())
+            except IOError, er:
+                # A missing macro file is reported inline below, not raised.
+                if er.errno != errno.ENOENT:
+                    raise er
+            macro = globals().get('_macro_' + name)
          if macro:
-            return macro(name, descr)
-        elif img_re.match(name):
-            name = relative_url(name)
-            # The "extthumb" nonsense works around a limitation of the HTML block model
-            return '<div class="extthumb"><div class="thumb"><a href="%s"><img border="0" src="%s" alt="%s" /></a><div class="caption">%s</div></div></div>' % (name, name, descr, descr)
+            return macro(argv)
          else:
-            if img_re.match(descr):
-                descr = '<img border="0" src="' + descr + '" />'
+            return '<strong class="error">&lt;&lt;' + '|'.join(argv) + '&gt;&gt;</strong>'
+
+    def _hurl_repl(self, word):
+        """Render a [[target|label]] link; a label naming an image file becomes an <img> tag."""
+        target, label = link_re.match(word).groups()
+        if label is None:
+            # No explicit label: show the target itself.
+            label = target
+        elif img_re.match(label):
+            label = '<img border="0" src="%s" />' % label
+
+        return link_tag(target, label, 'wikilink')
  
-            return link_tag(name, descr, 'wikilink')
+    def _inl_repl(self, word):
+        """Render an inline {{image|caption|arg|...}} reference.
+
+        The text after the first "|" is the caption (the image name when
+        absent); further "|"-separated items are appended to the image URL
+        as a query string.  An empty caption yields a bare linked image.
+        """
+        match = link_re.match(word)
+        target = match.group(1)
+        fields = (match.group(2) or target).split('|')
+        href = relative_url(target)
+        caption, params = fields[0], fields[1:]
+
+        src = href
+        if params:
+            src += '?' + '&amp;'.join(params)
+
+        if not caption:
+            return '<a href="%s"><img border="0" src="%s" /></a>' % (href, src)
+
+        # The "extthumb" nonsense works around a limitation of the HTML block model
+        return '<div class="extthumb"><div class="thumb"><a href="%s"><img border="0" src="%s" alt="%s" /></a><div class="caption">%s</div></div></div>' \
+                % (href, src, caption, caption)
  
      def _email_repl(self, word):
          return '<a href="mailto:%s">%s</a>' % (word, word)
  
      def _html_repl(self, word):
+        self.in_html += 1
          return word; # Pass through
  
      def _ent_repl(self, s):
+        if self.in_html and s == '>':
+            self.in_html -= 1
+            return '>'
          return {'&': '&amp;',
                  '<': '&lt;',
                  '>': '&gt;'}[s]
@@ -497,20 +488,26 @@ class PageFormatter:
              + r"|(?P<tit>\={2,6})"
              + r"|(?P<br>\\\\)"
              + r"|(?P<rule>^-{3,})"
-            + r"|(?P<html><(/|)(div|span|iframe)[^<>]*>)"
-            + r"|(?P<ent>[<>&])"
              + r"|(?P<hi>\b(FIXME|TODO|DONE)\b)"
  
              # Links
-            + r"|(?P<img>\b[a-zA-Z0-9_-]+\.(png|gif|jpg|jpeg|bmp))"
+            + r"|(?P<macro>\<\<([^\s\|\>]+)(?:\s*\|\s*([^\>]+)|)\>\>)"
+            + r"|(?P<hurl>\[\[([^\s\|]+)(?:\s*\|\s*([^\]]+)|)\]\])"
+
+            # Inline HTML
+            + r"|(?P<html><(/|)(br|hr|div|form|iframe|input|span))"
+            + r"|(?P<ent>[<>&])"
+
+            # Auto links (LEGACY)
+            + r"|(?P<img>\b[a-zA-Z0-9_/-]+\.(png|gif|jpg|jpeg|bmp|ico))"
              + r"|(?P<word>\b(?:[A-Z][a-z]+){2,}\b)"
-            + r"|(?P<hurl>\[\[([^ \t\n\r\f\v\|]+)(?:\s*\|\s*([^\]]+)|)\]\])"
              + r"|(?P<url>(http|https|ftp|mailto)\:[^\s'\"]+\S)"
              + r"|(?P<email>[-\w._+]+\@[\w.-]+)"
  
              # Lists, divs, spans
              + r"|(?P<li>^\s+[\*#] +)"
              + r"|(?P<pre>\{\{\{|\s*\}\}\})"
+            + r"|(?P<inl>\{\{([^\s\|]+)(?:\s*\|\s*([^\]]+)|)\}\})"
  
              # Tables
              + r"|(?P<tr>^\s*\|\|(=|)\s*)"
@@ -562,7 +559,7 @@ class Page:
          # look for the end of words and the start of a new word and insert a space there
          return re.sub('([a-z])([A-Z])', r'\1 \2', self.page_name)
  
-    def _text_filename(self):
+    def _filename(self):
          return path.join(data_dir, self.page_name)
  
      def _tmp_filename(self):
@@ -570,7 +567,7 @@ class Page:
  
      def exists(self):
          try:
-            os.stat(self._text_filename())
+            os.stat(self._filename())
              return True
          except OSError, er:
              if er.errno == errno.ENOENT:
@@ -586,20 +583,36 @@ class Page:
  
      def get_raw_body(self):
          try:
-            return open(self._text_filename(), 'rb').read()
+            return open(self._filename(), 'rb').read()
          except IOError, er:
              if er.errno == errno.ENOENT:
                  return '' # just doesn't exist, use default
              if er.errno == errno.EISDIR:
-                return 'DIR'
+                return self.format_dir()
              raise er
  
+    def format_dir(self):
+        """Return wiki markup listing the directory behind this page.
+
+        The first line is a heading in which every component of the page
+        name links to the corresponding parent path.  It is followed by one
+        entry per directory member accepted by file_re: names matching
+        img_re become inline {{...}} images, anything else a bullet link.
+        """
+        out = '== '
+        parent = ''
+        for part in self.page_name.split('/'):
+            parent = (parent + '/' + part) if parent else part
+            out += '[[' + parent + '|' + part + ']]/'
+        out += ' ==\n'
+
+        # Default the scaling argument to '' so that an unset or false
+        # image_maxwidth no longer raises NameError on the first image.
+        maxwidth_arg = ''
+        maxwidth = globals().get('image_maxwidth')
+        if maxwidth:
+            maxwidth_arg = '|maxwidth=' + str(maxwidth)
+
+        for name in page_list(self._filename(), file_re):
+            if img_re.match(name):
+                out += '{{' + self.page_name + '/' + name + '|' + name + maxwidth_arg + '}}\n'
+            else:
+                out += ' * [[' + self.page_name + '/' + name + ']]\n'
+        return out
+
      def get_attrs(self):
          if 'attrs' in self.__dict__:
              return self.attrs
          self.attrs = {}
          try:
-            file = open(self._text_filename(), 'rt')
+            file = open(self._filename(), 'rt')
              attr_re = re.compile(r"^#(\S*)(.*)$")
              for line in file:
                  m = attr_re.match(line)
@@ -608,7 +621,7 @@ class Page:
                  self.attrs[m.group(1)] = m.group(2).strip()
                  #print "bernie: attrs[" + m.group(1) + "] = " + m.group(2) + "<br>\n"
          except IOError, er:
-            if er.errno != errno.ENOENT:
+            if er.errno != errno.ENOENT and er.errno != errno.EISDIR:
                  raise er
          return self.attrs
  
@@ -636,26 +649,28 @@ class Page:
      def can_read(self):
          return self.can("read", True)
  
-    def send_page(self):
-        page_name = None
-        if self.can_write():
-            page_name = self.page_name
-
-        #css foo.css bar.css
-        global css_url
-        css_url = self.get_attr("css", "").split() + css_url
-
-        send_title(page_name, self.split_title(), msg_text=self.msg_text, msg_type=self.msg_type)
+    def send_naked(self):
          if self.can_read():
-            PageFormatter(self.get_raw_body()).print_html()
+            WikiFormatter(self.get_raw_body()).print_html()
          else:
              send_guru("Read access denied by ACLs", "notice")
-        send_footer(page_name, self._last_modified())
+
+    def format(self):
+        """Send the complete HTML page: title, formatted body and footer.
+
+        A "#css foo.css" attribute in the page adds foo.css as an extra
+        stylesheet link in the page head.
+        """
+        global link_urls
+
+        #css foo.css
+        value = self.get_attr("css", None)
+        if value:
+            # send_title() unpacks every entry as a (rel, href) pair; the
+            # former "+= { ... }" appended only the dict key "stylesheet".
+            link_urls = list(link_urls) + [("stylesheet", value)]
+
+        send_title(self.page_name, self.split_title(),
+            msg_text=self.msg_text, msg_type=self.msg_type, writable=self.can_write())
+        self.send_naked()
+        send_footer(self.page_name, self._last_modified())
  
      def _last_modified(self):
          try:
              from time import localtime, strftime
-            modtime = localtime(os.stat(self._text_filename())[stat.ST_MTIME])
+            modtime = localtime(os.stat(self._filename())[stat.ST_MTIME])
          except OSError, er:
              if er.errno != errno.ENOENT:
                  raise er
@@ -691,7 +706,7 @@ class Page:
          print "<p>" + Page('EditingTips').link_to() + "</p>"
          if preview:
              print "<div class='preview'>"
-            PageFormatter(preview).print_html()
+            WikiFormatter(preview).print_html()
              print "</div>"
          send_footer(self.page_name)
  
@@ -703,10 +718,20 @@ class Page:
          else:
              send_title(None, msg_text='Read access denied by ACLs', msg_type='notice')
  
+    def send_image(self, mimetype, args=None):
+        """Send this page's file as an image, optionally scaled down.
+
+        mimetype -- value for the Content-type header
+        args     -- request parameters (presumably the CGI form: each entry
+                    needs a .value); when it has a 'maxwidth' entry the file
+                    is piped through "gm convert -scale", otherwise it is
+                    sent unchanged with send_raw()
+        """
+        if args is None or 'maxwidth' not in args:
+            self.send_raw(mimetype)
+            return
+
+        # The scaled path does not go through send_raw(), so check the read
+        # ACL explicitly before streaming the file.
+        if not self.can_read():
+            send_title(None, msg_text='Read access denied by ACLs', msg_type='notice')
+            return
+
+        import subprocess
+        emit_header(mimetype)
+        # Flush our header before the child process writes to the same stdout.
+        sys.stdout.flush()
+        # NOTE(review): maxwidth is request-supplied and handed to gm
+        # unvalidated (as one argv entry, no shell involved) -- consider
+        # restricting it to digits.
+        subprocess.check_call(['gm', 'convert', self._filename(),
+            '-scale', args['maxwidth'].value + ' >', '-'])
+
+
      def _write_file(self, data):
          tmp_filename = self._tmp_filename()
          open(tmp_filename, 'wb').write(data)
-        name = self._text_filename()
+        name = self._filename()
          if os.name == 'nt':
              # Bad Bill!  POSIX rename ought to replace. :-(
              try:
@@ -761,12 +786,19 @@ try:
  
          if file_re.match(query):
              if word_re.match(query):
-                Page(query).send_page()
+                Page(query).format()
              else:
                  from mimetypes import MimeTypes
                  type, encoding = MimeTypes().guess_type(query)
-                type = type or 'text/plain'
-                Page(query).send_raw(mimetype=type)
+                #type = type or 'text/plain'
+                #Page(query).send_raw(mimetype=type)
+                if type:
+                    if type.startswith('image/'):
+                        Page(query).send_image(mimetype=type,args=form)
+                    else:
+                        Page(query).send_raw(mimetype=type)
+                else:
+                    Page(query).format()
          else:
              print "Status: 404 Not Found"
              send_title(None, msg_text='Can\'t work out query: ' + query)