X-Git-Url: https://codewiz.org/gitweb?a=blobdiff_plain;f=geekigeeki.py;h=b85a3a2ad1be68264733b5cae74ef9710436728e;hb=344a5723acc80525ac936017a64ad3d69d1950b7;hp=0c1423a97d7534e64d9e9da53cf64e3578e641a5;hpb=71ec383a3cf5f913055172eb966f3b95ea7fa8d3;p=geekigeeki.git diff --git a/geekigeeki.py b/geekigeeki.py index 0c1423a..f616311 100755 --- a/geekigeeki.py +++ b/geekigeeki.py @@ -1,172 +1,193 @@ -#! /usr/bin/env python -"""Quick-quick implementation of WikiWikiWeb in Python -""" +#!/usr/bin/python +# -*- coding: utf-8 -*- # # Copyright (C) 1999, 2000 Martin Pool -# This version includes additional changes by Gerardo Poggiali (2002) -# This version includes additional changes by Bernardo Innocenti (2007) +# Copyright (C) 2002 Gerardo Poggiali +# Copyright (C) 2007, 2008, 2009, 2010, 2011 Bernie Innocenti # # This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# General Public License for more details. -# -# You should have received a copy of the GNU General Public License +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . -__version__ = '$Revision: 1.63+gerry+bernie $'[11:-2] - -import cgi, sys, string, os, re, errno, time, stat -from os import path, environ - -# Regular expression defining a WikiWord -# (but this definition is also assumed in other places) -file_re = re.compile(r"^\b([A-Za-z0-9_\.\-]+)\b$") -word_re = re.compile(r"^\b([A-Z][a-z]+){2,}\b$") -img_re = re.compile(r"^.*\.(png|gif|jpg|jpeg)$", re.IGNORECASE) -url_re = re.compile(r"^[a-z]{3,8}://[^\s'\"]+\S$") +__version__ = '4.0-' + '$Id$'[4:11] +from time import clock, localtime, gmtime, strftime +start_time = clock() title_done = False +import cgi, sys, os, re, errno, stat, glob -# CGI stuff --------------------------------------------------------- +image_ext = 'png|gif|jpg|jpeg|bmp|ico' +video_ext = "ogg|ogv|oga|webm" # Not supported by Firefox 3.6: mkv|mpg|mpeg|mp4|avi|asf|flv|wmv|qt +image_re = re.compile(r".*\.(" + image_ext + "|" + video_ext + ")$", re.IGNORECASE) +video_re = re.compile(r".*\.(" + video_ext + ")$", re.IGNORECASE) +# FIXME: we accept stuff like foo/../bar and we shouldn't +file_re = re.compile(r"([A-Za-z0-9_\-][A-Za-z0-9_\.\-/ ]*)$") +url_re = re.compile(r"[a-z]{3,8}://[^\s'\"]+\S$") +ext_re = re.compile(r"\.([^\./]+)$") + +def config_get(key, default=None): + return globals().get(key, default) def script_name(): - return environ.get('SCRIPT_NAME', '') + return os.environ.get('SCRIPT_NAME', '') + +#TODO: move post-edit hook into wiki, then kill this +def script_path(): + return os.path.split(os.environ.get('SCRIPT_FILENAME', ''))[0] + +def query_string(): + path_info = os.environ.get('PATH_INFO', '') + if len(path_info) and path_info[0] == '/': + return path_info[1:] or 'FrontPage' + else: + return os.environ.get('QUERY_STRING', '') or 'FrontPage' -def privileged_path(): - return privileged_url or script_name() +def is_privileged(): + purl = config_get('privileged_url') + return (purl is not None) and os.environ.get('SCRIPT_URI', '').startswith(purl) def remote_user(): - user = environ.get('REMOTE_USER', '') + user = os.environ.get('REMOTE_USER', '') if user is None or user == '' or user == 'anonymous': user = 'AnonymousCoward' return user def remote_host(): - return environ.get('REMOTE_ADDR', '') + return os.environ.get('REMOTE_ADDR', '') def get_hostname(addr): try: from socket import gethostbyaddr return gethostbyaddr(addr)[0] + ' (' + addr + ')' - except: + except Exception: return addr -# Formatting stuff -------------------------------------------------- +def is_external_url(pathname): + return (url_re.match(pathname) or pathname.startswith('/')) -def emit_header(type="text/html"): - print "Content-type: " + type + "; charset=utf-8" - print +def relative_url(pathname, privileged=False): + if not is_external_url(pathname): + if privileged: + url = config_get('privileged_url') or script_name() + else: + url = script_name() + pathname = url + '/' + pathname + return cgi.escape(pathname, quote=True) + +def permalink(s): + return re.sub(' ', '-', re.sub('[^a-z0-9_ ]', '', s.lower()).strip()) + +def humanlink(s): + return re.sub(r'(?:.*[/:]|)([^:/\.]+)(?:\.[^/:]+|)$', r'\1', s.replace('_', ' ')) + +# Split arg lists like "blah|blah blah| width=100 | align = center", +# return a list containing anonymous arguments and a map containing the named arguments +def parse_args(s): + args = [] + kvargs = {} + for arg in s.strip('<[{}]>').split('|'): + m = re.match('\s*(\w+)\s*=\s*(.+)\s*', arg) + if m is not None: + kvargs[m.group(1)] = m.group(2) + else: + args.append(arg.strip()) + return (args, kvargs) + +def url_args(kvargs): + argv = [] + for k, v in kvargs.items(): + argv.append(k + '=' + v) + if argv: + return '?' + '&'.join(argv) + return '' + +def emit_header(mtime=None, mime_type="text/html"): + if mtime: + # Prevent caching when the wiki engine gets updated + mtime = max(mtime, os.stat(__file__).st_mtime) + print("Last-Modified: " + strftime("%a, %d %b %Y %H:%M:%S GMT", gmtime(mtime))) + else: + print("Cache-Control: must-revalidate, max-age=0") + print("Content-type: " + mime_type + "; charset=utf-8") + print('') -def send_guru(msg, msg_type): - if msg is None or msg == '': return - print '
'
+def send_guru(msg_text, msg_type):
+    if not msg_text: return
+    print('
')
     if msg_type == 'error':
-        print '    Software Failure.  Press left mouse button to continue.\n'
-    print msg
+        print('    Software Failure.  Press left mouse button to continue.\n')
+    print(cgi.escape(msg_text))
     if msg_type == 'error':
-        print '      Guru Meditation #DEADBEEF.ABADC0DE'
-    print '
' - # FIXME: This simple JS snippet is harder to pass than ACID 3.0 - print """ - """ - -def send_title(name, text="Limbo", msg=None, msg_type='error'): - global title_done - if title_done: return - - # Head - emit_header() - print """ - -""" - print "%s: %s" % (site_name, text) - print ' ' - if not name: - print ' ' - if css_url: - print ' ' % css_url - print '' - - # Body - if name and privileged_url is not None: - print '' - else: - print '' - - title_done = True - send_guru(msg, msg_type) - - # Navbar - print '' - -def link_tag(params, text=None, ss_class=None, authentication=False): + print '\n Guru Meditation #DEADBEEF.ABADC0DE' + print('
' \ + % relative_url('sys/GuruMeditation.js')) + +def send_httperror(status="404 Not Found", query="", trace=False): + print("Status: %s" % status) + msg_text = "%s: on query '%s'" % (status, query) + if trace: + import traceback + msg_text += '\n\n' + traceback.format_exc() + page = Page() + page.send_title(msg_text=msg_text) + page.send_footer() + +def link_tag(dest, text=None, privileged=False, **kvargs): if text is None: - text = params # default - classattr = '' - if ss_class: - classattr += 'class="%s" ' % ss_class - # Prevent crawlers from following links potentially added by spammers or to generated pages - if ss_class == 'external' or ss_class == 'navlink': - classattr += 'rel="nofollow" ' - if authentication: - path = privileged_path() + text = humanlink(dest) + elif image_re.match(text): + text = '' + text + '' + + link_class = kvargs.get('class', kvargs.get('cssclass', None)) + if not link_class: + if is_external_url(dest): + link_class = 'external' + elif file_re.match(dest) and Page(dest).exists(): + link_class = 'wikilink' + else: + text = config_get('nonexist_pfx', '') + text + link_class = 'nonexistent' + + # Prevent crawlers from following links potentially added by spammers and to autogenerated pages + nofollow = '' + if link_class in ('external', 'navlink', 'nonexistent'): + nofollow = 'rel="nofollow" ' + + return '%s' % (link_class, nofollow, relative_url(dest, privileged=privileged), text) + +def link_inline(name, descr=None, kvargs={}): + if not descr: descr = humanlink(name) + url = relative_url(name) + if video_re.match(name): + return '' % url + elif image_re.match(name): + return '%s' % (url, url + url_args(kvargs), descr) + elif file_re.match(name) and not ext_re.search(name): # FIXME: this guesses a wiki page + Page(name).send_naked(kvargs) # FIXME: we should return the page as a string rather than print it + return '' else: - path = script_name() - return '%s' % (classattr, path, params, text) + return '' \ + % (url, url, name) + +def link_inline_glob(pattern, descr=None, kvargs={}): + if not url_re.match(pattern) and bool(set(pattern) & set('?*[')): + s = '' + for name in sorted(glob.glob(pattern), reverse=bool(int(kvargs.get('reverse', '0'))) ): + s += link_inline(name, descr, kvargs) + return s + else: + return link_inline(pattern, descr, kvargs) -# Search --------------------------------------------------- +def search_stats(hits, searched): + return "%d hits out of %d pages searched.\n" % (hits, searched) -def do_fullsearch(needle): - send_title(None, 'Full text search for "%s"' % (needle)) +def handle_fullsearch(query, form): + needle = form['q'].value + Page().send_title(text='Full text search for "' + needle + '"') needle_re = re.compile(needle, re.IGNORECASE) hits = [] @@ -177,234 +198,205 @@ def do_fullsearch(needle): if count: hits.append((count, page_name)) - # The default comparison for tuples compares elements in order, - # so this sorts by number of hits + # The default comparison for tuples compares elements in order, so this sorts by number of hits hits.sort() hits.reverse() - print "
    " + out = '' for (count, page_name) in hits: - print '
  • ' + Page(page_name).link_to() - print ' . . . . ' + `count` - print ['match', 'matches'][count <> 1] - print '

  • ' - print "
" - - print_search_stats(len(hits), len(all_pages)) + out += ' * [[' + page_name + ']] . . . ' + str(count) + ' ' + ['match', 'matches'][count != 1] + '\n' + out += search_stats(len(hits), len(all_pages)) + WikiFormatter(out).print_html() -def do_titlesearch(needle): - # TODO: check needle is legal -- but probably we can just accept any RE - send_title(None, "Title search for \"" + needle + '"') +def handle_titlesearch(query, form): + needle = form['q'].value + Page().send_title(text='Title search for "' + needle + '"') needle_re = re.compile(needle, re.IGNORECASE) all_pages = page_list() - hits = filter(needle_re.search, all_pages) + hits = list(filter(needle_re.search, all_pages)) - print "
    " + out = '' for filename in hits: - print '
  • ' + Page(filename).link_to() + "

  • " - print "
" - - print_search_stats(len(hits), len(all_pages)) + out += ' * [[' + filename + ']]\n' + out += search_stats(len(hits), len(all_pages)) + WikiFormatter(out).print_html() -def print_search_stats(hits, searched): - print "

%d hits out of %d pages searched.

" % (hits, searched) - -def do_edit(pagename): - Page(pagename).send_editor() - -def do_raw(pagename): +def handle_raw(pagename, form): Page(pagename).send_raw() -def do_savepage(pagename): - global form - pg = Page(pagename) - if 'preview' in form: - pg.send_editor(form['savetext'].value) - elif 'save' in form: - pg.save_text(form['savetext'].value) - pg.send_page() +def handle_atom(pagename, form): + Page(pagename).send_atom() + +def handle_edit(pagename, form): + pg = Page(pagename) + if 'save' in form: + if form['file'].value: + pg.save(form['file'].file.read(), form['changelog'].value) + else: + pg.save(form['savetext'].value.replace('\r\n', '\n'), form['changelog'].value) + pg.send() elif 'cancel' in form: - pg.msg = 'Editing canceled' + pg.msg_text = 'Editing canceled' pg.msg_type = 'notice' - pg.send_page() + pg.send() + else: # preview or edit + text = None + if 'preview' in form: + text = form['savetext'].value + pg.send_editor(text) + +def handle_get(pagename, form): + if not ext_re.search(pagename): # FIXME: no extension guesses a wiki page + Page(pagename).send() else: - raise 'What did you press?' + # FIMXE: this is all bullshit, MimeTypes bases its guess on the extension! + from mimetypes import MimeTypes + mimetype, encoding = MimeTypes().guess_type(pagename) + Page(pagename).send_raw(mimetype=mimetype, args=form) +# Used by sys/macros/WordIndex and sys/macros/TitleIndex def make_index_key(): - s = '

' - links = map(lambda ch: '%s' % (ch, ch), - string.lowercase) - s = s + string.join(links, ' | ') - s = s + '

' - return s - - -def page_list(): - return filter(word_re.match, os.listdir(text_dir)) - + links = ['%s' % (ch, ch) for ch in 'abcdefghijklmnopqrstuvwxyz'] + return '

' + ' | '.join(links) + '

' -def send_footer(name, mod_string=None): - if debug_cgi: - cgi.print_arguments() - cgi.print_form(cgi.FieldStorage()) - cgi.print_environ() - print '' - - -# ---------------------------------------------------------- -# Macros -def _macro_TitleSearch(*vargs): - return _macro_search("titlesearch") - -def _macro_FullSearch(*vargs): - return _macro_search("fullsearch") - -def _macro_search(type): - if form.has_key('value'): - default = form["value"].value - else: - default = '' - return """
""" % (type, default) - -def _macro_WordIndex(*vargs): - s = make_index_key() - pages = list(page_list()) - map = {} - word_re = re.compile('[A-Z][a-z]+') - for name in pages: - for word in word_re.findall(name): - try: - map[word].append(name) - except KeyError: - map[word] = [name] - - all_words = map.keys() - all_words.sort() - last_letter = None - # set title - for word in all_words: - letter = string.lower(word[0]) - if letter <> last_letter: - s = s + ';

%s

' % (letter, letter) - last_letter = letter - - s = s + '%s
    ' % word - links = map[word] - links.sort() - last_page = None - for name in links: - if name == last_page: continue - s = s + '
  • ' + Page(name).link_to() - s = s + '
' - return s - - -def _macro_TitleIndex(*vargs): - s = make_index_key() - pages = list(page_list()) - pages.sort() - current_letter = None - for name in pages: - letter = string.lower(name[0]) - if letter <> current_letter: - s = s + '

%s

' % (letter, letter) - current_letter = letter - else: - s = s + '
' - s = s + Page(name).link_to() - return s +def page_list(dirname=None, search_re=None): + if search_re is None: + # FIXME: WikiWord is too restrictive now! + search_re = re.compile(r"^\b((([A-Z][a-z0-9]+){2,}/)*([A-Z][a-z0-9]+){2,})\b$") + return sorted(filter(search_re.match, os.listdir(dirname or '.'))) +def _macro_ELAPSED_TIME(*args, **kvargs): + return "%03f" % (clock() - start_time) -# ---------------------------------------------------------- -class PageFormatter: - """Object that turns Wiki markup into HTML. +def _macro_VERSION(*args, **kvargs): + return __version__ - All formatting commands can be parsed one line at a time, though - some state is carried over between lines. - """ - def __init__(self, raw): +class WikiFormatter: + """Object that turns Wiki markup into HTML.""" + def __init__(self, raw, kvargs=None): self.raw = raw - self.is_em = self.is_b = 0 + self.kvargs = kvargs or {} self.h_level = 0 - self.h_count = 0 - self.list_indents = [] - self.in_pre = False - self.in_table = False - self.tr_cnt = 0 - self.in_var = False + self.in_pre = self.in_html = self.in_table = self.in_li = False self.in_header = True + self.list_indents = [] # a list of pairs (indent_level, list_type) to track nested lists + self.tr_cnt = 0 + self.styles = { + #wiki html enabled? + "//": ["em", False], + "**": ["b", False], + "##": ["tt", False], + "__": ["u", False], + "--": ["del", False], + "^^": ["sup", False], + ",,": ["sub", False], + "''": ["em", False], # LEGACY + "'''": ["b", False], # LEGACY + } - def _emph_repl(self, word): - if len(word) == 3: - self.is_b = not self.is_b - return ['', ''][self.is_b] - else: - self.is_em = not self.is_em - return ['', ''][self.is_em] + def _b_repl(self, word): + style = self.styles[word] + style[1] = not style[1] + return ['' + + def _glyph_repl(self, word): + return '—' def _tit_repl(self, word): + link = permalink(self.line) if self.h_level: - result = '' % self.h_level + result = '¶

\n' % (link, self.h_level) self.h_level = 0 else: self.h_level = len(word) - 1 - self.h_count += 1 - result = '* ' % (self.h_level, self.h_count, self.h_count) + result = '\n

' % (self.h_level, link) return result - def _rule_repl(self, word): - return self._undent() + '\n
\n' % (len(word) - 2) - - def _word_repl(self, word): - return Page(word).link_to() + def _br_repl(self, word): + return '
' - def _img_repl(self, word): - path = script_name() + '/' + word; - return '' % (path, path) + def _rule_repl(self, word): + return '\n
\n' % (len(word) - 2) - def _url_repl(self, word): - if img_re.match(word): - return '' % (word, word) - else: - return '%s' % (word, word) + def _macro_repl(self, word): + try: + args, macro_kvargs = parse_args(word) + # Is this a parameter given to the current page? + if args[0] in self.kvargs: + return self.kvargs[args[0]] + # Is this an internal macro? + macro = globals().get('_macro_' + args[0]) + if not macro: + # Can we load (and cache) an external macro? + exec(open("sys/macros/" + args[0] + ".py").read(), globals()) + macro = globals().get('_macro_' + args[0]) + # Invoke macro passing both macro args augmented by page args + macro_kvargs.update(self.kvargs) + return macro(*args, **macro_kvargs) + except Exception, e: + msg = cgi.escape(word) + ": " + cgi.escape(str(e)) + if not self.in_html: + msg = '' + msg + '' + return msg def _hurl_repl(self, word): - m = re.compile("\[\[(\S+)([^\]]*)\]\]").match(word) - name = m.group(1) - descr = m.group(2).strip() or name - - macro = globals().get('_macro_' + name) - if macro: - return apply(macro, (name, descr)) - elif img_re.match(name): - return '%s' % (name, name, descr) - elif url_re.match(name): - return '%s' % (name, descr) - elif name.startswith('/'): - return '%s' % (name, descr) + args, kvargs = parse_args(word) + return link_tag(*args, **kvargs) + + def _inl_repl(self, word): + args, kvargs = parse_args(word) + name = args.pop(0) + if len(args): + descr = args.pop(0) + # This double div nonsense works around a limitation of the HTML block model + return '
' \ + + '
' \ + + link_inline_glob(name, descr, kvargs) \ + + '
' + descr + '
' else: - return link_tag(name, descr) - - def _email_repl(self, word): - return '%s' % (word, word) + return link_inline_glob(name, None, kvargs) def _html_repl(self, word): + if not self.in_html and word.startswith('': '>'}[s] + def _img_repl(self, word): # LEGACY + return self._inl_repl('{{' + word + '}}') + + def _word_repl(self, word): # LEGACY + if self.in_html: return word # pass through + return link_tag(word) + + def _url_repl(self, word): # LEGACY + if self.in_html: return word # pass through + return link_tag(word) + + def _email_repl(self, word): # LEGACY + if self.in_html: return word # pass through + return '%s' % (word, word) + def _li_repl(self, match): - return '
  • ' + if self.in_li: + return '
  • ' + else: + self.in_li = True + return '
  • ' def _pre_repl(self, word): if word == '{{{' and not self.in_pre: @@ -416,22 +408,7 @@ class PageFormatter: return '' def _hi_repl(self, word): - if word == 'FIXME': - cl = 'error' - elif word == 'DONE': - cl = 'success' - elif word == 'TODO': - cl = 'notice' - return '' + word + '' - - def _var_repl(self, word): - if word == '{{' and not self.in_var: - self.in_var = True - return '' - elif self.in_var: - self.in_var = False - return '' - return '' + return '' + word + '' def _tr_repl(self, word): out = '' @@ -440,197 +417,219 @@ class PageFormatter: self.tr_cnt = 0 out = '

    \n' self.tr_cnt += 1 - return out + '' + return out + ['' + return ['', ''][word.strip() == '||='] return '' def _indent_level(self): - return len(self.list_indents) and self.list_indents[-1] + return len(self.list_indents) and self.list_indents[-1][0] - def _indent_to(self, new_level): + def _indent_to(self, new_level, list_type=''): if self._indent_level() == new_level: return '' s = '

    ' while self._indent_level() > new_level: + if self.in_li: + s += '' + self.in_li = False # FIXME + s += '\n' del(self.list_indents[-1]) - s += '\n' + + list_type = ('ul', 'ol')[list_type == '#'] while self._indent_level() < new_level: - self.list_indents.append(new_level) - s += '
      \n' + self.list_indents.append((new_level, list_type)) + s += '<' + list_type + '>\n' s += '

      ' return s - def _undent(self): - res = '

      ' - res += '
    ' * len(self.list_indents) - res += '

    ' - self.list_indents = [] - return res - def replace(self, match): - for type, hit in match.groupdict().items(): + for rule, hit in list(match.groupdict().items()): if hit: - return apply(getattr(self, '_' + type + '_repl'), (hit,)) + return getattr(self, '_' + rule + '_repl')(hit) else: - raise "Can't handle match " + `match` + raise Exception("Can't handle match " + repr(match)) def print_html(self): - print '

    ' - - # For each line, we scan through looking for magic - # strings, outputting verbatim any intervening text - # TODO: highlight search words (look at referer) - scan_re = re.compile( - r"(?:" - # Formatting - + r"(?P'{2,3})" - + r"|(?P\={2,6})" - + r"|(?P^-{3,})" - + r"|(?P<(/|)(div|span|iframe)[^<>]*>)" - + r"|(?P[<>&])" - + r"|(?P\b(FIXME|TODO|DONE)\b)" + print('

    ') + + scan_re = re.compile(r"""(?: + # Styles and formatting ("--" must cling to a word to disambiguate it from the dash) + (?P \*\* | // | \#\# | __ | --\b | \b-- | \^\^ | ,, | ''' | '' ) + | (?P \={2,6}) + | (?P
    \\\\) + | (?P ^-{3,}) + | (?P \b( FIXME | TODO | DONE )\b ) + | (?P --) # Links - + r"|(?P\b[a-zA-Z0-9_-]+\.(png|gif|jpg|jpeg|bmp))" - + r"|(?P\b(?:[A-Z][a-z]+){2,}\b)" - + r"|(?P\[\[\S+[^\]]*\]\])" - + r"|(?P(http|https|ftp|mailto)\:[^\s'\"]+\S)" - + r"|(?P[-\w._+]+\@[\w.-]+)" + | (?P \<\<[^\>]+\>\>) + | (?P \[\[[^\]]+\]\]) + + # Inline HTML + | (?P <(br|hr|small|div|span|form|iframe|input|textarea|a|img|h[1-5])\b ) + | (?P ( /\s*> | ) ) + | (?P [<>&] ) - # Lists, divs, spans - + r"|(?P

  • ^\s+\*)" - + r"|(?P
    (\{\{\{|\s*\}\}\}))"
    -            + r"|(?P(\{\{|\}\}))"
    +            # Auto links (LEGACY)
    +            | (?P   \b[a-zA-Z0-9_/-]+\.(""" + image_ext + "|" + video_ext + r"""))
    +            | (?P  \b(?:[A-Z][a-z]+){2,}\b)
    +            | (?P   (http|https|ftp|mailto)\:[^\s'\"]+\S)
    +            | (?P [-\w._+]+\@[\w.-]+)
    +
    +            # Lists, divs, spans and inline objects
    +            | (?P
  • ^\s+[\*\#]\s+) + | (?P
       \{\{\{|\s*\}\}\})
    +            | (?P   \{\{[^\}]+\}\})
     
                 # Tables
    -            + r"|(?P
  • ^\s*\|\|\s*)" - + r"|(?P\s*\|\|\s*$)" - + r"|(?P ^\s*\|\|(=|)\s*) + | (?P \s*\|\|(=|)\s*$) + | (?P
    ' + out = out + '
    ', ''][word.strip() == '||='] - def _tre_repl(self, word): + def _td_repl(self, word): if self.in_table: - return '
    ', ''][word.strip() == '||='] return '' - def _td_repl(self, word): + def _tre_repl(self, word): if self.in_table: - return '' + return ['
    \s*\|\|\s*)" - + r")") - pre_re = re.compile( - r"(?:" - + r"(?P
    \s*\}\}\})"
    -            + r")")
    +            | (?P
    \s*\|\|(=|)\s*) + )""", re.VERBOSE) + pre_re = re.compile("""(?: + (?P
    \s*\}\}\})
    +            | (?P[<>&])"
    +            )""", re.VERBOSE)
             blank_re = re.compile(r"^\s*$")
    -        indent_re = re.compile(r"^\s*")
    +        indent_re = re.compile(r"^(\s*)(\*|\#|)")
             tr_re = re.compile(r"^\s*\|\|")
             eol_re = re.compile(r"\r?\n")
    -        raw = string.expandtabs(self.raw)
    -        for line in eol_re.split(raw):
    -            # Skip ACLs
    +        # For each line, we scan through looking for magic strings, outputting verbatim any intervening text
    +        #3.0: for self.line in eol_re.split(str(self.raw.expandtabs(), 'utf-8')):
    +        for self.line in eol_re.split(str(self.raw.expandtabs())):
    +            # Skip pragmas
                 if self.in_header:
    -                if line.startswith('#'):
    -                   continue
    +                if self.line.startswith('#'):
    +                    continue
                     self.in_header = False
     
                 if self.in_pre:
    -                print re.sub(pre_re, self.replace, line)
    +                print(re.sub(pre_re, self.replace, self.line))
                 else:
    -                if self.in_table and not tr_re.match(line):
    +                if self.in_table and not tr_re.match(self.line):
                         self.in_table = False
    -                    print '

    ' + print('

    ') - if blank_re.match(line): - print '

    ' + if blank_re.match(self.line): + print('

    ') else: - indent = indent_re.match(line) - print self._indent_to(len(indent.group(0))) - print re.sub(scan_re, self.replace, line) + indent = indent_re.match(self.line) + print(self._indent_to(len(indent.group(1)), indent.group(2))) + # Stand back! Here we apply the monster regex that does all the parsing + print(re.sub(scan_re, self.replace, self.line)) + + if self.in_pre: print('') + if self.in_table: print('

    ') + print(self._indent_to(0)) + print('

    ') - if self.in_pre: print '' - if self.in_table: print '

    ' - print self._undent() - print '

    ' +class HttpException(Exception): + def __init__(self, error, query): + self.error = error + self.query = query -# ---------------------------------------------------------- class Page: - def __init__(self, page_name): - self.page_name = page_name - self.msg = '' + def __init__(self, page_name="Limbo"): + self.page_name = page_name.rstrip('/'); + self.msg_text = '' self.msg_type = 'error' - self.attrs = {} + if not file_re.match(self.page_name): + raise HttpException("403 Forbidden", self.page_name) def split_title(self): - # look for the end of words and the start of a new word, - # and insert a space there + # look for the end of words and the start of a new word and insert a space there return re.sub('([a-z])([A-Z])', r'\1 \2', self.page_name) - def _text_filename(self): - return path.join(text_dir, self.page_name) + def _filename(self): + return self.page_name def _tmp_filename(self): - return path.join(text_dir, ('#' + self.page_name + '.' + `os.getpid()` + '#')) + return self.page_name + '.tmp' + str(os.getpid()) + '#' - def exists(self): + def _mtime(self): try: - os.stat(self._text_filename()) - return 1 - except OSError, er: - if er.errno == errno.ENOENT: - return 0 - else: - raise er + return os.stat(self._filename()).st_mtime + except OSError, err: + if err.errno == errno.ENOENT: + return None + raise err - def link_to(self): - word = self.page_name - if self.exists(): - return link_tag(word, word, 'wikilink') - else: - if nonexist_qm: - return link_tag(word, '?', 'nonexistent') + word - else: - return link_tag(word, word, 'nonexistent') + def exists(self): + if self._mtime(): + return True + return False def get_raw_body(self): try: - return open(self._text_filename(), 'rt').read() - except IOError, er: - if er.errno == errno.ENOENT: - # just doesn't exist, use default - return 'Describe %s here.' % self.page_name + return open(self._filename(), 'rb').read() + except IOError, err: + if err.errno == errno.ENOENT: + return '' + if err.errno == errno.EISDIR: + return self.format_dir() + raise err + + def format_dir(self): + out = '== ' + pathname = '' + for dirname in self.page_name.strip('/').split('/'): + pathname = (pathname and pathname + '/' ) + dirname + out += '[[' + pathname + '|' + dirname + ']]/' + out += ' ==\n' + images_out = '\n' + + for filename in page_list(self._filename(), file_re): + if image_re.match(filename): + maxwidth = config_get('image_maxwidth', '400') + if maxwidth: + maxwidth = ' | maxwidth=' + str(maxwidth) + images_out += '{{' + self.page_name + '/' + filename + ' | ' + humanlink(filename) + maxwidth + ' | class=thumbleft}}\n' else: - raise er + out += ' * [[' + self.page_name + '/' + filename + ']]\n' + return out + images_out - def get_attrs(self): - if self.attrs: - return self.attrs - try: - file = open(self._text_filename(), 'rt') - attr_re = re.compile(r"^#(\S*)(.*)$") - for line in file: - m = attr_re.match(line) - if not m: - break - self.attrs[m.group(1)] = m.group(2).strip() - #print "bernie: attrs[" + m.group(1) + "] = " + m.group(2) + "
    \n" - except IOError, er: - if er.errno != errno.ENOENT: - raise er - return self.attrs + def pragmas(self): + if not '_pragmas' in self.__dict__: + self._pragmas = {} + try: + file = open(self._filename(), 'rt') + attr_re = re.compile(r"^#(\S*)(.*)$") + for line in file: + m = attr_re.match(line) + if not m: + break + self._pragmas[m.group(1)] = m.group(2).strip() + #print "bernie: pragmas[" + m.group(1) + "] = " + m.group(2) + "
    \n" + except IOError, err: + if err.errno != errno.ENOENT and err.errno != errno.EISDIR: + raise err + return self._pragmas + + def pragma(self, name, default): + return self.pragmas().get(name, default) def can(self, action, default=True): - attrs = self.get_attrs() + acl = None try: - # SomeUser:read,write All:read - acl = attrs["acl"] + #acl SomeUser:read,write All:read + acl = self.pragma("acl", None) for rule in acl.split(): - (user,perms) = rule.split(':') + (user, perms) = rule.split(':') if user == remote_user() or user == "All": - if action in perms.split(','): - return True - else: - return False + return action in perms.split(',') return False - except Exception, er: - pass + except Exception: + if acl: + self.msg_text = 'Illegal acl line: ' + acl return default def can_write(self): @@ -639,161 +638,211 @@ class Page: def can_read(self): return self.can("read", True) - def send_page(self): - page_name = None - if self.can_write(): - page_name = self.page_name - send_title(page_name, self.split_title(), msg=self.msg, msg_type=self.msg_type) - if self.can_read(): - PageFormatter(self.get_raw_body()).print_html() + def send_title(self, name=None, text="Limbo", msg_text=None, msg_type='error'): + global title_done + if title_done: return + + # HEAD + emit_header(name and self._mtime()) + print('\n') + print("%s: %s" % (config_get('site_name', "Unconfigured Wiki"), text)) + print(' ') + if not name: + print(' ') + + for http_equiv, content in config_get('meta_urls', {}): + print(' ' % (http_equiv, relative_url(content))) + + for link in config_get('link_urls', {}): + rel, href = link + print(' ' % (rel, relative_url(href))) + + editable = name and self.can_write() and is_privileged() + if editable: + print(' ' \ + % relative_url(name + '?a=edit', privileged=True)) + + print(' ' \ + % relative_url(name + '?a=atom')) + + print('') + + # BODY + if editable: + print('') else: - send_guru("Read access denied by ACLs", "notice") - send_footer(page_name, self._last_modified()) + print('') + + title_done = True + send_guru(msg_text, msg_type) + + if self.pragma("navbar", "on") != "on": + return - def _last_modified(self): - if not self.exists(): - return None - from time import localtime, strftime - modtime = localtime(os.stat(self._text_filename())[stat.ST_MTIME]) - return strftime(datetime_fmt, modtime) + # NAVBAR + print('') + + def send_footer(self): + if config_get('debug_cgi', False): + cgi.print_arguments() + cgi.print_form(form) + cgi.print_environ() + footer = self.pragma("footer", "sys/footer") + if footer != "off": + link_inline(footer, kvargs = { + 'LAST_MODIFIED': strftime(config_get('datetime_fmt', '%Y-%m-%dT%I:%M:%S%p'), localtime(self._mtime())) + }) + print('') + + def send_naked(self, kvargs=None): + if self.can_read(): + body = self.get_raw_body() + if not body: + body = "//[[%s?a=edit|Describe %s]]//" % (self.page_name, self.page_name) + WikiFormatter(body, kvargs).print_html() + else: + send_guru('Read access denied by ACLs', 'notice') + + def send(self): + #css foo.css + value = self.pragma("css", None) + if value: + global link_urls + link_urls += [ [ "stylesheet", value ] ] + + self.send_title(name=self.page_name, text=self.split_title(), msg_text=self.msg_text, msg_type=self.msg_type) + self.send_naked() + self.send_footer() + + def send_atom(self): + emit_header(self._mtime(), 'application/atom+xml') + self.in_html = True + link_inline("sys/atom_header", kvargs = { + 'LAST_MODIFIED': strftime(config_get('datetime_fmt', '%a, %d %b %Y %I:%M:%S %p'), localtime(self._mtime())) + }) + self.in_html = False + self.send_naked() + self.in_html = True + link_inline("sys/atom_footer") + self.in_html = False def send_editor(self, preview=None): - send_title(None, 'Edit ' + self.split_title(), msg=self.msg, msg_type=self.msg_type) + self.send_title(text='Edit ' + self.split_title(), msg_text=self.msg_text, msg_type=self.msg_type) if not self.can_write(): send_guru("Write access denied by ACLs", "error") return - print ('

    Editing ' + self.page_name - + ' for ' + cgi.escape(remote_user()) - + ' from ' + cgi.escape(get_hostname(remote_host())) - + '

    ') - print '
    ' % (script_name(), self.page_name) - print '' % (self.page_name) - print """""" % (preview or self.get_raw_body()) - print """ -
    - - - - -
    -
    """ - print "

    " + Page('EditingTips').link_to() + "

    " + if preview is None: + preview = self.get_raw_body() + + link_inline("sys/EditPage", kvargs = { + 'EDIT_BODY': cgi.escape(preview), + #'EDIT_PREVIEW': WikiFormatter(preview).print_html(), + }) + if preview: - print "
    " - PageFormatter(preview).print_html() - print "
    " - send_footer(self.page_name) + print("
    ") + WikiFormatter(preview).print_html() + print("
    ") + self.send_footer() - def send_raw(self): + def send_raw(self, mimetype='text/plain', args=[]): if not self.can_read(): - send_title(None, msg='Read access denied by ACLs', msg_type='notice') + self.send_title(msg_text='Read access denied by ACLs', msg_type='notice') return - emit_header("text/plain") - print self.get_raw_body() - def _write_file(self, text): + emit_header(self._mtime(), mimetype) + if 'maxwidth' in args: + import subprocess + sys.stdout.flush() + subprocess.check_call(['convert', self._filename(), + '-auto-orient', '-orient', 'TopLeft', + '-scale', args['maxwidth'].value + ' >', '-']) + else: + body = self.get_raw_body() + print(body) + + def _write_file(self, data): tmp_filename = self._tmp_filename() - open(tmp_filename, 'wt').write(text.replace('\r\n', '\n')) - text = self._text_filename() + open(tmp_filename, 'wb').write(data) + name = self._filename() if os.name == 'nt': # Bad Bill! POSIX rename ought to replace. :-( try: - os.remove(text) - except OSError, er: - if er.errno <> errno.ENOENT: raise er - os.rename(tmp_filename, text) - - def save_text(self, newtext): + os.remove(name) + except OSError, err: + if err.errno != errno.ENOENT: raise err + path = os.path.split(name)[0] + if path and not os.path.exists(path): + os.makedirs(path) + os.rename(tmp_filename, name) + + def save(self, newdata, changelog): if not self.can_write(): - self.msg = 'Write access denied by ACLs' - self.msg_type = 'error' + self.msg_text = 'Write access denied by Access Control List' + return + if not is_privileged(): + self.msg_text = 'Unauthenticated access denied' return - self._write_file(newtext) + self._write_file(newdata) rc = 0 - if post_edit_hook: - # FIXME: what's the std way to perform shell quoting in python? - cmd = ( post_edit_hook - + " '" + text_dir + '/' + self.page_name - + "' '" + remote_user() - + "' '" + remote_host() + "'" - ) - out = os.popen(cmd) - msg = out.read() - rc = out.close() + if config_get('post_edit_hook'): + import subprocess + cmd = [ + config_get('post_edit_hook'), + self.page_name, remote_user(), + remote_host(), changelog ] + child = subprocess.Popen(cmd, stdout=subprocess.PIPE, close_fds=True) + output = child.stdout.read() + rc = child.wait() if rc: - self.msg += "Post-editing hook returned %d.\n" % rc - self.msg += 'Command was: ' + cmd + '\n' - if msg: - self.msg += 'Output follows:\n' + msg + self.msg_text += "Post-editing hook returned %d. Command was:\n'%s'\n" % (rc, "' '".join(cmd)) + if output: + self.msg_text += 'Output follows:\n' + output else: - self.msg = 'Thank you for your contribution. Your attention to detail is appreciated.' + self.msg_text = 'Thank you for your contribution. Your attention to detail is appreciated.' self.msg_type = 'success' -def send_verbatim(filename, mime_type='application/octet-stream'): - pathname = path.join(text_dir, filename) - data = open(pathname, 'rb').read() - emit_header(mime_type) - sys.stdout.write(data) - -# Main --------------------------------------------------------------- try: - # Configuration values - site_name = 'Codewiz' - - # set to None for read-only sites, leave empty ('') to allow anonymous edits - # otherwise, set to a URL that requires authentication - privileged_url = 'https://www.codewiz.org/~bernie/wiki' - - data_dir = '/home/bernie/public_html/wiki' - text_dir = path.join(data_dir, 'text') - css_url = '../wikidata/geekigeeki.css' # optional stylesheet link - history_url = '../wikigit/wiki.git' - post_edit_hook = './post_edit_hook.sh' - datetime_fmt = '%a %d %b %Y %I:%M %p' - allow_edit = True # Is it possible to edit pages? - show_hosts = True # show hostnames? - nonexist_qm = False # show '?' for nonexistent? - debug_cgi = False # Set to True for CGI var dump - + exec(open("geekigeeki.conf.py").read()) + os.chdir(config_get('data_dir', 'data')) form = cgi.FieldStorage() - - handlers = { 'fullsearch': do_fullsearch, - 'titlesearch': do_titlesearch, - 'edit': do_edit, - 'raw': do_raw, - 'savepage': do_savepage } - - for cmd in handlers.keys(): - if form.has_key(cmd): - apply(handlers[cmd], (form[cmd].value,)) - break - else: - path_info = environ.get('PATH_INFO', '') - if len(path_info) and path_info[0] == '/': - query = path_info[1:] or 'FrontPage' - else: - query = environ.get('QUERY_STRING', '') or 'FrontPage' - - if file_re.match(query): - if word_re.match(query): - Page(query).send_page() - elif img_re.match(query): - #FIXME: use correct mime type - send_verbatim(query, 'image/jpeg') - else: - send_verbatim(query) - else: - # TODO: return 404? - send_title(None, msg='Can\'t work out query: ' + query) -except: - import traceback - msg=traceback.format_exc() - if title_done: - send_guru(msg, "error") + action = form.getvalue('a', 'get') + handler = globals().get('handle_' + action) + if handler: + handler(query_string(), form) else: - send_title(None, msg=msg) - send_footer(None) + send_httperror("403 Forbidden", query_string()) + +except HttpException, e: + send_httperror(e.error, query=e.query) +except Exception: + send_httperror("500 Internal Server Error", query=query_string(), trace=True) sys.stdout.flush()