X-Git-Url: https://codewiz.org/gitweb?p=geekigeeki.git;a=blobdiff_plain;f=geekigeeki.py;h=5340646c63101b24629d96a26ff2adea8e66a3b7;hp=aa6d332a449e2b9b3ce481e38c0862f820ce67af;hb=61f00a0d333aeb1cb45e98ab4c1902ff1981ad6a;hpb=a0e866e20240e1b4c862a16c1ce798628985b823 diff --git a/geekigeeki.py b/geekigeeki.py index aa6d332..5340646 100755 --- a/geekigeeki.py +++ b/geekigeeki.py @@ -1,10 +1,9 @@ -#! /usr/bin/env python -"""Quick-quick implementation of WikiWikiWeb in Python -""" +#!/usr/bin/python +# -*- coding: utf-8 -*- # -# Copyright (C) 1999, 2000 Martin Pool -# This version includes additional changes by Gerardo Poggiali (2002) -# This version includes additional changes by Bernardo Innocenti (2007) +# Copyright 1999, 2000 Martin Pool +# Copyright 2002 Gerardo Poggiali +# Copyright 2007, 2008 Bernardo Innocenti # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -19,15 +18,18 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . -__version__ = '$Revision: 1.63+gerry+bernie $'[11:-2] +__version__ = '$Id$'[4:12] -import cgi, sys, string, os, re, errno, time, stat +from time import clock +start_time = clock() + +import cgi, sys, string, os, re, errno, stat from os import path, environ # Regular expression defining a WikiWord # (but this definition is also assumed in other places) -file_re = re.compile(r"^\b([A-Za-z0-9_\.\-]+)\b$") -word_re = re.compile(r"^\b([A-Z][a-z]+){2,}\b$") +file_re = re.compile(r"^\b([A-Za-z0-9_\.\-/]+)\b$") +word_re = re.compile(r"^\b((([A-Z][a-z]+){2,}/)*([A-Z][a-z]+){2,})\b$") img_re = re.compile(r"^.*\.(png|gif|jpg|jpeg)$", re.IGNORECASE) url_re = re.compile(r"^[a-z]{3,8}://[^\s'\"]+\S$") @@ -58,6 +60,15 @@ def get_hostname(addr): except: return addr +def relative_url(path, privileged=False): + if not (url_re.match(path) or path.startswith('/')): + if privileged: + url = privileged_path() + else: + url = script_name() + path = url + '/' + path + return path + # Formatting stuff -------------------------------------------------- def emit_header(type="text/html"): @@ -102,16 +113,17 @@ def send_title(name, text="Limbo", msg=None, msg_type='error'): # Head emit_header() - print """ - -""" + print '' + print '' + + site_name = globals().get('site_name', 'Unconfigured Site') print "%s: %s" % (site_name, text) print ' ' if not name: print ' ' - if css_url: - print ' ' % css_url + for css in css_url: + print ' ' % relative_url(css) print '' # Body @@ -127,18 +139,20 @@ def send_title(name, text="Limbo", msg=None, msg_type='error'): print '' -def link_tag(params, text=None, ss_class=None, authentication=False): +def link_tag(params, text=None, ss_class=None, privileged=False): if text is None: text = params # default classattr = '' if ss_class: classattr += 'class="%s" ' % ss_class - # Prevent crawlers from following links to generated pages - # and links added by potential spammers + # Prevent crawlers from following links potentially added by spammers or to generated pages if ss_class == 'external' or ss_class == 'navlink': classattr += 'rel="nofollow" ' - if authentication: - path = privileged_path() - else: - path = script_name() - return '%s' % (classattr, path, params, text) + elif url_re.match(params): + classattr += 'rel="nofollow" ' + return '%s' % (classattr, relative_url(params, privileged=privileged), text) # Search --------------------------------------------------- @@ -187,13 +198,12 @@ def do_fullsearch(needle): for (count, page_name) in hits: print '
  • ' + Page(page_name).link_to() print ' . . . . ' + `count` - print ['match', 'matches'][count <> 1] + print ['match', 'matches'][count != 1] print '

  • ' print "" print_search_stats(len(hits), len(all_pages)) - def do_titlesearch(needle): # TODO: check needle is legal -- but probably we can just accept any RE send_title(None, "Title search for \"" + needle + '"') @@ -209,10 +219,10 @@ def do_titlesearch(needle): print_search_stats(len(hits), len(all_pages)) - def print_search_stats(hits, searched): print "

    %d hits out of %d pages searched.

    " % (hits, searched) +#TODO: merge into do_savepage() def do_edit(pagename): Page(pagename).send_editor() @@ -220,9 +230,6 @@ def do_raw(pagename): Page(pagename).send_raw() def do_savepage(pagename): - if privileged_url is None: - raise 'editing disallowed for ' + pagename - global form pg = Page(pagename) if 'preview' in form: @@ -231,7 +238,7 @@ def do_savepage(pagename): pg.save_text(form['savetext'].value) pg.send_page() elif 'cancel' in form: - pg.msg = 'Editing cancelled' + pg.msg = 'Editing canceled' pg.msg_type = 'notice' pg.send_page() else: @@ -245,29 +252,29 @@ def make_index_key(): s = s + '

    ' return s - def page_list(): - return filter(word_re.match, os.listdir(text_dir)) - + return filter(word_re.match, os.listdir(data_dir)) def send_footer(name, mod_string=None): - if debug_cgi: + if globals().get('debug_cgi', False): cgi.print_arguments() cgi.print_form(cgi.FieldStorage()) cgi.print_environ() + global __version__ print '' - # ---------------------------------------------------------- # Macros -def _macro_TitleSearch(): +def _macro_TitleSearch(*vargs): return _macro_search("titlesearch") -def _macro_FullSearch(): +def _macro_FullSearch(*vargs): return _macro_search("fullsearch") def _macro_search(type): @@ -275,9 +282,9 @@ def _macro_search(type): default = form["value"].value else: default = '' - return """
    """ % (type, default) + return """
    """ % (type, default) -def _macro_WordIndex(): +def _macro_WordIndex(*vargs): s = make_index_key() pages = list(page_list()) map = {} @@ -295,7 +302,7 @@ def _macro_WordIndex(): # set title for word in all_words: letter = string.lower(word[0]) - if letter <> last_letter: + if letter != last_letter: s = s + ';

    %s

    ' % (letter, letter) last_letter = letter @@ -310,19 +317,19 @@ def _macro_WordIndex(): return s -def _macro_TitleIndex(): +def _macro_TitleIndex(*vargs): s = make_index_key() pages = list(page_list()) pages.sort() current_letter = None for name in pages: letter = string.lower(name[0]) - if letter <> current_letter: - s = s + '

    %s

    ' % (letter, letter) + if letter != current_letter: + s += '

    %s

    ' % (letter, letter) current_letter = letter else: - s = s + '
    ' - s = s + Page(name).link_to() + s += '
    ' + s += Page(name).link_to() return s @@ -335,79 +342,91 @@ class PageFormatter: """ def __init__(self, raw): self.raw = raw - self.is_em = self.is_b = 0 self.h_level = 0 - self.list_indents = [] - self.in_pre = False - self.in_table = False - self.tr_cnt = 0 - self.in_var = False + self.in_pre = self.in_table = False self.in_header = True + self.list_indents = [] + self.tr_cnt = self.h_cnt = 0 + self.styles = { + #wiki html enabled? + "//": ["em", False], + "''": ["em", False], + "**": ["b", False], + "'''": ["b", False], + "##": ["tt", False], + "``": ["tt", False], + "__": ["u", False], + "^^": ["sup", False], + ",,": ["sub", False] + } - def _emph_repl(self, word): - if len(word) == 3: - self.is_b = not self.is_b - return ['', ''][self.is_b] - else: - self.is_em = not self.is_em - return ['', ''][self.is_em] + def _b_repl(self, word): + style = self.styles[word] + style[1] = not style[1] + return ['' def _tit_repl(self, word): if self.h_level: - result = "" % self.h_level + result = '' % self.h_level self.h_level = 0 else: self.h_level = len(word) - 1 - result = "" % self.h_level + self.h_cnt += 1 + #abridged = re.sub('[^a-z_]', '', word.lower().replace(' ', '_')) + result = '¶ ' % (self.h_level, self.h_cnt, self.h_cnt) return result + def _br_repl(self, word): + return '
    ' + def _rule_repl(self, word): - s = self._undent() - if len(word) <= 3: - s = s + "\n
    \n" - else: - s = s + "\n
    \n" % (len(word) - 2 ) - return s + return self._undent() + '\n
    \n' % (len(word) - 2) def _word_repl(self, word): return Page(word).link_to() def _img_repl(self, word): - return '' % (script_name(), word) + path = relative_url(word) + return '' % (path, path) def _url_repl(self, word): if img_re.match(word): - return '' % word + return '' % (word, word) else: return '%s' % (word, word) def _hurl_repl(self, word): - m = re.compile("\[\[(\S+)\ (.+)\]\]").match(word) - anchor = m.group(1) - descr = m.group(2) - if img_re.match(anchor): - return '%s' % (anchor, descr) - elif url_re.match(anchor): - return '%s' % (anchor, descr) - elif anchor.startswith('/'): - return '%s' % (anchor, descr) + m = re.compile("\[\[([^ \t\n\r\f\v\|]+)(?:\s*\|\s*([^\]]+)|)\]\]").match(word) + name = m.group(1) + descr = m.group(2) or name + + macro = globals().get('_macro_' + name) + if macro: + return apply(macro, (name, descr)) + elif img_re.match(name): + name = relative_url(name) + # The "extthumb" nonsense works around a limitation of the HTML block model + return '
    %s
    %s
    ' % (name, name, descr, descr) else: - return link_tag(anchor, descr) + if img_re.match(descr): + descr = '' + + return link_tag(name, descr, 'wikilink') def _email_repl(self, word): return '%s' % (word, word) + def _html_repl(self, word): + return word; # Pass through def _ent_repl(self, s): return {'&': '&', '<': '<', '>': '>'}[s] - def _li_repl(self, match): return '
  • ' - def _pre_repl(self, word): if word == '{{{' and not self.in_pre: self.in_pre = True @@ -417,17 +436,8 @@ class PageFormatter: return '' return '' - def _hilight_repl(self, word): - return '' + word + '' - - def _var_repl(self, word): - if word == '{{' and not self.in_var: - self.in_var = True - return '' - elif self.in_var: - self.in_var = False - return '' - return '' + def _hi_repl(self, word): + return '' + word + '' def _tr_repl(self, word): out = '' @@ -436,23 +446,19 @@ class PageFormatter: self.tr_cnt = 0 out = '

    \n' self.tr_cnt += 1 - return out + '' + return out + ['' + return ['', ''][word.strip() == '||='] return '' - def _macro_repl(self, word): - macro_name = word[2:-2] - # TODO: Somehow get the default value into the search field - return apply(globals()['_macro_' + macro_name], ()) - def _indent_level(self): return len(self.list_indents) and self.list_indents[-1] @@ -484,7 +490,7 @@ class PageFormatter: raise "Can't handle match " + `match` def print_html(self): - print "

    " + print '

    ' # For each line, we scan through looking for magic # strings, outputting verbatim any intervening text @@ -492,35 +498,34 @@ class PageFormatter: scan_re = re.compile( r"(?:" # Formatting - + r"(?P'{2,3})" + + r"(?P\*\*|'''|//|''|##|``|__|\^\^|,,)" + r"|(?P\={2,6})" + + r"|(?P
    \\\\)" + r"|(?P^-{3,})" + + r"|(?P<(/|)(div|span|iframe)[^<>]*>)" + r"|(?P[<>&])" - + r"|(?P\b(FIXME|TODO)\b)" + + r"|(?P\b(FIXME|TODO|DONE)\b)" # Links + r"|(?P\b[a-zA-Z0-9_-]+\.(png|gif|jpg|jpeg|bmp))" + r"|(?P\b(?:[A-Z][a-z]+){2,}\b)" - + r"|(?P\[\[\S+\s+.+\]\])" - + r"|(?P(http|ftp|nntp|news|mailto)\:[^\s'\"]+\S)" + + r"|(?P\[\[([^ \t\n\r\f\v\|]+)(?:\s*\|\s*([^\]]+)|)\]\])" + + r"|(?P(http|https|ftp|mailto)\:[^\s'\"]+\S)" + r"|(?P[-\w._+]+\@[\w.-]+)" # Lists, divs, spans - + r"|(?P

  • ^\s+\*)" - + r"|(?P
    (\{\{\{|\s*\}\}\}))"
    -            + r"|(?P(\{\{|\}\}))"
    +            + r"|(?P
  • ^\s+[\*#] +)" + + r"|(?P
    \{\{\{|\s*\}\}\})"
     
                 # Tables
    -            + r"|(?P
  • ^\s*\|\|\s*)" - + r"|(?P\s*\|\|\s*$)" - + r"|(?P^\s*\|\|(=|)\s*)" + + r"|(?P\s*\|\|(=|)\s*$)" + + r"|(?P
    ' + out = out + '
    ', ''][word.strip() == '||='] - def _tre_repl(self, word): + def _td_repl(self, word): if self.in_table: - return '
    ', ''][word.strip() == '||='] return '' - def _td_repl(self, word): + def _tre_repl(self, word): if self.in_table: - return '' + return ['
    \s*\|\|\s*)" - - # Macros - + r"|(?P\[\[(TitleSearch|FullSearch|WordIndex|TitleIndex)\]\])" + + r"|(?P
    \s*\|\|(=|)\s*)" + r")") pre_re = re.compile( r"(?:" + r"(?P
    \s*\}\}\})"
    +            + r"|(?P[<>&])"
                 + r")")
             blank_re = re.compile(r"^\s*$")
             indent_re = re.compile(r"^\s*")
    @@ -551,7 +556,7 @@ class PageFormatter:
             if self.in_pre: print '
    ' if self.in_table: print '

    ' print self._undent() - print "

    " + print '

    ' # ---------------------------------------------------------- class Page: @@ -559,7 +564,6 @@ class Page: self.page_name = page_name self.msg = '' self.msg_type = 'error' - self.attrs = {} def split_title(self): # look for the end of words and the start of a new word, @@ -567,45 +571,39 @@ class Page: return re.sub('([a-z])([A-Z])', r'\1 \2', self.page_name) def _text_filename(self): - return path.join(text_dir, self.page_name) + return path.join(data_dir, self.page_name) def _tmp_filename(self): - return path.join(text_dir, ('#' + self.page_name + '.' + `os.getpid()` + '#')) + return path.join(data_dir, ('#' + self.page_name.replace('/','_') + '.' + `os.getpid()` + '#')) def exists(self): try: os.stat(self._text_filename()) - return 1 + return True except OSError, er: if er.errno == errno.ENOENT: - return 0 - else: - raise er + return False + raise er def link_to(self): word = self.page_name if self.exists(): return link_tag(word, word, 'wikilink') else: - if nonexist_qm: - return link_tag(word, '?', 'nonexistent') + word - else: - return link_tag(word, word, 'nonexistent') - + return link_tag(word, nonexist_pfx + word, 'nonexistent') def get_raw_body(self): try: return open(self._text_filename(), 'rt').read() except IOError, er: if er.errno == errno.ENOENT: - # just doesn't exist, use default - return 'Describe %s here.' % self.page_name - else: - raise er + return '' # just doesn't exist, use default + raise er def get_attrs(self): - if self.attrs: + if self.__dict__.has_key('attrs'): return self.attrs + self.attrs = {} try: file = open(self._text_filename(), 'rt') attr_re = re.compile(r"^#(\S*)(.*)$") @@ -620,46 +618,68 @@ class Page: raise er return self.attrs - def can_edit(self): - attrs = self.get_attrs() + def get_attr(self, name, default): + return self.get_attrs().get(name, default) + + def can(self, action, default=True): try: - # SomeUser:read,write All:read - acl = attrs["acl"] + #acl SomeUser:read,write All:read + acl = self.get_attr("acl", None) for rule in acl.split(): - (user,perms) = acl.split(':') + (user,perms) = rule.split(':') if user == remote_user() or user == "All": - if 'write' in perms.split(','): - return True + return action in perms.split(',') return False - except: + except Exception, er: pass - return True + return default + + def can_write(self): + return self.can("write", True) + + def can_read(self): + return self.can("read", True) def send_page(self): page_name = None - if self.can_edit(): + if self.can_write(): page_name = self.page_name + + #FIXME: are there security implications? + #css foo.css bar.css + global css_url + css_url = css_url + self.get_attr("css", "").split() + send_title(page_name, self.split_title(), msg=self.msg, msg_type=self.msg_type) - PageFormatter(self.get_raw_body()).print_html() + if self.can_read(): + PageFormatter(self.get_raw_body()).print_html() + else: + send_guru("Read access denied by ACLs", "notice") send_footer(page_name, self._last_modified()) def _last_modified(self): - if not self.exists(): + try: + from time import localtime, strftime + modtime = localtime(os.stat(self._text_filename())[stat.ST_MTIME]) + except OSError, er: + if er.errno != errno.ENOENT: + raise er return None - from time import localtime, strftime - modtime = localtime(os.stat(self._text_filename())[stat.ST_MTIME]) return strftime(datetime_fmt, modtime) def send_editor(self, preview=None): send_title(None, 'Edit ' + self.split_title(), msg=self.msg, msg_type=self.msg_type) + if not self.can_write(): + send_guru("Write access denied by ACLs", "error") + return print ('

    Editing ' + self.page_name + ' for ' + cgi.escape(remote_user()) + ' from ' + cgi.escape(get_hostname(remote_host())) + '

    ') - print '
    ' % (script_name(), self.page_name) + print '
    ' % relative_url(self.page_name) print '' % (self.page_name) - print """""" % (preview or self.get_raw_body()) + print """""" % (preview or self.get_raw_body()) print """
    @@ -676,6 +696,9 @@ class Page: send_footer(self.page_name) def send_raw(self): + if not self.can_read(): + send_title(None, msg='Read access denied by ACLs', msg_type='notice') + return emit_header("text/plain") print self.get_raw_body() @@ -688,16 +711,21 @@ class Page: try: os.remove(text) except OSError, er: - if er.errno <> errno.ENOENT: raise er + if er.errno != errno.ENOENT: raise er os.rename(tmp_filename, text) def save_text(self, newtext): + if not self.can_write(): + self.msg = 'Write access denied by ACLs' + self.msg_type = 'error' + return + self._write_file(newtext) rc = 0 if post_edit_hook: # FIXME: what's the std way to perform shell quoting in python? cmd = ( post_edit_hook - + " '" + text_dir + '/' + self.page_name + + " '" + data_dir + '/' + self.page_name + "' '" + remote_user() + "' '" + remote_host() + "'" ) @@ -710,35 +738,19 @@ class Page: if msg: self.msg += 'Output follows:\n' + msg else: - self.msg = 'Thankyou for your contribution. Your attention to detail is appreciated.' + self.msg = 'Thank you for your contribution. Your attention to detail is appreciated.' self.msg_type = 'success' +#TODO: merge into send_raw() def send_verbatim(filename, mime_type='application/octet-stream'): - pathname = path.join(text_dir, filename) + pathname = path.join(data_dir, filename) data = open(pathname, 'rb').read() emit_header(mime_type) sys.stdout.write(data) # Main --------------------------------------------------------------- try: - # Configuration values - site_name = 'Codewiz' - - # set to None for read-only sites - # leave empty ('') to allow anonymous edits - # otherwise, set to a URL that requires authentication - privileged_url = 'https://www.codewiz.org/~bernie/wiki' - - data_dir = '/home/bernie/public_html/wiki' - text_dir = path.join(data_dir, 'text') - css_url = '../wikidata/geekigeeki.css' # optional stylesheet link - history_url = '../wikigit/wiki.git' - post_edit_hook = './post_edit_hook.sh' - datetime_fmt = '%a %d %b %Y %I:%M %p' - allow_edit = True # Is it possible to edit pages? - show_hosts = True # show hostnames? - nonexist_qm = False # show '?' for nonexistent? - debug_cgi = False # Set to True for CGI var dump + execfile("geekigeeki.conf.py") form = cgi.FieldStorage() @@ -768,7 +780,7 @@ try: else: send_verbatim(query) else: - # TODO: return 404? + print "Status: 404 Not Found" send_title(None, msg='Can\'t work out query: ' + query) except: import traceback