X-Git-Url: https://codewiz.org/gitweb?p=geekigeeki.git;a=blobdiff_plain;f=geekigeeki.py;h=5340646c63101b24629d96a26ff2adea8e66a3b7;hp=a0345ee950f0c0fc750e874717dc9d5e34ddbad6;hb=61f00a0d333aeb1cb45e98ab4c1902ff1981ad6a;hpb=727c1cfdb0040ca5d95efb81c975d36d1f568c0b diff --git a/geekigeeki.py b/geekigeeki.py index a0345ee..5340646 100755 --- a/geekigeeki.py +++ b/geekigeeki.py @@ -1,8 +1,9 @@ -#! /usr/bin/env python +#!/usr/bin/python +# -*- coding: utf-8 -*- # # Copyright 1999, 2000 Martin Pool # Copyright 2002 Gerardo Poggiali -# Copyright 2007 Bernardo Innocenti +# Copyright 2007, 2008 Bernardo Innocenti # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -17,15 +18,18 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . -__version__ = '$Id$'[4:-2] +__version__ = '$Id$'[4:12] -import cgi, sys, string, os, re, errno, time, stat +from time import clock +start_time = clock() + +import cgi, sys, string, os, re, errno, stat from os import path, environ # Regular expression defining a WikiWord # (but this definition is also assumed in other places) -file_re = re.compile(r"^\b([A-Za-z0-9_\.\-]+)\b$") -word_re = re.compile(r"^\b([A-Z][a-z]+){2,}\b$") +file_re = re.compile(r"^\b([A-Za-z0-9_\.\-/]+)\b$") +word_re = re.compile(r"^\b((([A-Z][a-z]+){2,}/)*([A-Z][a-z]+){2,})\b$") img_re = re.compile(r"^.*\.(png|gif|jpg|jpeg)$", re.IGNORECASE) url_re = re.compile(r"^[a-z]{3,8}://[^\s'\"]+\S$") @@ -56,6 +60,15 @@ def get_hostname(addr): except: return addr +def relative_url(path, privileged=False): + if not (url_re.match(path) or path.startswith('/')): + if privileged: + url = privileged_path() + else: + url = script_name() + path = url + '/' + path + return path + # Formatting stuff -------------------------------------------------- def emit_header(type="text/html"): @@ -100,16 +113,17 @@ def send_title(name, text="Limbo", msg=None, msg_type='error'): # Head emit_header() - print """ - -""" + print '' + print '' + + site_name = globals().get('site_name', 'Unconfigured Site') print "%s: %s" % (site_name, text) print ' ' if not name: print ' ' - if css_url: - print ' ' % css_url + for css in css_url: + print ' ' % relative_url(css) print '' # Body @@ -125,18 +139,20 @@ def send_title(name, text="Limbo", msg=None, msg_type='error'): print '' -def link_tag(params, text=None, ss_class=None, authentication=False): +def link_tag(params, text=None, ss_class=None, privileged=False): if text is None: text = params # default classattr = '' @@ -155,11 +171,9 @@ def link_tag(params, text=None, ss_class=None, authentication=False): # Prevent crawlers from following links potentially added by spammers or to generated pages if ss_class == 'external' or ss_class == 'navlink': classattr += 'rel="nofollow" ' - if authentication: - path = privileged_path() - else: - path = script_name() - return '%s' % (classattr, path, params, text) + elif url_re.match(params): + classattr += 'rel="nofollow" ' + return '%s' % (classattr, relative_url(params, privileged=privileged), text) # Search --------------------------------------------------- @@ -184,13 +198,12 @@ def do_fullsearch(needle): for (count, page_name) in hits: print '
  • ' + Page(page_name).link_to() print ' . . . . ' + `count` - print ['match', 'matches'][count <> 1] + print ['match', 'matches'][count != 1] print '

  • ' print "" print_search_stats(len(hits), len(all_pages)) - def do_titlesearch(needle): # TODO: check needle is legal -- but probably we can just accept any RE send_title(None, "Title search for \"" + needle + '"') @@ -206,10 +219,10 @@ def do_titlesearch(needle): print_search_stats(len(hits), len(all_pages)) - def print_search_stats(hits, searched): print "

    %d hits out of %d pages searched.

    " % (hits, searched) +#TODO: merge into do_savepage() def do_edit(pagename): Page(pagename).send_editor() @@ -239,26 +252,23 @@ def make_index_key(): s = s + '

    ' return s - def page_list(): - return filter(word_re.match, os.listdir(text_dir)) - + return filter(word_re.match, os.listdir(data_dir)) def send_footer(name, mod_string=None): - if debug_cgi: + if globals().get('debug_cgi', False): cgi.print_arguments() cgi.print_form(cgi.FieldStorage()) cgi.print_environ() global __version__ print '' - # ---------------------------------------------------------- # Macros def _macro_TitleSearch(*vargs): @@ -292,7 +302,7 @@ def _macro_WordIndex(*vargs): # set title for word in all_words: letter = string.lower(word[0]) - if letter <> last_letter: + if letter != last_letter: s = s + ';

    %s

    ' % (letter, letter) last_letter = letter @@ -314,12 +324,12 @@ def _macro_TitleIndex(*vargs): current_letter = None for name in pages: letter = string.lower(name[0]) - if letter <> current_letter: - s = s + '

    %s

    ' % (letter, letter) + if letter != current_letter: + s += '

    %s

    ' % (letter, letter) current_letter = letter else: - s = s + '
    ' - s = s + Page(name).link_to() + s += '
    ' + s += Page(name).link_to() return s @@ -332,23 +342,28 @@ class PageFormatter: """ def __init__(self, raw): self.raw = raw - self.is_em = self.is_b = 0 self.h_level = 0 - self.h_count = 0 - self.list_indents = [] - self.in_pre = False - self.in_table = False - self.tr_cnt = 0 - self.in_var = False + self.in_pre = self.in_table = False self.in_header = True + self.list_indents = [] + self.tr_cnt = self.h_cnt = 0 + self.styles = { + #wiki html enabled? + "//": ["em", False], + "''": ["em", False], + "**": ["b", False], + "'''": ["b", False], + "##": ["tt", False], + "``": ["tt", False], + "__": ["u", False], + "^^": ["sup", False], + ",,": ["sub", False] + } - def _emph_repl(self, word): - if len(word) == 3: - self.is_b = not self.is_b - return ['', ''][self.is_b] - else: - self.is_em = not self.is_em - return ['', ''][self.is_em] + def _b_repl(self, word): + style = self.styles[word] + style[1] = not style[1] + return ['' def _tit_repl(self, word): if self.h_level: @@ -356,10 +371,14 @@ class PageFormatter: self.h_level = 0 else: self.h_level = len(word) - 1 - self.h_count += 1 - result = '* ' % (self.h_level, self.h_count, self.h_count) + self.h_cnt += 1 + #abridged = re.sub('[^a-z_]', '', word.lower().replace(' ', '_')) + result = '¶ ' % (self.h_level, self.h_cnt, self.h_cnt) return result + def _br_repl(self, word): + return '
    ' + def _rule_repl(self, word): return self._undent() + '\n
    \n' % (len(word) - 2) @@ -367,7 +386,7 @@ class PageFormatter: return Page(word).link_to() def _img_repl(self, word): - path = script_name() + '/' + word; + path = relative_url(word) return '' % (path, path) def _url_repl(self, word): @@ -377,21 +396,22 @@ class PageFormatter: return '%s' % (word, word) def _hurl_repl(self, word): - m = re.compile("\[\[(\S+)([^\]]*)\]\]").match(word) + m = re.compile("\[\[([^ \t\n\r\f\v\|]+)(?:\s*\|\s*([^\]]+)|)\]\]").match(word) name = m.group(1) - descr = m.group(2).strip() or name + descr = m.group(2) or name macro = globals().get('_macro_' + name) if macro: return apply(macro, (name, descr)) elif img_re.match(name): - return '%s' % (name, name, descr) - elif url_re.match(name): - return '%s' % (name, descr) - elif name.startswith('/'): - return '%s' % (name, descr) + name = relative_url(name) + # The "extthumb" nonsense works around a limitation of the HTML block model + return '
    %s
    %s
    ' % (name, name, descr, descr) else: - return link_tag(name, descr) + if img_re.match(descr): + descr = '' + + return link_tag(name, descr, 'wikilink') def _email_repl(self, word): return '%s' % (word, word) @@ -417,22 +437,7 @@ class PageFormatter: return '' def _hi_repl(self, word): - if word == 'FIXME': - cl = 'error' - elif word == 'DONE': - cl = 'success' - elif word == 'TODO': - cl = 'notice' - return '' + word + '' - - def _var_repl(self, word): - if word == '{{' and not self.in_var: - self.in_var = True - return '' - elif self.in_var: - self.in_var = False - return '' - return '' + return '' + word + '' def _tr_repl(self, word): out = '' @@ -441,16 +446,17 @@ class PageFormatter: self.tr_cnt = 0 out = '

    \n' self.tr_cnt += 1 - return out + '' + return out + ['' + return ['', ''][word.strip() == '||='] return '' def _indent_level(self): @@ -492,8 +498,9 @@ class PageFormatter: scan_re = re.compile( r"(?:" # Formatting - + r"(?P'{2,3})" + + r"(?P\*\*|'''|//|''|##|``|__|\^\^|,,)" + r"|(?P\={2,6})" + + r"|(?P
    \\\\)" + r"|(?P^-{3,})" + r"|(?P<(/|)(div|span|iframe)[^<>]*>)" + r"|(?P[<>&])" @@ -502,23 +509,23 @@ class PageFormatter: # Links + r"|(?P\b[a-zA-Z0-9_-]+\.(png|gif|jpg|jpeg|bmp))" + r"|(?P\b(?:[A-Z][a-z]+){2,}\b)" - + r"|(?P\[\[\S+[^\]]*\]\])" + + r"|(?P\[\[([^ \t\n\r\f\v\|]+)(?:\s*\|\s*([^\]]+)|)\]\])" + r"|(?P(http|https|ftp|mailto)\:[^\s'\"]+\S)" + r"|(?P[-\w._+]+\@[\w.-]+)" # Lists, divs, spans - + r"|(?P
  • ^\s+\*)" - + r"|(?P
    (\{\{\{|\s*\}\}\}))"
    -            + r"|(?P(\{\{|\}\}))"
    +            + r"|(?P
  • ^\s+[\*#] +)" + + r"|(?P
    \{\{\{|\s*\}\}\})"
     
                 # Tables
    -            + r"|(?P
  • ^\s*\|\|\s*)" - + r"|(?P\s*\|\|\s*$)" - + r"|(?P^\s*\|\|(=|)\s*)" + + r"|(?P\s*\|\|(=|)\s*$)" + + r"|(?P
    ' + out = out + '
    ', ''][word.strip() == '||='] - def _tre_repl(self, word): + def _td_repl(self, word): if self.in_table: - return '
    ', ''][word.strip() == '||='] return '' - def _td_repl(self, word): + def _tre_repl(self, word): if self.in_table: - return '' + return ['
    \s*\|\|\s*)" + + r"|(?P
    \s*\|\|(=|)\s*)" + r")") pre_re = re.compile( r"(?:" + r"(?P
    \s*\}\}\})"
    +            + r"|(?P[<>&])"
                 + r")")
             blank_re = re.compile(r"^\s*$")
             indent_re = re.compile(r"^\s*")
    @@ -557,7 +564,6 @@ class Page:
             self.page_name = page_name
             self.msg = ''
             self.msg_type = 'error'
    -        self.attrs = {}
     
         def split_title(self):
             # look for the end of words and the start of a new word,
    @@ -565,20 +571,19 @@ class Page:
             return re.sub('([a-z])([A-Z])', r'\1 \2', self.page_name)
     
         def _text_filename(self):
    -        return path.join(text_dir, self.page_name)
    +        return path.join(data_dir, self.page_name)
     
         def _tmp_filename(self):
    -        return path.join(text_dir, ('#' + self.page_name + '.' + `os.getpid()` + '#'))
    +        return path.join(data_dir, ('#' + self.page_name.replace('/','_') + '.' + `os.getpid()` + '#'))
     
         def exists(self):
             try:
                 os.stat(self._text_filename())
    -            return 1
    +            return True
             except OSError, er:
                 if er.errno == errno.ENOENT:
    -                return 0
    -            else:
    -                raise er
    +                return False
    +            raise er
     
         def link_to(self):
             word = self.page_name
    @@ -592,14 +597,13 @@ class Page:
                 return open(self._text_filename(), 'rt').read()
             except IOError, er:
                 if er.errno == errno.ENOENT:
    -                # just doesn't exist, use default
    -                return 'Describe %s here.' % self.page_name
    -            else:
    -                raise er
    +                return '' # just doesn't exist, use default
    +            raise er
     
         def get_attrs(self):
    -        if self.attrs:
    +        if self.__dict__.has_key('attrs'):
                 return self.attrs
    +        self.attrs = {}
             try:
                 file = open(self._text_filename(), 'rt')
                 attr_re = re.compile(r"^#(\S*)(.*)$")
    @@ -614,18 +618,17 @@ class Page:
                     raise er
             return self.attrs
     
    +    def get_attr(self, name, default):
    +        return self.get_attrs().get(name, default)
    +
         def can(self, action, default=True):
    -        attrs = self.get_attrs()
             try:
    -            # SomeUser:read,write All:read
    -            acl = attrs["acl"]
    +            #acl SomeUser:read,write All:read
    +            acl = self.get_attr("acl", None)
                 for rule in acl.split():
                     (user,perms) = rule.split(':')
                     if user == remote_user() or user == "All":
    -                    if action in perms.split(','):
    -                        return True
    -                    else:
    -                        return False
    +                    return action in perms.split(',')
                 return False
             except Exception, er:
                 pass
    @@ -641,6 +644,12 @@ class Page:
             page_name = None
             if self.can_write():
                 page_name = self.page_name
    +
    +        #FIXME: are there security implications?
    +        #css foo.css bar.css
    +        global css_url
    +        css_url = css_url + self.get_attr("css", "").split()
    +
             send_title(page_name, self.split_title(), msg=self.msg, msg_type=self.msg_type)
             if self.can_read():
                 PageFormatter(self.get_raw_body()).print_html()
    @@ -649,10 +658,13 @@ class Page:
             send_footer(page_name, self._last_modified())
     
         def _last_modified(self):
    -        if not self.exists():
    +        try:
    +            from time import localtime, strftime
    +            modtime = localtime(os.stat(self._text_filename())[stat.ST_MTIME])
    +        except OSError, er:
    +            if er.errno != errno.ENOENT:
    +                raise er
                 return None
    -        from time import localtime, strftime
    -        modtime = localtime(os.stat(self._text_filename())[stat.ST_MTIME])
             return strftime(datetime_fmt, modtime)
     
         def send_editor(self, preview=None):
    @@ -665,9 +677,9 @@ class Page:
                 + ' for ' + cgi.escape(remote_user())
                 + ' from ' + cgi.escape(get_hostname(remote_host()))
                 + '

    ') - print '
    ' % (script_name(), self.page_name) + print '
    ' % relative_url(self.page_name) print '' % (self.page_name) - print """""" % (preview or self.get_raw_body()) + print """""" % (preview or self.get_raw_body()) print """
    @@ -699,7 +711,7 @@ class Page: try: os.remove(text) except OSError, er: - if er.errno <> errno.ENOENT: raise er + if er.errno != errno.ENOENT: raise er os.rename(tmp_filename, text) def save_text(self, newtext): @@ -713,7 +725,7 @@ class Page: if post_edit_hook: # FIXME: what's the std way to perform shell quoting in python? cmd = ( post_edit_hook - + " '" + text_dir + '/' + self.page_name + + " '" + data_dir + '/' + self.page_name + "' '" + remote_user() + "' '" + remote_host() + "'" ) @@ -729,31 +741,16 @@ class Page: self.msg = 'Thank you for your contribution. Your attention to detail is appreciated.' self.msg_type = 'success' +#TODO: merge into send_raw() def send_verbatim(filename, mime_type='application/octet-stream'): - pathname = path.join(text_dir, filename) + pathname = path.join(data_dir, filename) data = open(pathname, 'rb').read() emit_header(mime_type) sys.stdout.write(data) # Main --------------------------------------------------------------- try: - # Configuration values - site_name = 'Codewiz' - - # set to None for read-only sites, leave empty ('') to allow anonymous edits - # otherwise, set to a URL that requires authentication - privileged_url = 'https://www.codewiz.org/~bernie/wiki' - - data_dir = '/home/bernie/public_html/wiki' - text_dir = path.join(data_dir, 'text') - css_url = '../wikidata/geekigeeki.css' # optional stylesheet link - history_url = '../wikigit/wiki.git' - post_edit_hook = './post_edit_hook.sh' - datetime_fmt = '%a %d %b %Y %I:%M %p' - allow_edit = True # Is it possible to edit pages? - show_hosts = True # show hostnames? - nonexist_pfx = '' # prefix before nonexistent link (usually '?') - debug_cgi = False # Set to True for CGI var dump + execfile("geekigeeki.conf.py") form = cgi.FieldStorage() @@ -783,7 +780,7 @@ try: else: send_verbatim(query) else: - # TODO: return 404? + print "Status: 404 Not Found" send_title(None, msg='Can\'t work out query: ' + query) except: import traceback