X-Git-Url: https://codewiz.org/gitweb?a=blobdiff_plain;f=geekigeeki.py;h=76888601650f52e137b24a9278cbf9c5c12aad04;hb=dfa7879fd3551cd71fe2977ca3146833189b90cf;hp=116b03b480911485162d8c6bacc49b3cddd1f523;hpb=1c33760fee962c9aa3b77ac6b354485b377e3d9e;p=geekigeeki.git
diff --git a/geekigeeki.py b/geekigeeki.py
index 116b03b..7688860 100755
--- a/geekigeeki.py
+++ b/geekigeeki.py
@@ -1,10 +1,9 @@
-#! /usr/bin/env python
-"""Quick-quick implementation of WikiWikiWeb in Python
-"""
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
#
-# Copyright (C) 1999, 2000 Martin Pool
-# This version includes additional changes by Gerardo Poggiali (2002)
-# This version includes additional changes by Bernardo Innocenti (2007)
+# Copyright 1999, 2000 Martin Pool
+# Copyright 2002 Gerardo Poggiali
+# Copyright 2007, 2008 Bernardo Innocenti
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -19,15 +18,19 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
-__version__ = '$Revision: 1.63+gerry+bernie $'[11:-2]
+__version__ = '$Id$'[4:12]
-import cgi, sys, string, os, re, errno, time, stat
+from time import clock
+start_time = clock()
+
+import cgi, sys, os, re, errno, stat
from os import path, environ
# Regular expression defining a WikiWord
# (but this definition is also assumed in other places)
-file_re = re.compile(r"^\b([A-Za-z0-9_\.\-]+)\b$")
-word_re = re.compile(r"^\b([A-Z][a-z]+){2,}\b$")
+word_re = re.compile(r"^\b((([A-Z][a-z]+){2,}/)*([A-Z][a-z]+){2,})\b$")
+# FIXME: we accept stuff like foo/../bar and we shouldn't
+file_re = re.compile(r"^\b([A-Za-z0-9_\-][A-Za-z0-9_\.\-/]*)\b$")
img_re = re.compile(r"^.*\.(png|gif|jpg|jpeg)$", re.IGNORECASE)
url_re = re.compile(r"^[a-z]{3,8}://[^\s'\"]+\S$")
@@ -55,25 +58,33 @@ def get_hostname(addr):
try:
from socket import gethostbyaddr
return gethostbyaddr(addr)[0] + ' (' + addr + ')'
- except:
+ except Exception, er:
return addr
+def relative_url(pathname, privileged=False):
+ if not (url_re.match(pathname) or pathname.startswith('/')):
+ if privileged:
+ url = privileged_path()
+ else:
+ url = script_name()
+ pathname = url + '/' + pathname
+ return pathname
+
# Formatting stuff --------------------------------------------------
-def emit_header(type="text/html"):
- print "Content-type: " + type + "; charset=utf-8"
- print
+def emit_header(mime_type="text/html"):
+ print "Content-type: " + mime_type + "; charset=utf-8\n"
-def send_guru(msg, msg_type):
- if msg is None or msg == '': return
+def send_guru(msg_text, msg_type):
+ if not msg_text: return
print ''
if msg_type == 'error':
print ' Software Failure. Press left mouse button to continue.\n'
- print msg
+ print msg_text
if msg_type == 'error':
print ' Guru Meditation #DEADBEEF.ABADC0DE'
print ' '
- # FIXME: This simple JS snippet is harder to pass than ACID 3.0
+ # FIXME: This little JS snippet is harder to pass than ACID 3.0
print """
"""
-def send_title(name, text="Limbo", msg=None, msg_type='error'):
+def send_title(name, text="Limbo", msg_text=None, msg_type='error'):
global title_done
if title_done: return
# Head
emit_header()
- print """
-
-"""
+ print ''
+ print ''
+
+ site_name = globals().get('site_name', 'Unconfigured Site')
print "%s: %s " % (site_name, text)
- print ' '
+ print ' '
if not name:
print ' '
- if css_url:
- print ' ' % css_url
+ for css in css_url:
+ print ' ' % relative_url(css)
print ''
# Body
@@ -121,24 +133,25 @@ def send_title(name, text="Limbo", msg=None, msg_type='error'):
print ''
title_done = True
- send_guru(msg, msg_type)
+ send_guru(msg_text, msg_type)
# Navbar
print ''
- print '
' + site_name + ': ',
if name:
- print link_tag('?fullsearch=' + name, text) + ' '
+ print '
' + link_tag('?fullsearch=' + name, text, 'navlink') + ' '
else:
- print text + ' '
+ print '
' + text + ' '
print ' | ' + link_tag('FrontPage', 'Front Page', 'navlink')
print ' | ' + link_tag('FindPage', 'Find Page', 'navlink')
- print ' |
Recent Changes '
+ if 'history_url' in globals():
+ print ' |
Recent Changes '
+ if name:
+ print ' |
Page History '
if name:
- print ' |
Page History '
print ' | ' + link_tag('?raw=' + name, 'Raw Text', 'navlink')
if privileged_url is not None:
- print ' | ' + link_tag('?edit=' + name, 'Edit Page', 'navlink', authentication=True)
+ print ' | ' + link_tag('?edit=' + name, 'Edit Page', 'navlink', privileged=True)
else:
print ' |
Immutable Page '
@@ -148,25 +161,22 @@ def send_title(name, text="Limbo", msg=None, msg_type='error'):
print '
'
-def link_tag(params, text=None, ss_class=None, authentication=False):
+def link_tag(params, text=None, ss_class=None, privileged=False):
if text is None:
text = params # default
classattr = ''
if ss_class:
classattr += 'class="%s" ' % ss_class
- # Prevent crawlers from following links to generated pages
- # and links added by potential spammers
+ # Prevent crawlers from following links potentially added by spammers or to generated pages
if ss_class == 'external' or ss_class == 'navlink':
classattr += 'rel="nofollow" '
- if authentication:
- path = privileged_path()
- else:
- path = script_name()
- return '%s ' % (classattr, path, params, text)
+ elif url_re.match(params):
+ classattr += 'rel="nofollow" '
+ return '%s ' % (classattr, relative_url(params, privileged=privileged), text)
# Search ---------------------------------------------------
-def do_fullsearch(needle):
+def handle_fullsearch(needle):
send_title(None, 'Full text search for "%s"' % (needle))
needle_re = re.compile(needle, re.IGNORECASE)
@@ -187,14 +197,13 @@ def do_fullsearch(needle):
for (count, page_name) in hits:
print '' + Page(page_name).link_to()
print ' . . . . ' + `count`
- print ['match', 'matches'][count <> 1]
+ print ['match', 'matches'][count != 1]
print '
'
print ""
print_search_stats(len(hits), len(all_pages))
-
-def do_titlesearch(needle):
+def handle_titlesearch(needle):
# TODO: check needle is legal -- but probably we can just accept any RE
send_title(None, "Title search for \"" + needle + '"')
@@ -209,75 +218,64 @@ def do_titlesearch(needle):
print_search_stats(len(hits), len(all_pages))
-
def print_search_stats(hits, searched):
print "%d hits out of %d pages searched.
" % (hits, searched)
-def do_edit(pagename):
- Page(pagename).send_editor()
-
-def do_raw(pagename):
+def handle_raw(pagename):
Page(pagename).send_raw()
-def do_savepage(pagename):
- if privileged_url is None:
- raise 'editing disallowed for ' + pagename
-
- global form
+def handle_edit(pagename):
pg = Page(pagename)
- if 'preview' in form:
- pg.send_editor(form['savetext'].value)
- elif 'save' in form:
- pg.save_text(form['savetext'].value)
+ if 'save' in form:
+ if form['file'].value:
+ pg.save(form['file'].file.read())
+ else:
+ pg.save(form['savetext'].value.replace('\r\n', '\n'))
pg.send_page()
elif 'cancel' in form:
- pg.msg = 'Editing cancelled'
+ pg.msg_text = 'Editing canceled'
pg.msg_type = 'notice'
pg.send_page()
- else:
- raise 'What did you press?'
+ else: # preview or edit
+ text = None
+ if 'preview' in form:
+ text = form['savetext'].value
+ pg.send_editor(text)
def make_index_key():
- s = '
'
- links = map(lambda ch: '%s ' % (ch, ch),
- string.lowercase)
- s = s + string.join(links, ' | ')
- s = s + '
'
- return s
-
+ links = map(lambda ch: '%s ' % (ch, ch), 'abcdefghijklmnopqrstuvwxyz')
+ return '
'+ ' | '.join(links) + ' '
def page_list():
- return filter(word_re.match, os.listdir(text_dir))
-
+ return filter(word_re.match, os.listdir(data_dir))
def send_footer(name, mod_string=None):
- if debug_cgi:
+ if globals().get('debug_cgi', False):
cgi.print_arguments()
- cgi.print_form(cgi.FieldStorage())
+ cgi.print_form(form)
cgi.print_environ()
print ''
-
-# ----------------------------------------------------------
-# Macros
-def _macro_TitleSearch():
+# Macros ------------------------------------------------------------
+def _macro_TitleSearch(*vargs):
return _macro_search("titlesearch")
-def _macro_FullSearch():
+def _macro_FullSearch(*vargs):
return _macro_search("fullsearch")
def _macro_search(type):
- if form.has_key('value'):
- default = form["value"].value
- else:
- default = ''
- return """""" % (type, default)
+ default = ''
+ if 'value' in form:
+ default = form['value'].value
+ return """""" % (type, default)
-def _macro_WordIndex():
+def _macro_WordIndex(*vargs):
s = make_index_key()
pages = list(page_list())
map = {}
@@ -294,8 +292,8 @@ def _macro_WordIndex():
last_letter = None
# set title
for word in all_words:
- letter = string.lower(word[0])
- if letter <> last_letter:
+ letter = word[0].lower()
+ if letter != last_letter:
s = s + '; %s ' % (letter, letter)
last_letter = letter
@@ -310,23 +308,21 @@ def _macro_WordIndex():
return s
-def _macro_TitleIndex():
+def _macro_TitleIndex(*vargs):
s = make_index_key()
pages = list(page_list())
pages.sort()
current_letter = None
for name in pages:
- letter = string.lower(name[0])
- if letter <> current_letter:
- s = s + '%s ' % (letter, letter)
+ letter = name[0].lower()
+ if letter != current_letter:
+ s += '%s ' % (letter, letter)
current_letter = letter
else:
- s = s + ' '
- s = s + Page(name).link_to()
+ s += ' '
+ s += Page(name).link_to()
return s
-
-# ----------------------------------------------------------
class PageFormatter:
"""Object that turns Wiki markup into HTML.
@@ -335,100 +331,125 @@ class PageFormatter:
"""
def __init__(self, raw):
self.raw = raw
- self.is_em = self.is_b = 0
self.h_level = 0
- self.list_indents = []
- self.in_pre = 0
- self.in_var = 0
+ self.in_pre = self.in_table = self.in_li = False
self.in_header = True
+ self.list_indents = []
+ self.tr_cnt = 0
+ self.styles = {
+ #wiki html enabled?
+ "//": ["em", False],
+ "''": ["em", False],
+ "**": ["b", False],
+ "'''": ["b", False],
+ "##": ["tt", False],
+ "``": ["tt", False],
+ "__": ["u", False],
+ "^^": ["sup", False],
+ ",,": ["sub", False]
+ }
- def _emph_repl(self, word):
- if len(word) == 3:
- self.is_b = not self.is_b
- return ['', ''][self.is_b]
- else:
- self.is_em = not self.is_em
- return ['', ''][self.is_em]
+ def _b_repl(self, word):
+ style = self.styles[word]
+ style[1] = not style[1]
+ return ['', '<'][style[1]] + style[0] + '>'
def _tit_repl(self, word):
if self.h_level:
- result = "" % self.h_level
+ result = '\n' % self.h_level
self.h_level = 0
else:
self.h_level = len(word) - 1
- result = "" % self.h_level
+ abridged = re.sub('[^A-Za-z0-9_]', '', self.line).lower()
+ result = '\n
¶ ' % (self.h_level, abridged, abridged)
return result
+ def _br_repl(self, word):
+ return ' '
+
def _rule_repl(self, word):
- s = self._undent()
- if len(word) <= 3:
- s = s + "\n \n"
- else:
- s = s + "\n \n" % (len(word) - 2 )
- return s
+ return self._undent() + '\n \n' % (len(word) - 2)
def _word_repl(self, word):
return Page(word).link_to()
def _img_repl(self, word):
- return ' ' % (script_name(), word)
+ path = relative_url(word)
+ return ' ' % (path, path)
def _url_repl(self, word):
if img_re.match(word):
- return ' ' % word
+ return ' ' % (word, word)
else:
return '%s ' % (word, word)
def _hurl_repl(self, word):
- m = re.compile("\[\[(\S+)\ (.+)\]\]").match(word)
- anchor = m.group(1)
- descr = m.group(2)
- if img_re.match(anchor):
- return ' ' % (anchor, descr)
- elif url_re.match(anchor):
- return '%s ' % (anchor, descr)
- elif anchor.startswith('/'):
- return '%s ' % (anchor, descr)
+ m = re.compile("\[\[([^ \t\n\r\f\v\|]+)(?:\s*\|\s*([^\]]+)|)\]\]").match(word)
+ name = m.group(1)
+ descr = m.group(2) or name
+
+ macro = globals().get('_macro_' + name)
+ if macro:
+ return macro(name, descr)
+ elif img_re.match(name):
+ name = relative_url(name)
+ # The "extthumb" nonsense works around a limitation of the HTML block model
+ return '%s
' % (name, name, descr, descr)
else:
- return link_tag(anchor, descr)
+ if img_re.match(descr):
+ descr = ' '
+
+ return link_tag(name, descr, 'wikilink')
def _email_repl(self, word):
return '%s ' % (word, word)
+ def _html_repl(self, word):
+ return word; # Pass through
def _ent_repl(self, s):
return {'&': '&',
'<': '<',
'>': '>'}[s]
-
def _li_repl(self, match):
- return ''
-
+ if self.in_li:
+ return ' '
+ else:
+ self.in_li = True
+ return ' '
def _pre_repl(self, word):
if word == '{{{' and not self.in_pre:
- self.in_pre = 1
+ self.in_pre = True
return ''
elif self.in_pre:
- self.in_pre = 0
+ self.in_pre = False
return ' '
- else:
- return ''
-
- def _var_repl(self, word):
- if word == '{{' and not self.in_var:
- self.in_var = 1
- return ''
- elif self.in_var:
- self.in_var = 0
- return '
'
- else:
- return ''
- def _macro_repl(self, word):
- macro_name = word[2:-2]
- # TODO: Somehow get the default value into the search field
- return apply(globals()['_macro_' + macro_name], ())
+ return ''
+
+ def _hi_repl(self, word):
+ return '' + word + ' '
+
+ def _tr_repl(self, word):
+ out = ''
+ if not self.in_table:
+ self.in_table = True
+ self.tr_cnt = 0
+ out = '\n'
+ self.tr_cnt += 1
+ out = out + ''
+ return out + ['', ' '][word.strip() == '||=']
+
+ def _td_repl(self, word):
+ if self.in_table:
+ return [' ', ' '][word.strip() == '||=']
+ return ''
+
+ def _tre_repl(self, word):
+ if self.in_table:
+ return [' ', ''][word.strip() == '||=']
+ return ''
def _indent_level(self):
return len(self.list_indents) and self.list_indents[-1]
@@ -439,6 +460,9 @@ class PageFormatter:
s = ''
while self._indent_level() > new_level:
del(self.list_indents[-1])
+ if self.in_li:
+ s += ''
+ self.in_li = False # FIXME
s += '\n'
while self._indent_level() < new_level:
self.list_indents.append(new_level)
@@ -456,112 +480,124 @@ class PageFormatter:
def replace(self, match):
for type, hit in match.groupdict().items():
if hit:
- return apply(getattr(self, '_' + type + '_repl'), (hit,))
+ return getattr(self, '_' + type + '_repl')(hit)
else:
raise "Can't handle match " + `match`
def print_html(self):
- print ""
+ print '
'
# For each line, we scan through looking for magic
# strings, outputting verbatim any intervening text
+ # TODO: highlight search words (look at referrer)
scan_re = re.compile(
r"(?:"
- + r"(?P'{2,3})"
+ # Formatting
+ + r"(?P\*\*|'''|//|''|##|``|__|\^\^|,,)"
+ r"|(?P\={2,6})"
+ + r"|(?P \\\\)"
+ + r"|(?P^-{3,})"
+ + r"|(?P<(/|)(div|span|iframe)[^<>]*>)"
+ r"|(?P[<>&])"
+ + r"|(?P\b(FIXME|TODO|DONE)\b)"
+
+ # Links
+ r"|(?P \b[a-zA-Z0-9_-]+\.(png|gif|jpg|jpeg|bmp))"
+ r"|(?P\b(?:[A-Z][a-z]+){2,}\b)"
- + r"|(?P^-{3,})"
- + r"|(?P\[\[\S+\s+.+\]\])"
- + r"|(?P(http|ftp|nntp|news|mailto)\:[^\s'\"]+\S)"
+ + r"|(?P\[\[([^ \t\n\r\f\v\|]+)(?:\s*\|\s*([^\]]+)|)\]\])"
+ + r"|(?P(http|https|ftp|mailto)\:[^\s'\"]+\S)"
+ r"|(?P[-\w._+]+\@[\w.-]+)"
- + r"|(?P^\s+\*)"
- + r"|(?P(\{\{\{|\s*\}\}\}))"
- + r"|(?P(\{\{|\}\}))"
- + r"|(?P\[\[(TitleSearch|FullSearch|WordIndex|TitleIndex)\]\])"
+
+ # Lists, divs, spans
+ + r"|(?P^\s+[\*#] +)"
+ + r"|(?P\{\{\{|\s*\}\}\})"
+
+ # Tables
+ + r"|(?P^\s*\|\|(=|)\s*)"
+ + r"|(?P\s*\|\|(=|)\s*$)"
+ + r"|(?P\s*\|\|(=|)\s*)"
+ r")")
pre_re = re.compile(
r"(?:"
+ r"(?P\s*\}\}\})"
+ + r"|(?P[<>&])"
+ r")")
- blank_re = re.compile("^\s*$")
- indent_re = re.compile("^\s*")
- eol_re = re.compile(r'\r?\n')
- raw = string.expandtabs(self.raw)
- for line in eol_re.split(raw):
+ blank_re = re.compile(r"^\s*$")
+ indent_re = re.compile(r"^\s*")
+ tr_re = re.compile(r"^\s*\|\|")
+ eol_re = re.compile(r"\r?\n")
+ for self.line in eol_re.split(self.raw.expandtabs()):
# Skip ACLs
if self.in_header:
- if line.startswith('#'):
- continue
+ if self.line.startswith('#'):
+ continue
self.in_header = False
+
if self.in_pre:
- print re.sub(pre_re, self.replace, line)
+ print re.sub(pre_re, self.replace, self.line)
else:
- # XXX: Should we check these conditions in this order?
- if blank_re.match(line):
+ if self.in_table and not tr_re.match(self.line):
+ self.in_table = False
+ print '
'
+
+ if blank_re.match(self.line):
print '
'
- continue
- indent = indent_re.match(line)
- print self._indent_to(len(indent.group(0)))
- print re.sub(scan_re, self.replace, line)
+ else:
+ indent = indent_re.match(self.line)
+ print self._indent_to(len(indent.group(0)))
+ print re.sub(scan_re, self.replace, self.line)
+
if self.in_pre: print ''
+ if self.in_table: print '
'
print self._undent()
- print "
"
+ print ''
-# ----------------------------------------------------------
class Page:
def __init__(self, page_name):
self.page_name = page_name
- self.msg = ''
+ self.msg_text = ''
self.msg_type = 'error'
- self.attrs = {}
def split_title(self):
- # look for the end of words and the start of a new word,
- # and insert a space there
+ # look for the end of words and the start of a new word and insert a space there
return re.sub('([a-z])([A-Z])', r'\1 \2', self.page_name)
def _text_filename(self):
- return path.join(text_dir, self.page_name)
+ return path.join(data_dir, self.page_name)
def _tmp_filename(self):
- return path.join(text_dir, ('#' + self.page_name + '.' + `os.getpid()` + '#'))
+ return path.join(data_dir, ('#' + self.page_name.replace('/','_') + '.' + `os.getpid()` + '#'))
def exists(self):
try:
os.stat(self._text_filename())
- return 1
+ return True
except OSError, er:
if er.errno == errno.ENOENT:
- return 0
- else:
- raise er
+ return False
+ raise er
def link_to(self):
word = self.page_name
if self.exists():
return link_tag(word, word, 'wikilink')
else:
- if nonexist_qm:
- return link_tag(word, '?', 'nonexistent') + word
- else:
- return link_tag(word, word, 'nonexistent')
-
+ return link_tag(word, nonexist_pfx + word, 'nonexistent')
def get_raw_body(self):
try:
- return open(self._text_filename(), 'rt').read()
+ return open(self._text_filename(), 'rb').read()
except IOError, er:
if er.errno == errno.ENOENT:
- # just doesn't exist, use default
- return 'Describe %s here.' % self.page_name
- else:
- raise er
+ return '' # just doesn't exist, use default
+ if er.errno == errno.EISDIR:
+ return 'DIR'
+ raise er
def get_attrs(self):
- if self.attrs:
+ if 'attrs' in self.__dict__:
return self.attrs
+ self.attrs = {}
try:
file = open(self._text_filename(), 'rt')
attr_re = re.compile(r"^#(\S*)(.*)$")
@@ -576,46 +612,74 @@ class Page:
raise er
return self.attrs
- def can_edit(self):
- attrs = self.get_attrs()
+ def get_attr(self, name, default):
+ return self.get_attrs().get(name, default)
+
+ def can(self, action, default=True):
+ acl = None
try:
- # SomeUser:read,write All:read
- acl = attrs["acl"]
+ #acl SomeUser:read,write All:read
+ acl = self.get_attr("acl", None)
for rule in acl.split():
- (user,perms) = acl.split(':')
+ (user, perms) = rule.split(':')
if user == remote_user() or user == "All":
- if 'write' in perms.split(','):
- return True
+ return action in perms.split(',')
return False
- except:
- pass
- return True
+ except Exception, er:
+ if acl:
+ self.msg_text = 'Illegal acl line: ' + acl
+ return default
+
+ def can_write(self):
+ return self.can("write", True)
+
+ def can_read(self):
+ return self.can("read", True)
def send_page(self):
page_name = None
- if self.can_edit():
+ if self.can_write():
page_name = self.page_name
- send_title(page_name, self.split_title(), msg=self.msg, msg_type=self.msg_type)
- PageFormatter(self.get_raw_body()).print_html()
+
+ #css foo.css bar.css
+ global css_url
+ css_url = self.get_attr("css", "").split() + css_url
+
+ send_title(page_name, self.split_title(), msg_text=self.msg_text, msg_type=self.msg_type)
+ if self.can_read():
+ PageFormatter(self.get_raw_body()).print_html()
+ else:
+ send_guru("Read access denied by ACLs", "notice")
send_footer(page_name, self._last_modified())
def _last_modified(self):
- if not self.exists():
+ try:
+ from time import localtime, strftime
+ modtime = localtime(os.stat(self._text_filename())[stat.ST_MTIME])
+ except OSError, er:
+ if er.errno != errno.ENOENT:
+ raise er
return None
- from time import localtime, strftime
- modtime = localtime(os.stat(self._text_filename())[stat.ST_MTIME])
return strftime(datetime_fmt, modtime)
def send_editor(self, preview=None):
- send_title(None, 'Edit ' + self.split_title(), msg=self.msg, msg_type=self.msg_type)
+ send_title(None, 'Edit ' + self.split_title(), msg_text=self.msg_text, msg_type=self.msg_type)
+ if not self.can_write():
+ send_guru("Write access denied by ACLs", "error")
+ return
+
+ file = ''
+ if 'file' in form:
+ file = form['file'].value
print ('Editing ' + self.page_name
+ ' for ' + cgi.escape(remote_user())
+ ' from ' + cgi.escape(get_hostname(remote_host()))
+ '
')
- print '