--- /dev/null
+#!/usr/bin/env python
+from __future__ import with_statement
+import urllib, HTMLParser, os
+
+class PageParser(HTMLParser.HTMLParser):
+ def __init__(self, output_file):
+ HTMLParser.HTMLParser.__init__(self, )
+ self.out_file = open(output_file, "w")
+ self.ignore = 0
+
+ def __del__(self):
+ self.out_file.close()
+
+ def handle_starttag(self, tag, attrs):
+ d = dict(attrs)
+ if tag == 'head':
+ self.ignore += 1
+ elif tag == 'div':
+ if 'id' in d and (d['id'] == 'banner' or d['id'] == 'mainnav' or d['id'] == 'ctxtnav' \
+ or d['id'] == 'footer' or d['id'] == 'altlinks'):
+ self.ignore += 1
+ elif self.ignore:
+ self.ignore += 1
+ elif tag == 'form':
+ self.ignore += 1
+ elif tag == 'script':
+ self.ignore += 1
+ if not self.ignore:
+ self.out_file.write(self.get_starttag_text())
+
+ def handle_endtag(self, tag):
+ if not self.ignore:
+ #self.out_file.write(self.get_starttag_text() + '\n')
+ self.out_file.write(r'</' + tag + r'>')
+
+ if tag == 'head':
+ self.ignore -= 1
+ elif tag == 'div' and self.ignore:
+ self.ignore -= 1
+ elif tag == 'form':
+ self.ignore -= 1
+ elif tag == 'script':
+ self.ignore -= 1
+
+ def handle_startendtag(self, tag, attrs):
+ if not self.ignore:
+ self.out_file.write(self.get_starttag_text())
+
+ def handle_data(self, data):
+ if not self.ignore:
+ self.out_file.write(data)
+
+local_file = "dev_status.html"
+stripped_file = "stripped.html"
+urllib.urlretrieve("http://dev.bertos.org/wiki/DevelopmentStatus", local_file)
+development_status_parser = PageParser(stripped_file)
+with open(local_file, "r") as f:
+ for line in f:
+ if line.find('body>') != -1 or line.find('</html') != -1 or line.find('<html') != -1:
+ continue
+ development_status_parser.feed(line)
+ development_status_parser.close()
+
+# TODO: append tabs header
+# TODO: make internal links really internal
+# TODO: rename stripped file?
+os.unlink(local_file)