2 from __future__ import with_statement
3 import urllib, HTMLParser, os
# PageParser: an HTMLParser.HTMLParser subclass whose handlers write the
# markup they receive (start tags, end tags, self-closing tags and text) to
# an output file opened in the constructor.
# NOTE(review): several statements of this class are not visible in this
# view (`d` and `self.ignore` are used below but never assigned here, and
# the `elif` in handle_endtag has no visible `if`), so the conditions that
# guard each write cannot be confirmed from this listing.
5 class PageParser(HTMLParser.HTMLParser):
# Constructor: initialises the base parser, then opens `output_file` in
# write mode and keeps the handle on self.out_file.
# NOTE(review): no matching close of self.out_file is visible here --
# confirm the file is flushed/closed somewhere.
6 def __init__(self, output_file):
7 HTMLParser.HTMLParser.__init__(self, )
8 self.out_file = open(output_file, "w")
# Called by the base parser for every opening tag.
# `tag` is the tag name, `attrs` the attributes passed in by HTMLParser.
14 def handle_starttag(self, tag, attrs):
# Tests whether the element id is one of: banner, mainnav, ctxtnav, footer,
# altlinks.
# NOTE(review): `d` is presumably dict(attrs), and a match presumably starts
# skipping that element -- neither the assignment nor the branch body is
# visible here; confirm.
19 if 'id' in d and (d['id'] == 'banner' or d['id'] == 'mainnav' or d['id'] == 'ctxtnav' \
20 or d['id'] == 'footer' or d['id'] == 'altlinks'):
# Writes the start tag exactly as it appeared in the input.
29 self.out_file.write(self.get_starttag_text())
# Called by the base parser for every closing tag.
31 def handle_endtag(self, tag):
33 #self.out_file.write(self.get_starttag_text() + '\n')
# Writes a closing tag rebuilt from the tag name: '</' + tag + '>'.
34 self.out_file.write(r'</' + tag + r'>')
# Separate branch for a closing </div> seen while self.ignore is truthy.
# NOTE(review): presumably this tracks the end of an ignored <div> block;
# the branch body is not visible -- confirm.
38 elif tag == 'div' and self.ignore:
# Called by the base parser for self-closing tags (e.g. <br/>); writes the
# tag text as it appeared in the input.
45 def handle_startendtag(self, tag, attrs):
47 self.out_file.write(self.get_starttag_text())
# Called by the base parser for text between tags; writes it unchanged.
49 def handle_data(self, data):
51 self.out_file.write(data)
# Script body: download the DevelopmentStatus wiki page to `local_file`,
# then feed it to a PageParser that writes its output to `stripped_file`.
# Both file names are relative paths, so they resolve against the current
# working directory.
53 local_file = "dev_status.html"
54 stripped_file = "stripped.html"
# urllib.urlretrieve is the Python 2 API; it saves the URL's content to
# local_file.
55 urllib.urlretrieve("http://dev.bertos.org/wiki/DevelopmentStatus", local_file)
56 development_status_parser = PageParser(stripped_file)
57 with open(local_file, "r") as f:
# Detects lines containing 'body>', '</html' or '<html'.
# NOTE(review): `line` presumably comes from a loop over `f`, and matching
# lines are presumably skipped so the <html>/<body> wrappers are dropped --
# the loop header and the branch body are not visible here; confirm.
59 if line.find('body>') != -1 or line.find('</html') != -1 or line.find('<html') != -1:
# Hand the line to the parser; its handlers write to the output file.
61 development_status_parser.feed(line)
# Tell the parser that the input is complete. No close() override is
# visible in PageParser, so this is presumably HTMLParser.close().
# NOTE(review): nothing visible closes the parser's out_file afterwards.
62 development_status_parser.close()
64 # TODO: append tabs header
65 # TODO: make internal links really internal
66 # TODO: rename stripped file?