doc: Improve and rename tag filter script
[bertos.git] / doc / dev-status-page.py
1 #!/usr/bin/env python
2 from __future__ import with_statement
3 import urllib, HTMLParser, os
4
class PageParser(HTMLParser.HTMLParser):
    """Copy an HTML stream to a file, leaving out selected elements.

    Everything fed to the parser is written back to ``output_file`` except:

    * ``<head>``, ``<form>`` and ``<script>`` elements and their content;
    * ``<div>`` elements whose ``id`` is listed in ``IGNORED_DIV_IDS``,
      together with everything nested inside them.

    ``self.ignore`` is a nesting counter: it is non-zero while the parser is
    inside at least one dropped element, and output is only produced while
    it is zero.

    Call ``close()`` when done: it flushes the parser and closes the output
    file. The parser must not be fed again afterwards.
    """

    # ids of the <div> elements that are dropped with their whole content.
    IGNORED_DIV_IDS = ('banner', 'mainnav', 'ctxtnav', 'footer', 'altlinks')
    # Elements that are always dropped with their whole content.
    IGNORED_TAGS = ('head', 'form', 'script')

    def __init__(self, output_file):
        """Create a parser writing the filtered page to ``output_file``.

        ``output_file`` is a path; it is opened for writing (truncated).
        """
        HTMLParser.HTMLParser.__init__(self)
        self.out_file = open(output_file, "w")
        self.ignore = 0

    def __del__(self):
        # Safety net only; close() is the supported way to release the file.
        # out_file is missing when open() failed inside __init__.
        out_file = getattr(self, 'out_file', None)
        if out_file is not None:
            out_file.close()

    def close(self):
        """Process any buffered input, then close the output file."""
        try:
            # The base class may still emit pending data through the
            # handlers, so the file has to stay open until it returns.
            HTMLParser.HTMLParser.close(self)
        finally:
            self.out_file.close()

    def handle_starttag(self, tag, attrs):
        """Enter a dropped element, or copy the start tag verbatim."""
        if tag in self.IGNORED_TAGS:
            self.ignore += 1
        elif tag == 'div':
            # A <div> nested in a dropped region is counted as well, so that
            # its </div> does not end the region too early.
            if self.ignore or dict(attrs).get('id') in self.IGNORED_DIV_IDS:
                self.ignore += 1
        if not self.ignore:
            self.out_file.write(self.get_starttag_text())

    def handle_endtag(self, tag):
        """Leave a dropped element, or copy the end tag."""
        if self.ignore:
            # Only the tags counted by handle_starttag() close a level.
            if tag == 'div' or tag in self.IGNORED_TAGS:
                self.ignore -= 1
            return
        if tag in self.IGNORED_TAGS:
            # Unmatched end tag of an element whose start tag is never
            # written: drop it, and do not let the counter go negative
            # (a negative counter would silence all further output).
            return
        self.out_file.write('</' + tag + '>')

    def handle_startendtag(self, tag, attrs):
        """Copy a self-closing tag (``<br />``) verbatim."""
        if not self.ignore:
            self.out_file.write(self.get_starttag_text())

    def handle_data(self, data):
        """Copy text content."""
        if not self.ignore:
            self.out_file.write(data)

    def handle_entityref(self, name):
        """Copy a named reference such as ``&amp;`` instead of losing it.

        The reference is always written with a terminating ``;``.
        """
        if not self.ignore:
            self.out_file.write('&' + name + ';')

    def handle_charref(self, name):
        """Copy a numeric reference such as ``&#160;`` instead of losing it.

        The reference is always written with a terminating ``;``.
        """
        if not self.ignore:
            self.out_file.write('&#' + name + ';')
52
# Scratch copy of the downloaded page; always removed before the script ends.
local_file = "dev_status.html"
# Destination of the filtered page.
stripped_file = "stripped.html"

try:
    urllib.urlretrieve("http://dev.bertos.org/wiki/DevelopmentStatus", local_file)
    development_status_parser = PageParser(stripped_file)
    try:
        with open(local_file, "r") as f:
            for line in f:
                # Lines carrying the <html>/<body> tags are skipped as a
                # whole, so those tags never reach the parser.
                if 'body>' in line or '</html' in line or '<html' in line:
                    continue
                development_status_parser.feed(line)
            development_status_parser.close()
    finally:
        # Make sure the stripped page is flushed to disk even when parsing
        # fails; closing an already closed file is harmless.
        development_status_parser.out_file.close()
finally:
    # Do not leave the scratch download behind, whatever happened above.
    if os.path.exists(local_file):
        os.unlink(local_file)

# TODO: append tabs header
# TODO: make internal links really internal
# TODO: rename stripped file?