First version of dev status page downloader.
[bertos.git] / doc / dev-status-page.py
diff --git a/doc/dev-status-page.py b/doc/dev-status-page.py
new file mode 100644 (file)
index 0000000..9c81cd5
--- /dev/null
@@ -0,0 +1,67 @@
+#!/usr/bin/env python
+from __future__ import with_statement
+import urllib, HTMLParser, os
+
+class PageParser(HTMLParser.HTMLParser):
+       """Copy an HTML stream to a file, leaving out selected elements.
+
+       The following elements are dropped together with everything nested
+       inside them: <head>, <form>, <script>, and any <div> whose id is one
+       of IGNORED_DIV_IDS.  All other tags, text, entity references and
+       character references are written to the output file.  Comments,
+       declarations and processing instructions are not copied because
+       their handlers are not overridden.
+       """
+
+       # ids of the <div> elements that must not appear in the output.
+       IGNORED_DIV_IDS = ('banner', 'mainnav', 'ctxtnav', 'footer', 'altlinks')
+
+       def __init__(self, output_file):
+               """Open output_file for writing and start with nothing ignored.
+
+               output_file -- path of the file that receives the filtered HTML.
+               """
+               HTMLParser.HTMLParser.__init__(self)
+               # Nesting depth of ignored elements; 0 means "copy to output".
+               self.ignore = 0
+               self.out_file = open(output_file, "w")
+
+       def __del__(self):
+               # Last-resort cleanup.  out_file is missing if open() failed in
+               # __init__, so look it up defensively.
+               out_file = getattr(self, 'out_file', None)
+               if out_file is not None:
+                       out_file.close()
+
+       def close(self):
+               """Process any buffered input, then close the output file."""
+               try:
+                       # The base class may still call the handlers below, so
+                       # the file has to stay open until it returns.
+                       HTMLParser.HTMLParser.close(self)
+               finally:
+                       self.out_file.close()
+
+       def handle_starttag(self, tag, attrs):
+               """Write the start tag verbatim, or enter/deepen an ignored region."""
+               if tag in ('head', 'form', 'script'):
+                       self.ignore += 1
+               elif tag == 'div':
+                       # A <div> opens an ignored region when its id is listed.
+                       # Inside an ignored region every <div> is counted so that
+                       # its matching </div> does not end the region too early.
+                       if dict(attrs).get('id') in self.IGNORED_DIV_IDS or self.ignore:
+                               self.ignore += 1
+               if not self.ignore:
+                       self.out_file.write(self.get_starttag_text())
+
+       def handle_endtag(self, tag):
+               """Write the end tag, or leave one level of an ignored region."""
+               # Test before decrementing: the end tag that closes an ignored
+               # element belongs to that element and must not be written.
+               if not self.ignore:
+                       self.out_file.write('</' + tag + '>')
+
+               # Only decrement while inside an ignored region, so an unmatched
+               # end tag cannot drive the counter negative (a negative value
+               # would suppress all following output).
+               if self.ignore > 0 and tag in ('head', 'div', 'form', 'script'):
+                       self.ignore -= 1
+
+       def handle_startendtag(self, tag, attrs):
+               """Write a self-closing tag (<br/>) verbatim unless ignored."""
+               if not self.ignore:
+                       self.out_file.write(self.get_starttag_text())
+
+       def handle_data(self, data):
+               """Write text content unless ignored."""
+               if not self.ignore:
+                       self.out_file.write(data)
+
+       def handle_entityref(self, name):
+               """Write a named entity reference (&name;) unless ignored."""
+               # The parser reports references through this hook instead of
+               # handle_data; without it they would vanish from the output.
+               if not self.ignore:
+                       self.out_file.write('&' + name + ';')
+
+       def handle_charref(self, name):
+               """Write a numeric character reference (&#name;) unless ignored."""
+               if not self.ignore:
+                       self.out_file.write('&#' + name + ';')
+
+# Download the development status page to a temporary file, write a copy
+# filtered by PageParser to stripped_file, then delete the temporary file.
+local_file = "dev_status.html"
+stripped_file = "stripped.html"
+try:
+       urllib.urlretrieve("http://dev.bertos.org/wiki/DevelopmentStatus", local_file)
+       development_status_parser = PageParser(stripped_file)
+       try:
+               with open(local_file, "r") as f:
+                       for line in f:
+                               # Lines containing the <html>/<body> wrapper tags are
+                               # skipped entirely, not passed to the parser.
+                               if 'body>' in line or '</html' in line or '<html' in line:
+                                       continue
+                               development_status_parser.feed(line)
+               development_status_parser.close()
+       finally:
+               # Make sure the stripped output is flushed and closed even if
+               # parsing failed; closing an already closed file is harmless.
+               development_status_parser.out_file.close()
+
+       # TODO: append tabs header
+       # TODO: make internal links really internal
+       # TODO: rename stripped file?
+finally:
+       # Never leave the downloaded copy behind, whatever happened above.
+       if os.path.exists(local_file):
+               os.unlink(local_file)