#!/usr/bin/env python
# Provenance of this file (header of the commit it was extracted from):
#   From: lottaviano
#   Date: Wed, 1 Jul 2009 10:46:44 +0000 (+0000)
#   Subject: First version of dev status page downloader.
#   X-Git-Tag: 2.2.0~245
#   X-Git-Url: https://codewiz.org/gitweb?a=commitdiff_plain;h=7253a8a7aac939250ab75bf05a0a6f34c045c6d3;p=bertos.git
#   git-svn-id: https://src.develer.com/svnoss/bertos/trunk@2723 38d2e660-2303-0410-9eaa-f027e97ec537
#   Path: doc/dev-status-page.py (new file, index 00000000..9c81cd5c)
from __future__ import with_statement

import os

try:
    import HTMLParser
    import urllib
except ImportError:
    # Newer interpreters renamed these modules.  Alias them so the names used
    # in this file (HTMLParser.HTMLParser, urllib.urlretrieve) keep resolving.
    import html.parser as HTMLParser
    import urllib.request as urllib


class PageParser(HTMLParser.HTMLParser):
    """Copy an HTML document to a file, leaving out selected regions.

    A region is skipped when it is a ``head``, ``form`` or ``script``
    element, or a ``div`` whose ``id`` is listed in ``IGNORED_DIV_IDS``.
    Everything nested inside a skipped region is dropped as well.  Start
    tags are copied exactly as they appear in the input; end tags are
    re-emitted as ``</tag>``; text and character/entity references are
    copied through unchanged.

    Feed the input with ``feed()`` and call ``close()`` when done so that
    the output file is flushed and closed.
    """

    # Elements whose whole content is skipped.
    IGNORED_TAGS = frozenset(['head', 'form', 'script'])
    # ``id`` values of the ``div`` elements whose whole content is skipped.
    IGNORED_DIV_IDS = frozenset(
        ['banner', 'mainnav', 'ctxtnav', 'footer', 'altlinks'])

    def __init__(self, output_file):
        """Create the parser and open *output_file* for writing.

        output_file -- path of the file that receives the filtered HTML; it
                       is truncated if it already exists.
        """
        HTMLParser.HTMLParser.__init__(self)
        # Ask for references to be reported through handle_entityref() /
        # handle_charref() instead of being decoded into the text, so they
        # can be written back verbatim.  Base classes without this switch
        # already behave that way and simply ignore the attribute.
        self.convert_charrefs = False
        # Nesting depth of the skipped region we are in; 0 means "copy".
        self.ignore = 0
        self.out_file = open(output_file, "w")

    def __del__(self):
        # Last-resort cleanup; prefer calling close().  out_file is missing
        # when open() failed in __init__, hence the getattr.
        out_file = getattr(self, 'out_file', None)
        if out_file is not None and not out_file.closed:
            out_file.close()

    def close(self):
        """Process any buffered input, then close the output file."""
        try:
            # The base class may still invoke handlers here, so the output
            # file must stay open until it returns.
            HTMLParser.HTMLParser.close(self)
        finally:
            if not self.out_file.closed:
                self.out_file.close()

    def handle_starttag(self, tag, attrs):
        if tag in self.IGNORED_TAGS:
            self.ignore += 1
        elif tag == 'div':
            # A div opens a skipped region when its id is listed.  While
            # already skipping, every div is counted so that its end tag
            # does not terminate the region early.
            if self.ignore or dict(attrs).get('id') in self.IGNORED_DIV_IDS:
                self.ignore += 1
        if not self.ignore:
            self.out_file.write(self.get_starttag_text())

    def handle_endtag(self, tag):
        if not self.ignore:
            self.out_file.write('</%s>' % tag)
        elif tag == 'div' or tag in self.IGNORED_TAGS:
            # Only count down while inside a skipped region: a stray end
            # tag must not push the depth below zero, which would suppress
            # all the output that follows.
            self.ignore -= 1

    def handle_startendtag(self, tag, attrs):
        if not self.ignore:
            self.out_file.write(self.get_starttag_text())

    def handle_data(self, data):
        if not self.ignore:
            self.out_file.write(data)

    def handle_entityref(self, name):
        # Named reference such as &amp; -- copy it instead of dropping it.
        if not self.ignore:
            self.out_file.write('&%s;' % name)

    def handle_charref(self, name):
        # Numeric reference such as &#169; or &#xA9; -- copy it verbatim.
        if not self.ignore:
            self.out_file.write('&#%s;' % name)


local_file = "dev_status.html"
stripped_file = "stripped.html"
+urllib.urlretrieve("http://dev.bertos.org/wiki/DevelopmentStatus", local_file) +development_status_parser = PageParser(stripped_file) +with open(local_file, "r") as f: + for line in f: + if line.find('body>') != -1 or line.find('