doc/dev-status-page.py

   1 #!/usr/bin/env python
   2 from __future__ import with_statement
   3 import urllib, HTMLParser, os
   4
   5 class PageParser(HTMLParser.HTMLParser):
   6         def __init__(self, output_file):
   7                 HTMLParser.HTMLParser.__init__(self, )
   8                 self.out_file = open(output_file, "w")
   9                 self.ignore = 0
  10
  11         def __del__(self):
  12                 self.out_file.close()
  13
  14         def handle_starttag(self, tag, attrs):
  15                 d = dict(attrs)
  16                 if tag == 'head':
  17                         self.ignore += 1
  18                 elif tag == 'div':
  19                         if 'id' in d and (d['id'] == 'banner' or d['id'] == 'mainnav' or d['id'] == 'ctxtnav' \
  20                                         or d['id'] == 'footer' or d['id'] == 'altlinks'):
  21                                 self.ignore += 1
  22                         elif self.ignore:
  23                                 self.ignore += 1
  24                 elif tag == 'form':
  25                         self.ignore += 1
  26                 elif tag == 'script':
  27                         self.ignore += 1
  28                 if not self.ignore:
  29                         self.out_file.write(self.get_starttag_text())
  30
  31         def handle_endtag(self, tag):
  32                 if not self.ignore:
  33                         #self.out_file.write(self.get_starttag_text() + '\n')
  34                         self.out_file.write(r'</' + tag + r'>')
  35
  36                 if tag == 'head':
  37                         self.ignore -= 1
  38                 elif tag == 'div' and self.ignore:
  39                         self.ignore -= 1
  40                 elif tag == 'form':
  41                         self.ignore -= 1
  42                 elif tag == 'script':
  43                         self.ignore -= 1
  44
  45         def handle_startendtag(self, tag, attrs):
  46                 if not self.ignore:
  47                         self.out_file.write(self.get_starttag_text())
  48
  49         def handle_data(self, data):
  50                 if not self.ignore:
  51                         self.out_file.write(data)
  52
  53 local_file = "dev_status.html"
  54 stripped_file = "stripped.html"
  55 urllib.urlretrieve("http://dev.bertos.org/wiki/DevelopmentStatus", local_file)
  56 development_status_parser = PageParser(stripped_file)
  57 with open(local_file, "r") as f:
  58         for line in f:
  59                 if line.find('body>') != -1 or line.find('</html') != -1 or line.find('<html') != -1:
  60                         continue
  61                 development_status_parser.feed(line)
  62         development_status_parser.close()
  63
  64 # TODO: append tabs header
  65 # TODO: make internal links really internal
  66 # TODO: rename stripped file?
  67 os.unlink(local_file)