#!/usr/bin/env python

# Script that reads through all of the *.ht files, determines their
# dates, and outputs a tab-separated file of news items to standard output.

import glob
import re
import sys

try:
    import rfc822
except ImportError:
    # rfc822 is not importable on this interpreter (it is absent from
    # Python 3); the stdlib email package parses the same style of headers.
    rfc822 = None
    import email

# Matches a title that contains two YYYY-M-D dates.  Group 1 is the first
# date found; because the '.*' in between is greedy, group 2 is the last one.
pat = re.compile(r'\s*(\d{4}-\d{1,2}-\d{1,2}).*(\d{4}-\d{1,2}-\d{1,2})')


def _read_title(filename):
    """Return the 'title' header of *filename*, or None if it has none.

    The file is always closed, even when header parsing raises.
    """
    fp = open(filename, 'r')
    try:
        if rfc822 is not None:
            return rfc822.Message(fp).getheader('title')
        title = email.message_from_file(fp).get('title')
    finally:
        fp.close()
    if title is not None:
        # Trim surrounding whitespace so both parsers yield the same text.
        title = str(title).strip()
    return title


def _date_key(date):
    """Turn a 'YYYY-M-D' string into an (int, int, int) tuple for sorting."""
    return tuple(map(int, date.split('-')))


def main():
    """Print one 'date<TAB>title<TAB>url' line per summary file, newest first.

    Every file matching '20*.ht' in the current directory is examined.  The
    date printed is the last date found in the file's title header.  Files
    without a title, or whose title does not contain two dates, are reported
    on standard error and skipped.
    """
    entries = []
    for filename in glob.glob('20*.ht'):
        title = _read_title(filename)
        if title is None:
            sys.stderr.write('File %s lacks title -- skipping' % filename
                             + '\n')
            # Actually skip the file; without this, pat.search(None) raises.
            continue

        m = pat.search(title)
        if m is None:
            sys.stderr.write('No start/end dates found in title of %s\n%r'
                             % (filename, title) + '\n')
            continue

        end = m.group(2)
        # The filename ends in '.ht', so appending 'ml' yields '.html'.
        url = 'http://www.python.org/dev/summary/%sml' % filename
        # Sort on the numeric date: as plain strings, unpadded dates such as
        # '2005-9-30' would compare greater than '2005-10-15'.
        entries.append((_date_key(end), end, title, url))

    entries.sort()
    entries.reverse()
    for _key, date, title, url in entries:
        sys.stdout.write(date + '\t' + title + '\t' + url + '\n')


if __name__ == '__main__':
    main()