Since last April I’ve been posting collections of links to Netbehaviour. These are links that I’ve found during my web browsing that are on the subject of art, technology and society. I try to arrange them to create associations or narratives wherever possible.
I’ve written a script to convert a calendar year’s worth of links from emails to an HTML page for browsing.
Here it is:
#!/usr/bin/env python
# Copyright 2012 Rob Myers
# Licenced GPLv3 or later

# Convert one calendar year of "[NetBehaviour] Links" emails, read from an
# mbox file, into a single HTML page written to standard output.
# Written to run unchanged under both Python 2 and Python 3.

################################################################################
# Imports
################################################################################

import email
import email.utils
import mailbox
import re
import sys
import time

try:
    # Python 3.2+ (cgi.escape was removed in Python 3.8)
    from html import escape as escape_html
except ImportError:
    # Python 2
    from cgi import escape as escape_html

################################################################################
# Configuration
################################################################################

links_year = "2011"
mailbox_path = "/home/rob/.thunderbird/tq4afdtc.default/ImapMail/imap.robmyers.org/INBOX.sbd/Archives-1.sbd/2011"

# Only messages whose subject starts with this are included
subject_prefix = '[NetBehaviour] Links'
# Everything from the first run of underscores onwards is treated as the
# mailing list footer and dropped
footer_marker = '_______________________'

################################################################################
# The messages
################################################################################


def select_messages(messages, year):
    """Return (date, message) pairs for the links emails sent in `year`.

    messages -- iterable of email message objects
    year     -- the calendar year to keep, as a string or an int

    The date is the tuple returned by email.utils.parsedate(). Messages
    without a matching subject, or without a parseable Date header, are
    skipped. The result is sorted by date, as the messages may have been
    filed out of order.
    """
    wanted_year = int(year)
    selected = []
    for message in messages:
        subject = message['subject']
        if not subject or not str(subject).startswith(subject_prefix):
            continue
        raw_date = message['date']
        # Parse once here and keep the result, rather than parsing again later
        date = email.utils.parsedate(str(raw_date)) if raw_date else None
        if date is None or date[0] != wanted_year:
            continue
        selected.append((date, message))
    selected.sort(key=lambda pair: time.mktime(pair[0]))
    return selected


def decode_payload(part):
    """Return the transfer-decoded payload of a message part as text.

    Returns None if the part has no payload of its own. Under Python 3 the
    bytes are decoded using the part's declared charset, falling back to
    UTF-8; under Python 2 the byte string is returned unchanged.
    """
    payload = part.get_payload(decode=True)
    if payload is None or isinstance(payload, str):
        return payload
    charset = part.get_content_charset() or 'utf-8'
    try:
        return payload.decode(charset, 'replace')
    except LookupError:
        # The mail declared a charset that Python does not know
        return payload.decode('utf-8', 'replace')


def extract_body(message):
    """Return the text of `message`, or None if it has no text part.

    The first text/plain part wins. Failing that, the last text/html part
    is used with its tags stripped.
    """
    html_text = None
    # Email structure is...interesting...
    for part in message.walk():
        content_type = part.get_content_type()
        if content_type == "text/plain":
            text = decode_payload(part)
            if text is not None:
                return text
        elif content_type == "text/html":
            text = decode_payload(part)
            if text is not None:
                # Strip html tags to give plain text. DOTALL so that tags
                # wrapped over several lines are removed as well.
                html_text = re.sub(r'<.*?>', '', text, flags=re.DOTALL)
            # Keep trying to find text
    return html_text


def strip_footer(body):
    """Return `body` with the mailing list footer removed."""
    return body.split(footer_marker, 1)[0]

################################################################################
# Reformat and print the links with their commentary
################################################################################

# NOTE(review): the HTML tags inside the string literals below were missing
# from the copy of this script that was reviewed. They are a reconstruction
# (title, h1, h2, p, a) - confirm against the original page if exact markup
# matters.


def render_body(body):
    """Return a list of HTML lines for the links and commentary in `body`.

    Any line containing '://' becomes a paragraph holding a link to that
    line. Runs of other non-blank lines are joined into a single commentary
    paragraph. Blank lines end the current commentary paragraph.
    """
    html_lines = []
    commentary = []

    def flush_commentary():
        # Join multi-line commentary into single line
        if commentary:
            text = escape_html(' '.join(commentary), False)
            html_lines.append('<p>%s</p>' % text)
            del commentary[:]

    # Regularize leading and trailing whitespace
    for line in body.strip().split('\n'):
        stripped = line.strip()
        if '://' in stripped:
            flush_commentary()
            # Escape quotes too, as the URL is used as an attribute value
            url = escape_html(stripped, True)
            html_lines.append('<p><a href="%s">%s</a></p>' % (url, url))
        elif stripped:
            commentary.append(stripped)
        else:
            flush_commentary()
    flush_commentary()
    return html_lines


def main(year=links_year, path=mailbox_path):
    """Write the HTML page of links for `year` to standard output.

    year -- the calendar year to include
    path -- path of the mbox file to read

    Raises mailbox.NoSuchMailboxError if `path` does not exist.
    """
    def emit(line):
        sys.stdout.write(line + '\n')

    # create=False: a wrong path is an error, not a new empty mailbox
    box = mailbox.mbox(path, create=False)
    emit('<html><head><title>Links For %s</title></head><body>' % year)
    emit('<h1>Links For %s</h1>' % year)
    for date, message in select_messages(box.itervalues(), year):
        body = extract_body(message)
        if body is None:
            sys.stderr.write("Can't get body for %s %s\n"
                             % (message['date'], message['subject']))
            continue
        # Print a YYYY-MM-DD date as the title
        emit('<h2>%04d-%02d-%02d</h2>' % date[:3])
        for html_line in render_body(strip_footer(body)):
            emit(html_line)
    emit('<p>Links curated by Rob Myers.</p>')
    emit('</body></html>')


if __name__ == "__main__":
    main()
And you can download an archive of the links here: links-2011.html.gz
There are a couple of glitches in the file as a result of the ad-hoc nature of the original emails. Finding them is left as an exercise for the reader.