Since last April I’ve been posting collections of links to Netbehaviour. These are links that I’ve found during my web browsing that are on the subject of art, technology and society. I try to arrange them to create associations or narratives wherever possible.
I’ve written a script to convert a calendar year’s worth of links from emails to an HTML page for browsing.
Here it is:
#!/usr/bin/env python
# Copyright 2012 Rob Myers
# Licenced GPLv3 or later
################################################################################
# Imports
################################################################################
import cgi
import email
import mailbox
import re
import sys
import time
################################################################################
# Configuration
################################################################################
# Year of links to extract; matched as a substring of each message's
# Date header (and echoed into the generated page titles below).
links_year = "2011"
# Absolute path to the mbox file holding the archived mailing-list
# messages for that year (a Thunderbird IMAP offline store).
mailbox_path = "/home/rob/.thunderbird/tq4afdtc.default/ImapMail/imap.robmyers.org/INBOX.sbd/Archives-1.sbd/2011"
################################################################################
# The messages
################################################################################
# Collect the year's "[NetBehaviour] Links" posts from the mbox archive.
# A message is kept only if it has a Subject header, that subject starts
# with the list's links prefix, and the target year appears in its Date.
# NOTE: .values() (not the Python-2-only .itervalues()) works on both
# Python 2 and Python 3 mailbox objects.
messages = [message for message in mailbox.mbox(mailbox_path).values()
            if message['subject']
            and message['subject'].startswith('[NetBehaviour] Links')
            and links_year in message['date']]
# Sort messages by date, as they may have been filed out of order.
# Wasteful as we parse the date again later.
messages.sort(key=lambda m: time.mktime(email.utils.parsedate(m['Date'])))
################################################################################
# Reformat and print the links with their commentary
################################################################################
# Emit the HTML page: a header, one dated section per message with each
# URL as its own linked paragraph and commentary lines joined between
# them, then a footer.
#
# NOTE(review): the original HTML tag literals were stripped when this
# script was pasted into the blog post; the markup below (<title>, <h1>,
# <h2>, <p>, <a>, <br />) is a reconstruction -- confirm against the
# downloadable links-2011.html.gz.
print('<html><head><title>Links For %s</title></head><body>' % links_year)
print('<h1>Links For %s</h1>' % links_year)
for message in messages:
    # Track whether the previous line was commentary (rather than a link
    # or whitespace), so multi-line commentary can be joined into one run.
    last_line_was_commentary = False
    # Print a YYYY-MM-DD date as the section title.
    date = email.utils.parsedate(message['Date'])
    print('<h2>%s-%s-%s</h2>' % (date[0], date[1], date[2]))
    # Email structure is...interesting...
    # Prefer a plain-text part; otherwise fall back to an HTML part with
    # its tags crudely stripped.
    body = None
    for part in message.walk():
        if part.get_content_type() == "text/plain":
            body = part.get_payload(decode=True)
            break
        elif part.get_content_type() == "text/html":
            body = part.get_payload(decode=True)
            # Strip html tags to give plain text.
            body = re.sub(r'<.*?>', '', body)
    if body is None:
        # No usable part: report and skip this message. (The original
        # bare except around the footer split could never catch this; an
        # unbound body crashed at the .strip() call below regardless.)
        sys.stderr.write("Can't get body for %s %s\n" % (message['date'],
                                                         message['subject']))
        continue
    # Strip the mailing-list footer: everything after the separator line.
    body = body.split('_______________________')[0]
    # Regularize leading and trailing whitespace.
    body = body.strip()
    for line in body.split('\n'):
        stripped = line.strip()
        if '://' in stripped:
            # A URL: emit it as its own linked paragraph.
            print('<p>')
            print('<a href="%s">%s</a>' % (stripped, stripped))
            print('</p>')
            last_line_was_commentary = False
        elif stripped != '':
            # Join multi-line commentary into a single line (write a
            # space instead of Python 2's trailing-comma print).
            if last_line_was_commentary:
                sys.stdout.write(' ')
            # NOTE: cgi.escape was removed in Python 3.8; use html.escape
            # if this script is ever ported forward.
            print('%s' % cgi.escape(line))
            last_line_was_commentary = True
        else:
            last_line_was_commentary = False
            # Blank line in the source email: paragraph break in the page.
            print('<br />')
# NOTE(review): the footer link target was lost with the other tags;
# the href below is a guess -- confirm against the generated file.
print('<p>Links curated by <a href="http://robmyers.org/">Rob Myers</a>.</p>')
print('</body></html>')
And you can download an archive of the links here: links-2011.html.gz
There are a couple of glitches in the file as a result of the ad-hoc nature of the original emails. Finding them is left as an exercise for the reader.