#!/usr/bin/python
"""Summarise who sends the most mail in a Maildir, per year and/or per month.

Usage: SCRIPT MAILDIR [CONVERSIONS] [--year FILE] [--month FILE]
              [--guidexml] [--reversed]

The tallies are kept in a nested dict of the form
``{year: {month: {sender_address: message_count}}}``; every function below
that takes ``data`` / ``years_dict`` expects that shape.
"""

import mailbox
import re
import sys
from email.utils import parseaddr, parsedate
from xml.sax.saxutils import escape


class record(object):
    """One report entry: a title, a message total and the top senders.

    Attributes:
        title: label for the period (a year, or a "month-year" string).
        total: number of messages counted in the period.
        sends: list of ``(percent_of_total, sender)`` tuples, in the order
            the ``sends`` argument supplied them.
    """

    def __init__(self, title, total, sends):
        """Build a record from ``(sender, count)`` pairs.

        Args:
            title: label for the period.
            total: message count the percentages are computed against.
            sends: iterable of ``(sender, count)`` pairs.
        """
        self.title = title
        self.total = total
        # float() keeps the division exact under Python 2 as well.
        ftotal = float(total)
        self.sends = [(100 * (count / ftotal), sender)
                      for sender, count in sends]


def make_record(title, senders, limiter):
    """Return a ``record`` holding the ``limiter`` busiest senders.

    Args:
        title: label for the period.
        senders: dict mapping sender address to message count.
        limiter: maximum number of senders to keep.

    The total is computed over *all* senders, not only the ones kept.
    """
    busiest = sorted(senders.items(), key=lambda item: item[1],
                     reverse=True)[:limiter]
    return record(title, sum(senders.values()), busiest)


def collapse_mail(mbox, into=None, min_year=2000, max_year=2007):
    """Tally messages per year, month and sender.

    Args:
        mbox: iterable of message objects offering ``get(header_name)``
            (for example ``email.message.Message``).
        into: optional existing tally dict to accumulate into.
        min_year: messages dated before this year are skipped; ``None``
            disables the lower bound.
        max_year: messages dated after this year are skipped; ``None``
            disables the upper bound.

    Returns:
        The tally dict ``{year: {month: {sender: count}}}`` (``into`` itself
        when it was supplied).

    Messages whose Date header is missing or unparsable are skipped.
    Messages without a parsable From address are counted under ``''``.
    """
    tallies = {} if into is None else into
    for msg in mbox:
        raw_date = msg.get('Date')
        # str() guards against header objects that are not plain strings.
        date = parsedate(str(raw_date)) if raw_date else None
        if not date:
            continue
        year, month = date[0], date[1]
        if min_year is not None and year < min_year:
            continue
        if max_year is not None and year > max_year:
            continue
        sender = parseaddr(str(msg.get('from') or ''))[1].lower()
        month_counts = tallies.setdefault(year, {}).setdefault(month, {})
        month_counts[sender] = month_counts.get(sender, 0) + 1
    return tallies


def years_feed(data, limiter=20, reversed=False):
    """Yield one ``record`` per year, merging that year's months.

    Args:
        data: tally dict ``{year: {month: {sender: count}}}``.
        limiter: maximum senders per record.
        reversed: when true, iterate years in descending order.
    """
    for year in sorted(data, reverse=reversed):
        merged = {}
        for month_counts in data[year].values():
            for sender, count in month_counts.items():
                merged[sender] = merged.get(sender, 0) + count
        yield make_record(year, merged, limiter)


def months_feed(data, limiter=20, reversed=False):
    """Yield one ``record`` per month, titled ``"<month>-<year>"``.

    Args:
        data: tally dict ``{year: {month: {sender: count}}}``.
        limiter: maximum senders per record.
        reversed: when true, iterate years and months in descending order.
    """
    for year in sorted(data, reverse=reversed):
        for month in sorted(data[year], reverse=reversed):
            yield make_record('%i-%i' % (month, year), data[year][month],
                              limiter)


def do_conversions(path, data):
    """Apply the sender renames listed in the file at ``path`` to ``data``.

    Each line is whitespace separated.  ``re PATTERN REPLACEMENT`` applies a
    regex rename (see ``convert_re``); any other line is
    ``TARGET ALIAS [ALIAS ...]`` and folds every alias into TARGET.
    Blank lines and lines with a single field are ignored, as are ``re``
    lines that do not have exactly three fields.
    """
    with open(path) as conversions:
        for line in conversions:
            fields = line.split()
            if len(fields) < 2:
                # Blank line or a target with nothing to fold into it.
                continue
            target = fields[0].lower()
            if target == 're':
                if len(fields) == 3:
                    convert_re(fields[1], fields[2], data)
            else:
                for alias in fields[1:]:
                    convert(alias.lower(), target, data)


def convert_re(pattern, repl, years_dict):
    """Rename every sender matching ``pattern`` via ``re.sub(pattern, repl)``.

    Args:
        pattern: regular expression, matched from the start of the address.
        repl: replacement passed to ``sub``.
        years_dict: tally dict, modified in place.
    """
    regex = re.compile(pattern)
    # Collect first: convert() mutates the dicts being scanned.
    matched = set()
    for months in years_dict.values():
        for counts in months.values():
            matched.update(s for s in counts if regex.match(s) is not None)
    # Sorted so the outcome does not depend on dict/set ordering.
    for sender in sorted(matched):
        convert(sender, regex.sub(repl, sender), years_dict)


def convert(orig_from, new_from, years_dict):
    """Fold the counts of ``orig_from`` into ``new_from``, in place.

    Args:
        orig_from: sender address to remove.
        new_from: sender address that receives the counts.
        years_dict: tally dict, modified in place.
    """
    if orig_from == new_from:
        # Nothing to move; popping and re-adding would double the count.
        return
    for months in years_dict.values():
        for counts in months.values():
            if orig_from not in counts:
                continue
            counts[new_from] = counts.get(new_from, 0) + counts.pop(orig_from)


def text_handler(feed, handle):
    """Write each record in ``feed`` to ``handle`` as plain text.

    Every record is a header line, one line per sender, and a blank line.
    """
    for rec in feed:
        body = '\n'.join("%02.2f%% %s" % x for x in rec.sends)
        text = "%s %i messages\n%s\n" % (rec.title, rec.total, body)
        handle.write(text + "\n")


def guidexml_handler(feed, handle):
    """Write each record in ``feed`` to ``handle`` as a GuideXML table.

    NOTE(review): the element names of this template were missing from the
    copy of the file this was restored from (only the text content
    "%s, %i emails" / "percent" / "person" survived).  The section/table
    layout below is a reconstruction - confirm against the intended output.
    """
    for rec in feed:
        # Addresses and titles come from mail headers, so escape them.
        rows = "\n".join(
            "  <tr><ti>%02.2f%%</ti><ti>%s</ti></tr>" % (percent, escape(who))
            for percent, who in rec.sends)
        handle.write(
            "<section>\n<title>%s, %i emails</title>\n<body>\n<table>\n"
            "  <tr><th>percent</th><th>person</th></tr>\n"
            "%s\n</table>\n</body>\n</section>\n"
            % (escape(str(rec.title)), rec.total, rows))


# ought to convert this over to optparse, but I hate optparse.
# so... someone else gets to do it. :)
def arg_state(unparsed_args, *allowed_args):
    """Report whether a boolean flag is present, removing it if so.

    Args:
        unparsed_args: argument list; modified in place when a flag is found.
        *allowed_args: the spellings of the flag (e.g. ``'--foo', '-f'``).

    Returns:
        True if any spelling was present (all occurrences are removed),
        otherwise False.
    """
    remaining = [arg for arg in unparsed_args if arg not in allowed_args]
    if len(remaining) == len(unparsed_args):
        return False
    unparsed_args[:] = remaining
    return True


def paired_arg(unparsed_args, *allowed_args):
    """Extract the value following an option, removing both from the list.

    Args:
        unparsed_args: argument list; modified in place.
        *allowed_args: the spellings of the option.

    Returns:
        The value of the last occurrence, or None if the option is absent.

    Raises:
        ValueError: the option is the final argument and has no value.
    """
    kept = []
    value = None
    pending = iter(unparsed_args)
    for arg in pending:
        if arg not in allowed_args:
            kept.append(arg)
            continue
        try:
            value = next(pending)
        except StopIteration:
            raise ValueError("option %s requires a value" % arg)
    unparsed_args[:] = kept
    return value


def _print_usage():
    """Print the command-line help text to standard output."""
    sys.stdout.write(
        "need filename to parse, with optional second arg of a conversions file\n"
        "optionally, can supply --year file and/or --month file to write to files; defaults to --year -\n"
        "finally, --guidexml if supplied dumps a guidexml snippet, else it's intended for terms\n"
        "finally finally, --reversed reverses the ordering :)\n")


def main(argv=None):
    """Run the report; return the process exit status.

    Args:
        argv: full argument vector including the program name; defaults to
            ``sys.argv``.  The list passed in is not modified.
    """
    args = list(sys.argv if argv is None else argv)
    wants_help = "--help" in args or "-h" in args
    try:
        months = paired_arg(args, '--month', '-m')
        years = paired_arg(args, '--year', '-y')
    except ValueError:
        _print_usage()
        return 1
    if not years and not months:
        years = '-'
    guidexml = arg_state(args, '--guidexml', '-g')
    reverse_order = arg_state(args, '--reversed', '-r')

    if len(args) not in (2, 3) or wants_help:
        _print_usage()
        return 1

    try:
        # create=False: a mistyped path must not silently create a maildir.
        # factory=None yields email-package messages on Python 2 and 3.
        mbox = mailbox.Maildir(args[1], factory=None, create=False)
    except mailbox.NoSuchMailboxError:
        sys.stderr.write("no such maildir: %s\n" % args[1])
        return 1
    data = collapse_mail(mbox)
    if len(args) == 3:
        do_conversions(args[2], data)

    handler = guidexml_handler if guidexml else text_handler
    for fn, func in ((years, years_feed), (months, months_feed)):
        if fn is None:
            continue
        if fn == '-':
            handle = sys.stdout
        elif fn == '\\-':
            # Escape hatch for writing to a file literally named "-".
            handle = open('-', 'w')
        else:
            handle = open(fn, 'w')
        try:
            handler(func(data, reversed=reverse_order), handle)
        finally:
            if handle is not sys.stdout:
                handle.close()
    return 0


if __name__ == '__main__':
    sys.exit(main())