Tuesday, July 19, 2011

Eliminate Duplicate emails in evolution

Aim: To remove duplicated emails from an Evolution mailbox
Required: python
Usage: python cleanupmbox.py -i ~/.local/share/evolution/mail/local/Inbox -o ~/.local/share/evolution/mail/Inbox.ok -h inbox.h

Last step: replace the original Inbox with Inbox.ok. Remember to back up your Inbox file first.

cleanupmbox.py script
#!/usr/bin/env python
# author Marilen Corciovei len@len.ro, this code is offered AS IS, use at your own risk

import re, sys, email, getopt, marshal

# Prefix of the mbox message separator line. The trailing space matters:
# without it, a line such as "From: someone" following a blank line would be
# mistaken for the start of a new message.
msg_start = 'From '
# Output file object for the de-duplicated mbox; opened by the script entry point.
cleaned = None
# Message-IDs seen so far (id -> id); optionally loaded from / saved to the hash file.
mids = {}

def parse_mbox(file_name):
    """Split the mbox file *file_name* into messages and pass each to parse_msg.

    A new message begins at a line that starts with ``msg_start`` and is
    preceded by a blank line (or by the start of the file). Every message,
    separator line included, is handed to ``parse_msg`` as a single string.
    """
    parts = []      # lines of the message currently being collected
    last_line = ''  # previous line, stripped; '' at start of file too
    with open(file_name, 'r') as mbox:
        for line in mbox:
            if line.startswith(msg_start) and last_line == '':
                if parts:
                    parse_msg(''.join(parts))
                parts = []
            parts.append(line)
            last_line = line.strip()
    # The final message has no separator line after it, so it must be
    # flushed explicitly or it would be missing from the output.
    if parts:
        parse_msg(''.join(parts))

def parse_msg(smsg):
    """Write the raw message *smsg* to ``cleaned`` unless it is a duplicate.

    The message is parsed only to read its Message-ID header. A message
    whose Message-ID is already in ``mids`` is reported and skipped; a new
    one is recorded in ``mids`` and written out unchanged. A message with
    no Message-ID cannot be compared, so it is always written out.
    """
    mid = email.message_from_string(smsg)['message-id']
    if mid is None:
        # Without a Message-ID the message cannot be de-duplicated; keep it
        # rather than silently dropping mail from the cleaned mailbox.
        print('No Message-ID, message kept')
        cleaned.write(smsg)
        return
    if mid in mids:
        print('Duplicate Message-ID: %s' % mid)
        return
    print('New Message-ID: %s' % mid)
    mids[mid] = mid
    cleaned.write(smsg)

if __name__ == '__main__':
    # Command line: -i input mbox, -o cleaned output mbox, and optionally
    # -h a marshal file that persists the seen Message-IDs between runs.
    usage = 'Usage %s -i input -o output [-h hash file]' % sys.argv[0]
    in_file = ''
    out_file = ''
    hash_file = ''
    try:
        opts, args = getopt.getopt(sys.argv[1:], "i:o:h:")
    except getopt.GetoptError:
        print(usage)
        sys.exit(2)
    for o, a in opts:
        if o == "-i":
            in_file = a
        elif o == "-o":
            out_file = a
        elif o == "-h":
            hash_file = a

    if in_file == '' or out_file == '':
        print(usage)
        sys.exit(2)

    # This runs at module level, so the assignments below rebind the
    # module-level names that parse_msg reads.
    cleaned = open(out_file, 'w')
    try:
        if hash_file != '':
            try:
                # marshal data is binary; open the file accordingly.
                with open(hash_file, 'rb') as hf:
                    mids = marshal.load(hf)
            except IOError:
                # Typically the first run: no hash file yet, start empty.
                pass
            except (EOFError, ValueError, TypeError):
                sys.stderr.write('Ignoring unreadable hash file: %s\n' % hash_file)

        parse_mbox(in_file)
    finally:
        # Make sure the cleaned mailbox is flushed to disk.
        cleaned.close()

    if hash_file != '':
        with open(hash_file, 'wb') as hf:
            marshal.dump(mids, hf)

Original article from: len.ro

0 comments: