This is the source code of gmail_imap.py, a script for
quickly importing your Gmail mail into Outlook/Exchange with all your labels. Open an editor such as Notepad, copy and paste the code below, and save it as gmail_imap.py.
# Copyright 2012 Lajos Molnar.
# Licensed under the Creative Commons Attribution-ShareAlike 3.0 license
# See http://creativecommons.org/licenses/by-sa/3.0/
import imaplib, sys, hashlib, base64, pickle, os, getpass, argparse
# Command-line interface.  `operation` selects the phase to run:
#   import - record, per message, which Gmail labels it carries (stored in the db file)
#   apply  - set those labels on the messages of an Outlook folder
#   purge  - delete from Gmail the messages that are recorded in the db file
p = argparse.ArgumentParser('gmail_imap.py', description='helper script to migrate GMail labels to Outlook Categories')
p.add_argument('operation', choices=('import', 'purge', 'apply'))
p.add_argument('-f', '--file', help='db file to import', default='gmail_imported.pickle')
p.add_argument('-r', '--reimport', help='reimport already imported labels', action='store_true')
p.add_argument('-e', '--email', help='gmail account name')
# the help text used to be a copy of the --email one
p.add_argument('-p', '--password', '--pwd', help='gmail account password')
p.add_argument('-I', '--inbox', help='label used instead of Inbox', default='Inbox')
p.add_argument('-T', '--trash', help='label used instead of Trash', default='Trash')
p.add_argument('--import-trash', help='also import/purge trash', action='store_true')
p.add_argument('--final', help='purge imported messages', action='store_true')
p.add_argument('--folder', help='Outlook folder where mail was imported to')
p.add_argument('--limit', type=int, help='Maximum number of items to purge at one time')
p.add_argument('--recurse', help='apply labels recursively for messages in all subfolders', action='store_true')
# parsed options; every later step of the script reads its settings from `a`
a = p.parse_args()
# IMAP names of the two Gmail system folders the script treats specially
all_mail, trash = '[Gmail]/All Mail', '[Gmail]/Trash'
# read any existing state
# LABELS maps a header hash to the list of labels seen for that message; the
# extra key 'MAIL' (written by the apply step) holds a list of hashes instead.
LABELS, labels_done = {}, set()
try:
    # SECURITY: unpickling can execute arbitrary code -- only load db files
    # that this script wrote itself.
    with open(a.file, 'rb') as f:
        LABELS = pickle.load(f)
except (OSError, EOFError, pickle.UnpicklingError):
    # best effort: a missing or unreadable db just means starting from scratch
    pass
else:
    # Every label that occurs in the db has already been imported.  The values
    # are lists, so they must be flattened; putting the lists themselves into
    # a set raises TypeError, which used to be swallowed and left
    # labels_done permanently empty.
    labels_done = set(
        label
        for key, found in LABELS.items() if key != 'MAIL'
        for label in found
    )
def hashOf(msg):
    """Return the base64 text of the SHA-1 digest of *msg*.

    *msg* is a str (the script passes message header text).  It is hashed as
    UTF-16 little-endian without a byte-order mark.  The plain 'utf-16' codec
    followed by stripping the 2-byte BOM gives the same bytes on little-endian
    machines but different ones on big-endian machines, which would make a db
    built on one platform useless on another; naming the byte order explicitly
    keeps the hash identical everywhere.
    """
    digest = hashlib.sha1(msg.encode('utf-16-le')).digest()
    return base64.b64encode(digest).decode('ascii')
if a.operation == 'apply':
    # Apply the labels recorded in LABELS to the items of an Outlook folder.
    # pywin32 is imported here so the IMAP operations do not depend on it.
    from win32com.client import Dispatch

    if not a.folder:
        # without this check the script died on a.folder.split() below
        p.error('--folder is required for the apply operation')

    print('connecting to Outlook...')
    O = Dispatch('Outlook.Application')
    print('browsing to folder', a.folder, O)
    F = MAPI = O.GetNamespace('MAPI')
    # --folder is a backslash-separated path; descend one level per component
    for name in a.folder.split('\\'):
        F = F.Folders(name)
    print('Cataloging mail...', F.StoreID)
    # header hash -> (EntryID, StoreID) of the item kept for that message
    MAIL = {}
    if a.recurse:
        # with --recurse no folder name is special, so nothing is ever moved
        special = ()

        def walk(F):
            """Yield folder F followed by every folder below it, depth first."""
            yield F
            for sub in F.Folders:
                for nested in walk(sub):
                    yield nested

        folders = list(walk(F))
    else:
        # labels with these names are expressed by moving the item into the
        # sub-folder of that name instead of by a category
        special = ('Inbox', 'Trash')
        existing = [sub.Name for sub in F.Folders]
        for name in special:
            if name not in existing:
                F.Folders.Add(name)
        folders = [F] + [sub for sub in F.Folders if sub.Name in special]

    def cats(i):
        """Return the set of labels item *i* already carries.

        That is its categories (split on ', ') plus the name of its parent
        folder when that folder is one of the special ones.  The folder test
        used to compare the name with the whole `special` tuple, which can
        never be equal, so the folder was never counted.
        """
        found = set()
        if i.Parent.Name in special:
            found.add(i.Parent.Name)
        if i.Categories:
            found |= set(i.Categories.split(', '))
        return found

    # MAPI property the script hashes to identify a message; the IMAP side
    # hashes the fetched header block
    PR_TRANSPORT_MESSAGE_HEADER = "http://schemas.microsoft.com/mapi/proptag/0x007D001E"

    for f in folders:
        N = f.Items.Count
        print('Applying labels in', f.Name, 'for', N, 'items ...')
        # NOTE(review): items may be deleted or moved while f.Items is being
        # enumerated -- confirm the enumeration tolerates that.
        for ix, i in enumerate(f.Items, 1):
            msg = i.PropertyAccessor.GetProperty(PR_TRANSPORT_MESSAGE_HEADER)
            h = hashOf(msg)
            c = set()
            changed = False
            if h in MAIL:
                if (i.EntryID, f.StoreID) != MAIL[h]:
                    # already imported, combine labels
                    old = MAPI.GetItemFromID(*MAIL[h])
                    c = cats(old)
                    if old.UnRead:
                        i.UnRead = True
                        changed = True
                    print('already imported', h, 'with', c)
                    print('adding to categories', cats(i))
                    old.Delete()
            elif h in LABELS:
                c = set(label.replace('[Gmail]/', '') for label in LABELS[h] if label != all_mail)
            # only what the item does not carry yet needs to be applied
            c -= cats(i)
            target = None
            for cat in c:
                # apply categories
                if cat == 'Important':
                    i.Importance = 2
                    changed = True
                elif cat in special and i.Parent.Name != cat:
                    # moving is deferred until the other edits are saved
                    target = cat
                elif not i.Categories:
                    i.Categories = cat
                    changed = True
                else:
                    i.Categories = i.Categories + ', ' + cat
                    changed = True
            if changed:
                # also covers the case where only the unread flag was copied
                # over; that change used to be left unsaved when no category
                # had to be added
                i.Save()
            if target is not None:
                # Move returns the item at its new location; the script keeps
                # working with that object from here on
                i = i.Move(F.Folders(target))
            # whatever was recorded for this message is now handled
            LABELS.pop(h, None)
            MAIL[h] = (i.EntryID, f.StoreID)
            if ix % 100 == 0:
                print("{:%}".format(ix / N), end='\r', file=sys.stderr)
                sys.stderr.flush()
    # also save all imported hash
    # A db written by an earlier apply run still contains its old 'MAIL'
    # index; it is not a pending message and is rebuilt just below.
    LABELS.pop('MAIL', None)
    if LABELS:
        print('WARNING: Could not apply labels for', len(LABELS), 'messages. ')
        print('Please reapply the created', a.file + '.remaining', 'database file later.')
    LABELS['MAIL'] = list(MAIL.keys())
    with open(a.file + '.remaining', 'wb') as f:
        pickle.dump(LABELS, f)
else:
if a.operation == 'purge' and a.final and 'MAIL' not in LABELS:
print('For final merge, please specify .remaining file from apply')
sys.exit(1)
# connect to IMAP
M = imaplib.IMAP4_SSL("imap.gmail.com")
print('logging in...')
M.login(a.email or getpass.getpass('Email:'), a.password or getpass.getpass())
print('querying labels...')
typ, data = M.list()
assert typ == 'OK', typ
labels = [ eval(d.partition(b') "')[2].partition(b' ')[2], None, None) for d in data ]
print('found', len(labels), 'labels')
total_found = 0
# since all_mail label is critical, verify that it is correct
assert all_mail in labels or any(l.lower() == all_mail.lower() for l in labels if l != all_mail), "{} not in IMAP folder list".format(all_mail)
def group(nums, limit=None):
    """Yield IMAP message-set strings covering the integers in *nums*.

    Consecutive numbers are merged into 'first:last' ranges of at most
    *limit* numbers each; an isolated number is yielded as 'n'.  Ranges are
    produced in ascending order.  When *limit* is None a range may be as long
    as the whole input.

    *nums* is left untouched.  The previous implementation emptied the
    caller's set while iterating and rescanned it with min() for every range.
    """
    ordered = sorted(set(nums))
    if limit is None:
        limit = len(ordered)
    pos, count = 0, len(ordered)
    while pos < count:
        first = last = ordered[pos]
        pos += 1
        # extend the range while the next number is adjacent and the range
        # still holds fewer than `limit` numbers
        while pos < count and ordered[pos] == last + 1 and last + 1 < first + limit:
            last = ordered[pos]
            pos += 1
        yield str(first) if first == last else "%d:%d" % (first, last)
def process(M, l, a, nums, LABELS, limit=None, trash=False):
    """Import or purge the messages *nums* of the currently selected mailbox.

    M      -- logged-in IMAP connection with the mailbox already selected
    l      -- label (mailbox name) being processed
    a      -- parsed command-line options (operation, inbox, trash, file)
    nums   -- message numbers to look at
    LABELS -- dict mapping header hash -> list of labels; updated in place
    limit  -- for 'purge': stop once more than this many messages were
              deleted (None means len(nums))
    trash  -- for 'purge': if true, give matching messages the \\Trash label
              instead of flagging them \\Deleted

    With operation 'import' the label is recorded for every message and the
    db is rewritten to a.file.  With 'purge' every message whose hash lists
    this label in LABELS is deleted and the mailbox is expunged.  The mailbox
    is closed at the end in both cases.
    """
    if l == a.inbox:
        l = 'Inbox'
    elif l == a.trash:
        # Must be the Gmail trash folder name (the module-level `trash`
        # constant).  The boolean parameter of the same name shadows that
        # constant here, so `l = trash` used to turn the label into a bool.
        l = '[Gmail]/Trash'
    if limit is None:
        limit = len(nums)
    for batch in group(nums, 50):
        print(batch, end='\r', file=sys.stderr)
        sys.stderr.flush()
        deleted = set()
        typ, data = M.fetch(batch, '(BODY.PEEK[HEADER])')
        assert typ == 'OK', typ
        for d in data:
            # only the tuple entries are processed: d[0] starts with the
            # message number and d[1] is the header block
            if not isinstance(d, tuple):
                continue
            h = hashOf(d[1].decode('ascii'))
            if a.operation == 'import':
                known = LABELS.setdefault(h, [])
                if l not in known:
                    known.append(l)
            elif a.operation == 'purge':
                if l in LABELS.get(h, ()):
                    # the number is server data: parse it, never eval() it
                    deleted.add(int(d[0].partition(b' ')[0]))
                    if l == all_mail:
                        # remember what was sent to the trash so the caller
                        # can remove it from there afterwards
                        LABELS.setdefault('TRASH', []).append(h)
        if a.operation == 'purge':
            limit -= len(deleted)
            for doomed in group(deleted):
                print("deleting", doomed)
                if trash:
                    M.store(doomed, '+X-GM-LABELS', '\\Trash')
                else:
                    M.store(doomed, '+FLAGS.SILENT', '\\Deleted')
            if limit < 0:
                break
    if a.operation == 'import':
        print('done', 'total', len(LABELS), 'mail')
        # write to a side file first so a crash cannot truncate the db
        with open(a.file + '.new', 'wb') as f:
            pickle.dump(LABELS, f)
        # The old db is removed before renaming on win32 -- but only if there
        # is one: on the very first import the unconditional unlink failed.
        if sys.platform == 'win32' and os.path.exists(a.file):
            os.unlink(a.file)
        os.rename(a.file + '.new', a.file)
    elif a.operation == 'purge':
        print('expunging...')
        M.expunge()
    M.close()
def get_nums(M, l):
    """Select mailbox *l* on IMAP connection *M* and return its message numbers.

    Returns a set of ints, or an empty set when the server answers the
    SELECT with 'NO'.  The mailbox stays selected for the caller.
    """
    # A stray `os.unlink(a.file)` had been fused onto this line; it made the
    # function a syntax error and has no business deleting the db here.
    print('cataloguing label', l, end='... ')
    sys.stdout.flush()
    typ, data = M.select('"' + l + '"')
    if typ == 'NO':
        # finish the progress line so the next label starts on a fresh one
        print('skipped (cannot be selected)')
        return set()
    typ, data = M.search(None, 'ALL')
    assert typ == 'OK', typ
    # data[0] holds the space-separated message numbers; treat None as empty
    nums = set(map(int, (data[0] or b'').split()))
    print('has', len(nums), 'messages')
    return nums
if a.operation != 'apply':
    # Main IMAP pass ('import' and 'purge'): visit every label once.
    for l in labels:
        # don't reimport existing labels
        if a.operation == 'import' and l in labels_done and not a.reimport:
            continue
        # All Mail is only purged in the final step below
        if a.operation == 'purge' and l == all_mail:
            continue
        nums = get_nums(M, l)
        # the trash folder is only handled with --import-trash, never with --final
        if nums and (l != trash or (a.import_trash and not a.final)):
            total_found += len(nums)
            process(M, l, a, nums, LABELS)
    if a.operation == 'purge' and a.final and total_found == 0:
        print('purging imported mail items (total', len(LABELS['MAIL']), ')')
        # Built once, before the loop: process() records every trashed hash
        # under 'TRASH', and rebuilding the dict per batch threw away the
        # earlier batches (and left the name unbound when All Mail was
        # already empty).
        LABELS2 = dict((h, [all_mail]) for h in LABELS['MAIL'])
        previous = None
        while True:
            nums = get_nums(M, all_mail)
            # Stop when nothing is left, or when the previous batch did not
            # shrink the folder: whatever remains is mail that was never
            # imported, and retrying would loop forever.
            if not nums or (previous is not None and len(nums) >= previous):
                break
            previous = len(nums)
            process(M, all_mail, a, nums, LABELS2, limit=a.limit, trash=True)
            if not a.limit:
                break
        # remove deleted messages from Trash
        if 'TRASH' in LABELS2:
            trashed = dict((h, [trash]) for h in LABELS2['TRASH'])
            previous = None
            while True:
                nums = get_nums(M, trash)
                # same progress check as for All Mail above
                if not nums or (previous is not None and len(nums) >= previous):
                    break
                previous = len(nums)
                process(M, trash, a, nums, trashed, limit=a.limit)
                if not a.limit:
                    break
    M.logout()