2012-10-24

gmail_imap.py

This is the source code of gmail_imap.py, a script for quickly importing your Gmail mail into Outlook Exchange with all of your labels.  Open a text editor such as Notepad, copy and paste the code below, and save it as gmail_imap.py.


# Copyright 2012 Lajos Molnar.
# Licensed under the Creative Commons Attribution-ShareAlike 3.0 license
# See http://creativecommons.org/licenses/by-sa/3.0/

import imaplib, sys, hashlib, base64, pickle, os, getpass, argparse

# Command-line interface. `operation` selects the phase to run:
#   import - record which labels each message carries (IMAP side)
#   apply  - apply the recorded labels as Outlook categories (Outlook side)
#   purge  - delete already-imported messages from the IMAP account
p = argparse.ArgumentParser('gmail_imap.py', description='helper script to migrate GMail labels to Outlook Categories')
p.add_argument('operation', choices=('import', 'purge', 'apply'))
p.add_argument('-f', '--file', help='db file to import', default='gmail_imported.pickle')
p.add_argument('-r', '--reimport', help='reimport already imported labels', action='store_true')
p.add_argument('-e', '--email', help='gmail account name')
# The help text used to be a copy of the --email one.
p.add_argument('-p', '--password', '--pwd', help='gmail account password')
p.add_argument('-I', '--inbox', help='label used instead of Inbox', default='Inbox')
p.add_argument('-T', '--trash', help='label used instead of Trash', default='Trash')
p.add_argument('--import-trash', help='also import/purge trash', action='store_true')
p.add_argument('--final', help='purge imported messages', action='store_true')
p.add_argument('--folder', help='Outlook folder where mail was imported to')
p.add_argument('--limit', type=int, help='Maximum number of items to purge at one time')
p.add_argument('--recurse', help='apply labels recursively for messages in all subfolders', action='store_true')
a = p.parse_args()

all_mail, trash = '[Gmail]/All Mail', '[Gmail]/Trash'
# read any existing state
# LABELS maps a message-header hash to the list of labels seen for it; the
# special key 'MAIL' (written by the apply step) holds a list of hashes.
# labels_done is the set of label names that already occur in the database.
LABELS, labels_done = {}, set()
try:
    with open(a.file, 'rb') as f:
        # SECURITY NOTE: pickle.load can execute arbitrary code; only load
        # database files that this script created.
        LABELS = pickle.load(f)
    # Flatten the per-message label lists.  The previous code put the lists
    # themselves into a set, which raised TypeError (lists are unhashable)
    # and, being swallowed, left labels_done permanently empty.
    labels_done = set(
        label
        for key, msg_labels in LABELS.items() if key != 'MAIL'
        for label in msg_labels
    )
except (IOError, OSError) as err:
    # A missing database is the normal first-run case; anything else
    # (e.g. a permission problem) deserves a warning.
    if os.path.exists(a.file):
        print('WARNING: could not read', a.file, '-', err, file=sys.stderr)
except Exception as err:
    # Stay best-effort like the original, but no longer silently.
    print('WARNING: could not load', a.file, '-', err, file=sys.stderr)

def hashOf(msg):
    """Return the base64-encoded SHA-1 digest of the header text `msg`.

    The text is hashed as UTF-16 little-endian without a byte-order mark.
    The previous `encode('utf-16')[2:]` produced the same bytes on
    little-endian machines but used the native byte order, so a database
    built on a big-endian host would not have matched.
    """
    digest = hashlib.sha1(msg.encode('utf-16-le')).digest()
    return base64.b64encode(digest).decode('ascii')

if a.operation == 'apply':
    # Imported only in this branch, so the IMAP operations do not need win32com.
    from win32com.client import Dispatch
    if not a.folder:
        # Without this check the path split below fails on None.
        print('Please specify the Outlook folder with --folder')
        sys.exit(1)
    print('connecting to Outlook...')
    O = Dispatch('Outlook.Application')
    print('browsing to folder', a.folder, O)
    F = MAPI = O.GetNamespace('MAPI')
    # Descend one backslash-separated path component at a time.
    for name in a.folder.split('\\'):
        F = F.Folders(name)
    print('Cataloging mail...', F.StoreID)
    # MAIL maps a header hash to the (EntryID, StoreID) of the item carrying it.
    MAIL = {}
    if not a.recurse:
        # Flat mode: work on the target folder plus its 'Inbox' and 'Trash'
        # subfolders, creating those subfolders when they are missing.
        special = ('Inbox', 'Trash')
        existing = [sub.Name for sub in F.Folders]
        for wanted in special:
            if wanted not in existing:
                F.Folders.Add(wanted)
        folders = [F] + [sub for sub in F.Folders if sub.Name in special]
    else:
        # Recursive mode: no folder is special; visit the whole subtree.
        special = ()
        def walk(F):
            """Yield folder `F` followed by all of its descendants, depth first."""
            yield F
            for child in F.Folders:
                for descendant in walk(child):
                    yield descendant
        folders = list(walk(F))
    # Property name passed to PropertyAccessor to read an item's transport
    # message header text (hoisted: it does not change per item).
    PR_TRANSPORT_MESSAGE_HEADER = "http://schemas.microsoft.com/mapi/proptag/0x007D001E"

    def cats(i):
        """Return the set of labels item `i` already carries.

        That is its Outlook categories plus, when the item sits in one of
        the `special` folders, the name of that folder.
        """
        c = set()
        # `special` is a tuple of folder names; the old `== special` test
        # compared a string with the tuple and was therefore never true.
        if i.Parent.Name in special:
            c.add(i.Parent.Name)
        if i.Categories:
            c |= set(i.Categories.split(', '))
        return c

    for f in folders:
        N = f.Items.Count
        print('Applying labels in', f.Name, 'for', N, 'items ...')
        # NOTE(review): items are moved/deleted while f.Items is being
        # enumerated; confirm the Outlook enumerator tolerates that without
        # skipping entries.
        for ix, i in enumerate(f.Items, 1):
            msg = i.PropertyAccessor.GetProperty(PR_TRANSPORT_MESSAGE_HEADER)
            h = hashOf(msg)
            c = set()
            if h in MAIL:
                if (i.EntryID, f.StoreID) != MAIL[h]:
                    # already imported, combine labels
                    old = MAPI.GetItemFromID(*MAIL[h])
                    c = cats(old)
                    if old.UnRead:
                        i.UnRead = True
                    print('already imported', h, 'with', c)
                    print('adding to categories', cats(i))
                    old.Delete()
            elif h in LABELS:
                # Strip the '[Gmail]/' prefix; the All Mail label carries no information.
                c = set(label.replace('[Gmail]/', '') for label in LABELS[h] if label != all_mail)
            # Only labels the item does not have yet need applying.
            c -= cats(i)
            if c:
                for cat in c:
                    # apply categories
                    if cat == 'Important':
                        i.Importance = 2
                    elif cat in special and i.Parent.Name != cat:
                        # Move returns the moved item; keep working on that one.
                        i = i.Move(F.Folders(cat))
                    elif not i.Categories:
                        i.Categories = cat
                    else:
                        i.Categories = i.Categories + ', ' + cat
                i.Save()

            # This hash is handled; whatever stays in LABELS is reported later.
            LABELS.pop(h, None)
            MAIL[h] = (i.EntryID, f.StoreID)
            if ix % 100 == 0:
                print("{:%}".format(ix / N), end='\r', file=sys.stderr)
                sys.stderr.flush()
    # also save all imported hash
    # 'MAIL' is a bookkeeping key (present when a .remaining file is
    # re-applied), not a message, so keep it out of the leftover count.
    remaining = sum(1 for key in LABELS if key != 'MAIL')
    if remaining:
        print('WARNING: Could not apply labels for', remaining, 'messages. ')
        print('Please reapply the created', a.file + '.remaining', 'database file later.')
    LABELS['MAIL'] = list(MAIL)

    with open(a.file + '.remaining', 'wb') as f:
        pickle.dump(LABELS, f)
else:
    # A final purge needs the 'MAIL' hash list that the apply step writes.
    wants_final_purge = a.operation == 'purge' and a.final
    if wants_final_purge and 'MAIL' not in LABELS:
        print('For final merge, please specify .remaining file from apply')
        sys.exit(1)

    # connect to IMAP
    M = imaplib.IMAP4_SSL("imap.gmail.com")
    print('logging in...')
    # Prompt only for the credentials that were not given on the command line.
    login_user = a.email or getpass.getpass('Email:')
    login_secret = a.password or getpass.getpass()
    M.login(login_user, login_secret)

    print('querying labels...')
    typ, data = M.list()
    assert typ == 'OK', typ
    # Each LIST line ends with the quoted mailbox name; evaluating that token
    # as a Python literal strips the quotes.
    # SECURITY NOTE(review): eval() runs on server-supplied bytes here;
    # consider a proper parser for the quoted name.
    labels = []
    for entry in data:
        quoted_name = entry.partition(b') "')[2].partition(b' ')[2]
        labels.append(eval(quoted_name, None, None))
    print('found', len(labels), 'labels')
    total_found = 0
    # since all_mail label is critical, verify that it is correct
    all_mail_folded = all_mail.lower()
    assert all_mail in labels or any(
        name.lower() == all_mail_folded for name in labels if name != all_mail
    ), "{} not in IMAP folder list".format(all_mail)

    def group(nums, limit=None):
        """Yield message-set strings covering the integers in `nums`.

        Consecutive numbers are collapsed into 'first:last' ranges; isolated
        numbers are yielded as plain decimal strings.  Results come in
        ascending order.

        nums  -- iterable of ints (duplicates are ignored); it is NOT
                 modified, unlike the earlier version which emptied the
                 caller's set.
        limit -- maximum count of numbers per yielded range; defaults to
                 no splitting.
        """
        # Sort once instead of calling min() on a shrinking set for every range.
        ordered = sorted(set(nums))
        if limit is None:
            limit = len(ordered)
        pos = 0
        while pos < len(ordered):
            num_s = num_e = ordered[pos]
            pos += 1
            # Extend the range while the next number is adjacent and the
            # range stays below `limit` entries.
            while (pos < len(ordered) and ordered[pos] == num_e + 1
                   and num_e + 1 < num_s + limit):
                num_e += 1
                pos += 1
            yield str(num_s) if num_s == num_e else "%d:%d" % (num_s, num_e)

    def process(M, l, a, nums, LABELS, limit=None, trash=False):
        """Import or purge the messages `nums` of the currently selected mailbox.

        M      -- logged-in IMAP connection with the mailbox already selected;
                  the mailbox is closed before returning.
        l      -- label (mailbox) name being processed.
        a      -- parsed command-line arguments (operation, inbox, trash, file).
        nums   -- set of message sequence numbers to look at.
        LABELS -- dict mapping header hash -> list of labels.  Updated in
                  place on 'import'; consulted on 'purge', where a 'TRASH'
                  list of hashes is also collected while purging All Mail.
        limit  -- purge only: stop once more than this many messages were
                  deleted (default: no limit).
        trash  -- purge only: label matches as Trash instead of flagging
                  them as deleted.
        """
        if l == a.inbox:
            l = 'Inbox'
        elif l == a.trash:
            # The `trash` parameter shadows the module-level Trash folder
            # name, so the old `l = trash` stored a boolean as the label.
            l = '[Gmail]/Trash'

        if limit is None:
            limit = len(nums)
        # Fetch headers in batches of at most 50 messages.
        for num in group(nums, 50):
            print(num, end='\r', file=sys.stderr)
            sys.stderr.flush()

            deleted = set()

            typ, data = M.fetch(num, '(BODY.PEEK[HEADER])')
            assert typ == 'OK', typ
            for d in data:
                # Only tuple entries carry (envelope, header bytes) pairs.
                if not isinstance(d, tuple):
                    continue
                h = hashOf(d[1].decode('ascii'))
                if a.operation == 'import':
                    known = LABELS.setdefault(h, [])
                    if l not in known:
                        known.append(l)
                elif a.operation == 'purge':
                    if l in LABELS.get(h, ()):
                        # The response line starts with the sequence number;
                        # parse it with int() rather than eval() because the
                        # bytes come from the server.
                        deleted.add(int(d[0].partition(b' ')[0]))
                        if l == all_mail:
                            LABELS.setdefault('TRASH', []).append(h)

            if a.operation == 'purge':
                limit -= len(deleted)
                for span in group(deleted):
                    print("deleting", span)
                    if trash:
                        M.store(span, '+X-GM-LABELS', '\\Trash')
                    else:
                        M.store(span, '+FLAGS.SILENT', '\\Deleted')
                if limit < 0:
                    break

        if a.operation == 'import':
            print('done', 'total', len(LABELS), 'mail')
            # Write to a side file first so a failed dump cannot truncate the db.
            with open(a.file + '.new', 'wb') as f:
                pickle.dump(LABELS, f)
            # os.rename does not overwrite on Windows, so the old file must be
            # removed first -- but on the very first import there is none.
            if sys.platform == 'win32' and os.path.exists(a.file):
                os.unlink(a.file)
            os.rename(a.file + '.new', a.file)
        elif a.operation == 'purge':
            print('expunging...')
            M.expunge()

        M.close()

    def get_nums(M, l):
        """Select mailbox `l` on `M` and return its message numbers as a set.

        Returns an empty set when the server answers 'NO' to the SELECT.
        The mailbox is left selected for the caller.
        """
        # A stray `os.unlink(a.file)` fragment had been pasted onto the end
        # of this print line, which made the whole script a syntax error.
        print('cataloguing label', l, end='... ')
        sys.stdout.flush()
        typ, data = M.select('"' + l + '"')
        if typ == 'NO':
            return set()

        typ, data = M.search('', 'ALL')
        assert typ == 'OK', typ
        # Tolerate a None payload as "no messages".
        nums = set(map(int, (data[0] or b'').split()))
        print('has', len(nums), 'messages')
        return nums

    # Walk every label: record its messages on 'import', delete the
    # already-imported ones on 'purge'.
    for l in labels:
        # don't reimport existing labels
        if a.operation == 'import' and l in labels_done and not a.reimport:
            continue
        elif a.operation == 'purge' and l == all_mail:
            continue

        nums = get_nums(M, l)
        if nums and (l != trash or (a.import_trash and not a.final)):
            total_found += len(nums)
            process(M, l, a, nums, LABELS)
    # The final purge only runs once no label holds any message.
    if a.operation == 'purge' and a.final and total_found == 0:
        print('purging imported mail items (total', len(LABELS['MAIL']), ')')
        # Built once, before the loop: previously it was rebuilt on every
        # pass (dropping the 'TRASH' hashes collected so far) and was left
        # undefined -- a NameError below -- when All Mail was already empty.
        LABELS2 = dict((i, [all_mail]) for i in LABELS['MAIL'])
        previous = None
        while True:
            nums = get_nums(M, all_mail)
            # Stop when the mailbox is empty, or when the last pass did not
            # shrink it (only non-imported mail is left); otherwise --limit
            # would make this loop spin forever.
            if not nums or (previous is not None and len(nums) >= previous):
                break
            previous = len(nums)
            process(M, all_mail, a, nums, LABELS2, limit=a.limit, trash=True)
            if not a.limit:
                break

        # remove deleted messages from Trash
        if 'TRASH' in LABELS2:
            trashed = dict((i, [trash]) for i in LABELS2['TRASH'])
            previous = None
            while True:
                nums = get_nums(M, trash)
                if not nums or (previous is not None and len(nums) >= previous):
                    break
                previous = len(nums)
                process(M, trash, a, nums, trashed, limit=a.limit)
                if not a.limit:
                    break

    M.logout()
  

No comments:

Post a Comment