#!/usr/bin/env python3

import os
from typing import Tuple
import sys
import mimetypes
import argparse
import logging

# Import the libmat2 API used by this command-line front end. A ValueError
# raised while importing is printed and the program exits with status 1
# instead of showing a traceback.
# NOTE(review): presumably libmat2 raises ValueError when one of its own
# dependencies is missing or unsuitable -- confirm in libmat2.
try:
    from libmat2 import parser_factory, UNSUPPORTED_EXTENSIONS
    from libmat2 import check_dependencies, UnknownMemberPolicy
except ValueError as e:
    print(e)
    sys.exit(1)

# Version string displayed by `--version` and `--check-dependencies`.
__version__ = '0.4.0'

def __check_file(filename: str, mode: int=os.R_OK) -> bool:
    """Check that `filename` is an existing regular file accessible with `mode`.

    A diagnostic is printed on standard output for the first check that fails.

    :param filename: path of the file to check
    :param mode: access mode handed to `os.access` (a combination of
        `os.R_OK`, `os.W_OK` and `os.X_OK`); defaults to `os.R_OK`
    :return: True if every check passed, False otherwise
    """
    if not os.path.exists(filename):
        print("[-] %s doesn't exist." % filename)
        return False
    if not os.path.isfile(filename):
        print("[-] %s is not a regular file." % filename)
        return False
    if not os.access(filename, mode):
        # Only mention the permissions that were actually requested, so that a
        # read-only check doesn't complain about the file not being writeable.
        wanted = [label for flag, label in ((os.R_OK, 'readable'),
                                            (os.W_OK, 'writeable'),
                                            (os.X_OK, 'executable'))
                  if mode & flag]
        print("[-] %s is not %s." % (filename, ' and '.join(wanted) or 'accessible'))
        return False
    return True


def create_arg_parser():
    """Build the command-line argument parser of MAT2.

    The value given to `--unknown-members` is restricted to the values of
    `UnknownMemberPolicy`, so that an invalid policy is rejected by argparse
    with a usage error instead of failing later when the policy is built.

    :return: the configured `argparse.ArgumentParser`
    """
    policies = [p.value for p in UnknownMemberPolicy]

    parser = argparse.ArgumentParser(description='Metadata anonymisation toolkit 2')
    parser.add_argument('files', nargs='*', help='the files to process')
    parser.add_argument('-v', '--version', action='version',
                        version='MAT2 %s' % __version__)
    parser.add_argument('-l', '--list', action='store_true',
                        help='list all supported fileformats')
    parser.add_argument('--check-dependencies', action='store_true',
                        help='check if MAT2 has all the dependencies it needs')
    parser.add_argument('-V', '--verbose', action='store_true',
                        help='show more verbose status information')
    parser.add_argument('--unknown-members', metavar='policy', default='abort',
                        choices=policies,
                        help='how to handle unknown members of archive-style files (policy should' +
                        ' be one of: %s)' % ', '.join(policies))

    # Showing metadata and lightweight cleaning can't be requested together.
    info = parser.add_mutually_exclusive_group()
    info.add_argument('-s', '--show', action='store_true',
                      help='list harmful metadata detectable by MAT2 without removing them')
    info.add_argument('-L', '--lightweight', action='store_true',
                      help='remove SOME metadata')
    return parser


def show_meta(filename: str):
    """Print the metadata found in `filename` on standard output.

    Only a diagnostic is printed when the file doesn't pass `__check_file`
    or when no parser is available for its format.

    :param filename: path of the file to inspect
    """
    if not __check_file(filename):
        return

    handler, mime = parser_factory.get_parser(filename)  # type: ignore
    if handler is not None:
        print("[+] Metadata for %s:" % filename)
        for name, value in handler.get_meta().items():
            try:  # FIXME this is ugly.
                print("  %s: %s" % (name, value))
            except UnicodeEncodeError:
                # The value can't be encoded for the output stream, so only
                # the name of the field is shown.
                print("  %s: harmful content" % name)
    else:
        print("[-] %s's format (%s) is not supported" % (filename, mime))

def clean_meta(params: Tuple[str, bool, UnknownMemberPolicy]) -> bool:
    """Remove the metadata of a single file.

    :param params: a `(filename, is_lightweight, unknown_member_policy)`
        triple; `is_lightweight` selects `remove_all_lightweight` instead of
        `remove_all`, and the policy is set on the parser before cleaning
    :return: False if the file isn't readable and writeable or if its format
        isn't supported, otherwise the result of the parser's removal method
    """
    path, lightweight, policy = params

    if __check_file(path, os.R_OK|os.W_OK):
        handler, mime = parser_factory.get_parser(path)  # type: ignore
        if handler is not None:
            handler.unknown_member_policy = policy
            remove = handler.remove_all_lightweight if lightweight else handler.remove_all
            return remove()
        print("[-] %s's format (%s) is not supported" % (path, mime))

    return False


def show_parsers():
    """Print the supported mimetypes along with their file extensions.

    One line is printed per mimetype, sorted alphabetically. The extensions
    of each mimetype are sorted as well, so that the output is identical from
    one run to the next.
    """
    print('[+] Supported formats:')
    formats = []
    for parser in parser_factory._get_parsers():
        for mtype in parser.mimetypes:
            extensions = sorted({
                extension
                for extension in mimetypes.guess_all_extensions(mtype)
                if extension[1:] not in UNSUPPORTED_EXTENSIONS  # skip the dot
            })
            if not extensions:
                # we're not supporting a single extension in the current
                # mimetype, so there is no point in showing the mimetype at all
                continue
            formats.append('  - %s (%s)' % (mtype, ', '.join(extensions)))
    print('\n'.join(sorted(formats)))


def __get_files_recursively(files):
    """Yield the usable files found in `files`, descending into directories.

    Directories are walked with `os.walk`; every other entry is considered
    as-is. Only paths accepted by `__check_file` are yielded.

    :param files: an iterable of file and directory paths
    """
    for entry in files:
        if not os.path.isdir(entry):
            if __check_file(entry):
                yield entry
            continue

        for root, _dirs, names in os.walk(entry):
            for candidate in (os.path.join(root, name) for name in names):
                if __check_file(candidate):
                    yield candidate

def main():
    """Run the command-line interface of MAT2.

    Without files, the supported formats or the state of the dependencies are
    printed if requested, otherwise the help is shown. With files, their
    metadata are either displayed (`--show`) or removed.

    :return: the exit status: 0 on success, -1 if the metadata of at least
        one file couldn't be removed
    """
    arg_parser = create_arg_parser()
    args = arg_parser.parse_args()

    if args.verbose:
        logging.basicConfig(level=logging.INFO)

    if not args.files:
        if args.list:
            show_parsers()
        elif args.check_dependencies:
            print("Dependencies required for MAT2 %s:" % __version__)
            for key, value in sorted(check_dependencies().items()):
                print('- %s: %s' % (key, 'yes' if value else 'no'))
        else:
            # print_help() returns None: call it for its output only, so that
            # every path of this function returns an integer status.
            arg_parser.print_help()
        return 0

    if args.show:
        for f in __get_files_recursively(args.files):
            show_meta(f)
        return 0

    unknown_member_policy = UnknownMemberPolicy(args.unknown_members)
    if unknown_member_policy == UnknownMemberPolicy.KEEP:
        logging.warning('Keeping unknown member files may leak metadata in the resulting file!')

    no_failure = True
    for f in __get_files_recursively(args.files):
        # clean_meta() takes its parameters as a single tuple.
        if clean_meta((f, args.lightweight, unknown_member_policy)) is False:
            no_failure = False
    return 0 if no_failure else -1


if __name__ == '__main__':
    # Use the value returned by main() as the exit status of the process.
    exit_status = main()
    sys.exit(exit_status)
