#!/usr/bin/python
# -*- coding: utf-8 -*-

# Gorazd Export
# export slovníkových hesel ze systému Invenio
# Copyright (C) 2018  Vít Tuček, Slovanský ústav AV ČR, v. v. i.

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.

import argparse
import datetime
import errno
import logging
import os
import sys
from os.path import join

from invenio.bibdocfile import BibRecDocs
from lxml import etree as et

# Default directory for log files (used as the default of the --log_dir option).
LOG_DIR = 'log'

# Module-level logger. The NullHandler keeps the logging module from complaining
# about missing handlers when nobody has configured logging yet.
log = logging.getLogger(__name__)
log.addHandler(logging.NullHandler())


class InvenioExporter(object):
    """Export the ``gorazdXML`` document attached to records of a MARC XML file.

    For every ``m:record`` element of the input file the latest file of the
    record's ``gorazdXML`` bibdoc is parsed and re-saved into ``output_dir``
    as ``<recid>.xml``.  When ``print_file`` is set, all exported records are
    additionally merged into one document (sorted by PAGE_ID), run through the
    configured XSL transforms and saved as ``print.html``.
    """

    def __init__(self, output_dir='.', input_file=None, xsl_transform=None, print_file=False):
        """Set up the exporter.

        :param output_dir: directory the exported files are written to
        :param input_file: path of the MARC XML file; the process exits with
            status 1 when it is missing
        :param xsl_transform: iterable of XSL stylesheet paths applied in
            order; when empty, a single identity transform is used
        :param print_file: whether to also produce ``print.html``
        """
        self.ns = {'m': 'http://www.loc.gov/MARC21/slim'}
        self.log = logging.getLogger(__name__)
        self.output_dir = output_dir
        self.print_file = print_file
        # Records collected for the single print file as (page_id, parsed record
        # tree) tuples.  The trees are stored untransformed; the XSL transforms
        # run later on the merged document.  Always defined so that the
        # attribute exists regardless of ``print_file``.
        self.print_file_list = []
        if not input_file:
            self.log.critical("No input file given. Exiting.")
            sys.exit(1)
        self.input_file = input_file

        if xsl_transform:
            self.transforms = []
            for xslt in xsl_transform:
                self.log.debug("Parsing transform: %s", xslt)
                self.transforms.append(et.XSLT(et.parse(xslt)))
        else:
            # identity transform
            self.transforms = [et.XSLT(et.XML('''
                 <xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
                   <xsl:template match="@*|node()">
                     <xsl:copy>
                       <xsl:apply-templates select="@*|node()"/>
                     </xsl:copy>
                   </xsl:template>
                 </xsl:stylesheet>
            '''))]
        self.log.debug("Exporter initialized.")

    def get_recid_name(self, e):
        """Return ``(recid, file_name)`` for the MARC record element ``e``.

        The record id is the text of controlfield 001.  Surrounding whitespace
        is stripped so that it cannot leak into the file name (``int()``
        already ignores it).  Raises ``IndexError`` when the record has no
        controlfield 001.
        """
        recid = e.xpath('m:controlfield[@tag="001"]', namespaces=self.ns)[0].text.strip()
        return int(recid), recid + ".xml"

    def get_full_path(self, recid):
        """Return the path of the latest ``gorazdXML`` file of record ``recid``.

        Any exception raised while looking the file up is logged and ``None``
        is returned instead, so one broken record does not stop the export.
        """
        try:
            docs = BibRecDocs(recid)
            doc = docs.list_bibdocs_by_names()['gorazdXML']
            lf = doc.list_latest_files()[0]
            return lf.get_full_path()
        except Exception as e:
            self.log.error('recid: %d exception: %s', recid, str(e))
            return None

    def export_record(self, e):
        """Export one MARC record element ``e`` into the output directory.

        The attached XML is parsed and written as ``<recid>.xml``.  When the
        print file is requested, the parsed tree is also remembered together
        with the record's PAGE_ID (datafield 035, subfield a).
        """
        recid, name = self.get_recid_name(e)
        self.log.debug("Exporting record with id: %s and name: %s", recid, name)
        src_path = self.get_full_path(recid)
        if not src_path:
            self.log.error("Filepath for recid %s with name %s not found.", recid, name)
            return

        record_xml = et.parse(src_path)
        savexml(record_xml, join(self.output_dir, name))
        if not self.print_file:
            return

        page_nodes = e.xpath('m:datafield[@tag="035"]/m:subfield[@code="a"]', namespaces=self.ns)
        page_id = page_nodes[0].text if page_nodes else None
        if page_id is None:
            # A missing or empty PAGE_ID must not abort the whole export.  The
            # record is kept and gets an empty sort key, which places it at the
            # beginning of the print file.
            self.log.warning("No PAGE_ID found for recid %s, placing it at the start of the print file.", recid)
            page_id = ''
        self.log.debug("PAGE_ID: %s", page_id)
        self.print_file_list.append((page_id, record_xml))

    def export(self, tree):
        """Export every ``m:record`` child of ``tree`` and build the print file.

        When the print file is requested, the collected records are sorted by
        PAGE_ID, merged under a ``slovnik`` root element and passed through
        the transforms in order.  The input of transform number N is saved as
        ``input-for-N.xml`` in the output directory; the final result is
        written to ``print.html``.
        """
        for record in tree.findall("m:record", namespaces=self.ns):
            self.export_record(record)
        if self.print_file:
            out_path = join(self.output_dir, "print.html")
            xml = et.XML('''
                            <slovnik>
                            </slovnik>
                        ''')
            self.print_file_list.sort(key=lambda x: x[0])
            xml.extend(record_tree.getroot() for _, record_tree in self.print_file_list)

            tree = et.ElementTree(xml)
            for (i, transform) in enumerate(self.transforms, 1):
                tree.write(join(self.output_dir, "input-for-%d.xml" % i),
                           pretty_print=True,
                           encoding="utf-8")
                self.log.debug("Applying transform number %d", i)
                tree = transform(tree)
                if tree is None:
                    self.log.critical("Transform %d didn't produce anything." % i)
                    sys.exit(1)

            self.log.info("Saving resulting html file to %s", out_path)
            # str() is used instead of tree.write() on purpose: it serializes
            # the XSLT result itself (presumably honouring the stylesheet's
            # xsl:output settings - see the lxml XSLT documentation).
            with open(out_path, 'w') as f:
                f.write(str(tree))

    def run(self):
        """Create the output directory, parse the input file and export it."""
        create_path(self.output_dir)
        self.log.info("Parsing %s", self.input_file)
        xmltree = et.parse(self.input_file)
        self.log.info("Exporting records to %s", self.output_dir)
        self.export(xmltree)


def create_path(path):
    """Make sure the directory ``path`` exists, creating parents as needed.

    An already existing directory only triggers a warning.  Any other failure
    is logged as critical and terminates the process with exit status 1.
    """
    try:
        os.makedirs(path)
    except OSError as exc:
        already_a_directory = exc.errno == errno.EEXIST and os.path.isdir(path)
        if not already_a_directory:
            log.critical("Something wrong happened when creating directories: %s" % path)
            sys.exit(1)
        log.warning("The directory %s exists, some files may be overwritten." % path)


def init_logs(name, log_dir='', console_loglevel=logging.DEBUG, info_file_loglevel=logging.DEBUG,
              err_file_loglevel=logging.WARNING,
              default_level=logging.DEBUG):
    """Attach console and file handlers to this module's logger.

    Two log files are created in ``log_dir``: ``<name>-<timestamp>.log`` and
    ``<name>-<timestamp>.err``.  File logging is best-effort: when the files
    cannot be opened, the problem is reported and the program continues with
    console logging only.

    :param name: prefix of the log file names
    :param log_dir: directory for the log files; the empty string means the
        current working directory
    :param console_loglevel: level of the console handler
    :param info_file_loglevel: level of the ``.log`` file handler
    :param err_file_loglevel: level of the ``.err`` file handler
    :param default_level: level of the logger itself
    """
    # Same logger object as the module-level ``log``.
    log = logging.getLogger(__name__)
    log.setLevel(default_level)

    formatter = logging.Formatter('%(levelname)s:%(name)s> %(message)s')

    console_handler = logging.StreamHandler()
    console_handler.setLevel(console_loglevel)
    console_handler.setFormatter(formatter)
    log.addHandler(console_handler)

    if log_dir == '':
        # os.getcwdu() exists only on Python 2; use os.getcwd() where it is missing.
        log_dir = getattr(os, 'getcwdu', os.getcwd)()

    create_path(log_dir)

    # e.g. 2018-01-31_12:34:56.789012
    timestamp = str(datetime.datetime.now()).replace(' ', '_')
    log_filename = name + '-'
    info_log = join(log_dir, log_filename + timestamp + '.log')
    err_log = join(log_dir, log_filename + timestamp + '.err')

    file_handlers = []
    try:
        for log_path, level in ((info_log, info_file_loglevel), (err_log, err_file_loglevel)):
            handler = logging.FileHandler(log_path)
            handler.setLevel(level)
            handler.setFormatter(formatter)
            file_handlers.append(handler)
    except IOError as err:
        # Do not leave an already opened log file behind when the other one failed.
        for handler in file_handlers:
            handler.close()
        log.critical("Unable to open file for logging.")
        log.critical(str(err))
    else:
        # The file handlers are attached only when both files could be opened.
        for handler in file_handlers:
            log.addHandler(handler)

    log.debug('Logging initialized')


def savexml(xml, fname):
    """Write ``xml`` to ``fname`` as pretty-printed UTF-8 with an XML declaration.

    ``xml`` may be an lxml element tree, which is written as is; anything else
    is wrapped in a new element tree first.
    """
    tree_type = type(et.ElementTree())
    document = xml if isinstance(xml, tree_type) else et.ElementTree(xml)
    document.write(fname, pretty_print=True, xml_declaration=True, encoding='utf-8')


def handle_exception(exc_type, exc_value, exc_traceback):
    """Log an uncaught exception, including its traceback, as critical.

    The three parameters are the ones Python passes to ``sys.excepthook``.
    """
    exc_info = (exc_type, exc_value, exc_traceback)
    log.critical("Uncaught exception", exc_info=exc_info)


def main():
    """Parse the command line, set up logging and run the export."""
    parser = argparse.ArgumentParser(description='Exporter of Gorazd XML files')
    parser.add_argument("marcxml", help="MARC XML export from Invenio")
    parser.add_argument("output_dir", help="Directory for storing the exported files")
    parser.add_argument("transform",
                        nargs="*",
                        help="XSL transforms for producing the output")
    parser.add_argument("-p", "--print_file", help="Create print.html file in the output directory that contains all " +
                                                   "the transformed records sorted by their PAGE ID",
                        action='store_true', default=False)
    # The help text used to claim the current working directory as the default,
    # which is not what LOG_DIR is; %(default)s keeps help and default in sync.
    parser.add_argument("-l", "--log_dir", help="Directory for log files. Default is '%(default)s'.",
                        default=LOG_DIR)
    args = parser.parse_args()
    init_logs("GorazdExporter", log_dir=args.log_dir)
    # From here on, uncaught exceptions are reported through the logger.
    sys.excepthook = handle_exception
    exporter = InvenioExporter(input_file=args.marcxml,
                               output_dir=args.output_dir,
                               xsl_transform=args.transform,
                               print_file=args.print_file)
    exporter.log.info("The program has been run with the following arguments: %s" % vars(args))
    exporter.run()


# Run the exporter only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()
