#!/usr/bin/env python

import xml.etree.ElementTree as ET
import sys
import re
from xml.dom import minidom
from translator import Translator

class SBVR2XML():
    """Read an SBVR text file line by line and hand it to a ``Translator``.

    Each instance owns the root ``xmi:XMI`` element (``self.root``), a
    ``nouns`` dictionary and a ``declared_ints`` list.  The pattern tables
    below map a regular expression to an optional handler; only patterns
    that have a handler cause any action in ``analyze_line``.
    """

    # One Translator is created at class-definition time and shared by all
    # instances; XML_PATTERNS below stores a method bound to this object.
    translator = Translator()
    XMI_VERSION = "2.1"
    XMI_SPEC = "http://schema.omg.org/spec/XMI/2.1"
    SBVR_SPEC = "http://www.omg.org/spec/SBVR/1.0/"

    # Leading articles removed from every analysed line.  The word boundary
    # keeps the substitution from eating parts of ordinary words
    # ("and", "language", "pizza is", ...).
    _ARTICLES = re.compile(r"\b(?:the|an|a)\s+")

    QUANTIFICATORS = {
        "(.+) each (.+)": None,
        "(.+) some (.+)": None,
        "(.+) at least one (.+)": None,
        "(.+) at least ([0-9]+) (.+)": None,
        "(.+) at most one (.+)": None,
        "(.+) at most ([0-9]+) (.+)": None,
        "(.+) exactly one (.+)": None,
        "(.+) exactly ([0-9]+) (.+)": None,
        "(.+) at least ([0-9]+) (.+) and at most ([0-9]+) (.+)": None,
        "(.+) more than one (.+)": None,
        }

    LOGICAL = {
        "it is not the case that (.+)": None,
        "(.+) and (.+)": None,
        "(.+) or (.+)": None,
        "(.+) or (.+) but not both": None,
        "if (.+) then (.+)": None,
        "(?!it is)(.+) if (.+)": None,
        "(.+) if and only if (.+)": None,
        "not both (.+) and (.+)": None,
        "neither (.+) nor (.+)": None,
        "(.+) whether or not (.+)": None,
    }

    # NOTE(review): "$(<!only)" is not a lookbehind -- it is a group that
    # requires the literal text "<!only" after end-of-string, so those two
    # patterns cannot match.  Left untouched because no handler is attached
    # yet; presumably "(?<!only)" or a negative lookahead was intended.
    MODAL = {
        "^it is obligatory that (.+)": None,
        "^it is prohibited that (.+)": None,
        "^it is necessary that (.+)": None,
        "^it is impossible that (.+)": None,
        "^it is possible that (.+)$(<!only)": None,
        "^it is permitted that (.+)$(<!only)": None,
        "(.+) must (.+) ": None,
        "(.+) must not (.+)": None,
        "(.+) always (.+)": None,
        "(.+) never (.+)": None,
        "(.+) may (.+)": None,
        "(.+) may (.+) only if (.+)": None,
        "^it is permitted that (.+) only if (.+)": None,
        "^it is possible that (.+) only if (.+)": None,
    }

    OTHER = {
        "the": None,
        "a": None,
        "an": None,
        "another": None,
        "a given": None,
        "that": None,
        "who": None,
        "is of": None,
        "what": None,
    }

    XML_PATTERNS = {
        "(.+) Vocabulary": None,
        "Included Vocabulary: (.+) Vocabulary": None,
        "Language: (.+)": None,
        "Namespace URI: (.+)": None,
        "Speech Community: (.+)": None,
        "example term": None,
        "Concept Type: (.+)": None,
        "Definition: (.+)": None,
        "General Concept: (.+)": translator.add_general_concept,
    }

    EXPRESSIONS_GROUPS = [QUANTIFICATORS, LOGICAL, MODAL, OTHER, XML_PATTERNS]

    def __init__(self, input_filename):
        """Remember the input path and build the empty XMI root element.

        :param input_filename: path of the SBVR text file read by ``read``.
        """
        self.input_filename = input_filename
        # The translator is a class attribute, so this back-reference is
        # overwritten by whichever instance was created last.
        self.translator.klass = self
        self.nouns = {}
        self.declared_ints = []

        self.root = ET.Element("xmi:XMI")
        self.root.set("xmi:version", self.XMI_VERSION)
        self.root.set("xmlns:xmi", self.XMI_SPEC)
        self.root.set("xmlns:sbvr", self.SBVR_SPEC)

    def read(self):
        """Open the input file and pass every stripped line to ``analyze_line``."""
        with open(self.input_filename) as source:
            for raw_line in source:
                self.analyze_line(raw_line.strip())

    def analyze_line(self, line):
        """Normalise one line, run pattern handlers, then register it.

        Empty lines and lines starting with ``#`` are ignored.  Otherwise the
        line is lower-cased, articles are removed, every pattern that has a
        handler is tried, and finally the line is given to the translator as
        a sentence (when it mentions a known noun) or as a noun.

        :param line: one input line, already stripped of surrounding
            whitespace.
        :returns: ``None`` for skipped lines; otherwise the truthy result of
            ``translator.add_sentence`` or, failing that, the result of
            ``translator.add_noun``.
        """
        if not line or line.startswith("#"):
            return None

        line = self._ARTICLES.sub("", line.lower())

        for expression_group in self.EXPRESSIONS_GROUPS:
            for statement, handler in expression_group.items():
                if not handler:
                    # Nothing to call, so matching would be wasted work.
                    continue
                # The line was lower-cased above while the patterns contain
                # capitals ("General Concept: ..."); match case-insensitively
                # so the handler is actually reachable.
                m = re.match(statement, line, re.IGNORECASE)
                if m:
                    handler(m.groups())
                    # NOTE(review): the line still falls through to
                    # add_sentence/add_noun below, as before -- confirm that
                    # is wanted for lines consumed by a handler.

        if any(noun in line for noun in self.nouns):
            new_sentence = self.translator.add_sentence(line)
            if new_sentence:
                return new_sentence
        return self.translator.add_noun(line)



def prettify(elem):
    """Serialize *elem* to XML and return it re-indented with tabs.

    The element is first dumped as UTF-8 by ElementTree, then parsed again
    with minidom, whose pretty-printer produces the final string.
    """
    document = minidom.parseString(ET.tostring(elem, 'utf-8'))
    return document.toprettyxml(indent="\t")

if __name__ == "__main__":
    # sys.argv always holds the script name, so its truthiness says nothing
    # about whether an input file was supplied; check the length instead.
    if len(sys.argv) < 2:
        sys.exit("usage: %s <input-file>" % sys.argv[0])
    t = SBVR2XML(sys.argv[1])
    t.read()
    # Call form of print: valid on Python 3 and, with a single argument,
    # identical in effect to the print statement on Python 2.
    print(prettify(t.root))
