import glob
import os
from xml.dom import minidom
from datetime import datetime
from rdflib import Namespace, Graph, RDF, Literal, XSD

# Layout of the MTIME timestamps once their trailing six characters are removed.
TIMESTAMP_FORMAT = "%Y-%m-%dT%H:%M:%S"


def _parse_timestamp(node):
    """Return the text content of *node* parsed as a naive ``datetime``.

    The last six characters of the text are cut off before parsing.
    NOTE(review): this presumably strips a ``+HH:MM`` UTC offset, which means
    the stored value carries no timezone information - confirm against the feed.
    """
    return datetime.strptime(node.firstChild.nodeValue[:-6], TIMESTAMP_FORMAT)


# Initialize graph and namespaces
graph = Graph()
rsd = Namespace("http://bindetad.com/trafficevents/")
geo = Namespace("http://www.w3.org/2003/01/geo/wgs84_pos#")
graph.bind('rsd', rsd)
graph.bind('geo', geo)
rsdtype = rsd["TrafficEvent"]

files = glob.glob("./dopravni-info/*")
# Denominator of the progress report; derived from the actual input instead of
# a constant that only fits one particular dataset.
count = len(files)
# Number of non-empty files processed so far (drives the progress report).
line = 0

for file in files:
    # Ignore any empty files; there is nothing in them to parse.
    if os.path.getsize(file) == 0:
        continue
    xmlfile = minidom.parse(file)
    # Get all messages in file
    for msg in xmlfile.getElementsByTagName("MSG"):
        # The children of MSG are addressed by position:
        # 0 = MTIME, 1 = MTXT, 2 = MEVT, 3 = MLOC, 4 = MDST.
        children = msg.childNodes

        # Skip messages whose region code matches neither Středočeský kraj
        # nor Praha.
        # MSG -> MDST -> DEST
        regioncode = int(children[4].childNodes[0].attributes["RegionCode"].value)
        if regioncode not in (19, 27):
            continue

        idsubject = str(msg.attributes["id"].value)
        # Account for anomalies where some ids are wrapped in curly brackets.
        if idsubject.startswith("{"):
            idsubject = idsubject[1:-1]
        subject = rsd[idsubject]
        # Some events are recorded multiple times, mainly due to updates of
        # roadworks ETAs. Ignore any IDs that are already included, so only
        # the first occurrence of an event is kept.
        if (subject, None, None) in graph:
            continue

        # MSG -> MTIME -> TGEN/TSTA/TSTO
        time_nodes = children[0].childNodes
        timeIssued = _parse_timestamp(time_nodes[0])
        timeFrom = _parse_timestamp(time_nodes[1])
        timeTo = _parse_timestamp(time_nodes[2])

        # MSG -> MTXT
        text = children[1].firstChild.nodeValue

        # MSG -> MLOC -> SNTL -> COORD
        coord_attrs = children[3].childNodes[1].childNodes[0].attributes
        xcoord = coord_attrs["x"].value
        ycoord = coord_attrs["y"].value

        # MSG -> MEVT -> TMCE -> TXTMCE (the last child of TMCE)
        tmce = children[2].childNodes[0]
        eventname = tmce.childNodes[-1].firstChild.nodeValue

        # MSG -> MEVT -> TMCE -> EVI
        # Only element nodes have a tagName; checking the node type first keeps
        # stray text/comment nodes from raising AttributeError.
        eventcodes = [
            int(child.attributes["eventcode"].value)
            for child in tmce.childNodes
            if child.nodeType == child.ELEMENT_NODE and child.tagName == "EVI"
        ]

        graph.add((subject, RDF.type, rsdtype))
        graph.add((subject, rsd["regionCode"], Literal(regioncode, datatype=XSD.integer)))
        # The XML Schema type is spelled "dateTime"; "datetime" is not a
        # defined XSD term and would yield an invalid datatype IRI.
        graph.add((subject, rsd["timeIssued"], Literal(timeIssued, datatype=XSD.dateTime)))
        graph.add((subject, rsd["timeFrom"], Literal(timeFrom, datatype=XSD.dateTime)))
        graph.add((subject, rsd["timeTo"], Literal(timeTo, datatype=XSD.dateTime)))
        graph.add((subject, rsd["description"], Literal(text, datatype=XSD.string)))
        # NOTE(review): x is stored as geo:lat and y as geo:long, both as plain
        # strings - confirm the axis order against the feed's COORD definition.
        graph.add((subject, geo["lat"], Literal(xcoord, datatype=XSD.string)))
        graph.add((subject, geo["long"], Literal(ycoord, datatype=XSD.string)))
        graph.add((subject, rsd["eventDescription"], Literal(eventname, datatype=XSD.string)))
        for code in eventcodes:
            graph.add((subject, rsd["eventCode"], Literal(code, datatype=XSD.integer)))

    # Every needed value has been copied out as a plain string or number, so
    # the DOM tree can be released before the next file is parsed.
    xmlfile.unlink()

    line += 1
    # Report progress after every 1000 processed files.
    if line % 1000 == 0:
        print(f"{round((line / count) * 100, 2)}%")

graph.serialize(destination="rsd.ttl", format="turtle")
