import csv
from datetime import datetime

from rdflib import Graph, Namespace, RDF, Literal, XSD, URIRef

# Output graph, with the project namespace registered under the "ppra" prefix.
graph = Graph()
ppra = Namespace("http://bindetad.com/pprattendance/")
graph.bind("ppra", ppra)
# RDF class assigned to every generated daily record.
ppratype = ppra["ParkAndRideAttendanceRecord"]

# Progress reporting: `lines` is the denominator of the printed percentage
# (presumably the number of rows in the input CSV - confirm when the input
# file changes) and `count` is the number of rows accumulated so far.
lines = 13496344
count = 0

# Aggregation state shared with the main loop.
init = True        # True until the first data row has been read
prevdate = 0       # placeholder; replaced by the date of the first data row
line = 0           # number of daily records added to the graph
parking_lots = {}  # parking lot name -> dict of running totals for the current day


def find(plot):
    """Return True when ``plot`` already has an entry in ``parking_lots``."""
    is_known = plot in parking_lots
    return is_known


def add(plot):
    """Create or reset the running totals for ``plot`` in ``parking_lots``.

    Any existing entry for ``plot`` is replaced by a fresh dict in which every
    counter is zero. Always returns True.
    """
    counters = (
        "enter",
        "exit",
        "occupancy",
        "statusfree",
        "statusfull",
        "freespace",
        "capacity",
        "count",
    )
    # All counters start at the (immutable) integer 0, so sharing the value is safe.
    parking_lots[plot] = dict.fromkeys(counters, 0)
    return True


def _flush_day(day):
    """Add one aggregated record per parking lot for ``day`` to the graph.

    For every lot in ``parking_lots`` that accumulated at least one sample, a
    subject URI of the form ``<base>/<lot>/<year>/<month>/<day>`` is created and
    the day's totals (enter, exit, capacity) and per-sample averages (occupancy,
    freespace) are attached to it. The running totals of every lot are reset
    afterwards, whether or not a record was written.

    The ``status`` value is the ratio of "full" samples to "free" samples. It is
    omitted when the lot had no "free" sample that day, because the ratio is
    undefined in that case.
    """
    global line
    for plot in parking_lots:
        totals = parking_lots[plot]
        samples = totals["count"]
        if samples == 0:
            # Nothing recorded for this lot on this day: just reset it.
            add(plot)
            continue
        subject = URIRef(u'http://bindetad.com/pprattendance/' + "/".join((
            str(plot).replace(" ", "_"), str(day.year), str(day.month), str(day.day))))
        graph.add((subject, RDF.type, ppratype))
        graph.add((subject, ppra["date"], Literal(day, datatype=XSD.date)))
        graph.add((subject, ppra["parking"], Literal(plot, datatype=XSD.string)))
        graph.add((subject, ppra["enter"], Literal(totals["enter"], datatype=XSD.integer)))
        graph.add((subject, ppra["exit"], Literal(totals["exit"], datatype=XSD.integer)))
        graph.add((subject, ppra["occupancy"],
                   Literal(round(totals["occupancy"] / samples), datatype=XSD.integer)))
        if totals["statusfree"] != 0:
            graph.add((subject, ppra["status"],
                       Literal(round(totals["statusfull"] / totals["statusfree"], 2), datatype=XSD.float)))
        graph.add((subject, ppra["freespace"],
                   Literal(round(totals["freespace"] / samples), datatype=XSD.integer)))
        graph.add((subject, ppra["capacity"], Literal(totals["capacity"], datatype=XSD.integer)))
        add(plot)
        line += 1


# newline="" lets the csv module do its own line-ending handling, as its documentation recommends.
with open('TSK_data_2016_2018.csv', encoding="utf8", newline="") as csvfile:
    csvreader = csv.reader(csvfile, delimiter=";")
    # Skip header
    next(csvreader, None)
    for row in csvreader:
        if not row:
            # Blank line in the input: nothing to aggregate.
            continue
        # The records are extremely detailed for our purposes, with a row for each ~30 minutes for each P+R.
        # Therefore, we add up the total number of entries/exits and compute the average occupancy for each day.
        date = datetime.strptime(row[0], "%Y-%m-%d %H:%M:%S").date()
        lot = row[1]
        if init:
            prevdate = date
            init = False
        # If the date on the current row does not match the previous row, we are on a new day: write out
        # everything gathered for the previous day before touching any accumulator.
        if prevdate != date:
            _flush_day(prevdate)
            prevdate = date
        if not find(lot):
            add(lot)
        # Account for an anomaly in the data
        if row[6] == '   7 810':
            continue
        # Look the accumulator up only after the flush: flushing replaces every dict in parking_lots, so a
        # reference taken earlier would point at a discarded dict and this row would be lost.
        iterate = parking_lots[lot]
        iterate["count"] += 1
        iterate["enter"] += int(row[2])
        iterate["exit"] += int(row[3])
        iterate["occupancy"] += int(row[4])
        if row[5] == "volno":
            iterate["statusfree"] += 1
        elif row[5] == "obsazeno":
            iterate["statusfull"] += 1
        iterate["freespace"] += int(row[6])
        # Capacity is not summed; the value of the most recent row wins.
        iterate["capacity"] = int(row[7])
        count += 1
        # Display progress in console
        if count % 500000 == 0:
            print(str(round(((count / lines) * 100), 2)) + "%")

# The loop only flushes when it sees the next day's first row, so the final day is still pending here.
if not init:
    _flush_day(prevdate)

graph.serialize(destination="tsk.ttl", format="turtle")
