import calendar

from matplotlib import pyplot as plt

# -------------------------------------------------------------
# Problem:
# We are given a text file containing
# daily measurements of outdoor temperatures
# at a particular meteorological station.
# Process and display the average temperature
# in each complete year on record.




# Simple helper formatting function
# turns date format YYYYMMDD into DD.MM.YYYY
def getDate( dateStr ):
    """Convert a date string from YYYYMMDD to DD.MM.YYYY.

    The input is cut purely by position (characters 0-3, 4-5 and 6-7);
    no validation of the date is performed.
    """
    # order the slices the way they appear in the output: day, month, year
    pieces = ( dateStr[6:8], dateStr[4:6], dateStr[0:4] )
    return ".".join( pieces )

# -------------------------------------------------------------
# The avgYearTemperatures function depends in every detail
# on the exact format of the data.
# Check the data file visually to see how the data are organized.

def avgYearTemperatures( fileName ):
    """Compute the average temperature of every complete year in a data file.

    Expected file layout:
      * a free-form header ending with a line that starts with "STAID";
        every header line is echoed to stdout,
      * then one comma-separated record per day, with the date (YYYYMMDD)
        in column 2, the temperature in column 3 and a quality code in
        column 4 (columns 0 and 1 are not used here).
    Reading stops at the first blank line or at the end of the file.

    Only records whose quality code is 0 are used.  A year is reported
    only if every calendar day of that year (365, or 366 in a leap year)
    has a usable record, so years with gaps or partial coverage are left
    out.  The yearly mean is divided by 10 before it is returned
    (NOTE(review): the raw values are presumably tenths of a degree --
    confirm against the data file header).

    Whenever a year's average is lower than all averages reported before
    it, a "min avg in year ..." line is printed.

    Parameters:
        fileName -- path of the text file to read.

    Returns:
        (yearsList, avgYearTList) -- two lists of equal length: the years
        as ints, in the order they first appear in the file, and the
        matching average temperatures.

    Raises:
        OSError if the file cannot be opened; IndexError or ValueError if
        a data line has too few columns or non-numeric values.
    """
    # named constants do help
    # columns 0 and 1 are not essential here
    dateColumn = 2
    temperColumn = 3
    qualityColumn = 4

    # the only quality code accepted; any other code marks the record
    # as unusable
    qualityValid = 0

    # year -> [sum of temperatures, number of usable days];
    # dicts keep insertion order, so years stay in file order
    yearStats = {}

    # "with" closes the file even if a malformed line raises
    with open( fileName, "r" ) as file:
        # skip the file header; the loop also ends at EOF, so a file
        # without a "STAID" line cannot hang the program
        for line in file:
            print( line )  # echo file header
            if line.startswith( "STAID" ):
                break

        # process all lines with day data
        for line in file:
            # a blank (whitespace-only) line marks the end of the data
            if not line.strip():
                break

            # extract info from a text line
            lineVals = line.split( "," )
            date = lineVals[dateColumn].strip()
            dayTemper = int( lineVals[temperColumn] )
            quality = int( lineVals[qualityColumn] )

            # do not accept dubious data
            if quality != qualityValid:
                continue

            # every usable day, including Jan 1 and Dec 31, is added to
            # the totals of its own year
            stats = yearStats.setdefault( int( date[0:4] ), [0, 0] )
            stats[0] += dayTemper
            stats[1] += 1

    # return values to be filled
    yearsList = []
    avgYearTList = []

    minTemper = float( "inf" )  # lowest yearly average seen so far

    for year, (totalTemper, noOfDays) in yearStats.items():
        daysInYear = 366 if calendar.isleap( year ) else 365
        if noOfDays < daysInYear:
            continue    # skip incomplete years

        avgTemper = totalTemper / noOfDays

        yearsList.append( year )
        avgYearTList.append( avgTemper/10 )

        if avgTemper < minTemper:
            minTemper = avgTemper
            print( "min avg in year", year, "is ", minTemper/10, "degrees" )

    return yearsList, avgYearTList


def disp( xValues, yValues):
    """Plot yValues against xValues and show the figure.

    Besides the data curve, the plot gets a grid for both major and minor
    ticks, a blue horizontal reference line at y=1 and a red vertical
    reference line at x=2.  The call returns when plt.show() returns.
    """
    # work on the current axes object; the pyplot-level functions
    # operate on this same object
    axes = plt.gca()
    axes.plot( xValues, yValues )
    # see grid params:  https://matplotlib.org/3.2.1/api/_as_gen/matplotlib.pyplot.grid.html
    axes.grid( True, which='both' )
    axes.axhline( y=1, color='b' )  # 'b' = blue
    axes.axvline( x=2, color='r' )  # 'r' = red
    plt.show()

# -----------------------------------------------------------------------
#   M A I N
# -----------------------------------------------------------------------

# Location of the station data file.  `path` is an optional directory
# prefix (it must end with a separator),
# e.g. "d:\\Iskola\\PGE2021\\data\\"
path = ""
fname = "TG_STAID000169.txt"
# alternative station file:
#fname = "TG_STAID000027.txt"

dataFile = path + fname

# compute the yearly averages, print them and plot them
yearsList, avgYearTList = avgYearTemperatures( dataFile )
print( yearsList, avgYearTList )

disp( xValues=yearsList, yValues=avgYearTList )






