"""
MLP
@jiri.spilka
"""

import numpy as np
from scipy.stats import logistic
from matplotlib import pyplot as plt

from sklearn.datasets import load_iris
from sklearn.utils import shuffle
from sklearn.preprocessing import StandardScaler

# Alternative data set: load iris, keep only two features (columns 0 and 2)
# and merge class 2 into class 1 to get a binary classification problem
# X, y = load_iris(return_X_y=True)
# X = X[:, [0, 2]]
# y[y == 2] = 1

# Load the data set from the AI course: the first two columns are used as
# features and the third column as the integer class label.
data = np.loadtxt('data_1_small.csv', delimiter=',')
# data = np.loadtxt('data_xor_rect.csv', delimiter=',')

X, y = data[:, :2], data[:, 2].astype(int)

# Randomise the sample order (features and labels are permuted together).
X, y = shuffle(X, y)

# Report the data shape and the number of samples labelled 0 and 1.
print('Data')
print(X.shape)
print(np.sum(y == 0))
print(np.sum(y == 1))

# Standardise the two feature columns.
sc = StandardScaler()
X = sc.fit_transform(X)

# Append a constant column of ones as the last column; multiplied by the last
# weight of each hidden neuron it acts as that neuron's bias input.
X = np.hstack((X, np.ones((len(X), 1))))


class MLP:
    """Tiny multi-layer perceptron trained with online back-propagation.

    The network has two logistic hidden neurons (weights ``w1i`` and ``w2i``)
    and one logistic output neuron (weights ``w3i``). The output neuron sees
    the two hidden activations plus a constant 1 as its bias input. No bias
    is added to the network inputs here; if the hidden neurons should have a
    bias, the caller must supply a constant column in ``X``.

    Attributes:
        epoch: Number of passes over the training data made by ``fit``.
        alpha: Step size multiplying every weight update.
        w1i, w2i: Weight vectors of the two hidden neurons.
        w3i: Weight vector of the output neuron (hidden 1, hidden 2, bias).
        errors_: Loss recorded after each epoch of the most recent ``fit``.
        z1, z2, z3: Activations from the most recent ``predict`` call.
    """

    def __init__(self, epoch, alpha):
        self.epoch = epoch
        self.alpha = alpha
        # Placeholders; the real weights are drawn in fit().
        self.w1i = np.array([0])  # j = 1
        self.w2i = np.array([0])  # j = 2
        self.w3i = np.array([0])  # j = 3, output neuron
        self.errors_ = []
        self.z1 = self.z2 = self.z3 = 0

    @staticmethod
    def activation(a):
        """Return the logistic sigmoid of ``a`` (scalar or array)."""
        return logistic.cdf(a)

    @staticmethod
    def loss(y_true, y_pred):
        """Return the mean squared error between targets and predictions."""
        diffs = np.asarray(y_true) - np.asarray(y_pred)
        # Average the squared residuals element-wise; taking the mean of the
        # dot product would average a single scalar and yield the plain sum.
        return np.mean(diffs * diffs)

    def fit(self, X, y):
        """Train the network with one weight update per sample.

        Args:
            X: 2-D array of shape (n_samples, n_inputs).
            y: 1-D array of n_samples target values compared against the
                sigmoid output.

        The weights are re-initialised and ``errors_`` is cleared on every
        call. The loss stored for an epoch is computed from the predictions
        made for each sample just before that sample's update.
        """
        n_inputs = X.shape[1]

        # init wji to some small value
        self.w1i = 0.001 * np.random.rand(n_inputs)
        self.w2i = 0.001 * np.random.rand(n_inputs)
        # The output neuron always has three inputs (two hidden activations
        # and the bias), independent of the number of input columns.
        self.w3i = 0.001 * np.random.rand(3)

        # Start a fresh loss history to match the fresh weights.
        self.errors_ = []

        # Built-in float: the np.float alias no longer exists in NumPy >= 1.24.
        yhat = np.zeros(len(y), dtype=float)

        # online learning
        for i_epoch in range(self.epoch):

            for i, (x_i, y_i) in enumerate(zip(X, y)):

                # predict() also refreshes self.z1, self.z2 and self.z3.
                yhat[i] = self.predict(x_i)

                # Error signal of the output neuron (sigmoid derivative
                # times the residual).
                delta3 = self.z3 * (1 - self.z3) * (y_i - yhat[i])

                # Back-propagate the error. The hidden layers are updated
                # first so that they still use the pre-update output weights.
                self.w1i += self.alpha * x_i * self.z1 * (1 - self.z1) * self.w3i[0] * delta3
                self.w2i += self.alpha * x_i * self.z2 * (1 - self.z2) * self.w3i[1] * delta3
                self.w3i += self.alpha * np.array([self.z1, self.z2, 1.0]) * delta3

            current_loss = self.loss(y, yhat)
            print('i_epoch i={}, loss={}'.format(i_epoch, current_loss))
            self.errors_.append(current_loss)

    def predict(self, x):
        """Run a forward pass and return the output activation.

        Args:
            x: A single sample of shape (n_inputs,) or a batch of shape
                (n_samples, n_inputs).

        Returns:
            A scalar for a single sample, or an array of shape (n_samples,)
            for a batch.

        Side effect: stores the hidden and output activations in ``z1``,
        ``z2`` and ``z3``; ``fit`` reads them for the weight update.
        """
        x = np.asarray(x, dtype=float)

        self.z1 = self.activation(x.dot(self.w1i))
        self.z2 = self.activation(x.dot(self.w2i))

        # Stack along the last axis with a bias of matching shape so that both
        # a single sample (-> shape (3,)) and a batch (-> shape (n, 3)) work;
        # mixing arrays with a bare scalar 1 would build a ragged array.
        hidden = np.stack([self.z1, self.z2, np.ones_like(self.z1)], axis=-1)
        self.z3 = self.activation(hidden.dot(self.w3i))
        return self.z3


def plot_decision(X, y, model):
    """Draw the model's decision regions with the samples on top.

    A dense grid is laid over the range of the first two columns of ``X``
    (extended by a margin on every side). Each grid point, with a constant 1
    appended as third column, is passed to ``model.predict`` and marked as
    positive where the prediction exceeds 0.5.

    Args:
        X: 2-D array; columns 0 and 1 are used as the plot coordinates.
        y: Per-sample values used to colour the scatter points.
        model: Object whose ``predict`` accepts an (n_points, 3) array and
            returns one value per row.

    Returns:
        The ``matplotlib.pyplot`` module, after drawing on the current figure.
    """
    step = .01  # grid spacing of the mesh
    margin = 1  # padding added around the data range

    # Bounds of the plotted area along both feature axes.
    col0, col1 = X[:, 0], X[:, 1]
    lo0, hi0 = col0.min() - margin, col0.max() + margin
    lo1, hi1 = col1.min() - margin, col1.max() + margin
    grid0, grid1 = np.meshgrid(np.arange(lo0, hi0, step), np.arange(lo1, hi1, step))

    # One row per grid point: both coordinates plus the constant third column.
    flat0 = grid0.ravel()
    points = np.column_stack((flat0, grid1.ravel(), np.ones_like(flat0)))
    region = (model.predict(points) > 0.5).reshape(grid0.shape)

    plt.contourf(grid0, grid1, region, alpha=0.2)
    plt.scatter(col0, col1, c=y, edgecolor='k')

    return plt


# Train the network: 100 passes over the data with step size 0.2.
pp = MLP(epoch=100, alpha=0.2)
pp.fit(X, y)

# First figure: the samples drawn over the model's decision regions.
plt.figure()
plot_decision(X, y, pp)
plt.grid(True)

# Second figure: the loss value recorded after each epoch.
plt.figure()
plt.plot(pp.errors_)
plt.grid(True)

# Display both figures.
plt.show()