Search
# Import modulu import numpy as np # modul pro praci s vektory/maticemi import pandas as pd # modul na analyzu dat import matplotlib.pyplot as plt # modul pro kresleni grafu # magicky vyraz pro vykreslovani dat %matplotlib inline
# Build a small float vector and inspect its basic properties.
pole = np.array([1, 2, 3], dtype=float)
print(pole)
print(type(pole))
print('dimenze pole', pole.ndim)
print('tvar pole', pole.shape)
pole.dtype

# Re-use the name for the integers 0..9 and view them as a 2 x 5 matrix.
pole = np.arange(10)
matice = np.reshape(pole, (2, 5))  # 1-D array -> 2-D array with 2 rows, 5 columns
print(matice)
print(matice.T)  # transpose; same as matice.transpose()
ravel()
flatten()
# Compare flatten() and ravel(): both turn the matrix into a vector, and the
# prints after each write show whether the original matrix was affected.
matice2 = np.copy(matice)
print('Originalni matice:\n', matice2,'\n')

# flatten(): write into the resulting vector, then print the matrix again.
pole2 = matice2.flatten()
pole2[0] = 10
print('Flatten\nVektor: ',pole2,'\n')
print('Originalni matice:\n', matice2,'\n')

# ravel(): same experiment with the second conversion method.
pole3 = np.ravel(matice2)
pole3[0] = 10
print('Ravel\nVektor: ',pole3,'\n')
print('Originalni matice:\n', matice2,'\n')

# Element access: tuple index, chained index, and boolean-mask selection.
print(matice2[0, 0])
print(matice2[0][0])
maska = matice2 > 5
print(matice2[maska])
# Two small integer vectors used for the arithmetic demos below.
vektor1 = np.array([1, 2, 3, 4])
vektor2 = vektor1 + 2
print(vektor1, vektor2,'\n')

# Element-wise addition.
print(vektor1 + vektor2,'\n')

# Element-wise multiplication (operator form and function form).
print(vektor1 * vektor2)
print(np.multiply(vektor1, vektor2),'\n')

# Inner (dot) product for two vectors; the same calls perform matrix
# multiplication when given 2-D arrays.
print(vektor1 @ vektor2)
print(np.dot(vektor1, vektor2))
print(np.matmul(vektor1, vektor2),'\n')

# Outer product: every element of vektor1 times every element of vektor2.
print(np.outer(vektor1, vektor2),'\n')

# 2 x 2 matrix built from vektor1; its exact determinant is -2, so the printed
# value illustrates floating-point round-off.
mat = np.reshape(vektor1, (2, 2))
print('determinant matice: ', np.linalg.det(mat),'\n')

print('inversni matice')
inv_m = np.linalg.inv(mat)
print(inv_m,'\n')

# mat times its inverse should be the identity; round-off shows up here too.
print(mat @ inv_m)
print(np.dot(mat, inv_m))  # for 2-D inputs np.dot is matrix multiplication
figure
axes
line
# Load the roster table.
# Data source: https://gist.github.com/craigmbooth/5a9be04fe72d77fa3cff
df = pd.read_csv('nfl_2013.csv')
# The dtype of each column can also be given explicitly:
#df = pd.read_csv('nfl_2013.csv', dtype={'cislo': int, 'pozice':str, 'jmeno':str, 'vyska_inch':float, 'vaha_lbs':float, 'team':str})
df.head()  # .head shows the first N rows, default N=5
#df.tail() # like .head, but shows the last N rows

# Derived columns: height in cm and weight in kg.
df['vyska_cm'] = df['vyska_inch'] * 2.54
df['vaha_kg'] = df['vaha_lbs'] * 0.4536

# Age in completed years on 2013-01-01.
# The previous `.astype('<m8[Y]')` conversion of a timedelta Series is rejected
# by pandas >= 2.0 (only s/ms/us/ns resolutions are supported) and used an
# average year length, so the age is computed from calendar fields instead.
referencni_datum = pd.Timestamp(2013, 1, 1)
datum_narozeni = pd.to_datetime(df['datum_narozeni'])
# True where the birthday in the reference year falls after the reference date.
pred_narozeninami = (
    (datum_narozeni.dt.month > referencni_datum.month)
    | (
        (datum_narozeni.dt.month == referencni_datum.month)
        & (datum_narozeni.dt.day > referencni_datum.day)
    )
)
df['vek'] = (
    referencni_datum.year
    - datum_narozeni.dt.year
    - pred_narozeninami.astype(int)
).astype(int)
df.head(10)
# Quick structural overview of the frame: column labels, row index and
# per-column dtypes, each followed by an empty line.
for prehled in (df.columns, df.index, df.dtypes):
    print(prehled,'\n')

df.info()  # compact summary covering much of the above

df.describe()

# Optional interactive grid view (requires the qgrid package):
#import qgrid
#widget = qgrid.show_grid(df, show_toolbar=True)
#widget

# Scatter plot of height against weight; the title shows their correlation.
osa_x, osa_y = 'vaha_kg', 'vyska_cm'
korelace = df[osa_x].corr(df[osa_y])
_ = df.plot.scatter(
    x=osa_x,
    y=osa_y,
    alpha=0.1,
    #c='b',
    title='Korelace: {0:.3f}'.format(korelace),
)
# Pairwise correlation of the numeric columns, rendered as a colour-graded table.
# Text columns are excluded explicitly: since pandas 2.0 `DataFrame.corr()` no
# longer drops them silently and raises on non-numeric data.
corr = df.select_dtypes(include='number').corr()
corr.style.background_gradient()  # optionally pass cmap='coolwarm_r'
# Height vs. weight with a fitted polynomial trend.
osa_x, osa_y = 'vaha_kg', 'vyska_cm'
#plt.scatter(x=osa_x,y=osa_y,data=df, alpha=0.1)
#plt.title('Korelace: {0:.3f}'.format(df[osa_x].corr(df[osa_y])))
#plt.show()

# Fit a polynomial of order `deg` and evaluate it at the observed x values.
deg = 2  # second-order polynomial
hodnoty_x = df[osa_x]
hodnoty_y = df[osa_y]
pfit = np.polynomial.polynomial.polyfit(hodnoty_x, hodnoty_y, deg)
y_fitted = np.polynomial.polynomial.polyval(hodnoty_x, pfit)

fig, ax = plt.subplots()
ax.scatter(x=osa_x, y=osa_y, data=df, alpha=0.1, color='teal')
# Fitted polynomial values drawn as points on top of the raw data.
ax.plot(hodnoty_x, y_fitted, '.', alpha=0.1, color='black')
ax.set(
    xlabel=osa_x,
    ylabel=osa_y,
    title='Korelace: {0:.3f}'.format(hodnoty_x.corr(hodnoty_y)),
)
plt.show()
# One histogram per column, laid out in a single row of six panels.
# https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.hist.html
_ = df.hist(density=False, figsize=(20, 3), layout=(1, 6))

# Height histogram restricted to rows where 'vek' is above 25.
# https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.loc.html
#df.loc[df['pozice']=='C']['vyska_cm'].plot.hist(subplots=False, alpha=0.4)
starsi_nez_25 = df['vek'] > 25
df.loc[starsi_nez_25, 'vyska_cm'].plot.hist(subplots=False)

# Overlaid, semi-transparent height histograms, one per position.
vysky_dle_pozice = df.groupby('pozice')['vyska_cm']
_ = vysky_dle_pozice.plot.hist(subplots=False, alpha=0.4)
# Height histograms per position, one panel each, on a 5 x 5 grid filled
# row by row.
fig, ax = plt.subplots(5, 5, figsize=(20, 10))

# Group once instead of on every iteration. sort=False keeps positions in
# order of first appearance, and groupby skips rows with a missing position.
vysky_dle_pozice = df.groupby(by='pozice', sort=False)['vyska_cm']

# zip() stops at the shorter sequence, so positions beyond the 25 available
# panels are not drawn.
for panel, (pozice, vysky) in zip(ax.ravel(), vysky_dle_pozice):
    _, bins, _ = panel.hist(vysky, density=False, bins=10)
    #panel.set_xticks(bins)
    panel.set_title(pozice)

plt.subplots_adjust(hspace=.5, wspace=.5)
plt.show()
# 2-D histograms of weight vs. height: rows matching the condition on the
# left, all remaining rows on the right, both with identical axis limits.
osa_x, osa_y = 'vaha_kg', 'vyska_cm'
podminka = df['vek'] <= 25
#podminka = df['pozice']=='RB'
#podminka = df['team']=='ATL'

# Axis limits come from the full data set so the two panels are comparable.
rozsah_x = (min(df[osa_x]), max(df[osa_x]))
rozsah_y = (min(df[osa_y]), max(df[osa_y]))

fig, axs = plt.subplots(1, 2)
for panel, maska in zip(axs, (podminka, ~podminka)):
    vyber = df.loc[maska]
    panel.hist2d(vyber[osa_x], vyber[osa_y], bins=20)
for panel in axs:
    panel.set(xlim=rozsah_x)
for panel in axs:
    panel.set(ylim=rozsah_y)
plt.show()

# Box plot of age for each position.
_ = df.boxplot(column='vek', by='pozice', figsize=(20, 10), grid=False)
#_ = df.boxplot(column='vyska_cm', by='team', figsize=(20,10), grid=False)

# Convert the frame to a NumPy array and report its type and shape.
np_table = df.to_numpy()
for udaj in (type(np_table), np_table.shape):
    print(udaj)
Příklad je postavený na jednom z ukázkových příkladů dostupných na webu.
from sklearn import datasets, svm, metrics from sklearn.neural_network import MLPClassifier from sklearn.model_selection import train_test_split
# Load the bundled digits data set (grayscale images of 8 x 8 pixels).
digits = datasets.load_digits()

# Show the first four images together with their labels.
_, axes = plt.subplots(nrows=1, ncols=4, figsize=(10, 3))
for poradi, ax in enumerate(axes):
    image = digits.images[poradi]
    label = digits.target[poradi]
    ax.set_axis_off()
    ax.imshow(image, cmap=plt.cm.gray_r, interpolation="nearest")
    ax.set_title("Label: %i" % label)

# The data set exposes several keys; the main ones are images and target.
print(digits.keys())
Rozpoznávání čísel (rostlin, zvířat, lidí, věcí apod.) patří pod tzv. klasifikaci. Cílem je predikovat, co za číslo je na daném vstupním obrázku (rozložení pravděpodobnosti).
Pro správný trénink se zpravidla rozdělí dataset na trénovací a testovací množinu. Cílem je navrhnout takový algoritmus/model, který bude dobře fungovat nejen na známé trénovací množině, ale také na neznámé testovací množině.
# Flatten every image into a 1-D feature vector (one row per sample).
n_samples = len(digits.images)
data = digits.images.reshape((n_samples, -1))

# Split the data without shuffling. 50/50 is used here; 70/30 or 80/20 are
# the more common choices.
X_train, X_test, y_train, y_test = train_test_split(
    data,
    digits.target,
    test_size=0.5,
    shuffle=False,
)

# Support-vector classifier with default parameters.
# https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVC.html?highlight=svc#sklearn.svm.SVC
clf = svm.SVC()
# Variant with adjusted input parameters:
# clf = svm.SVC(
#     C=0.5,
#     kernel='poly',
#     gamma=0.005,
#     verbose=False,
#     max_iter=1
# )

# Train, then predict on the held-out half.
clf.fit(X_train, y_train)
predicted = clf.predict(X_test)

# Accuracy on the training and on the test data.
presnost_trenink = clf.score(X_train, y_train)
presnost_test = clf.score(X_test, y_test)
print("Training set score: %f" % presnost_trenink)
print("Test set score: %f" % presnost_test)
Vizualizace výsledků
# Show the first four test samples with the class predicted for each.
_, axes = plt.subplots(nrows=1, ncols=4, figsize=(10, 3))
for poradi, ax in enumerate(axes):
    prediction = predicted[poradi]
    # Test samples are flat vectors; restore the 8 x 8 layout for display.
    image = X_test[poradi].reshape(8, 8)
    ax.set_axis_off()
    ax.imshow(image, cmap=plt.cm.gray_r, interpolation="nearest")
    ax.set_title(f"Predikce: {prediction}")
Hodnocení navrženého klasifikátoru pomocí metrik
# Text report of the classification metrics on the test set.
report = metrics.classification_report(y_test, predicted)
print(f"Classification report for classifier {clf}:\n{report}\n")
Výpočet tzv. confusion matrix (neboli matice záměn)
# Build the confusion matrix from true vs. predicted labels, print the raw
# counts and show the plot.
disp = metrics.ConfusionMatrixDisplay.from_predictions(y_test, predicted)
matice_zamen = disp.confusion_matrix
disp.figure_.suptitle("Confusion Matrix")
print(f"Confusion matrix:\n{matice_zamen}")
plt.show()
Interaktivní úvod do neuronových sítí dostupný na http://playground.tensorflow.org/, případně na http://www.r2d3.us.
import warnings
from sklearn.exceptions import ConvergenceWarning

# Multi-layer perceptron with default parameters.
# https://scikit-learn.org/stable/modules/generated/sklearn.neural_network.MLPClassifier.html
mlp = MLPClassifier()
# Variant with explicit hyper-parameters:
# mlp = MLPClassifier(
#     random_state=1,
#     max_iter=200,
#     activation='relu',
#     solver='adam',
#     batch_size=16,
#     alpha=0.5,
#     learning_rate_init=0.0001,
#     learning_rate='adaptive',
#     hidden_layer_sizes=(10,)
# )

# Train with scikit-learn's convergence warnings silenced.
with warnings.catch_warnings():
    warnings.filterwarnings("ignore", category=ConvergenceWarning, module="sklearn")
    mlp.fit(X_train, y_train)

presnost_trenink = mlp.score(X_train, y_train)
presnost_test = mlp.score(X_test, y_test)
print("Training set score: %f" % presnost_trenink)
print("Test set score: %f" % presnost_test)

# Draw first-layer weights as 8 x 8 images, one hidden unit per panel
# (at most ten panels on the 2 x 5 grid).
fig, axes = plt.subplots(2, 5)
vahy_prvni_vrstvy = mlp.coefs_[0]
# Global min / max so that all panels share the same colour scale.
vmin, vmax = vahy_prvni_vrstvy.min(), vahy_prvni_vrstvy.max()
for coef, ax in zip(vahy_prvni_vrstvy.T, axes.ravel()):
    ax.matshow(coef.reshape(8, 8), cmap=plt.cm.gray, vmin=0.5 * vmin, vmax=0.5 * vmax)
    ax.set(xticks=(), yticks=())
# The data set was originally downloaded from https://www.openml.org/d/554
# and stored locally as a compressed npz archive:
#X, y = datasets.fetch_openml("mnist_784", version=1, return_X_y=True, as_frame=False)
#np.savez_compressed('mnist_784', X=X, y=y)
#X = X / 255.0

# Load the locally stored archive. The context manager closes the underlying
# file handle once both arrays have been read.
# NOTE(review): allow_pickle=True unpickles object arrays, which can execute
# arbitrary code; only load an archive you created yourself. It is presumably
# needed because the labels were saved as an object array; confirm.
with np.load('mnist_784.npz', allow_pickle=True) as data_mnist:
    X_all = data_mnist['X']
    y_all = data_mnist['y']

# Work with the first 5000 samples only; pixel values are divided by 255.
X = X_all[:5000] / 255.0
y = y_all[:5000]
# Preview the first four samples with their labels.
_, axes = plt.subplots(nrows=1, ncols=4, figsize=(10, 3))
for poradi, ax in enumerate(axes):
    image = X[poradi]
    label = y[poradi]
    ax.set_axis_off()
    # Samples are flat vectors; restore the 28 x 28 layout for display.
    ax.imshow(image.reshape(28, 28), cmap=plt.cm.gray_r, interpolation="nearest")
    ax.set_title("Label: %s" % label)

# Shuffled 50/50 split into a training and a test set.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.5,
    shuffle=True,
)

# Support-vector classifier with default parameters.
clf = svm.SVC()
# Variant with adjusted input parameters:
# clf = svm.SVC(
#     C=0.5,
#     kernel='poly',
#     gamma=0.005,
#     verbose=False,
#     max_iter=1
# )

# Train, then predict on the test set.
clf.fit(X_train, y_train)
predicted = clf.predict(X_test)

# Accuracy on the training and on the test data.
presnost_trenink = clf.score(X_train, y_train)
presnost_test = clf.score(X_test, y_test)
print("Training set score: %f" % presnost_trenink)
print("Test set score: %f" % presnost_test)
import warnings
from sklearn.exceptions import ConvergenceWarning

# Alternative configurations kept for experimentation:
# mlp = MLPClassifier(
#     hidden_layer_sizes=(40,),
#     max_iter=100,
#     alpha=1e-4,
#     solver="sgd",
#     verbose=10,
#     random_state=1,
#     learning_rate_init=0.02,
# )
#
# mlp = MLPClassifier(
#     random_state=1,
#     max_iter=100,
#     activation='relu',
#     solver='adam',
#     batch_size=128,
#     alpha=1,
#     learning_rate_init=0.003,
#     learning_rate='adaptive',
#     hidden_layer_sizes=(10,)
# )

# Active configuration: one hidden layer of 40 tanh units trained with adam.
mlp = MLPClassifier(
    hidden_layer_sizes=(40,),
    activation='tanh',
    solver='adam',
    #batch_size=128,
    alpha=1e-4,
    learning_rate='adaptive',
    learning_rate_init=0.02,
    max_iter=100,
    random_state=1,
)

# Train with scikit-learn's convergence warnings silenced.
with warnings.catch_warnings():
    warnings.filterwarnings("ignore", category=ConvergenceWarning, module="sklearn")
    mlp.fit(X_train, y_train)

presnost_trenink = mlp.score(X_train, y_train)
presnost_test = mlp.score(X_test, y_test)
print("Training set score: %f" % presnost_trenink)
print("Test set score: %f" % presnost_test)

# Draw first-layer weights as 28 x 28 images, one hidden unit per panel
# (at most ten panels on the 2 x 5 grid).
fig, axes = plt.subplots(2, 5)
vahy_prvni_vrstvy = mlp.coefs_[0]
# Global min / max so that all panels share the same colour scale.
vmin, vmax = vahy_prvni_vrstvy.min(), vahy_prvni_vrstvy.max()
for coef, ax in zip(vahy_prvni_vrstvy.T, axes.ravel()):
    ax.matshow(coef.reshape(28, 28), cmap=plt.cm.gray, vmin=0.5 * vmin, vmax=0.5 * vmax)
    ax.set(xticks=(), yticks=())
# Predict on the test set.
predicted = mlp.predict(X_test)

# Positions of the first (up to) 10 misclassified test samples.
# Vectorised comparison replaces the manual loop, whose index variable
# shadowed the builtin `id`.
id_wrong = np.flatnonzero(np.asarray(predicted) != np.asarray(y_test))[:10].tolist()
print(id_wrong)

# True labels next to the predicted labels for those misclassified samples.
print('Target: ',y_test[id_wrong])
print('Predicted: ',predicted[id_wrong])
# Show the selected misclassified samples with predicted and true labels.
spatne_obrazky = X_test[id_wrong]
spatne_predikce = predicted[id_wrong]
spravne_hodnoty = y_test[id_wrong]

_, axes = plt.subplots(nrows=1, ncols=10, figsize=(50, 5))
for ax, image, prediction, actual in zip(axes, spatne_obrazky, spatne_predikce, spravne_hodnoty):
    # Samples are flat vectors; restore the 28 x 28 layout for display.
    image = image.reshape(28, 28)
    ax.imshow(image, cmap=plt.cm.gray_r, interpolation="nearest")
    ax.set_title(f"Prediction: {prediction}, actual:{actual}")

# Confusion matrix for the whole test set.
disp = metrics.ConfusionMatrixDisplay.from_predictions(y_test, predicted)
disp.figure_.suptitle("Confusion Matrix")
plt.show()