1 2 3 4 |
import numpy as np
import pandas as pd

# Read the Excel workbook at data/PDMS.xlsx into the working DataFrame.
# The path is relative to the current working directory.
df = pd.read_excel("data/PDMS.xlsx")
1 2 3 |
# Drop the Strata, RID and FCSG columns from df, in place and in this order.
for dropped_column in ("Strata", "RID", "FCSG"):
    df.drop(dropped_column, axis=1, inplace=True)
1 |
# Preview the first five rows of df (five is head()'s default row count).
df.head(n=5)
1 2 3 4 5 6 7 8 9 10 11 12 |
from sklearn.preprocessing import StandardScaler

# Use every remaining column of df as a feature.
col = df.columns
feature = col.tolist()
data = df.loc[:, feature].values

# Standardize each feature column (StandardScaler removes the mean and
# scales to unit variance).
sc = StandardScaler()
data = sc.fit_transform(data)

# Keep ALL standardized rows, in (n_samples, n_features) orientation, for
# the PCA step. Assigning `.head().T` here would hand PCA only the first
# five rows, transposed, so the features would be treated as samples.
data_x = pd.DataFrame(data, columns=feature)

# Preview only; this does not modify data_x.
data_x.head()
1 2 3 4 5 6 7 8 9 10 11 12 |
from sklearn.decomposition import PCA

# Fit a 5-component PCA on data_x, then project data_x onto the components.
pca = PCA(n_components=5)
pca.fit(data_x)
principal_components = pca.transform(data_x)

# Per-component share of the variance and its running total.
percentage_var_explained = pca.explained_variance_ratio_
cum_var_explained = percentage_var_explained.cumsum()

# Print the ratios, then their sum as a percentage
# ("yuzde" is presumably Turkish "yüzde", i.e. percent).
print(
    percentage_var_explained,
    "yuzde = ",
    sum(percentage_var_explained) * 100,
)
1 |
[4.89869574e-01 3.38506520e-01 1.53860335e-01 1.77635703e-02 5.23048137e-33] yuzde = 100.0 |
1 2 3 4 5 6 7 8 9 10 11 |
import matplotlib.pyplot as plt

# Plot the PCA spectrum: cumulative explained variance against the number
# of components kept.
plt.figure(1, figsize=(6, 4))
plt.clf()
# cum_var_explained[i] is the variance captured by the first i + 1
# components, so number the x axis from 1 instead of relying on
# matplotlib's default 0-based index (which would mislabel n_components).
component_counts = range(1, len(cum_var_explained) + 1)
plt.plot(component_counts, cum_var_explained, linewidth=2)
plt.axis('tight')
plt.grid()
plt.xlabel('n_components')
plt.ylabel('Cumulative_Variance_explained')
plt.show()
1 2 3 |
# Store every projected row in data_x. Assigning `.head()` here would
# silently discard all but the first five rows for any later use.
data_x = pd.DataFrame(principal_components)

# Preview only: show the first five projected rows.
data_x.head()