Machine learning 13 (pca)
PCA
붓꽃 데이터
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
# Load the iris dataset and wrap its feature matrix in a DataFrame whose
# columns carry the dataset's own feature names; the class label is appended
# as an extra 'target' column.
iris = load_iris()
iris_df = pd.DataFrame(data=iris.data, columns=iris.feature_names).assign(
    target=iris.target
)
# Preview the first three rows.
iris_df.head(3)
| | sepal length (cm) | sepal width (cm) | petal length (cm) | petal width (cm) | target |
---|---|---|---|---|---|
0 | 5.1 | 3.5 | 1.4 | 0.2 | 0 |
1 | 4.9 | 3.0 | 1.4 | 0.2 | 0 |
2 | 4.7 | 3.2 | 1.3 | 0.2 | 0 |
sepal_length, sepal_width 두 개의 특성으로 산점도
# Scatter plot of the two sepal measurements; each point is coloured by its
# 'target' value.
plt.scatter(
    x=iris_df['sepal length (cm)'],
    y=iris_df['sepal width (cm)'],
    c=iris_df['target'],
)
plt.xlabel('sepal length')
plt.ylabel('sepal width')
plt.show()
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

# Feature matrix and class labels taken directly from the loaded dataset.
X, y = iris.data, iris.target

# Standardise the features; the scaler is fitted on every sample in X.
scaler = StandardScaler()
iris_scaled = scaler.fit_transform(X)

# Reduce the four scaled features to two principal components.
pca = PCA(n_components=2)
iris_pca = pca.fit_transform(iris_scaled)

# Shape of the reduced data.
iris_pca.shape
(150, 2)
# Put the two PCA components into a DataFrame and append the class label as a
# 'target' column, mirroring the layout used for the raw features.
pca_columns = ['pca_component1', 'pca_component2']
iris_pca_df = pd.DataFrame(data=iris_pca, columns=pca_columns).assign(
    target=iris.target
)
# Preview the first three rows.
iris_pca_df.head(3)
| | pca_component1 | pca_component2 | target |
---|---|---|---|
0 | -2.264703 | 0.480027 | 0 |
1 | -2.080961 | -0.674134 | 0 |
2 | -2.364229 | -0.341908 | 0 |
PCA로 차원축소된 새로운 특성으로 산점도
# Scatter plot of the two PCA components; each point is coloured by its
# 'target' value.
plt.scatter(
    x=iris_pca_df['pca_component1'],
    y=iris_pca_df['pca_component2'],
    c=iris_pca_df['target'],
)
plt.xlabel('pca component 1')
plt.ylabel('pca component 2')
plt.show()
# Fraction of the total variance explained by each PCA component.
pca.explained_variance_ratio_
array([0.72962445, 0.22850762])
# Total share of variance retained by the kept components. Computed from the
# fitted PCA rather than from hand-copied literals, so it stays correct if the
# data or n_components change (about 0.958 for the two components here).
float(pca.explained_variance_ratio_.sum())
0.95813207
원본 데이터와 PCA 변환된 데이터로 모델 성능 측정
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score
# Baseline: 3-fold cross-validated accuracy of a random forest trained on the
# original features.
rf_clf = RandomForestClassifier(random_state=42)
scores = cross_val_score(
    estimator=rf_clf,
    X=X,
    y=y,
    scoring='accuracy',
    cv=3,
)
scores
array([0.98, 0.94, 0.98])
# Same model and 3-fold accuracy evaluation, but on the two PCA components.
# NOTE(review): the scaler and PCA were fitted on all samples before the
# cross-validation split, so the held-out folds influenced the transform;
# fitting them inside a Pipeline passed to cross_val_score would avoid that.
rf_clf = RandomForestClassifier(random_state=42)
scores = cross_val_score(
    estimator=rf_clf,
    X=iris_pca,
    y=y,
    scoring='accuracy',
    cv=3,
)
scores
array([0.88, 0.88, 0.9 ])
# Shape of the PCA-reduced data that the second model was evaluated on.
iris_pca.shape
(150, 2)
댓글남기기