ML (0813_day8) - Lab: Predicting Iris Flower Species (Logistic Regression, Softmax Regression)
_JAEJAE_
2021. 8. 13. 17:57
Logistic Regression
In [1]:
import numpy as np
import matplotlib.pyplot as plt
In [2]:
from sklearn import datasets
In [4]:
iris = datasets.load_iris()
In [5]:
iris.keys()
Out[5]:
dict_keys(['data', 'target', 'frame', 'target_names', 'DESCR', 'feature_names', 'filename'])
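As a quick side check, the feature and class names bundled with the dataset can be printed so that the column indices used below are explicit (a minimal sketch, not part of the original cells):
In [ ]:
# Feature names: ['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']
# Target names:  ['setosa', 'versicolor', 'virginica']
print(iris['feature_names'])
print(iris['target_names'])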
In [14]:
X = iris['data'][:, 3:]  # petal width (cm)
In [17]:
y = (iris['target'] == 2).astype(np.int32)  # 1 if Iris-Virginica, else 0
In [18]:
from sklearn.linear_model import LogisticRegression
In [19]:
log_reg = LogisticRegression(random_state=42)
In [20]:
log_reg.fit(X, y)
Out[20]:
LogisticRegression(random_state=42)
In [21]:
X_new = np.linspace(0, 3, 1000).reshape(-1, 1)
y_proba = log_reg.predict_proba(X_new)
decision_boundary = X_new[y_proba[:, 1] >= 0.5][0]  # first sample whose estimated probability reaches 50%
plt.figure(figsize=(8, 3))
plt.plot(X[y==0], y[y==0], "bs")
plt.plot(X[y==1], y[y==1], "g^")
plt.plot([decision_boundary, decision_boundary], [-1, 2], "k:", linewidth=2)
plt.plot(X_new, y_proba[:, 1], "g-", linewidth=2, label="Iris-Virginica")
plt.plot(X_new, y_proba[:, 0], "b--", linewidth=2, label="Not Iris-Virginica")
plt.legend(loc="center left", fontsize=14)
plt.axis([0, 3, -0.02, 1.02])
plt.show()
In [22]:
decision_boundary
Out[22]:
array([1.66066066])
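The boundary found on the grid can also be checked analytically: the sigmoid crosses 0.5 exactly where intercept + coef · x = 0, i.e. x = -intercept / coef. A minimal sketch using the fitted log_reg above:
In [ ]:
# Analytic decision boundary: where the linear score intercept + coef * x is zero.
boundary_analytic = -log_reg.intercept_[0] / log_reg.coef_[0][0]
boundary_analytic  # should be very close to the ~1.66 cm value found on the grid above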
In [26]:
log_reg.predict([[1.7], [1.5]])
Out[26]:
array([1, 0])
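predict() only returns the hard class labels. To see how confident these two calls actually are, predict_proba can be applied to the same samples (a small sketch; both petal widths sit close to the ~1.66 cm boundary, so neither prediction should be made with very high confidence):
In [ ]:
# Estimated probabilities for the same two samples.
# Columns: [P(not Virginica), P(Virginica)]
log_reg.predict_proba([[1.7], [1.5]])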
Softmax Regression
In [32]:
X = iris['data'][:, 2:]  # petal length, petal width
# X = iris['data'][:, (2, 3)]
In [34]:
y = iris['target']
In [38]:
softmax_reg = LogisticRegression(multi_class="multinomial", solver='lbfgs', C=10, random_state=42)
In [39]:
softmax_reg.fit(X, y)
Out[39]:
LogisticRegression(C=10, multi_class='multinomial', random_state=42)
In [40]:
softmax_reg.predict([[5, 2]])
Out[40]:
array([2])
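The prediction is a class index; it can be mapped back to a species name with the target_names array seen in iris.keys() above (a small sketch):
In [ ]:
# Map the predicted class index back to its species name (index 2 is 'virginica').
pred = softmax_reg.predict([[5, 2]])
iris['target_names'][pred]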
- Shows the estimated probability for every class
In [41]:
softmax_reg.predict_proba([[5, 2]])
Out[41]:
array([[6.38014896e-07, 5.74929995e-02, 9.42506362e-01]])
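These probabilities are just the softmax of the per-class scores, so as a sanity check they can be reproduced by hand from decision_function (a minimal sketch, assuming the standard softmax definition):
In [ ]:
# Recompute the class probabilities by hand: softmax over the raw per-class scores.
scores = softmax_reg.decision_function([[5, 2]])         # shape (1, 3)
exp_scores = np.exp(scores)
exp_scores / exp_scores.sum(axis=1, keepdims=True)       # should match predict_proba above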