ICode9

精准搜索请尝试: 精确搜索
首页 > 其他分享> 文章详细

Logistic 回归二分类 数据集:https://archive.ics.uci.edu/ml/datasets/Glass+Identification

2019-09-18 15:00:07  阅读:316  来源: 互联网

标签:logistics datasets ml np shape train 0.0 test theta


Binary classification

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import sklearn 
import sklearn.preprocessing as pre
# Load the glass data set from the local data directory.
# Forward slashes are used in the path: the backslash form contained '\g',
# which is not a valid string escape, and only resolved on Windows.
# NOTE(review): assumes the local copy has a header row (id, RI, ..., class),
# as the displayed head suggests -- confirm against the file.
df = pd.read_csv('data/glassi/glass.data')
df.head()
 id RI Na Mg Al Si K Ca Ba Fe class
0 1 1.52101 13.64 4.49 1.10 71.78 0.06 8.75 0.0 0.0 1
1 2 1.51761 13.89 3.60 1.36 72.73 0.48 7.83 0.0 0.0 1
2 3 1.51618 13.53 3.55 1.54 72.99 0.39 7.78 0.0 0.0 1
3 4 1.51766 13.21 3.69 1.29 72.61 0.57 8.22 0.0 0.0 1
4 5 1.51742 13.27 3.62 1.24 73.08 0.55 8.07 0.0 0.0 1
# Features are every column except the first (the id) and the last (the
# class); the last column is the label. Both are copied into NumPy arrays.
X = np.array(df.iloc[:, 1:-1])
y = np.array(df.iloc[:, -1])

# Replace each raw class label with its rank among the distinct labels, so
# the labels become consecutive integers starting at 0.
#
# The remap is done in place, in ascending label order. That is only safe
# when a newly written index can never equal a label that has not been
# processed yet, which holds for distinct positive integer labels.
for idx, class_name in enumerate(sorted(set(y))):
    y[y == class_name] = idx

y
array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
       4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
       5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5], dtype=int64)
# Reduce the labels to a binary target: label 1 stays as the positive class
# and every other label becomes 0.
# Label 1 stands for the original class '2'.
# A boolean mask updates the array in place in a single vectorized step.
y[y != 1] = 0
# Randomly hold out 15% of the samples as a test set; the fixed random_state
# makes the split reproducible.

from sklearn.model_selection import train_test_split

splits = train_test_split(X, y, test_size=0.15, random_state=44)
X_train, X_test, y_train, y_test = splits
X_train.shape, y_train.shape, X_test.shape, y_test.shape
((181, 9), (181,), (33, 9), (33,))
# Per-feature mean and standard deviation, taken from the training split.
f_mean = X_train.mean(axis=0)
f_std = X_train.std(axis=0)
f_mean, f_std
(array([1.51832884e+00, 1.33736464e+01, 2.69287293e+00, 1.46425414e+00,
        7.26391160e+01, 5.17016575e-01, 8.95314917e+00, 1.71104972e-01,
        6.02762431e-02]),
 array([0.00300427, 0.79769555, 1.42353328, 0.49169919, 0.77056863,
        0.69105168, 1.42892902, 0.5002639 , 0.10131419]))
# Standardize both splits with the training-set statistics, so the test set
# is scaled exactly like the data the model is fitted on.

X_train = (X_train - f_mean) / f_std
X_test = (X_test - f_mean) / f_std

# One weight per feature plus one extra for the intercept.
n_params = X_train.shape[1] + 1
theta = np.zeros(n_params)
theta.shape
(10,)
# Prepend a column of ones to each design matrix so that theta[0] acts as
# the intercept term.

bias_train = np.ones((X_train.shape[0], 1))
bias_test = np.ones((X_test.shape[0], 1))
X_train = np.hstack((bias_train, X_train))
X_test = np.hstack((bias_test, X_test))
X_train.shape, X_test.shape, theta.shape
((181, 10), (33, 10), (10,))
# Initialize the parameters: seed NumPy's global RNG so the starting point is
# reproducible, then draw one random value per existing entry of theta.

np.random.seed(42)
theta = np.random.rand(theta.size)
theta
array([0.37454012, 0.95071431, 0.73199394, 0.59865848, 0.15601864,
       0.15599452, 0.05808361, 0.86617615, 0.60111501, 0.70807258])
# Batch gradient descent for logistic regression.
#   h:                  hypothesis, the sigmoid of the linear score X_train @ theta
#   cross_entropy_loss: mean binary cross-entropy between h and y_train
#   gradient:           gradient of that loss with respect to theta

num_epoch = 500000
learning_rate = 0.01
log_every = 100000

for epoch in range(num_epoch):
    logist = np.dot(X_train, theta)
    h = 1 / (1 + np.exp(-logist))

    # The loss does not feed into the update, so it is only evaluated when it
    # is reported, and once more on the last epoch so that
    # cross_entropy_loss ends with the same value as if it had been
    # recomputed on every iteration.
    is_log_epoch = epoch % log_every == 0
    if is_log_epoch or epoch == num_epoch - 1:
        cross_entropy_loss = (-y_train * np.log(h) - (1 - y_train) * np.log(1 - h)).mean()
        if is_log_epoch:
            print('Epoch={}\tLoss={}'.format(epoch, cross_entropy_loss))

    gradient = np.dot((h - y_train), X_train) / y_train.size
    theta = theta - learning_rate * gradient
Epoch=0	Loss=0.9770836920534414
Epoch=100000	Loss=0.5884129057196792
Epoch=200000	Loss=0.5828823869347305
Epoch=300000	Loss=0.5798937167992417
Epoch=400000	Loss=0.5782071252958373
# Sigmoid of the linear score for every test sample.
test_logits = np.dot(X_test, theta)
h_test = 1 / (1 + np.exp(-test_logits))

# Accuracy: fraction of test samples whose thresholded prediction
# (h_test > 0.5) matches the label.
np.mean((h_test > 0.5) == y_test)
0.8484848484848485

标签:logistics,datasets,ml,np,shape,train,0.0,test,theta
来源: https://www.cnblogs.com/Aurora-Borealis/p/11542208.html

本站声明: 1. iCode9 技术分享网(下文简称本站)提供的所有内容,仅供技术学习、探讨和分享;
2. 关于本站的所有留言、评论、转载及引用,纯属内容发起人的个人观点,与本站观点和立场无关;
3. 关于本站的所有言论和文字,纯属内容发起人的个人观点,与本站观点和立场无关;
4. 本站文章均是网友提供,不完全保证技术分享内容的完整性、准确性、时效性、风险性和版权归属;如您发现该文章侵犯了您的权益,可联系我们第一时间进行删除;
5. 本站为非盈利性的个人网站,所有内容不会用来进行牟利,也不会利用任何形式的广告来间接获益,纯粹是为了广大技术爱好者提供技术内容和技术思想的分享性交流网站。

专注分享技术,共同学习,共同进步。侵权联系[81616952@qq.com]

Copyright (C)ICode9.com, All Rights Reserved.

ICode9版权所有