
Neural Network Binary Classification: Dataset Exercises



Neural Networks (Binary Classification)

1. Iris Dataset

from sklearn.datasets import load_iris
import numpy as np
import matplotlib.pyplot as plt

# Load the iris dataset
data = load_iris()
X = data.data      # the 'data' key holds the features
Y = data.target    # the 'target' key holds the labels
# print(X.shape)  # (150, 4)
# print(Y)

# Keep only classes 0 and 1 for binary classification (all four features)
X = X[Y != 2, :]
Y = Y[Y != 2]
print(X.shape)  # (100, 4)
print(X)
print(Y)

# Min-max feature scaling
def suofang(x):
    xmin = np.min(x, axis=0)
    xmax = np.max(x, axis=0)
    s = (x - xmin) / (xmax - xmin)
    return s

x = suofang(X)
y = Y
print(y)

# Shuffle the sample order
m = x.shape[0]
np.random.seed(4)
order = np.random.permutation(m)
x = x[order]
y = y[order]
print(x.shape)
print(y.shape)

# Prepend a bias column of ones and split 70/30 into train/test sets
xx = np.c_[np.ones(len(x)), x]
a = int(len(x) * 0.7)
ya = y          # keep a 1-D copy of the labels for the plot at the end
y = np.c_[ya]   # column vector of labels for training
trainx = xx[:a]
trainy = y[:a]
testx = xx[a:]
testy = y[a:]
# Linear model
def model(x, theta):
    return x.dot(theta)

# Sigmoid activation; its derivative is sigmoid(z) * (1 - sigmoid(z))
def sigmoid(z):
    return 1 / (1 + np.exp(-z))

# Cross-entropy cost, averaged over the samples actually passed in
def cost(h, y):
    return -np.mean(y * np.log(h) + (1 - y) * np.log(1 - h))
# Forward propagation through two hidden layers
def forwardp(a1, theta1, theta2, theta3):
    z1 = a1.dot(theta1)
    a2 = sigmoid(z1)
    z2 = a2.dot(theta2)
    a3 = sigmoid(z2)
    z3 = a3.dot(theta3)
    a4 = sigmoid(z3)
    return a2, a3, a4
    
# Backpropagation: compute the deltas, the gradients, and the weight updates
def backp(a1, a2, a3, a4, y, theta1, theta2, theta3, alpha=0.005):
    m, n = a1.shape
    sigma4 = a4 - y
    sigma3 = sigma4.dot(theta3.T) * a3 * (1 - a3)
    sigma2 = sigma3.dot(theta2.T) * a2 * (1 - a2)

    dt3 = 1 / m * a3.T.dot(sigma4)
    dt2 = 1 / m * a2.T.dot(sigma3)
    dt1 = 1 / m * a1.T.dot(sigma2)

    theta3 = theta3 - alpha * dt3
    theta2 = theta2 - alpha * dt2
    theta1 = theta1 - alpha * dt1

    return theta3, theta2, theta1
    
# Gradient descent: k and l are the sizes of the two hidden layers
def gradeDecline(a1, y, nums, k, l):
    m, n = a1.shape
    np.random.seed(4)
    j = np.zeros(nums)
    # Random initialization in [-1, 1); all-zero weights would leave every
    # unit within a layer identical, so the units could never differentiate
    theta1 = 2 * np.random.rand(n, k) - 1
    theta2 = 2 * np.random.rand(k, l) - 1
    theta3 = 2 * np.random.rand(l, 1) - 1
    for i in range(nums):
        a2, a3, a4 = forwardp(a1, theta1, theta2, theta3)
        j[i] = cost(a4, y)
        # Update the weights on every iteration
        theta3, theta2, theta1 = backp(a1, a2, a3, a4, y,
                                       theta1, theta2, theta3, alpha=0.005)

    return theta1, theta2, theta3, j, a4
    
# Accuracy: fraction of samples whose thresholded prediction matches the label
def accuracy(a1, a4, y):
    m, n = a1.shape
    count = 0
    for i in range(m):
        if np.where(a4[i] > 0.5, 1, 0) == y[i]:
            count += 1

    return count / m
    
# trainx and trainy are the bias-augmented, shuffled, split x and y
theta1, theta2, theta3, j, a4 = gradeDecline(trainx, trainy, 100000, 7, 10)
print(theta1, theta2, theta3)
# theta1 has shape (5, 7)
# theta2 has shape (7, 10)
# theta3 has shape (10, 1)
print('Training accuracy:', accuracy(trainx, a4, trainy) * 100, '%')
a2, a3, a44 = forwardp(testx, theta1, theta2, theta3)
print('Test accuracy:', accuracy(testx, a44, testy) * 100, '%')

# Plot the cost curve
plt.plot(j)
plt.show()

# Scatter plot of the two classes with an illustrative separating line
def tu(x, y):
    plt.scatter(x[y == 0, 1], x[y == 0, 2])
    plt.scatter(x[y == 1, 1], x[y == 1, 2])
    x1min = x[:, 1].min()
    x1max = x[:, 1].max()
    x2min = x[:, 2].min()
    x2max = x[:, 2].max()
    # This draws the anti-diagonal of the plot area, not the learned boundary
    plt.plot([x1min, x1max], [x2max, x2min])

# Call with the pre-concatenation x and the 1-D label array ya (not the 2-D y)
tu(x, ya)

plt.show()

# Cost curve: [figure]
# Scatter plot with the separating line: [figure]
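For reference, the updates implemented in backp are the standard backpropagation equations for sigmoid layers under the cross-entropy cost. In the code's notation, with \odot denoting element-wise multiplication:

\delta_4 = a_4 - y, \qquad
\delta_3 = (\delta_4\,\theta_3^{\mathsf T}) \odot a_3 \odot (1-a_3), \qquad
\delta_2 = (\delta_3\,\theta_2^{\mathsf T}) \odot a_2 \odot (1-a_2)

\frac{\partial J}{\partial \theta_\ell} = \frac{1}{m}\, a_\ell^{\mathsf T}\, \delta_{\ell+1}, \qquad
\theta_\ell \leftarrow \theta_\ell - \alpha\, \frac{\partial J}{\partial \theta_\ell}

The simple form of \delta_4 is why the sigmoid output pairs so well with the cross-entropy cost: the derivative of J with respect to the final pre-activation collapses to the raw prediction error.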

2. Apple Dataset

import numpy as np
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')
# Deep learning (DL) is a research direction within machine learning (ML) that moves
# the field closer to its original goal, artificial intelligence (AI).
# Deep learning learns the internal regularities and representational levels of sample
# data, and the information it extracts helps greatly in interpreting data such as
# text, images, and sound. Its ultimate aim is to give machines human-like analytic
# and learning ability. As a complex family of machine learning algorithms, it has
# achieved results in speech and image recognition far beyond earlier techniques, and
# has produced advances in search, data mining, machine translation, natural language
# processing, multimedia learning, recommendation, and personalization.
# Task: a dataset from an orchard (in apple.txt) has two feature columns, apple size
# and apple weight; the y label gives the apple's grade: 1 for premium, 0 for ordinary.
# Using a hand-written neural network, train on the data and predict, covering:
# dataset preprocessing, the cost function, the activation function, gradient descent,
# training, and prediction. The requirements are implemented step by step below.

# Load the dataset
data = np.loadtxt('apple.txt', delimiter=',')
x = data[:, :-1]
y = data[:, -1]
print(x)
print(y)

# Feature scaling (min-max)
def suofang(x):
    xmin = np.min(x, axis=0)
    xmax = np.max(x, axis=0)
    s = (x - xmin) / (xmax - xmin)
    return s

x = suofang(x)

# Prepend a bias column of ones
m, n = x.shape
xx = np.c_[np.ones(m), x]

# Shuffle the data
def wash(x, y):
    m, n = x.shape
    np.random.seed(4)
    order = np.random.permutation(m)
    x = x[order]
    y = y[order]
    return x, y

xx, y = wash(xx, y)
# Split into training and test sets (70/30)
a = int(len(x) * 0.7)

trainx = xx[:a]
trainy = y[:a]
testx = xx[a:]
testy = y[a:]

trainy = np.c_[trainy]   # reshape the labels into column vectors
testy = np.c_[testy]
# The linear model, and the sigmoid activation with its derivative
def model(x, theta):
    z = x.dot(theta)
    return z

def sigmoid(z, grad=False):
    if grad:
        # Here the argument is expected to be an activation a = sigmoid(z),
        # so a * (1 - a) is the sigmoid's derivative at z
        return z * (1 - z)
    return 1 / (1 + np.exp(-z))

# Forward propagation
def forwardp(a1, theta1, theta2, theta3):
    z1 = a1.dot(theta1)
    a2 = sigmoid(z1)
    z2 = a2.dot(theta2)
    a3 = sigmoid(z2)
    z3 = a3.dot(theta3)
    a4 = sigmoid(z3)
    return a2, a3, a4

# Cross-entropy cost
def cost(a4, y):
    return -np.mean(y * np.log(a4) + (1 - y) * np.log(1 - a4))
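
# (Aside) If training saturates, a4 can reach exactly 0 or 1 and np.log overflows.
# A clipped variant of the cost is a common guard; this is only a sketch, and the
# eps value is our choice, not part of the original exercise:
def cost_safe(a4, y, eps=1e-12):
    a4 = np.clip(a4, eps, 1 - eps)   # keep the log arguments strictly inside (0, 1)
    return -np.mean(y * np.log(a4) + (1 - y) * np.log(1 - a4))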

# Backpropagation
def backp(a4, a3, a2, a1, y, theta1, theta2, theta3, alpha):
    m = len(a1)   # number of samples in the batch passed in
    sigma4 = a4 - y
    sigma3 = sigma4.dot(theta3.T) * sigmoid(a3, grad=True)
    sigma2 = sigma3.dot(theta2.T) * sigmoid(a2, grad=True)
    dt3 = 1 / m * a3.T.dot(sigma4)
    dt2 = 1 / m * a2.T.dot(sigma3)
    dt1 = 1 / m * a1.T.dot(sigma2)
    theta3 = theta3 - alpha * dt3
    theta2 = theta2 - alpha * dt2
    theta1 = theta1 - alpha * dt1
    return theta1, theta2, theta3, a4
# Gradient descent, recording the cost at every iteration
def gradeDecline(a1, y, nums, k, l):
    m, n = a1.shape
    j = np.zeros(nums)
    np.random.seed(4)
    theta1 = 2 * np.random.rand(n, k) - 1   # random init in [-1, 1)
    theta2 = 2 * np.random.rand(k, l) - 1
    theta3 = 2 * np.random.rand(l, 1) - 1
    for i in range(nums):
        a2, a3, a4 = forwardp(a1, theta1, theta2, theta3)
        j[i] = cost(a4, y)
        theta1, theta2, theta3, a4 = backp(a4, a3, a2, a1, y,
                                           theta1, theta2, theta3, alpha=0.1)
    return theta1, theta2, theta3, j, a4
# Accuracy: threshold predictions at 0.5 and compare with the labels
def score(a1, y, a4):
    m, n = a1.shape
    count = 0
    for i in range(m):
        if np.where(a4[i] > 0.5, 1, 0) == y[i]:
            count += 1
    acc = count / m
    return acc
# Main: call gradient descent to train the model
theta1, theta2, theta3, j, a4 = gradeDecline(trainx, trainy, 10000, 4, 2)
print(j)
print('Training accuracy:', score(trainx, trainy, a4) * 100, '%')
# Predict on the test set
a2, a3, a44 = forwardp(testx, theta1, theta2, theta3)
# Plot the cost curve
plt.plot(j)
plt.show()
# Compute and print the test-set accuracy
print('Test accuracy:', score(testx, testy, a44) * 100, '%')
# Recorded outputs:
# Test accuracy: 81.81818181818183 %
# Test accuracy: 50.0 %

# Scatter plot of the two classes with an illustrative separating line
def tu(x, y):
    plt.scatter(x[y == 0, 1], x[y == 0, 2])
    plt.scatter(x[y == 1, 1], x[y == 1, 2])
    x1min = x[:, 1].min()
    x1max = x[:, 1].max()
    x2min = x[:, 2].min()
    x2max = x[:, 2].max()
    # This draws the anti-diagonal of the plot area, not the learned boundary
    plt.plot([x1min, x1max], [x2max, x2min])

# Call with the bias-augmented xx (columns 1 and 2 are the two features)
# and the shuffled 1-D label array y (not the 2-D trainy/testy)
tu(xx, y)
plt.show()
Contents of apple.txt:
0.697,0.46,1
0.774,0.376,1
0.634,0.264,1
0.608,0.318,1
0.556,0.215,1
0.403,0.237,1
0.481,0.149,1
0.437,0.211,1
0.666,0.091,0
0.243,0.267,0
0.245,0.057,0
0.343,0.099,0
0.639,0.161,0
0.657,0.198,0
0.36,0.37,0
0.593,0.042,0
0.719,0.103,0

# Cost curve: [figure]
# Scatter plot with the separating line: [figure]
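
As a side note, the per-sample loop in score can be collapsed into a single vectorized comparison. A minimal sketch; the helper name accuracy_vec is ours, not part of the original exercise:

def accuracy_vec(a4, y):
    # Threshold the network outputs at 0.5; np.c_ makes the labels a column
    # so the comparison broadcasts against a4 with shape (m, 1)
    preds = (a4 > 0.5).astype(int)
    return np.mean(preds == np.c_[y])

# e.g. accuracy_vec(a44, testy) should agree with score(testx, testy, a44)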

3. Watermelon Dataset

import numpy as np
import matplotlib.pyplot as plt

plt.rcParams['font.sans-serif'] = ['SimHei']   # a font that can render CJK text
# Watermelon data: two features and a binary label
x1 = [0.697,0.774,0.634,0.608,0.556,0.403,0.481,0.437,0.666,0.243,0.245,0.343,0.639,0.657,0.360,0.593,0.719]
x2 = [0.460,0.376,0.264,0.318,0.215,0.237,0.149,0.211,0.091,0.267,0.057,0.099,0.161,0.198,0.370,0.042,0.103]
y = [1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0]

x11 = np.c_[x1, x2]   # raw feature matrix (not used below)

# Prepend a bias column of ones
xx = np.c_[np.ones(len(x1)), x1, x2]
yy = np.c_[y]

# Shuffle
np.random.seed(4)
order = np.random.permutation(len(xx))
xxx = xx[order]
yyy = yy[order]
m, n = xx.shape

def model(x, theta):
    return x.dot(theta)

def sigmoid(z):
    return 1 / (1 + np.exp(-z))

def cost(a5, y):
    return -np.mean(y * np.log(a5) + (1 - y) * np.log(1 - a5))

# Forward propagation through three hidden layers
def forwardPropagation(a1, theta1, theta2, theta3, theta4):
    z1 = model(a1, theta1)
    a2 = sigmoid(z1)
    z2 = model(a2, theta2)
    a3 = sigmoid(z2)
    z3 = model(a3, theta3)
    a4 = sigmoid(z3)
    z4 = model(a4, theta4)
    a5 = sigmoid(z4)
    return a2, a3, a4, a5

# Backpropagation for the three-hidden-layer network
def backPropagation(y, theta1, theta2, theta3, theta4, a1, a2, a3, a4, a5, alpha):
    m = len(a5)
    sigma5 = a5 - y
    sigma4 = sigma5.dot(theta4.T) * a4 * (1 - a4)
    sigma3 = sigma4.dot(theta3.T) * a3 * (1 - a3)
    sigma2 = sigma3.dot(theta2.T) * a2 * (1 - a2)

    dt4 = 1 / m * a4.T.dot(sigma5)
    dt3 = 1 / m * a3.T.dot(sigma4)
    dt2 = 1 / m * a2.T.dot(sigma3)
    dt1 = 1 / m * a1.T.dot(sigma2)

    theta4 = theta4 - alpha * dt4
    theta3 = theta3 - alpha * dt3
    theta2 = theta2 - alpha * dt2
    theta1 = theta1 - alpha * dt1

    return theta1, theta2, theta3, theta4



# Gradient descent with hidden layers of size 6, 5, and 7
def gradeDecline(a1, y, alpha, nums):
    m, n = a1.shape
    j = np.zeros(nums)
    np.random.seed(4)
    # Random initialization in [-1, 1); all-zero weights would leave every
    # unit within a layer identical throughout training (see the sketch below)
    theta1 = 2 * np.random.rand(n, 6) - 1
    theta2 = 2 * np.random.rand(6, 5) - 1
    theta3 = 2 * np.random.rand(5, 7) - 1
    theta4 = 2 * np.random.rand(7, 1) - 1
    for i in range(nums):
        a2, a3, a4, a5 = forwardPropagation(a1, theta1, theta2, theta3, theta4)
        j[i] = cost(a5, y)
        theta1, theta2, theta3, theta4 = backPropagation(
            y, theta1, theta2, theta3, theta4, a1, a2, a3, a4, a5, alpha)

    return theta1, theta2, theta3, theta4, j



theta1, theta2, theta3, theta4, j = gradeDecline(xxx, yyy, 0.01, 10000)
print(j)
print(theta1)
print(theta2)
print(theta3)
print(theta4)
plt.plot(j)
plt.show()

Cost curve: [figure]
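
Why the weights are initialized randomly rather than with zeros: with all-zero weights, every unit in a layer computes the same activation and receives the same gradient, so the units never differentiate and the network collapses to one effective unit per layer. A minimal, self-contained sketch of that symmetry on a toy one-hidden-layer network (our own construction, not part of the original exercise):

import numpy as np

def sigmoid(z):
    return 1 / (1 + np.exp(-z))

np.random.seed(0)
X = np.c_[np.ones(4), np.random.rand(4, 2)]   # 4 samples, bias + 2 features
Y = np.array([[0.], [1.], [1.], [0.]])

theta1 = np.zeros((3, 5))   # zero-initialized hidden weights
theta2 = np.zeros((5, 1))

for _ in range(1000):       # plain gradient descent
    a2 = sigmoid(X.dot(theta1))
    a3 = sigmoid(a2.dot(theta2))
    sigma3 = a3 - Y
    sigma2 = sigma3.dot(theta2.T) * a2 * (1 - a2)
    theta2 -= 0.1 / len(X) * a2.T.dot(sigma3)
    theta1 -= 0.1 / len(X) * X.T.dot(sigma2)

# All five hidden units still share identical weights: symmetry was never broken
print(np.allclose(theta1, theta1[:, :1]))   # True
print(np.allclose(theta2, theta2[:1, :]))   # True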

4. Neural Networks with sklearn.neural_network

import numpy as np
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import confusion_matrix, classification_report
import warnings
warnings.filterwarnings('ignore')
x1=[0.697,0.774,0.634,0.608,0.556,0.403,0.481,0.437,0.666,0.243,0.245,0.343,0.639,0.657,0.360,0.593,0.719]
x2=[0.460,0.376,0.264,0.318,0.215,0.237,0.149,0.211,0.091,0.267,0.057,0.099,0.161,0.198,0.370,0.042,0.103]
y=[1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0]

# Prepend a bias column of ones (MLPClassifier fits its own intercepts,
# so the extra column is harmless but not required)
xx = np.c_[np.ones(len(x1)), x1, x2]
yy = np.c_[y]
# Fit the library's neural network; alpha is the L2 regularization strength
clf = MLPClassifier(alpha=0.02)
clf.fit(xx, yy.ravel())   # ravel() passes the labels as a 1-D array
print('Predicted y:', clf.predict([[1, 0.64, 0.28]]))
print('Accuracy:', clf.score(xx, yy.ravel()) * 100, '%')
print('Weights:', clf.coefs_)
print('Intercepts:', clf.intercepts_)
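
The confusion_matrix and classification_report imports above go unused in the listing; a short sketch of how they could summarize the fit on the training data:

y_pred = clf.predict(xx)
print(confusion_matrix(yy.ravel(), y_pred))        # counts per true/predicted class
print(classification_report(yy.ravel(), y_pred))   # precision, recall, F1 per class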
