Build a deep neural network classifier with Keras, train it on the bankpep dataset, and compare its training time and performance against XGBoost, SVM, naive Bayes, and other methods.
import pandas, datetime, xgboost, numpy
from sklearn import model_selection, preprocessing, metrics, tree, naive_bayes, svm
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.utils import np_utils
from graphviz import Source
from IPython.display import Image

# Adjust the path to wherever bankpep.csv is saved
df = pandas.read_csv('data/bankpep.csv', index_col='id')

# Encode the binary YES/NO columns as 1/0
seq = ['married', 'car', 'save_act', 'current_act', 'mortgage', 'pep']
for feature in seq:
    df.loc[df[feature] == 'YES', feature] = 1
    df.loc[df[feature] == 'NO', feature] = 0
df.loc[df['sex'] == 'FEMALE', 'sex'] = 1
df.loc[df['sex'] == 'MALE', 'sex'] = 0

# One-hot encode the multi-valued categorical columns
dumm_region = pandas.get_dummies(df['region'], prefix='region')
dumm_child = pandas.get_dummies(df['children'], prefix='children')
df = df.drop(['region', 'children'], axis=1)
df = df.join([dumm_region, dumm_child], how='outer')

x = df.drop(['pep'], axis=1).values.astype(float)
# x = preprocessing.scale(x)
y = df['pep'].values.astype(int)
x_train, x_test, y_train, y_test = model_selection.train_test_split(
    x, y, test_size=0.2, random_state=1)
featureName = df.drop(['pep'], axis=1).columns.values
className = ['pep', 'no pep']

# Decision tree
print('Tree')
start_time = datetime.datetime.now()
clf_tree = tree.DecisionTreeClassifier()
clf_tree.fit(x_train, y_train)
pre_y_train_tree = clf_tree.predict(x_train)
pre_y_test_tree = clf_tree.predict(x_test)
print('train_tree')
print(clf_tree.score(x_train, y_train))
print(metrics.classification_report(y_train, pre_y_train_tree))
print(metrics.confusion_matrix(y_train, pre_y_train_tree))
print('test_tree')
tree_score = clf_tree.score(x_test, y_test)
print(tree_score)
print(metrics.classification_report(y_test, pre_y_test_tree))
print(metrics.confusion_matrix(y_test, pre_y_test_tree))
'''
graph_tree = Source(tree.export_graphviz(clf_tree, out_file=None,
                                         feature_names=featureName,
                                         class_names=className))
png_bytes = graph_tree.pipe(format='png')
with open('mooc_5.2_tree.png', 'wb') as f:
    f.write(png_bytes)
'''
end_time = datetime.datetime.now()
time_tree = end_time - start_time
print("time:", time_tree)

# naive_bayes.MultinomialNB
print('MultinomialNB')
start_time = datetime.datetime.now()
clf_MultinomialNB = naive_bayes.MultinomialNB()
clf_MultinomialNB.fit(x_train, y_train)
pre_y_train_MultinomialNB = clf_MultinomialNB.predict(x_train)
pre_y_test_MultinomialNB = clf_MultinomialNB.predict(x_test)
print('train_MultinomialNB')
print(clf_MultinomialNB.score(x_train, y_train))
print(metrics.classification_report(y_train, pre_y_train_MultinomialNB))
print(metrics.confusion_matrix(y_train, pre_y_train_MultinomialNB))
print('test_MultinomialNB')
MultinomialNB_score = clf_MultinomialNB.score(x_test, y_test)
print(MultinomialNB_score)
print(metrics.classification_report(y_test, pre_y_test_MultinomialNB))
print(metrics.confusion_matrix(y_test, pre_y_test_MultinomialNB))
end_time = datetime.datetime.now()
time_MultinomialNB = end_time - start_time
print("time:", time_MultinomialNB)

# naive_bayes.GaussianNB
print('GaussianNB')
start_time = datetime.datetime.now()
clf_GaussianNB = naive_bayes.GaussianNB()
clf_GaussianNB.fit(x_train, y_train)
pre_y_train_GaussianNB = clf_GaussianNB.predict(x_train)
pre_y_test_GaussianNB = clf_GaussianNB.predict(x_test)
print('train_GaussianNB')
print(clf_GaussianNB.score(x_train, y_train))
print(metrics.classification_report(y_train, pre_y_train_GaussianNB))
print(metrics.confusion_matrix(y_train, pre_y_train_GaussianNB))
print('test_GaussianNB')
GaussianNB_score = clf_GaussianNB.score(x_test, y_test)
print(GaussianNB_score)
print(metrics.classification_report(y_test, pre_y_test_GaussianNB))
print(metrics.confusion_matrix(y_test, pre_y_test_GaussianNB))
end_time = datetime.datetime.now()
time_GaussianNB = end_time - start_time
print("time:", time_GaussianNB)

# naive_bayes.BernoulliNB
print('BernoulliNB')
start_time = datetime.datetime.now()
clf_BernoulliNB = naive_bayes.BernoulliNB()
clf_BernoulliNB.fit(x_train, y_train)
pre_y_train_BernoulliNB = clf_BernoulliNB.predict(x_train)
pre_y_test_BernoulliNB = clf_BernoulliNB.predict(x_test)
print('train_BernoulliNB')
print(clf_BernoulliNB.score(x_train, y_train))
print(metrics.classification_report(y_train, pre_y_train_BernoulliNB))
print(metrics.confusion_matrix(y_train, pre_y_train_BernoulliNB))
print('test_BernoulliNB')
BernoulliNB_score = clf_BernoulliNB.score(x_test, y_test)
print(BernoulliNB_score)
print(metrics.classification_report(y_test, pre_y_test_BernoulliNB))
print(metrics.confusion_matrix(y_test, pre_y_test_BernoulliNB))
end_time = datetime.datetime.now()
time_BernoulliNB = end_time - start_time
print("time:", time_BernoulliNB)

# SVM
print('SVM')
start_time = datetime.datetime.now()
clf_SVM = svm.SVC()
clf_SVM.fit(x_train, y_train)
pre_y_train_SVM = clf_SVM.predict(x_train)
pre_y_test_SVM = clf_SVM.predict(x_test)
print('train_SVM')
print(clf_SVM.score(x_train, y_train))
print(metrics.classification_report(y_train, pre_y_train_SVM))
print(metrics.confusion_matrix(y_train, pre_y_train_SVM))
print('test_SVM')
SVM_score = clf_SVM.score(x_test, y_test)
print(SVM_score)
print(metrics.classification_report(y_test, pre_y_test_SVM))
print(metrics.confusion_matrix(y_test, pre_y_test_SVM))
end_time = datetime.datetime.now()
time_SVM = end_time - start_time
print("time:", time_SVM)

# GBM
print('GBM')
start_time = datetime.datetime.now()
clf_GBM = GradientBoostingClassifier()
clf_GBM.fit(x_train, y_train)
pre_y_train_GBM = clf_GBM.predict(x_train)
pre_y_test_GBM = clf_GBM.predict(x_test)
print('train_GBM')
print(clf_GBM.score(x_train, y_train))
print(metrics.classification_report(y_train, pre_y_train_GBM))
print(metrics.confusion_matrix(y_train, pre_y_train_GBM))
print('test_GBM')
GBM_score = clf_GBM.score(x_test, y_test)
print(GBM_score)
print(metrics.classification_report(y_test, pre_y_test_GBM))
print(metrics.confusion_matrix(y_test, pre_y_test_GBM))
end_time = datetime.datetime.now()
time_GBM = end_time - start_time
print("time:", time_GBM)

# XGBoost
print('XGBoost')
start_time = datetime.datetime.now()
clf_XGBoost = xgboost.XGBClassifier()
clf_XGBoost.fit(x_train, y_train)
pre_y_train_XGBoost = clf_XGBoost.predict(x_train)
pre_y_test_XGBoost = clf_XGBoost.predict(x_test)
print('train_XGBoost')
print(clf_XGBoost.score(x_train, y_train))
print(metrics.classification_report(y_train, pre_y_train_XGBoost))
print(metrics.confusion_matrix(y_train, pre_y_train_XGBoost))
print('test_XGBoost')
XGBoost_score = clf_XGBoost.score(x_test, y_test)
print(XGBoost_score)
print(metrics.classification_report(y_test, pre_y_test_XGBoost))
print(metrics.confusion_matrix(y_test, pre_y_test_XGBoost))
end_time = datetime.datetime.now()
time_XGBoost = end_time - start_time
print("time:", time_XGBoost)

# RandomForestClassifier
print('RFC')
start_time = datetime.datetime.now()
clf_RFC = RandomForestClassifier()
clf_RFC.fit(x_train, y_train)
pre_y_train_RFC = clf_RFC.predict(x_train)
pre_y_test_RFC = clf_RFC.predict(x_test)
print('train_RFC')
print(clf_RFC.score(x_train, y_train))
print(metrics.classification_report(y_train, pre_y_train_RFC))
print(metrics.confusion_matrix(y_train, pre_y_train_RFC))
print('test_RFC')
RFC_score = clf_RFC.score(x_test, y_test)
print(RFC_score)
print(metrics.classification_report(y_test, pre_y_test_RFC))
print(metrics.confusion_matrix(y_test, pre_y_test_RFC))
end_time = datetime.datetime.now()
time_RFC = end_time - start_time
print("time:", time_RFC)

# Keras
print('Keras')
start_time = datetime.datetime.now()
model = Sequential()
model.add(Dense(units=16, input_shape=(16,)))   # 16 input features after encoding
model.add(Activation('relu'))
model.add(Dense(100))
model.add(Activation('relu'))
model.add(Dense(2))
model.add(Activation('softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam',
              metrics=['binary_accuracy'])
y_train_ohe = np_utils.to_categorical(y_train, 2)
y_test_ohe = np_utils.to_categorical(y_test, 2)
model.fit(x_train, y_train_ohe, epochs=25, batch_size=1, verbose=2,
          validation_data=(x_test, y_test_ohe))
loss, accuracy = model.evaluate(x_test, y_test_ohe)
print(loss, accuracy)
classes = model.predict(x_test, batch_size=1, verbose=2)
Keras_score = loss   # note: this stores the test loss, unlike the *_score accuracies above
end_time = datetime.datetime.now()
time_Keras = end_time - start_time
print("time:", time_Keras)

# Collect test scores and run times, then plot the comparison
models = ['tree', 'MultinomialNB', 'GaussianNB', 'BernoulliNB', 'SVM', 'GBM', 'XGBoost', 'RFC']
column = ['Score', 'Time']
datas = []
for i in models:
    data = []
    data.append(eval(i + "_score"))
    data.append(eval("time_" + i).total_seconds())
    datas.append(data)
df_Matplotlib = pandas.DataFrame(datas, columns=column, index=models)
print(df_Matplotlib)
print('Keras', loss, accuracy, time_Keras.total_seconds())
df_Matplotlib.plot()
plt.grid()
plt.show()
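The script uses the standalone keras package of its era (hence from keras.utils import np_utils). Under current TensorFlow 2.x the np_utils module no longer exists; a minimal sketch of the equivalent imports, assuming the bundled tf.keras API, with the rest of the script unchanged:

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation
from tensorflow.keras.utils import to_categorical   # replaces keras.utils.np_utils

# to_categorical has the same call signature as np_utils.to_categorical
y_train_ohe = to_categorical(y_train, 2)
y_test_ohe = to_categorical(y_test, 2)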
Output (the final line reports the Keras test loss, test accuracy, and run time in seconds):
                  Score      Time
tree           0.775000  0.081810
MultinomialNB  0.666667  0.009974
GaussianNB     0.700000  0.008011
BernoulliNB    0.741667  0.009941
SVM            0.566667  0.027959
GBM            0.825000  0.100698
XGBoost        0.816667  0.153870
RFC            0.833333  0.282304
Keras 0.6881586909294128 0.550000011920929 13.049028
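One caveat about these numbers: the x = preprocessing.scale(x) line is commented out, so SVM and the Keras network train on unscaled features, where income dwarfs the 0/1 columns; that plausibly explains why both score well below the tree ensembles here. A minimal sketch of leakage-free scaling (fitting the scaler on the training split only; StandardScaler is one reasonable choice, not what the original ran):

from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)   # fit on the training split only
x_test = scaler.transform(x_test)         # reuse the training-split statistics

Scaling the whole matrix before train_test_split, as the commented-out line would, leaks test-set statistics into training; fitting on the training split avoids that.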
Source: https://www.cnblogs.com/fydkk/p/14105311.html