标签:预测 name import model dummies data 房态 sklearn
import os
import time
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import xgboost as xgb
from imblearn.over_sampling import ADASYN
# Model-selection / preprocessing helpers
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler
# Baseline models
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
# Ensemble models
from sklearn.ensemble import (
    AdaBoostClassifier,
    GradientBoostingClassifier,
    RandomForestClassifier,
)
# Evaluation metrics
from sklearn import metrics
from sklearn.metrics import (
    accuracy_score,
    roc_auc_score,
    recall_score,
    precision_score,
    classification_report,
)

warnings.filterwarnings('ignore')

# `%matplotlib inline` is IPython-only syntax; this form does the same thing
# inside a notebook and is a harmless no-op when run as a plain script.
try:
    get_ipython().run_line_magic("matplotlib", "inline")
except NameError:
    pass

# Seed shared by the split and the sampler so runs are repeatable.
RANDOM_STATE = 42


def train_model(X_train, y_train, X_test, y_test, model, model_name):
    """Fit *model*, evaluate it on the held-out split and return its scores.

    NOTE(review): the original script called ``train_model`` without ever
    defining it; this is a minimal implementation built only from the metrics
    the script already imports. Adjust if a different report is wanted.

    Args:
        X_train: Training feature matrix.
        y_train: Training labels.
        X_test: Held-out feature matrix.
        y_test: Held-out labels.
        model: Unfitted estimator exposing ``fit`` / ``predict``.
        model_name: Display name used in the printed report.

    Returns:
        dict with ``train_seconds`` and ``accuracy``; for two-class problems
        also ``precision``, ``recall`` and (when the model exposes
        ``predict_proba``) ``roc_auc``.
    """
    started = time.perf_counter()
    model.fit(X_train, y_train)
    elapsed = time.perf_counter() - started

    y_pred = model.predict(X_test)
    scores = {
        "train_seconds": elapsed,
        "accuracy": accuracy_score(y_test, y_pred),
    }

    # Precision / recall / AUC as computed here only make sense for binary
    # targets; the larger class label is treated as the positive class.
    classes = np.unique(y_train)
    if len(classes) == 2:
        positive = classes[1]
        scores["precision"] = precision_score(y_test, y_pred, pos_label=positive)
        scores["recall"] = recall_score(y_test, y_pred, pos_label=positive)
        if hasattr(model, "predict_proba"):
            scores["roc_auc"] = roc_auc_score(
                y_test, model.predict_proba(X_test)[:, 1]
            )

    print(f"{model_name}: trained in {elapsed:.2f}s, "
          f"accuracy={scores['accuracy']:.4f}")
    print(classification_report(y_test, y_pred))
    return scores


data = pd.read_csv(
    r"E:\Excersise\ML\Trip\order_train_merage.csv",
    parse_dates=["orderdate", "arrival", "etd"],
)
print(data.head())

# Missing-value count and ratio for every column.
print(pd.DataFrame({
    "missing": data.isnull().sum(),
    "ratio": data.isnull().mean(),
}))
data.dropna(inplace=True)

# Class balance, duplicate rows and a summary of the categorical columns.
print(data.label.value_counts())
print(data.duplicated().sum())
print(data.describe(include="object"))

# One-hot encode the two categorical columns and append them to the frame.
dummies = pd.get_dummies(data.hotelbelongto, prefix='hotelbelongto')
dummies_1 = pd.get_dummies(data.supplierchannel, prefix='supplierchannel')
data = pd.concat([data, dummies, dummies_1], axis=1)
print(data.head())

# The samplers and estimators need a purely numeric matrix, so keep only
# numeric / boolean columns (this drops the parsed datetime columns and any
# raw string columns that were one-hot encoded above).
features = (
    data.drop(columns="label")
    .select_dtypes(include=["number", "bool"])
    .astype(float)
)
X = features.values
y = data.label.values

# Split BEFORE oversampling so synthetic samples never reach the test set.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, stratify=y, random_state=RANDOM_STATE
)

# ADASYN adaptive oversampling (inputs must be arrays), training split only.
sample = ADASYN(random_state=RANDOM_STATE)
X_resampled, y_resampled = sample.fit_resample(X_train, y_train)
X_train, y_train = X_resampled, y_resampled

model_name_param_dict = {
    'LR': LogisticRegression(),
    'DT': DecisionTreeClassifier(),
    'AdaBoost': AdaBoostClassifier(),
    'GBDT': GradientBoostingClassifier(),
    'RF': RandomForestClassifier(),
    # Only `import xgboost as xgb` is in scope, so qualify the class name.
    'XGBoost': xgb.XGBClassifier(),
}

result = {}
for model_name, model in model_name_param_dict.items():
    result[model_name] = train_model(
        X_train, y_train, X_test, y_test, model, model_name
    )

# One row per model for side-by-side comparison.
print(pd.DataFrame(result).T)
标签:预测,name,import,model,dummies,data,房态,sklearn 来源: https://www.cnblogs.com/lilingxin/p/15407236.html
本站声明: 1. iCode9 技术分享网(下文简称本站)提供的所有内容,仅供技术学习、探讨和分享; 2. 关于本站的所有留言、评论、转载及引用,纯属内容发起人的个人观点,与本站观点和立场无关; 3. 关于本站的所有言论和文字,纯属内容发起人的个人观点,与本站观点和立场无关; 4. 本站文章均是网友提供,不完全保证技术分享内容的完整性、准确性、时效性、风险性和版权归属;如您发现该文章侵犯了您的权益,可联系我们第一时间进行删除; 5. 本站为非盈利性的个人网站,所有内容不会用来进行牟利,也不会利用任何形式的广告来间接获益,纯粹是为了广大技术爱好者提供技术内容和技术思想的分享性交流网站。