1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
import time

import numpy as np
import pandas as pd
import xgboost as xgb
from sklearn.model_selection import StratifiedKFold, train_test_split

# Fetch the training features, the training labels and the test features.
# NOTE(review): load_data is neither defined nor imported in the visible
# code -- confirm where it comes from.
train_x, train_y, test_x = load_data()
# Hold out 1% of the training rows as a validation set.  The split is
# stratified on the labels and uses a fixed seed so it is reproducible.
split_options = {'test_size': 0.01, 'random_state': 1, 'stratify': train_y}
X, val_X, y, val_y = train_test_split(train_x, train_y, **split_options)
# Wrap each data partition in an xgboost DMatrix.  The training and
# validation matrices carry labels; the test matrix does not.
xgb_train = xgb.DMatrix(data=X, label=y)
xgb_val = xgb.DMatrix(data=val_X, label=val_y)
xgb_test = xgb.DMatrix(data=test_x)
# Counts of negative (0) and positive (1) labels in the training split;
# their ratio is passed to xgboost as scale_pos_weight.
negative_count = np.sum(y == 0)
positive_count = np.sum(y == 1)

# Booster configuration for a binary logistic model scored with log-loss.
params = {
    'booster': 'gbtree',
    'objective': 'binary:logistic',
    'eval_metric': 'logloss',
    'gamma': 0.1,
    'max_depth': 8,
    'alpha': 0,
    'lambda': 10,
    'subsample': 0.7,
    'colsample_bytree': 0.5,
    'min_child_weight': 3,
    # NOTE(review): newer xgboost releases use `verbosity` instead of
    # `silent` -- confirm against the installed version.
    'silent': 0,
    'eta': 0.03,
    'seed': 1000,
    'nthread': -1,
    # NOTE(review): `missing` is normally a DMatrix argument rather than a
    # booster parameter -- confirm it has any effect here.
    'missing': 1,
    'scale_pos_weight': (negative_count / positive_count),
}

# The same settings as a list of (name, value) pairs.
plst = list(params.items())

# Upper bound on boosting rounds for the final training run.
num_rounds = 2000

# (DMatrix, display name) pairs to evaluate during training.
watchlist = [(xgb_train, 'train'), (xgb_val, 'val')]
# 4-fold stratified cross-validation on the training matrix.
#
# `folds` must be a splitter instance or a list of (train_idx, test_idx)
# pairs.  StratifiedKFold.split() returns a one-shot generator, which is
# exhausted after a single pass, so it is materialised into a list first.
cv_folds = list(StratifiedKFold(n_splits=4).split(X, y))

# NOTE(review): early_stopping_rounds equals num_boost_round, so early
# stopping most likely can never trigger in this run -- confirm the values.
result = xgb.cv(
    plst,
    xgb_train,
    num_boost_round=200,
    nfold=4,
    early_stopping_rounds=200,
    verbose_eval=True,
    folds=cv_folds,
)
# Fit the final booster on the training matrix for at most num_rounds
# rounds, reporting on the watchlist and allowing early stopping with a
# patience of 200 rounds.
model = xgb.train(
    params=plst,
    dtrain=xgb_train,
    num_boost_round=num_rounds,
    evals=watchlist,
    early_stopping_rounds=200,
)

# Persist the fitted booster, then score the test matrix.
model.save_model('../data/model/xgb.model')
preds = model.predict(xgb_test)
# Turn every predicted score into a hard 0/1 label: 1 when the score is
# strictly above the threshold, 0 otherwise.
#
# The comparison is applied to the whole prediction array so that `result`
# holds one label per test row.  A per-element loop that rebinds `result`
# would keep only the label of the last prediction.
threshold = 0.5
result = (preds > threshold).astype(int)
|