12345678910111213141516171819202122232425262728293031323334 |
# Third-party dependencies: pandas for data handling, CatBoost for the model,
# scikit-learn for splitting the data into train and test partitions.
import pandas as pd
from catboost import CatBoostClassifier, Pool
from sklearn.model_selection import train_test_split

# Load the example dataset. The 'class' column is the prediction target;
# every other column is one-hot encoded into indicator features.
data = pd.read_csv('mushroom.csv')
y = data['class']
X = pd.get_dummies(data.drop(columns='class'))

# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Configure the CatBoost classifier and fit it on the training partition
# (verbose=False silences the per-iteration training log).
model_params = {
    'iterations': 100,
    'depth': 2,
    'learning_rate': 0.1,
    'loss_function': 'Logloss',
}
model = CatBoostClassifier(**model_params)
model.fit(X_train, y_train, verbose=False)

# Evaluate the fitted model on both partitions, training split first.
for split_label, features, labels in (
    ('Train accuracy:', X_train, y_train),
    ('Test accuracy:', X_test, y_test),
):
    print(split_label, model.score(features, labels))

# Persist the trained model to disk, then restore it into a fresh
# classifier instance to demonstrate the save/load round trip.
model_path = 'catboost_model.bin'
model.save_model(model_path)
loaded_model = CatBoostClassifier()
loaded_model.load_model(model_path)

# Example of a single raw sample, kept for reference only:
# X_test = [['x', 's', 'n', 't', 'p', 'f', 'c', 'n', 'k', 'e', 'e', 's', 's', 'w', 'w', 'p', 'w', 'o', 'p', 'k', 's', 'u']]
# NOTE(review): the model above is fitted on one-hot encoded columns, so a raw
# sample like this would presumably need the same encoding first - confirm
# before enabling.

# Run the restored model on the held-out features: predicted labels and
# per-class probabilities.
preds_class = loaded_model.predict(X_test)
preds_proba = loaded_model.predict_proba(X_test)

# Print the prediction results.
print('Predicted classes:', preds_class)
print('Predicted probabilities:', preds_proba)
|