# tmp.py (1.0 KB)

  1. # 导入必要的库
  2. import pandas as pd
  3. from catboost import CatBoostClassifier, Pool
  4. from sklearn.model_selection import train_test_split
  5. # 加载示例数据集
  6. data = pd.read_csv('/Users/alvin/Downloads/mushrooms.csv')
  7. X = pd.get_dummies(data.drop('class', axis=1))
  8. y = data['class']
  9. X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
  10. # 定义CatBoost分类器并训练模型
  11. model = CatBoostClassifier(iterations=100, depth=2, learning_rate=0.1, loss_function='Logloss')
  12. model.fit(X_train, y_train, verbose=False)
  13. # 评估模型性能
  14. print('Train accuracy:', model.score(X_train, y_train))
  15. print('Test accuracy:', model.score(X_test, y_test))
  16. # 保存模型
  17. model.save_model('catboost_model.bin')
  18. # 加载模型
  19. loaded_model = CatBoostClassifier()
  20. loaded_model.load_model('catboost_model.bin')
  21. # 使用模型进行预测
  22. preds_class = loaded_model.predict(X_test)
  23. preds_proba = loaded_model.predict_proba(X_test)
  24. # 输出预测结果
  25. print('Predicted classes:', preds_class)
  26. print('Predicted probabilities:', preds_proba)