import pandas as pd
from catboost import CatBoostClassifier, Pool
from sklearn.model_selection import train_test_split


def _one_hot(frame, columns=None):
    """One-hot encode *frame*, optionally aligned to a fixed column layout.

    Args:
        frame: DataFrame of raw feature values.
        columns: Optional sequence of encoded column names. When given, the
            encoded frame is reindexed to exactly these columns: columns
            missing from *frame* are added and filled with 0.0, and columns
            not in the layout are dropped.

    Returns:
        The encoded DataFrame.
    """
    # A fixed float dtype keeps training and inference matrices consistent
    # regardless of the pandas version's default dummy dtype.
    encoded = pd.get_dummies(frame, dtype=float)
    if columns is not None:
        # A handful of inference rows only produces dummy columns for the
        # categories present in those rows, so the layout must be forced to
        # match the one used for training.
        encoded = encoded.reindex(columns=list(columns), fill_value=0.0)
    return encoded


def train_model(data_path, model_path):
    """Train a CatBoost classifier on a CSV file and save it to disk.

    The CSV must contain a ``class`` column, which is used as the target;
    every other column is one-hot encoded and used as a feature.

    Args:
        data_path: Path of the CSV file to read.
        model_path: Path the trained model is written to.

    Returns:
        A ``(model, input_columns)`` tuple, where ``input_columns`` lists the
        raw (pre-encoding) feature column names in CSV order. This is the
        header ``predict`` needs in order to interpret raw input rows.
    """
    # Load the dataset.
    data = pd.read_csv(data_path)
    print(data)

    raw_features = data.drop('class', axis=1)
    X = _one_hot(raw_features)
    y = data['class']

    # Split into training and test sets; only the training part is used here.
    X_train, _X_test, y_train, _y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    # Define the CatBoost classifier and train it.
    model = CatBoostClassifier(
        iterations=100,
        depth=2,
        learning_rate=0.1,
        loss_function='Logloss',
    )
    model.fit(X_train, y_train, verbose=False)

    # Save the model.
    model.save_model(model_path)

    # Return the raw column names, not the one-hot names: callers build
    # DataFrames of raw rows from this list, and the widths must match.
    return model, list(raw_features.columns)


def predict(model_path, input_data, input_columns):
    """Load a saved model and predict classes and probabilities for rows.

    Args:
        model_path: Path of a model file written by ``train_model``.
        input_data: Rows of raw feature values, one inner sequence per row.
        input_columns: Column names for ``input_data``, as returned by
            ``train_model``.

    Returns:
        A ``(preds_class, preds_proba)`` tuple holding the results of the
        model's ``predict`` and ``predict_proba`` calls.

    Raises:
        ValueError: Raised by pandas if the width of the rows in
            ``input_data`` does not match ``len(input_columns)``.
    """
    # Load the model.
    loaded_model = CatBoostClassifier()
    loaded_model.load_model(model_path)

    # Convert the input to a DataFrame and encode it into the feature layout
    # stored in the model. Category values that have no matching column in
    # that layout are dropped by the reindex (the row gets zeros for them).
    input_df = pd.DataFrame(input_data, columns=input_columns)
    input_df = _one_hot(input_df, columns=loaded_model.feature_names_)

    # Run the model.
    preds_class = loaded_model.predict(input_df)
    preds_proba = loaded_model.predict_proba(input_df)

    # Return the predictions.
    return preds_class, preds_proba


# Run the demo only when executed as a script, so that importing this module
# does not read the CSV or train a model as a side effect.
if __name__ == "__main__":
    data_path = 'mushroom.csv'
    model_path = 'catboost_model.bin'

    # Train the model.
    trained_model, input_columns = train_model(data_path, model_path)

    # Example input: one row of 22 raw feature values.
    input_data = [['x', 's', 'n', 't', 'p', 'f', 'c', 'n', 'k', 'e', 'e',
                   's', 's', 'w', 'w', 'p', 'w', 'o', 'p', 'k', 's', 'u']]

    # Run the prediction.
    preds_class, preds_proba = predict(model_path, input_data, input_columns)

    # Print the results.
    print('Predicted classes:', preds_class)
    print('Predicted probabilities:', preds_proba)