Kaynağa Gözat

"添加票3.0测试脚本和API调用,更新认证和数据处理逻辑,完善模型训练和预测流程,修复Base64文件编码问题。"

tianyun 1 yıl önce
ebeveyn
işleme
788643f0ca
6 değiştirilmiş dosya ile 164 ekleme ve 88 silme
  1. 3 0
      fastapi-demo/fastapi-demo.iml
  2. 43 0
      piao_test.py
  3. 8 2
      test_file_base64.py
  4. 17 58
      tmp1.py
  5. 66 26
      tmp2.py
  6. 27 2
      tmp3.py

+ 3 - 0
fastapi-demo/fastapi-demo.iml

@@ -6,4 +6,7 @@
     <orderEntry type="inheritedJdk" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
+  <component name="PackageRequirementsSettings">
+    <option name="requirementsPath" value="" />
+  </component>
 </module>

+ 43 - 0
piao_test.py

@@ -0,0 +1,43 @@
+# Test script for the "piao" (票) model v3.0 API: posts every CSV row to the endpoint and saves p/score.
+import json
+
+import pandas as pd
+import requests
+
+appId = "38bb3c94f3004d979b283657e4befb69"
+secret = "kKL7HP0DHxcK1LO5wJhHykDp5w5_Vp70"  # NOTE(review): hardcoded credential — move to env/config and rotate
+default_header = {
+    "appId": appId,
+    "secret": secret
+}
+url = "http://aimp.btaimp.k7.bigtree.tech/api/aimp-platform/out_api/processInvoke/ai_piao_model_v3"
+
+# Read the CSV file; the first row is the header
+datas = pd.read_csv('/Users/alvin/Downloads/模型测试_全量样本_for田云.csv')
+
+# Serialize the DataFrame to a JSON array string (one object per row)
+datas = datas.to_json(orient='records')
+
+datas = json.loads(datas)
+# Optionally keep only the first rows for a quick run (disabled)
+# datas = datas[:2]
+length = len(datas)
+print("datas长度" + str(len(datas)))  # prints the row count; the label reads "datas length"
+a = 0
+for data in datas:
+    a += 1
+    res = requests.post(url, json=data, headers=default_header).json()
+    # Extract p and score, e.g. {'code': 10000200, 'data': {'output': {'p': '0.005806804', 'score': 1000.0, 'level': 'F'}}}
+    if res['code'] == 10000200:
+        p_ = res['data']['output']['p']
+        score_ = res['data']['output']['score']
+        data['p'] = p_
+        data['score'] = score_
+        print(length - a)  # number of rows still to process
+    else:
+        print("error")
+        print(res)
+# Convert datas back into a DataFrame and write it to a CSV file with a header row
+datas_df = pd.DataFrame(datas)
+# Only the columns ["risk_buss_no", "p", "score"] are written; rows whose request failed carry no p/score
+datas_df[["risk_buss_no", "p", "score"]].to_csv('/Users/alvin/Downloads/模型测试_全量样本_for田云_new.csv', index=False)

+ 8 - 2
test_file_base64.py

@@ -7,8 +7,14 @@ def file_to_base64(filename):
         return base64.b64encode(file.read()).decode()
 
 
+def base64_to_file(base64_str, filename):
+    # Decode the Base64 string and write the resulting bytes to `filename` (opened in binary write mode)
+    with open(filename, "wb") as file:
+        file.write(base64.b64decode(base64_str))
+
+
 # Usage example
-fileStr = file_to_base64("/Users/alvin/Downloads/tmp.csv")
+fileStr = file_to_base64("/Users/alvin/Downloads/太原华远餐饮管理有限公司-山西小企业会计准则会计报表(展示)-202212.pdf")
 
 
 def exec(obj):
@@ -27,4 +33,4 @@ def exec(obj):
     return obj
 
 
-print(exec({"file": fileStr}))
+print(fileStr)

+ 17 - 58
tmp1.py

@@ -1,58 +1,17 @@
-import pandas as pd
-from catboost import CatBoostClassifier, Pool
-from sklearn.model_selection import train_test_split
-
-
-def train_model(data_path, model_path):
-    # 加载数据集
-    data = pd.read_csv(data_path)
-    print(data)
-    X = pd.get_dummies(data.drop('class', axis=1))
-    y = data['class']
-
-    # 划分训练集和测试集
-    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
-
-    # 定义CatBoost分类器并训练模型
-    model = CatBoostClassifier(iterations=100, depth=2, learning_rate=0.1, loss_function='Logloss')
-    model.fit(X_train, y_train, verbose=False)
-
-    # 保存模型
-    model.save_model(model_path)
-
-    # 返回训练好的模型
-    return model, list(X.columns)
-
-
-def predict(model_path, input_data, input_columns):
-    # 加载模型
-    loaded_model = CatBoostClassifier()
-    loaded_model.load_model(model_path)
-
-    # 将输入数据转换为DataFrame格式
-    input_df = pd.DataFrame(input_data, columns=input_columns)
-    input_df = pd.get_dummies(input_df)
-
-    # 使用模型进行预测
-    preds_class = loaded_model.predict(input_df)
-    preds_proba = loaded_model.predict_proba(input_df)
-
-    # 返回预测结果
-    return preds_class, preds_proba
-
-
-data_path = 'mushroom.csv'
-model_path = 'catboost_model.bin'
-
-# 训练模型
-trained_model, input_columns = train_model(data_path, model_path)
-
-# 输入数据示例
-input_data = [['x', 's', 'n', 't', 'p', 'f', 'c', 'n', 'k', 'e', 'e', 's', 's', 'w', 'w', 'p', 'w', 'o', 'p', 'k', 's', 'u']]
-
-# 进行预测
-preds_class, preds_proba = predict(model_path, input_data, input_columns)
-
-# 输出预测结果
-print('Predicted classes:', preds_class)
-print('Predicted probabilities:', preds_proba)
+from openai import OpenAI
+
+client = OpenAI(
+    api_key="sk-lwprZpeUVBd99yMZBQqlaERZ4Z9q8gHjI8t6w1iqUXce4Ekh",  # NOTE(review): hardcoded API key — load from env/config and rotate
+    base_url="https://api.moonshot.cn/v1",  # points the OpenAI client at the Moonshot endpoint
+)
+
+completion = client.chat.completions.create(  # sends one chat-completion request (system + user message)
+    model="moonshot-v1-8k",
+    messages=[
+        {"role": "system", "content": "你是 Kimi,由 Moonshot AI 提供的人工智能助手,回答我的问题"},
+        {"role": "user", "content": "你好,写一个100字的黄色笑话?"}
+    ],
+    temperature=0.3,
+)
+
+print(completion.choices[0].message)  # prints the message object of the first returned choice

+ 66 - 26
tmp2.py

@@ -1,34 +1,74 @@
-# 创建Elasticsearch客户端
+# -*- coding: utf-8 -*-
+# 导入必要的库
+import pandas as pd
+from catboost import CatBoostClassifier
+from matplotlib import pyplot as plt
+from sklearn.model_selection import train_test_split
 
-import requests
-import json
+# Load the sample dataset
+data = pd.read_excel('/Users/alvin/Downloads/ai_v3_bill_sample02.xlsx')
+# Drop the risk_buss_no column and coerce every remaining column to numeric (unparseable values become NaN)
+data = data.drop('risk_buss_no', axis=1)
+data = data.apply(pd.to_numeric, errors='coerce')
+print(data.columns)
+X = pd.get_dummies(data.drop('y', axis=1))  # features: every column except the label 'y'
+y = data['y']
+X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)  # 80/20 split with a fixed seed
 
-# Elasticsearch地址
-base_url = 'http://elasticsearch-master.rxdpdev.svc.k5.bigtree.zone:9200'
+# Define the CatBoost classifier (it is trained right after) with these hyperparameters:
+# n_estimators      78
+# max_depth         2
+# learning_rate     0.05
+# l2_leaf_reg       17
+# subsample         0.7
+# max_leaves        28
+# min_data_in_leaf  2
+# grow_policy       Lossguide
+# model = CatBoostClassifier()
+model = CatBoostClassifier(n_estimators=78, max_depth=2, learning_rate=0.05, l2_leaf_reg=17, subsample=0.7, max_leaves=28, min_data_in_leaf=2, grow_policy='Lossguide')
 
+model.fit(X_train, y_train, verbose=False)  # train on the training split only
 
-# 插入一条数据
-def insert_data(id, name):
-    url = f'{base_url}/rxdp_tag_all/_doc/{id}'
-    doc = {
-        'id': id,
-        'name': name
-    }
-    headers = {'Content-Type': 'application/json'}
-    response = requests.put(url, data=json.dumps(doc), headers=headers)
-    # print(response.json())
+# Evaluate model performance on both splits
+print('Train accuracy:', model.score(X_train, y_train))
+print('Test accuracy:', model.score(X_test, y_test))
 
+# 保存模型
+# model.save_model('catboost_model.bin')
 
-# 根据ID查询数据
-def get_data_by_id(id):
-    url = f'{base_url}/rxdp_tag_all/_doc/{id}'
-    headers = {'Content-Type': 'application/json'}
-    response = requests.get(url, headers=headers)
-    print(response.json()['_source'])
+# Load the model from disk (disabled; the in-memory model is used instead)
+# loaded_model = CatBoostClassifier()
+# loaded_model.load_model('catboost_model.bin')
+#
+# Run predictions with the model on the test split
+preds_class = model.predict(X_test)
+preds_proba = model.predict_proba(X_test)
 
+# Print the prediction results
+print('Predicted classes:', preds_class)
+print('Predicted probabilities:', preds_proba)
+# Take the first column of preds_proba and print its maximum and minimum
+print(preds_proba[:, 0].max())
+print(preds_proba[:, 0].min())
 
-# 调用插入数据函数
-insert_data(2, 'John Doe')
-
-# 调用根据ID查询数据函数
-get_data_by_id(2)
+#
+# from sklearn.metrics import classification_report, confusion_matrix
+# from sklearn.metrics import precision_recall_curve, roc_curve, auc
+#
+# print(classification_report(y_test, model.predict(X_test)))
+# confusion = confusion_matrix(y_test, model.predict(X_test), normalize='all')
+# print(confusion)
+# # AUC ROC Curve plotting
+# probs = model.predict_proba(X_test)
+# preds = probs[:, 1]
+# fpr, tpr, threshold = roc_curve(y_test, preds)
+# roc_auc = auc(fpr, tpr)
+#
+# # plt.figure(figsize = (12, 7))
+# plt.title('Receiver Operating Characteristic', weight='bold')
+# plt.plot(fpr, tpr, 'b', label='XGBClassifier (AUC = %0.2f)' % roc_auc)
+# plt.legend(loc='lower right')
+# plt.plot([0, 1], [0, 1], 'r--')
+# plt.ylabel('True Positive Rate', fontsize=12)
+# plt.xlabel('False Positive Rate', fontsize=12)
+# plt.show()

+ 27 - 2
tmp3.py

@@ -1,4 +1,29 @@
 import requests
 
-response = requests.post('https://php.tianyunperfect.cn/controller/user.php?action=login', json={'username': 'tianyunperfect', 'password': 'tianyunperfect123'}, verify=False)
-print(response.json())
+host = "https://aimp.aimpdev.k5.bigtree.tech"
+username = "lzq"
+password = "Aa123456"  # NOTE(review): hardcoded credential — move to env/config and rotate
+
+
+def get_token():  # POST the module-level username/password to the login endpoint and return the reply's 'token' field
+    headers = {
+        'accept': 'application/json, text/plain, */*',
+        'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8',
+    }
+    # print(f"host: {host}, username: {username}, password: {password}")  # debug aid: print host, username and password on one line (would expose the password if enabled)
+
+    json_data = {
+        'username': username,
+        'password': password,
+        'token': True,
+    }
+
+    response = requests.post(host + '/api/aimp-auth-web/auth/login', headers=headers, json=json_data)
+    # print(response)
+    return response.json()['token']  # no status check: raises KeyError if the JSON reply has no 'token' key
+
+
+# Fetch computer resources, authenticating with a freshly obtained token in the Access-Token header
+url = host + "/api/aimp-manager/k8s/getResources"
+data = {}
+print(requests.get(url, headers={"Access-Token": get_token()}, params=data).json())