# tmp10.py
import json

import requests
  3. def chat_with_llm(messages):
  4. url = "http://192.168.10.91:8000/v1/chat/completions"
  5. headers = {
  6. "Content-Type": "application/json",
  7. "Accept": "text/event-stream"
  8. }
  9. data = {
  10. "messages": messages
  11. }
  12. # 发送请求并获取响应
  13. response = requests.post(url, headers=headers, json=data, stream=True)
  14. # 完整的响应文本
  15. full_response = ""
  16. # 处理流式响应
  17. try:
  18. for line in response.iter_lines():
  19. if not line:
  20. continue
  21. # 解码二进制数据
  22. line = line.decode('utf-8')
  23. # 检查是否是SSE数据行
  24. if line.startswith('data: '):
  25. data = line[6:] # 移除 'data: ' 前缀
  26. if data == '[DONE]':
  27. print("\n[会话结束]")
  28. break
  29. # 打印实时token并刷新输出
  30. print(data, end='', flush=True)
  31. full_response += data
  32. except KeyboardInterrupt:
  33. print("\n[用户中断]")
  34. return full_response
  35. except Exception as e:
  36. print(f"\n[错误] {str(e)}")
  37. return full_response
  38. return full_response
  39. def main():
  40. # 初始化对话历史
  41. conversation = []
  42. print("开始对话 (输入 'quit' 结束对话)")
  43. while True:
  44. # 获取用户输入
  45. user_input = input("\n用户: ")
  46. if user_input.lower() == 'quit':
  47. print("对话结束")
  48. break
  49. # 添加用户消息到对话历史
  50. conversation.append({
  51. "role": "user",
  52. "content": user_input
  53. })
  54. # 调用API并打印响应
  55. print("\nAI: ", end='')
  56. response = chat_with_llm(conversation)
  57. if response: # 只有在有响应时才添加到对话历史
  58. conversation.append({
  59. "role": "assistant",
  60. "content": response
  61. })
  62. if __name__ == "__main__":
  63. main()