tmp11.py 3.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104
  1. import pandas as pd
  2. from openpyxl.reader.excel import load_workbook
  3. # 读取模板Excel文件
  4. template_path = '/Users/alvin/Downloads/合同抽取字段表述250321.xlsx'
  5. df_template = pd.read_excel(template_path, engine='openpyxl')
  6. # 预处理:处理合并单元格逻辑
  7. for i in range(1, len(df_template)): # 从第二行开始检查
  8. if pd.isna(df_template.loc[i, '类别']) and not pd.isna(df_template.loc[i, '字段描述']):
  9. df_template.loc[i, '类别'] = df_template.loc[i-1, '类别']
  10. # 定义JSON数据
  11. json_data = {
  12. "基础": {
  13. "合同总金额": "未填写(赊销总额度100万元)",
  14. "不含税金额": "",
  15. "税率": "",
  16. "税额": "",
  17. "合同类型": "买卖合同",
  18. "签订日期": "",
  19. "开始日期": "2021年(具体月份未填写)",
  20. "结束日期": "2027年12月31日",
  21. "违约金": "逾期60天内按0.03%/日计费,超60天按0.05%/日计费",
  22. "质保金": "",
  23. "质保期": "",
  24. "固定总价": "",
  25. "固定单价": "",
  26. "暂估金额": "",
  27. "付款方式": "账期结算(固定账单日每月15日,账期60天)",
  28. "发票类型": "增值税专用发票"
  29. },
  30. "甲乙双方信息": {
  31. "乙方名称": "北京京东世纪信息技术有限公司",
  32. "乙方账户名称": "北京京东世纪信息技术有限公司",
  33. "乙方开户行": "招商银行股份有限公司北京青年路支行",
  34. "乙方银行账号": "110907697010208",
  35. "乙方联系人": "张洪君",
  36. "乙方联系电话": "15701277612",
  37. "乙方邮箱": "zhanghongjun13@jd.com",
  38. "乙方地址": "北京经济技术开发区科创十一街18号院C座2层215室",
  39. "甲方名称": "海信集团有限公司",
  40. "甲方账户名称": "",
  41. "甲方开户行": "",
  42. "甲方银行账号": "",
  43. "甲方联系人": "李明",
  44. "甲方联系电话": "13812345678",
  45. "甲方邮箱": "554857923@qq.com",
  46. "甲方地址": "青岛市市南区东海西路17号"
  47. },
  48. "履行": {
  49. "交付与验收条件": "当场验收货物品类/规格/型号/数量/外包装",
  50. "交付时间": "按订单约定时间发货(普通快递时效)",
  51. "交付金额": "",
  52. "支付方式": "账期支付",
  53. "支付金额": "",
  54. "支付时间节点": "固定账单日每月15日出账,还款周期内支付"
  55. },
  56. "表格": {
  57. "序号": "",
  58. "标的物": "",
  59. "生产厂家": "",
  60. "品牌": "",
  61. "规格型号": "",
  62. "计量单位": "",
  63. "数量": "",
  64. "含税单价": "",
  65. "不含税总价": "",
  66. "税率": "",
  67. "增值税合计": "",
  68. "价税合计": ""
  69. }
  70. }
  71. # 将JSON数据转换为DataFrame以方便操作
  72. data_rows = []
  73. for category, fields in json_data.items():
  74. for field_name, attributes in fields.items():
  75. data_rows.append({'类别': category, '字段描述': field_name, '属性': attributes})
  76. df_data = pd.DataFrame(data_rows)
  77. # 合并两个DataFrame:基于'类别'和'字段描述'列进行左连接
  78. merged_df = df_template.merge(df_data, on=['类别', '字段描述'], how='left', suffixes=('', '_data'))
  79. # 更新属性列,用来自JSON的数据填充
  80. merged_df['属性'] = merged_df['属性_data'].fillna(merged_df['属性'])
  81. del merged_df['属性_data']
  82. # 使用openpyxl加载原始Excel文件
  83. wb = load_workbook(template_path)
  84. ws = wb.active
  85. # 遍历merged_df并将属性列的值写回到对应的单元格中
  86. row_num = 2 # 假设从第二行开始有数据
  87. for idx, row in merged_df.iterrows():
  88. target_cell = ws[f"C{row_num}"] # 假定属性列是C列
  89. target_cell.value = row['属性']
  90. row_num += 1
  91. # 保存工作簿
  92. output_path = '更新后的合同模板.xlsx'
  93. wb.save(output_path)
  94. print(f"已成功保存到{output_path}")