import pandas as pd from openpyxl.reader.excel import load_workbook # 读取模板Excel文件 template_path = '/Users/alvin/Downloads/合同抽取字段表述250321.xlsx' df_template = pd.read_excel(template_path, engine='openpyxl') # 预处理:处理合并单元格逻辑 for i in range(1, len(df_template)): # 从第二行开始检查 if pd.isna(df_template.loc[i, '类别']) and not pd.isna(df_template.loc[i, '字段描述']): df_template.loc[i, '类别'] = df_template.loc[i-1, '类别'] # 定义JSON数据 json_data = { "基础": { "合同总金额": "未填写(赊销总额度100万元)", "不含税金额": "", "税率": "", "税额": "", "合同类型": "买卖合同", "签订日期": "", "开始日期": "2021年(具体月份未填写)", "结束日期": "2027年12月31日", "违约金": "逾期60天内按0.03%/日计费,超60天按0.05%/日计费", "质保金": "", "质保期": "", "固定总价": "", "固定单价": "", "暂估金额": "", "付款方式": "账期结算(固定账单日每月15日,账期60天)", "发票类型": "增值税专用发票" }, "甲乙双方信息": { "乙方名称": "北京京东世纪信息技术有限公司", "乙方账户名称": "北京京东世纪信息技术有限公司", "乙方开户行": "招商银行股份有限公司北京青年路支行", "乙方银行账号": "110907697010208", "乙方联系人": "张洪君", "乙方联系电话": "15701277612", "乙方邮箱": "zhanghongjun13@jd.com", "乙方地址": "北京经济技术开发区科创十一街18号院C座2层215室", "甲方名称": "海信集团有限公司", "甲方账户名称": "", "甲方开户行": "", "甲方银行账号": "", "甲方联系人": "李明", "甲方联系电话": "13812345678", "甲方邮箱": "554857923@qq.com", "甲方地址": "青岛市市南区东海西路17号" }, "履行": { "交付与验收条件": "当场验收货物品类/规格/型号/数量/外包装", "交付时间": "按订单约定时间发货(普通快递时效)", "交付金额": "", "支付方式": "账期支付", "支付金额": "", "支付时间节点": "固定账单日每月15日出账,还款周期内支付" }, "表格": { "序号": "", "标的物": "", "生产厂家": "", "品牌": "", "规格型号": "", "计量单位": "", "数量": "", "含税单价": "", "不含税总价": "", "税率": "", "增值税合计": "", "价税合计": "" } } # 将JSON数据转换为DataFrame以方便操作 data_rows = [] for category, fields in json_data.items(): for field_name, attributes in fields.items(): data_rows.append({'类别': category, '字段描述': field_name, '属性': attributes}) df_data = pd.DataFrame(data_rows) # 合并两个DataFrame:基于'类别'和'字段描述'列进行左连接 merged_df = df_template.merge(df_data, on=['类别', '字段描述'], how='left', suffixes=('', '_data')) # 更新属性列,用来自JSON的数据填充 merged_df['属性'] = merged_df['属性_data'].fillna(merged_df['属性']) del merged_df['属性_data'] # 使用openpyxl加载原始Excel文件 wb = load_workbook(template_path) ws = wb.active # 遍历merged_df并将属性列的值写回到对应的单元格中 row_num = 2 # 假设从第二行开始有数据 for idx, row in merged_df.iterrows(): target_cell = ws[f"C{row_num}"] # 假定属性列是C列 target_cell.value = row['属性'] row_num += 1 # 保存工作簿 output_path = '更新后的合同模板.xlsx' wb.save(output_path) print(f"已成功保存到{output_path}")