123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104 |
- import pandas as pd
- from openpyxl.reader.excel import load_workbook
- # 读取模板Excel文件
- template_path = '/Users/alvin/Downloads/合同抽取字段表述250321.xlsx'
- df_template = pd.read_excel(template_path, engine='openpyxl')
- # 预处理:处理合并单元格逻辑
- for i in range(1, len(df_template)): # 从第二行开始检查
- if pd.isna(df_template.loc[i, '类别']) and not pd.isna(df_template.loc[i, '字段描述']):
- df_template.loc[i, '类别'] = df_template.loc[i-1, '类别']
- # 定义JSON数据
- json_data = {
- "基础": {
- "合同总金额": "未填写(赊销总额度100万元)",
- "不含税金额": "",
- "税率": "",
- "税额": "",
- "合同类型": "买卖合同",
- "签订日期": "",
- "开始日期": "2021年(具体月份未填写)",
- "结束日期": "2027年12月31日",
- "违约金": "逾期60天内按0.03%/日计费,超60天按0.05%/日计费",
- "质保金": "",
- "质保期": "",
- "固定总价": "",
- "固定单价": "",
- "暂估金额": "",
- "付款方式": "账期结算(固定账单日每月15日,账期60天)",
- "发票类型": "增值税专用发票"
- },
- "甲乙双方信息": {
- "乙方名称": "北京京东世纪信息技术有限公司",
- "乙方账户名称": "北京京东世纪信息技术有限公司",
- "乙方开户行": "招商银行股份有限公司北京青年路支行",
- "乙方银行账号": "110907697010208",
- "乙方联系人": "张洪君",
- "乙方联系电话": "15701277612",
- "乙方邮箱": "zhanghongjun13@jd.com",
- "乙方地址": "北京经济技术开发区科创十一街18号院C座2层215室",
- "甲方名称": "海信集团有限公司",
- "甲方账户名称": "",
- "甲方开户行": "",
- "甲方银行账号": "",
- "甲方联系人": "李明",
- "甲方联系电话": "13812345678",
- "甲方邮箱": "554857923@qq.com",
- "甲方地址": "青岛市市南区东海西路17号"
- },
- "履行": {
- "交付与验收条件": "当场验收货物品类/规格/型号/数量/外包装",
- "交付时间": "按订单约定时间发货(普通快递时效)",
- "交付金额": "",
- "支付方式": "账期支付",
- "支付金额": "",
- "支付时间节点": "固定账单日每月15日出账,还款周期内支付"
- },
- "表格": {
- "序号": "",
- "标的物": "",
- "生产厂家": "",
- "品牌": "",
- "规格型号": "",
- "计量单位": "",
- "数量": "",
- "含税单价": "",
- "不含税总价": "",
- "税率": "",
- "增值税合计": "",
- "价税合计": ""
- }
- }
- # 将JSON数据转换为DataFrame以方便操作
- data_rows = []
- for category, fields in json_data.items():
- for field_name, attributes in fields.items():
- data_rows.append({'类别': category, '字段描述': field_name, '属性': attributes})
- df_data = pd.DataFrame(data_rows)
- # 合并两个DataFrame:基于'类别'和'字段描述'列进行左连接
- merged_df = df_template.merge(df_data, on=['类别', '字段描述'], how='left', suffixes=('', '_data'))
- # 更新属性列,用来自JSON的数据填充
- merged_df['属性'] = merged_df['属性_data'].fillna(merged_df['属性'])
- del merged_df['属性_data']
- # 使用openpyxl加载原始Excel文件
- wb = load_workbook(template_path)
- ws = wb.active
- # 遍历merged_df并将属性列的值写回到对应的单元格中
- row_num = 2 # 假设从第二行开始有数据
- for idx, row in merged_df.iterrows():
- target_cell = ws[f"C{row_num}"] # 假定属性列是C列
- target_cell.value = row['属性']
- row_num += 1
- # 保存工作簿
- output_path = '更新后的合同模板.xlsx'
- wb.save(output_path)
- print(f"已成功保存到{output_path}")
|