123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114 |
def exec(obj):
    """Score an applicant company against the anti-fraud address/person tables.

    NOTE: the name shadows the ``exec`` builtin; it is kept unchanged because
    callers outside this block refer to it.

    Keys read from ``obj``:
        city:        iterable of city values; only the first element is used.
                     Presumably a one-element list -- TODO confirm with caller.
        oper_names:  iterable of senior-person names of the applicant.
        credit_code: applicant credit code (only copied into the result rows).
        address_:    applicant registered address (a string tokenised by jieba).

    Keys written to ``obj``:
        flag1: 1 when a 3-character applicant name equals a person linked to a
               risk company in the same city, else 0.
        flag2: 1 when a 2-character name matches and the address similarity of
               that risk company is > 0.8, else 0.
        flag3: 1 when a risk address is (almost) identical to the applicant's
               address, else 0.
        similary_data / df_f / df: JSON (``orient='records'``) with the rows
               backing the decision; which key is set depends on the path taken.

    Returns:
        The same ``obj`` that was passed in, mutated in place.
    """
    cities = list(obj['city'])
    # Reject a missing city or a first element of length <= 1 before paying
    # for the heavy imports (jieba / gensim) and the database connection.
    if not cities or len(cities[0]) <= 1:
        obj['flag1'] = 0
        obj['flag2'] = 0
        obj['flag3'] = 0
        return obj
    # The original formatted the whole ``obj['city']`` container into the SQL
    # when it held exactly one element; the first element is what is meant.
    city = cities[0]

    import os
    from urllib.parse import quote_plus

    import jieba
    import pandas as pd
    from gensim import corpora, models, similarities
    from sqlalchemy import bindparam, create_engine, text

    name_match_sim = 0.8    # flag2: similarity required for 2-character names
    same_address_sim = 0.95  # flag3: similarity treated as "same address"
    sim_columns = ["申请企业注册地址", "risk_address", "sims"]

    # NOTE(review): credentials are hard-coded in source. The environment
    # variable lets deployments override them; the literal is kept only so
    # existing installs keep working -- rotate it and remove the fallback.
    db_url = os.environ.get('BI_APPLICATION_DB_URL') or (
        'mysql+pymysql://aimp_user:'
        + quote_plus('vjeygLP76n7%UPx@')
        + '@rm-qsls3302.mysql.rds.aliyuncs.com:3302/bi_application'
    )
    engine = create_engine(db_url)

    def _read_frame(statement, params=None):
        """Run *statement* on the shared engine and return a DataFrame."""
        return pd.read_sql(statement, engine, params=params)

    def _city_risk_companies():
        """Risk companies (credit_no, address) registered in the applicant's city."""
        return _read_frame(
            text(
                "select distinct credit_no,econ_reg_address "
                "from ext_anti_fraud_address where city = :city"
            ),
            {'city': city},
        )

    def _risk_persons(credit_nos):
        """Persons linked to the given risk companies; empty frame if none."""
        if not credit_nos:
            # "in ()" is a SQL syntax error, so skip the round-trip entirely.
            return pd.DataFrame(columns=['credit_no', 'person'])
        statement = text(
            "select distinct credit_no,person "
            "from ext_anti_fraud_senior_person where credit_no in :credit_nos"
        ).bindparams(bindparam('credit_nos', expanding=True))
        return _read_frame(statement, {'credit_nos': credit_nos})

    def _similar_addresses(target_address, min_sim):
        """TF-IDF cosine similarity of *target_address* to every risk address.

        The corpus is the whole ext_anti_fraud_address table (not filtered by
        city), as in the original implementation. Rows with similarity below
        *min_sim* are dropped.
        """
        risk = _read_frame(
            text("select distinct credit_no,econ_reg_address from ext_anti_fraud_address")
        )
        risk_addresses = risk['econ_reg_address'].dropna().tolist()
        if not risk_addresses:
            return pd.DataFrame({
                "申请企业注册地址": pd.Series(dtype=object),
                "risk_address": pd.Series(dtype=object),
                "sims": pd.Series(dtype=float),
            })
        tokenized = [jieba.lcut(address) for address in risk_addresses]
        dictionary = corpora.Dictionary(tokenized)
        corpus = [dictionary.doc2bow(tokens) for tokens in tokenized]
        tfidf = models.TfidfModel(corpus)
        index = similarities.SparseMatrixSimilarity(
            tfidf[corpus], num_features=len(dictionary)
        )
        target_vec = dictionary.doc2bow(jieba.lcut(target_address))
        sims = index[tfidf[target_vec]]
        frame = pd.DataFrame({"risk_address": risk_addresses, "sims": list(sims)})
        frame["申请企业注册地址"] = target_address
        frame = frame[sim_columns].drop_duplicates()
        return frame[frame["sims"] >= min_sim]

    try:
        seniors = list(obj['oper_names'])

        if not seniors:
            # No senior persons to match: only the address check applies.
            obj['flag1'] = 0
            obj['flag2'] = 0
            similar = _similar_addresses(obj['address_'], same_address_sim)
            obj['flag3'] = 1 if similar.shape[0] > 0 else 0
            obj['similary_data'] = similar.to_json(orient='records')
            return obj

        applicant = pd.DataFrame({
            "credit": [obj['credit_code']] * len(seniors),
            "sensoir": seniors,
            "address": [obj['address_']] * len(seniors),
        })
        risk_companies = _city_risk_companies()
        credit_nos = risk_companies['credit_no'].dropna().unique().tolist()
        risk_persons = _risk_persons(credit_nos)

        # Applicant seniors whose name equals a person of a risk company,
        # enriched with that company's registered address.
        matches = pd.merge(
            applicant, risk_persons,
            left_on='sensoir', right_on='person', how='inner',
        )
        matches = pd.merge(matches, risk_companies, on='credit_no', how='left')

        if matches.shape[0] < 1:
            obj['flag1'] = 0
            obj['flag2'] = 0
            obj['flag3'] = 0
            obj['df_f'] = matches.to_json(orient='records')
            return obj

        # f1 encodes how trustworthy the name match is:
        # 1 = 3-character name, 2 = 2-character name, 3 = anything else.
        same_name = matches['sensoir'] == matches['person']
        name_len = matches['sensoir'].str.len()
        matches['f1'] = 3
        matches.loc[same_name & (name_len == 2), 'f1'] = 2
        matches.loc[same_name & (name_len == 3), 'f1'] = 1

        similar = _similar_addresses(obj['address_'], 0)
        scored = pd.merge(
            matches, similar,
            left_on='econ_reg_address', right_on='risk_address', how='left',
        )
        # Missing similarities (NaN) compare as False and therefore yield 0.
        scored['flag1'] = (scored['f1'] == 1).astype(int)
        scored['flag2'] = (
            (scored['f1'] == 2) & (scored['sims'] > name_match_sim)
        ).astype(int)
        scored['flag3'] = (scored['sims'] > same_address_sim).astype(int)

        obj['df'] = scored.to_json(orient='records')
        # Plain ints keep the result consistent with the other return paths.
        obj['flag1'] = int(scored['flag1'].max())
        obj['flag2'] = int(scored['flag2'].max())
        obj['flag3'] = int(scored['flag3'].max())
        return obj
    finally:
        # Release pooled connections even when a query or the scoring fails.
        engine.dispose()
|