def exec(obj):
    """Compute anti-fraud flags for one applicant record and store them on ``obj``.

    NOTE: the name shadows the built-in ``exec``; it is kept because it is
    this block's public entry point.

    All imports and helpers are function-local so the function stays
    self-contained.

    Args:
        obj: Mutable mapping-like record. Keys read:
            ``'city'``       -- iterable of city names; only the first is used.
            ``'oper_names'`` -- iterable of the applicant's senior-person names.
            ``'credit_code'`` -- applicant identifier (copied into the work frame).
            ``'address_'``   -- applicant registered address, tokenised with jieba.

    Returns:
        The same ``obj``, updated in place with ``flag1``, ``flag2``, ``flag3``
        (each 0 or 1) and, depending on the path taken, one JSON string
        (``orient='records'``):

        * no usable city: all three flags are 0, nothing else is written;
        * no senior names: ``flag1 = flag2 = 0``; ``flag3`` is 1 when any
          address in ``ext_anti_fraud_address`` has TF-IDF similarity >= 0.95
          to the applicant address; matches go to ``obj['similary_data']``;
        * senior names but no name match in the city: all flags 0, the empty
          join goes to ``obj['df_f']``;
        * otherwise the scored join goes to ``obj['df']`` and
          ``flag1`` = some matched name has 3 characters,
          ``flag2`` = some matched name has 2 characters and similarity > 0.8,
          ``flag3`` = some matched row has similarity > 0.95.
    """
    cities = list(obj['city'])
    # A missing city, or a first entry of at most one character, is treated
    # as "no city": nothing can be looked up, so every flag is cleared.
    if not cities or len(cities[0]) <= 1:
        obj['flag1'] = 0
        obj['flag2'] = 0
        obj['flag3'] = 0
        return obj
    city = cities[0]

    # Heavy third-party imports are deferred until they are actually needed,
    # so the early-return path above stays cheap.
    from urllib.parse import quote_plus

    import jieba
    import pandas as pd
    from gensim import corpora, models, similarities
    from sqlalchemy import bindparam, create_engine, text

    def read_dataframe(statement):
        """Run ``statement`` on a fresh engine and return the rows as a DataFrame."""
        # SECURITY: credentials are hard-coded in source. They are left
        # unchanged here to preserve behaviour, but should be moved to
        # configuration / a secret store and rotated.
        engine = create_engine(
            'mysql+pymysql://aimp_user:' + quote_plus('vjeygLP76n7%UPx@')
            + '@rm-qsls3302.mysql.rds.aliyuncs.com:3302/bi_application'
        )
        try:
            return pd.read_sql(statement, engine)
        finally:
            # Release pooled connections even when the query raises.
            engine.dispose()

    def similar_addresses(doc_goal, ssim):
        """Return risk addresses whose TF-IDF similarity to ``doc_goal`` is >= ``ssim``."""
        risk = read_dataframe(
            '''select distinct credit_no,econ_reg_address from ext_anti_fraud_address '''
        )
        tokenised = [jieba.lcut(addr) for addr in risk['econ_reg_address']]
        dictionary = corpora.Dictionary(tokenised)
        corpus = [dictionary.doc2bow(tokens) for tokens in tokenised]
        tfidf = models.TfidfModel(corpus)
        index = similarities.SparseMatrixSimilarity(
            tfidf[corpus], num_features=len(dictionary)
        )
        sims = index[tfidf[dictionary.doc2bow(jieba.lcut(doc_goal))]]

        scored = pd.DataFrame(
            {"risk_address": list(risk['econ_reg_address']), "sims": list(sims)}
        )
        scored["申请企业注册地址"] = doc_goal
        scored = scored[["申请企业注册地址", "risk_address", "sims"]].drop_duplicates()
        return scored[scored["sims"] >= ssim]

    def name_match_class(row):
        """Classify a joined row: 1 = 3-char name match, 2 = 2-char match, 3 = other."""
        if row['sensoir'] != row['person']:
            return 3
        return {3: 1, 2: 2}.get(len(row['sensoir']), 3)

    applicant_address = obj['address_']
    senior_names = list(obj['oper_names'])

    if not senior_names:
        # Without senior names only the address-similarity check applies.
        obj['flag1'] = 0
        obj['flag2'] = 0
        similary_data = similar_addresses(applicant_address, 0.95)
        obj['flag3'] = 1 if len(similary_data) > 0 else 0
        obj['similary_data'] = similary_data.to_json(orient='records')
        return obj

    # One work row per senior person, each carrying the applicant's id/address.
    df_app = pd.DataFrame({
        "credit": [obj['credit_code']] * len(senior_names),
        "sensoir": senior_names,
        "address": [applicant_address] * len(senior_names),
    })

    # Bound parameters let the driver quote the values; raw string
    # interpolation produced invalid (and injectable) SQL for plain city names.
    df_add = read_dataframe(
        text(
            "select distinct credit_no,econ_reg_address "
            "from ext_anti_fraud_address where city = :city"
        ).bindparams(city=city)
    )

    credit_nos = df_add['credit_no'].dropna().unique().tolist()
    if credit_nos:
        df_per = read_dataframe(
            text(
                "select distinct credit_no,person "
                "from ext_anti_fraud_senior_person where credit_no in :credit_nos"
            ).bindparams(bindparam('credit_nos', value=credit_nos, expanding=True))
        )
    else:
        # No risk company in this city: skip the query, since an empty
        # IN () list is a SQL syntax error.
        df_per = pd.DataFrame(columns=['credit_no', 'person'])

    # Keep only applicant seniors whose name appears among the risk
    # companies' senior persons, then attach those companies' addresses.
    df_dec = pd.merge(df_app, df_per, left_on='sensoir', right_on='person', how='inner')
    df_f = pd.merge(df_dec, df_add, on='credit_no', how='left')

    if len(df_f) == 0:
        obj['flag1'] = 0
        obj['flag2'] = 0
        obj['flag3'] = 0
        obj['df_f'] = df_f.to_json(orient='records')
        return obj

    df_f['f1'] = df_f.apply(name_match_class, axis=1)
    # Threshold 0 keeps every risk address so each joined row gets a score.
    similary_data = similar_addresses(applicant_address, 0)
    df = pd.merge(
        df_f, similary_data,
        left_on='econ_reg_address', right_on='risk_address', how='left',
    )
    # Rows with no similarity score (NaN) compare False and therefore score 0.
    df['flag1'] = (df['f1'] == 1).astype(int)
    df['flag2'] = ((df['f1'] == 2) & (df['sims'] > 0.8)).astype(int)
    df['flag3'] = (df['sims'] > 0.95).astype(int)

    # Save the scored rows and the record-level flags.
    obj['df'] = df.to_json(orient='records')
    # int() keeps the flags plain Python ints, matching the other branches.
    obj['flag1'] = int(df['flag1'].max())
    obj['flag2'] = int(df['flag2'].max())
    obj['flag3'] = int(df['flag3'].max())
    return obj