# tmp5.py (4.3 KB)
  1. def exec(obj):
  2. from sqlalchemy import create_engine
  3. import pandas as pd
  4. from urllib.parse import quote_plus as urlquote
  5. import jieba
  6. from gensim import corpora, models, similarities
  7. def cReate_dAta_cOnn():
  8. engine = create_engine('mysql+pymysql://aimp_user:'+urlquote('vjeygLP76n7%UPx@')+'@rm-qsls3302.mysql.rds.aliyuncs.com:3302/bi_application')
  9. return engine
  10. def rEad_aNd_dAtaframe(sql_query):
  11. engine = cReate_dAta_cOnn()
  12. df = pd.read_sql(sql_query,engine)
  13. engine.dispose()
  14. return df
  15. def flag(x):
  16. if (x['sensoir'] == x['person']) and len(x['sensoir'])==3:
  17. return 1
  18. elif (x['sensoir'] == x['person']) and len(x['sensoir'])==2:
  19. return 2
  20. else:
  21. return 3
  22. def flag1(x):
  23. if x['f1'] == 1:
  24. return 1
  25. else:
  26. return 0
  27. def flag2(x):
  28. if (x['f1'] == 2) and x['sims']>0.8:
  29. return 1
  30. else:
  31. return 0
  32. def flag3(x):
  33. if x['sims']>0.95:
  34. return 1
  35. else:
  36. return 0
  37. def cal_similar(doc_goal,ssim):
  38. doc = rEad_aNd_dAtaframe('''select distinct credit_no,econ_reg_address from ext_anti_fraud_address ''')
  39. doc_list = [jieba.lcut(w) for w in doc['econ_reg_address']]
  40. target = [word for word in jieba.cut(doc_goal)]
  41. dictionary = corpora.Dictionary(doc_list)
  42. corpus = [dictionary.doc2bow(doc) for doc in doc_list]
  43. doc_goal_vec = dictionary.doc2bow(target)
  44. tfidf = models.TfidfModel(corpus)
  45. index = similarities.SparseMatrixSimilarity(tfidf[corpus], num_features = len(dictionary.keys()))
  46. sims = index[tfidf[doc_goal_vec]]
  47. similary = pd.DataFrame({"risk_address": list(doc['econ_reg_address']), "sims": list(sims)})
  48. similary["申请企业注册地址"] = doc_goal
  49. similary_data = similary[["申请企业注册地址", "risk_address", "sims"]].drop_duplicates()
  50. similary_data= similary_data[similary_data["sims"]>=ssim]
  51. return similary_data
  52. lcity = list(obj['city'])
  53. if len(lcity)>1 and len(lcity[0])>1:
  54. lcity = list(obj['city'])[0]
  55. elif len(lcity)==1 and len(lcity[0])>1:
  56. lcity = obj['city']
  57. else:
  58. obj['flag1'] = 0
  59. obj['flag2'] = 0
  60. obj['flag3'] = 0
  61. return obj
  62. credit = []
  63. address = []
  64. sensoir = list(obj['oper_names'])
  65. if len(sensoir)<1:
  66. obj['flag1'] = 0
  67. obj['flag2'] = 0
  68. credit.append(obj['credit_code'])
  69. address.append(obj['address_'])
  70. df_app = pd.DataFrame({"credit": credit,"address": address})
  71. df_add = rEad_aNd_dAtaframe("select distinct credit_no,econ_reg_address from ext_anti_fraud_address where city = {}".format(lcity))
  72. similary_data=cal_similar(df_app['address'].max(),0.95)
  73. if similary_data.shape[0]>0:
  74. obj['flag3'] = 1
  75. else:
  76. obj['flag3'] = 0
  77. obj['similary_data']=similary_data.to_json(orient='records')
  78. return obj
  79. else:
  80. for i in sensoir:
  81. credit.append(obj['credit_code'])
  82. address.append(obj['address_'])
  83. df_app = pd.DataFrame({"credit": credit, "sensoir": sensoir,"address": address})
  84. df_add = rEad_aNd_dAtaframe("select distinct credit_no,econ_reg_address from ext_anti_fraud_address where city = {}".format(lcity))
  85. lcredit =str(list(df_add['credit_no'])).replace('[','').replace(']','')
  86. df_per = rEad_aNd_dAtaframe("select distinct credit_no,person from ext_anti_fraud_senior_person where credit_no in ({})".format(lcredit))
  87. df_dec = pd.merge(df_app,df_per,left_on = 'sensoir',right_on = 'person',how = 'inner')
  88. df_f = pd.merge(df_dec,df_add,on = 'credit_no',how = 'left')
  89. if df_f.shape[0]<1:
  90. obj['flag1'] = 0
  91. obj['flag2'] = 0
  92. obj['flag3'] = 0
  93. obj['df_f']=df_f.to_json(orient='records')
  94. return obj
  95. else:
  96. df_f['f1']=df_f.apply(flag,axis=1)
  97. similary_data=cal_similar(df_f['address'].max(),0)
  98. df = pd.merge(df_f,similary_data,left_on='econ_reg_address',right_on = 'risk_address',how = 'left')
  99. df['flag1']=df.apply(flag1,axis=1)
  100. df['flag2']=df.apply(flag2,axis=1)
  101. df['flag3']=df.apply(flag3,axis=1)#保存
  102. obj['df']=df.to_json(orient='records')
  103. obj['flag1']=df['flag1'].max()
  104. obj['flag2']=df['flag2'].max()
  105. obj['flag3']=df['flag3'].max()
  106. return obj