Commit e5738731 authored by 张彦钊

add

parent c304c4d8
...@@ -24,7 +24,7 @@ def doctor(): ...@@ -24,7 +24,7 @@ def doctor():
df = df.rename(columns=dict(zip(list(range(len(name))), name))) df = df.rename(columns=dict(zip(list(range(len(name))), name)))
sql = "select merchant_id,doctor_ad_money_30_days,expand_rechange_amount_30," \ sql = "select merchant_id,doctor_discount_30_days,expand_rechange_amount_30," \
"service_pv_30,expert_pv_30,organization_pv_30 from statistic_merchant_rank_factor " \ "service_pv_30,expert_pv_30,organization_pv_30 from statistic_merchant_rank_factor " \
"where partition_date = '{}';".format(date_str) "where partition_date = '{}';".format(date_str)
...@@ -33,7 +33,7 @@ def doctor(): ...@@ -33,7 +33,7 @@ def doctor():
result = cursor.fetchall() result = cursor.fetchall()
db.close() db.close()
tmp = pd.DataFrame(list(result)) tmp = pd.DataFrame(list(result))
name = ["merchant_id", "doctor_ad_money_30_days", "expand_rechange_amount_30", "service_pv_30", name = ["merchant_id", "doctor_discount_30_days", "expand_rechange_amount_30", "service_pv_30",
"mexpert_pv_30", "organization_pv_30"] "mexpert_pv_30", "organization_pv_30"]
tmp = tmp.rename(columns=dict(zip(list(range(len(name))), name))) tmp = tmp.rename(columns=dict(zip(list(range(len(name))), name)))
...@@ -42,7 +42,7 @@ def doctor(): ...@@ -42,7 +42,7 @@ def doctor():
df = pd.merge(df, tmp, on='merchant_id') df = pd.merge(df, tmp, on='merchant_id')
for i in ["service_exposure_pv_30", "service_ctr_30", "expert_exposure_pv_30", "expert_pv_30", for i in ["service_exposure_pv_30", "service_ctr_30", "expert_exposure_pv_30", "expert_pv_30",
"doctor_ad_money_30_days", "expand_rechange_amount_30", "service_pv_30", "doctor_discount_30_days", "expand_rechange_amount_30", "service_pv_30",
"mexpert_pv_30", "organization_pv_30", "budan_payment_30_days"]: "mexpert_pv_30", "organization_pv_30", "budan_payment_30_days"]:
df[i] = df[i].astype("float") df[i] = df[i].astype("float")
...@@ -55,10 +55,11 @@ def doctor(): ...@@ -55,10 +55,11 @@ def doctor():
df["ctr"] = df["service_exposure_pv_30"] / df["all_exposure"] * df["service_ctr_30"] + \ df["ctr"] = df["service_exposure_pv_30"] / df["all_exposure"] * df["service_ctr_30"] + \
df["expert_exposure_pv_30"]/df["all_exposure"] * (df["expert_pv_30"] / df["expert_exposure_pv_30"]) df["expert_exposure_pv_30"]/df["all_exposure"] * (df["expert_pv_30"] / df["expert_exposure_pv_30"])
df.loc[df["doctor_ad_money_30_days"] < 0, ["doctor_ad_money_30_days"]] = 0 df.loc[df["doctor_discount_30_days"] < 0, ["doctor_discount_30_days"]] = 0
df.loc[df["budan_payment_30_days"] < 0, ["budan_payment_30_days"]] = 0 df.loc[df["budan_payment_30_days"] < 0, ["budan_payment_30_days"]] = 0
df.loc[df["expand_rechange_amount_30"] < 0, ["expand_rechange_amount_30"]] = 0
df["commission"] = (df["doctor_ad_money_30_days"] + df["budan_payment_30_days"])/df["tmp"] df["commission"] = (df["doctor_discount_30_days"] + df["budan_payment_30_days"])/df["tmp"]
df["pv_ad"] = df["expand_rechange_amount_30"]/df["tmp"] df["pv_ad"] = df["expand_rechange_amount_30"]/df["tmp"]
df["score"] = df["ctr"]**0.5 * (df["commission"] + df["pv_ad"]) df["score"] = df["ctr"]**0.5 * (df["commission"] + df["pv_ad"])
...@@ -72,12 +73,12 @@ def doctor(): ...@@ -72,12 +73,12 @@ def doctor():
columns = ["doctor_id","score","ctr","commission","pv_ad","service_exposure_pv_30", columns = ["doctor_id","score","ctr","commission","pv_ad","service_exposure_pv_30",
"service_ctr_30","expert_exposure_pv_30","expert_pv_30", "service_ctr_30","expert_exposure_pv_30","expert_pv_30",
"merchant_id","doctor_ad_money_30_days","expand_rechange_amount_30","service_pv_30", "merchant_id","doctor_discount_30_days","expand_rechange_amount_30","service_pv_30",
"mexpert_pv_30","organization_pv_30","budan_payment_30_days"] "mexpert_pv_30","organization_pv_30","budan_payment_30_days"]
data = df.loc[:, columns] data = df.loc[:, columns]
data = data.drop_duplicates() data = data.drop_duplicates()
data.to_csv('/tmp/doctor.csv',index=False) data.to_csv('/tmp/20_doctor.csv',index=False)
print("doctor end") print("doctor end")
...@@ -106,10 +107,8 @@ def hospital(): ...@@ -106,10 +107,8 @@ def hospital():
df = df.rename(columns=dict(zip(list(range(len(name))), name))) df = df.rename(columns=dict(zip(list(range(len(name))), name)))
print(df.head(6)) sql = "select merchant_id,doctor_discount_30_days," \
"service_pv_30,expert_pv_30,organization_pv_30,expand_rechange_amount_30 from statistic_merchant_rank_factor " \
sql = "select merchant_id,doctor_ad_money_30_days," \
"service_pv_30,expert_pv_30,organization_pv_30,doctor_discount_30_days from statistic_merchant_rank_factor " \
"where partition_date = '{}';".format(date_str) "where partition_date = '{}';".format(date_str)
cursor = db.cursor() cursor = db.cursor()
...@@ -117,7 +116,7 @@ def hospital(): ...@@ -117,7 +116,7 @@ def hospital():
result = cursor.fetchall() result = cursor.fetchall()
db.close() db.close()
tmp = pd.DataFrame(list(result)) tmp = pd.DataFrame(list(result))
name = ["merchant_id", "doctor_ad_money_30_days","service_pv_30","mexpert_pv_30","organization_pv_30", name = ["merchant_id", "doctor_discount_30_days","service_pv_30","mexpert_pv_30","organization_pv_30",
"doctor_discount_30_days"] "doctor_discount_30_days"]
tmp = tmp.rename(columns=dict(zip(list(range(len(name))), name))) tmp = tmp.rename(columns=dict(zip(list(range(len(name))), name)))
print(tmp.head(6)) print(tmp.head(6))
...@@ -128,8 +127,8 @@ def hospital(): ...@@ -128,8 +127,8 @@ def hospital():
for i in ["hospital_exposure_pv_30","service_exposure_pv_30","expert_exposure_pv_30", for i in ["hospital_exposure_pv_30","service_exposure_pv_30","expert_exposure_pv_30",
"service_ctr_30","hospital_ctr_30","expert_ctr_30","budan_payment_30_days", "service_ctr_30","hospital_ctr_30","expert_ctr_30","budan_payment_30_days",
"doctor_ad_money_30_days","service_pv_30","mexpert_pv_30","organization_pv_30", "doctor_discount_30_days","service_pv_30","mexpert_pv_30","organization_pv_30",
"doctor_discount_30_days"]: "expand_rechange_amount_30"]:
df[i] = df[i].astype("float") df[i] = df[i].astype("float")
df["all_exposure"] = df["hospital_exposure_pv_30"] + df["service_exposure_pv_30"] + df["expert_exposure_pv_30"] df["all_exposure"] = df["hospital_exposure_pv_30"] + df["service_exposure_pv_30"] + df["expert_exposure_pv_30"]
...@@ -141,11 +140,12 @@ def hospital(): ...@@ -141,11 +140,12 @@ def hospital():
df["hospital_exposure_pv_30"]/ df["all_exposure"] * df["hospital_ctr_30"] + \ df["hospital_exposure_pv_30"]/ df["all_exposure"] * df["hospital_ctr_30"] + \
df["expert_exposure_pv_30"]/df["all_exposure"] * df["expert_ctr_30"] df["expert_exposure_pv_30"]/df["all_exposure"] * df["expert_ctr_30"]
df.loc[df["doctor_ad_money_30_days"] < 0, ["doctor_ad_money_30_days"]] = 0 df.loc[df["doctor_discount_30_days"] < 0, ["doctor_discount_30_days"]] = 0
df.loc[df["budan_payment_30_days"] < 0, ["budan_payment_30_days"]] = 0 df.loc[df["budan_payment_30_days"] < 0, ["budan_payment_30_days"]] = 0
df.loc[df["expand_rechange_amount_30"] < 0, ["expand_rechange_amount_30"]] = 0
df["commission"] = (df["doctor_ad_money_30_days"] + df["budan_payment_30_days"])/df["tmp"] df["commission"] = (df["doctor_discount_30_days"] + df["budan_payment_30_days"])/df["tmp"]
df["cpt"] = df["doctor_discount_30_days"]/df["tmp"] df["cpt"] = df["expand_rechange_amount_30"]/df["tmp"]
df["score"] = df["ctr"]**0.5 * (df["commission"] + df["cpt"]) df["score"] = df["ctr"]**0.5 * (df["commission"] + df["cpt"])
df.loc[df["all_exposure"] <= 1500, ["ctr"]] = 0.01 df.loc[df["all_exposure"] <= 1500, ["ctr"]] = 0.01
...@@ -159,14 +159,14 @@ def hospital(): ...@@ -159,14 +159,14 @@ def hospital():
columns = ["doctor_id","score","ctr","commission","cpt","hospital_id","hospital_exposure_pv_30", columns = ["doctor_id","score","ctr","commission","cpt","hospital_id","hospital_exposure_pv_30",
"service_exposure_pv_30","expert_exposure_pv_30", "service_exposure_pv_30","expert_exposure_pv_30",
"service_ctr_30","hospital_ctr_30","expert_ctr_30","merchant_id","budan_payment_30_days", "service_ctr_30","hospital_ctr_30","expert_ctr_30","merchant_id","budan_payment_30_days",
"doctor_ad_money_30_days","service_pv_30","mexpert_pv_30","organization_pv_30", "doctor_discount_30_days","service_pv_30","mexpert_pv_30","organization_pv_30",
"doctor_discount_30_days"] "expand_rechange_amount_30"]
data = df.loc[:, columns] data = df.loc[:, columns]
data = data.drop_duplicates() data = data.drop_duplicates()
print(data.head(6)) print(data.head(6))
data.to_csv('/tmp/hospital.csv',index=False,encoding="utf_8_sig") data.to_csv('/tmp/20_hospital.csv',index=False)
if __name__ == "__main__": if __name__ == "__main__":
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment