Commit 57dd48b1 authored by 张彦钊

add

parent 213c8c9d
...@@ -321,11 +321,11 @@ def old(): ...@@ -321,11 +321,11 @@ def old():
def new_doctor(): def new_doctor():
date_str = "20200101" date_str = "20200101"
sql = "select d.doctor_id,d.service_exposure_pv_30,d.service_ctr_30,d.expert_exposure_pv_30," \ sql = "select d.doctor_id,d.service_exposure_pv_30,d.service_ctr_30,d.expert_exposure_pv_30," \
"d.expert_pv_30,b.merchant_id,m.doctor_discount_30_days from statistic_doctor_rank_factor d " \ "d.expert_pv_30,b.merchant_id,m.doctor_discount_30_days,m.expand_rechange_amount_30," \
"m.service_pv_30,m.expert_pv_30,m.organization_pv_30 from statistic_doctor_rank_factor d " \
"left join hippo_merchantrelevance b on d.doctor_id = b.doctor_id " \ "left join hippo_merchantrelevance b on d.doctor_id = b.doctor_id " \
"left join statistic_merchant_rank_factor m on b.merchant_id = m.merchant_id " \ "left join statistic_merchant_rank_factor m on b.merchant_id = m.merchant_id " \
"where d.partition_date = '{}' and m.partition_date = '{}';".format(date_str,date_str) "where d.partition_date = '{}' and m.partition_date = '{}';".format(date_str,date_str)
print(sql)
db = pymysql.connect(host='172.16.30.141', port=3306, user='work', passwd='BJQaT9VzDcuPBqkd', db='zhengxing') db = pymysql.connect(host='172.16.30.141', port=3306, user='work', passwd='BJQaT9VzDcuPBqkd', db='zhengxing')
cursor = db.cursor() cursor = db.cursor()
...@@ -333,89 +333,59 @@ def new_doctor(): ...@@ -333,89 +333,59 @@ def new_doctor():
result = cursor.fetchall() result = cursor.fetchall()
df = pd.DataFrame(list(result)) df = pd.DataFrame(list(result))
print(df.shape) print(df.shape)
print(df.head(6))
# name = ["doctor_id", "service_exposure_pv_30", "service_ctr_30", "expert_exposure_pv_30", "expert_pv_30", name = ["doctor_id", "service_exposure_pv_30", "service_ctr_30", "expert_exposure_pv_30", "expert_pv_30",
# "merchant_id"] "merchant_id", "doctor_discount_30_days", "expand_rechange_amount_30", "service_pv_30",
# "mexpert_pv_30", "organization_pv_30"]
# df = df.rename(columns=dict(zip(list(range(len(name))), name)))
# print(df.shape) df = df.rename(columns=dict(zip(list(range(len(name))), name)))
# for i in ["service_exposure_pv_30", "service_ctr_30", "expert_exposure_pv_30", "expert_pv_30",
# df = df.dropna(subset=["merchant_id"]) "doctor_discount_30_days", "expand_rechange_amount_30", "service_pv_30",
# print("drop") "mexpert_pv_30", "organization_pv_30"]:
# print(df.shape) df[i] = df[i].astype("float")
# print(df.head(6))
# df["all_exposure"] = df["service_exposure_pv_30"] + df["expert_exposure_pv_30"]
# sql = "select merchant_id,doctor_discount_30_days,expand_rechange_amount_30," \ df = df[~df["expert_exposure_pv_30"].isin([0.0])]
# "service_pv_30,expert_pv_30,organization_pv_30 from statistic_merchant_rank_factor " \ print("expert_exposure_pv_30")
# "where partition_date = '{}';".format(date_str) print(df.shape)
# df = df[~df["all_exposure"].isin([0.0])]
# cursor = db.cursor() print("all_exposure")
# cursor.execute(sql) print(df.shape)
# result = cursor.fetchall() df["tmp"] = df["service_pv_30"] + df["mexpert_pv_30"] +df["organization_pv_30"]
# db.close() df = df[~df["tmp"].isin([0.0])]
# tmp = pd.DataFrame(list(result)) print("tmp")
# name = ["merchant_id", "doctor_discount_30_days", "expand_rechange_amount_30", "service_pv_30", print(df.shape)
# "mexpert_pv_30", "organization_pv_30"] df["ctr"] = df["service_exposure_pv_30"] / df["all_exposure"] * df["service_ctr_30"] + \
# tmp = tmp.rename(columns=dict(zip(list(range(len(name))), name))) df["expert_exposure_pv_30"]/df["all_exposure"] * (df["expert_pv_30"] / df["expert_exposure_pv_30"])
# print("tmp")
# print(tmp.shape) df.loc[df["doctor_discount_30_days"] < 0, ["doctor_discount_30_days"]] = 0
# print(tmp.head(6)) # df.loc[df["budan_payment_30_days"] < 0, ["budan_payment_30_days"]] = 0
# df.loc[df["expand_rechange_amount_30"] < 0, ["expand_rechange_amount_30"]] = 0
# df["merchant_id"] = df["merchant_id"].astype('int64')
# df["merchant_id"] = df["merchant_id"].astype("str") df["commission"] = df["doctor_discount_30_days"]/df["tmp"]
# tmp["merchant_id"] = tmp["merchant_id"].astype("str") df["pv_ad"] = df["expand_rechange_amount_30"]/df["tmp"]
# df = pd.merge(df, tmp, on='merchant_id')
# df.loc[df["all_exposure"] <= 1500, ["ctr"]] = 0.01
# print("merge") df.loc[df["ctr"] < 0.01, ["ctr"]] = 0.01
# print(df.shape) df.loc[df["ctr"] > 0.2, ["ctr"]] = 0.2
# df.loc[df["commission"] > 20, ["commission"]] = 20
# for i in ["service_exposure_pv_30", "service_ctr_30", "expert_exposure_pv_30", "expert_pv_30", df.loc[df["commission"] < 0.01, ["commission"]] = 0.01
# "doctor_discount_30_days", "expand_rechange_amount_30", "service_pv_30", df.loc[df["pv_ad"] > 20, ["pv_ad"]] = 20
# "mexpert_pv_30", "organization_pv_30"]: df.loc[df["pv_ad"] < 0.01, ["pv_ad"]] = 0.01
# df[i] = df[i].astype("float")
# df["score"] = df["ctr"] ** 0.5 * (df["commission"] + df["pv_ad"])
# df["all_exposure"] = df["service_exposure_pv_30"] + df["expert_exposure_pv_30"] print(df.shape)
# df = df[~df["expert_exposure_pv_30"].isin([0.0])]
# print("expert_exposure_pv_30") columns = ["doctor_id","score","ctr","commission","pv_ad","service_exposure_pv_30",
# print(df.shape) "service_ctr_30","expert_exposure_pv_30","expert_pv_30",
# df = df[~df["all_exposure"].isin([0.0])] "merchant_id","doctor_discount_30_days","expand_rechange_amount_30","service_pv_30",
# print("all_exposure") "mexpert_pv_30","organization_pv_30"]
# print(df.shape) data = df.loc[:, columns]
# df["tmp"] = df["service_pv_30"] + df["mexpert_pv_30"] +df["organization_pv_30"]
# df = df[~df["tmp"].isin([0.0])] data = data.drop_duplicates()
# print("tmp") print(data.shape)
# print(df.shape) data.to_csv('/tmp/21_doctor.csv',index=False)
# df["ctr"] = df["service_exposure_pv_30"] / df["all_exposure"] * df["service_ctr_30"] + \ print("doctor end")
# df["expert_exposure_pv_30"]/df["all_exposure"] * (df["expert_pv_30"] / df["expert_exposure_pv_30"])
#
# df.loc[df["doctor_discount_30_days"] < 0, ["doctor_discount_30_days"]] = 0
# # df.loc[df["budan_payment_30_days"] < 0, ["budan_payment_30_days"]] = 0
# df.loc[df["expand_rechange_amount_30"] < 0, ["expand_rechange_amount_30"]] = 0
#
# df["commission"] = df["doctor_discount_30_days"]/df["tmp"]
# df["pv_ad"] = df["expand_rechange_amount_30"]/df["tmp"]
#
# df.loc[df["all_exposure"] <= 1500, ["ctr"]] = 0.01
# df.loc[df["ctr"] < 0.01, ["ctr"]] = 0.01
# df.loc[df["ctr"] > 0.2, ["ctr"]] = 0.2
# df.loc[df["commission"] > 20, ["commission"]] = 20
# df.loc[df["commission"] < 0.01, ["commission"]] = 0.01
# df.loc[df["pv_ad"] > 20, ["pv_ad"]] = 20
# df.loc[df["pv_ad"] < 0.01, ["pv_ad"]] = 0.01
#
# df["score"] = df["ctr"] ** 0.5 * (df["commission"] + df["pv_ad"])
# print(df.shape)
#
# columns = ["doctor_id","score","ctr","commission","pv_ad","service_exposure_pv_30",
# "service_ctr_30","expert_exposure_pv_30","expert_pv_30",
# "merchant_id","doctor_discount_30_days","expand_rechange_amount_30","service_pv_30",
# "mexpert_pv_30","organization_pv_30"]
# data = df.loc[:, columns]
#
# data = data.drop_duplicates()
# print(data.shape)
# data.to_csv('/tmp/1_doctor.csv',index=False)
# print("doctor end")
if __name__ == "__main__": if __name__ == "__main__":
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment