import datetime
import pymysql
import pandas as pd


def hospital(partition_date=None):
    """Score doctors from hospital- and merchant-level factors and dump them to CSV.

    Rows are read from ``api_doctor`` (``doctor_type = 1``) joined with
    ``statistic_hospital_rank_factor``, ``hippo_merchantrelevance`` and
    ``statistic_merchant_rank_factor`` for a single date.  For every row:

    * ``all_exposure`` is the sum of the three ``*_exposure_pv_30`` columns;
      rows where it equals 0 are dropped.
    * ``ctr`` is the exposure-weighted mean of the three ``*_ctr_30`` columns,
      forced to 0.01 when ``all_exposure <= 3000`` and limited to [0.01, 0.2].
    * ``commission`` and ``cpt`` are ``doctor_discount_30_days`` and
      ``expand_rechange_amount_30`` (negatives replaced by 0) divided by the
      summed merchant page views (raised to at least 3000), each limited to
      [0.01, 3].
    * ``score = ctr ** 0.5 * (commission + cpt)``.

    The de-duplicated result is written to ``/tmp/311_hospital.csv`` and a
    few DataFrame shapes are printed along the way.

    Args:
        partition_date: Date string matched against ``h.date`` and
            ``m.partition_date``.  When omitted, the module-level
            ``date_str`` set by the script entry point is used, which keeps
            existing zero-argument callers working.

    Returns:
        None.
    """
    if partition_date is None:
        # Backward-compatible fallback to the global set under ``__main__``.
        partition_date = date_str

    # DataFrame column names, in the same order as the SELECT list below.
    name = ["doctor_id", "hospital_id", "hospital_exposure_pv_30", "service_exposure_pv_30",
            "expert_exposure_pv_30", "service_ctr_30", "hospital_ctr_30", "expert_ctr_30", "merchant_id",
            "doctor_discount_30_days", "expand_rechange_amount_30", "service_pv_30",
            "mexpert_pv_30", "organization_pv_30"]
    numeric_columns = ["hospital_exposure_pv_30", "service_exposure_pv_30", "expert_exposure_pv_30",
                       "service_ctr_30", "hospital_ctr_30", "expert_ctr_30",
                       "doctor_discount_30_days", "service_pv_30", "mexpert_pv_30", "organization_pv_30",
                       "expand_rechange_amount_30"]

    # The date is bound by the driver (%s placeholders) instead of being
    # formatted into the SQL text.
    # NOTE: the WHERE conditions on h and m reject rows without a match, so
    # the LEFT JOINs effectively behave like inner joins.
    sql = ("select api.id,"
           "h.hospital_id,h.hospital_exposure_pv_30,h.service_exposure_pv_30,h.expert_exposure_pv_30,"
           "h.service_ctr_30,h.hospital_ctr_30,h.expert_ctr_30,b.merchant_id,"
           "m.doctor_discount_30_days,m.expand_rechange_amount_30,"
           "m.service_pv_30,m.expert_pv_30,m.organization_pv_30 "
           "from api_doctor api "
           "left join statistic_hospital_rank_factor h on api.hospital_id = h.hospital_id "
           "left join hippo_merchantrelevance b on api.id = b.doctor_id "
           "left join statistic_merchant_rank_factor m on b.merchant_id = m.merchant_id "
           "where api.doctor_type = 1 and h.date = %s and m.partition_date = %s;")

    # NOTE(review): credentials are hard-coded here; consider moving them to
    # configuration or environment variables.
    db = pymysql.connect(host='172.16.30.141', port=3306, user='work', passwd='BJQaT9VzDcuPBqkd', db='zhengxing')
    try:
        cursor = db.cursor()
        try:
            cursor.execute(sql, (partition_date, partition_date))
            result = cursor.fetchall()
        finally:
            cursor.close()
    finally:
        # Always release the connection, even when the query fails.
        db.close()

    # Passing the column names up front keeps an empty result set usable
    # (it yields an empty frame that still has every expected column).
    df = pd.DataFrame(list(result), columns=name)
    print("df")
    print(df.shape)

    df = df.astype({column: "float" for column in numeric_columns})

    df["all_exposure"] = df["hospital_exposure_pv_30"] + df["service_exposure_pv_30"] + df["expert_exposure_pv_30"]
    # Drop rows with zero total exposure (they would divide by zero below);
    # .copy() makes the later column assignments act on an independent frame.
    df = df[df["all_exposure"] != 0].copy()
    print("filter 1")
    print(df.shape)

    # Exposure-weighted average of the three click-through rates.
    df["ctr"] = df["service_exposure_pv_30"] / df["all_exposure"] * df["service_ctr_30"] + \
                df["hospital_exposure_pv_30"] / df["all_exposure"] * df["hospital_ctr_30"] + \
                df["expert_exposure_pv_30"] / df["all_exposure"] * df["expert_ctr_30"]

    # Negative money amounts are treated as zero.
    df.loc[df["doctor_discount_30_days"] < 0, "doctor_discount_30_days"] = 0
    df.loc[df["expand_rechange_amount_30"] < 0, "expand_rechange_amount_30"] = 0

    # Denominator: total merchant page views, never below 3000.
    df["tmp"] = df["service_pv_30"] + df["mexpert_pv_30"] + df["organization_pv_30"]
    df.loc[df["tmp"] <= 3000, "tmp"] = 3000

    df["commission"] = df["doctor_discount_30_days"] / df["tmp"]
    df["cpt"] = df["expand_rechange_amount_30"] / df["tmp"]

    # Rows with at most 3000 total exposures get the minimum CTR.
    df.loc[df["all_exposure"] <= 3000, "ctr"] = 0.01
    df.loc[df["ctr"] < 0.01, "ctr"] = 0.01
    df.loc[df["ctr"] > 0.2, "ctr"] = 0.2
    df.loc[df["cpt"] > 3, "cpt"] = 3
    df.loc[df["cpt"] < 0.01, "cpt"] = 0.01
    df.loc[df["commission"] > 3, "commission"] = 3
    df.loc[df["commission"] < 0.01, "commission"] = 0.01

    df["score"] = df["ctr"] ** 0.5 * (df["commission"] + df["cpt"])

    columns = ["doctor_id", "score", "ctr", "commission", "cpt", "hospital_id", "hospital_exposure_pv_30",
               "service_exposure_pv_30", "expert_exposure_pv_30",
               "service_ctr_30", "hospital_ctr_30", "expert_ctr_30", "merchant_id",
               "doctor_discount_30_days", "service_pv_30", "mexpert_pv_30", "organization_pv_30",
               "expand_rechange_amount_30"]
    data = df.loc[:, columns].drop_duplicates()

    print(data.shape)
    data.to_csv('/tmp/311_hospital.csv', index=False)


def new_doctor(partition_date=None):
    """Score doctors from doctor- and merchant-level factors and dump them to CSV.

    Rows are read from ``statistic_doctor_rank_factor`` joined with
    ``hippo_merchantrelevance`` and ``statistic_merchant_rank_factor`` for a
    single partition date.  For every row:

    * Rows whose ``expert_exposure_pv_30`` is 0 are dropped, then rows whose
      ``all_exposure`` (service + expert exposure) is 0 are dropped.
    * ``ctr`` is the exposure-weighted mean of ``service_ctr_30`` and
      ``expert_pv_30 / expert_exposure_pv_30``, forced to 0.01 when
      ``all_exposure <= 3000`` and limited to [0.01, 0.2].
    * ``commission`` and ``pv_ad`` are ``doctor_discount_30_days`` and
      ``expand_rechange_amount_30`` (negatives replaced by 0) divided by the
      summed merchant page views (raised to at least 3000), each limited to
      [0.01, 3].
    * ``score = ctr ** 0.5 * (commission + pv_ad)``.

    The de-duplicated result is written to ``/tmp/311_doctor.csv`` and a few
    DataFrame shapes are printed along the way.

    Args:
        partition_date: Date string matched against ``d.partition_date`` and
            ``m.partition_date``.  When omitted, the module-level
            ``date_str`` set by the script entry point is used, which keeps
            existing zero-argument callers working.

    Returns:
        None.
    """
    if partition_date is None:
        # Backward-compatible fallback to the global set under ``__main__``.
        partition_date = date_str

    # DataFrame column names, in the same order as the SELECT list below.
    # The merchant's expert_pv_30 is renamed to "mexpert_pv_30" so it does not
    # clash with the doctor-level expert_pv_30 column.
    name = ["doctor_id", "service_exposure_pv_30", "service_ctr_30", "expert_exposure_pv_30", "expert_pv_30",
            "merchant_id", "doctor_discount_30_days", "expand_rechange_amount_30", "service_pv_30",
            "mexpert_pv_30", "organization_pv_30"]
    numeric_columns = ["service_exposure_pv_30", "service_ctr_30", "expert_exposure_pv_30", "expert_pv_30",
                       "doctor_discount_30_days", "expand_rechange_amount_30", "service_pv_30",
                       "mexpert_pv_30", "organization_pv_30"]

    # The date is bound by the driver (%s placeholders) instead of being
    # formatted into the SQL text.
    # NOTE: the WHERE condition on m rejects rows without a merchant match, so
    # the LEFT JOINs effectively behave like inner joins.
    sql = ("select d.doctor_id,d.service_exposure_pv_30,d.service_ctr_30,d.expert_exposure_pv_30,"
           "d.expert_pv_30,b.merchant_id,m.doctor_discount_30_days,m.expand_rechange_amount_30,"
           "m.service_pv_30,m.expert_pv_30,m.organization_pv_30 from statistic_doctor_rank_factor d "
           "left join hippo_merchantrelevance b on d.doctor_id = b.doctor_id "
           "left join statistic_merchant_rank_factor m on b.merchant_id = m.merchant_id "
           "where d.partition_date = %s and m.partition_date = %s;")

    # NOTE(review): credentials are hard-coded here; consider moving them to
    # configuration or environment variables.
    db = pymysql.connect(host='172.16.30.141', port=3306, user='work', passwd='BJQaT9VzDcuPBqkd', db='zhengxing')
    try:
        cursor = db.cursor()
        try:
            cursor.execute(sql, (partition_date, partition_date))
            result = cursor.fetchall()
        finally:
            cursor.close()
    finally:
        # Always release the connection, even when the query fails.
        db.close()

    # Passing the column names up front keeps an empty result set usable
    # (it yields an empty frame that still has every expected column).
    df = pd.DataFrame(list(result), columns=name)
    print(df.shape)

    df = df.astype({column: "float" for column in numeric_columns})

    df["all_exposure"] = df["service_exposure_pv_30"] + df["expert_exposure_pv_30"]
    # expert_exposure_pv_30 is used as a divisor in the CTR formula below.
    df = df[df["expert_exposure_pv_30"] != 0]
    print("expert_exposure_pv_30")
    print(df.shape)
    # .copy() makes the later column assignments act on an independent frame.
    df = df[df["all_exposure"] != 0].copy()
    print("all_exposure")
    print(df.shape)

    # Denominator: total merchant page views, never below 3000.
    df["tmp"] = df["service_pv_30"] + df["mexpert_pv_30"] + df["organization_pv_30"]
    df.loc[df["tmp"] <= 3000, "tmp"] = 3000

    # Exposure-weighted average of the service CTR and the expert CTR, the
    # latter derived as expert_pv_30 / expert_exposure_pv_30.
    df["ctr"] = df["service_exposure_pv_30"] / df["all_exposure"] * df["service_ctr_30"] + \
                df["expert_exposure_pv_30"] / df["all_exposure"] * (df["expert_pv_30"] / df["expert_exposure_pv_30"])

    # Negative money amounts are treated as zero.
    df.loc[df["doctor_discount_30_days"] < 0, "doctor_discount_30_days"] = 0
    df.loc[df["expand_rechange_amount_30"] < 0, "expand_rechange_amount_30"] = 0

    df["commission"] = df["doctor_discount_30_days"] / df["tmp"]
    df["pv_ad"] = df["expand_rechange_amount_30"] / df["tmp"]

    # Rows with at most 3000 total exposures get the minimum CTR.
    df.loc[df["all_exposure"] <= 3000, "ctr"] = 0.01
    df.loc[df["ctr"] < 0.01, "ctr"] = 0.01
    df.loc[df["ctr"] > 0.2, "ctr"] = 0.2
    df.loc[df["commission"] > 3, "commission"] = 3
    df.loc[df["commission"] < 0.01, "commission"] = 0.01
    df.loc[df["pv_ad"] > 3, "pv_ad"] = 3
    df.loc[df["pv_ad"] < 0.01, "pv_ad"] = 0.01

    df["score"] = df["ctr"] ** 0.5 * (df["commission"] + df["pv_ad"])
    print(df.shape)

    columns = ["doctor_id", "score", "ctr", "commission", "pv_ad", "service_exposure_pv_30",
               "service_ctr_30", "expert_exposure_pv_30", "expert_pv_30",
               "merchant_id", "doctor_discount_30_days", "expand_rechange_amount_30", "service_pv_30",
               "mexpert_pv_30", "organization_pv_30"]
    data = df.loc[:, columns].drop_duplicates()

    print(data.shape)
    data.to_csv('/tmp/311_doctor.csv', index=False)
    print("doctor end")


if __name__ == "__main__":
    # Date used by both report functions; they read it as a module-level
    # global, so it must be assigned before either of them is called.
    date_str = "20200310"

    # Build the doctor report first, then the hospital report.
    for build_report in (new_doctor, hospital):
        build_report()