import timeit

import pandas as pd

from utils.cache import redis_db_client

from .utils import common_elements, nth_element

# Columns (and their order) selected for the frame returned by diary_feature_engineering().
DIARY_COLUMNS = [
    "card_id", "is_pure_author", "is_have_reply", "is_have_pure_reply", "content_level", "topic_num", "favor_num", "vote_num",
    "one_ctr", "three_ctr", "seven_ctr", "fifteen_ctr", "first_demands", "second_demands", "first_solutions", "second_solutions",
    "first_positions", "second_positions", "projects"
]


def read_csv_data(dataset_path):
    """
    Read the three "|"-separated diary CSV files found under ``dataset_path``.

    dataset_path: object exposing ``joinpath`` (e.g. ``pathlib.Path``) that points at the dataset directory.
    return: (diary_df, click_df, conversion_df) loaded from diary.csv, diary_click.csv and diary_click_cvr.csv
    """
    file_names = ("diary.csv", "diary_click.csv", "diary_click_cvr.csv")
    return tuple(pd.read_csv(dataset_path.joinpath(file_name), sep="|") for file_name in file_names)


def get_diary_dict_from_redis():
    """
    Load every diary row stored in the Redis hash "cvr:db:content:diary" and index it by integer card_id.

    The "|"-separated column names are read from the key "cvr:db:content:diary:column"; every hash value is
    one "|"-separated row whose fields line up positionally with those column names.

    Per-column conversion:
      * tag columns (first_demands ... projects) are split on "," into lists
      * boolean columns (is_pure_author, is_have_pure_reply, is_have_reply) become 1 for "true", otherwise 0
      * every other column is kept as the decoded string

    return: {diary_id: {first_demands: [], is_pure_author: 1}}
    raises: KeyError if a row has no "card_id" column; IndexError if a row has more fields than column names
    """
    db_key = "cvr:db:content:diary"
    column_key = db_key + ":column"
    list_columns = (
        "first_demands", "second_demands", "first_solutions", "second_solutions", "first_positions",
        "second_positions", "projects"
    )
    bool_columns = ("is_pure_author", "is_have_pure_reply", "is_have_reply")

    columns = str(redis_db_client.get(column_key), "utf-8").split("|")
    d = redis_db_client.hgetall(db_key)
    res = {}
    for i in d.values():
        row_list = str(i, "utf-8").split("|")
        tmp = {}
        for (index, elem) in enumerate(row_list):
            col_name = columns[index]
            if col_name in list_columns:
                # NOTE(review): an empty field yields [""] here, not [] — confirm that is what consumers expect.
                tmp[col_name] = elem.split(",")
            elif col_name in bool_columns:
                tmp[col_name] = 1 if elem == "true" else 0
            else:
                tmp[col_name] = elem
        # Register the row only once it is fully parsed, so "card_id" may sit at any column position.
        res[int(tmp["card_id"])] = tmp
    return res


def diary_feature_engineering(df):
    """
    Build the diary feature frame from the raw diary data; the input frame is left untouched.

    * the seven tag columns (first_demands ... projects) are split on "," and any value that is not
      a list after the split is replaced by an empty list
    * the three boolean flag columns are cast to int
    * the null count per column (only columns with nulls) and the frame shape are printed

    return: a frame restricted to DIARY_COLUMNS
    """

    def _list_or_empty(value):
        # Anything that did not come out of the split as a list is replaced by [].
        return value if isinstance(value, list) else []

    diary_df = df.copy()

    tag_columns = (
        "first_demands", "second_demands", "first_solutions", "second_solutions",
        "first_positions", "second_positions", "projects",
    )
    for name in tag_columns:
        split_values = diary_df[name].str.split(",")
        diary_df[name] = split_values.apply(_list_or_empty)

    for name in ("is_pure_author", "is_have_pure_reply", "is_have_reply"):
        diary_df[name] = diary_df[name].astype(int)

    print("diary:")
    null_counts = diary_df.isnull().sum()
    print(null_counts[null_counts > 0])
    print(diary_df.shape)
    return diary_df[DIARY_COLUMNS]


def click_feature_engineering(click_df, conversion_df):
    """
    Left-join click rows with their conversion labels on (cl_id, card_id).

    Side effect: the "label" column of BOTH input frames is renamed in place (to "click_label" and
    "conversion_label" respectively); no defensive copy is taken.

    Click rows without a matching conversion row get conversion_label 0. The partition_date columns of
    both inputs are dropped from the result. The null count per column (only columns with nulls) and
    the result shape are printed.

    return: the merged frame
    """
    click_df.rename(columns={"label": "click_label"}, inplace=True)
    conversion_df.rename(columns={"label": "conversion_label"}, inplace=True)
    cc_df = pd.merge(click_df, conversion_df, how="left", on=["cl_id", "card_id"])
    cc_df.drop(["partition_date_x", "partition_date_y"], axis=1, inplace=True)
    # Assign the filled column back: fillna(inplace=True) on a selected column is a chained
    # assignment that does not update cc_df when pandas Copy-on-Write is active.
    cc_df["conversion_label"] = cc_df["conversion_label"].fillna(0)

    print("click:")
    nullseries = cc_df.isnull().sum()
    print(nullseries[nullseries > 0])
    print(cc_df.shape)

    return cc_df


def join_features(device_df, diary_df, cc_df):
    """
    Join device, click/conversion and diary frames and derive the tag features.

    device_df is inner-joined to cc_df on device_id == cl_id, and the result is inner-joined to diary_df
    on card_id. After the second merge the tag columns shared by both sides carry the pandas default
    suffixes: "_x" for the device side and "_y" for the diary side.

    For every tag column (first_demands ... projects, short names fd/sd/fs/ss/fp/sp/p) this adds:
      * device_<short>  : nth_element(device tags, 0)
      * content_<short> : nth_element(diary tags, 0)
      * <short>1..3     : nth_element(common_elements(device tags, diary tags), 0..2)

    The raw tag columns (both suffixed variants and the overlap column) and cl_id are dropped before
    returning. The null count per column (only columns with nulls) and the frame shape are printed.

    return: the joined feature frame
    """
    a = pd.merge(device_df, cc_df, how="inner", left_on="device_id", right_on="cl_id")
    df = pd.merge(a, diary_df, how="inner", left_on="card_id", right_on="card_id")

    # (tag column, short feature prefix); the order fixes the order of the generated columns.
    tag_columns = [
        ("first_demands", "fd"), ("second_demands", "sd"), ("first_solutions", "fs"), ("second_solutions", "ss"),
        ("first_positions", "fp"), ("second_positions", "sp"), ("projects", "p"),
    ]

    # Overlap of device tags and diary tags. Each overlap always pairs the SAME tag column from both
    # sides (first_positions_x with first_positions_y, ...), matching what device_diary_fe() computes.
    for col, _ in tag_columns:
        df[col] = df[[col + "_x", col + "_y"]].apply(lambda x: common_elements(*x), axis=1)

    for col, short in tag_columns:
        df["device_" + short] = df[col + "_x"].apply(lambda x: nth_element(x, 0))

    for col, short in tag_columns:
        df["content_" + short] = df[col + "_y"].apply(lambda x: nth_element(x, 0))

    for col, short in tag_columns:
        for rank in range(3):
            # rank is bound as a default so the lambda does not depend on the loop variable afterwards.
            df[short + str(rank + 1)] = df[col].apply(lambda x, rank=rank: nth_element(x, rank))

    print("df:")
    nullseries = df.isnull().sum()
    print(nullseries[nullseries > 0])
    print(df.shape)

    # The list-valued source columns are no longer needed once the scalar features exist.
    drop_columns = ["cl_id"]
    for col, _ in tag_columns:
        drop_columns.extend([col + "_x", col + "_y", col])
    df.drop(drop_columns, inplace=True, axis=1)
    return df


def device_diary_fe(device_id, diary_ids, device_dict, diary_dict):
    """
    Build the feature dicts for one device and a list of candidate diaries.

    device_id: key looked up in device_dict; a fixed default profile is used when the lookup is empty
    diary_ids: keys looked up in diary_dict; ids with no (or an empty) entry are skipped
    device_dict / diary_dict: mappings from id to a feature dict; the stored dicts are shallow-copied

    For every tag field (first_demands ... projects, short names fd/sd/fs/ss/fp/sp/p):
      * device_info["device_<short>"]  = nth_element(device tags, 0)
      * diary_info["content_<short>"]  = nth_element(diary tags, 0)
      * diary_info["<short>1".."<short>3"] = nth_element(common_elements(device tags, diary tags), 0..2)

    The elapsed time is printed before returning.

    return: (device_info, diary_lst, diary_ids_res) where diary_ids_res holds the card_id of every
            diary that was found ("-1" when the entry has no card_id)
    """
    started_at = timeit.default_timer()

    # (source key, short feature prefix); the order fixes the insertion order of the generated keys.
    tag_fields = (
        ("first_demands", "fd"),
        ("second_demands", "sd"),
        ("first_solutions", "fs"),
        ("second_solutions", "ss"),
        ("first_positions", "fp"),
        ("second_positions", "sp"),
        ("projects", "p"),
    )

    device_info = device_dict.get(device_id, {}).copy()
    if not device_info:
        # Default profile for a device that is unknown (or stored as an empty dict).
        device_info = {
            "device_id": device_id,
            "active_type": "1",
            "active_days": "0",
            "past_consume_ability_history": "极弱",
            "potential_consume_ability_history": "极弱",
            "price_sensitive_history": "不敏感无消费",
            "device_click_num_1d": 0,
            "device_click_num_3d": 0,
            "device_click_num_7d": 0,
            "device_click_num_15d": 0,
            "device_click_num_30d": 0,
            "device_click_num_180d": 0
        }

    device_tags = [device_info.get(field, []) for field, _ in tag_fields]
    for (_, short), tags in zip(tag_fields, device_tags):
        device_info["device_" + short] = nth_element(tags, 0)

    diary_lst = []
    diary_ids_res = []
    for diary_id in diary_ids:
        diary_info = diary_dict.get(diary_id, {}).copy()
        if not diary_info:
            continue

        diary_ids_res.append(diary_info.get("card_id", "-1"))
        diary_tags = [diary_info.get(field, []) for field, _ in tag_fields]
        shared_tags = [common_elements(own, other) for own, other in zip(device_tags, diary_tags)]

        for (_, short), tags in zip(tag_fields, diary_tags):
            diary_info["content_" + short] = nth_element(tags, 0)
        for (_, short), tags in zip(tag_fields, shared_tags):
            for rank in range(3):
                diary_info[short + str(rank + 1)] = nth_element(tags, rank)

        diary_lst.append(diary_info)

    elapsed = timeit.default_timer() - started_at
    print("join device diary cost {:.5f}s".format(elapsed))
    return device_info, diary_lst, diary_ids_res
