import timeit

import pandas as pd
from utils.cache import redis_db_client

from ..utils import common_elements, nth_element

# Diary (content-side) columns kept by diary_feature_engineering(), which
# selects exactly these columns, in exactly this order, from its working frame.
# The "*_num" entries at the end are the tag-list lengths that
# diary_feature_engineering() derives from the corresponding list columns.
# NOTE(review): the one/three/seven/.../history prefixes look like look-back
# windows (presumably in days) — confirm against the upstream table definition.
DIARY_COLUMNS = [
    "card_id",
    "is_pure_author",
    "is_have_reply",
    "is_have_pure_reply",
    "content_level",
    "topic_num",
    "topic_seven_click_num",
    "topic_thirty_click_num",
    "seven_transform_num",
    "thirty_transform_num",
    "favor_num",
    "favor_pure_num",
    "vote_num",
    "vote_display_num",
    "reply_num",
    "reply_pure_num",
    "one_reply_user_num",
    "three_reply_user_num",
    "seven_reply_user_num",
    "fifteen_reply_user_num",
    "thirty_reply_user_num",
    "sixty_reply_user_num",
    "ninety_reply_user_num",
    "history_reply_user_num",
    "one_reply_num",
    "three_reply_num",
    "seven_reply_num",
    "fifteen_reply_num",
    "thirty_reply_num",
    "sixty_reply_num",
    "ninety_reply_num",
    "history_reply_num",
    "one_click_num",
    "three_click_num",
    "seven_click_num",
    "fifteen_click_num",
    "thirty_click_num",
    "sixty_click_num",
    "ninety_click_num",
    "history_click_num",
    "one_precise_exposure_num",
    "three_precise_exposure_num",
    "seven_precise_exposure_num",
    "fifteen_precise_exposure_num",
    "thirty_precise_exposure_num",
    "sixty_precise_exposure_num",
    "ninety_precise_exposure_num",
    "history_precise_exposure_num",
    "one_vote_user_num",
    "three_vote_user_num",
    "seven_vote_user_num",
    "fifteen_vote_user_num",
    "thirty_vote_user_num",
    "sixty_vote_user_num",
    "ninety_vote_user_num",
    "history_vote_user_num",
    "one_browse_user_num",
    "three_browse_user_num",
    "seven_browse_user_num",
    "fifteen_browse_user_num",
    "thirty_browse_user_num",
    "sixty_browse_user_num",
    "ninety_browse_user_num",
    "history_browse_user_num",
    # The "*_vote_pure_rate" / "*_reply_pure_rate" features below are currently
    # disabled; the same names are commented out in FLOAT_COLUMNS as well.
    # "one_vote_pure_rate",
    # "three_vote_pure_rate",
    # "seven_vote_pure_rate",
    # "fifteen_vote_pure_rate",
    # "thirty_vote_pure_rate",
    # "sixty_vote_pure_rate",
    # "ninety_vote_pure_rate",
    # "history_vote_pure_rate",
    # "one_reply_pure_rate",
    # "three_reply_pure_rate",
    # "seven_reply_pure_rate",
    # "fifteen_reply_pure_rate",
    # "thirty_reply_pure_rate",
    # "sixty_reply_pure_rate",
    # "ninety_reply_pure_rate",
    # "history_reply_pure_rate",
    "one_ctr",
    "three_ctr",
    "seven_ctr",
    "fifteen_ctr",
    "thirty_ctr",
    "sixty_ctr",
    "ninety_ctr",
    "history_ctr",
    # Tag list columns (comma-separated strings split into lists by
    # diary_feature_engineering()).
    "first_demands",
    "second_demands",
    "first_solutions",
    "second_solutions",
    "first_positions",
    "second_positions",
    "projects",
    # Length of each tag list above.
    "first_demands_num",
    "second_demands_num",
    "first_solutions_num",
    "second_solutions_num",
    "first_positions_num",
    "second_positions_num",
    "projects_num",
]
# Names of the integer-valued feature columns.
# NOTE(review): this list is not referenced in the visible part of this file;
# presumably it is consumed by the model-input code elsewhere — confirm.
# Every entry except "active_days" also appears in DIARY_COLUMNS;
# "active_days" is a device-side field (see the default device record built in
# device_diary_fe(), where it is stored as the string "0").
INT_COLUMNS = [
    "active_days",
    "topic_num",
    "favor_num",
    "favor_pure_num",
    "vote_num",
    "vote_display_num",
    "reply_num",
    "reply_pure_num",
    "one_reply_user_num",
    "three_reply_user_num",
    "seven_reply_user_num",
    "fifteen_reply_user_num",
    "thirty_reply_user_num",
    "sixty_reply_user_num",
    "ninety_reply_user_num",
    "history_reply_user_num",
    "one_reply_num",
    "three_reply_num",
    "seven_reply_num",
    "fifteen_reply_num",
    "thirty_reply_num",
    "sixty_reply_num",
    "ninety_reply_num",
    "history_reply_num",
    "one_click_num",
    "three_click_num",
    "seven_click_num",
    "fifteen_click_num",
    "thirty_click_num",
    "sixty_click_num",
    "ninety_click_num",
    "history_click_num",
    "topic_seven_click_num",
    "topic_thirty_click_num",
    "one_precise_exposure_num",
    "three_precise_exposure_num",
    "seven_precise_exposure_num",
    "fifteen_precise_exposure_num",
    "thirty_precise_exposure_num",
    "sixty_precise_exposure_num",
    "ninety_precise_exposure_num",
    "history_precise_exposure_num",
    "one_vote_user_num",
    "three_vote_user_num",
    "seven_vote_user_num",
    "fifteen_vote_user_num",
    "thirty_vote_user_num",
    "sixty_vote_user_num",
    "ninety_vote_user_num",
    "history_vote_user_num",
    "seven_transform_num",
    "thirty_transform_num",
    "one_browse_user_num",
    "three_browse_user_num",
    "seven_browse_user_num",
    "fifteen_browse_user_num",
    "thirty_browse_user_num",
    "sixty_browse_user_num",
    "ninety_browse_user_num",
    "history_browse_user_num",
    # Tag-list lengths derived in diary_feature_engineering() /
    # get_diary_dict_from_redis().
    "first_demands_num",
    "second_demands_num",
    "first_solutions_num",
    "second_solutions_num",
    "first_positions_num",
    "second_positions_num",
    "projects_num",
]
# Names of the float-valued feature columns: the per-window "*_ctr" columns
# that are also listed in DIARY_COLUMNS.
# NOTE(review): "ctr" is presumably click-through rate, and the list is
# presumably consumed by model-input code outside this file — confirm both.
FLOAT_COLUMNS = [
    "one_ctr",
    "three_ctr",
    "seven_ctr",
    "fifteen_ctr",
    "thirty_ctr",
    "sixty_ctr",
    "ninety_ctr",
    "history_ctr",
    # Disabled rate features; kept commented out in step with DIARY_COLUMNS.
    # "one_vote_pure_rate",
    # "three_vote_pure_rate",
    # "seven_vote_pure_rate",
    # "fifteen_vote_pure_rate",
    # "thirty_vote_pure_rate",
    # "sixty_vote_pure_rate",
    # "ninety_vote_pure_rate",
    # "history_vote_pure_rate",
    # "one_reply_pure_rate",
    # "three_reply_pure_rate",
    # "seven_reply_pure_rate",
    # "fifteen_reply_pure_rate",
    # "thirty_reply_pure_rate",
    # "sixty_reply_pure_rate",
    # "ninety_reply_pure_rate",
    # "history_reply_pure_rate",
]
# Names of the categorical feature columns. The list mixes:
#   * device-side fields (device_id, active_type, the *_history fields and
#     click_diary_id1..5 — the same keys as the default device record in
#     device_diary_fe()),
#   * diary-side identifiers/flags (card_id, is_pure_author, ...), and
#   * tag features produced by join_features() / device_diary_fe():
#     device_<abbr>, content_<abbr> and <abbr>1..3 for the abbreviations
#     fd, sd, fs, ss, fp, sp and p.
# NOTE(review): not referenced in the visible part of this file; presumably
# consumed by model-input code elsewhere — confirm.
CATEGORICAL_COLUMNS = [
    "device_id", "active_type", "past_consume_ability_history", "potential_consume_ability_history", "price_sensitive_history",
    "card_id", "is_pure_author", "is_have_reply", "is_have_pure_reply", "content_level", "device_fd", "content_fd", "fd1", "fd2",
    "fd3", "device_sd", "content_sd", "sd1", "sd2", "sd3", "device_fs", "content_fs", "fs1", "fs2", "fs3", "device_ss",
    "content_ss", "ss1", "ss2", "ss3", "device_fp", "content_fp", "fp1", "fp2", "fp3", "device_sp", "content_sp", "sp1", "sp2",
    "sp3", "device_p", "content_p", "p1", "p2", "p3", "click_diary_id1", "click_diary_id2", "click_diary_id3", "click_diary_id4",
    "click_diary_id5"
]


def read_csv_data(dataset_path):
    """Load the three pipe-delimited diary CSV files stored under ``dataset_path``.

    Args:
        dataset_path: Directory object exposing ``joinpath`` (e.g. a
            ``pathlib.Path``) that contains ``diary.csv``, ``diary_click.csv``
            and ``diary_click_cvr.csv``.

    Returns:
        A ``(diary_df, click_df, conversion_df)`` tuple of DataFrames, read
        from the three files in that order.
    """
    file_names = ("diary.csv", "diary_click.csv", "diary_click_cvr.csv")
    diary_df, click_df, conversion_df = [
        pd.read_csv(dataset_path.joinpath(file_name), sep="|") for file_name in file_names
    ]
    return diary_df, click_df, conversion_df


def get_diary_dict_from_redis():
    """Load every cached diary row from Redis, indexed by integer card id.

    The column names are read from the string key ``cvr:db:content:diary:column``
    and the rows from the hash ``cvr:db:content:diary``; both are UTF-8 encoded,
    ``|``-separated records. Each row is converted field by field:

    * tag columns (``first_demands`` ... ``projects``) are split on ``,`` into
      a list with empty items dropped, and a companion ``<column>_num`` entry
      holds the list length;
    * flag columns (``is_pure_author``, ``is_have_pure_reply``,
      ``is_have_reply``) become ``1`` when the field is exactly ``"true"`` and
      ``0`` otherwise;
    * every other field is kept as the raw string.

    Returns:
        dict: ``{card_id (int): {column_name: value, ...}}``, e.g.
        ``{diary_id: {"first_demands": [...], "is_pure_author": 1}}``.

    Raises:
        KeyError: if a row does not provide a ``card_id`` field.
        ValueError: if a row's ``card_id`` is not an integer literal.
    """
    # Built once instead of on every field of every row.
    list_columns = frozenset([
        "first_demands", "second_demands", "first_solutions", "second_solutions", "first_positions",
        "second_positions", "projects"
    ])
    flag_columns = frozenset(["is_pure_author", "is_have_pure_reply", "is_have_reply"])

    db_key = "cvr:db:content:diary"
    column_key = db_key + ":column"
    columns = str(redis_db_client.get(column_key), "utf-8").split("|")
    raw_rows = redis_db_client.hgetall(db_key)

    res = {}
    for raw_row in raw_rows.values():
        row = {}
        # zip() pairs fields with column names; fields beyond the known
        # columns are ignored instead of raising IndexError.
        for col_name, elem in zip(columns, str(raw_row, "utf-8").split("|")):
            if col_name in list_columns:
                # Drop *all* empty items ("" -> [], "a,,b" -> ["a", "b"]) so the
                # companion count only reflects real tags.
                items = [item for item in elem.split(",") if item]
                row[col_name] = items
                row[col_name + "_num"] = len(items)
            elif col_name in flag_columns:
                row[col_name] = 1 if elem == "true" else 0
            else:
                row[col_name] = elem
        # Register the row only once it is fully parsed. Doing this inside the
        # field loop required "card_id" to be the very first column.
        res[int(row["card_id"])] = row
    return res


def diary_feature_engineering(df):
    """Build the diary feature frame from the raw diary table.

    Works on a copy of ``df``; the input frame is left untouched.

    Steps:
        1. Split each comma-separated tag column into a list; any value that
           does not end up as a list (e.g. a missing value) becomes ``[]``.
        2. Add a ``<tag column>_num`` column holding the length of each list.
        3. Cast the three flag columns to ``int``.
        4. Keep only ``DIARY_COLUMNS``, in that order.

    The resulting shape is printed, followed by a per-column null count for
    any column that still contains nulls.

    Args:
        df: Raw diary DataFrame containing at least the tag columns, the flag
            columns and every non-derived entry of ``DIARY_COLUMNS``.

    Returns:
        The engineered DataFrame restricted to ``DIARY_COLUMNS``.
    """
    # (tag list column, column receiving the list length)
    tag_columns = (
        ("first_demands", "first_demands_num"),
        ("second_demands", "second_demands_num"),
        ("first_solutions", "first_solutions_num"),
        ("second_solutions", "second_solutions_num"),
        ("first_positions", "first_positions_num"),
        ("second_positions", "second_positions_num"),
        ("projects", "projects_num"),
    )
    flag_columns = ("is_pure_author", "is_have_pure_reply", "is_have_reply")

    def _as_list(tags):
        # str.split leaves missing values as non-lists; normalise them to [].
        return tags if isinstance(tags, list) else []

    diary_df = df.copy()

    for tag_column, _ in tag_columns:
        diary_df[tag_column] = diary_df[tag_column].str.split(",").apply(_as_list)

    for tag_column, count_column in tag_columns:
        diary_df[count_column] = diary_df[tag_column].apply(len)

    for flag_column in flag_columns:
        diary_df[flag_column] = diary_df[flag_column].astype(int)

    diary_df = diary_df[DIARY_COLUMNS]

    print("diary: " + str(diary_df.shape))
    null_counts = diary_df.isnull().sum()
    columns_with_nulls = null_counts[null_counts > 0]
    if columns_with_nulls.any():
        print(columns_with_nulls)
        print("!!!!!!!!!!!!!!!!!!!!!!\n")
    return diary_df


def join_features(device_df, diary_df, cc_df):
    """Join device, click and diary features and derive the tag-match features.

    ``device_df`` is inner-joined to ``cc_df`` on ``device_id == cl_id`` and the
    result is inner-joined to ``diary_df`` on ``card_id``. Both sides carry the
    seven tag list columns, so after the merge the device-side lists are
    suffixed ``_x`` and the diary-side lists ``_y``.

    For each tag column (abbreviations fd, sd, fs, ss, fp, sp, p) this adds:

    * ``device_<abbr>``  - ``nth_element(device list, 0)``
    * ``content_<abbr>`` - ``nth_element(diary list, 0)``
    * ``<abbr>1..3``     - ``nth_element(common list, 0..2)`` where the common
      list is ``common_elements(device list, diary list)``

    The shape of the joined frame and any per-column null counts are printed.
    The raw list columns and ``cl_id`` are dropped before returning.

    Args:
        device_df: Device feature frame with ``device_id`` and the tag lists.
        diary_df: Diary feature frame with ``card_id`` and the tag lists.
        cc_df: Click frame linking ``cl_id`` to ``card_id``.

    Returns:
        The joined DataFrame with the derived tag features.
    """
    # (tag list column, abbreviation used in the derived column names)
    tag_columns = [
        ("first_demands", "fd"),
        ("second_demands", "sd"),
        ("first_solutions", "fs"),
        ("second_solutions", "ss"),
        ("first_positions", "fp"),
        ("second_positions", "sp"),
        ("projects", "p"),
    ]

    a = pd.merge(device_df, cc_df, how="inner", left_on="device_id", right_on="cl_id")
    df = pd.merge(a, diary_df, how="inner", left_on="card_id", right_on="card_id")

    # Tags shared by the device and the diary. Every column pairs its own
    # "_x" and "_y" list; "first_positions" used to be compared against
    # "second_positions_y" by mistake. Building the values with zip() rather
    # than a row-wise DataFrame.apply also works on an empty join result.
    for name, _ in tag_columns:
        shared = [
            common_elements(device_tags, content_tags)
            for device_tags, content_tags in zip(df[name + "_x"], df[name + "_y"])
        ]
        df[name] = pd.Series(shared, index=df.index, dtype=object)

    # Column creation order (device_*, content_*, then <abbr>1..3) is kept so
    # the layout of the returned frame does not change.
    for name, abbr in tag_columns:
        df["device_" + abbr] = df[name + "_x"].apply(lambda x: nth_element(x, 0))

    for name, abbr in tag_columns:
        df["content_" + abbr] = df[name + "_y"].apply(lambda x: nth_element(x, 0))

    for name, abbr in tag_columns:
        for position in range(3):
            df[abbr + str(position + 1)] = df[name].apply(lambda x, n=position: nth_element(x, n))

    print("joined df: " + str(df.shape))
    nullseries = df.isnull().sum()
    nulls = nullseries[nullseries > 0]
    if nulls.any():
        print(nulls)
        print("!!!!!!!!!!!!!!!!!!!!!!\n")

    # The raw list columns were only needed to derive the features above.
    drop_columns = ["cl_id"]
    for name, _ in tag_columns:
        drop_columns.extend([name + "_x", name + "_y", name])
    df.drop(drop_columns, inplace=True, axis=1)
    return df


def device_diary_fe(device_id, diary_ids, device_dict, diary_dict):
    """Assemble the device record and per-diary records for one device.

    The device record is a shallow copy of ``device_dict[device_id]``; when the
    device is unknown (or its record is empty) a default record is used
    instead. Each diary id found in ``diary_dict`` yields a shallow copy of its
    record; ids that are missing or map to an empty record are skipped.

    Added keys, per tag column (abbreviations fd, sd, fs, ss, fp, sp, p):

    * device record: ``device_<abbr>``  = ``nth_element(device tags, 0)``
    * diary record:  ``content_<abbr>`` = ``nth_element(diary tags, 0)``
    * diary record:  ``<abbr>1..3``     = ``nth_element(common tags, 0..2)``
      with common tags = ``common_elements(device tags, diary tags)``

    The elapsed time is printed before returning.

    Args:
        device_id: Key of the device in ``device_dict``.
        diary_ids: Iterable of keys to look up in ``diary_dict``.
        device_dict: Mapping of device id to device feature dict.
        diary_dict: Mapping of diary id to diary feature dict.

    Returns:
        ``(device_info, diary_lst, diary_ids_res)`` where ``diary_lst`` holds
        the enriched diary records and ``diary_ids_res`` the ``card_id`` of
        each of them (``"-1"`` when a record has no ``card_id``), in the same
        order.
    """
    started_at = timeit.default_timer()

    # (source list key, device-side key, content-side key, keys for the
    #  1st/2nd/3rd common tag)
    tag_layout = (
        ("first_demands", "device_fd", "content_fd", ("fd1", "fd2", "fd3")),
        ("second_demands", "device_sd", "content_sd", ("sd1", "sd2", "sd3")),
        ("first_solutions", "device_fs", "content_fs", ("fs1", "fs2", "fs3")),
        ("second_solutions", "device_ss", "content_ss", ("ss1", "ss2", "ss3")),
        ("first_positions", "device_fp", "content_fp", ("fp1", "fp2", "fp3")),
        ("second_positions", "device_sp", "content_sp", ("sp1", "sp2", "sp3")),
        ("projects", "device_p", "content_p", ("p1", "p2", "p3")),
    )

    device_info = device_dict.get(device_id, {}).copy()
    if not device_info:
        # Fallback record for a device without cached features.
        device_info = {
            "device_id": device_id,
            "active_type": "1",
            "active_days": "0",
            "past_consume_ability_history": "极弱",
            "potential_consume_ability_history": "极弱",
            "price_sensitive_history": "不敏感无消费",
            "device_click_num_1d": 0,
            "device_click_num_3d": 0,
            "device_click_num_7d": 0,
            "device_click_num_15d": 0,
            "device_click_num_30d": 0,
            "device_click_num_180d": 0,
            "click_diary_id1": "-1",
            "click_diary_id2": "-1",
            "click_diary_id3": "-1",
            "click_diary_id4": "-1",
            "click_diary_id5": "-1"
        }

    # Read every device tag list first, then attach the derived keys.
    device_tags = [device_info.get(source_key, []) for source_key, _, _, _ in tag_layout]
    for (_, device_key, _, _), tags in zip(tag_layout, device_tags):
        device_info[device_key] = nth_element(tags, 0)

    diary_lst = []
    diary_ids_res = []
    for diary_id in diary_ids:
        diary_info = diary_dict.get(diary_id, {}).copy()
        if not diary_info:
            continue
        diary_ids_res.append(diary_info.get("card_id", "-1"))

        diary_tags = [diary_info.get(source_key, []) for source_key, _, _, _ in tag_layout]
        common_tags = [
            common_elements(from_device, from_diary) for from_device, from_diary in zip(device_tags, diary_tags)
        ]

        # All content_* keys are inserted before the <abbr>1..3 keys so the
        # key order of the record is unchanged.
        for (_, _, content_key, _), tags in zip(tag_layout, diary_tags):
            diary_info[content_key] = nth_element(tags, 0)
        for (_, _, _, rank_keys), tags in zip(tag_layout, common_tags):
            for rank, rank_key in enumerate(rank_keys):
                diary_info[rank_key] = nth_element(tags, rank)

        diary_lst.append(diary_info)

    elapsed = timeit.default_timer() - started_at
    print("join device diary cost {:.5f}s".format(elapsed))
    return device_info, diary_lst, diary_ids_res
