import timeit

import pandas as pd
from utils.cache import redis_db_client

from ..utils import common_elements, nth_element

# Columns kept (in this order) by tractate_feature_engineering() when it
# projects the raw tractate frame: id, boolean flags, reply counts, CTR columns
# and the comma-separated tag list columns.
TRACTATE_COLUMNS = [
    "card_id", "is_pure_author", "is_have_pure_reply", "is_have_reply", "content_level", "show_tag_id", "reply_num",
    "reply_pure_num", "one_ctr", "three_ctr", "seven_ctr", "fifteen_ctr", "thirty_ctr", "sixty_ctr", "ninety_ctr", "history_ctr",
    "first_demands", "second_demands", "first_solutions", "second_solutions", "first_positions", "second_positions", "projects"
]

# Feature-name groups by value type. They are not referenced in this part of
# the file; presumably consumed by the model-input code elsewhere -- TODO confirm.
INT_COLUMNS = ["active_days", "reply_num", "reply_pure_num"]
FLOAT_COLUMNS = ["one_ctr", "three_ctr", "seven_ctr", "fifteen_ctr", "thirty_ctr", "sixty_ctr", "ninety_ctr", "history_ctr"]
# Includes the derived columns produced by join_features()/device_tractate_fe():
# device_*/content_* (first tag on each side) and <prefix>1..3 (shared tags).
CATEGORICAL_COLUMNS = [
    "device_id", "active_type", "past_consume_ability_history", "potential_consume_ability_history", "price_sensitive_history",
    "card_id", "is_pure_author", "is_have_reply", "is_have_pure_reply", "content_level", "show_tag_id", "device_fd", "content_fd",
    "fd1", "fd2", "fd3", "device_sd", "content_sd", "sd1", "sd2", "sd3", "device_fs", "content_fs", "fs1", "fs2", "fs3",
    "device_ss", "content_ss", "ss1", "ss2", "ss3", "device_fp", "content_fp", "fp1", "fp2", "fp3", "device_sp", "content_sp",
    "sp1", "sp2", "sp3", "device_p", "content_p", "p1", "p2", "p3", "click_tractate_id1", "click_tractate_id2",
    "click_tractate_id3", "click_tractate_id4", "click_tractate_id5"
]


def read_csv_data(dataset_path):
    """
    Load the three pipe-delimited tractate CSV files found under dataset_path.

    dataset_path: object exposing joinpath() (e.g. pathlib.Path) pointing at
        the directory that holds the CSV files.
    return: (tractate_df, click_df, conversion_df), read from "tractate.csv",
        "tractate_click.csv" and "tractate_click_cvr.csv" respectively.
    """
    file_names = ("tractate.csv", "tractate_click.csv", "tractate_click_cvr.csv")
    frames = [pd.read_csv(dataset_path.joinpath(file_name), sep="|") for file_name in file_names]
    return tuple(frames)


def get_tractate_from_redis():
    """
    Load every tractate row stored in Redis and index the rows by card id.

    The column names are read from the "cvr:db:content:tractate:column" key
    and the rows from the "cvr:db:content:tractate" hash; both are decoded as
    UTF-8 and split on "|". Per column:
      * tag list columns are split on "," into a list of strings,
      * the three boolean flag columns become 1 when the field is exactly
        "true" and 0 otherwise,
      * every other column keeps its raw string value.

    return: {tractate_id: {first_demands: [], is_pure_author: 1}}
    raises: KeyError if a row has no "card_id" column, ValueError if the
        card id is not an integer, IndexError if a row has more fields than
        there are column names.
    """
    db_key = "cvr:db:content:tractate"
    column_key = db_key + ":column"
    list_columns = {
        "first_demands", "second_demands", "first_solutions", "second_solutions", "first_positions",
        "second_positions", "projects"
    }
    flag_columns = {"is_pure_author", "is_have_pure_reply", "is_have_reply"}

    columns = str(redis_db_client.get(column_key), "utf-8").split("|")
    res = {}
    for raw_row in redis_db_client.hgetall(db_key).values():
        row = {}
        for (index, elem) in enumerate(str(raw_row, "utf-8").split("|")):
            col_name = columns[index]
            if col_name in list_columns:
                # NOTE(review): an empty field yields [""] here, not [] --
                # confirm this matches what the training pipeline produces.
                row[col_name] = elem.split(",")
            elif col_name in flag_columns:
                row[col_name] = 1 if elem == "true" else 0
            else:
                row[col_name] = elem
        # Register the row only once it is fully parsed. Doing this inside the
        # column loop raised KeyError whenever "card_id" was not the first
        # column, because the key was looked up before it had been parsed.
        res[int(row["card_id"])] = row
    return res


def tractate_feature_engineering(tractate_df):
    """
    Normalise the raw tractate frame into the TRACTATE_COLUMNS layout.

    Works on a copy of tractate_df:
      * each tag column is split on "," into a list; values that do not end
        up as a list (e.g. missing values) are replaced by an empty list,
      * the three boolean flag columns are cast to int,
      * show_tag_id is cast to str,
      * only TRACTATE_COLUMNS are kept, in that order.

    Prints the per-column null counts (non-zero only) and the frame shape.

    return: the transformed DataFrame.
    """
    list_columns = (
        "first_demands", "second_demands", "first_solutions", "second_solutions",
        "first_positions", "second_positions", "projects",
    )
    flag_columns = ("is_pure_author", "is_have_pure_reply", "is_have_reply")

    df = tractate_df.copy()

    for name in list_columns:
        pieces = df[name].str.split(",")
        df[name] = pieces.apply(lambda d: d if isinstance(d, list) else [])

    for name in flag_columns:
        df[name] = df[name].astype(int)
    df["show_tag_id"] = df["show_tag_id"].astype(str)

    df = df[TRACTATE_COLUMNS]

    print("tractate:")
    null_counts = df.isnull().sum()
    print(null_counts[null_counts > 0])
    print(df.shape)
    return df


def join_features(device_df, tractate_df, cc_df):
    """
    Join device, click and tractate frames and derive the tag-match features.

    device_df is inner-joined with cc_df on device_id == cl_id, and the result
    is inner-joined with tractate_df on card_id. Tag list columns present on
    both sides of the second merge get pandas' default "_x" (left / device
    side) and "_y" (right / tractate side) suffixes -- this assumes cc_df
    itself does not carry those columns; TODO confirm.

    For each tag list column (short prefix in parentheses: first_demands (fd),
    second_demands (sd), first_solutions (fs), second_solutions (ss),
    first_positions (fp), second_positions (sp), projects (p)) it adds:
      * device_<prefix>:  nth_element(<column>_x, 0)
      * content_<prefix>: nth_element(<column>_y, 0)
      * <prefix>1..3:     nth_element(common_elements(_x, _y), 0..2)

    Prints the per-column null counts (non-zero only) and the frame shape,
    then drops cl_id and all intermediate list columns.

    return: the joined DataFrame with the derived feature columns.
    """
    # (tag list column, short prefix used in the derived feature names)
    tag_fields = [
        ("first_demands", "fd"),
        ("second_demands", "sd"),
        ("first_solutions", "fs"),
        ("second_solutions", "ss"),
        ("first_positions", "fp"),
        ("second_positions", "sp"),
        ("projects", "p"),
    ]

    a = pd.merge(device_df, cc_df, how="inner", left_on="device_id", right_on="cl_id")
    df = pd.merge(a, tractate_df, how="inner", left_on="card_id", right_on="card_id")

    # Tags shared by the device and the tractate. Building the _x/_y names from
    # one column name guarantees each column is compared with its own
    # counterpart (first_positions used to be matched against
    # second_positions_y by mistake).
    for name, _ in tag_fields:
        df[name] = df[[name + "_x", name + "_y"]].apply(lambda x: common_elements(*x), axis=1)

    # The loops below are kept separate so the derived columns are appended in
    # the same order as before: device_*, content_*, then the ranked matches.
    for name, short in tag_fields:
        df["device_" + short] = df[name + "_x"].apply(lambda x: nth_element(x, 0))

    for name, short in tag_fields:
        df["content_" + short] = df[name + "_y"].apply(lambda x: nth_element(x, 0))

    for name, short in tag_fields:
        for rank in range(3):
            df[short + str(rank + 1)] = df[name].apply(lambda x, rank=rank: nth_element(x, rank))

    print("df:")
    nullseries = df.isnull().sum()
    print(nullseries[nullseries > 0])
    print(df.shape)

    # The list-valued columns were only needed to derive the scalar features.
    drop_columns = ["cl_id"]
    for name, _ in tag_fields:
        drop_columns.extend([name + "_x", name + "_y", name])
    df.drop(drop_columns, inplace=True, axis=1)
    return df


def device_tractate_fe(device_id, tractate_ids, device_dict, tractate_dict):
    """
    Build the per-request device features and per-tractate features.

    device_id: key looked up in device_dict; when the device is missing (or
        its entry is empty) a hard-coded default profile is used instead.
    tractate_ids: candidate ids looked up in tractate_dict; ids that are
        missing (or map to an empty entry) are skipped.
    device_dict / tractate_dict: mappings from id to a feature dict. Entries
        are shallow-copied before being extended, so the mappings themselves
        do not gain keys.

    For each tag list key (short prefix in parentheses: first_demands (fd),
    second_demands (sd), first_solutions (fs), second_solutions (ss),
    first_positions (fp), second_positions (sp), projects (p)):
      * device_info["device_<prefix>"]    = nth_element(device list, 0)
      * tractate_info["content_<prefix>"] = nth_element(tractate list, 0)
      * tractate_info["<prefix>1".."3"]   = nth_element(common list, 0..2)
    where the common list is common_elements(device list, tractate list).

    Prints the elapsed wall time before returning.

    return: (device_info, tractate_lst, tractate_ids_res) -- the device feature
        dict, the list of enriched tractate dicts, and the "card_id" of each
        kept tractate ("-1" when the entry has no card_id).
    """
    started_at = timeit.default_timer()

    # (tag list key, short prefix used in the derived feature names)
    tag_fields = (
        ("first_demands", "fd"),
        ("second_demands", "sd"),
        ("first_solutions", "fs"),
        ("second_solutions", "ss"),
        ("first_positions", "fp"),
        ("second_positions", "sp"),
        ("projects", "p"),
    )

    device_info = device_dict.get(device_id, {}).copy()
    if not device_info:
        # Unknown device: fall back to a fixed default profile.
        device_info = {
            "device_id": device_id,
            "active_type": "1",
            "active_days": "0",
            "channel_first": "App Store",
            "city_first": "beijing",
            "model_first": "iPhone10",
            "past_consume_ability_history": "极弱",
            "potential_consume_ability_history": "极弱",
            "price_sensitive_history": "不敏感无消费",
            "device_click_num_1d": 0,
            "device_click_num_3d": 0,
            "device_click_num_7d": 0,
            "device_click_num_15d": 0,
            "device_click_num_30d": 0,
            "device_click_num_180d": 0,
            "click_tractate_id1": "-1",
            "click_tractate_id2": "-1",
            "click_tractate_id3": "-1",
            "click_tractate_id4": "-1",
            "click_tractate_id5": "-1"
        }

    device_tags = [device_info.get(key, []) for key, _ in tag_fields]
    for (_, short), tags in zip(tag_fields, device_tags):
        device_info["device_" + short] = nth_element(tags, 0)

    tractate_lst = []
    tractate_ids_res = []
    for tractate_id in tractate_ids:
        tractate_info = tractate_dict.get(tractate_id, {}).copy()
        if not tractate_info:
            continue

        tractate_ids_res.append(tractate_info.get("card_id", "-1"))
        content_tags = [tractate_info.get(key, []) for key, _ in tag_fields]
        shared_tags = [
            common_elements(device_side, content_side)
            for device_side, content_side in zip(device_tags, content_tags)
        ]

        for (_, short), tags in zip(tag_fields, content_tags):
            tractate_info["content_" + short] = nth_element(tags, 0)
        for (_, short), tags in zip(tag_fields, shared_tags):
            for rank in range(3):
                tractate_info[short + str(rank + 1)] = nth_element(tags, rank)

        tractate_lst.append(tractate_info)

    elapsed = timeit.default_timer() - started_at
    print("join device tractate cost {:.5f}s".format(elapsed))
    return device_info, tractate_lst, tractate_ids_res
