import pandas as pd

from utils.cache import redis_db_client

from .utils import common_elements, nth_element


def read_csv_data(dataset_path):
    """Load the four pipe-delimited dataset files found under ``dataset_path``.

    ``dataset_path`` must offer ``joinpath`` (e.g. a ``pathlib.Path``).

    Returns a 4-tuple of DataFrames in this order: device, diary, click,
    conversion (the latter is read from ``click_cvr.csv``).
    """
    file_names = ("device.csv", "diary.csv", "click.csv", "click_cvr.csv")
    # The full tables are returned; no down-sampling is applied here.
    return tuple(pd.read_csv(dataset_path.joinpath(file_name), sep="|") for file_name in file_names)


def _get_data_from_redis(key):
    """Build a DataFrame from the Redis hash stored at ``key``.

    Every value of the hash is decoded as UTF-8 and split on ``"|"`` to form
    one row. The column names are read the same way from the plain string
    stored at ``key + ":column"``. All cells in the result are strings.
    """
    raw_rows = redis_db_client.hgetall(key).values()
    rows = [str(raw_row, "utf-8").split("|") for raw_row in raw_rows]
    header = str(redis_db_client.get(key + ":column"), "utf-8").split("|")
    return pd.DataFrame(rows, columns=header)


def get_device_df_from_redis():
    """Return the device table read from the Redis key ``cvr:db:device``."""
    device_key = "cvr:db:device"
    return _get_data_from_redis(device_key)


def get_diary_df_from_redis():
    """Return the diary table read from the Redis key ``cvr:db:content:diary``."""
    diary_key = "cvr:db:content:diary"
    return _get_data_from_redis(diary_key)


def device_feature_engineering(df):
    """Prepare the raw device table for joining.

    Works on a copy of ``df``. Each comma-separated tag column is split into a
    list of strings; any cell that does not end up as a list (for example a
    missing value) becomes an empty list. The per-column null counts that are
    still above zero are printed, and only the device feature columns are
    returned.
    """
    tag_columns = (
        "first_demands",
        "second_demands",
        "first_solutions",
        "second_solutions",
        "first_positions",
        "second_positions",
        "projects",
    )
    kept_columns = [
        "device_id",
        "active_type",
        "active_days",
        "past_consume_ability_history",
        "potential_consume_ability_history",
        "price_sensitive_history",
    ]
    kept_columns.extend(tag_columns)

    frame = df.copy()
    for name in tag_columns:
        tokens = frame[name].str.split(",")
        # Missing cells stay non-list after the split; normalise them to [].
        frame[name] = tokens.apply(lambda value: value if isinstance(value, list) else [])

    null_counts = frame.isnull().sum()
    print("device:")
    print(null_counts[null_counts > 0])

    return frame[kept_columns]


def diary_feature_engineering(df, from_redis=False):
    """Prepare the raw diary table for joining.

    Works on a copy of ``df``. Each comma-separated tag column is split into a
    list of strings; any cell that does not end up as a list (for example a
    missing value) becomes an empty list. The three boolean flag columns are
    cast to ``int``; when ``from_redis`` is true they are first mapped from
    the strings ``"true"`` / ``"false"`` to booleans. The per-column null
    counts that are still above zero are printed, and only the diary feature
    columns are returned.
    """
    tag_columns = (
        "first_demands",
        "second_demands",
        "first_solutions",
        "second_solutions",
        "first_positions",
        "second_positions",
        "projects",
    )
    flag_columns = ("is_pure_author", "is_have_pure_reply", "is_have_reply")
    kept_columns = [
        "card_id",
        "is_pure_author",
        "is_have_reply",
        "is_have_pure_reply",
        "content_level",
        "topic_num",
        "favor_num",
        "vote_num",
        "one_ctr",
        "three_ctr",
        "seven_ctr",
        "fifteen_ctr",
    ]
    kept_columns.extend(tag_columns)

    frame = df.copy()
    for name in tag_columns:
        tokens = frame[name].str.split(",")
        # Missing cells stay non-list after the split; normalise them to [].
        frame[name] = tokens.apply(lambda value: value if isinstance(value, list) else [])

    if from_redis:
        # Redis rows arrive as text, so the flags are "true"/"false" strings.
        text_to_bool = {"true": True, "false": False}
        for name in flag_columns:
            frame[name] = frame[name].map(text_to_bool)

    for name in flag_columns:
        frame[name] = frame[name].astype(int)

    print("diary:")
    null_counts = frame.isnull().sum()
    print(null_counts[null_counts > 0])

    return frame[kept_columns]


def click_feature_engineering(click_df, conversion_df):
    """Combine click and conversion records into one labelled frame.

    The ``label`` column of each input is renamed to ``click_label`` and
    ``conversion_label`` respectively, the conversions are left-joined onto
    the clicks by ``(cl_id, card_id)``, both ``partition_date`` columns are
    dropped, and clicks without a matching conversion get
    ``conversion_label`` 0. The per-column null counts that are still above
    zero are printed.

    NOTE: the rename is applied to the caller's DataFrames in place, exactly
    as before; no copy of the inputs is made.

    Returns the merged DataFrame.
    """
    click_df.rename(columns={"label": "click_label"}, inplace=True)
    conversion_df.rename(columns={"label": "conversion_label"}, inplace=True)

    cc_df = pd.merge(click_df, conversion_df, how="left", on=["cl_id", "card_id"])
    cc_df = cc_df.drop(columns=["partition_date_x", "partition_date_y"])
    # Assign the filled column back explicitly: an in-place fillna on the
    # result of ``cc_df[...]`` is a chained operation that pandas deprecates
    # and that has no effect on ``cc_df`` under Copy-on-Write.
    cc_df["conversion_label"] = cc_df["conversion_label"].fillna(0)

    print("click:")
    nullseries = cc_df.isnull().sum()
    print(nullseries[nullseries > 0])

    return cc_df


# (tag column, short feature prefix) pairs present in both the device and the
# diary tables. After a merge the device copy carries the ``_x`` suffix and
# the diary copy the ``_y`` suffix.
_TAG_FIELDS = (
    ("first_demands", "fd"),
    ("second_demands", "sd"),
    ("first_solutions", "fs"),
    ("second_solutions", "ss"),
    ("first_positions", "fp"),
    ("second_positions", "sp"),
    ("projects", "p"),
)


def _add_tag_features(df):
    """Add the device/diary tag features to a merged frame, in place.

    ``df`` must contain ``<tag>_x`` (device side) and ``<tag>_y`` (diary side)
    for every tag in ``_TAG_FIELDS``. Columns are added in this order:

    * ``<tag>``: ``common_elements(<tag>_x, <tag>_y)`` per row (presumably the
      tags shared by device and diary; see ``utils.common_elements``);
    * ``device_<short>``: ``nth_element(<tag>_x, 0)``;
    * ``content_<short>``: ``nth_element(<tag>_y, 0)``;
    * ``<short>1`` .. ``<short>3``: ``nth_element(<tag>, 0..2)``.
    """
    for field, _ in _TAG_FIELDS:
        # Each tag is paired with its own diary counterpart. The previous code
        # paired first_positions_x with second_positions_y (copy-paste slip).
        # Building the column from zipped values also works on an empty frame,
        # where a row-wise DataFrame.apply does not yield a single column.
        shared = [common_elements(dev, doc) for dev, doc in zip(df[field + "_x"], df[field + "_y"])]
        df[field] = pd.Series(shared, index=df.index, dtype=object)

    for side_prefix, suffix in (("device_", "_x"), ("content_", "_y")):
        for field, short in _TAG_FIELDS:
            df[side_prefix + short] = df[field + suffix].apply(nth_element, args=(0,))

    for field, short in _TAG_FIELDS:
        for rank in range(3):
            df[short + str(rank + 1)] = df[field].apply(nth_element, args=(rank,))


def _tag_helper_columns():
    """Return the list-valued tag columns that must not reach the final frame."""
    names = []
    for field, _ in _TAG_FIELDS:
        names.extend([field + "_x", field + "_y", field])
    return names


def join_features(device_df, diary_df, cc_df):
    """Join device, click/conversion and diary data into one feature frame.

    ``device_df`` is inner-joined to ``cc_df`` on ``device_id == cl_id`` and
    the result is inner-joined to ``diary_df`` on ``card_id``. The tag
    features described in ``_add_tag_features`` are then added, the
    per-column null counts above zero are printed, and ``cl_id`` plus all
    list-valued tag columns are dropped.

    Returns the resulting DataFrame.
    """
    device_clicks = pd.merge(device_df, cc_df, how="inner", left_on="device_id", right_on="cl_id")
    df = pd.merge(device_clicks, diary_df, how="inner", on="card_id")

    _add_tag_features(df)

    print("df:")
    nullseries = df.isnull().sum()
    print(nullseries[nullseries > 0])

    return df.drop(columns=["cl_id"] + _tag_helper_columns())


def join_device_diary(device_id, diary_ids, device_df, diary_df):
    """Build the feature frame for one device against a set of diaries.

    The row(s) of ``device_df`` whose ``device_id`` equals ``device_id`` are
    left-joined to the rows of ``diary_df`` whose ``card_id`` is in
    ``diary_ids``. The tag features described in ``_add_tag_features`` are
    added and the list-valued tag columns are dropped, so the output columns
    line up with those produced by ``join_features`` for the same tags.

    Returns the resulting DataFrame.
    """
    a_df = device_df.loc[device_df["device_id"] == device_id]
    # ``assign`` returns a new frame, so the device id is never written into
    # a slice of the caller's ``diary_df`` (avoids SettingWithCopy issues).
    b_df = diary_df.loc[diary_df["card_id"].isin(diary_ids)].assign(device_id=device_id)

    df = pd.merge(a_df, b_df, how="left", on="device_id")

    _add_tag_features(df)

    return df.drop(columns=_tag_helper_columns())
