import pandas as pd

from utils.cache import redis_db_client

# Columns kept in the frame returned by device_feature_engineering.
# Left out of the selection: "channel_first", "city_first", "model_first".
DIARY_DEVICE_COLUMNS = [
    "device_id",
    "active_type",
    "active_days",
    "past_consume_ability_history",
    "potential_consume_ability_history",
    "price_sensitive_history",
    "first_demands",
    "second_demands",
    "first_solutions",
    "second_solutions",
    "first_positions",
    "second_positions",
    "projects",
]


def read_csv_data(dataset_path):
    """Read ``device.csv`` under *dataset_path* and keep one row per device.

    dataset_path: directory-like object exposing ``joinpath`` (for example a
        ``pathlib.Path``).
    return: DataFrame parsed from the "|"-separated file; when a ``device_id``
        occurs more than once only its first row is kept.
    """
    csv_path = dataset_path.joinpath("device.csv")
    devices = pd.read_csv(csv_path, sep="|")
    # De-duplicate in place so the frame that was read is the one handed back.
    devices.drop_duplicates(subset=["device_id"], keep="first", inplace=True)
    return devices


def get_device_dict_from_redis():
    """
    Load every cached device row from Redis and index the rows by device id.

    The value stored at "cvr:db:device:column" is decoded as UTF-8 and split on
    "|" to obtain the column names. Each value of the hash "cvr:db:device" is
    decoded and split the same way, and its fields are matched to the column
    names by position. Demand / solution / position / project columns are
    further split on "," into lists; every other column stays a string.

    return: {device_id: {first_demands: [], city_first: ""}}
    raises: IndexError when a row holds more fields than there are column
        names; KeyError when a row provides no "device_id" field.
    """
    db_key = "cvr:db:device"
    column_key = db_key + ":column"
    # Built once up front instead of once per field of every row.
    list_columns = frozenset((
        "first_demands", "second_demands", "first_solutions", "second_solutions",
        "first_positions", "second_positions", "projects",
    ))

    columns = str(redis_db_client.get(column_key), "utf-8").split("|")
    raw_rows = redis_db_client.hgetall(db_key)

    res = {}
    for raw_row in raw_rows.values():
        fields = str(raw_row, "utf-8").split("|")
        record = {}
        for index, elem in enumerate(fields):
            col_name = columns[index]
            if col_name in list_columns:
                # NOTE(review): an empty field yields [""] here rather than [] —
                # confirm that is what downstream consumers expect.
                record[col_name] = elem.split(",")
            else:
                record[col_name] = elem
        # Index the row only after all of its fields are parsed, so that
        # "device_id" may sit at any column position and the entry is stored once.
        res[record["device_id"]] = record
    return res


def device_feature_engineering(df):
    """Prepare the raw device frame for use as model features.

    Works on a copy, so *df* itself is left untouched.

    * The demand / solution / position / project columns are split on ","
      into lists; cells that do not split into a list (e.g. missing values)
      become empty lists.
    * Missing values in "city_first" and "model_first" become "".
    * A summary of the columns that still contain nulls, plus the frame
      shape, is printed.

    return: the processed frame restricted to DIARY_DEVICE_COLUMNS.
    """
    multi_value_columns = (
        "first_demands", "second_demands", "first_solutions", "second_solutions",
        "first_positions", "second_positions", "projects",
    )
    text_columns = ("city_first", "model_first")

    features = df.copy()

    for name in multi_value_columns:
        pieces = features[name].str.split(",")
        # Non-string cells come back from .str.split as non-lists; map them to [].
        features[name] = pieces.apply(lambda cell: cell if isinstance(cell, list) else [])

    for name in text_columns:
        features[name] = features[name].fillna("")

    null_counts = features.isnull().sum()
    print("device:")
    print(null_counts[null_counts > 0])
    print(features.shape)
    return features[DIARY_DEVICE_COLUMNS]
