import os
import random
import shutil
import time
import timeit
from datetime import datetime
from pathlib import Path

import tensorflow as tf
from sklearn.model_selection import train_test_split

from models.esmm.fe import click_fe, device_fe, fe, tractate_fe
from models.esmm.input_fn import esmm_input_fn
from models.esmm.model import esmm_model_fn, model_export
from models.esmm.tractate_model import (PREDICTION_ALL_COLUMNS, model_predict_tractate)
from utils.cache import get_essm_model_save_path, set_essm_model_save_path


def _validate_feature_columns():
    """Check that training and prediction declare the same feature columns.

    Prints the number of training feature columns and both set differences
    (prediction-only columns, then training-only columns).

    Raises:
        ValueError: if ``PREDICTION_ALL_COLUMNS`` and the union of the
            ``tractate_fe`` INT/FLOAT/CATEGORICAL column lists differ.
    """
    tractate_train_columns = set(
        tractate_fe.INT_COLUMNS + tractate_fe.FLOAT_COLUMNS + tractate_fe.CATEGORICAL_COLUMNS)
    print("features: " + str(len(tractate_train_columns)))
    tractate_predict_columns = set(PREDICTION_ALL_COLUMNS)

    only_in_predict = tractate_predict_columns.difference(tractate_train_columns)
    only_in_train = tractate_train_columns.difference(tractate_predict_columns)
    print(only_in_predict)
    print(only_in_train)

    # Explicit raise instead of ``assert`` so the check still runs under ``python -O``.
    if only_in_predict or only_in_train:
        raise ValueError(
            "prediction and training feature columns differ: "
            "only in prediction={}, only in training={}".format(only_in_predict, only_in_train))


def _benchmark_predictions(predict_fn, device_dict, tractate_dict, rounds=5, sample_size=200, device_pool=20):
    """Time a few predictions against the exported model and print the results.

    Each round picks one random device id out of the first ``device_pool``
    keys of ``device_dict`` and up to ``sample_size`` random keys of
    ``tractate_dict``, calls ``model_predict_tractate`` and prints the first
    10 entries of its result together with the elapsed time.

    Args:
        predict_fn: predictor passed through to ``model_predict_tractate``.
        device_dict: mapping keyed by device id.
        tractate_dict: mapping keyed by tractate id.
        rounds: number of timed prediction calls.
        sample_size: maximum number of tractate ids sampled per call.
        device_pool: number of leading device ids to choose from.
    """
    device_ids = list(device_dict.keys())[:device_pool]
    tractate_ids = list(tractate_dict.keys())

    if not device_ids or not tractate_ids:
        # Nothing to sample from; sampling would otherwise raise.
        print("skip prediction benchmark: no device or tractate data available")
        return

    # random.sample raises ValueError when asked for more items than exist.
    candidate_count = min(sample_size, len(tractate_ids))

    for _ in range(rounds):
        started = timeit.default_timer()
        predictions = model_predict_tractate(
            random.choice(device_ids), random.sample(tractate_ids, candidate_count),
            device_dict, tractate_dict, predict_fn)
        # Stop the clock before printing so console I/O is not counted.
        elapsed = timeit.default_timer() - started
        print(predictions[:10])
        print("total prediction cost {:.5f}s".format(elapsed), "\n")


def main():
    """Train, evaluate, export and smoke-test the tractate ESMM model.

    Steps:
      1. Verify that training and prediction feature columns match.
      2. Read the CSV data under the dataset path, run the tractate, device
         and click feature engineering and join the results.
      3. Split the data (20% held out, then 20% of the remainder used for
         validation) and train ``esmm_model_fn`` with
         ``tf.estimator.train_and_evaluate``; any existing model directory is
         deleted first.
      4. Export the model with ``model_export``, record the export path via
         ``set_essm_model_save_path`` and read it back.
      5. Load the exported model as a predictor and time a few predictions.

    Raises:
        ValueError: if the training and prediction feature columns differ.
    """
    time_begin = time.time()

    tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.INFO)

    _validate_feature_columns()

    # Local alternative: Path("~/data/cvr_data").expanduser()
    dataset_path = Path("/srv/apps/node2vec_git/cvr_data/")  # server
    tractate_df, tractate_click_df, tractate_conversion_df = tractate_fe.read_csv_data(dataset_path)
    tractate_df = tractate_fe.tractate_feature_engineering(tractate_df)
    device_df = device_fe.read_csv_data(dataset_path)
    device_df = device_fe.device_feature_engineering(device_df, "tractate")

    cc_df = click_fe.click_feature_engineering(tractate_click_df, tractate_conversion_df)
    df = tractate_fe.join_features(device_df, tractate_df, cc_df)

    # The first split's 20% hold-out is not used below; the split is kept so
    # the train/validation proportions stay the same.
    train_df, _ = train_test_split(df, test_size=0.2)
    train_df, val_df = train_test_split(train_df, test_size=0.2)

    all_features = fe.build_features(
        df, tractate_fe.INT_COLUMNS, tractate_fe.FLOAT_COLUMNS, tractate_fe.CATEGORICAL_COLUMNS)
    params = {"feature_columns": all_features, "hidden_units": [360, 200, 80, 2], "learning_rate": 0.2}

    # Start from an empty model directory so no earlier checkpoint is reused.
    model_path = str(Path("/data/files/model_tmp/tractate/"))
    if os.path.exists(model_path):
        shutil.rmtree(model_path)

    session_config = tf.compat.v1.ConfigProto()
    session_config.gpu_options.allow_growth = True
    session_config.gpu_options.per_process_gpu_memory_fraction = 0.9
    estimator_config = tf.estimator.RunConfig(session_config=session_config)

    model = tf.estimator.Estimator(
        model_fn=esmm_model_fn, params=params, model_dir=model_path, config=estimator_config)
    train_spec = tf.estimator.TrainSpec(input_fn=lambda: esmm_input_fn(train_df, shuffle=True), max_steps=50000)
    eval_spec = tf.estimator.EvalSpec(input_fn=lambda: esmm_input_fn(val_df, shuffle=False))
    # First element of the returned tuple holds the evaluation metrics.
    eval_metrics = tf.estimator.train_and_evaluate(model, train_spec, eval_spec)[0]
    print("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@")
    print(eval_metrics)
    print("ctr_auc: " + str(eval_metrics["ctr_auc"]))
    print("ctcvr_auc: " + str(eval_metrics["ctcvr_auc"]))
    print("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@")

    model_export_path = str(Path("/data/files/models/tractate/"))
    save_path = model_export(model, all_features, model_export_path)
    print("save to: " + save_path)
    set_essm_model_save_path("tractate", save_path)
    print("============================================================")

    # Read the path back through the cache helper before loading the model.
    save_path = get_essm_model_save_path("tractate")
    print("load path: " + save_path)

    # To load a fixed export instead, e.g.:
    #   local:  str(Path("~/data/models/tractate/1598236893").expanduser())
    #   server: "/data/files/models/tractate/1598254242"
    predict_fn = tf.contrib.predictor.from_saved_model(save_path)

    device_dict = device_fe.get_device_dict_from_redis()
    tractate_dict = tractate_fe.get_tractate_dict_from_redis()
    print("redis data: " + str(len(device_dict)) + " " + str(len(tractate_dict)))

    _benchmark_predictions(predict_fn, device_dict, tractate_dict)

    total_time = (time.time() - time_begin) / 60
    print("total cost {:.2f} mins at {}".format(total_time, datetime.now()))


# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
