from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import pymysql
import smtplib
from email.mime.text import MIMEText
from email.utils import formataddr
from email.mime.multipart import MIMEMultipart
from email.mime.application import MIMEApplication
import redis
import datetime
from pyspark import SparkConf
import time
from pyspark.sql import SparkSession
import json
import numpy as np
import pandas as pd
from pyspark.sql.functions import lit
from pyspark.sql.functions import concat_ws
from tool import *


def get_hot_search_words_tag():
    """Fetch hot-search keywords together with their matching tag rows.

    The query joins ``api_hot_search_words`` to ``api_tag`` on keyword/name,
    keeps non-deleted keywords whose tag is online and has ``tag_type`` below
    4, and orders by ``a.sorted`` descending.

    Returns:
        Whatever ``get_data_by_mysql`` returns for the query (presumably a
        list of row dicts with ``keywords``, ``id`` and ``tag_type`` -- the
        helper is not defined in this file, confirm against ``tool``).
        On any exception the error is printed and an empty list is returned.
    """
    query = """
                        SELECT a.keywords,
                               b.id,
                               b.tag_type
                        FROM api_hot_search_words a
                        LEFT JOIN api_tag b ON a.keywords=b.name
                        WHERE a.is_delete=0
                          AND b.tag_type+0<'4'+0
                          AND b.is_online=1
                        ORDER BY a.sorted DESC
                        """
    try:
        return get_data_by_mysql(
            '172.16.30.141', 3306, 'work', 'BJQaT9VzDcuPBqkd', 'zhengxing', query
        )
    except Exception as err:
        # Best-effort: report the failure and let the caller see "no rows".
        print(err)
        return []

def get_user_history_order_service_tag(user_id):
    """Cache the tag ids of services a user has ordered into Redis.

    Looks up the tags (``tag_type`` below 4) attached to every service that
    appears in the user's ``api_order`` rows with ``status=1``, serialises the
    tag id list as JSON and stores it under
    ``user:history_order:tags:user_id:<user_id>`` with a 30-day expiry.

    Args:
        user_id: Id of the user to process. Falsy values (``None``, ``0``,
            ``""``) are ignored.

    Returns:
        ``user_id`` once the key has been written, or ``None`` when
        ``user_id`` is falsy.
    """
    if not user_id:
        return None

    # NOTE(review): credentials are hard-coded here; consider moving them to
    # configuration.
    db_zhengxing = pymysql.connect(host="172.16.30.141", port=3306, user="work",
                                   password="BJQaT9VzDcuPBqkd",
                                   db="zhengxing", cursorclass=pymysql.cursors.DictCursor)
    try:
        cur_zhengxing = db_zhengxing.cursor()
        try:
            # Let the driver escape user_id instead of formatting it into the SQL.
            sql = "select a.tag_id from api_servicetag a left join api_tag b on a.tag_id=b.id " \
                  "where a.service_id in (select service_id from api_order where user_id=%s and status=1) " \
                  "and b.tag_type+0 <'4'+0 "
            cur_zhengxing.execute(sql, (user_id,))
            tags_list = [row["tag_id"] for row in cur_zhengxing.fetchall()]
        finally:
            # Close the cursor before its connection, even when the query fails.
            cur_zhengxing.close()
    finally:
        db_zhengxing.close()

    # Write to redis.
    user_history_order_tags_key = "user:history_order:tags:user_id:" + str(user_id)
    tags_list_json = json.dumps(tags_list)
    redis_client = redis.StrictRedis.from_url('redis://:ReDis!GmTx*0aN9@172.16.40.173:6379')
    # Set value and TTL in one command so the key can never be left without
    # an expiry if the process dies between two separate calls.
    redis_client.set(user_history_order_tags_key, tags_list_json, ex=30 * 24 * 60 * 60)
    return user_id


if __name__ == '__main__':
    # Step 1: collect the distinct ids of users that have an order with
    # status=1 paid after 2015-08-16.
    # NOTE(review): credentials are hard-coded here; consider moving them to
    # configuration.
    db_zhengxing = pymysql.connect(host="172.16.30.141", port=3306, user="work",
                                   password="BJQaT9VzDcuPBqkd",
                                   db="zhengxing", cursorclass=pymysql.cursors.DictCursor)
    try:
        cur_zhengxing = db_zhengxing.cursor()
        try:
            sql_user_ids = "select distinct user_id from api_order where status=1 and pay_time>'2015-08-16'"
            cur_zhengxing.execute(sql_user_ids)
            user_ids = [row["user_id"] for row in cur_zhengxing.fetchall()]
        finally:
            # Close the cursor before its connection, even when the query fails.
            cur_zhengxing.close()
    finally:
        db_zhengxing.close()

    # Step 2: portrait cold start -- publish default tag weights to redis.
    redis_client = redis.StrictRedis.from_url('redis://:ReDis!GmTx*0aN9@172.16.40.173:6379')
    hot_search_rows = get_hot_search_words_tag()
    fixed_hot_words = ["瘦脸针", "双眼皮", "线雕", "水光针", "吸脂", "热玛吉", "鼻综合", "玻尿酸", "光子嫩肤"]
    coldstart_hashes = [
        # Hot-search tags keyed by tag id.
        ("user:service_coldstart_tags2", {row["id"]: 0.2 for row in hot_search_rows}),
        # The same hot-search tags keyed by keyword text.
        ("user:service_coldstart_tags2_name", {row["keywords"]: 0.2 for row in hot_search_rows}),
        # A fixed, hand-picked keyword list.
        ("user:service_coldstart_tags3", {word: 0.2 for word in fixed_hot_words}),
    ]
    for redis_key, mapping in coldstart_hashes:
        if not mapping:
            # get_hot_search_words_tag() returns [] on failure; hmset with an
            # empty mapping is an error, so keep the existing hash instead.
            print("skip empty cold-start mapping for " + redis_key)
            continue
        redis_client.hmset(redis_key, mapping)

    # Step 3: fan the per-user work out over Spark.
    sparkConf = SparkConf().set("spark.hive.mapred.supports.subdirectories", "true") \
        .set("spark.hadoop.mapreduce.input.fileinputformat.input.dir.recursive", "true") \
        .set("spark.tispark.plan.allow_index_double_read", "false") \
        .set("spark.tispark.plan.allow_index_read", "true") \
        .set("spark.sql.extensions", "org.apache.spark.sql.TiExtensions") \
        .set("spark.tispark.pd.addresses", "172.16.40.170:2379").set("spark.io.compression.codec", "lzf") \
        .set("spark.driver.maxResultSize", "8g").set("spark.sql.avro.compression.codec", "snappy")

    spark = SparkSession.builder.config(conf=sparkConf).enableHiveSupport().getOrCreate()
    spark.sparkContext.setLogLevel("WARN")
    # Ship the helper module to the executors.
    spark.sparkContext.addPyFile("/srv/apps/ffm-baseline_git/eda/smart_rank/tool.py")
    user_ids_rdd = spark.sparkContext.parallelize(user_ids)
    result = user_ids_rdd.repartition(100).map(lambda x: get_user_history_order_service_tag(x))
    # collect() forces evaluation so every user's redis key is actually written.
    result.collect()
