kv.py 6.64 KB
Newer Older
张彦钊's avatar
张彦钊 committed
1 2
import pymysql
import datetime
张彦钊's avatar
张彦钊 committed
3
import json
张彦钊's avatar
张彦钊 committed
4
import redis
张彦钊's avatar
张彦钊 committed
5
import pandas as pd
张彦钊's avatar
张彦钊 committed
6
from sqlalchemy import create_engine
张彦钊's avatar
张彦钊 committed
7 8


张彦钊's avatar
张彦钊 committed
9 10 11 12 13 14 15 16 17
def get_mysql_data(host,port,user,passwd,db,sql):
    db = pymysql.connect(host=host, port=port, user=user, passwd=passwd,db=db)
    cursor = db.cursor()
    cursor.execute(sql)
    result = cursor.fetchall()
    db.close()
    return result


张彦钊's avatar
张彦钊 committed
18
def get_esmm_users():
张彦钊's avatar
张彦钊 committed
19 20 21 22
    try:
        stat_date = (datetime.date.today() - datetime.timedelta(days=1)).strftime("%Y-%m-%d")
        sql = "select distinct device_id,city_id from data_feed_exposure_precise " \
              "where stat_date = '{}'".format(stat_date)
张彦钊's avatar
张彦钊 committed
23 24
        result = get_mysql_data('172.16.40.158', 4000, 'root','3SYz54LS9#^9sBvC','jerry_prod',sql)
        result = list(result)
张彦钊's avatar
张彦钊 committed
25 26 27 28
        return result
    except:
        return []

张彦钊's avatar
张彦钊 committed
29 30


张彦钊's avatar
张彦钊 committed
31
def get_user_profile(device_id,top_k = 5):
张彦钊's avatar
张彦钊 committed
32

33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51
    try:
        r = redis.Redis(host="172.16.40.135", port=5379, password="", db=2)
        key = "user:portrait_tags:cl_id:" + str(device_id)
        if r.exists(key):
            tmp = json.loads(r.get(key).decode('utf-8'))
            tag_score = {}
            for i in tmp:
                if i["type"] == "tag":
                    tag_score[i["content"]] = i["score"]
                elif i["content"] in name_tag.keys():
                    tag_score[name_tag[i["content"]]] = i["score"]
            tag_sort = sorted(tag_score.items(), key=lambda x: x[1], reverse=True)
            tags = []
            if len(tag_sort) > top_k:
                for i in range(top_k):
                    tags.append(tag_sort[i][0])
            else:
                for i in tag_sort:
                    tags.append(i[0])
张彦钊's avatar
张彦钊 committed
52

53 54 55 56 57
            return tags
        else:
            return []
    except:
        return []
张彦钊's avatar
张彦钊 committed
58

张彦钊's avatar
张彦钊 committed
59

张彦钊's avatar
张彦钊 committed
60

张彦钊's avatar
张彦钊 committed
61 62 63
def get_searchworlds_to_tagid():
    try:
        sql = 'select id, name from api_tag where is_online = 1 and tag_type < 4'
张彦钊's avatar
张彦钊 committed
64
        tag_id = get_mysql_data('172.16.30.141', 3306, 'work', 'BJQaT9VzDcuPBqkd', 'zhengxing', sql)
张彦钊's avatar
张彦钊 committed
65 66 67
        searchworlds_to_tagid = {}
        for i in tag_id:
            searchworlds_to_tagid[i[1]] = i[0]
张彦钊's avatar
张彦钊 committed
68

张彦钊's avatar
张彦钊 committed
69 70
        return searchworlds_to_tagid
    except Exception as e:
张彦钊's avatar
张彦钊 committed
71
        return {}
张彦钊's avatar
张彦钊 committed
72 73


张彦钊's avatar
张彦钊 committed
74
def get_queues(device_id,city_id):
张彦钊's avatar
张彦钊 committed
75 76 77 78 79 80 81 82 83 84 85 86 87 88
    try:
        db = pymysql.connect(host='172.16.40.158', port=4000, user='root',
                             passwd='3SYz54LS9#^9sBvC', db='jerry_test')
        cursor = db.cursor()
        sql = "select native_queue, nearby_queue, nation_queue, megacity_queue from esmm_device_diary_queue " \
              "where device_id = '{}' and city_id = '{}'".format(device_id, city_id)
        cursor.execute(sql)
        result = cursor.fetchone()
        db.close()
        if result is not None:
            return list(result)
        else:
            return []
    except:
张彦钊's avatar
张彦钊 committed
89
        return []
张彦钊's avatar
张彦钊 committed
90

张彦钊's avatar
张彦钊 committed
91 92

def tag_boost(cid_str, tag_list):
张彦钊's avatar
张彦钊 committed
93
    if cid_str is not None and cid_str != "" and len(tag_list) != 0:
张彦钊's avatar
张彦钊 committed
94
        cids = cid_str.split(",")
张彦钊's avatar
张彦钊 committed
95 96 97 98 99 100
        try:
            if len(cids) > 6 and len(tag_list) > 0:
                sql = "select id,group_concat(diary_id) from " \
                      "(select a.diary_id,b.id from src_mimas_prod_api_diary_tags a left join src_zhengxing_api_tag b " \
                      "on a.tag_id = b.id where b.tag_type < '4' and a.diary_id in {}) tmp " \
                      "where id in {} group by id".format(tuple(cids), tuple(tag_list))
张彦钊's avatar
张彦钊 committed
101
                result = get_mysql_data('172.16.40.158', 4000, 'root', '3SYz54LS9#^9sBvC','eagle',sql)
张彦钊's avatar
张彦钊 committed
102 103 104 105 106
                if len(result) > 0:
                    tag_cids = {}
                    left_cids = []
                    for i in result:
                        tmp = i[1].split(",")
张彦钊's avatar
张彦钊 committed
107
                        tmp = [i for i in cids if i in tmp]
张彦钊's avatar
张彦钊 committed
108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140
                        tag_cids[i[0]] = tmp
                        left_cids.extend(tmp)

                    left_cids = list(set(left_cids))
                    right_cids = [i for i in cids if i not in left_cids]
                    tag_cids["right"] = right_cids
                    tag_list.append("right")
                    sort_cids = []
                    n = 0
                    while n != len(tag_cids) - 1:
                        for i in tag_list:
                            if i in tag_cids.keys():
                                if len(tag_cids[i]) > 0:
                                    sort_cids.append(tag_cids[i][0])
                                    value = tag_cids[i]
                                    value.pop(0)
                                    tag_cids[i] = value
                                    if len(value) == 0 and i != "right":
                                        n = n + 1

                    if len(tag_cids["right"]) > 0:
                        sort_cids.extend(tag_cids["right"])

                    news_ids = []
                    for id in sort_cids:
                        if id not in news_ids:
                            news_ids.append(id)

                    new_str = ",".join([str(i) for i in news_ids])
                    return new_str

                else:
                    return cid_str
张彦钊's avatar
张彦钊 committed
141 142
            else:
                return cid_str
张彦钊's avatar
张彦钊 committed
143
        except:
张彦钊's avatar
张彦钊 committed
144
            #TODO 往sentry发,并且在本地也要打出日志
张彦钊's avatar
张彦钊 committed
145
            return cid_str
张彦钊's avatar
张彦钊 committed
146

张彦钊's avatar
张彦钊 committed
147 148 149
    else:
        return cid_str

张彦钊's avatar
张彦钊 committed
150

张彦钊's avatar
张彦钊 committed
151 152
def to_data_base(df):
    sql = "select distinct device_id from esmm_resort_diary_queue"
张彦钊's avatar
张彦钊 committed
153
    result = get_mysql_data('172.16.40.158', 4000, 'root','3SYz54LS9#^9sBvC', 'jerry_test',sql)
张彦钊's avatar
张彦钊 committed
154 155 156 157 158
    old_uid = [i[0] for i in result]
    if len(old_uid) > 0:
        old_uid = set(df["device_id"].values)&set(old_uid)
        old_number = len(old_uid)
        if old_number > 0:
张彦钊's avatar
张彦钊 committed
159 160
            db = pymysql.connect(host='172.16.40.158', port=4000, user='root',
                                 passwd='3SYz54LS9#^9sBvC', db='jerry_test')
张彦钊's avatar
张彦钊 committed
161 162 163 164 165 166
            sql = "delete from esmm_resort_diary_queue where device_id in {}".format(tuple(old_uid))

            cursor = db.cursor()
            cursor.execute(sql)
            db.commit()
            cursor.close()
张彦钊's avatar
张彦钊 committed
167 168 169 170
            db.close()

    yconnect = create_engine('mysql+pymysql://root:3SYz54LS9#^9sBvC@172.16.40.158:4000/jerry_test?charset=utf8')
    pd.io.sql.to_sql(df, "esmm_resort_diary_queue", yconnect, schema='jerry_test', if_exists='append', index=False,
张彦钊's avatar
张彦钊 committed
171
                     chunksize=200)
张彦钊's avatar
张彦钊 committed
172 173
    print("insert done")

张彦钊's avatar
张彦钊 committed
174

175 176 177 178 179 180 181 182 183
def get_all_users():
    try:
        sql = "select distinct device_id,city_id from esmm_device_diary_queue"
        result = get_mysql_data('172.16.40.158', 4000, 'root','3SYz54LS9#^9sBvC','jerry_test',sql)
        result = list(result)
        return result
    except:
        return []

张彦钊's avatar
张彦钊 committed
184
def
185

张彦钊's avatar
张彦钊 committed
186
if __name__ == "__main__":
张彦钊's avatar
张彦钊 committed
187 188 189
    device_id = "868663038800476"
    city_id = "beijing"
    queues = get_queues(device_id, city_id)
张彦钊's avatar
张彦钊 committed
190

张彦钊's avatar
张彦钊 committed
191

张彦钊's avatar
张彦钊 committed
192 193 194 195 196 197 198 199 200 201 202 203