Commit cd788597 authored by 张彦钊

change test file

parent 666c3051
......@@ -6,24 +6,29 @@ import pandas as pd
from sqlalchemy import create_engine
def get_mysql_data(host, port, user, passwd, db, sql):
    """Run one SQL statement against a MySQL server and return all fetched rows.

    Args:
        host: Server host name or IP address.
        port: Server TCP port.
        user: Login user name.
        passwd: Login password.
        db: Name of the database to select on connect.
        sql: Complete SQL text; it is handed to ``cursor.execute`` unchanged,
            so the caller is responsible for any escaping.

    Returns:
        The value of ``cursor.fetchall()`` for the executed statement.

    Raises:
        Whatever ``pymysql`` raises while connecting, executing or fetching.
    """
    # Keep the connection under its own name instead of rebinding ``db``.
    connection = pymysql.connect(host=host, port=port, user=user, passwd=passwd, db=db)
    try:
        cursor = connection.cursor()
        cursor.execute(sql)
        return cursor.fetchall()
    finally:
        # Close even when execute()/fetchall() raises so the connection is not leaked.
        connection.close()
def get_esmm_users():
    """Return yesterday's distinct (device_id, city_id) rows.

    Queries ``data_feed_exposure_precise`` for rows whose ``stat_date`` equals
    yesterday's date (formatted ``YYYY-MM-DD``) through ``get_mysql_data``.

    Returns:
        A list of the fetched rows, or an empty list when anything fails
        (deliberate best-effort behaviour kept from the original).
    """
    try:
        stat_date = (datetime.date.today() - datetime.timedelta(days=1)).strftime("%Y-%m-%d")
        sql = "select distinct device_id,city_id from data_feed_exposure_precise " \
              "where stat_date = '{}'".format(stat_date)
        # Single round trip through the shared helper; the hand-rolled
        # connection that ran the very same query first has been dropped.
        # NOTE(review): credentials are hard-coded here; consider moving them to config.
        result = get_mysql_data('172.16.40.158', 4000, 'root', '3SYz54LS9#^9sBvC', 'jerry_prod', sql)
        return list(result)
    except Exception:
        # Narrowed from a bare ``except:`` so KeyboardInterrupt/SystemExit
        # still propagate; callers keep receiving [] on failure.
        return []
def get_user_profile(device_id):
def get_user_profile(device_id,top_k = 5):
try:
r = redis.Redis(host="172.16.40.135", port=5379, password="", db=2)
key = "user:portrait_tags:cl_id:" + str(device_id)
......@@ -37,8 +42,8 @@ def get_user_profile(device_id):
tag_score[name_tag[i["content"]]] = i["score"]
tag_sort = sorted(tag_score.items(), key=lambda x: x[1], reverse=True)
tags = []
if len(tag_sort) > 5:
for i in range(5):
if len(tag_sort) > top_k:
for i in range(top_k):
tags.append(i[0])
else:
for i in tag_sort:
......@@ -54,12 +59,7 @@ def get_user_profile(device_id):
def get_searchworlds_to_tagid():
try:
sql = 'select id, name from api_tag where is_online = 1 and tag_type < 4'
db = pymysql.connect(host='172.16.30.141', port=3306, user='work',
passwd='BJQaT9VzDcuPBqkd', db='zhengxing')
cursor = db.cursor()
cursor.execute(sql)
tag_id = cursor.fetchall()
db.close()
tag_id = get_mysql_data('172.16.30.141', 3306, 'work', 'BJQaT9VzDcuPBqkd', 'zhengxing', sql)
searchworlds_to_tagid = {}
for i in tag_id:
searchworlds_to_tagid[i[1]] = i[0]
......@@ -87,28 +87,22 @@ def get_queues(device_id,city_id):
return []
def tag_boost(cid_str, tag_list):
if cid_str is not None and cid_str != "":
cids = cid_str.split(",")
try:
if len(cids) > 6 and len(tag_list) > 0:
db = pymysql.connect(host='172.16.40.158', port=4000, user='root', passwd='3SYz54LS9#^9sBvC',
db='eagle')
sql = "select id,group_concat(diary_id) from " \
"(select a.diary_id,b.id from src_mimas_prod_api_diary_tags a left join src_zhengxing_api_tag b " \
"on a.tag_id = b.id where b.tag_type < '4' and a.diary_id in {}) tmp " \
"where id in {} group by id".format(tuple(cids), tuple(tag_list))
cursor = db.cursor()
cursor.execute(sql)
result = cursor.fetchall()
db.close()
result = get_mysql_data('172.16.40.158', 4000, 'root', '3SYz54LS9#^9sBvC','eagle',sql)
if len(result) > 0:
tag_cids = {}
left_cids = []
for i in result:
tmp = i[1].split(",")
tmp = [i for i in cids if i in tmp]
tag_cids[i[0]] = tmp
left_cids.extend(tmp)
......@@ -145,60 +139,23 @@ def tag_boost(cid_str, tag_list):
else:
return cid_str
except:
#TODO 往sentry发,并且在本地也要打出日志
return cid_str
else:
return cid_str
def data_base(df2):
    """Insert or update one ``esmm_resort_diary_queue`` row per row of ``df2``.

    Args:
        df2: Table-like object (presumably a pandas DataFrame, confirm against
            the caller) providing the columns device_id, city_id, native_queue,
            nearby_queue, nation_queue, megacity_queue and time.

    Raises:
        Whatever ``pymysql`` raises; nothing is committed in that case and the
        connection is still closed.
    """
    columns = ["device_id", "city_id", "native_queue", "nearby_queue",
               "nation_queue", "megacity_queue", "time"]
    # Placeholders are filled in by the driver, which quotes and escapes each
    # value. The original interpolated the values itself and had a malformed
    # quote (%s') in the VALUES list, which made the statement invalid SQL.
    query = (
        "INSERT INTO esmm_resort_diary_queue (device_id, city_id, native_queue, nearby_queue, "
        "nation_queue, megacity_queue, time) VALUES (%s, %s, %s, %s, %s, %s, %s) "
        "ON DUPLICATE KEY UPDATE device_id=%s, city_id=%s, native_queue=%s, "
        "nearby_queue=%s, nation_queue=%s, megacity_queue=%s, time=%s"
    )
    # NOTE(review): credentials are hard-coded here; consider moving them to config.
    con = pymysql.connect(host='172.16.40.158', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test')
    try:
        cur = con.cursor()
        # Iterate positionally so a non 0..n-1 index on df2 cannot break the lookup.
        for row in df2[columns].itertuples(index=False, name=None):
            # The original sent every value as a quoted string; str() keeps
            # that and avoids handing numpy scalars to the driver.
            values = tuple(str(value) for value in row)
            # Same seven values feed both the INSERT and the UPDATE part.
            cur.execute(query, values + values)
        con.commit()
    finally:
        # Release the connection even when an execute() fails.
        con.close()
    print("insert or update sucess")
# try:
# for i in range(0, device_count):
# query = """INSERT INTO esmm_resort_diary_queue (device_id, city_id, native_queue,nearby_queue,
# nation_queue,megacity_queue,time) VALUES('%s', '%s', '%s', '%s',%s', '%s', '%s') \
# ON DUPLICATE KEY UPDATE device_id='%s', city_id='%s', native_queue='%s',
# nearby_queue='%s',nation_queue='%s', megacity_queue='%s',time='%s'""" % (
# df2.device_id[i], df2.city_id[i],df2.native_queue[i], df2.nearby_queue[i],df2.nation_queue[i],
# df2.megacity_queue[i],df2.time[i],df2.device_id[i], df2.city_id[i],df2.native_queue[i], df2.nearby_queue[i],df2.nation_queue[i],
# df2.megacity_queue[i],df2.time[i])
# cur.execute(query)
# con.commit()
# con.close()
# print("insert or update sucess")
# except Exception as e:
# print(e)
def to_data_base(df):
db = pymysql.connect(host='172.16.40.158', port=4000, user='root',
passwd='3SYz54LS9#^9sBvC', db='jerry_test')
cursor = db.cursor()
sql = "select distinct device_id from esmm_resort_diary_queue"
cursor.execute(sql)
result = cursor.fetchall()
cursor.close()
result = get_mysql_data('172.16.40.158', 4000, 'root','3SYz54LS9#^9sBvC', 'jerry_test',sql)
old_uid = [i[0] for i in result]
if len(old_uid) > 0:
old_uid = set(df["device_id"].values)&set(old_uid)
old_number = len(old_uid)
if old_number > 0:
db = pymysql.connect(host='172.16.40.158', port=4000, user='root',
passwd='3SYz54LS9#^9sBvC', db='jerry_test')
sql = "delete from esmm_resort_diary_queue where device_id in {} limit 2000".format(tuple(old_uid))
if old_number > 2000:
cursor = db.cursor()
......@@ -219,33 +176,26 @@ def to_data_base(df):
print("insert done")
if __name__ == "__main__":
    # Script entry point: build re-ranked diary queues for a few users and
    # hand them to to_data_base().
    users_list = get_esmm_users()
    total_samples = list()
    # NOTE(review): name_tag looks like it is read as a module-level global by
    # get_user_profile, so the name must stay as is. Confirm before renaming.
    name_tag = get_searchworlds_to_tagid()
    # TODO: remove the truncation below
    # Only the uid_city loop is kept; the stale duplicate loop over ``i`` and
    # the duplicated ``sample`` assignment that used ``i`` are gone.
    for uid_city in users_list[:6]:
        tag_list = get_user_profile(uid_city[0])
        queues = get_queues(uid_city[0], uid_city[1])
        if len(queues) > 0 and len(tag_list) > 0:
            # Only the first two queues are passed through tag_boost; the
            # remaining two are stored unchanged.
            new_native = tag_boost(queues[0], tag_list)
            new_nearby = tag_boost(queues[1], tag_list)
            insert_time = str(datetime.datetime.now().strftime('%Y%m%d%H%M'))
            sample = [uid_city[0], uid_city[1], new_native, new_nearby, queues[2], queues[3], insert_time]
            total_samples.append(sample)
    if len(total_samples) > 0:
        df = pd.DataFrame(total_samples)
        df = df.rename(columns={0: "device_id", 1: "city_id", 2: "native_queue",
                                3: "nearby_queue", 4: "nation_queue", 5: "megacity_queue", 6: "time"})
        print(df.head(2))
        to_data_base(df)
    print("good boy")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment