1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import pandas as pd
import pymysql
def con_sql(db,sql):
cursor = db.cursor()
try:
cursor.execute(sql)
result = cursor.fetchall()
df = pd.DataFrame(list(result))
except Exception:
print("发生异常", Exception)
df = pd.DataFrame()
finally:
db.close()
return df
def exp():
db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test')
sql = "select native_queue from esmm_device_diary_queue where device_id = '358035085192742'"
cursor = db.cursor()
cursor.execute(sql)
result = cursor.fetchone()[0]
native = tuple(result.split(","))
print("total")
print(len(native))
db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_prod')
sql = "select diary_id,level1_ids,level2_ids,level3_ids from diary_feat where diary_id in {}".format(native)
df = con_sql(db,sql)
n = df.shape[0]
one = df[1].unique()
one_map = {}
for i in one:
one_map[i] = df.loc[df[1]==i].shape[0]/n
print(sorted(one_map.items(),key = lambda x:x[1]))
two = df[2].unique()
two_map = {}
print("分界线")
for i in two:
two_map[i] = df.loc[df[2] == i].shape[0] / n
print(sorted(two_map.items(), key=lambda x: x[1]))
def click():
db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_prod')
sql = "select d.cid_id,f.level1_ids,f.level2_ids from data_feed_click d left join diary_feat f " \
"on d.cid_id = f.diary_id where d.device_id = '358035085192742' " \
"and (d.cid_type = 'diary' or d.cid_type = 'diary_video') and d.stat_date > '2018-12-20'"
df = con_sql(db, sql)
n = df.shape[0]
print(n)
one = df[1].unique()
one_map = {}
for i in one:
one_map[i] = df.loc[df[1] == i].shape[0] / n
print(sorted(one_map.items(), key=lambda x: x[1],reverse=True))
two = df[2].unique()
two_map = {}
print("分界线")
for i in two:
two_map[i] = df.loc[df[2] == i].shape[0] / n
print(sorted(two_map.items(), key=lambda x: x[1],reverse=True))
if __name__ == "__main__":
click()