Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
F
ffm-baseline
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ML
ffm-baseline
Commits
cd788597
Commit
cd788597
authored
5 years ago
by
张彦钊
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
change test file
parent
666c3051
master
mr/beta/bug22
offic
rtt
updatedb
zhao
zhao22
1 merge request
!32
新增把esmm排序结果重排
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
26 additions
and
76 deletions
+26
-76
rerank_esmm.py
rerank_esmm.py
+26
-76
No files found.
rerank_esmm.py
View file @
cd788597
...
...
@@ -6,24 +6,29 @@ import pandas as pd
from
sqlalchemy
import
create_engine
def get_mysql_data(host, port, user, passwd, db, sql):
    """Run one SQL statement on a MySQL-protocol server and return all rows.

    Args:
        host: Server host name or IP address.
        port: Server TCP port.
        user: Login user name.
        passwd: Login password.
        db: Name of the database (schema) to select after connecting.
        sql: Complete SQL statement to execute. It is sent as-is, so callers
            must not embed unescaped untrusted input in it.

    Returns:
        The result of ``cursor.fetchall()`` for the statement.

    Raises:
        Any exception raised by ``pymysql`` while connecting, executing or
        fetching is propagated to the caller; the connection is closed first.
    """
    # Use a separate local name so the ``db`` argument (the schema name) is
    # not shadowed by the connection object.
    connection = pymysql.connect(host=host, port=port, user=user, passwd=passwd, db=db)
    try:
        # The cursor context manager closes the cursor even if execute fails.
        with connection.cursor() as cursor:
            cursor.execute(sql)
            return cursor.fetchall()
    finally:
        # Always release the connection, including on query errors.
        connection.close()
def get_esmm_users():
    """Fetch the distinct (device_id, city_id) pairs recorded for yesterday.

    Reads ``data_feed_exposure_precise`` in the ``jerry_prod`` database,
    filtering on ``stat_date`` equal to yesterday's date (``YYYY-MM-DD``).
    The query is issued once, through ``get_mysql_data``, which owns the
    connection lifecycle.

    Returns:
        A list of the rows returned by the query, or an empty list when the
        query fails for any reason (best-effort behaviour kept for callers).
    """
    import logging

    yesterday = datetime.date.today() - datetime.timedelta(days=1)
    stat_date = yesterday.strftime("%Y-%m-%d")
    sql = "select distinct device_id,city_id from data_feed_exposure_precise " \
          "where stat_date = '{}'".format(stat_date)
    try:
        # NOTE(review): connection credentials are hard-coded here; they
        # should be moved to configuration / a secret store.
        rows = get_mysql_data('172.16.40.158', 4000, 'root', '3SYz54LS9#^9sBvC', 'jerry_prod', sql)
        return list(rows)
    except Exception:
        # Keep the empty-list fallback, but leave a trace instead of hiding
        # the failure; KeyboardInterrupt/SystemExit are no longer swallowed.
        logging.getLogger(__name__).exception("get_esmm_users: query failed")
        return []
def
get_user_profile
(
device_id
):
def
get_user_profile
(
device_id
,
top_k
=
5
):
try
:
r
=
redis
.
Redis
(
host
=
"172.16.40.135"
,
port
=
5379
,
password
=
""
,
db
=
2
)
key
=
"user:portrait_tags:cl_id:"
+
str
(
device_id
)
...
...
@@ -37,8 +42,8 @@ def get_user_profile(device_id):
tag_score
[
name_tag
[
i
[
"content"
]]]
=
i
[
"score"
]
tag_sort
=
sorted
(
tag_score
.
items
(),
key
=
lambda
x
:
x
[
1
],
reverse
=
True
)
tags
=
[]
if
len
(
tag_sort
)
>
5
:
for
i
in
range
(
5
):
if
len
(
tag_sort
)
>
top_k
:
for
i
in
range
(
top_k
):
tags
.
append
(
i
[
0
])
else
:
for
i
in
tag_sort
:
...
...
@@ -54,12 +59,7 @@ def get_user_profile(device_id):
def
get_searchworlds_to_tagid
():
try
:
sql
=
'select id, name from api_tag where is_online = 1 and tag_type < 4'
db
=
pymysql
.
connect
(
host
=
'172.16.30.141'
,
port
=
3306
,
user
=
'work'
,
passwd
=
'BJQaT9VzDcuPBqkd'
,
db
=
'zhengxing'
)
cursor
=
db
.
cursor
()
cursor
.
execute
(
sql
)
tag_id
=
cursor
.
fetchall
()
db
.
close
()
tag_id
=
get_mysql_data
(
'172.16.30.141'
,
3306
,
'work'
,
'BJQaT9VzDcuPBqkd'
,
'zhengxing'
,
sql
)
searchworlds_to_tagid
=
{}
for
i
in
tag_id
:
searchworlds_to_tagid
[
i
[
1
]]
=
i
[
0
]
...
...
@@ -87,28 +87,22 @@ def get_queues(device_id,city_id):
return
[]
def
tag_boost
(
cid_str
,
tag_list
):
if
cid_str
is
not
None
and
cid_str
!=
""
:
cids
=
cid_str
.
split
(
","
)
try
:
if
len
(
cids
)
>
6
and
len
(
tag_list
)
>
0
:
db
=
pymysql
.
connect
(
host
=
'172.16.40.158'
,
port
=
4000
,
user
=
'root'
,
passwd
=
'3SYz54LS9#^9sBvC'
,
db
=
'eagle'
)
sql
=
"select id,group_concat(diary_id) from "
\
"(select a.diary_id,b.id from src_mimas_prod_api_diary_tags a left join src_zhengxing_api_tag b "
\
"on a.tag_id = b.id where b.tag_type < '4' and a.diary_id in {}) tmp "
\
"where id in {} group by id"
.
format
(
tuple
(
cids
),
tuple
(
tag_list
))
cursor
=
db
.
cursor
()
cursor
.
execute
(
sql
)
result
=
cursor
.
fetchall
()
db
.
close
()
result
=
get_mysql_data
(
'172.16.40.158'
,
4000
,
'root'
,
'3SYz54LS9#^9sBvC'
,
'eagle'
,
sql
)
if
len
(
result
)
>
0
:
tag_cids
=
{}
left_cids
=
[]
for
i
in
result
:
tmp
=
i
[
1
]
.
split
(
","
)
tmp
=
[
i
for
i
in
cids
if
i
in
tmp
]
tag_cids
[
i
[
0
]]
=
tmp
left_cids
.
extend
(
tmp
)
...
...
@@ -145,60 +139,23 @@ def tag_boost(cid_str, tag_list):
else
:
return
cid_str
except
:
#TODO 往sentry发,并且在本地也要打出日志
return
cid_str
else
:
return
cid_str
def data_base(df2):
    """Insert or update every row of ``df2`` in ``esmm_resort_diary_queue``.

    Each row is written with ``INSERT ... ON DUPLICATE KEY UPDATE`` into the
    ``jerry_test`` database. All rows are sent on one connection and committed
    together once every statement has succeeded.

    Args:
        df2: DataFrame providing the columns ``device_id``, ``city_id``,
            ``native_queue``, ``nearby_queue``, ``nation_queue``,
            ``megacity_queue`` and ``time``. Rows are taken in positional
            order, so any index works.

    Raises:
        Any ``pymysql`` error from connecting or executing is propagated; the
        connection is closed first and nothing is committed in that case.
    """
    columns = ["device_id", "city_id", "native_queue", "nearby_queue",
               "nation_queue", "megacity_queue", "time"]
    # Placeholders are bound by the driver, which quotes and escapes every
    # value. The previous hand-built string had an unbalanced quote in the
    # VALUES list and broke on any value containing a quote character.
    query = (
        "INSERT INTO esmm_resort_diary_queue (device_id, city_id, native_queue, nearby_queue, "
        "nation_queue, megacity_queue, time) VALUES (%s, %s, %s, %s, %s, %s, %s) "
        "ON DUPLICATE KEY UPDATE device_id=%s, city_id=%s, native_queue=%s, "
        "nearby_queue=%s, nation_queue=%s, megacity_queue=%s, time=%s"
    )

    # NOTE(review): connection credentials are hard-coded here; they should be
    # moved to configuration / a secret store.
    con = pymysql.connect(host='172.16.40.158', port=4000, user='root',
                          passwd='3SYz54LS9#^9sBvC', db='jerry_test')
    try:
        with con.cursor() as cur:
            for row in df2[columns].itertuples(index=False, name=None):
                # Values were previously written as quoted text; keep sending
                # them as strings so stored values do not change.
                values = tuple(str(value) for value in row)
                # The same seven values feed the INSERT and the UPDATE part.
                cur.execute(query, values + values)
        con.commit()
    finally:
        con.close()
    print("insert or update sucess")
# try:
# for i in range(0, device_count):
# query = """INSERT INTO esmm_resort_diary_queue (device_id, city_id, native_queue,nearby_queue,
# nation_queue,megacity_queue,time) VALUES('%s', '%s', '%s', '%s',%s', '%s', '%s') \
# ON DUPLICATE KEY UPDATE device_id='%s', city_id='%s', native_queue='%s',
# nearby_queue='%s',nation_queue='%s', megacity_queue='%s',time='%s'""" % (
# df2.device_id[i], df2.city_id[i],df2.native_queue[i], df2.nearby_queue[i],df2.nation_queue[i],
# df2.megacity_queue[i],df2.time[i],df2.device_id[i], df2.city_id[i],df2.native_queue[i], df2.nearby_queue[i],df2.nation_queue[i],
# df2.megacity_queue[i],df2.time[i])
# cur.execute(query)
# con.commit()
# con.close()
# print("insert or update sucess")
# except Exception as e:
# print(e)
def
to_data_base
(
df
):
db
=
pymysql
.
connect
(
host
=
'172.16.40.158'
,
port
=
4000
,
user
=
'root'
,
passwd
=
'3SYz54LS9#^9sBvC'
,
db
=
'jerry_test'
)
cursor
=
db
.
cursor
()
sql
=
"select distinct device_id from esmm_resort_diary_queue"
cursor
.
execute
(
sql
)
result
=
cursor
.
fetchall
()
cursor
.
close
()
result
=
get_mysql_data
(
'172.16.40.158'
,
4000
,
'root'
,
'3SYz54LS9#^9sBvC'
,
'jerry_test'
,
sql
)
old_uid
=
[
i
[
0
]
for
i
in
result
]
if
len
(
old_uid
)
>
0
:
old_uid
=
set
(
df
[
"device_id"
]
.
values
)
&
set
(
old_uid
)
old_number
=
len
(
old_uid
)
if
old_number
>
0
:
db
=
pymysql
.
connect
(
host
=
'172.16.40.158'
,
port
=
4000
,
user
=
'root'
,
passwd
=
'3SYz54LS9#^9sBvC'
,
db
=
'jerry_test'
)
sql
=
"delete from esmm_resort_diary_queue where device_id in {} limit 2000"
.
format
(
tuple
(
old_uid
))
if
old_number
>
2000
:
cursor
=
db
.
cursor
()
...
...
@@ -219,33 +176,26 @@ def to_data_base(df):
print
(
"insert done"
)
if
__name__
==
"__main__"
:
users_list
=
get_esmm_users
()
total_samples
=
list
()
name_tag
=
get_searchworlds_to_tagid
()
# TODO 把下面的截断改掉
for
i
in
users_list
[:
60
]:
tag_list
=
get_user_profile
(
i
[
0
])
queues
=
get_queues
(
i
[
0
],
i
[
1
])
for
uid_city
in
users_list
[:
6
]:
tag_list
=
get_user_profile
(
uid_city
[
0
])
queues
=
get_queues
(
uid_city
[
0
],
uid_city
[
1
])
if
len
(
queues
)
>
0
and
len
(
tag_list
)
>
0
:
new_native
=
tag_boost
(
queues
[
0
],
tag_list
)
new_nearby
=
tag_boost
(
queues
[
1
],
tag_list
)
insert_time
=
str
(
datetime
.
datetime
.
now
()
.
strftime
(
'
%
Y
%
m
%
d
%
H
%
M'
))
sample
=
[
i
[
0
],
i
[
1
],
new_native
,
new_nearby
,
queues
[
2
],
queues
[
3
],
insert_time
]
sample
=
[
uid_city
[
0
],
uid_city
[
1
],
new_native
,
new_nearby
,
queues
[
2
],
queues
[
3
],
insert_time
]
total_samples
.
append
(
sample
)
if
len
(
total_samples
)
>
0
:
df
=
pd
.
DataFrame
(
total_samples
)
df
=
df
.
rename
(
columns
=
{
0
:
"device_id"
,
1
:
"city_id"
,
2
:
"native_queue"
,
3
:
"nearby_queue"
,
4
:
"nation_queue"
,
5
:
"megacity_queue"
,
6
:
"time"
})
print
(
df
.
head
(
2
))
to_data_base
(
df
)
print
(
"good boy"
)
...
...
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment