Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
F
ffm-baseline
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ML
ffm-baseline
Commits
f716fb05
Commit
f716fb05
authored
5 years ago
by
张彦钊
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
change test file
parent
5a2626a9
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
54 additions
and
42 deletions
+54
-42
rerank_esmm.py
rerank_esmm.py
+54
-42
No files found.
rerank_esmm.py
View file @
f716fb05
...
...
@@ -2,6 +2,7 @@ import pymysql
import
datetime
import
json
import
redis
import
pandas
as
pd
def
get_esmm_users
():
...
...
@@ -9,7 +10,6 @@ def get_esmm_users():
passwd
=
'3SYz54LS9#^9sBvC'
,
db
=
'jerry_prod'
)
cursor
=
db
.
cursor
()
stat_date
=
(
datetime
.
date
.
today
()
-
datetime
.
timedelta
(
days
=
1
))
.
strftime
(
"
%
Y-
%
m-
%
d"
)
print
(
stat_date
)
sql
=
"select distinct device_id,city_id from data_feed_exposure_precise "
\
"where stat_date = '{}'"
.
format
(
stat_date
)
cursor
.
execute
(
sql
)
...
...
@@ -18,26 +18,34 @@ def get_esmm_users():
return
result
def _top_tags(entries, name_to_tag, limit=5):
    """Return up to *limit* tag ids from *entries*, highest score first.

    Each entry is a dict read through its "type", "content" and "score" keys.
    Entries whose type is "tag" contribute their content directly; any other
    entry contributes only when its content is a key of *name_to_tag*, in
    which case the mapped value is used instead.
    """
    tag_score = {}
    for entry in entries:
        if entry["type"] == "tag":
            tag_score[entry["content"]] = entry["score"]
        elif entry["content"] in name_to_tag:
            tag_score[name_to_tag[entry["content"]]] = entry["score"]
    ranked = sorted(tag_score.items(), key=lambda item: item[1], reverse=True)
    # Slicing handles both "more than limit" and "limit or fewer" entries.
    # The earlier loop `for i in range(5): tags.append(i[0])` subscripted an
    # int, so every profile with more than five tags raised TypeError.
    return [tag for tag, _ in ranked[:limit]]


def get_user_profile(device_id):
    """Return the user's highest-scoring tag ids (at most five).

    Reads the JSON value stored in Redis under
    "user:portrait_tags:cl_id:<device_id>" and ranks its entries by score.
    Relies on the module-level ``name_tag`` mapping, which the ``__main__``
    section assigns from get_searchworlds_to_tagid() before calling this.

    Returns an empty list when the key does not exist or when anything goes
    wrong (connection error, malformed JSON, missing fields): the lookup is
    deliberately best-effort so one bad profile does not abort the batch.
    """
    try:
        client = redis.Redis(host="172.16.40.135", port=5379, password="", db=2)
        # A single GET replaces the previous EXISTS + GET pair; a missing key
        # comes back as None.
        raw = client.get("user:portrait_tags:cl_id:" + str(device_id))
        if raw is None:
            return []
        return _top_tags(json.loads(raw.decode("utf-8")), name_tag)
    except Exception:
        # Best-effort by design, but no longer a bare `except:` so that
        # KeyboardInterrupt / SystemExit still propagate.
        return []
def
get_searchworlds_to_tagid
():
...
...
@@ -50,7 +58,6 @@ def get_searchworlds_to_tagid():
tag_id
=
cursor
.
fetchall
()
db
.
close
()
searchworlds_to_tagid
=
{}
for
i
in
tag_id
:
searchworlds_to_tagid
[
i
[
1
]]
=
i
[
0
]
...
...
@@ -73,8 +80,9 @@ def get_queues(device_id,city_id):
else
:
return
[]
def
tag_boost
(
cid_str
,
tag_list
=
[
15
,
21
,
22
,
85
,
86
]):
if
cid_str
is
not
None
or
cid_str
!=
""
:
def
tag_boost
(
cid_str
,
tag_list
):
if
cid_str
is
not
None
and
cid_str
!=
""
:
cids
=
cid_str
.
split
(
","
)
try
:
if
len
(
cids
)
>
6
and
len
(
tag_list
)
>
0
:
...
...
@@ -140,29 +148,33 @@ def to_data_base(df,table_name = "tag_boost_device_diary_queue"):
pass
def make_sample(
    uid, city_id, native_queue, nearby_queue, megacity_queue, nation_queue
):
    """Placeholder: not implemented yet, ignores its arguments and returns None."""
    return None
if __name__ == "__main__":
    # Build one row per user: the native and nearby queues re-ranked with the
    # user's profile tags, followed by the two remaining queues unchanged.
    users_list = get_esmm_users()
    total_samples = []
    # get_user_profile() reads this module-level mapping, so it has to be
    # assigned before the loop below runs.
    name_tag = get_searchworlds_to_tagid()

    # TODO: remove the truncation below (only the first 6 users are processed).
    for row in users_list[:6]:
        device_id, city_id = row[0], row[1]
        tag_list = get_user_profile(device_id)
        queues = get_queues(device_id, city_id)
        if len(queues) > 0:
            new_native = tag_boost(queues[0], tag_list)
            new_nearby = tag_boost(queues[1], tag_list)
            total_samples.append(
                [device_id, city_id, new_native, new_nearby, queues[2], queues[3]]
            )

    if len(total_samples) > 0:
        # NOTE(review): make_sample() lists megacity_queue before nation_queue,
        # while these labels put nation_queue first -- confirm against
        # get_queues() which of queues[2] / queues[3] is which.
        df = pd.DataFrame(
            total_samples,
            columns=[
                "device_id",
                "city_id",
                "native_queue",
                "nearby_queue",
                "nation_queue",
                # Was ", megacity_queue": a stray ", " had leaked into the name.
                "megacity_queue",
            ],
        )
        print(df.head(6))
        # to_data_base(df)
        # to kv

    # Manual check of tag_boost, kept for reference. tag_boost now requires a
    # tag_list argument, so pass one when re-enabling this.
    # cid_str = "16473983,16296886,16199213,16193883,16419499,16372783,16430184,16617593,16498902,16238415,16214258,15715721,16213338,15349114,14091428,16268804,15485655,16448547,16179842,16685025,16612412,16683132,15646229,16482213,16485831,16436136,16353856,16400696,16193006,16294202,16393228,16716816,16713343,16780702,16107140,16647027,16112786,16503037,16372681,16207971,16179934,16480641,16295094,16204980,16317847,16434907,16117929,15633591,16116818"
    # print(tag_boost(cid_str))
...
...
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment