Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
F
ffm-baseline
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ML
ffm-baseline
Commits
1e4ca86c
Commit
1e4ca86c
authored
Oct 16, 2019
by
高雅喆
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
增加首页精选的日记点击和美购首页的美购点击
parent
d52cd012
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
76 additions
and
24 deletions
+76
-24
evaluation_metrics.py
eda/smart_rank/evaluation_metrics.py
+76
-24
No files found.
eda/smart_rank/evaluation_metrics.py
View file @
1e4ca86c
...
...
@@ -180,11 +180,11 @@ def get_2_tags_coincide_rate(device_order_tags, device_portrait_result, portrait
return
result
def
get_user_order_info_yesterday
():
def
get_user_order_info_yesterday
(
order_date
,
order_date_tomorrow
):
# 获取昨天下单的用户设备id,下单的美购,美购对应的tag
# api_order只有用户的user_id,一个user_id对应多个device_id
# 用户一次可以下多个订单(美购),一个美购对应多个tag
sql_
order_
device_info_yesterday
=
"""
sql_device_info_yesterday
=
"""
SELECT tmp1.user_id,
c.device_id,
tmp1.service_ids,
...
...
@@ -226,25 +226,75 @@ def get_user_order_info_yesterday():
WHERE c.device_id IS NOT NULL
"""
.
format
(
order_date
=
order_date
,
order_date_tomorrow
=
order_date_tomorrow
)
mysql_results
=
get_data_by_mysql
(
'172.16.30.141'
,
3306
,
'work'
,
'BJQaT9VzDcuPBqkd'
,
'zhengxing'
,
sql_
order_
device_info_yesterday
)
sql_device_info_yesterday
)
device_ids_info
=
[(
i
[
"device_id"
],
int
(
i
[
"pay_time"
]))
for
i
in
mysql_results
]
all_device_order_tags
=
{
i
[
"device_id"
]:
[
int
(
tag
)
for
tag
in
i
[
"tag_ids"
]
.
split
(
","
)]
for
i
in
mysql_results
}
return
device_ids_info
,
all_device_order_tags
def
get_user_diary_click_info_yesterday
():
pass
def
get_user_service_click_info_yesterday
():
pass
all_device_action_tags
=
{
i
[
"device_id"
]:
[
int
(
tag
)
for
tag
in
i
[
"tag_ids"
]
.
split
(
","
)]
for
i
in
mysql_results
}
return
device_ids_info
,
all_device_action_tags
def get_user_diary_click_info_yesterday(click_date, click_date_tomorrow):
    """Load per-device diary clicks from the home "featured" feed.

    Queries ``jerry_test.user_click_diary_log`` for rows whose
    ``action_from`` is ``'home精选'`` and ``action`` is ``'on_click_card'``,
    with ``click_time`` strictly between ``click_date`` and
    ``click_date_tomorrow``. Clicks are joined to their diary tags, only tags
    with ``tag_type`` below 4 are kept, and rows are grouped by device.

    One device can click many diaries, and one diary can carry many tags.

    Args:
        click_date: Lower bound (exclusive) substituted into the SQL text.
        click_date_tomorrow: Upper bound (exclusive) substituted into the SQL
            text.

    Returns:
        A 2-tuple ``(device_ids_info, all_device_action_tags)``:
        ``device_ids_info`` is a list of ``(device_id, click_time)`` pairs,
        where ``click_time`` is the latest click as an integer unix
        timestamp; ``all_device_action_tags`` maps each ``device_id`` to the
        list of its distinct tag ids as integers.
    """
    # NOTE(review): both dates are interpolated straight into the SQL text,
    # so callers must pass trusted values only.
    query_template = """
SELECT cl_id device_id,
max(click_time) AS click_time,
group_concat(DISTINCT `diary_id` separator ',') diary_ids,
group_concat(DISTINCT `tag_id` separator ',') tag_ids
FROM
(SELECT d.cl_id,
d.diary_id,
unix_timestamp(d.click_time) AS click_time,
e.tag_id
FROM jerry_test.user_click_diary_log d
LEFT JOIN eagle.src_mimas_prod_api_diary_tags e ON d.diary_id = e.diary_id
LEFT JOIN eagle.src_zhengxing_api_tag f ON e.tag_id = f.id
WHERE d.action_from='home精选'
AND d.action='on_click_card'
AND d.click_time>'{click_date}'
AND d.click_time<'{click_date_tomorrow}'
AND f.tag_type+0 <'4'+0) tmp2
GROUP BY cl_id
"""
    query = query_template.format(
        click_date=click_date,
        click_date_tomorrow=click_date_tomorrow,
    )

    # NOTE(review): connection settings (including the password) are embedded
    # in source; presumably the arguments are host, port, user, password,
    # database, sql -- confirm against get_data_by_mysql.
    rows = get_data_by_mysql(
        '172.16.40.158',
        4000,
        'root',
        '3SYz54LS9#^9sBvC',
        'jerry_test',
        query,
    )

    # First pass: (device, latest click timestamp) pairs, in result order.
    device_ids_info = []
    for row in rows:
        device_ids_info.append((row["device_id"], int(row["click_time"])))

    # Second pass: split the comma-joined tag ids and convert them to ints.
    all_device_action_tags = {}
    for row in rows:
        tag_ids = []
        for raw_tag in row["tag_ids"].split(","):
            tag_ids.append(int(raw_tag))
        all_device_action_tags[row["device_id"]] = tag_ids

    return device_ids_info, all_device_action_tags
def get_user_service_click_info_yesterday(click_date, click_date_tomorrow):
    """Load per-device service (welfare) clicks from the welfare home list.

    Queries ``jerry_test.user_click_service_log`` for rows whose
    ``action_from`` is ``'welfare_home_list_item'`` and ``action`` is
    ``'goto_welfare_detail'``, with ``click_time`` strictly between
    ``click_date`` and ``click_date_tomorrow``. Clicks are joined to their
    service tags, only tags with ``tag_type`` below 4 are kept, and rows are
    grouped by device.

    One device can click many services, and one service can carry many tags.

    Args:
        click_date: Lower bound (exclusive) substituted into the SQL text.
        click_date_tomorrow: Upper bound (exclusive) substituted into the SQL
            text.

    Returns:
        A 2-tuple ``(device_ids_info, all_device_action_tags)``:
        ``device_ids_info`` is a list of ``(device_id, click_time)`` pairs,
        where ``click_time`` is the latest click as an integer unix
        timestamp; ``all_device_action_tags`` maps each ``device_id`` to the
        list of its distinct tag ids as integers.
    """
    # NOTE(review): both dates are interpolated straight into the SQL text,
    # so callers must pass trusted values only.
    query_template = """
SELECT cl_id device_id,
max(click_time) AS click_time,
group_concat(DISTINCT `service_id` separator ',') service_ids,
group_concat(DISTINCT `tag_id` separator ',') tag_ids
FROM
(SELECT d.cl_id,
d.service_id,
unix_timestamp(d.click_time) AS click_time,
e.tag_id
FROM jerry_test.user_click_service_log d
LEFT JOIN eagle.src_zhengxing_api_servicetag e ON d.service_id = e.service_id
LEFT JOIN eagle.src_zhengxing_api_tag f ON e.tag_id = f.id
WHERE d.action_from='welfare_home_list_item'
AND d.action='goto_welfare_detail'
AND d.click_time>'{click_date}'
AND d.click_time<'{click_date_tomorrow}'
AND f.tag_type+0 <'4'+0) tmp2
GROUP BY cl_id
"""
    query = query_template.format(
        click_date=click_date,
        click_date_tomorrow=click_date_tomorrow,
    )

    # NOTE(review): connection settings (including the password) are embedded
    # in source; presumably the arguments are host, port, user, password,
    # database, sql -- confirm against get_data_by_mysql.
    rows = get_data_by_mysql(
        '172.16.40.158',
        4000,
        'root',
        '3SYz54LS9#^9sBvC',
        'jerry_test',
        query,
    )

    # First pass: (device, latest click timestamp) pairs, in result order.
    device_ids_info = []
    for row in rows:
        device_ids_info.append((row["device_id"], int(row["click_time"])))

    # Second pass: split the comma-joined tag ids and convert them to ints.
    all_device_action_tags = {}
    for row in rows:
        tag_ids = []
        for raw_tag in row["tag_ids"].split(","):
            tag_ids.append(int(raw_tag))
        all_device_action_tags[row["device_id"]] = tag_ids

    return device_ids_info, all_device_action_tags
if
__name__
==
'__main__'
:
try
:
parser
=
argparse
.
ArgumentParser
(
description
=
'画像匹配度的统计'
)
my_yesterday
=
str
(
datetime
.
date
.
today
()
-
datetime
.
timedelta
(
days
=
1
))
parser
.
add_argument
(
"-o"
,
"--order_date"
,
type
=
str
,
dest
=
"order_date"
,
default
=
my_yesterday
,
help
=
"统计的
下单
日期"
)
parser
.
add_argument
(
"-o"
,
"--order_date"
,
type
=
str
,
dest
=
"order_date"
,
default
=
my_yesterday
,
help
=
"统计的
行为
日期"
)
parser
.
add_argument
(
"-log1"
,
"--log1_file"
,
type
=
str
,
dest
=
"portrait_stat_log_path"
,
default
=
"portrait_stat.log"
,
help
=
"画像统计的日志地址"
)
parser
.
add_argument
(
"-log2"
,
"--log2_file"
,
type
=
str
,
dest
=
"debug_portrait_stat_log_path"
,
...
...
@@ -288,24 +338,26 @@ if __name__ == '__main__':
for
action
in
action_type
:
# 获取昨天产生行为的设备id、以及行为对应的tag
device_ids_lst
=
list
()
all_device_
order
_tags
=
dict
()
all_device_
action
_tags
=
dict
()
if
"order"
in
action_type
:
device_ids_lst
,
all_device_
order_tags
=
get_user_order_info_yesterday
(
)
device_ids_lst
,
all_device_
action_tags
=
get_user_order_info_yesterday
(
order_date
,
order_date_tomorrow
)
elif
"diary"
in
action_type
:
device_ids_lst
,
all_device_order_tags
=
get_user_diary_click_info_yesterday
()
device_ids_lst
,
all_device_action_tags
=
get_user_diary_click_info_yesterday
(
order_date
,
order_date_tomorrow
)
elif
"service"
in
action_type
:
device_ids_lst
,
all_device_order_tags
=
get_user_service_click_info_yesterday
()
device_ids_lst
,
all_device_action_tags
=
get_user_service_click_info_yesterday
(
order_date
,
order_date_tomorrow
)
else
:
break
# tags扩展2级tags
all_device_
order
_tags2
=
dict
()
for
device
in
all_device_
order
_tags
:
tags
=
all_device_
order
_tags
[
device
]
all_device_
action
_tags2
=
dict
()
for
device
in
all_device_
action
_tags
:
tags
=
all_device_
action
_tags
[
device
]
for
tag
in
tags
:
tags2
=
all_3tag_2tag
.
get
(
tag
,
[])
tags
+=
tags2
all_device_
order
_tags2
[
device
]
=
tags
all_device_
action
_tags2
[
device
]
=
tags
# 用户的去除支付行为的画像
all_device_portrait_result
=
dict
()
...
...
@@ -325,7 +377,7 @@ if __name__ == '__main__':
debug_all_device_portrait_result
[
device
]
=
debug_portrait_result
# 比较两个tag列表的重合率
result
=
get_2_tags_coincide_rate
(
all_device_
order
_tags2
,
all_device_portrait_result
,
cmd_portrait_top_n
,
result
=
get_2_tags_coincide_rate
(
all_device_
action
_tags2
,
all_device_portrait_result
,
cmd_portrait_top_n
,
cmd_coincide_n
)
# 有画像没匹配上的用户的画像信息
...
...
@@ -347,7 +399,7 @@ if __name__ == '__main__':
for
device
in
no_coincide_devices
:
no_coincide_devices_debug
=
dict
()
device_portrait_n
=
all_device_portrait_result
[
device
][:
args
.
portrait_top_n
]
device_order_tags
=
all_device_
order
_tags2
[
device
]
device_order_tags
=
all_device_
action
_tags2
[
device
]
debug_device_portrait_result
=
debug_all_device_portrait_result
[
device
]
no_coincide_devices_debug
[
device
]
=
{
"画像的前{top_n}个tag"
.
format
(
top_n
=
args
.
portrait_top_n
):
[
debug_device_portrait_result
[
tag
]
for
tag
in
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment