Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
F
ffm-baseline
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ML
ffm-baseline
Commits
b1c9091b
Commit
b1c9091b
authored
Oct 16, 2019
by
高雅喆
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
增加首页精选的日记点击和美购首页的美购点击
parent
16570e50
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
163 additions
and
125 deletions
+163
-125
evaluation_metrics.py
eda/smart_rank/evaluation_metrics.py
+163
-125
No files found.
eda/smart_rank/evaluation_metrics.py
View file @
b1c9091b
...
@@ -180,6 +180,66 @@ def get_2_tags_coincide_rate(device_order_tags, device_portrait_result, portrait
...
@@ -180,6 +180,66 @@ def get_2_tags_coincide_rate(device_order_tags, device_portrait_result, portrait
return
result
return
result
def get_user_order_info_yesterday():
    """Collect per-device order information for the configured order date.

    Reads the module-level names ``order_date`` and ``order_date_tomorrow``
    (assigned in the ``__main__`` section) and substitutes them into the
    query as the exclusive lower / upper bounds on ``api_order.pay_time``.

    What the query does:
      * keeps orders with ``status = 1`` whose service tags have
        ``tag_type < 4``;
      * groups by ``user_id``, keeping the latest ``pay_time`` (as a unix
        timestamp) and the distinct, comma-joined service ids and tag ids;
      * ``api_order`` only carries ``user_id`` and one user may map to
        several devices, so a single device is picked per user by taking
        the maximum ``statistic_device_user.device_id`` and resolving it
        through ``statistic_device``;
      * drops users for whom no device id could be resolved.

    Returns:
        tuple: ``(device_ids_info, all_device_order_tags)`` where
        ``device_ids_info`` is a list of ``(device_id, pay_time)`` tuples
        (``pay_time`` converted with ``int``) and ``all_device_order_tags``
        maps ``device_id`` to the list of tag ids (as ``int``) attached to
        the ordered services.  If two rows share a ``device_id`` the later
        row overwrites the earlier one in the mapping.
    """
    # A user can order several services at once and a service can carry
    # several tags, hence the group_concat aggregation per user.
    sql_order_device_info_yesterday = """
        SELECT tmp1.user_id,
               c.device_id,
               tmp1.service_ids,
               tmp1.tag_ids,
               tmp1.pay_time
        FROM
          (SELECT tmp.user_id,
                  tmp.service_ids,
                  tmp.tag_ids,
                  tmp.pay_time,
                  max(tmp.device_id) device_id_id
           FROM
             (SELECT a.user_id,
                     a.service_ids,
                     a.tag_ids,
                     a.pay_time,
                     b.device_id
              FROM
                (SELECT user_id,
                        max(pay_time) AS pay_time,
                        group_concat(DISTINCT `service_id` separator ',') service_ids,
                        group_concat(DISTINCT `tag_id` separator ',') tag_ids
                 FROM
                   (SELECT d.user_id,
                           d.service_id,
                           unix_timestamp(d.pay_time) AS pay_time,
                           e.tag_id
                    FROM api_order d
                    LEFT JOIN api_servicetag e ON d.service_id = e.service_id
                    LEFT JOIN api_tag f ON e.tag_id = f.id
                    WHERE d.status=1
                      AND d.pay_time>'{order_date}'
                      AND d.pay_time<'{order_date_tomorrow}'
                      AND f.tag_type+0 <'4'+0) tmp2
                 GROUP BY user_id) a
              LEFT JOIN statistic_device_user b ON a.user_id = b.user_id) tmp
           GROUP BY tmp.user_id) tmp1
        LEFT JOIN statistic_device c ON tmp1.device_id_id = c.id
        WHERE c.device_id IS NOT NULL
        """.format(order_date=order_date,
                   order_date_tomorrow=order_date_tomorrow)

    # NOTE(review): host and credentials are hard-coded here; they should be
    # loaded from configuration / a secret store rather than source control.
    mysql_results = get_data_by_mysql('172.16.30.141', 3306, 'work',
                                      'BJQaT9VzDcuPBqkd', 'zhengxing',
                                      sql_order_device_info_yesterday)

    # (device_id, pay_time) pairs, one per result row.
    device_ids_info = [(row["device_id"], int(row["pay_time"]))
                       for row in mysql_results]

    # device_id -> tag ids parsed from the comma-joined group_concat column.
    all_device_order_tags = {
        row["device_id"]: [int(tag) for tag in row["tag_ids"].split(",")]
        for row in mysql_results
    }
    return device_ids_info, all_device_order_tags
def get_user_diary_click_info_yesterday():
    """Placeholder for the "diary" action source; not implemented yet.

    The caller in the ``__main__`` section unpacks the result into
    ``device_ids_lst, all_device_order_tags``, so silently returning
    ``None`` (the previous ``pass`` body) only surfaced later as an opaque
    unpacking ``TypeError``.  Fail explicitly instead.

    Raises:
        NotImplementedError: always, until the diary-click query is written.
    """
    raise NotImplementedError(
        "get_user_diary_click_info_yesterday is not implemented yet; "
        "it must return (device_ids_info, all_device_order_tags)"
    )
def get_user_service_click_info_yesterday():
    """Placeholder for the "service" action source; not implemented yet.

    The caller in the ``__main__`` section unpacks the result into
    ``device_ids_lst, all_device_order_tags``, so silently returning
    ``None`` (the previous ``pass`` body) only surfaced later as an opaque
    unpacking ``TypeError``.  Fail explicitly instead.

    Raises:
        NotImplementedError: always, until the service-click query is
            written.
    """
    raise NotImplementedError(
        "get_user_service_click_info_yesterday is not implemented yet; "
        "it must return (device_ids_info, all_device_order_tags)"
    )
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
try
:
try
:
parser
=
argparse
.
ArgumentParser
(
description
=
'画像匹配度的统计'
)
parser
=
argparse
.
ArgumentParser
(
description
=
'画像匹配度的统计'
)
...
@@ -196,6 +256,7 @@ if __name__ == '__main__':
...
@@ -196,6 +256,7 @@ if __name__ == '__main__':
parser
.
add_argument
(
"-n"
,
"--normalization_size"
,
type
=
int
,
dest
=
"normalization_size"
,
default
=
7
,
parser
.
add_argument
(
"-n"
,
"--normalization_size"
,
type
=
int
,
dest
=
"normalization_size"
,
default
=
7
,
help
=
"天数差归一化的区间"
)
help
=
"天数差归一化的区间"
)
parser
.
add_argument
(
"-d"
,
"--decay_days"
,
type
=
int
,
dest
=
"decay_days"
,
default
=
180
,
help
=
"分数衰减的天数"
)
parser
.
add_argument
(
"-d"
,
"--decay_days"
,
type
=
int
,
dest
=
"decay_days"
,
default
=
180
,
help
=
"分数衰减的天数"
)
parser
.
add_argument
(
"-a"
,
"--action_type"
,
type
=
list
,
dest
=
"action_type"
,
default
=
[
"order"
],
help
=
"计算匹配度的行为"
)
args
=
parser
.
parse_args
()
args
=
parser
.
parse_args
()
order_date
=
args
.
order_date
order_date
=
args
.
order_date
order_date_tomorrow
=
str
(
datetime
.
datetime
.
strptime
(
order_date
,
'
%
Y-
%
m-
%
d'
)
+
datetime
.
timedelta
(
days
=
1
))
order_date_tomorrow
=
str
(
datetime
.
datetime
.
strptime
(
order_date
,
'
%
Y-
%
m-
%
d'
)
+
datetime
.
timedelta
(
days
=
1
))
...
@@ -207,6 +268,7 @@ if __name__ == '__main__':
...
@@ -207,6 +268,7 @@ if __name__ == '__main__':
exponential
=
args
.
exponential
exponential
=
args
.
exponential
normalization_size
=
args
.
normalization_size
normalization_size
=
args
.
normalization_size
decay_days
=
args
.
decay_days
decay_days
=
args
.
decay_days
action_type
=
args
.
action_type
LOG_DIR
=
"/home/gmuser/gyz/log/"
LOG_DIR
=
"/home/gmuser/gyz/log/"
my_today
=
str
(
datetime
.
date
.
today
())
my_today
=
str
(
datetime
.
date
.
today
())
...
@@ -216,55 +278,6 @@ if __name__ == '__main__':
...
@@ -216,55 +278,6 @@ if __name__ == '__main__':
log1
=
logging
.
getLogger
(
'log1'
)
log1
=
logging
.
getLogger
(
'log1'
)
log2
=
logging
.
getLogger
(
'log2'
)
log2
=
logging
.
getLogger
(
'log2'
)
# 获取昨天下单的用户设备id,下单的美购,美购对应的tag
# api_order只有用户的user_id,一个user_id对应多个device_id
# 用户一次可以下多个订单(美购),一个美购对应多个tag
sql_order_device_info_yesterday
=
"""
SELECT tmp1.user_id,
c.device_id,
tmp1.service_ids,
tmp1.tag_ids,
tmp1.pay_time
FROM
(SELECT tmp.user_id,
tmp.service_ids,
tmp.tag_ids,
tmp.pay_time,
max(tmp.device_id) device_id_id
FROM
(SELECT a.user_id,
a.service_ids,
a.tag_ids,
a.pay_time,
b.device_id
FROM
(SELECT user_id,
max(pay_time) AS pay_time,
group_concat(DISTINCT `service_id` separator ',') service_ids,
group_concat(DISTINCT `tag_id` separator ',') tag_ids
FROM
(SELECT d.user_id,
d.service_id,
unix_timestamp(d.pay_time) AS pay_time,
e.tag_id
FROM api_order d
LEFT JOIN api_servicetag e ON d.service_id = e.service_id
LEFT JOIN api_tag f ON e.tag_id = f.id
WHERE d.status=1
AND d.pay_time>'{order_date}'
AND d.pay_time<'{order_date_tomorrow}'
AND f.tag_type+0 <'4'+0) tmp2
GROUP BY user_id) a
LEFT JOIN statistic_device_user b ON a.user_id = b.user_id) tmp
GROUP BY tmp.user_id) tmp1
LEFT JOIN statistic_device c ON tmp1.device_id_id = c.id
WHERE c.device_id IS NOT NULL
"""
.
format
(
order_date
=
order_date
,
order_date_tomorrow
=
order_date_tomorrow
)
mysql_results
=
get_data_by_mysql
(
'172.16.30.141'
,
3306
,
'work'
,
'BJQaT9VzDcuPBqkd'
,
'zhengxing'
,
sql_order_device_info_yesterday
)
device_ids_lst
=
[(
i
[
"device_id"
],
int
(
i
[
"pay_time"
]))
for
i
in
mysql_results
]
all_device_order_tags
=
{
i
[
"device_id"
]:
[
int
(
tag
)
for
tag
in
i
[
"tag_ids"
]
.
split
(
","
)]
for
i
in
mysql_results
}
# 获取搜索词及其近义词对应的tag
# 获取搜索词及其近义词对应的tag
all_word_tags
=
get_all_word_tags
()
all_word_tags
=
get_all_word_tags
()
all_tag_tag_type
=
get_all_tag_tag_type
()
all_tag_tag_type
=
get_all_tag_tag_type
()
...
@@ -272,82 +285,106 @@ if __name__ == '__main__':
...
@@ -272,82 +285,106 @@ if __name__ == '__main__':
# 3级tag对应的2级tag
# 3级tag对应的2级tag
all_3tag_2tag
=
get_all_3tag_2tag
()
all_3tag_2tag
=
get_all_3tag_2tag
()
# 昨天下单了的用户的美购tags(转成2级tags)
for
action
in
action_type
:
all_device_order_tags2
=
dict
()
# 获取昨天产生行为的设备id、以及行为对应的tag
for
device
in
all_device_order_tags
:
device_ids_lst
=
list
()
tags
=
all_device_order_tags
[
device
]
all_device_order_tags
=
dict
()
for
tag
in
tags
:
if
"order"
in
action_type
:
tags2
=
all_3tag_2tag
.
get
(
tag
,
[])
device_ids_lst
,
all_device_order_tags
=
get_user_order_info_yesterday
()
tags
+=
tags2
elif
"diary"
in
action_type
:
all_device_order_tags2
[
device
]
=
tags
device_ids_lst
,
all_device_order_tags
=
get_user_diary_click_info_yesterday
()
elif
"service"
in
action_type
:
# 昨天下单了的用户的去除支付行为的画像
device_ids_lst
,
all_device_order_tags
=
get_user_service_click_info_yesterday
()
all_device_portrait_result
=
dict
()
else
:
debug_all_device_portrait_result
=
dict
()
break
for
order_info
in
device_ids_lst
:
device
=
order_info
[
0
]
# tags扩展2级tags
pay_time
=
order_info
[
1
]
all_device_order_tags2
=
dict
()
portrait_result
,
debug_portrait_result
=
get_user_service_portrait_not_alipay
(
device
,
all_word_tags
,
for
device
in
all_device_order_tags
:
all_tag_tag_type
,
pay_time
,
tags
=
all_device_order_tags
[
device
]
all_3tag_2tag
,
for
tag
in
tags
:
version
=
version
,
tags2
=
all_3tag_2tag
.
get
(
tag
,
[])
exponential
=
exponential
,
tags
+=
tags2
normalization_size
=
normalization_size
,
all_device_order_tags2
[
device
]
=
tags
decay_days
=
decay_days
,
size
=-
1
)
# 用户的去除支付行为的画像
all_device_portrait_result
[
device
]
=
portrait_result
all_device_portrait_result
=
dict
()
debug_all_device_portrait_result
[
device
]
=
debug_portrait_result
debug_all_device_portrait_result
=
dict
()
for
order_info
in
device_ids_lst
:
# 比较两个tag列表的重合率
device
=
order_info
[
0
]
result
=
get_2_tags_coincide_rate
(
all_device_order_tags2
,
all_device_portrait_result
,
cmd_portrait_top_n
,
pay_time
=
order_info
[
1
]
cmd_coincide_n
)
portrait_result
,
debug_portrait_result
=
get_user_service_portrait_not_alipay
(
device
,
all_word_tags
,
all_tag_tag_type
,
pay_time
,
# 有画像没匹配上的用户的画像信息
all_3tag_2tag
,
no_coincide_devices
=
result
[
"not_coincide_have_portrait_device_ids"
]
version
=
version
,
no_coincide_devices_debug
=
dict
()
exponential
=
exponential
,
log2
.
info
({
"统计日期"
:
my_today
})
normalization_size
=
normalization_size
,
log2
.
info
({
"参数信息"
:
args
})
decay_days
=
decay_days
,
log2
.
info
({
"版本"
:
"英赫版"
if
version
==
1
else
"翔宇版"
})
size
=-
1
)
for
device
in
no_coincide_devices
:
all_device_portrait_result
[
device
]
=
portrait_result
debug_all_device_portrait_result
[
device
]
=
debug_portrait_result
# 比较两个tag列表的重合率
result
=
get_2_tags_coincide_rate
(
all_device_order_tags2
,
all_device_portrait_result
,
cmd_portrait_top_n
,
cmd_coincide_n
)
# 有画像没匹配上的用户的画像信息
no_coincide_devices
=
result
[
"not_coincide_have_portrait_device_ids"
]
no_coincide_devices_debug
=
dict
()
no_coincide_devices_debug
=
dict
()
device_portrait_n
=
all_device_portrait_result
[
device
][:
args
.
portrait_top_n
]
log2
.
info
({
"统计日期"
:
my_today
})
device_order_tags
=
all_device_order_tags2
[
device
]
log2
.
info
({
"参数信息"
:
args
})
debug_device_portrait_result
=
debug_all_device_portrait_result
[
device
]
log2
.
info
({
"版本"
:
"英赫版"
if
version
==
1
else
"翔宇版"
})
no_coincide_devices_debug
[
device
]
=
{
action_type_detail
=
""
"画像的前{top_n}个tag"
.
format
(
top_n
=
args
.
portrait_top_n
):
[
debug_device_portrait_result
[
tag
]
for
tag
in
if
action_type
==
"order"
:
device_portrait_n
],
action_type_detail
=
"昨天下单了的用户"
"用户下单的美购对应的tag"
:
[
debug_device_portrait_result
.
get
(
tag
,
dict
())
for
tag
in
device_order_tags
]
elif
action_type
==
"diary"
:
}
action_type_detail
=
"昨天在首页精选点击了日记的用户"
log2
.
info
(
"-"
*
66
)
elif
action_type
==
"service"
:
log2
.
info
(
no_coincide_devices_debug
)
action_type_detail
=
"昨天在美购首页点击了美购的用户"
log2
.
info
(
"
\n
"
*
6
)
else
:
pass
log2
.
info
({
"统计用户"
:
action_type_detail
})
# 统计画像更新的耗时和更新的设备数
for
device
in
no_coincide_devices
:
sql
=
"select count(*) from user_service_portrait_tags where stat_date='{my_today}'"
.
format
(
my_today
=
my_today
)
no_coincide_devices_debug
=
dict
()
portrait_device_count
=
get_data_by_mysql
(
'172.16.40.158'
,
4000
,
'root'
,
'3SYz54LS9#^9sBvC'
,
'jerry_test'
,
sql
)
device_portrait_n
=
all_device_portrait_result
[
device
][:
args
.
portrait_top_n
]
with
open
(
LOG_DIR
+
"dist_portrait.log"
,
'r'
)
as
f
:
device_order_tags
=
all_device_order_tags2
[
device
]
lines
=
f
.
readlines
()
debug_device_portrait_result
=
debug_all_device_portrait_result
[
device
]
start_datetime_str
=
lines
[
0
][:
19
]
no_coincide_devices_debug
[
device
]
=
{
end_datetime_str
=
lines
[
-
1
][:
19
]
"画像的前{top_n}个tag"
.
format
(
top_n
=
args
.
portrait_top_n
):
[
debug_device_portrait_result
[
tag
]
for
tag
in
start_datetime
=
datetime
.
datetime
.
strptime
(
start_datetime_str
,
'
%
Y-
%
m-
%
d
%
H:
%
M:
%
S'
)
device_portrait_n
],
end_datetime
=
datetime
.
datetime
.
strptime
(
end_datetime_str
,
'
%
Y-
%
m-
%
d
%
H:
%
M:
%
S'
)
"用户下单的美购对应的tag"
:
[
debug_device_portrait_result
.
get
(
tag
,
dict
())
for
tag
in
device_order_tags
]
time_consuming
=
(
end_datetime
-
start_datetime
)
.
seconds
/
60
}
log2
.
info
(
"-"
*
66
)
log1
.
info
({
"画像信息统计日期"
:
my_today
})
log2
.
info
(
no_coincide_devices_debug
)
log1
.
info
({
"参数信息"
:
args
})
log2
.
info
(
"
\n
"
*
6
)
log1
.
info
({
"版本"
:
"英赫版"
if
version
==
1
else
"翔宇版"
})
log1
.
info
({
"画像更新耗时(分钟)"
:
time_consuming
})
log1
.
info
({
"画像更新的设备数"
:
portrait_device_count
[
0
][
"count(*)"
]})
# 统计画像更新的耗时和更新的设备数
log1
.
info
(
""
)
sql
=
"select count(*) from user_service_portrait_tags where stat_date='{my_today}'"
.
format
(
my_today
=
my_today
)
log1
.
info
({
"统计画像匹配度所用数据的日期"
:
order_date
})
portrait_device_count
=
get_data_by_mysql
(
'172.16.40.158'
,
4000
,
'root'
,
'3SYz54LS9#^9sBvC'
,
'jerry_test'
,
sql
)
log1
.
info
({
"统计画像的选取前n个tag"
:
cmd_portrait_top_n
})
with
open
(
LOG_DIR
+
"dist_portrait.log"
,
'r'
)
as
f
:
log1
.
info
({
"重合个数"
:
cmd_coincide_n
})
lines
=
f
.
readlines
()
log1
.
info
({
"下单人数"
:
result
[
"device_count"
]})
start_datetime_str
=
lines
[
0
][:
19
]
log1
.
info
({
"比对的上的人数"
:
result
[
"coincide_count"
]})
end_datetime_str
=
lines
[
-
1
][:
19
]
log1
.
info
({
"匹配度"
:
result
[
"coincide_rate"
]})
start_datetime
=
datetime
.
datetime
.
strptime
(
start_datetime_str
,
'
%
Y-
%
m-
%
d
%
H:
%
M:
%
S'
)
log1
.
info
({
"比对不上的有画像的人数"
:
result
[
"not_coincide_have_portrait_count"
]})
end_datetime
=
datetime
.
datetime
.
strptime
(
end_datetime_str
,
'
%
Y-
%
m-
%
d
%
H:
%
M:
%
S'
)
log1
.
info
({
"比对不上的无画像的人数"
:
result
[
"not_coincide_no_portrait_count"
]})
time_consuming
=
(
end_datetime
-
start_datetime
)
.
seconds
/
60
log1
.
info
(
"="
*
66
)
log1
.
info
({
"画像信息统计日期"
:
my_today
})
log1
.
info
({
"参数信息"
:
args
})
log1
.
info
({
"版本"
:
"英赫版"
if
version
==
1
else
"翔宇版"
})
log1
.
info
({
"统计用户"
:
action_type_detail
})
log1
.
info
({
"画像更新耗时(分钟)"
:
time_consuming
})
log1
.
info
({
"画像更新的设备数"
:
portrait_device_count
[
0
][
"count(*)"
]})
log1
.
info
(
""
)
log1
.
info
({
"统计画像匹配度所用数据的日期"
:
order_date
})
log1
.
info
({
"统计画像的选取前n个tag"
:
cmd_portrait_top_n
})
log1
.
info
({
"重合个数"
:
cmd_coincide_n
})
log1
.
info
({
"下单人数"
:
result
[
"device_count"
]})
log1
.
info
({
"比对的上的人数"
:
result
[
"coincide_count"
]})
log1
.
info
({
"匹配度"
:
result
[
"coincide_rate"
]})
log1
.
info
({
"比对不上的有画像的人数"
:
result
[
"not_coincide_have_portrait_count"
]})
log1
.
info
({
"比对不上的无画像的人数"
:
result
[
"not_coincide_no_portrait_count"
]})
log1
.
info
(
"="
*
66
)
except
Exception
as
e
:
except
Exception
as
e
:
print
(
e
)
print
(
e
)
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment