Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
F
ffm-baseline
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ML
ffm-baseline
Commits
b1c9091b
Commit
b1c9091b
authored
Oct 16, 2019
by
高雅喆
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
增加首页精选的日记点击和美购首页的美购点击
parent
16570e50
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
76 additions
and
39 deletions
+76
-39
evaluation_metrics.py
eda/smart_rank/evaluation_metrics.py
+76
-39
No files found.
eda/smart_rank/evaluation_metrics.py
View file @
b1c9091b
...
...
@@ -180,42 +180,7 @@ def get_2_tags_coincide_rate(device_order_tags, device_portrait_result, portrait
return
result
if
__name__
==
'__main__'
:
try
:
parser
=
argparse
.
ArgumentParser
(
description
=
'画像匹配度的统计'
)
my_yesterday
=
str
(
datetime
.
date
.
today
()
-
datetime
.
timedelta
(
days
=
1
))
parser
.
add_argument
(
"-o"
,
"--order_date"
,
type
=
str
,
dest
=
"order_date"
,
default
=
my_yesterday
,
help
=
"统计的下单日期"
)
parser
.
add_argument
(
"-log1"
,
"--log1_file"
,
type
=
str
,
dest
=
"portrait_stat_log_path"
,
default
=
"portrait_stat.log"
,
help
=
"画像统计的日志地址"
)
parser
.
add_argument
(
"-log2"
,
"--log2_file"
,
type
=
str
,
dest
=
"debug_portrait_stat_log_path"
,
default
=
"debug_portrait_stat.log"
,
help
=
"画像统计的日志地址"
)
parser
.
add_argument
(
"-t"
,
"--top"
,
type
=
int
,
dest
=
"portrait_top_n"
,
default
=
3
,
help
=
"选取画像的前n个tag去统计匹配度"
)
parser
.
add_argument
(
"-c"
,
"--coincide"
,
type
=
int
,
dest
=
"coincide_n"
,
default
=
1
,
help
=
"选取n个tag重合个数作为判断是否匹配的阈值"
)
parser
.
add_argument
(
"-v"
,
"--version"
,
type
=
int
,
dest
=
"version"
,
default
=
1
,
help
=
"选取翔宇(0),英赫(1)版本进行统计"
)
parser
.
add_argument
(
"-e"
,
"--exponential"
,
type
=
int
,
dest
=
"exponential"
,
default
=
0
,
help
=
"是否采用指数衰减"
)
parser
.
add_argument
(
"-n"
,
"--normalization_size"
,
type
=
int
,
dest
=
"normalization_size"
,
default
=
7
,
help
=
"天数差归一化的区间"
)
parser
.
add_argument
(
"-d"
,
"--decay_days"
,
type
=
int
,
dest
=
"decay_days"
,
default
=
180
,
help
=
"分数衰减的天数"
)
args
=
parser
.
parse_args
()
order_date
=
args
.
order_date
order_date_tomorrow
=
str
(
datetime
.
datetime
.
strptime
(
order_date
,
'
%
Y-
%
m-
%
d'
)
+
datetime
.
timedelta
(
days
=
1
))
portrait_stat_log_path
=
args
.
portrait_stat_log_path
debug_portrait_stat_log_path
=
args
.
debug_portrait_stat_log_path
cmd_portrait_top_n
=
args
.
portrait_top_n
cmd_coincide_n
=
args
.
coincide_n
version
=
args
.
version
exponential
=
args
.
exponential
normalization_size
=
args
.
normalization_size
decay_days
=
args
.
decay_days
LOG_DIR
=
"/home/gmuser/gyz/log/"
my_today
=
str
(
datetime
.
date
.
today
())
setup_logger
(
"log1"
,
LOG_DIR
+
portrait_stat_log_path
)
setup_logger
(
"log2"
,
LOG_DIR
+
debug_portrait_stat_log_path
)
log1
=
logging
.
getLogger
(
'log1'
)
log2
=
logging
.
getLogger
(
'log2'
)
def
get_user_order_info_yesterday
():
# 获取昨天下单的用户设备id,下单的美购,美购对应的tag
# api_order只有用户的user_id,一个user_id对应多个device_id
# 用户一次可以下多个订单(美购),一个美购对应多个tag
...
...
@@ -262,8 +227,56 @@ if __name__ == '__main__':
"""
.
format
(
order_date
=
order_date
,
order_date_tomorrow
=
order_date_tomorrow
)
mysql_results
=
get_data_by_mysql
(
'172.16.30.141'
,
3306
,
'work'
,
'BJQaT9VzDcuPBqkd'
,
'zhengxing'
,
sql_order_device_info_yesterday
)
device_ids_lst
=
[(
i
[
"device_id"
],
int
(
i
[
"pay_time"
]))
for
i
in
mysql_results
]
device_ids_info
=
[(
i
[
"device_id"
],
int
(
i
[
"pay_time"
]))
for
i
in
mysql_results
]
all_device_order_tags
=
{
i
[
"device_id"
]:
[
int
(
tag
)
for
tag
in
i
[
"tag_ids"
]
.
split
(
","
)]
for
i
in
mysql_results
}
return
device_ids_info
,
all_device_order_tags
def get_user_diary_click_info_yesterday():
    """Return yesterday's diary-click devices and their tags (not implemented yet).

    The caller unpacks the result into two names, exactly as it does for
    ``get_user_order_info_yesterday``, so this placeholder must return a
    2-tuple rather than ``None`` (a bare ``pass`` body makes the unpacking
    raise ``TypeError``).

    Returns:
        A ``(device_ids_info, all_device_tags)`` pair. Until the real query is
        written both are empty: an empty list and an empty dict, which the
        caller's loops simply iterate zero times.
    """
    # TODO: query the diary clicks and fill both containers.
    return [], {}
def get_user_service_click_info_yesterday():
    """Return yesterday's service-click devices and their tags (not implemented yet).

    The caller unpacks the result into two names, exactly as it does for
    ``get_user_order_info_yesterday``, so this placeholder must return a
    2-tuple rather than ``None`` (a bare ``pass`` body makes the unpacking
    raise ``TypeError``).

    Returns:
        A ``(device_ids_info, all_device_tags)`` pair. Until the real query is
        written both are empty: an empty list and an empty dict, which the
        caller's loops simply iterate zero times.
    """
    # TODO: query the service clicks and fill both containers.
    return [], {}
if
__name__
==
'__main__'
:
try
:
parser
=
argparse
.
ArgumentParser
(
description
=
'画像匹配度的统计'
)
my_yesterday
=
str
(
datetime
.
date
.
today
()
-
datetime
.
timedelta
(
days
=
1
))
parser
.
add_argument
(
"-o"
,
"--order_date"
,
type
=
str
,
dest
=
"order_date"
,
default
=
my_yesterday
,
help
=
"统计的下单日期"
)
parser
.
add_argument
(
"-log1"
,
"--log1_file"
,
type
=
str
,
dest
=
"portrait_stat_log_path"
,
default
=
"portrait_stat.log"
,
help
=
"画像统计的日志地址"
)
parser
.
add_argument
(
"-log2"
,
"--log2_file"
,
type
=
str
,
dest
=
"debug_portrait_stat_log_path"
,
default
=
"debug_portrait_stat.log"
,
help
=
"画像统计的日志地址"
)
parser
.
add_argument
(
"-t"
,
"--top"
,
type
=
int
,
dest
=
"portrait_top_n"
,
default
=
3
,
help
=
"选取画像的前n个tag去统计匹配度"
)
parser
.
add_argument
(
"-c"
,
"--coincide"
,
type
=
int
,
dest
=
"coincide_n"
,
default
=
1
,
help
=
"选取n个tag重合个数作为判断是否匹配的阈值"
)
parser
.
add_argument
(
"-v"
,
"--version"
,
type
=
int
,
dest
=
"version"
,
default
=
1
,
help
=
"选取翔宇(0),英赫(1)版本进行统计"
)
parser
.
add_argument
(
"-e"
,
"--exponential"
,
type
=
int
,
dest
=
"exponential"
,
default
=
0
,
help
=
"是否采用指数衰减"
)
parser
.
add_argument
(
"-n"
,
"--normalization_size"
,
type
=
int
,
dest
=
"normalization_size"
,
default
=
7
,
help
=
"天数差归一化的区间"
)
parser
.
add_argument
(
"-d"
,
"--decay_days"
,
type
=
int
,
dest
=
"decay_days"
,
default
=
180
,
help
=
"分数衰减的天数"
)
parser
.
add_argument
(
"-a"
,
"--action_type"
,
type
=
list
,
dest
=
"action_type"
,
default
=
[
"order"
],
help
=
"计算匹配度的行为"
)
args
=
parser
.
parse_args
()
order_date
=
args
.
order_date
order_date_tomorrow
=
str
(
datetime
.
datetime
.
strptime
(
order_date
,
'
%
Y-
%
m-
%
d'
)
+
datetime
.
timedelta
(
days
=
1
))
portrait_stat_log_path
=
args
.
portrait_stat_log_path
debug_portrait_stat_log_path
=
args
.
debug_portrait_stat_log_path
cmd_portrait_top_n
=
args
.
portrait_top_n
cmd_coincide_n
=
args
.
coincide_n
version
=
args
.
version
exponential
=
args
.
exponential
normalization_size
=
args
.
normalization_size
decay_days
=
args
.
decay_days
action_type
=
args
.
action_type
LOG_DIR
=
"/home/gmuser/gyz/log/"
my_today
=
str
(
datetime
.
date
.
today
())
setup_logger
(
"log1"
,
LOG_DIR
+
portrait_stat_log_path
)
setup_logger
(
"log2"
,
LOG_DIR
+
debug_portrait_stat_log_path
)
log1
=
logging
.
getLogger
(
'log1'
)
log2
=
logging
.
getLogger
(
'log2'
)
# 获取搜索词及其近义词对应的tag
all_word_tags
=
get_all_word_tags
()
...
...
@@ -272,7 +285,20 @@ if __name__ == '__main__':
# 3级tag对应的2级tag
all_3tag_2tag
=
get_all_3tag_2tag
()
# 昨天下单了的用户的美购tags(转成2级tags)
for
action
in
action_type
:
# 获取昨天产生行为的设备id、以及行为对应的tag
device_ids_lst
=
list
()
all_device_order_tags
=
dict
()
if
"order"
in
action_type
:
device_ids_lst
,
all_device_order_tags
=
get_user_order_info_yesterday
()
elif
"diary"
in
action_type
:
device_ids_lst
,
all_device_order_tags
=
get_user_diary_click_info_yesterday
()
elif
"service"
in
action_type
:
device_ids_lst
,
all_device_order_tags
=
get_user_service_click_info_yesterday
()
else
:
break
# tags扩展2级tags
all_device_order_tags2
=
dict
()
for
device
in
all_device_order_tags
:
tags
=
all_device_order_tags
[
device
]
...
...
@@ -281,7 +307,7 @@ if __name__ == '__main__':
tags
+=
tags2
all_device_order_tags2
[
device
]
=
tags
# 昨天下单了的
用户的去除支付行为的画像
#
用户的去除支付行为的画像
all_device_portrait_result
=
dict
()
debug_all_device_portrait_result
=
dict
()
for
order_info
in
device_ids_lst
:
...
...
@@ -308,6 +334,16 @@ if __name__ == '__main__':
log2
.
info
({
"统计日期"
:
my_today
})
log2
.
info
({
"参数信息"
:
args
})
log2
.
info
({
"版本"
:
"英赫版"
if
version
==
1
else
"翔宇版"
})
action_type_detail
=
""
if
action_type
==
"order"
:
action_type_detail
=
"昨天下单了的用户"
elif
action_type
==
"diary"
:
action_type_detail
=
"昨天在首页精选点击了日记的用户"
elif
action_type
==
"service"
:
action_type_detail
=
"昨天在美购首页点击了美购的用户"
else
:
pass
log2
.
info
({
"统计用户"
:
action_type_detail
})
for
device
in
no_coincide_devices
:
no_coincide_devices_debug
=
dict
()
device_portrait_n
=
all_device_portrait_result
[
device
][:
args
.
portrait_top_n
]
...
...
@@ -337,6 +373,7 @@ if __name__ == '__main__':
log1
.
info
({
"画像信息统计日期"
:
my_today
})
log1
.
info
({
"参数信息"
:
args
})
log1
.
info
({
"版本"
:
"英赫版"
if
version
==
1
else
"翔宇版"
})
log1
.
info
({
"统计用户"
:
action_type_detail
})
log1
.
info
({
"画像更新耗时(分钟)"
:
time_consuming
})
log1
.
info
({
"画像更新的设备数"
:
portrait_device_count
[
0
][
"count(*)"
]})
log1
.
info
(
""
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment