Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
F
ffm-baseline
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ML
ffm-baseline
Commits
d70dcc7d
Commit
d70dcc7d
authored
Oct 11, 2019
by
高雅喆
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
有画像没匹配上的用户的画像信息
parent
233a458a
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
84 additions
and
27 deletions
+84
-27
evaluation_metrics.py
eda/smart_rank/evaluation_metrics.py
+84
-27
No files found.
eda/smart_rank/evaluation_metrics.py
View file @
d70dcc7d
...
...
@@ -9,6 +9,26 @@ import numpy as np
import
pandas
as
pd
from
tool
import
*
import
logging
from
collections
import
defaultdict
def get_count(actions):
    """Tally how many times each value occurs in *actions*.

    Used in this file as the aggregation function for the ``'action'``
    column of a ``groupby(...).agg(...)`` call.

    Args:
        actions: Any iterable of hashable values.

    Returns:
        A ``defaultdict(int)`` mapping each distinct value to its number of
        occurrences, in first-seen order.
    """
    tally = defaultdict(int)
    for action in actions:
        # Missing keys start at 0 thanks to the int default factory.
        tally[action] = tally[action] + 1
    return tally
def setup_logger(logger_name, log_file, level=logging.INFO):
    """Configure a named logger that writes bare messages to a file and a stream.

    The logger gets one ``FileHandler`` appending to *log_file* and one
    ``StreamHandler`` (default stream), both formatted as ``%(message)s``.
    Calling this function again with the same arguments is safe: handlers
    that are already attached are not added a second time, so messages are
    never duplicated.

    Args:
        logger_name: Name passed to ``logging.getLogger``.
        log_file: Path of the log file; opened in append mode.
        level: Threshold set on the logger (default ``logging.INFO``).

    Returns:
        The configured ``logging.Logger``. Callers that ignore the return
        value and call ``logging.getLogger(logger_name)`` get the same object.
    """
    import os

    my_log = logging.getLogger(logger_name)
    my_log.setLevel(level)
    formatter = logging.Formatter('%(message)s')

    # FileHandler stores its target as an absolute path in ``baseFilename``.
    target_path = os.path.abspath(os.fspath(log_file))
    has_file_handler = any(
        isinstance(handler, logging.FileHandler)
        and handler.baseFilename == target_path
        for handler in my_log.handlers
    )
    if not has_file_handler:
        # Explicit UTF-8: the messages logged by this script contain
        # non-ASCII (Chinese) text, which must not depend on the locale.
        file_handler = logging.FileHandler(log_file, mode='a', encoding='utf-8')
        file_handler.setFormatter(formatter)
        my_log.addHandler(file_handler)

    # FileHandler subclasses StreamHandler, so exclude it explicitly here.
    has_stream_handler = any(
        isinstance(handler, logging.StreamHandler)
        and not isinstance(handler, logging.FileHandler)
        for handler in my_log.handlers
    )
    if not has_stream_handler:
        stream_handler = logging.StreamHandler()
        stream_handler.setFormatter(formatter)
        my_log.addHandler(stream_handler)

    return my_log
def
get_user_service_portrait_not_alipay
(
cl_id
,
all_word_tags
,
all_tag_tag_type
,
all_3tag_2tag
,
size
=
10
):
...
...
@@ -67,39 +87,50 @@ def get_user_service_portrait_not_alipay(cl_id, all_word_tags, all_tag_tag_type,
compute_ruoyixiang
(
x
.
days_diff_now
)
/
get_action_tag_count
(
user_df_service
,
x
.
time
)
if
x
.
score_type
==
"ruoyixiang"
else
compute_validate
(
x
.
days_diff_now
)
/
get_action_tag_count
(
user_df_service
,
x
.
time
)))),
axis
=
1
)
tag_score_sum
=
user_df_service
.
groupby
(
by
=
[
"tag2"
,
"tag2_type"
])
.
agg
(
{
'tag_score'
:
'sum'
,
'cl_id'
:
'first'
,
'action'
:
'first'
})
.
reset_index
()
.
sort_values
(
by
=
[
"tag_score"
],
ascending
=
False
)
{
'tag_score'
:
'sum'
,
'cl_id'
:
'first'
,
'action'
:
get_count
})
.
reset_index
()
.
sort_values
(
by
=
[
"tag_score"
],
ascending
=
False
)
tag_score_sum
[
'weight'
]
=
100
*
tag_score_sum
[
'tag_score'
]
/
tag_score_sum
[
'tag_score'
]
.
sum
()
tag_score_sum
[
"pay_type"
]
=
tag_score_sum
.
apply
(
lambda
x
:
3
if
x
.
action
==
"api/order/validate"
else
(
2
if
x
.
action
==
"api/settlement/alipay_callback"
else
1
),
axis
=
1
)
# gmkv_tag_score_sum = tag_score_sum[["tag2", "tag_score", "weight"]][:size].to_dict('record')
gmkv_tag_score_sum_list
=
tag_score_sum
[
"tag2"
]
.
to_list
()[:
size
]
return
gmkv_tag_score_sum_list
# 获取tag的得分来源(action信息)
debug_tag_score_sum
=
tag_score_sum
[[
"tag2"
,
"tag_score"
,
"action"
]][:
size
]
.
to_dict
(
'record'
)
debug_tag_score_sum_dict
=
{
info
[
"tag2"
]:
info
for
info
in
debug_tag_score_sum
}
# 没有用户的画像
else
:
gmkv_tag_score_sum_list
=
list
()
debug_tag_score_sum_dict
=
dict
()
return
gmkv_tag_score_sum_list
,
debug_tag_score_sum_dict
except
Exception
as
e
:
print
(
e
)
return
list
(),
dict
()
def
get_2_tags_coincide_rate
(
device_order_tags
,
device_portrait_result
,
portrait_top_n
,
coincide_n
):
device_count
=
len
(
device_order_tags
)
coincide_count
=
0
not_coincide_no_portrait
=
0
not_coincide_no_portrait_device_ids
=
[]
not_coincide_have_portrait_device_ids
=
[]
not_coincide_have_portrait
=
0
device_count
=
len
(
device_order_tags
)
# 总的下单设备数
coincide_count
=
0
# 比对的上的设备数
not_coincide_no_portrait
=
0
# 比对不上的且没有画像的设备数
not_coincide_no_portrait_device_ids
=
[]
# 比对不上的且没有画像的设备
not_coincide_have_portrait_device_ids
=
[]
# devices that have a portrait but did not match (list of device ids)
not_coincide_have_portrait
=
0
# number of devices that have a portrait but did not match
for
device
in
device_order_tags
:
order_tags
=
device_order_tags
[
device
]
portrait_tags
=
device_portrait_result
[
device
]
if
portrait_tags
:
portrait_tags
=
portrait_tags
[:
portrait_top_n
]
else
:
# 没有画像的设备
not_coincide_no_portrait
+=
1
not_coincide_no_portrait_device_ids
.
append
(
device
)
continue
# 有画像且匹配的上
if
len
(
set
(
order_tags
)
.
intersection
(
set
(
portrait_tags
)))
>=
coincide_n
:
coincide_count
+=
1
# 有画像且匹配不上
else
:
not_coincide_have_portrait
+=
1
not_coincide_have_portrait_device_ids
.
append
(
device
)
...
...
@@ -123,6 +154,11 @@ if __name__ == '__main__':
my_today
=
str
(
datetime
.
date
.
today
())
my_yesterday
=
str
(
datetime
.
date
.
today
()
-
datetime
.
timedelta
(
days
=
1
))
setup_logger
(
"log1"
,
LOG_DIR
+
'portrait_stat.log'
)
setup_logger
(
"log2"
,
LOG_DIR
+
'debug_portrait_stat.log'
)
log1
=
logging
.
getLogger
(
'log1'
)
log2
=
logging
.
getLogger
(
'log2'
)
# 获取昨天下单的用户设备id,下单的美购,美购对应的tag
# api_order只有用户的user_id,一个user_id对应多个device_id
# 用户一次可以下多个订单(美购),一个美购对应多个tag
...
...
@@ -185,10 +221,32 @@ if __name__ == '__main__':
# 昨天下单了的用户的去除支付行为的画像
all_device_portrait_result
=
dict
()
debug_all_device_portrait_result
=
dict
()
for
device
in
device_ids_lst
:
portrait_result
=
get_user_service_portrait_not_alipay
(
device
,
all_word_tags
,
all_tag_tag_type
,
all_3tag_2tag
,
size
=
10
)
portrait_result
,
debug_portrait_result
=
get_user_service_portrait_not_alipay
(
device
,
all_word_tags
,
all_tag_tag_type
,
all_3tag_2tag
,
size
=-
1
)
all_device_portrait_result
[
device
]
=
portrait_result
debug_all_device_portrait_result
[
device
]
=
debug_portrait_result
# 有画像没匹配上的用户的画像信息
no_coincide_devices
=
result
[
"not_coincide_have_portrait_device_ids"
]
no_coincide_devices_debug
=
dict
()
log2
.
info
({
"统计日期"
:
my_today
})
for
device
in
no_coincide_devices
:
no_coincide_devices_debug
=
dict
()
device_portrait_n
=
all_device_portrait_result
[
device
][:
args
.
portrait_top_n
]
device_order_tags
=
all_device_order_tags2
[
device
]
debug_device_portrait_result
=
debug_all_device_portrait_result
[
device
]
no_coincide_devices_debug
[
device
]
=
{
"画像的前{top_n}个tag"
.
format
(
top_n
=
args
.
portrait_top_n
):
[
debug_device_portrait_result
[
tag
]
for
tag
in
device_portrait_n
],
"用户下单的美购对应的tag"
:
[
debug_device_portrait_result
.
get
(
tag
,
dict
())
for
tag
in
device_order_tags
]
}
log2
.
info
(
"-"
*
66
)
log2
.
info
(
no_coincide_devices_debug
)
log2
.
info
(
"
\n
"
*
6
)
# 比较两个tag列表的重合率
cmd_portrait_top_n
=
args
.
portrait_top_n
...
...
@@ -207,20 +265,18 @@ if __name__ == '__main__':
end_datetime
=
datetime
.
datetime
.
strptime
(
end_datetime_str
,
'
%
Y-
%
m-
%
d
%
H:
%
M:
%
S'
)
time_consuming
=
(
end_datetime
-
start_datetime
)
.
seconds
/
60
logging
.
basicConfig
(
format
=
'
%(message)
s'
,
filename
=
LOG_DIR
+
'portrait_stat.log'
,
filemode
=
'a'
,
level
=
logging
.
INFO
)
logging
.
info
({
"画像信息统计日期"
:
my_today
})
logging
.
info
({
"画像更新耗时(分钟)"
:
time_consuming
})
logging
.
info
({
"画像更新的设备数"
:
portrait_device_count
[
0
][
"count(*)"
]})
logging
.
info
(
""
)
logging
.
info
({
"统计画像匹配度所用数据的日期"
:
my_yesterday
})
logging
.
info
({
"统计画像的选取前n个tag"
:
cmd_portrait_top_n
})
logging
.
info
({
"重合个数"
:
cmd_coincide_n
})
logging
.
info
({
"下单人数"
:
result
[
"device_count"
]})
logging
.
info
({
"比对的上的人数"
:
result
[
"coincide_count"
]})
logging
.
info
({
"匹配度"
:
result
[
"coincide_rate"
]})
logging
.
info
({
"比对不上的有画像的人数"
:
result
[
"not_coincide_have_portrait_count"
]})
logging
.
info
({
"比对不上的无画像的人数"
:
result
[
"not_coincide_no_portrait_count"
]})
logging
.
info
(
"="
*
66
)
log1
.
info
({
"画像信息统计日期"
:
my_today
})
log1
.
info
({
"画像更新耗时(分钟)"
:
time_consuming
})
log1
.
info
({
"画像更新的设备数"
:
portrait_device_count
[
0
][
"count(*)"
]})
log1
.
info
(
""
)
log1
.
info
({
"统计画像匹配度所用数据的日期"
:
my_yesterday
})
log1
.
info
({
"统计画像的选取前n个tag"
:
cmd_portrait_top_n
})
log1
.
info
({
"重合个数"
:
cmd_coincide_n
})
log1
.
info
({
"下单人数"
:
result
[
"device_count"
]})
log1
.
info
({
"比对的上的人数"
:
result
[
"coincide_count"
]})
log1
.
info
({
"匹配度"
:
result
[
"coincide_rate"
]})
log1
.
info
({
"比对不上的有画像的人数"
:
result
[
"not_coincide_have_portrait_count"
]})
log1
.
info
({
"比对不上的无画像的人数"
:
result
[
"not_coincide_no_portrait_count"
]})
log1
.
info
(
"="
*
66
)
except
Exception
as
e
:
print
(
e
)
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment