Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
F
ffm-baseline
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ML
ffm-baseline
Commits
754b23c0
Commit
754b23c0
authored
Oct 15, 2019
by
高雅喆
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
计算匹配度的时候,用户log限制在昨天之前
parent
ffcd5172
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
19 additions
and
9 deletions
+19
-9
evaluation_metrics.py
eda/smart_rank/evaluation_metrics.py
+11
-7
tool.py
eda/smart_rank/tool.py
+8
-2
No files found.
eda/smart_rank/evaluation_metrics.py
View file @
754b23c0
...
...
@@ -44,10 +44,11 @@ def get_user_service_portrait_not_alipay(cl_id, all_word_tags, all_tag_tag_type,
db_jerry_test
=
pymysql
.
connect
(
host
=
'172.16.40.158'
,
port
=
4000
,
user
=
'root'
,
passwd
=
'3SYz54LS9#^9sBvC'
,
db
=
'jerry_test'
,
charset
=
'utf8'
)
cur_jerry_test
=
db_jerry_test
.
cursor
()
yesterday
=
get_yesterday_start_timestamp
()
# 用户的非搜索、支付的行为
user_df_service_sql
=
"select time,cl_id,score_type,tag_id,tag_referrer,action from user_new_tag_log "
\
"where cl_id ='{
}'
and action not in "
\
"('api/settlement/alipay_callback','do_search')"
.
format
(
cl_id
)
"where cl_id ='{
cl_id}' and time < {yesterday}
and action not in "
\
"('api/settlement/alipay_callback','do_search')"
.
format
(
cl_id
=
cl_id
,
yesterday
=
yesterday
)
cur_jerry_test
.
execute
(
user_df_service_sql
)
data
=
list
(
cur_jerry_test
.
fetchall
())
if
data
:
...
...
@@ -57,7 +58,8 @@ def get_user_service_portrait_not_alipay(cl_id, all_word_tags, all_tag_tag_type,
user_df_service
=
pd
.
DataFrame
(
columns
=
[
"time"
,
"cl_id"
,
"score_type"
,
"tag_id"
,
"tag_referrer"
,
"action"
])
# 用户的搜索行为
user_df_search_sql
=
"select time,cl_id,score_type,tag_id,tag_referrer,action from user_new_tag_log "
\
"where cl_id ='{}' and action = 'do_search'"
.
format
(
cl_id
)
"where cl_id ='{cl_id}' and time < {yesterday} and "
\
"action = 'do_search'"
.
format
(
cl_id
=
cl_id
,
yesterday
=
yesterday
)
cur_jerry_test
.
execute
(
user_df_search_sql
)
data_search
=
list
(
cur_jerry_test
.
fetchall
())
db_jerry_test
.
close
()
...
...
@@ -131,10 +133,11 @@ def get_user_service_portrait_not_alipay2(cl_id, all_word_tags, all_tag_tag_type
db_jerry_test
=
pymysql
.
connect
(
host
=
'172.16.40.158'
,
port
=
4000
,
user
=
'root'
,
passwd
=
'3SYz54LS9#^9sBvC'
,
db
=
'jerry_test'
,
charset
=
'utf8'
)
cur_jerry_test
=
db_jerry_test
.
cursor
()
yesterday
=
get_yesterday_start_timestamp
()
# 用户的非搜索、支付的行为
user_df_service_sql
=
"select time,cl_id,score_type,tag_id,tag_referrer,action from user_new_tag_log "
\
"where cl_id ='{
}'
and action not in "
\
"('api/settlement/alipay_callback','do_search')"
.
format
(
cl_id
)
"where cl_id ='{
cl_id}' and time < {yesterday}
and action not in "
\
"('api/settlement/alipay_callback','do_search')"
.
format
(
cl_id
=
cl_id
,
yesterday
=
yesterday
)
cur_jerry_test
.
execute
(
user_df_service_sql
)
data
=
list
(
cur_jerry_test
.
fetchall
())
if
data
:
...
...
@@ -144,7 +147,8 @@ def get_user_service_portrait_not_alipay2(cl_id, all_word_tags, all_tag_tag_type
user_df_service
=
pd
.
DataFrame
(
columns
=
[
"time"
,
"cl_id"
,
"score_type"
,
"tag_id"
,
"tag_referrer"
,
"action"
])
# 用户的搜索行为
user_df_search_sql
=
"select time,cl_id,score_type,tag_id,tag_referrer,action from user_new_tag_log "
\
"where cl_id ='{}' and action = 'do_search'"
.
format
(
cl_id
)
"where cl_id ='{cl_id}' and time < {yesterday} and "
\
"action = 'do_search'"
.
format
(
cl_id
=
cl_id
,
yesterday
=
yesterday
)
cur_jerry_test
.
execute
(
user_df_search_sql
)
data_search
=
list
(
cur_jerry_test
.
fetchall
())
db_jerry_test
.
close
()
...
...
@@ -207,7 +211,7 @@ def get_2_tags_coincide_rate(device_order_tags, device_portrait_result, portrait
:param device_portrait_result:
:param portrait_top_n:
:param coincide_n:
:return: 匹配度:比对的上的用户数/昨天下单了的用户数 * 100
%
:return: 匹配度:比对的上的用户数/昨天下单了的
且有画像的
用户数 * 100
%
比对的上:去掉下单和验证行为的画像的前portrait_top_n个tag 与 用户下单的美购的tag 有coincide_n个重合个数
"""
device_count
=
len
(
device_order_tags
)
# 总的下单设备数
...
...
eda/smart_rank/tool.py
View file @
754b23c0
...
...
@@ -211,4 +211,11 @@ def get_action_tag_count(df, action_time):
else
:
return
1
except
Exception
as
e
:
print
(
e
)
\ No newline at end of file
print
(
e
)
def get_yesterday_start_timestamp():
    """Return the Unix timestamp of the start (00:00:00 local time) of yesterday.

    "Yesterday" is the calendar day before ``datetime.date.today()``. The
    value is truncated to whole seconds and returned as an ``int``.

    :return: seconds since the epoch for yesterday's local midnight
    """
    previous_day = datetime.date.today() - datetime.timedelta(days=1)
    # date.timetuple() describes midnight of that day with the DST flag left
    # undetermined, which time.mktime() interprets as local time.
    midnight_struct = previous_day.timetuple()
    return int(time.mktime(midnight_struct))
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment