Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
F
ffm-baseline
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ML
ffm-baseline
Commits
08e45a4e
Commit
08e45a4e
authored
Oct 09, 2019
by
高雅喆
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
计算两个tag列表的重合率
parent
a4c31b88
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
75 additions
and
41 deletions
+75
-41
dist_update_user_portrait_service.py
eda/smart_rank/dist_update_user_portrait_service.py
+1
-0
evaluation_metrics.py
eda/smart_rank/evaluation_metrics.py
+74
-41
No files found.
eda/smart_rank/dist_update_user_portrait_service.py
View file @
08e45a4e
...
...
@@ -282,6 +282,7 @@ def get_user_service_portrait(cl_id, all_word_tags, all_tag_tag_type, all_3tag_2
cur_jerry_test
.
execute
(
replace_sql
)
db_jerry_test
.
commit
()
# 写tidb 用户分层营销
# todo 不准确,因为聚合后,一个标签会有多个来源,即多个pay_type
score_result
=
tag_score_sum
[[
"tag2"
,
"cl_id"
,
"tag_score"
,
"weight"
,
"pay_type"
]]
score_result
.
rename
(
columns
=
{
"tag2"
:
"tag_id"
,
"cl_id"
:
"device_id"
,
"tag_score"
:
"score"
},
inplace
=
True
)
delete_sql
=
"delete from api_market_personas where device_id='{}'"
.
format
(
cl_id
)
...
...
eda/smart_rank/evaluation_metrics.py
View file @
08e45a4e
...
...
@@ -72,55 +72,88 @@ def get_user_service_portrait_not_alipay(cl_id, all_word_tags, all_tag_tag_type,
2
if
x
.
action
==
"api/settlement/alipay_callback"
else
1
),
axis
=
1
)
gmkv_tag_score_sum
=
tag_score_sum
[[
"tag2"
,
"tag_score"
,
"weight"
]][:
size
]
.
to_dict
(
'record'
)
# 写gmkv
gm_kv_cli
=
redis
.
Redis
(
host
=
"172.16.40.135"
,
port
=
5379
,
db
=
2
,
socket_timeout
=
2000
)
cl_id_portrait_key
=
"user:service_portrait_tags:cl_id:"
+
str
(
cl_id
)
tag_id_list_json
=
json
.
dumps
(
gmkv_tag_score_sum
)
gm_kv_cli
.
set
(
cl_id_portrait_key
,
tag_id_list_json
)
gm_kv_cli
.
expire
(
cl_id_portrait_key
,
time
=
30
*
24
*
60
*
60
)
# 写tidb,redis同步
stat_date
=
datetime
.
datetime
.
today
()
.
strftime
(
'
%
Y-
%
m-
%
d'
)
replace_sql
=
"""replace into user_service_portrait_tags (stat_date, cl_id, tag_list) values("{stat_date}","{cl_id}","{tag_list}")"""
\
.
format
(
stat_date
=
stat_date
,
cl_id
=
cl_id
,
tag_list
=
gmkv_tag_score_sum
)
cur_jerry_test
.
execute
(
replace_sql
)
db_jerry_test
.
commit
()
# 写tidb 用户分层营销
score_result
=
tag_score_sum
[[
"tag2"
,
"cl_id"
,
"tag_score"
,
"weight"
,
"pay_type"
]]
score_result
.
rename
(
columns
=
{
"tag2"
:
"tag_id"
,
"cl_id"
:
"device_id"
,
"tag_score"
:
"score"
},
inplace
=
True
)
delete_sql
=
"delete from api_market_personas where device_id='{}'"
.
format
(
cl_id
)
cur_jerry_test
.
execute
(
delete_sql
)
db_jerry_test
.
commit
()
for
index
,
row
in
score_result
.
iterrows
():
insert_sql
=
"insert into api_market_personas values (null, {}, '{}', {}, {}, {})"
.
format
(
row
[
'tag_id'
],
row
[
'device_id'
],
row
[
'score'
],
row
[
'weight'
],
row
[
'pay_type'
])
cur_jerry_test
.
execute
(
insert_sql
)
db_jerry_test
.
commit
()
db_jerry_test
.
close
()
return
"sucess"
# gmkv_tag_score_sum = tag_score_sum[["tag2", "tag_score", "weight"]][:size].to_dict('record')
gmkv_tag_score_sum_list
=
tag_score_sum
[
"tag2"
]
.
to_list
()[:
size
]
return
gmkv_tag_score_sum_list
except
Exception
as
e
:
print
(
e
)
def get_2_tags_coincide_rate(device_order_tags, device_portrait_result, portrait_top_n, coincide_n):
    """Return the fraction of devices whose order tags overlap their portrait tags.

    A device counts as "coinciding" when at least ``coincide_n`` of its order
    tags also appear among the first ``portrait_top_n`` tags of its portrait.

    Args:
        device_order_tags: Mapping of device id -> iterable of order tags.
        device_portrait_result: Mapping of device id -> sequence of portrait
            tags. A falsy value (``None`` or empty) or a missing device is
            treated as "no portrait tags".
        portrait_top_n: How many leading portrait tags to compare against.
        coincide_n: Minimum number of shared tags for a device to count.

    Returns:
        ``coinciding devices / total devices`` as a float, or ``0.0`` when
        ``device_order_tags`` is empty.
    """
    device_count = len(device_order_tags)
    # Guard the final division: no devices means no coincidence to measure.
    if not device_count:
        return 0.0

    coincide_count = 0
    for device, order_tags in device_order_tags.items():
        # A device without a portrait entry is handled the same way as one
        # whose portrait is None/empty, instead of raising KeyError.
        portrait_tags = device_portrait_result.get(device) or []
        top_portrait_tags = set(portrait_tags[:portrait_top_n])
        if len(top_portrait_tags.intersection(order_tags)) >= coincide_n:
            coincide_count += 1
    return coincide_count / device_count
if
__name__
==
'__main__'
:
try
:
db_jerry_test
=
pymysql
.
connect
(
host
=
'172.16.40.170'
,
port
=
4000
,
user
=
'root'
,
passwd
=
'3SYz54LS9#^9sBvC'
,
db
=
'jerry_test'
,
charset
=
'utf8'
)
cur_jerry_test
=
db_jerry_test
.
cursor
()
# 获取最近30天内的用户设备id
sql_device_ids
=
"select distinct cl_id from user_new_tag_log "
\
"where time > UNIX_TIMESTAMP(DATE_SUB(NOW(), INTERVAL 30 day))"
cur_jerry_test
.
execute
(
sql_device_ids
)
device_ids_lst
=
[
i
[
0
]
for
i
in
cur_jerry_test
.
fetchall
()]
db_jerry_test
.
close
()
# 获取昨天下单的用户设备id,下单的美购,美购对应的tag
# api_order只有用户的user_id,一个user_id对应多个device_id
# 用户一次可以下多个订单(美购),一个美购对应多个tag
my_yesterday
=
str
(
datetime
.
date
.
today
()
-
datetime
.
timedelta
(
days
=
1
))
sql_order_device_info_yesterday
=
"""
SELECT tmp1.user_id,
c.device_id,
tmp1.service_ids,
tmp1.tag_ids
FROM
(SELECT tmp.user_id,
tmp.service_ids,
tmp.tag_ids,
max(tmp.device_id) device_id_id
FROM
(SELECT a.user_id,
a.service_ids,
a.tag_ids,
b.device_id
FROM
(SELECT user_id,
group_concat(DISTINCT `service_id` separator ',') service_ids,
group_concat(DISTINCT `tag_id` separator ',') tag_ids
FROM
(SELECT d.user_id,
d.service_id,
e.tag_id
FROM api_order d
LEFT JOIN api_servicetag e ON d.service_id = e.service_id
LEFT JOIN api_tag f ON e.tag_id = f.id
WHERE d.status=1
AND d.pay_time>'{my_yesterday}'
AND f.tag_type+0 <'4'+0) tmp2
GROUP BY user_id) a
LEFT JOIN statistic_device_user b ON a.user_id = b.user_id) tmp
GROUP BY tmp.user_id) tmp1
LEFT JOIN statistic_device c ON tmp1.device_id_id = c.id
WHERE c.device_id IS NOT NULL
"""
.
format
(
my_yesterday
=
my_yesterday
)
mysql_results
=
get_data_by_mysql
(
'172.16.30.141'
,
3306
,
'work'
,
'BJQaT9VzDcuPBqkd'
,
'zhengxing'
,
sql_order_device_info_yesterday
)
device_ids_lst
=
[
i
[
"device_id"
]
for
i
in
mysql_results
]
all_device_order_tags
=
{
i
[
"device_id"
]:
[
int
(
tag
)
for
tag
in
i
[
"tag_ids"
]
.
split
(
","
)]
for
i
in
mysql_results
}
# 获取搜索词及其近义词对应的tag
all_word_tags
=
get_all_word_tags
()
all_tag_tag_type
=
get_all_tag_tag_type
()
# 3级tag对应的2级tag
all_3tag_2tag
=
get_all_3tag_2tag
()
\ No newline at end of file
all_3tag_2tag
=
get_all_3tag_2tag
()
device_id
=
"9C5E7C73-380C-4623-8F48-A64C8034E315"
get_user_service_portrait_not_alipay
(
device_id
,
all_word_tags
,
all_tag_tag_type
,
all_3tag_2tag
)
# 昨天下单了的用户的去除支付行为的画像
all_device_portrait_result
=
dict
()
for
device
in
device_ids_lst
:
portrait_result
=
get_user_service_portrait_not_alipay
(
device
,
all_word_tags
,
all_tag_tag_type
,
all_3tag_2tag
,
size
=
10
)
all_device_portrait_result
[
device
]
=
portrait_result
# 比较两个tag列表的重合率
rate
=
get_2_tags_coincide_rate
(
all_device_order_tags
,
all_device_portrait_result
,
3
,
1
)
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment