Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
F
ffm-baseline
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ML
ffm-baseline
Commits
50281489
Commit
50281489
authored
Jan 20, 2020
by
赵威
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'updatedb' into 'master'
update db See merge request
!41
parents
296a46bf
ab2f6771
Show whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
16 additions
and
16 deletions
+16
-16
dist_update_portrait_market.py
eda/smart_rank/dist_update_portrait_market.py
+2
-2
dist_update_user_portrait.py
eda/smart_rank/dist_update_user_portrait.py
+2
-2
dist_update_user_portrait_service.py
eda/smart_rank/dist_update_user_portrait_service.py
+2
-2
evaluation_metrics.py
eda/smart_rank/evaluation_metrics.py
+6
-6
gm_tag_cf.py
eda/smart_rank/gm_tag_cf.py
+1
-1
pyspark_argsparse_test.py
eda/smart_rank/pyspark_argsparse_test.py
+1
-1
stat_device_order_portrait_score.py
eda/smart_rank/stat_device_order_portrait_score.py
+1
-1
tool.py
eda/smart_rank/tool.py
+1
-1
No files found.
eda/smart_rank/dist_update_portrait_market.py
View file @
50281489
...
...
@@ -148,7 +148,7 @@ def compute_ai_scan(x):
def
get_user_tag_score
(
cl_id
,
all_word_tags
,
size
=
10
):
try
:
db_jerry_test
=
pymysql
.
connect
(
host
=
'172.16.40.170'
,
port
=
4000
,
user
=
'
root'
,
passwd
=
'3SYz54LS9#^9sBvC
'
,
db_jerry_test
=
pymysql
.
connect
(
host
=
'172.16.40.170'
,
port
=
4000
,
user
=
'
st_user'
,
passwd
=
'aqpuBLYzEV7tML5RPsN1pntUzFy
'
,
db
=
'jerry_test'
,
charset
=
'utf8'
)
cur_jerry_test
=
db_jerry_test
.
cursor
()
...
...
@@ -218,7 +218,7 @@ def get_user_tag_score(cl_id, all_word_tags, size=10):
if
__name__
==
'__main__'
:
try
:
db_jerry_test
=
pymysql
.
connect
(
host
=
'172.16.40.170'
,
port
=
4000
,
user
=
'
root'
,
passwd
=
'3SYz54LS9#^9sBvC
'
,
db_jerry_test
=
pymysql
.
connect
(
host
=
'172.16.40.170'
,
port
=
4000
,
user
=
'
st_user'
,
passwd
=
'aqpuBLYzEV7tML5RPsN1pntUzFy
'
,
db
=
'jerry_test'
,
charset
=
'utf8'
)
cur_jerry_test
=
db_jerry_test
.
cursor
()
...
...
eda/smart_rank/dist_update_user_portrait.py
View file @
50281489
...
...
@@ -99,7 +99,7 @@ def tag_list2dict(lst, size):
def
get_user_tag_score
(
cl_id
,
size
=
10
):
try
:
db_jerry_test
=
pymysql
.
connect
(
host
=
'172.16.40.158'
,
port
=
4000
,
user
=
'
root'
,
passwd
=
'3SYz54LS9#^9sBvC
'
,
db_jerry_test
=
pymysql
.
connect
(
host
=
'172.16.40.158'
,
port
=
4000
,
user
=
'
st_user'
,
passwd
=
'aqpuBLYzEV7tML5RPsN1pntUzFy
'
,
db
=
'jerry_test'
,
charset
=
'utf8'
)
cur_jerry_test
=
db_jerry_test
.
cursor
()
...
...
@@ -144,7 +144,7 @@ def get_user_tag_score(cl_id, size=10):
if
__name__
==
'__main__'
:
try
:
db_jerry_test
=
pymysql
.
connect
(
host
=
'172.16.40.170'
,
port
=
4000
,
user
=
'
root'
,
passwd
=
'3SYz54LS9#^9sBvC
'
,
db_jerry_test
=
pymysql
.
connect
(
host
=
'172.16.40.170'
,
port
=
4000
,
user
=
'
st_user'
,
passwd
=
'aqpuBLYzEV7tML5RPsN1pntUzFy
'
,
db
=
'jerry_test'
,
charset
=
'utf8'
)
cur_jerry_test
=
db_jerry_test
.
cursor
()
...
...
eda/smart_rank/dist_update_user_portrait_service.py
View file @
50281489
...
...
@@ -22,7 +22,7 @@ from tool import get_user_log, compute_henqiang, get_action_tag_count, compute_j
def
get_user_service_portrait
(
cl_id
,
all_word_tags
,
all_tag_tag_type
,
all_3tag_2tag
,
all_tags_name
,
size
=
None
):
db_jerry_test
=
pymysql
.
connect
(
host
=
'172.16.40.158'
,
port
=
4000
,
user
=
'
root'
,
passwd
=
'3SYz54LS9#^9sBvC
'
,
db_jerry_test
=
pymysql
.
connect
(
host
=
'172.16.40.158'
,
port
=
4000
,
user
=
'
st_user'
,
passwd
=
'aqpuBLYzEV7tML5RPsN1pntUzFy
'
,
db
=
'jerry_test'
,
charset
=
'utf8'
)
cur_jerry_test
=
db_jerry_test
.
cursor
()
...
...
@@ -117,7 +117,7 @@ def get_user_service_portrait(cl_id, all_word_tags, all_tag_tag_type, all_3tag_2
if
__name__
==
'__main__'
:
try
:
db_jerry_test
=
pymysql
.
connect
(
host
=
'172.16.40.170'
,
port
=
4000
,
user
=
'
root'
,
passwd
=
'3SYz54LS9#^9sBvC
'
,
db_jerry_test
=
pymysql
.
connect
(
host
=
'172.16.40.170'
,
port
=
4000
,
user
=
'
st_user'
,
passwd
=
'aqpuBLYzEV7tML5RPsN1pntUzFy
'
,
db
=
'jerry_test'
,
charset
=
'utf8'
)
cur_jerry_test
=
db_jerry_test
.
cursor
()
...
...
eda/smart_rank/evaluation_metrics.py
View file @
50281489
...
...
@@ -44,7 +44,7 @@ def get_user_service_portrait_not_alipay(cl_id, all_word_tags, all_tag_tag_type,
:return: 画像(去掉支付行为)
"""
try
:
db_jerry_test
=
pymysql
.
connect
(
host
=
'172.16.40.158'
,
port
=
4000
,
user
=
'
root'
,
passwd
=
'3SYz54LS9#^9sBvC
'
,
db_jerry_test
=
pymysql
.
connect
(
host
=
'172.16.40.158'
,
port
=
4000
,
user
=
'
st_user'
,
passwd
=
'aqpuBLYzEV7tML5RPsN1pntUzFy
'
,
db
=
'jerry_test'
,
charset
=
'utf8'
)
cur_jerry_test
=
db_jerry_test
.
cursor
()
# 用户的非搜索、支付的行为
...
...
@@ -174,7 +174,7 @@ def get_2_tags_coincide_rate(device_order_tags, device_portrait_result, portrait
coincide_rate
=
coincide_count
/
(
device_count
-
not_coincide_no_portrait
)
result
=
{
"device_count"
:
device_count
,
"coincide_count"
:
coincide_count
,
"coincide_rate"
:
coincide_rate
,
"not_coincide_have_portrait_count"
:
not_coincide_have_portrait
,
"not_coincide_no_portrait_count"
:
not_coincide_no_portrait
,
1
"not_coincide_no_portrait_count"
:
not_coincide_no_portrait
,
"not_coincide_no_portrait_device_ids"
:
not_coincide_no_portrait_device_ids
,
"not_coincide_have_portrait_device_ids"
:
not_coincide_have_portrait_device_ids
}
return
result
...
...
@@ -256,7 +256,7 @@ def get_user_diary_click_info_yesterday(click_date, click_date_tomorrow):
AND SUBSTR(MD5(d.cl_id),-1,1) IN ('0','1','2','3','4','a','b','c','e')) tmp2
GROUP BY cl_id
"""
.
format
(
click_date
=
click_date
,
click_date_tomorrow
=
click_date_tomorrow
)
mysql_results
=
get_data_by_mysql
(
'172.16.40.158'
,
4000
,
'
root'
,
'3SYz54LS9#^9sBvC
'
,
'jerry_test'
,
sql_device_info_yesterday
)
mysql_results
=
get_data_by_mysql
(
'172.16.40.158'
,
4000
,
'
st_user'
,
'aqpuBLYzEV7tML5RPsN1pntUzFy
'
,
'jerry_test'
,
sql_device_info_yesterday
)
device_ids_info
=
[(
i
[
"device_id"
],
int
(
i
[
"click_time"
]))
for
i
in
mysql_results
]
all_device_action_tags
=
{
i
[
"device_id"
]:
[
int
(
tag
)
for
tag
in
i
[
"tag_ids"
]
.
split
(
","
)]
for
i
in
mysql_results
}
return
device_ids_info
,
all_device_action_tags
...
...
@@ -286,7 +286,7 @@ def get_user_service_click_info_yesterday(click_date, click_date_tomorrow):
AND SUBSTR(MD5(d.cl_id),-1,1) IN ('0','1','2','3','4','a','b','c','e')) tmp2
GROUP BY cl_id
"""
.
format
(
click_date
=
click_date
,
click_date_tomorrow
=
click_date_tomorrow
)
mysql_results
=
get_data_by_mysql
(
'172.16.40.158'
,
4000
,
'
root'
,
'3SYz54LS9#^9sBvC
'
,
'jerry_test'
,
sql_device_info_yesterday
)
mysql_results
=
get_data_by_mysql
(
'172.16.40.158'
,
4000
,
'
st_user'
,
'aqpuBLYzEV7tML5RPsN1pntUzFy
'
,
'jerry_test'
,
sql_device_info_yesterday
)
device_ids_info
=
[(
i
[
"device_id"
],
int
(
i
[
"click_time"
]))
for
i
in
mysql_results
]
all_device_action_tags
=
{
i
[
"device_id"
]:
[
int
(
tag
)
for
tag
in
i
[
"tag_ids"
]
.
split
(
","
)]
for
i
in
mysql_results
}
return
device_ids_info
,
all_device_action_tags
...
...
@@ -412,7 +412,7 @@ if __name__ == '__main__':
# 统计画像更新的耗时和更新的设备数
sql
=
"select count(*) from user_service_portrait_tags where stat_date='{my_today}'"
.
format
(
my_today
=
my_today
)
portrait_device_count
=
get_data_by_mysql
(
'172.16.40.158'
,
4000
,
'
root'
,
'3SYz54LS9#^9sBvC
'
,
'jerry_test'
,
sql
)
portrait_device_count
=
get_data_by_mysql
(
'172.16.40.158'
,
4000
,
'
st_user'
,
'aqpuBLYzEV7tML5RPsN1pntUzFy
'
,
'jerry_test'
,
sql
)
with
open
(
LOG_DIR
+
"dist_portrait.log"
,
'r'
)
as
f
:
lines
=
f
.
readlines
()
start_datetime_str
=
lines
[
0
][:
19
]
...
...
@@ -446,6 +446,6 @@ if __name__ == '__main__':
result
[
"device_count"
],
result
[
"coincide_count"
],
result
[
"coincide_rate"
],
result
[
"not_coincide_have_portrait_count"
],
result
[
"not_coincide_no_portrait_count"
])
write_data_by_mysql
(
'172.16.40.158'
,
4000
,
'
root'
,
'3SYz54LS9#^9sBvC
'
,
'jerry_test'
,
insert_sql
)
write_data_by_mysql
(
'172.16.40.158'
,
4000
,
'
st_user'
,
'aqpuBLYzEV7tML5RPsN1pntUzFy
'
,
'jerry_test'
,
insert_sql
)
except
Exception
as
e
:
print
(
e
)
eda/smart_rank/gm_tag_cf.py
View file @
50281489
...
...
@@ -17,7 +17,7 @@ def get_similary_tags(tag_id, tags, ratings_matrix):
def
get_user_log
():
sql
=
"select userId, tagId, rating from item_cf_log"
mysql_results
=
get_data_by_mysql
(
'172.16.40.158'
,
4000
,
'
root'
,
'3SYz54LS9#^9sBvC
'
,
'jerry_test'
,
sql
)
mysql_results
=
get_data_by_mysql
(
'172.16.40.158'
,
4000
,
'
st_user'
,
'aqpuBLYzEV7tML5RPsN1pntUzFy
'
,
'jerry_test'
,
sql
)
df
=
pd
.
DataFrame
(
mysql_results
)
return
df
...
...
eda/smart_rank/pyspark_argsparse_test.py
View file @
50281489
...
...
@@ -56,7 +56,7 @@ if __name__ == '__main__':
# 获取最近30天内的用户设备id
sql_device_ids
=
"select distinct cl_id from user_new_tag_log "
\
"where time > UNIX_TIMESTAMP(DATE_SUB(NOW(), INTERVAL 30 day))"
mysql_results
=
get_data_by_mysql
(
'172.16.40.158'
,
4000
,
'
root'
,
'3SYz54LS9#^9sBvC
'
,
'jerry_test'
,
sql_device_ids
)
mysql_results
=
get_data_by_mysql
(
'172.16.40.158'
,
4000
,
'
st_user'
,
'aqpuBLYzEV7tML5RPsN1pntUzFy
'
,
'jerry_test'
,
sql_device_ids
)
device_ids_lst
=
[
i
[
"cl_id"
]
for
i
in
mysql_results
]
print
(
device_ids_lst
[:
10
])
...
...
eda/smart_rank/stat_device_order_portrait_score.py
View file @
50281489
...
...
@@ -69,7 +69,7 @@ def get_user_service_portrait(x, all_word_tags, all_tag_tag_type, all_3tag_2tag,
def
get_device_order_info
(
start_timestamp
):
sql
=
"select distinct time, cl_id, tag_id from user_new_tag_log where action='api/settlement/alipay_callback' and time > {} and cl_id !=''"
.
format
(
start_timestamp
)
db_jerry_test
=
pymysql
.
connect
(
host
=
'172.16.40.158'
,
port
=
4000
,
user
=
'
root'
,
passwd
=
'3SYz54LS9#^9sBvC
'
,
db_jerry_test
=
pymysql
.
connect
(
host
=
'172.16.40.158'
,
port
=
4000
,
user
=
'
st_user'
,
passwd
=
'aqpuBLYzEV7tML5RPsN1pntUzFy
'
,
db
=
'jerry_test'
,
charset
=
'utf8'
)
cur_jerry_test
=
db_jerry_test
.
cursor
()
cur_jerry_test
.
execute
(
sql
)
...
...
eda/smart_rank/tool.py
View file @
50281489
...
...
@@ -310,7 +310,7 @@ def args_test(x):
def
get_user_log
(
cl_id
,
all_word_tags
,
pay_time
=
0
,
debug
=
0
):
user_df_service
=
pd
.
DataFrame
(
columns
=
[
"time"
,
"cl_id"
,
"score_type"
,
"tag_id"
,
"tag_referrer"
,
"action"
])
try
:
db_jerry_test
=
pymysql
.
connect
(
host
=
'172.16.40.158'
,
port
=
4000
,
user
=
'
root'
,
passwd
=
'3SYz54LS9#^9sBvC
'
,
db_jerry_test
=
pymysql
.
connect
(
host
=
'172.16.40.158'
,
port
=
4000
,
user
=
'
st_user'
,
passwd
=
'aqpuBLYzEV7tML5RPsN1pntUzFy
'
,
db
=
'jerry_test'
,
charset
=
'utf8'
)
cur_jerry_test
=
db_jerry_test
.
cursor
()
if
pay_time
==
0
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment