Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
F
ffm-baseline
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ML
ffm-baseline
Commits
ab2f6771
Commit
ab2f6771
authored
Jan 20, 2020
by
zwild
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
update db
parent
0f0072c1
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
18 additions
and
20 deletions
+18
-20
dist_update_portrait_market.py
eda/smart_rank/dist_update_portrait_market.py
+2
-2
dist_update_user_portrait.py
eda/smart_rank/dist_update_user_portrait.py
+3
-4
dist_update_user_portrait_service.py
eda/smart_rank/dist_update_user_portrait_service.py
+2
-2
evaluation_metrics.py
eda/smart_rank/evaluation_metrics.py
+7
-8
gm_tag_cf.py
eda/smart_rank/gm_tag_cf.py
+1
-1
pyspark_argsparse_test.py
eda/smart_rank/pyspark_argsparse_test.py
+1
-1
stat_device_order_portrait_score.py
eda/smart_rank/stat_device_order_portrait_score.py
+1
-1
tool.py
eda/smart_rank/tool.py
+1
-1
No files found.
eda/smart_rank/dist_update_portrait_market.py
View file @
ab2f6771
...
...
@@ -148,7 +148,7 @@ def compute_ai_scan(x):
def
get_user_tag_score
(
cl_id
,
all_word_tags
,
size
=
10
):
try
:
db_jerry_test
=
pymysql
.
connect
(
host
=
'172.16.40.170'
,
port
=
4000
,
user
=
'
root'
,
passwd
=
'3SYz54LS9#^9sBvC
'
,
db_jerry_test
=
pymysql
.
connect
(
host
=
'172.16.40.170'
,
port
=
4000
,
user
=
'
st_user'
,
passwd
=
'aqpuBLYzEV7tML5RPsN1pntUzFy
'
,
db
=
'jerry_test'
,
charset
=
'utf8'
)
cur_jerry_test
=
db_jerry_test
.
cursor
()
...
...
@@ -218,7 +218,7 @@ def get_user_tag_score(cl_id, all_word_tags, size=10):
if
__name__
==
'__main__'
:
try
:
db_jerry_test
=
pymysql
.
connect
(
host
=
'172.16.40.170'
,
port
=
4000
,
user
=
'
root'
,
passwd
=
'3SYz54LS9#^9sBvC
'
,
db_jerry_test
=
pymysql
.
connect
(
host
=
'172.16.40.170'
,
port
=
4000
,
user
=
'
st_user'
,
passwd
=
'aqpuBLYzEV7tML5RPsN1pntUzFy
'
,
db
=
'jerry_test'
,
charset
=
'utf8'
)
cur_jerry_test
=
db_jerry_test
.
cursor
()
...
...
eda/smart_rank/dist_update_user_portrait.py
View file @
ab2f6771
...
...
@@ -99,7 +99,7 @@ def tag_list2dict(lst, size):
def
get_user_tag_score
(
cl_id
,
size
=
10
):
try
:
db_jerry_test
=
pymysql
.
connect
(
host
=
'172.16.40.158'
,
port
=
4000
,
user
=
'
root'
,
passwd
=
'3SYz54LS9#^9sBvC
'
,
db_jerry_test
=
pymysql
.
connect
(
host
=
'172.16.40.158'
,
port
=
4000
,
user
=
'
st_user'
,
passwd
=
'aqpuBLYzEV7tML5RPsN1pntUzFy
'
,
db
=
'jerry_test'
,
charset
=
'utf8'
)
cur_jerry_test
=
db_jerry_test
.
cursor
()
...
...
@@ -144,7 +144,7 @@ def get_user_tag_score(cl_id, size=10):
if
__name__
==
'__main__'
:
try
:
db_jerry_test
=
pymysql
.
connect
(
host
=
'172.16.40.170'
,
port
=
4000
,
user
=
'
root'
,
passwd
=
'3SYz54LS9#^9sBvC
'
,
db_jerry_test
=
pymysql
.
connect
(
host
=
'172.16.40.170'
,
port
=
4000
,
user
=
'
st_user'
,
passwd
=
'aqpuBLYzEV7tML5RPsN1pntUzFy
'
,
db
=
'jerry_test'
,
charset
=
'utf8'
)
cur_jerry_test
=
db_jerry_test
.
cursor
()
...
...
@@ -176,4 +176,4 @@ if __name__ == '__main__':
result
.
collect
()
except
Exception
as
e
:
send_email
(
"dist_update_user_portrait"
,
"dist_update_user_portrait"
,
"dist_update_user_portrait"
)
\ No newline at end of file
send_email
(
"dist_update_user_portrait"
,
"dist_update_user_portrait"
,
"dist_update_user_portrait"
)
eda/smart_rank/dist_update_user_portrait_service.py
View file @
ab2f6771
...
...
@@ -22,7 +22,7 @@ from tool import get_user_log, compute_henqiang, get_action_tag_count, compute_j
def
get_user_service_portrait
(
cl_id
,
all_word_tags
,
all_tag_tag_type
,
all_3tag_2tag
,
all_tags_name
,
size
=
None
):
db_jerry_test
=
pymysql
.
connect
(
host
=
'172.16.40.158'
,
port
=
4000
,
user
=
'
root'
,
passwd
=
'3SYz54LS9#^9sBvC
'
,
db_jerry_test
=
pymysql
.
connect
(
host
=
'172.16.40.158'
,
port
=
4000
,
user
=
'
st_user'
,
passwd
=
'aqpuBLYzEV7tML5RPsN1pntUzFy
'
,
db
=
'jerry_test'
,
charset
=
'utf8'
)
cur_jerry_test
=
db_jerry_test
.
cursor
()
...
...
@@ -117,7 +117,7 @@ def get_user_service_portrait(cl_id, all_word_tags, all_tag_tag_type, all_3tag_2
if
__name__
==
'__main__'
:
try
:
db_jerry_test
=
pymysql
.
connect
(
host
=
'172.16.40.170'
,
port
=
4000
,
user
=
'
root'
,
passwd
=
'3SYz54LS9#^9sBvC
'
,
db_jerry_test
=
pymysql
.
connect
(
host
=
'172.16.40.170'
,
port
=
4000
,
user
=
'
st_user'
,
passwd
=
'aqpuBLYzEV7tML5RPsN1pntUzFy
'
,
db
=
'jerry_test'
,
charset
=
'utf8'
)
cur_jerry_test
=
db_jerry_test
.
cursor
()
...
...
eda/smart_rank/evaluation_metrics.py
View file @
ab2f6771
...
...
@@ -44,7 +44,7 @@ def get_user_service_portrait_not_alipay(cl_id, all_word_tags, all_tag_tag_type,
:return: 画像(去掉支付行为)
"""
try
:
db_jerry_test
=
pymysql
.
connect
(
host
=
'172.16.40.158'
,
port
=
4000
,
user
=
'
root'
,
passwd
=
'3SYz54LS9#^9sBvC
'
,
db_jerry_test
=
pymysql
.
connect
(
host
=
'172.16.40.158'
,
port
=
4000
,
user
=
'
st_user'
,
passwd
=
'aqpuBLYzEV7tML5RPsN1pntUzFy
'
,
db
=
'jerry_test'
,
charset
=
'utf8'
)
cur_jerry_test
=
db_jerry_test
.
cursor
()
# 用户的非搜索、支付的行为
...
...
@@ -174,7 +174,7 @@ def get_2_tags_coincide_rate(device_order_tags, device_portrait_result, portrait
coincide_rate
=
coincide_count
/
(
device_count
-
not_coincide_no_portrait
)
result
=
{
"device_count"
:
device_count
,
"coincide_count"
:
coincide_count
,
"coincide_rate"
:
coincide_rate
,
"not_coincide_have_portrait_count"
:
not_coincide_have_portrait
,
"not_coincide_no_portrait_count"
:
not_coincide_no_portrait
,
1
"not_coincide_no_portrait_count"
:
not_coincide_no_portrait
,
"not_coincide_no_portrait_device_ids"
:
not_coincide_no_portrait_device_ids
,
"not_coincide_have_portrait_device_ids"
:
not_coincide_have_portrait_device_ids
}
return
result
...
...
@@ -256,7 +256,7 @@ def get_user_diary_click_info_yesterday(click_date, click_date_tomorrow):
AND SUBSTR(MD5(d.cl_id),-1,1) IN ('0','1','2','3','4','a','b','c','e')) tmp2
GROUP BY cl_id
"""
.
format
(
click_date
=
click_date
,
click_date_tomorrow
=
click_date_tomorrow
)
mysql_results
=
get_data_by_mysql
(
'172.16.40.158'
,
4000
,
'
root'
,
'3SYz54LS9#^9sBvC
'
,
'jerry_test'
,
sql_device_info_yesterday
)
mysql_results
=
get_data_by_mysql
(
'172.16.40.158'
,
4000
,
'
st_user'
,
'aqpuBLYzEV7tML5RPsN1pntUzFy
'
,
'jerry_test'
,
sql_device_info_yesterday
)
device_ids_info
=
[(
i
[
"device_id"
],
int
(
i
[
"click_time"
]))
for
i
in
mysql_results
]
all_device_action_tags
=
{
i
[
"device_id"
]:
[
int
(
tag
)
for
tag
in
i
[
"tag_ids"
]
.
split
(
","
)]
for
i
in
mysql_results
}
return
device_ids_info
,
all_device_action_tags
...
...
@@ -286,7 +286,7 @@ def get_user_service_click_info_yesterday(click_date, click_date_tomorrow):
AND SUBSTR(MD5(d.cl_id),-1,1) IN ('0','1','2','3','4','a','b','c','e')) tmp2
GROUP BY cl_id
"""
.
format
(
click_date
=
click_date
,
click_date_tomorrow
=
click_date_tomorrow
)
mysql_results
=
get_data_by_mysql
(
'172.16.40.158'
,
4000
,
'
root'
,
'3SYz54LS9#^9sBvC
'
,
'jerry_test'
,
sql_device_info_yesterday
)
mysql_results
=
get_data_by_mysql
(
'172.16.40.158'
,
4000
,
'
st_user'
,
'aqpuBLYzEV7tML5RPsN1pntUzFy
'
,
'jerry_test'
,
sql_device_info_yesterday
)
device_ids_info
=
[(
i
[
"device_id"
],
int
(
i
[
"click_time"
]))
for
i
in
mysql_results
]
all_device_action_tags
=
{
i
[
"device_id"
]:
[
int
(
tag
)
for
tag
in
i
[
"tag_ids"
]
.
split
(
","
)]
for
i
in
mysql_results
}
return
device_ids_info
,
all_device_action_tags
...
...
@@ -412,7 +412,7 @@ if __name__ == '__main__':
# 统计画像更新的耗时和更新的设备数
sql
=
"select count(*) from user_service_portrait_tags where stat_date='{my_today}'"
.
format
(
my_today
=
my_today
)
portrait_device_count
=
get_data_by_mysql
(
'172.16.40.158'
,
4000
,
'
root'
,
'3SYz54LS9#^9sBvC
'
,
'jerry_test'
,
sql
)
portrait_device_count
=
get_data_by_mysql
(
'172.16.40.158'
,
4000
,
'
st_user'
,
'aqpuBLYzEV7tML5RPsN1pntUzFy
'
,
'jerry_test'
,
sql
)
with
open
(
LOG_DIR
+
"dist_portrait.log"
,
'r'
)
as
f
:
lines
=
f
.
readlines
()
start_datetime_str
=
lines
[
0
][:
19
]
...
...
@@ -446,6 +446,6 @@ if __name__ == '__main__':
result
[
"device_count"
],
result
[
"coincide_count"
],
result
[
"coincide_rate"
],
result
[
"not_coincide_have_portrait_count"
],
result
[
"not_coincide_no_portrait_count"
])
write_data_by_mysql
(
'172.16.40.158'
,
4000
,
'
root'
,
'3SYz54LS9#^9sBvC
'
,
'jerry_test'
,
insert_sql
)
write_data_by_mysql
(
'172.16.40.158'
,
4000
,
'
st_user'
,
'aqpuBLYzEV7tML5RPsN1pntUzFy
'
,
'jerry_test'
,
insert_sql
)
except
Exception
as
e
:
print
(
e
)
\ No newline at end of file
print
(
e
)
eda/smart_rank/gm_tag_cf.py
View file @
ab2f6771
...
...
@@ -17,7 +17,7 @@ def get_similary_tags(tag_id, tags, ratings_matrix):
def
get_user_log
():
sql
=
"select userId, tagId, rating from item_cf_log"
mysql_results
=
get_data_by_mysql
(
'172.16.40.158'
,
4000
,
'
root'
,
'3SYz54LS9#^9sBvC
'
,
'jerry_test'
,
sql
)
mysql_results
=
get_data_by_mysql
(
'172.16.40.158'
,
4000
,
'
st_user'
,
'aqpuBLYzEV7tML5RPsN1pntUzFy
'
,
'jerry_test'
,
sql
)
df
=
pd
.
DataFrame
(
mysql_results
)
return
df
...
...
eda/smart_rank/pyspark_argsparse_test.py
View file @
ab2f6771
...
...
@@ -56,7 +56,7 @@ if __name__ == '__main__':
# 获取最近30天内的用户设备id
sql_device_ids
=
"select distinct cl_id from user_new_tag_log "
\
"where time > UNIX_TIMESTAMP(DATE_SUB(NOW(), INTERVAL 30 day))"
mysql_results
=
get_data_by_mysql
(
'172.16.40.158'
,
4000
,
'
root'
,
'3SYz54LS9#^9sBvC
'
,
'jerry_test'
,
sql_device_ids
)
mysql_results
=
get_data_by_mysql
(
'172.16.40.158'
,
4000
,
'
st_user'
,
'aqpuBLYzEV7tML5RPsN1pntUzFy
'
,
'jerry_test'
,
sql_device_ids
)
device_ids_lst
=
[
i
[
"cl_id"
]
for
i
in
mysql_results
]
print
(
device_ids_lst
[:
10
])
...
...
eda/smart_rank/stat_device_order_portrait_score.py
View file @
ab2f6771
...
...
@@ -69,7 +69,7 @@ def get_user_service_portrait(x, all_word_tags, all_tag_tag_type, all_3tag_2tag,
def
get_device_order_info
(
start_timestamp
):
sql
=
"select distinct time, cl_id, tag_id from user_new_tag_log where action='api/settlement/alipay_callback' and time > {} and cl_id !=''"
.
format
(
start_timestamp
)
db_jerry_test
=
pymysql
.
connect
(
host
=
'172.16.40.158'
,
port
=
4000
,
user
=
'
root'
,
passwd
=
'3SYz54LS9#^9sBvC
'
,
db_jerry_test
=
pymysql
.
connect
(
host
=
'172.16.40.158'
,
port
=
4000
,
user
=
'
st_user'
,
passwd
=
'aqpuBLYzEV7tML5RPsN1pntUzFy
'
,
db
=
'jerry_test'
,
charset
=
'utf8'
)
cur_jerry_test
=
db_jerry_test
.
cursor
()
cur_jerry_test
.
execute
(
sql
)
...
...
eda/smart_rank/tool.py
View file @
ab2f6771
...
...
@@ -310,7 +310,7 @@ def args_test(x):
def
get_user_log
(
cl_id
,
all_word_tags
,
pay_time
=
0
,
debug
=
0
):
user_df_service
=
pd
.
DataFrame
(
columns
=
[
"time"
,
"cl_id"
,
"score_type"
,
"tag_id"
,
"tag_referrer"
,
"action"
])
try
:
db_jerry_test
=
pymysql
.
connect
(
host
=
'172.16.40.158'
,
port
=
4000
,
user
=
'
root'
,
passwd
=
'3SYz54LS9#^9sBvC
'
,
db_jerry_test
=
pymysql
.
connect
(
host
=
'172.16.40.158'
,
port
=
4000
,
user
=
'
st_user'
,
passwd
=
'aqpuBLYzEV7tML5RPsN1pntUzFy
'
,
db
=
'jerry_test'
,
charset
=
'utf8'
)
cur_jerry_test
=
db_jerry_test
.
cursor
()
if
pay_time
==
0
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment