Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
F
ffm-baseline
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ML
ffm-baseline
Commits
2239d1e1
Commit
2239d1e1
authored
Dec 27, 2018
by
张彦钊
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
add test
parent
e9271be9
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
16 additions
and
15 deletions
+16
-15
applist.py
tensnsorflow/applist.py
+15
-14
test.py
tensnsorflow/test.py
+1
-1
No files found.
tensnsorflow/applist.py
View file @
2239d1e1
...
...
@@ -68,25 +68,26 @@ def sort_app():
"job"
:
{
"智联招聘"
,
"前程无忧"
,
"斗米"
,
"拉勾"
,
"Boss直聘"
,
"猎聘同道"
,
"智联招聘"
}
}
df
[
"app_list"
]
=
df
[
"app_list"
]
.
apply
(
json_format
)
n
=
df
.
shape
[
0
]
for
i
in
category
.
keys
():
df
[
i
]
=
df
[
"app_list"
]
.
apply
(
lambda
x
:
1
if
len
(
x
&
category
[
i
])
>
0
else
0
)
print
(
i
)
print
(
df
[
i
]
.
value_counts
()
)
print
(
df
.
loc
[
df
[
i
]
==
1
]
.
shape
[
0
]
/
n
)
df
=
df
.
drop
(
"app_list"
,
axis
=
1
)
yconnect
=
create_engine
(
'mysql+pymysql://root:3SYz54LS9#^9sBvC@10.66.157.22:4000/jerry_test?charset=utf8'
)
print
(
df
.
shape
)
n
=
200000
for
i
in
range
(
0
,
df
.
shape
[
0
],
n
):
print
(
i
)
if
i
==
0
:
temp
=
df
.
loc
[
0
:
n
]
elif
i
+
n
>
df
.
shape
[
0
]:
temp
=
df
.
loc
[
i
+
1
:]
else
:
temp
=
df
.
loc
[
i
+
1
:
i
+
n
]
pd
.
io
.
sql
.
to_sql
(
temp
,
"app_list_sort"
,
yconnect
,
schema
=
'jerry_test'
,
if_exists
=
'append'
,
index
=
False
)
print
(
"insert done"
)
#
yconnect = create_engine('mysql+pymysql://root:3SYz54LS9#^9sBvC@10.66.157.22:4000/jerry_test?charset=utf8')
#
print(df.shape)
#
n = 200000
#
for i in range(0,df.shape[0],n):
#
print(i)
#
if i == 0:
#
temp = df.loc[0:n]
#
elif i+n > df.shape[0]:
#
temp = df.loc[i+1:]
#
else:
#
temp = df.loc[i+1:i+n]
#
pd.io.sql.to_sql(temp, "app_list_sort", yconnect, schema='jerry_test', if_exists='append', index=False)
#
print("insert done")
...
...
tensnsorflow/test.py
View file @
2239d1e1
...
...
@@ -48,7 +48,7 @@ def click():
db
=
pymysql
.
connect
(
host
=
'10.66.157.22'
,
port
=
4000
,
user
=
'root'
,
passwd
=
'3SYz54LS9#^9sBvC'
,
db
=
'jerry_prod'
)
sql
=
"select d.cid_id,f.level1_ids,f.level2_ids from data_feed_click d left join diary_feat f "
\
"on d.cid_id = f.diary_id where d.device_id = '358035085192742' "
\
"and
d.cid_type = 'diary'
and d.stat_date > '2018-12-20'"
"and
(d.cid_type = 'diary' or d.cid_type = 'diary_video')
and d.stat_date > '2018-12-20'"
df
=
con_sql
(
db
,
sql
)
n
=
df
.
shape
[
0
]
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment