Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
F
ffm-baseline
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ML
ffm-baseline
Commits
73ac3889
Commit
73ac3889
authored
Jan 18, 2019
by
张彦钊
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
修改数据写入
parent
efd3cc59
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
91 additions
and
5 deletions
+91
-5
feature_engineering.py
tensnsorflow/feature_engineering.py
+6
-3
pipe.sh
tensnsorflow/pipe.sh
+1
-2
sort_to_sql.py
tensnsorflow/sort_to_sql.py
+84
-0
No files found.
tensnsorflow/feature_engineering.py
View file @
73ac3889
...
...
@@ -135,6 +135,10 @@ def get_predict(date,value_map):
native_pre
=
native_pre
.
drop
(
"label"
,
axis
=
1
)
nearby_pre
=
df
[
df
[
"label"
]
==
1
]
nearby_pre
=
nearby_pre
.
drop
(
"label"
,
axis
=
1
)
native_pre
[
"uid"
]
=
native_pre
[
"device_id"
]
native_pre
[
"city"
]
=
native_pre
[
"ucity_id"
]
nearby_pre
[
"uid"
]
=
nearby_pre
[
"device_id"
]
nearby_pre
[
"city"
]
=
nearby_pre
[
"ucity_id"
]
for
i
in
[
"ucity_id"
,
"clevel1_id"
,
"ccity_name"
,
"device_type"
,
"manufacturer"
,
"channel"
,
"top"
,
"l1"
,
"time"
,
"stat_date"
,
"l2"
,
"device_id"
]:
...
...
@@ -146,17 +150,16 @@ def get_predict(date,value_map):
# TODO 没有覆盖到的类别会处理成na,暂时用0填充,后续完善一下
nearby_pre
[
i
]
=
nearby_pre
[
i
]
.
fillna
(
0
)
print
(
"native"
)
print
(
native_pre
.
shape
)
print
(
native_pre
.
head
())
native_pre
.
to_csv
(
path
+
"native.csv"
,
sep
=
"
\t
"
,
index
=
False
)
native_pre
[[
"uid"
,
"city"
,
"cid_id"
]]
.
to_csv
(
path
+
"native.csv
"
,
index
=
False
)
write_csv
(
native_pre
,
"native"
,
200000
)
print
(
"nearby"
)
print
(
nearby_pre
.
shape
)
print
(
nearby_pre
.
head
())
nearby_pre
.
to_csv
(
path
+
"nearby.csv"
,
sep
=
"
\t
"
,
index
=
False
)
nearby_pre
[[
"uid"
,
"city"
,
"cid_id"
]]
.
to_csv
(
path
+
"nearby.csv
"
,
index
=
False
)
write_csv
(
nearby_pre
,
"nearby"
,
160000
)
...
...
tensnsorflow/pipe.sh
View file @
73ac3889
...
...
@@ -3,7 +3,6 @@
PYTHON_PATH
=
/home/gaoyazhe/miniconda3/bin/python
MODEL_PATH
=
/srv/apps/ffm-baseline/tensnsorflow
DATA_PATH
=
/home/gmuser/esmm_data
OLD_PATH
=
/srv/apps/ffm-baseline/eda/esmm
echo
"rm leave tfrecord"
rm
${
DATA_PATH
}
/tr/
*
...
...
@@ -44,5 +43,5 @@ echo "infer nearby..."
${
PYTHON_PATH
}
${
MODEL_PATH
}
/train.py
--ctr_task_wgt
=
0.3
--learning_rate
=
0.0001
--deep_layers
=
256,128
--dropout
=
0.8,0.5
--optimizer
=
Adam
--num_epochs
=
1
--embedding_size
=
16
--batch_size
=
1024
--field_size
=
12
--feature_size
=
270000
--l2_reg
=
0.005
--log_steps
=
100
--num_threads
=
36
--model_dir
=
${
DATA_PATH
}
/model_ckpt/DeepCvrMTL/
--data_dir
=
${
DATA_PATH
}
/nearby
--task_type
=
infer
>
${
DATA_PATH
}
/infer.log
echo
"sort and 2sql"
${
PYTHON_PATH
}
${
OLD_PATH
}
/Model_pipline/sort_and_2
sql.py
${
PYTHON_PATH
}
${
MODEL_PATH
}
/sort_to_
sql.py
tensnsorflow/sort_to_sql.py
0 → 100644
View file @
73ac3889
#coding=utf-8
from
sqlalchemy
import
create_engine
import
pandas
as
pd
import
pymysql
import
MySQLdb
import
time
def con_sql(sql):
    """Execute one SQL statement on the ``jerry_test`` database and return all rows.

    A new connection is opened for every call and is always closed again,
    even when executing the statement or fetching the rows raises.

    :type sql : str
    :rtype : tuple
    """
    # NOTE(review): connection credentials are hard-coded here; consider
    # loading them from configuration or the environment instead.
    db = pymysql.connect(host='10.66.157.22', port=4000, user='root',
                         passwd='3SYz54LS9#^9sBvC', db='jerry_test')
    try:
        # The cursor context manager closes the cursor on exit.
        with db.cursor() as cursor:
            cursor.execute(sql)
            return cursor.fetchall()
    finally:
        # Previously the connection leaked whenever execute/fetchall raised.
        db.close()
def set_join(lst):
    """Join the distinct values of ``lst`` into one comma-separated string.

    ``lst`` must provide ``unique()`` (as a pandas Series does); values are
    kept in the order ``unique()`` returns them and converted with ``str``.
    """
    distinct_values = lst.unique().tolist()
    return ','.join(map(str, distinct_values))
def _build_queue(candidates_path, predictions_path, queue_column):
    """Build one ranked queue per (uid, city) from a candidate file and its predictions.

    :param candidates_path: CSV file read with the default separator; the code
        uses its ``uid``, ``city`` and ``cid_id`` columns.
    :param predictions_path: header-less, tab-separated file whose three
        columns are read as ``ctr``, ``cvr`` and ``ctcvr``.
    :param queue_column: name given to the aggregated ``cid_id`` column.
    :return: DataFrame with columns ``device_id``, ``city_id`` and
        ``queue_column``; the queue is the comma-joined distinct ``cid_id``
        values ordered by descending ``ctcvr``.
    """
    candidates = pd.read_csv(candidates_path)
    candidates['cid_id'] = candidates['cid_id'].astype(str)
    predictions = pd.read_csv(predictions_path, sep='\t', header=None,
                              names=["ctr", "cvr", "ctcvr"])
    # Scores are attached by index alignment (both frames use the default
    # index), i.e. row i of the predictions belongs to row i of the candidates.
    for column in ("ctr", "cvr", "ctcvr"):
        candidates[column] = predictions[column]

    # One global stable sort followed by groupby keeps every group ordered by
    # descending ctcvr (groupby preserves row order inside a group). This
    # replaces a per-group ``apply(sort_values)`` that ran a Python lambda for
    # every group.
    ranked = candidates.sort_values(by="ctcvr", ascending=False, kind="mergesort")
    queue = (ranked.groupby(by=["uid", "city"])
             .agg({'cid_id': set_join})
             .reset_index(drop=False))
    queue.columns = ["device_id", "city_id", queue_column]
    return queue


def main():
    """Rank native/nearby candidates per device and store the queues in MySQL.

    Steps:
      1. Build the native and the nearby queue from the CSV/prediction files
         under ``/home/gmuser/esmm_data``.
      2. Outer-merge both on (device_id, city_id), fill missing queues with
         ``""`` and stamp every row with the current Unix time.
      3. Delete the rows of ``esmm_device_diary_queue`` whose
         ``concat(device_id, city_id)`` matches a merged row, then append the
         merged rows.

    Errors raised during step 3 are printed, not propagated (unchanged
    best-effort behaviour of the script).
    """
    # native queue
    native_queue = _build_queue('/home/gmuser/esmm_data/native.csv',
                                "/home/gmuser/esmm_data/native/pred.txt",
                                "native_queue")
    print("native_device_count", native_queue.shape)

    # nearby queue
    nearby_queue = _build_queue('/home/gmuser/esmm_data/nearby.csv',
                                "/home/gmuser/esmm_data/nearby/pred.txt",
                                "nearby_queue")
    print("nearby_device_count", nearby_queue.shape)

    # union
    df_all = pd.merge(native_queue, nearby_queue,
                      on=['device_id', 'city_id'], how='outer').fillna("")
    df_all['device_id'] = df_all['device_id'].astype(str)
    df_all['city_id'] = df_all['city_id'].astype(str)
    df_all["time"] = int(time.time())
    print("union_device_count", df_all.shape)

    # NOTE(review): connection credentials are hard-coded here; consider
    # loading them from configuration or the environment instead.
    host = '10.66.157.22'
    port = 4000
    user = 'root'
    password = '3SYz54LS9#^9sBvC'
    db = 'jerry_test'
    engine = create_engine("mysql+mysqldb://%s:%s@%s:%s/%s"
                           % (user, password, host, port, db))

    try:
        # Rows are identified by device_id and city_id concatenated, matching
        # the concat(device_id,city_id) expression in the DELETE below.
        keys = (df_all['device_id'] + df_all['city_id']).tolist()
        if keys:
            # Without this guard an empty frame produced the invalid SQL
            # "... in ()". The keys are passed as query parameters so the
            # driver escapes them instead of pasting their repr into the SQL.
            placeholders = ','.join(['%s'] * len(keys))
            delete_str = ('delete from esmm_device_diary_queue '
                          'where concat(device_id,city_id) in ({0})'
                          ).format(placeholders)
            con = pymysql.connect(host=host, port=port, user=user,
                                  passwd=password, db=db)
            try:
                with con.cursor() as cur:
                    cur.execute(delete_str, keys)
                con.commit()
            finally:
                # The connection used to be left open.
                con.close()
        df_all.to_sql('esmm_device_diary_queue', con=engine,
                      if_exists='append', index=False)
    except Exception as e:
        # Deliberately best-effort: report the failure and let the script end.
        print(e)
# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment