Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
F
ffm-baseline
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ML
ffm-baseline
Commits
e84f5d92
Commit
e84f5d92
authored
Apr 04, 2019
by
王志伟
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'master' of
http://git.wanmeizhensuo.com/ML/ffm-baseline
parents
6719e35b
d7f015ec
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
17 additions
and
37 deletions
+17
-37
submit.sh
eda/esmm/Model_pipline/submit.sh
+2
-2
to_database.py
eda/esmm/Model_pipline/to_database.py
+12
-21
submit.sh
tensnsorflow/es/submit.sh
+1
-1
to_database.py
tensnsorflow/es/to_database.py
+2
-13
No files found.
eda/esmm/Model_pipline/submit.sh
View file @
e84f5d92
#! /bin/bash
#! /bin/bash
PYTHON_PATH
=
/home/gaoyazhe/miniconda3/bin/python
PYTHON_PATH
=
/home/gaoyazhe/miniconda3/bin/python
MODEL_PATH
=
/srv/apps/ffm-baseline/
tensnsorflow/es
MODEL_PATH
=
/srv/apps/ffm-baseline/
eda/esmm/Model_pipline
DATA_PATH
=
/data/esmm
DATA_PATH
=
/data/esmm
echo
"rm leave tfrecord"
echo
"rm leave tfrecord"
...
@@ -43,4 +43,4 @@ echo "infer nearby..."
...
@@ -43,4 +43,4 @@ echo "infer nearby..."
${
PYTHON_PATH
}
${
MODEL_PATH
}
/train.py
--ctr_task_wgt
=
0.5
--learning_rate
=
0.0001
--deep_layers
=
512,256,128,64,32
--dropout
=
0.3,0.3,0.3,0.3,0.3
--optimizer
=
Adam
--num_epochs
=
1
--embedding_size
=
16
--batch_size
=
1024
--field_size
=
8
--feature_size
=
300000
--l2_reg
=
0.005
--log_steps
=
100
--num_threads
=
36
--model_dir
=
${
DATA_PATH
}
/model_ckpt/DeepCvrMTL/
--data_dir
=
${
DATA_PATH
}
/nearby
--task_type
=
infer
>
${
DATA_PATH
}
/nearby_infer.log
${
PYTHON_PATH
}
${
MODEL_PATH
}
/train.py
--ctr_task_wgt
=
0.5
--learning_rate
=
0.0001
--deep_layers
=
512,256,128,64,32
--dropout
=
0.3,0.3,0.3,0.3,0.3
--optimizer
=
Adam
--num_epochs
=
1
--embedding_size
=
16
--batch_size
=
1024
--field_size
=
8
--feature_size
=
300000
--l2_reg
=
0.005
--log_steps
=
100
--num_threads
=
36
--model_dir
=
${
DATA_PATH
}
/model_ckpt/DeepCvrMTL/
--data_dir
=
${
DATA_PATH
}
/nearby
--task_type
=
infer
>
${
DATA_PATH
}
/nearby_infer.log
echo
"sort and 2sql"
echo
"sort and 2sql"
${
PYTHON_PATH
}
${
MODEL_PATH
}
/to_database.py
${
PYTHON_PATH
}
${
MODEL_PATH
}
/to_database.py
>
${
DATA_PATH
}
/insert_database.log
eda/esmm/Model_pipline/to_database.py
View file @
e84f5d92
...
@@ -36,10 +36,10 @@ def native_set_join(lst):
...
@@ -36,10 +36,10 @@ def native_set_join(lst):
def
main
():
def
main
():
# native queue
# native queue
df2
=
pd
.
read_csv
(
'/
data/esmm
/native.csv'
)
df2
=
pd
.
read_csv
(
'/
home/gmuser/esmm_data
/native.csv'
)
df2
[
'cid_id'
]
=
df2
[
'cid_id'
]
.
astype
(
str
)
df2
[
'cid_id'
]
=
df2
[
'cid_id'
]
.
astype
(
str
)
df1
=
pd
.
read_csv
(
"/
data/esmm
/native/pred.txt"
,
sep
=
'
\t
'
,
header
=
None
,
names
=
[
"ctr"
,
"cvr"
,
"ctcvr"
])
df1
=
pd
.
read_csv
(
"/
home/gmuser/esmm_data
/native/pred.txt"
,
sep
=
'
\t
'
,
header
=
None
,
names
=
[
"ctr"
,
"cvr"
,
"ctcvr"
])
df2
[
"ctr"
],
df2
[
"cvr"
],
df2
[
"ctcvr"
]
=
df1
[
"ctr"
],
df1
[
"cvr"
],
df1
[
"ctcvr"
]
df2
[
"ctr"
],
df2
[
"cvr"
],
df2
[
"ctcvr"
]
=
df1
[
"ctr"
],
df1
[
"cvr"
],
df1
[
"ctcvr"
]
df3
=
df2
.
groupby
(
by
=
[
"uid"
,
"city"
])
.
apply
(
lambda
x
:
x
.
sort_values
(
by
=
"ctcvr"
,
ascending
=
False
))
.
reset_index
(
drop
=
True
)
.
groupby
(
by
=
[
"uid"
,
"city"
])
.
agg
({
'cid_id'
:
native_set_join
})
.
reset_index
(
drop
=
False
)
df3
=
df2
.
groupby
(
by
=
[
"uid"
,
"city"
])
.
apply
(
lambda
x
:
x
.
sort_values
(
by
=
"ctcvr"
,
ascending
=
False
))
.
reset_index
(
drop
=
True
)
.
groupby
(
by
=
[
"uid"
,
"city"
])
.
agg
({
'cid_id'
:
native_set_join
})
.
reset_index
(
drop
=
False
)
df3
.
columns
=
[
"device_id"
,
"city_id"
,
"native_queue"
]
df3
.
columns
=
[
"device_id"
,
"city_id"
,
"native_queue"
]
...
@@ -47,10 +47,10 @@ def main():
...
@@ -47,10 +47,10 @@ def main():
# nearby queue
# nearby queue
df2
=
pd
.
read_csv
(
'/
data/esmm
/nearby.csv'
)
df2
=
pd
.
read_csv
(
'/
home/gmuser/esmm_data
/nearby.csv'
)
df2
[
'cid_id'
]
=
df2
[
'cid_id'
]
.
astype
(
str
)
df2
[
'cid_id'
]
=
df2
[
'cid_id'
]
.
astype
(
str
)
df1
=
pd
.
read_csv
(
"/
data/esmm
/nearby/pred.txt"
,
sep
=
'
\t
'
,
header
=
None
,
names
=
[
"ctr"
,
"cvr"
,
"ctcvr"
])
df1
=
pd
.
read_csv
(
"/
home/gmuser/esmm_data
/nearby/pred.txt"
,
sep
=
'
\t
'
,
header
=
None
,
names
=
[
"ctr"
,
"cvr"
,
"ctcvr"
])
df2
[
"ctr"
],
df2
[
"cvr"
],
df2
[
"ctcvr"
]
=
df1
[
"ctr"
],
df1
[
"cvr"
],
df1
[
"ctcvr"
]
df2
[
"ctr"
],
df2
[
"cvr"
],
df2
[
"ctcvr"
]
=
df1
[
"ctr"
],
df1
[
"cvr"
],
df1
[
"ctcvr"
]
df4
=
df2
.
groupby
(
by
=
[
"uid"
,
"city"
])
.
apply
(
lambda
x
:
x
.
sort_values
(
by
=
"ctcvr"
,
ascending
=
False
))
.
reset_index
(
drop
=
True
)
.
groupby
(
by
=
[
"uid"
,
"city"
])
.
agg
({
'cid_id'
:
nearby_set_join
})
.
reset_index
(
drop
=
False
)
df4
=
df2
.
groupby
(
by
=
[
"uid"
,
"city"
])
.
apply
(
lambda
x
:
x
.
sort_values
(
by
=
"ctcvr"
,
ascending
=
False
))
.
reset_index
(
drop
=
True
)
.
groupby
(
by
=
[
"uid"
,
"city"
])
.
agg
({
'cid_id'
:
nearby_set_join
})
.
reset_index
(
drop
=
False
)
df4
.
columns
=
[
"device_id"
,
"city_id"
,
"nearby_queue"
]
df4
.
columns
=
[
"device_id"
,
"city_id"
,
"nearby_queue"
]
...
@@ -64,6 +64,8 @@ def main():
...
@@ -64,6 +64,8 @@ def main():
df_all
[
"time"
]
=
ctime
df_all
[
"time"
]
=
ctime
print
(
"union_device_count"
,
df_all
.
shape
)
print
(
"union_device_count"
,
df_all
.
shape
)
host
=
'10.66.157.22'
host
=
'10.66.157.22'
port
=
4000
port
=
4000
user
=
'root'
user
=
'root'
...
@@ -72,33 +74,21 @@ def main():
...
@@ -72,33 +74,21 @@ def main():
charset
=
'utf8'
charset
=
'utf8'
engine
=
create_engine
(
str
(
r"mysql+mysqldb://
%
s:"
+
'
%
s'
+
"@
%
s:
%
s/
%
s"
)
%
(
user
,
password
,
host
,
port
,
db
))
engine
=
create_engine
(
str
(
r"mysql+mysqldb://
%
s:"
+
'
%
s'
+
"@
%
s:
%
s/
%
s"
)
%
(
user
,
password
,
host
,
port
,
db
))
df_merge
=
df_all
[
'device_id'
]
+
df_all
[
'city_id'
]
df_merge_str
=
(
str
(
list
(
df_merge
.
values
)))
.
strip
(
'[]'
)
try
:
# df_merge = df_all[['device_id','city_id']].apply(lambda x: ''.join(x),axis=1)
delete_str
=
'delete from esmm_device_diary_queue where concat(device_id,city_id) in ({0})'
.
format
(
df_merge_str
)
con
=
pymysql
.
connect
(
host
=
'10.66.157.22'
,
port
=
4000
,
user
=
'root'
,
passwd
=
'3SYz54LS9#^9sBvC'
,
db
=
'jerry_test'
)
cur
=
con
.
cursor
()
cur
.
execute
(
delete_str
)
con
.
commit
()
df_all
.
to_sql
(
'esmm_device_diary_queue'
,
con
=
engine
,
if_exists
=
'append'
,
index
=
False
)
except
Exception
as
e
:
print
(
e
)
try
:
try
:
# df_merge = df_all[['device_id','city_id']].apply(lambda x: ''.join(x),axis=1)
# df_merge = df_all[['device_id','city_id']].apply(lambda x: ''.join(x),axis=1)
df_merge
=
df_all
[
'device_id'
]
+
df_all
[
'city_id'
]
df_merge_str
=
(
str
(
list
(
df_merge
.
values
)))
.
strip
(
'[]'
)
delete_str
=
'delete from esmm_device_diary_queue where concat(device_id,city_id) in ({0})'
.
format
(
df_merge_str
)
delete_str
=
'delete from esmm_device_diary_queue where concat(device_id,city_id) in ({0})'
.
format
(
df_merge_str
)
con
=
pymysql
.
connect
(
host
=
'1
52.136.44.138
'
,
port
=
4000
,
user
=
'root'
,
passwd
=
'3SYz54LS9#^9sBvC'
,
db
=
'jerry_test'
)
con
=
pymysql
.
connect
(
host
=
'1
0.66.157.22
'
,
port
=
4000
,
user
=
'root'
,
passwd
=
'3SYz54LS9#^9sBvC'
,
db
=
'jerry_test'
)
cur
=
con
.
cursor
()
cur
=
con
.
cursor
()
cur
.
execute
(
delete_str
)
cur
.
execute
(
delete_str
)
con
.
commit
()
con
.
commit
()
engine
=
create_engine
(
str
(
r"mysql+mysqldb://
%
s:"
+
'
%
s'
+
"@
%
s:
%
s/
%
s"
)
%
(
user
,
password
,
'152.136.44.138'
,
port
,
db
))
df_all
.
to_sql
(
'esmm_device_diary_queue'
,
con
=
engine
,
if_exists
=
'append'
,
index
=
False
,
chunksize
=
8000
)
df_all
.
to_sql
(
'esmm_device_diary_queue'
,
con
=
engine
,
if_exists
=
'append'
,
index
=
False
)
except
Exception
as
e
:
except
Exception
as
e
:
print
(
e
)
print
(
e
)
print
(
"done"
)
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
main
()
main
()
\ No newline at end of file
tensnsorflow/es/submit.sh
View file @
e84f5d92
...
@@ -43,4 +43,4 @@ echo "infer nearby..."
...
@@ -43,4 +43,4 @@ echo "infer nearby..."
${
PYTHON_PATH
}
${
MODEL_PATH
}
/train.py
--ctr_task_wgt
=
0.5
--learning_rate
=
0.0001
--deep_layers
=
512,256,128,64,32
--dropout
=
0.3,0.3,0.3,0.3,0.3
--optimizer
=
Adam
--num_epochs
=
1
--embedding_size
=
16
--batch_size
=
1024
--field_size
=
8
--feature_size
=
300000
--l2_reg
=
0.005
--log_steps
=
100
--num_threads
=
36
--model_dir
=
${
DATA_PATH
}
/model_ckpt/DeepCvrMTL/
--data_dir
=
${
DATA_PATH
}
/nearby
--task_type
=
infer
>
${
DATA_PATH
}
/nearby_infer.log
${
PYTHON_PATH
}
${
MODEL_PATH
}
/train.py
--ctr_task_wgt
=
0.5
--learning_rate
=
0.0001
--deep_layers
=
512,256,128,64,32
--dropout
=
0.3,0.3,0.3,0.3,0.3
--optimizer
=
Adam
--num_epochs
=
1
--embedding_size
=
16
--batch_size
=
1024
--field_size
=
8
--feature_size
=
300000
--l2_reg
=
0.005
--log_steps
=
100
--num_threads
=
36
--model_dir
=
${
DATA_PATH
}
/model_ckpt/DeepCvrMTL/
--data_dir
=
${
DATA_PATH
}
/nearby
--task_type
=
infer
>
${
DATA_PATH
}
/nearby_infer.log
echo
"sort and 2sql"
echo
"sort and 2sql"
${
PYTHON_PATH
}
${
MODEL_PATH
}
/to_database.py
${
PYTHON_PATH
}
${
MODEL_PATH
}
/to_database.py
>
${
DATA_PATH
}
/insert_database.log
tensnsorflow/es/to_database.py
View file @
e84f5d92
...
@@ -86,19 +86,7 @@ def main():
...
@@ -86,19 +86,7 @@ def main():
except
Exception
as
e
:
except
Exception
as
e
:
print
(
e
)
print
(
e
)
try
:
print
(
"done"
)
# df_merge = df_all[['device_id','city_id']].apply(lambda x: ''.join(x),axis=1)
delete_str
=
'delete from esmm_device_diary_queue where concat(device_id,city_id) in ({0})'
.
format
(
df_merge_str
)
con
=
pymysql
.
connect
(
host
=
'152.136.44.138'
,
port
=
4000
,
user
=
'root'
,
passwd
=
'3SYz54LS9#^9sBvC'
,
db
=
'jerry_test'
)
cur
=
con
.
cursor
()
cur
.
execute
(
delete_str
)
con
.
commit
()
engine
=
create_engine
(
str
(
r"mysql+mysqldb://
%
s:"
+
'
%
s'
+
"@
%
s:
%
s/
%
s"
)
%
(
user
,
password
,
'152.136.44.138'
,
port
,
db
))
df_all
.
to_sql
(
'esmm_device_diary_queue'
,
con
=
engine
,
if_exists
=
'append'
,
index
=
False
)
except
Exception
as
e
:
print
(
e
)
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
main
()
main
()
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment