Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
F
ffm-baseline
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ML
ffm-baseline
Commits
5cc22188
Commit
5cc22188
authored
Jan 28, 2019
by
张彦钊
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
native_queue 取top50%的日记
parent
6013308d
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
40 additions
and
31 deletions
+40
-31
data2ffm.py
eda/esmm/Feature_pipline/data2ffm.py
+0
-0
get_tfrecord.py
eda/esmm/Feature_pipline/get_tfrecord.py
+20
-21
DeepCvrMTL.py
eda/esmm/Model_pipline/DeepCvrMTL.py
+1
-2
sort_and_2sql.py
eda/esmm/Model_pipline/sort_and_2sql.py
+15
-5
submit.sh
eda/esmm/Model_pipline/submit.sh
+4
-3
No files found.
eda/esmm/Feature_pipline/data2ffm.py
View file @
5cc22188
This diff is collapsed.
Click to expand it.
eda/esmm/Feature_pipline/get_tfrecord.py
View file @
5cc22188
...
...
@@ -26,7 +26,7 @@ tf.app.flags.DEFINE_integer("threads", 16, "threads num")
#Ad_Fileds = set(['205','206','207','210','216'])
#Context_Fileds = set(['508','509','702','853','301'])
#Common_Fileds = {'1':'1','2':'2','3':'3','4':'4','5':'5','6':'6','7':'7','8':'8','9':'9','10':'10','11':'11','12':'12','13':'13','14':'14','15':'15','16':'16','17':'17','18':'18','19':'19','20':'20','21':'21','22':'22','23':'23'}
Common_Fileds
=
{
'1'
:
'1'
,
'2'
:
'2'
,
'3'
:
'3'
,
'4'
:
'4'
,
'5'
:
'5'
,
'6'
:
'6'
,
'7'
:
'7'
,
'8'
:
'8'
,
'9'
:
'9'
,
'10'
:
'10'
}
Common_Fileds
=
{
'1'
:
'1'
,
'2'
:
'2'
,
'3'
:
'3'
,
'4'
:
'4'
,
'5'
:
'5'
,
'6'
:
'6'
,
'7'
:
'7'
,
'8'
:
'8'
}
UMH_Fileds
=
{
'109_14'
:(
'u_cat'
,
'12'
),
'110_14'
:(
'u_shop'
,
'13'
),
'127_14'
:(
'u_brand'
,
'14'
),
'150_14'
:(
'u_int'
,
'15'
)}
#user multi-hot feature
Ad_Fileds
=
{
'206'
:(
'a_cat'
,
'16'
),
'207'
:(
'a_shop'
,
'17'
),
'210'
:(
'a_int'
,
'18'
),
'216'
:(
'a_brand'
,
'19'
)}
#ad feature for DIN
...
...
@@ -70,24 +70,24 @@ def gen_tfrecords(in_file):
#"feat_vals": tf.train.Feature(float_list=tf.train.FloatList(value=feat_vals))})
#3 特殊字段单独处理
#
for f, (fname, def_id) in UMH_Fileds.items():
#
if f in ffv[:,0]:
#
mask = np.array(f == ffv[:,0])
#
feat_ids = ffv[mask,1]
#
feat_vals= ffv[mask,2]
#
else:
#
feat_ids = np.array([def_id])
#
feat_vals = np.array([1.0])
#
feature.update({fname+"ids": tf.train.Feature(int64_list=tf.train.Int64List(value=feat_ids.astype(np.int))),
#
fname+"vals": tf.train.Feature(float_list=tf.train.FloatList(value=feat_vals.astype(np.float)))})
#
for f, (fname, def_id) in Ad_Fileds.items():
#
if f in ffv[:,0]:
#
mask = np.array(f == ffv[:,0])
#
feat_ids = ffv[mask,1]
#
else:
#
feat_ids = np.array([def_id])
#
feature.update({fname+"ids": tf.train.Feature(int64_list=tf.train.Int64List(value=feat_ids.astype(np.int)))})
for
f
,
(
fname
,
def_id
)
in
UMH_Fileds
.
items
():
if
f
in
ffv
[:,
0
]:
mask
=
np
.
array
(
f
==
ffv
[:,
0
])
feat_ids
=
ffv
[
mask
,
1
]
feat_vals
=
ffv
[
mask
,
2
]
else
:
feat_ids
=
np
.
array
([
def_id
])
feat_vals
=
np
.
array
([
1.0
])
feature
.
update
({
fname
+
"ids"
:
tf
.
train
.
Feature
(
int64_list
=
tf
.
train
.
Int64List
(
value
=
feat_ids
.
astype
(
np
.
int
))),
fname
+
"vals"
:
tf
.
train
.
Feature
(
float_list
=
tf
.
train
.
FloatList
(
value
=
feat_vals
.
astype
(
np
.
float
)))})
for
f
,
(
fname
,
def_id
)
in
Ad_Fileds
.
items
():
if
f
in
ffv
[:,
0
]:
mask
=
np
.
array
(
f
==
ffv
[:,
0
])
feat_ids
=
ffv
[
mask
,
1
]
else
:
feat_ids
=
np
.
array
([
def_id
])
feature
.
update
({
fname
+
"ids"
:
tf
.
train
.
Feature
(
int64_list
=
tf
.
train
.
Int64List
(
value
=
feat_ids
.
astype
(
np
.
int
)))})
# serialized to Example
example
=
tf
.
train
.
Example
(
features
=
tf
.
train
.
Features
(
feature
=
feature
))
...
...
@@ -112,4 +112,4 @@ def main(_):
if
__name__
==
"__main__"
:
tf
.
logging
.
set_verbosity
(
tf
.
logging
.
INFO
)
tf
.
app
.
run
()
\ No newline at end of file
tf
.
app
.
run
()
eda/esmm/Model_pipline/DeepCvrMTL.py
View file @
5cc22188
...
...
@@ -365,4 +365,4 @@ def main(_):
if
__name__
==
"__main__"
:
tf
.
logging
.
set_verbosity
(
tf
.
logging
.
INFO
)
tf
.
app
.
run
()
\ No newline at end of file
tf
.
app
.
run
()
eda/esmm/Model_pipline/sort_and_2sql.py
View file @
5cc22188
...
...
@@ -18,10 +18,21 @@ def con_sql(sql):
db
.
close
()
return
result
def
set_join
(
lst
):
def
nearby_set_join
(
lst
):
# return ','.join([str(i) for i in list(lst)])
return
','
.
join
([
str
(
i
)
for
i
in
lst
.
unique
()
.
tolist
()])
def
native_set_join
(
lst
):
l
=
lst
.
unique
()
.
tolist
()
d
=
int
(
len
(
l
)
/
2
)
if
d
==
0
:
d
=
1
r
=
[
str
(
i
)
for
i
in
l
]
r
=
r
[:
d
]
return
','
.
join
(
r
)
def
main
():
# native queue
...
...
@@ -30,7 +41,7 @@ def main():
df1
=
pd
.
read_csv
(
"/home/gmuser/esmm_data/native/pred.txt"
,
sep
=
'
\t
'
,
header
=
None
,
names
=
[
"ctr"
,
"cvr"
,
"ctcvr"
])
df2
[
"ctr"
],
df2
[
"cvr"
],
df2
[
"ctcvr"
]
=
df1
[
"ctr"
],
df1
[
"cvr"
],
df1
[
"ctcvr"
]
df3
=
df2
.
groupby
(
by
=
[
"uid"
,
"city"
])
.
apply
(
lambda
x
:
x
.
sort_values
(
by
=
"ctcvr"
,
ascending
=
False
))
.
reset_index
(
drop
=
True
)
.
groupby
(
by
=
[
"uid"
,
"city"
])
.
agg
({
'cid_id'
:
set_join
})
.
reset_index
(
drop
=
False
)
df3
=
df2
.
groupby
(
by
=
[
"uid"
,
"city"
])
.
apply
(
lambda
x
:
x
.
sort_values
(
by
=
"ctcvr"
,
ascending
=
False
))
.
reset_index
(
drop
=
True
)
.
groupby
(
by
=
[
"uid"
,
"city"
])
.
agg
({
'cid_id'
:
native_
set_join
})
.
reset_index
(
drop
=
False
)
df3
.
columns
=
[
"device_id"
,
"city_id"
,
"native_queue"
]
print
(
"native_device_count"
,
df3
.
shape
)
...
...
@@ -41,7 +52,7 @@ def main():
df1
=
pd
.
read_csv
(
"/home/gmuser/esmm_data/nearby/pred.txt"
,
sep
=
'
\t
'
,
header
=
None
,
names
=
[
"ctr"
,
"cvr"
,
"ctcvr"
])
df2
[
"ctr"
],
df2
[
"cvr"
],
df2
[
"ctcvr"
]
=
df1
[
"ctr"
],
df1
[
"cvr"
],
df1
[
"ctcvr"
]
df4
=
df2
.
groupby
(
by
=
[
"uid"
,
"city"
])
.
apply
(
lambda
x
:
x
.
sort_values
(
by
=
"ctcvr"
,
ascending
=
False
))
.
reset_index
(
drop
=
True
)
.
groupby
(
by
=
[
"uid"
,
"city"
])
.
agg
({
'cid_id'
:
set_join
})
.
reset_index
(
drop
=
False
)
df4
=
df2
.
groupby
(
by
=
[
"uid"
,
"city"
])
.
apply
(
lambda
x
:
x
.
sort_values
(
by
=
"ctcvr"
,
ascending
=
False
))
.
reset_index
(
drop
=
True
)
.
groupby
(
by
=
[
"uid"
,
"city"
])
.
agg
({
'cid_id'
:
nearby_
set_join
})
.
reset_index
(
drop
=
False
)
df4
.
columns
=
[
"device_id"
,
"city_id"
,
"nearby_queue"
]
print
(
"nearby_device_count"
,
df4
.
shape
)
...
...
@@ -80,4 +91,4 @@ def main():
if
__name__
==
'__main__'
:
main
()
\ No newline at end of file
main
()
eda/esmm/Model_pipline/submit.sh
View file @
5cc22188
#! /bin/bash
cd
/srv/apps/ffm-baseline/eda/esmm
git checkout master
PYTHON_PATH
=
/home/gaoyazhe/miniconda3/bin/python
MODEL_PATH
=
/srv/apps/ffm-baseline/eda/esmm
DATA_PATH
=
/home/gmuser/esmm_data
...
...
@@ -55,7 +56,7 @@ currentTimeStamp=$((timeStamp*1000+`date "+%N"`/1000000))
echo
$current
echo
"train..."
${
PYTHON_PATH
}
${
MODEL_PATH
}
/Model_pipline/DeepCvrMTL.py
--ctr_task_wgt
=
0.3
--learning_rate
=
0.0001
--deep_layers
=
256,128
--dropout
=
0.8,0.5
--optimizer
=
Adam
--num_epochs
=
1
--embedding_size
=
16
--batch_size
=
1024
--field_size
=
10
--feature_size
=
2000
--l2_reg
=
0.005
--log_steps
=
100
--num_threads
=
36
--model_dir
=
${
DATA_PATH
}
/model_ckpt/DeepCvrMTL/
--data_dir
=
${
DATA_PATH
}
--task_type
=
train
${
PYTHON_PATH
}
${
MODEL_PATH
}
/Model_pipline/DeepCvrMTL.py
--ctr_task_wgt
=
0.3
--learning_rate
=
0.0001
--deep_layers
=
256,128
--dropout
=
0.8,0.5
--optimizer
=
Adam
--num_epochs
=
1
--embedding_size
=
16
--batch_size
=
1024
--field_size
=
8
--feature_size
=
2000
--l2_reg
=
0.005
--log_steps
=
100
--num_threads
=
36
--model_dir
=
${
DATA_PATH
}
/model_ckpt/DeepCvrMTL/
--data_dir
=
${
DATA_PATH
}
--task_type
=
train
echo
"train time"
current
=
$(
date
"+%Y-%m-%d %H:%M:%S"
)
...
...
@@ -64,11 +65,11 @@ currentTimeStamp=$((timeStamp*1000+`date "+%N"`/1000000))
echo
$current
echo
"infer native..."
${
PYTHON_PATH
}
${
MODEL_PATH
}
/Model_pipline/DeepCvrMTL.py
--ctr_task_wgt
=
0.3
--learning_rate
=
0.0001
--deep_layers
=
256,128
--dropout
=
0.8,0.5
--optimizer
=
Adam
--num_epochs
=
1
--embedding_size
=
16
--batch_size
=
1024
--field_size
=
10
--feature_size
=
2000
--l2_reg
=
0.005
--log_steps
=
100
--num_threads
=
36
--model_dir
=
${
DATA_PATH
}
/model_ckpt/DeepCvrMTL/
--data_dir
=
${
DATA_PATH
}
/native
--task_type
=
infer
>
${
DATA_PATH
}
/infer.log
${
PYTHON_PATH
}
${
MODEL_PATH
}
/Model_pipline/DeepCvrMTL.py
--ctr_task_wgt
=
0.3
--learning_rate
=
0.0001
--deep_layers
=
256,128
--dropout
=
0.8,0.5
--optimizer
=
Adam
--num_epochs
=
1
--embedding_size
=
16
--batch_size
=
1024
--field_size
=
8
--feature_size
=
2000
--l2_reg
=
0.005
--log_steps
=
100
--num_threads
=
36
--model_dir
=
${
DATA_PATH
}
/model_ckpt/DeepCvrMTL/
--data_dir
=
${
DATA_PATH
}
/native
--task_type
=
infer
>
${
DATA_PATH
}
/infer.log
echo
"infer nearby..."
${
PYTHON_PATH
}
${
MODEL_PATH
}
/Model_pipline/DeepCvrMTL.py
--ctr_task_wgt
=
0.3
--learning_rate
=
0.0001
--deep_layers
=
256,128
--dropout
=
0.8,0.5
--optimizer
=
Adam
--num_epochs
=
1
--embedding_size
=
16
--batch_size
=
1024
--field_size
=
10
--feature_size
=
2000
--l2_reg
=
0.005
--log_steps
=
100
--num_threads
=
36
--model_dir
=
${
DATA_PATH
}
/model_ckpt/DeepCvrMTL/
--data_dir
=
${
DATA_PATH
}
/nearby
--task_type
=
infer
>
${
DATA_PATH
}
/infer.log
${
PYTHON_PATH
}
${
MODEL_PATH
}
/Model_pipline/DeepCvrMTL.py
--ctr_task_wgt
=
0.3
--learning_rate
=
0.0001
--deep_layers
=
256,128
--dropout
=
0.8,0.5
--optimizer
=
Adam
--num_epochs
=
1
--embedding_size
=
16
--batch_size
=
1024
--field_size
=
8
--feature_size
=
2000
--l2_reg
=
0.005
--log_steps
=
100
--num_threads
=
36
--model_dir
=
${
DATA_PATH
}
/model_ckpt/DeepCvrMTL/
--data_dir
=
${
DATA_PATH
}
/nearby
--task_type
=
infer
>
${
DATA_PATH
}
/infer.log
echo
"sort and 2sql"
${
PYTHON_PATH
}
${
MODEL_PATH
}
/Model_pipline/sort_and_2sql.py
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment