Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
F
ffm-baseline
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ML
ffm-baseline
Commits
4dcaa94b
Commit
4dcaa94b
authored
Dec 17, 2018
by
高雅喆
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
change sort_and_2sql
parent
94c4bc5c
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
13 additions
and
24 deletions
+13
-24
DeepCvrMTL.py
eda/esmm/Model_pipline/DeepCvrMTL.py
+1
-1
sort_and_2sql.py
eda/esmm/Model_pipline/sort_and_2sql.py
+12
-23
No files found.
eda/esmm/Model_pipline/DeepCvrMTL.py
View file @
4dcaa94b
...
...
@@ -345,7 +345,7 @@ def main(_):
print
(
"-"
*
100
)
with
open
(
FLAGS
.
data_dir
+
"/pred.txt"
,
"w"
)
as
fo
:
for
prob
in
preds
:
fo
.
write
(
"
%
f
\t
%
f
\
n
"
%
(
prob
[
'pctr'
],
prob
[
'p
cvr'
]))
fo
.
write
(
"
%
f
\t
%
f
\
t
%
f
\n
"
%
(
prob
[
'pctr'
],
prob
[
'pcvr'
],
prob
[
'pct
cvr'
]))
elif
FLAGS
.
task_type
==
'export'
:
print
(
"Not Implemented, Do It Yourself!"
)
#feature_spec = tf.feature_column.make_parse_example_spec(feature_columns)
...
...
eda/esmm/Model_pipline/sort_and_2sql.py
View file @
4dcaa94b
...
...
@@ -22,36 +22,25 @@ def set_join(lst):
return
','
.
join
(
set
(
lst
))
def
main
():
sql
=
"select device_id,city_id,cid from esmm_data2ffm_infer_native"
result
=
con_sql
(
sql
)
dct
=
{
"uid"
:[],
"city"
:[],
"cid_id"
:[]}
for
i
in
result
:
dct
[
"uid"
]
.
append
(
i
[
0
])
dct
[
"city"
]
.
append
(
i
[
1
])
dct
[
"cid_id"
]
.
append
(
i
[
2
])
df1
=
pd
.
read_csv
(
"/home/gaoyazhe/data/native/pred.txt"
,
sep
=
'
\t
'
,
header
=
None
,
names
=
[
"ctr"
,
"cvr"
])
df2
=
pd
.
DataFrame
(
dct
)
df2
[
"ctr"
],
df2
[
"cvr"
]
=
df1
[
"ctr"
],
df1
[
"cvr"
]
df3
=
df2
.
groupby
(
by
=
[
"uid"
,
"city"
])
.
apply
(
lambda
x
:
x
.
sort_values
(
by
=
"cvr"
,
ascending
=
False
))
.
reset_index
(
drop
=
True
)
.
groupby
(
by
=
[
"uid"
,
"city"
])
.
agg
({
'cid_id'
:
set_join
})
.
reset_index
(
drop
=
False
)
# native queue
df2
=
pd
.
read_csv
(
'/home/gaoyazhe/data/native.csv'
,
usecols
=
[
0
,
1
,
2
],
header
=
0
,
names
=
[
'uid'
,
'city'
,
'cid_id'
],
sep
=
'
\t
'
)
df1
=
pd
.
read_csv
(
"/home/gaoyazhe/data/native/pred.txt"
,
sep
=
'
\t
'
,
header
=
None
,
names
=
[
"ctr"
,
"cvr"
,
"ctcvr"
])
df2
[
"ctr"
],
df2
[
"cvr"
],
df2
[
"ctcvr"
]
=
df1
[
"ctr"
],
df1
[
"cvr"
],
df1
[
"ctcvr"
]
df3
=
df2
.
groupby
(
by
=
[
"uid"
,
"city"
])
.
apply
(
lambda
x
:
x
.
sort_values
(
by
=
"ctcvr"
,
ascending
=
False
))
.
reset_index
(
drop
=
True
)
.
groupby
(
by
=
[
"uid"
,
"city"
])
.
agg
({
'cid_id'
:
set_join
})
.
reset_index
(
drop
=
False
)
ctime
=
int
(
time
.
time
())
df3
[
"time"
]
=
ctime
df3
.
columns
=
[
"device_id"
,
"city_id"
,
"native_queue"
,
"time"
]
print
(
"native_device_count"
,
df3
.
shape
)
sql_nearby
=
"select device_id,city_id,cid from esmm_data2ffm_infer_nearby"
result
=
con_sql
(
sql_nearby
)
dct
=
{
"uid"
:[],
"city"
:[],
"cid_id"
:[]}
for
i
in
result
:
dct
[
"uid"
]
.
append
(
i
[
0
])
dct
[
"city"
]
.
append
(
i
[
1
])
dct
[
"cid_id"
]
.
append
(
i
[
2
])
# nearby queue
df2
=
pd
.
read_csv
(
'/home/gaoyazhe/data/nearby.csv'
,
usecols
=
[
0
,
1
,
2
],
header
=
0
,
names
=
[
'uid'
,
'city'
,
'cid_id'
],
sep
=
'
\t
'
)
df1
=
pd
.
read_csv
(
"/home/gaoyazhe/data/nearby/pred.txt"
,
sep
=
'
\t
'
,
header
=
None
,
names
=
[
"ctr"
,
"cvr"
])
df2
=
pd
.
DataFrame
(
dct
)
df2
[
"ctr"
],
df2
[
"cvr"
]
=
df1
[
"ctr"
],
df1
[
"cvr"
]
df4
=
df2
.
groupby
(
by
=
[
"uid"
,
"city"
])
.
apply
(
lambda
x
:
x
.
sort_values
(
by
=
"cvr"
,
ascending
=
False
))
.
reset_index
(
drop
=
True
)
.
groupby
(
by
=
[
"uid"
,
"city"
])
.
agg
({
'cid_id'
:
set_join
})
.
reset_index
(
drop
=
False
)
df1
=
pd
.
read_csv
(
"/home/gaoyazhe/data/nearby/pred.txt"
,
sep
=
'
\t
'
,
header
=
None
,
names
=
[
"ctr"
,
"cvr"
,
"ctcvr"
])
df2
[
"ctr"
],
df2
[
"cvr"
],
df2
[
"ctcvr"
]
=
df1
[
"ctr"
],
df1
[
"cvr"
],
df1
[
"ctcvr"
]
df4
=
df2
.
groupby
(
by
=
[
"uid"
,
"city"
])
.
apply
(
lambda
x
:
x
.
sort_values
(
by
=
"ctcvr"
,
ascending
=
False
))
.
reset_index
(
drop
=
True
)
.
groupby
(
by
=
[
"uid"
,
"city"
])
.
agg
({
'cid_id'
:
set_join
})
.
reset_index
(
drop
=
False
)
df4
.
columns
=
[
"device_id"
,
"city_id"
,
"nearby_queue"
]
print
(
"nearby_device_count"
,
df4
.
shape
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment