Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
F
ffm-baseline
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ML
ffm-baseline
Commits
7c856a21
Commit
7c856a21
authored
Jan 03, 2019
by
高雅喆
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
esmm train data first level1_id
parent
a0d42a6e
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
8 additions
and
8 deletions
+8
-8
data2ffm.py
eda/esmm/Feature_pipline/data2ffm.py
+1
-1
send_mail.py
eda/esmm/Model_pipline/send_mail.py
+1
-1
sort_and_2sql.py
eda/esmm/Model_pipline/sort_and_2sql.py
+4
-4
submit.sh
eda/esmm/Model_pipline/submit.sh
+1
-1
EsmmData.scala
eda/feededa/src/main/scala/com/gmei/EsmmData.scala
+1
-1
No files found.
eda/esmm/Feature_pipline/data2ffm.py
View file @
7c856a21
...
...
@@ -264,7 +264,7 @@ def get_predict_set(ucity_id,model,ccity_name,manufacturer,channel):
if
__name__
==
"__main__"
:
path
=
"/home/g
aoyazhe/
data/"
path
=
"/home/g
muser/esmm_
data/"
a
=
time
.
time
()
df
,
validate_date
,
ucity_id
,
ccity_name
,
manufacturer
,
channel
=
get_data
()
model
=
transform
(
df
,
validate_date
)
...
...
eda/esmm/Model_pipline/send_mail.py
View file @
7c856a21
...
...
@@ -11,7 +11,7 @@ my_user='gaoyazhe@igengmei.com'
def
mail
():
ret
=
True
try
:
with
open
(
'/home/g
aoyazhe/
data/submit.log'
)
as
f
:
with
open
(
'/home/g
muser/esmm_
data/submit.log'
)
as
f
:
stat_data
=
f
.
read
()
msg
=
MIMEText
(
stat_data
,
'plain'
,
'utf-8'
)
msg
[
'From'
]
=
formataddr
([
"高雅喆"
,
my_sender
])
...
...
eda/esmm/Model_pipline/sort_and_2sql.py
View file @
7c856a21
...
...
@@ -25,10 +25,10 @@ def set_join(lst):
def
main
():
# native queue
df2
=
pd
.
read_csv
(
'/home/g
aoyazhe/
data/native.csv'
,
usecols
=
[
0
,
1
,
2
],
header
=
0
,
names
=
[
'uid'
,
'city'
,
'cid_id'
],
sep
=
'
\t
'
)
df2
=
pd
.
read_csv
(
'/home/g
muser/esmm_
data/native.csv'
,
usecols
=
[
0
,
1
,
2
],
header
=
0
,
names
=
[
'uid'
,
'city'
,
'cid_id'
],
sep
=
'
\t
'
)
df2
[
'cid_id'
]
=
df2
[
'cid_id'
]
.
astype
(
str
)
df1
=
pd
.
read_csv
(
"/home/g
aoyazhe/
data/native/pred.txt"
,
sep
=
'
\t
'
,
header
=
None
,
names
=
[
"ctr"
,
"cvr"
,
"ctcvr"
])
df1
=
pd
.
read_csv
(
"/home/g
muser/esmm_
data/native/pred.txt"
,
sep
=
'
\t
'
,
header
=
None
,
names
=
[
"ctr"
,
"cvr"
,
"ctcvr"
])
df2
[
"ctr"
],
df2
[
"cvr"
],
df2
[
"ctcvr"
]
=
df1
[
"ctr"
],
df1
[
"cvr"
],
df1
[
"ctcvr"
]
df3
=
df2
.
groupby
(
by
=
[
"uid"
,
"city"
])
.
apply
(
lambda
x
:
x
.
sort_values
(
by
=
"ctcvr"
,
ascending
=
False
))
.
reset_index
(
drop
=
True
)
.
groupby
(
by
=
[
"uid"
,
"city"
])
.
agg
({
'cid_id'
:
set_join
})
.
reset_index
(
drop
=
False
)
df3
.
columns
=
[
"device_id"
,
"city_id"
,
"native_queue"
]
...
...
@@ -36,10 +36,10 @@ def main():
# nearby queue
df2
=
pd
.
read_csv
(
'/home/g
aoyazhe/
data/nearby.csv'
,
usecols
=
[
0
,
1
,
2
],
header
=
0
,
names
=
[
'uid'
,
'city'
,
'cid_id'
],
sep
=
'
\t
'
)
df2
=
pd
.
read_csv
(
'/home/g
muser/esmm_
data/nearby.csv'
,
usecols
=
[
0
,
1
,
2
],
header
=
0
,
names
=
[
'uid'
,
'city'
,
'cid_id'
],
sep
=
'
\t
'
)
df2
[
'cid_id'
]
=
df2
[
'cid_id'
]
.
astype
(
str
)
df1
=
pd
.
read_csv
(
"/home/g
aoyazhe/
data/nearby/pred.txt"
,
sep
=
'
\t
'
,
header
=
None
,
names
=
[
"ctr"
,
"cvr"
,
"ctcvr"
])
df1
=
pd
.
read_csv
(
"/home/g
muser/esmm_
data/nearby/pred.txt"
,
sep
=
'
\t
'
,
header
=
None
,
names
=
[
"ctr"
,
"cvr"
,
"ctcvr"
])
df2
[
"ctr"
],
df2
[
"cvr"
],
df2
[
"ctcvr"
]
=
df1
[
"ctr"
],
df1
[
"cvr"
],
df1
[
"ctcvr"
]
df4
=
df2
.
groupby
(
by
=
[
"uid"
,
"city"
])
.
apply
(
lambda
x
:
x
.
sort_values
(
by
=
"ctcvr"
,
ascending
=
False
))
.
reset_index
(
drop
=
True
)
.
groupby
(
by
=
[
"uid"
,
"city"
])
.
agg
({
'cid_id'
:
set_join
})
.
reset_index
(
drop
=
False
)
df4
.
columns
=
[
"device_id"
,
"city_id"
,
"nearby_queue"
]
...
...
eda/esmm/Model_pipline/submit.sh
View file @
7c856a21
#! /bin/bash
PYTHON_PATH
=
/home/gaoyazhe/miniconda3/bin/python
MODEL_PATH
=
/srv/apps/ffm-baseline/eda/esmm
DATA_PATH
=
/home/g
aoyazhe/
data
DATA_PATH
=
/home/g
muser/esmm_
data
echo
"start time"
current
=
$(
date
"+%Y-%m-%d %H:%M:%S"
)
...
...
eda/feededa/src/main/scala/com/gmei/EsmmData.scala
View file @
7c856a21
...
...
@@ -209,7 +209,7 @@ object EsmmData {
|and d.partition_date='${stat_date_not}'
"""
.
stripMargin
)
//
union_data_scity_id.createOrReplaceTempView("union_data_scity_id")
union_data_scity_id
.
createOrReplaceTempView
(
"union_data_scity_id"
)
union_data_scity_id
.
show
()
val
union_data_scity_id2
=
sc
.
sql
(
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment