Commit 68a102b4 authored Jan 09, 2019 by 王志伟

Merge branch 'master' of http://git.wanmeizhensuo.com/ML/ffm-baseline

parents 20e01904 b49d205c
Showing 8 changed files with 87 additions and 65 deletions
eda/esmm/Model_pipline/send_mail.py                              +4  -3
eda/esmm/Model_pipline/submit.sh                                 +3  -0
...src/main/scala/com/gmei/Recommendation_strategy_all.scala     +6  -20
...ededa/src/main/scala/com/gmei/Search_keywords_count.scala     +41 -20
.../src/main/scala/com/gmei/data_feed_exposure_precise.scala     +0  -0
eda/feededa/src/main/scala/com/gmei/find_bug.scala               +0  -0
eda/feededa/src/main/scala/com/gmei/testt.scala                  +15 -13
tensnsorflow/device_read_time_normal.py                          +18 -9
eda/esmm/Model_pipline/send_mail.py

@@ -7,7 +7,8 @@ import datetime
my_sender = 'gaoyazhe@igengmei.com'
my_pass = 'VCrKTui99a7ALhiK'
my_user = 'gaoyazhe@igengmei.com'
my_user1 = 'gaoyazhe@igengmei.com'
my_user2 = 'zhangyanzhao@igengmei.com'
def mail():
    ret = True
    try:

@@ -15,11 +16,11 @@ def mail():
        stat_data = f.read()
        msg = MIMEText(stat_data, 'plain', 'utf-8')
        msg['From'] = formataddr(["高雅喆", my_sender])
        msg['To'] = formataddr(["高雅喆", my_user])
        msg['To'] = my_user1 + ',' + my_user2
        msg['Subject'] = str(datetime.date.today()) + "-esmm多目标模型训练指标统计"
        server = smtplib.SMTP_SSL("smtp.exmail.qq.com", 465)
        server.login(my_sender, my_pass)
        server.sendmail(my_sender, [my_user, ], msg.as_string())
        server.sendmail(my_sender, [my_user1, my_user2], msg.as_string())
        server.quit()
    except Exception:
        ret = False
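For readability, here is a minimal standalone sketch of the multi-recipient pattern this send_mail.py change moves to: the 'To' header is a single comma-joined string, while sendmail() receives the actual recipient list. The addresses, subject text, and credentials below are placeholders, not the values from the repository.

    import smtplib
    from email.mime.text import MIMEText
    from email.utils import formataddr

    def send_report(body, sender, password, recipients):
        # Build the message; the 'To' header is one comma-joined string.
        msg = MIMEText(body, 'plain', 'utf-8')
        msg['From'] = formataddr(("stats bot", sender))
        msg['To'] = ','.join(recipients)
        msg['Subject'] = 'esmm training report'
        # The envelope recipients are passed as a list, independent of the header.
        server = smtplib.SMTP_SSL("smtp.exmail.qq.com", 465)
        server.login(sender, password)
        server.sendmail(sender, recipients, msg.as_string())
        server.quit()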
eda/esmm/Model_pipline/submit.sh

#! /bin/bash
cd /srv/apps/ffm-baseline/eda/esmm
git checkout master
PYTHON_PATH=/home/gaoyazhe/miniconda3/bin/python
MODEL_PATH=/srv/apps/ffm-baseline/eda/esmm
DATA_PATH=/home/gmuser/esmm_data
eda/feededa/src/main/scala/com/gmei/Recommendation_strategy_all.scala

@@ -47,7 +47,6 @@ object Recommendation_strategy_all {
    ti.tidbMapTable(dbName = "jerry_prod", tableName = "diary_video")
    ti.tidbMapTable(dbName = "jerry_prod", tableName = "data_feed_click")
    ti.tidbMapTable(dbName = "jerry_prod", tableName = "blacklist")
    ti.tidbMapTable(dbName = "jerry_test", tableName = "bl_device_list")
    ti.tidbMapTable(dbName = "jerry_prod", tableName = "data_feed_exposure")
    ti.tidbMapTable(dbName = "jerry_prod", tableName = "merge_queue_table")

@@ -62,12 +61,12 @@ object Recommendation_strategy_all {
        |from online.ml_device_day_active_status
        |where active_type = '4'
        |and first_channel_source_type not in ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
        |,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang'
        |,'js-az1','js-az2','js-az3','js-az4','js-az5','jfq-az1','jfq-az2','jfq-az3','jfq-az4','jfq-az5','toufang1'
        |,'toufang2','toufang3','toufang4','toufang5','toufang6','TF-toufang1','TF-toufang2','TF-toufang3','TF-toufang4'
        |,'TF-toufang5','tf-toufang1','tf-toufang2','tf-toufang3','tf-toufang4','tf-toufang5','benzhan','promotion_aso100'
        |,'promotion_qianka','promotion_xiaoyu','promotion_dianru','promotion_malioaso','promotion_malioaso-shequ'
        |,'promotion_shike','promotion_julang_jl03')
        | ,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang'
        | ,'js-az1','js-az2','js-az3','js-az4','js-az5','jfq-az1','jfq-az2','jfq-az3','jfq-az4','jfq-az5','toufang1'
        | ,'toufang2','toufang3','toufang4','toufang5','toufang6','TF-toufang1','TF-toufang2','TF-toufang3','TF-toufang4'
        | ,'TF-toufang5','tf-toufang1','tf-toufang2','tf-toufang3','tf-toufang4','tf-toufang5','benzhan','promotion_aso100'
        | ,'promotion_qianka','promotion_xiaoyu','promotion_dianru','promotion_malioaso','promotion_malioaso-shequ'
        |,'promotion_shike','promotion_julang_jl03','','unknown')
        |and partition_date ='${partition_date}'
      """.stripMargin
    )

@@ -81,7 +80,6 @@ object Recommendation_strategy_all {
        |on jd.device_id = device_id_old.device_id
        |where (jd.cid_type = 'diary' or jd.cid_type = 'diary_video')
        |and jd.device_id regexp'1$$'
        |and jd.device_id not in (select device_id from bl_device_list)
        |and jd.device_id not in (select device_id from blacklist)
        |and jd.stat_date ='${stat_date}'
      """.stripMargin

@@ -94,7 +92,6 @@ object Recommendation_strategy_all {
        |on je.device_id = device_id_old.device_id
        |where je.cid_type = 'diary'
        |and je.device_id regexp'1$$'
        |and je.device_id not in (select device_id from bl_device_list)
        |and je.device_id not in (select device_id from blacklist)
        |and je.stat_date ='${stat_date}'
      """.stripMargin

@@ -106,7 +103,6 @@ object Recommendation_strategy_all {
        |from data_feed_click jd inner join device_id_old
        |on jd.device_id = device_id_old.device_id
        |where (jd.cid_type = 'diary' or jd.cid_type = 'diary_video')
        |and jd.device_id not in (select device_id from bl_device_list)
        |and jd.device_id not in (select device_id from blacklist)
        |and jd.stat_date ='${stat_date}'
      """.stripMargin

@@ -118,7 +114,6 @@ object Recommendation_strategy_all {
        |from data_feed_exposure je inner join device_id_old
        |on je.device_id = device_id_old.device_id
        |where je.cid_type = 'diary'
        |and je.device_id not in (select device_id from bl_device_list)
        |and je.device_id not in (select device_id from blacklist)
        |and je.stat_date ='${stat_date}'
      """.stripMargin

@@ -140,7 +135,6 @@ object Recommendation_strategy_all {
        |from data_feed_click jd inner join device_id_cover_older
        |on jd.device_id = device_id_cover_older.device_id
        |where (jd.cid_type = 'diary' or jd.cid_type = 'diary_video')
        |and jd.device_id not in (select device_id from bl_device_list)
        |and jd.device_id not in (select device_id from blacklist)
        |and jd.stat_date ='${stat_date}'
      """.stripMargin

@@ -152,7 +146,6 @@ object Recommendation_strategy_all {
        |from data_feed_exposure je inner join device_id_cover_older
        |on je.device_id = device_id_cover_older.device_id
        |where je.cid_type = 'diary'
        |and je.device_id not in (select device_id from bl_device_list)
        |and je.device_id not in (select device_id from blacklist)
        |and je.stat_date ='${stat_date}'
      """.stripMargin

@@ -173,7 +166,6 @@ object Recommendation_strategy_all {
        |on je.device_id = device_id_old.device_id
        |where je.cid_type = 'diary'
        |and je.device_id regexp'1$$'
        |and je.device_id not in (select device_id from bl_device_list)
        |and je.device_id not in (select device_id from blacklist)
        |and je.stat_date ='${stat_date}'
      """.stripMargin

@@ -186,7 +178,6 @@ object Recommendation_strategy_all {
        |from data_feed_exposure je inner join device_id_cover_older
        |on je.device_id = device_id_cover_older.device_id
        |where je.cid_type = 'diary'
        |and je.device_id not in (select device_id from bl_device_list)
        |and je.device_id not in (select device_id from blacklist)
        |and je.stat_date ='${stat_date}'
      """.stripMargin

@@ -212,7 +203,6 @@ object Recommendation_strategy_all {
        |on jd.device_id = device_id_old.device_id
        |where (jd.cid_type = 'diary' or jd.cid_type = 'diary_video')
        |and jd.device_id regexp'1$$'
        |and jd.device_id not in (select device_id from bl_device_list)
        |and jd.device_id not in (select device_id from blacklist)
        |and jd.stat_date ='${stat_date}'
      """.stripMargin

@@ -226,7 +216,6 @@ object Recommendation_strategy_all {
        |on je.device_id = device_id_old.device_id
        |where je.cid_type = 'diary'
        |and je.device_id in (select distinct(device_id) from data_feed_click where device_id regexp '1$$' and stat_date = '${stat_date}')
        |and je.device_id not in (select device_id from bl_device_list)
        |and je.device_id not in (select device_id from blacklist)
        |and je.stat_date ='${stat_date}'
      """.stripMargin

@@ -240,7 +229,6 @@ object Recommendation_strategy_all {
        |on jd.device_id = device_id_old.device_id
        |where (jd.cid_type = 'diary' or jd.cid_type = 'diary_video')
        |and jd.device_id regexp'1$$'
        |and jd.device_id not in (select device_id from bl_device_list)
        |and jd.device_id not in (select device_id from blacklist)
        |and jd.stat_date ='${stat_date}'
      """.stripMargin

@@ -253,7 +241,6 @@ object Recommendation_strategy_all {
        |from data_feed_click jd inner join device_id_old
        |on jd.device_id = device_id_old.device_id
        |where (jd.cid_type = 'diary' or jd.cid_type = 'diary_video')
        |and jd.device_id not in (select device_id from bl_device_list)
        |and jd.device_id not in (select device_id from blacklist)
        |and jd.stat_date ='${stat_date}'
      """.stripMargin

@@ -267,7 +254,6 @@ object Recommendation_strategy_all {
        |on je.device_id = device_id_old.device_id
        |where je.cid_type = 'diary'
        |and je.device_id in (select distinct(device_id) from data_feed_click where stat_date = '${stat_date}')
        |and je.device_id not in (select device_id from bl_device_list)
        |and je.device_id not in (select device_id from blacklist)
        |and je.stat_date ='${stat_date}'
      """.stripMargin
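As a reading aid for the filter that recurs through the hunks above, here is a minimal PySpark sketch of the `not in (select device_id from blacklist)` exclusion. The SparkSession, registered tables, and stat_date value are assumed for illustration and are not part of the commit.

    from pyspark.sql import SparkSession

    spark = SparkSession.builder.appName("blacklist_filter_sketch").getOrCreate()
    stat_date = "2019-01-08"  # placeholder date

    # Count clicks from known old devices while excluding blacklisted devices,
    # mirroring the WHERE clauses repeated in Recommendation_strategy_all.scala.
    clicks = spark.sql(f"""
        select count(jd.device_id) as clk_count
        from data_feed_click jd inner join device_id_old
        on jd.device_id = device_id_old.device_id
        where (jd.cid_type = 'diary' or jd.cid_type = 'diary_video')
        and jd.device_id not in (select device_id from blacklist)
        and jd.stat_date = '{stat_date}'
    """)
    clicks.show()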
eda/feededa/src/main/scala/com/gmei/Search_keywords_count.scala

@@ -7,6 +7,10 @@ import org.apache.spark.sql.{SaveMode, TiContext}
import org.apache.log4j.{Level, Logger}
import scopt.OptionParser
import com.gmei.lib.AbstractParams
import org.dmg.pmml.True
import scala.util.Try
import scala.util.parsing.json.JSON

object Search_keywords_count {

@@ -37,6 +41,17 @@ object Search_keywords_count {
    )
  }

  // Define exception handling
  def catch_error(x: String) = {
    val in = JSON.parseFull(x)
    try {
      in.toString
    } catch {
      case e: ArithmeticException => {
        e.printStackTrace(); e.toString
      }
    }
  }

  def main(args: Array[String]): Unit = {
    parser.parse(args, defaultParams).map { param =>
      GmeiConfig.setup(param.env)

@@ -45,37 +60,44 @@ object Search_keywords_count {
      val stat_date = GmeiConfig.getMinusNDate(1)   // get yesterday's date
      //println(param.date)
      val partition_date = stat_date.replace("-", "")

      // Extract search keywords
      val search_keywords = sc.sql(
        s"""
           |select params['query'] as search_keywords
           |select params as search_keywords
           |from online.tl_hdfs_maidian_view
           |where (action = 'do_search' or action = 'search_result_click_search')
           |and partition_date ='${partition_date}'
           |and partition_date ='20190108'
         """.stripMargin
      )

      // Count searches
      val search_count = sc.sql(
        s"""
           |select '${stat_date}' as stat_date,count(params['query']) as search_num
           |from online.tl_hdfs_maidian_view
           |where (action = 'do_search' or action = 'search_result_click_search')
           |and partition_date ='${partition_date}'
         """.stripMargin
      )
      search_count.show()
      // search_keywords.show()

      val get_result = search_keywords.rdd.map((_, 1)).reduceByKey(_ + _)
        .sortBy(_._2, false)
      val temp = get_result.collect()
      for (i <- 0 until temp.length) {
        println(temp(i))
      }
      val tempp = search_keywords.collect()
      for (i <- 0 until tempp.length) {
        println(tempp(i))
      }

      // GmeiConfig.writeToJDBCTable(search_keywords, table = "temp_search_keywords", SaveMode.Overwrite)

      // Count searches
      // val search_count = sc.sql(
      //   s"""
      //      |select '${stat_date}' as stat_date,count(params['query']) as search_num
      //      |from online.tl_hdfs_maidian_view
      //      |where (action = 'do_search' or action = 'search_result_click_search')
      //      |and partition_date ='20190107'
      //   """.stripMargin
      // )
      // search_count.show()
      // val get_result =search_keywords.rdd.map((_, 1)).reduceByKey(_ + _)
      //   .sortBy(_._2,false)
      // val temp=get_result.collect()
      // for (i <- 0 until temp.length ) {
      //   println(temp(i))
      // }
    }
  }
}
\ No newline at end of file
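For reference, a short PySpark sketch of the (keyword, 1) → reduceByKey → sortBy counting step that Search_keywords_count performs. The SparkSession and the sample keyword list are illustrative only.

    from pyspark.sql import SparkSession

    spark = SparkSession.builder.appName("search_keywords_sketch").getOrCreate()
    sc = spark.sparkContext

    # Stand-in for the extracted search keywords.
    keywords = sc.parallelize(["double_eyelid", "rhinoplasty", "double_eyelid", "freckle_removal"])

    # Map each keyword to (keyword, 1), sum per keyword, sort by count descending.
    counts = (keywords
              .map(lambda k: (k, 1))
              .reduceByKey(lambda a, b: a + b)
              .sortBy(lambda kv: kv[1], ascending=False))

    for kv in counts.collect():
        print(kv)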
eda/feededa/src/main/scala/com/gmei/data_feed_exposure_precise.scala  (new file, mode 100644)

This diff is collapsed.
eda/feededa/src/main/scala/com/gmei/find_bug.scala  (new file, mode 100644)

This diff is collapsed.
eda/feededa/src/main/scala/com/gmei/testt.scala

@@ -85,12 +85,12 @@ object testt {
        |from online.ml_device_day_active_status
        |where active_type != '4'
        |and first_channel_source_type not in ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
        |,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang'
        |,'js-az1','js-az2','js-az3','js-az4','js-az5','jfq-az1','jfq-az2','jfq-az3','jfq-az4','jfq-az5','toufang1'
        |,'toufang2','toufang3','toufang4','toufang5','toufang6','TF-toufang1','TF-toufang2','TF-toufang3','TF-toufang4'
        |,'TF-toufang5','tf-toufang1','tf-toufang2','tf-toufang3','tf-toufang4','tf-toufang5','benzhan','promotion_aso100'
        |,'promotion_qianka','promotion_xiaoyu','promotion_dianru','promotion_malioaso','promotion_malioaso-shequ'
        |,'promotion_shike','promotion_julang_jl03')
        | ,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang'
        | ,'js-az1','js-az2','js-az3','js-az4','js-az5','jfq-az1','jfq-az2','jfq-az3','jfq-az4','jfq-az5','toufang1'
        | ,'toufang2','toufang3','toufang4','toufang5','toufang6','TF-toufang1','TF-toufang2','TF-toufang3','TF-toufang4'
        | ,'TF-toufang5','tf-toufang1','tf-toufang2','tf-toufang3','tf-toufang4','tf-toufang5','benzhan','promotion_aso100'
        | ,'promotion_qianka','promotion_xiaoyu','promotion_dianru','promotion_malioaso','promotion_malioaso-shequ'
        |,'promotion_shike','promotion_julang_jl03','','unknown')
        |and partition_date ='${partition_date}'
      """.stripMargin
    )

@@ -101,16 +101,18 @@ object testt {
    val device_id_oldUser = sc.sql(
      s"""
         |select distinct(device_id) as device_id
         |from online.ml_device_day_active_status
         |from online.ml_device_day_active_status os left join blacklist
         |on os.device_id=blacklist.device_id
         |where active_type = '4'
         |and first_channel_source_type not in ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
         |,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang'
         |,'js-az1','js-az2','js-az3','js-az4','js-az5','jfq-az1','jfq-az2','jfq-az3','jfq-az4','jfq-az5','toufang1'
         |,'toufang2','toufang3','toufang4','toufang5','toufang6','TF-toufang1','TF-toufang2','TF-toufang3','TF-toufang4'
         |,'TF-toufang5','tf-toufang1','tf-toufang2','tf-toufang3','tf-toufang4','tf-toufang5','benzhan','promotion_aso100'
         |,'promotion_qianka','promotion_xiaoyu','promotion_dianru','promotion_malioaso','promotion_malioaso-shequ'
         |,'promotion_shike','promotion_julang_jl03')
         | ,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang'
         | ,'js-az1','js-az2','js-az3','js-az4','js-az5','jfq-az1','jfq-az2','jfq-az3','jfq-az4','jfq-az5','toufang1'
         | ,'toufang2','toufang3','toufang4','toufang5','toufang6','TF-toufang1','TF-toufang2','TF-toufang3','TF-toufang4'
         | ,'TF-toufang5','tf-toufang1','tf-toufang2','tf-toufang3','tf-toufang4','tf-toufang5','benzhan','promotion_aso100'
         | ,'promotion_qianka','promotion_xiaoyu','promotion_dianru','promotion_malioaso','promotion_malioaso-shequ'
         |,'promotion_shike','promotion_julang_jl03','','unknown')
         |and partition_date ='${partition_date}'
         |and blacklist.device_id is null
       """.stripMargin
    )
    device_id_oldUser.show()
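The testt.scala hunk swaps the subquery-based blacklist filter for a left join plus an `is null` check. Below is a minimal PySpark sketch of that anti-join pattern; the SparkSession and registered tables are assumed for illustration.

    from pyspark.sql import SparkSession

    spark = SparkSession.builder.appName("anti_join_sketch").getOrCreate()

    # SQL form: keep active devices that have no matching row in blacklist.
    old_users = spark.sql("""
        select distinct(os.device_id) as device_id
        from online.ml_device_day_active_status os left join blacklist
        on os.device_id = blacklist.device_id
        where os.active_type = '4'
        and blacklist.device_id is null
    """)
    old_users.show()

    # DataFrame form: a left anti join expresses the same exclusion directly.
    active = spark.table("online.ml_device_day_active_status").filter("active_type = '4'")
    blacklist = spark.table("blacklist").select("device_id")
    old_users_df = active.join(blacklist, on="device_id", how="left_anti").select("device_id").distinct()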
tensnsorflow/device_read_time_normal.py
@@ -22,16 +22,25 @@ def normal():
    db = pymysql.connect(host='10.66.157.22', port=4000, user='root',
                         passwd='3SYz54LS9#^9sBvC', db='jerry_test')
    sql = "select * from device_read_time"
    df = con_sql(db, sql)
    df = df.rename(columns={0: "device_id", 1: "0", 2: "1", 3: "10", 4: "1024", 5: "1080",
                            6: "11", 7: "12", 8: "13", 9: "2", 10: "2054", 11: "2214",
                            12: "3", 13: "4", 14: "5", 16: "6933", 17: "7", 18: "9",
                            19: "922", 20: "929", 21: "971", 22: "992"})
    device_id = df[["device_id"]]
    df = df.drop("device_id", axis=1)
    minMax = MinMaxScaler()
    result = minMax.fit_transform(df)
    result = device_id.join(result)
    df = df.rename(columns={0: "device_id", 1: "kongbai", 2: "eye", 3: "simi", 4: "zitizhifang",
                            5: "banyongjiu", 6: "teeth", 7: "kouchun", 8: "ear", 9: "nose",
                            10: "banyongjiuzhuang", 11: "qita", 12: "lunkuo", 13: "shoushen",
                            14: "skin", 16: "shenghuo", 17: "breast", 18: "hair", 19: "kangshuai",
                            20: "shili", 21: "chanhou", 22: "zhushe"})
    # device_id = df[["device_id"]]
    # df = df.drop("device_id",axis=1)
    # minMax = MinMaxScaler()
    # result = pd.DataFrame(minMax.fit_transform(df),columns=["0","1","10","1024","1080","11",
    #                                                         "12","13","2","2054","2214","3","4","5","6933",
    #                                                         "7","9","922","929","971","992"])
    # result = device_id.join(result)
    l = list(df.columns)
    l.remove("device_id")
    df["sum"] = df.sum(axis=1)
    for i in l:
        df[i] = df[i] / df["sum"]
    df = df.drop("sum", axis=1)
    yconnect = create_engine('mysql+pymysql://root:3SYz54LS9#^9sBvC@10.66.157.22:4000/jerry_test?charset=utf8')
    pd.io.sql.to_sql(result, "device_read_time_normal", yconnect, schema='jerry_test', if_exists='fail', index=False)
    pd.io.sql.to_sql(df, "device_read_time_normal", yconnect, schema='jerry_test', if_exists='fail', index=False)


if __name__ == "__main__":
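The device_read_time_normal.py hunk replaces the MinMaxScaler step with a per-device share: each category column is divided by the row's total read time. Here is a self-contained pandas sketch of that step, using synthetic data; the real column names come from the device_read_time table.

    import pandas as pd

    # Synthetic read-time matrix: one row per device, one column per tag.
    df = pd.DataFrame({
        "device_id": ["d1", "d2"],
        "eye": [30.0, 5.0],
        "nose": [10.0, 5.0],
        "skin": [60.0, 10.0],
    })

    # Divide each tag column by the device's total read time so each row sums to 1.
    cols = [c for c in df.columns if c != "device_id"]
    df["sum"] = df[cols].sum(axis=1)
    for c in cols:
        df[c] = df[c] / df["sum"]
    df = df.drop("sum", axis=1)
    print(df)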