Commit a458bb53 authored by 王志伟

Migrate to Tencent Cloud (迁移到腾讯)

parents 25b39fd1 6bb8533b
......@@ -6,15 +6,10 @@ import datetime
def con_sql(db,sql):
cursor = db.cursor()
try:
cursor.execute(sql)
result = cursor.fetchall()
df = pd.DataFrame(list(result))
except Exception:
print("发生异常", Exception)
df = pd.DataFrame()
finally:
db.close()
cursor.execute(sql)
result = cursor.fetchall()
df = pd.DataFrame(list(result))
db.close()
return df
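
Note: the rewritten con_sql closes the connection only on the success path; if cursor.execute raises, db.close() is never reached. A minimal sketch that keeps the slimmer body but restores guaranteed cleanup (assuming pymysql and pandas, as imported in this file):

import pandas as pd

def con_sql(db, sql):
    # Execute a query and return the rows as a DataFrame.
    # try/finally guarantees db.close() runs even if execute() raises.
    cursor = db.cursor()
    try:
        cursor.execute(sql)
        result = cursor.fetchall()
        df = pd.DataFrame(list(result))
    finally:
        db.close()
    return df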
......@@ -32,14 +27,14 @@ def multi_hot(df,column,n):
def get_data():
db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test')
db = pymysql.connect(host='172.16.40.158', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test')
sql = "select max(stat_date) from {}".format(train_data_set)
validate_date = con_sql(db, sql)[0].values.tolist()[0]
print("validate_date:" + validate_date)
temp = datetime.datetime.strptime(validate_date, "%Y-%m-%d")
start = (temp - datetime.timedelta(days=300)).strftime("%Y-%m-%d")
start = (temp - datetime.timedelta(days=3)).strftime("%Y-%m-%d")
print(start)
db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test')
db = pymysql.connect(host='172.16.40.158', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test')
sql = "select e.y,e.z,e.stat_date,e.ucity_id,feat.level2_ids,e.ccity_name,u.device_type,u.manufacturer," \
"u.channel,c.top,e.device_id,cut.time,dl.app_list,e.diary_service_id,feat.level3_ids,feat.level2 " \
"from {} e left join user_feature u on e.device_id = u.device_id " \
......@@ -55,7 +50,7 @@ def get_data():
6: "device_type", 7: "manufacturer", 8: "channel", 9: "top", 10: "device_id",
11: "time", 12: "app_list", 13: "service_id", 14: "level3_ids", 15: "level2"})
db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test')
db = pymysql.connect(host='172.16.40.158', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test')
sql = "select level2_id,treatment_method,price_min,price_max,treatment_time,maintain_time,recover_time " \
"from train_Knowledge_network_data"
knowledge = con_sql(db, sql)
......@@ -67,7 +62,7 @@ def get_data():
df = df.drop("level2", axis=1)
service_id = tuple(df["service_id"].unique())
db = pymysql.connect(host='rdsfewzdmf0jfjp9un8xj.mysql.rds.aliyuncs.com', port=3306, user='work',
db = pymysql.connect(host='172.16.30.143', port=3306, user='work',
passwd='BJQaT9VzDcuPBqkd', db='zhengxing')
sql = "select s.id,d.hospital_id from api_service s left join api_doctor d on s.doctor_id = d.id " \
"where s.id in {}".format(service_id)
......@@ -152,7 +147,7 @@ def write_csv(df,name,n):
def get_predict(date,value_map,app_list_map,level2_map,level3_map):
db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test')
db = pymysql.connect(host='172.16.40.158', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test')
sql = "select e.y,e.z,e.label,e.ucity_id,feat.level2_ids,e.ccity_name," \
"u.device_type,u.manufacturer,u.channel,c.top,e.device_id,e.cid_id,cut.time," \
"dl.app_list,e.hospital_id,feat.level3_ids,feat.level2 " \
......@@ -160,14 +155,14 @@ def get_predict(date,value_map,app_list_map,level2_map,level3_map):
"left join cid_type_top c on e.device_id = c.device_id " \
"left join cid_time_cut cut on e.cid_id = cut.cid " \
"left join device_app_list dl on e.device_id = dl.device_id " \
"left join diary_feat feat on e.cid_id = feat.diary_id"
"left join diary_feat feat on e.cid_id = feat.diary_id limit 600"
df = con_sql(db, sql)
df = df.rename(columns={0: "y", 1: "z", 2: "label", 3: "ucity_id", 4: "clevel2_id", 5: "ccity_name",
6: "device_type", 7: "manufacturer", 8: "channel", 9: "top", 10: "device_id",
11: "cid_id", 12: "time", 13: "app_list", 14: "hospital_id", 15: "level3_ids",
16: "level2"})
db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test')
db = pymysql.connect(host='172.16.40.158', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test')
sql = "select level2_id,treatment_method,price_min,price_max,treatment_time,maintain_time,recover_time " \
"from train_Knowledge_network_data"
knowledge = con_sql(db, sql)
......@@ -232,7 +227,7 @@ def get_predict(date,value_map,app_list_map,level2_map,level3_map):
if __name__ == '__main__':
train_data_set = "esmm_train_data"
path = "/data/esmm/"
path = "/home/gmuser/esmm/"
date, value, app_list, level2, level3 = get_data()
get_predict(date, value, app_list, level2, level3)
......
#! /bin/bash
git checkout master
PYTHON_PATH=/home/gaoyazhe/miniconda3/bin/python
PYTHON_PATH=/opt/anaconda3/envs/esmm/bin/python
MODEL_PATH=/srv/apps/ffm-baseline/eda/esmm/Model_pipline
DATA_PATH=/data/esmm
DATA_PATH=/home/gmuser/esmm
echo "rm leave tfrecord"
rm ${DATA_PATH}/tr/*
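
Note: with an unset or empty DATA_PATH, rm ${DATA_PATH}/tr/* expands to rm /tr/*; in the script itself, set -u plus a quoted "${DATA_PATH:?}/tr/"* would guard against that. If this cleanup ever moves into the Python pipeline, a guarded equivalent might look like this (purely illustrative; data_path mirrors DATA_PATH above):

import glob
import os

data_path = "/home/gmuser/esmm"   # mirrors DATA_PATH in the shell script
tr_dir = os.path.join(data_path, "tr")
if not os.path.isdir(tr_dir):
    raise SystemExit("refusing to delete: %s is not a directory" % tr_dir)
for f in glob.glob(os.path.join(tr_dir, "*")):
    os.remove(f)                  # clear the old tfrecord shards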
......
......@@ -3,14 +3,14 @@
from sqlalchemy import create_engine
import pandas as pd
import pymysql
import time
import datetime
def con_sql(sql):
"""
:type sql : str
:rtype : tuple
"""
db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test')
db = pymysql.connect(host='172.16.40.158', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test')
cursor = db.cursor()
cursor.execute(sql)
result = cursor.fetchall()
......@@ -36,10 +36,10 @@ def native_set_join(lst):
def main():
# native queue
df2 = pd.read_csv('/data/esmm/native.csv')
df2 = pd.read_csv(path+'/native.csv')
df2['cid_id'] = df2['cid_id'].astype(str)
df1 = pd.read_csv("/data/esmm/native/pred.txt",sep='\t',header=None,names=["ctr","cvr","ctcvr"])
df1 = pd.read_csv(path+"/native/pred.txt",sep='\t',header=None,names=["ctr","cvr","ctcvr"])
df2["ctr"],df2["cvr"],df2["ctcvr"] = df1["ctr"],df1["cvr"],df1["ctcvr"]
df3 = df2.groupby(by=["uid","city"]).apply(lambda x: x.sort_values(by="ctcvr",ascending=False)).reset_index(drop=True).groupby(by=["uid","city"]).agg({'cid_id':native_set_join}).reset_index(drop=False)
df3.columns = ["device_id","city_id","native_queue"]
......@@ -47,10 +47,10 @@ def main():
# nearby queue
df2 = pd.read_csv('/data/esmm/nearby.csv')
df2 = pd.read_csv(path+'/nearby.csv')
df2['cid_id'] = df2['cid_id'].astype(str)
df1 = pd.read_csv("/data/esmm/nearby/pred.txt",sep='\t',header=None,names=["ctr","cvr","ctcvr"])
df1 = pd.read_csv(path+"/nearby/pred.txt",sep='\t',header=None,names=["ctr","cvr","ctcvr"])
df2["ctr"], df2["cvr"], df2["ctcvr"] = df1["ctr"], df1["cvr"], df1["ctcvr"]
df4 = df2.groupby(by=["uid","city"]).apply(lambda x: x.sort_values(by="ctcvr",ascending=False)).reset_index(drop=True).groupby(by=["uid","city"]).agg({'cid_id':nearby_set_join}).reset_index(drop=False)
df4.columns = ["device_id","city_id","nearby_queue"]
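
Note: both queues follow the same pattern: rank the rows inside each (uid, city) group by ctcvr descending, then join the ranked cid_ids into one comma-separated queue string. The sort can also happen once, before the groupby, since pandas preserves row order within groups; a self-contained sketch of that variant (",".join stands in for native_set_join/nearby_set_join):

import pandas as pd

df = pd.DataFrame({"uid": ["u1", "u1", "u2"],
                   "city": ["bj", "bj", "sh"],
                   "cid_id": ["9", "7", "5"],
                   "ctcvr": [0.2, 0.9, 0.4]})
ranked = df.sort_values("ctcvr", ascending=False)      # rank once, globally
queue = (ranked.groupby(["uid", "city"])["cid_id"]
               .agg(",".join)                          # keeps per-group ctcvr order
               .reset_index())
queue.columns = ["device_id", "city_id", "native_queue"]
print(queue)  # u1,bj -> "7,9"; u2,sh -> "5"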
......@@ -60,11 +60,10 @@ def main():
df_all = pd.merge(df3,df4,on=['device_id','city_id'],how='outer').fillna("")
df_all['device_id'] = df_all['device_id'].astype(str)
df_all['city_id'] = df_all['city_id'].astype(str)
ctime = int(time.time())
df_all["time"] = ctime
df_all["time"] = str(datetime.datetime.now().strftime('%Y%m%d%H%M'))
print("union_device_count",df_all.shape)
host='10.66.157.22'
host='172.16.40.158'
port=4000
user='root'
password='3SYz54LS9#^9sBvC'
......@@ -78,7 +77,7 @@ def main():
# df_merge = df_all[['device_id','city_id']].apply(lambda x: ''.join(x),axis=1)
delete_str = 'delete from esmm_device_diary_queue where concat(device_id,city_id) in ({0})'.format(df_merge_str)
con = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test')
con = pymysql.connect(host='172.16.40.158', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test')
cur = con.cursor()
cur.execute(delete_str)
con.commit()
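
Note: the refresh is delete-then-append in two separate statements, so a reader in between can observe missing queues. pymysql leaves autocommit off by default, which makes it easy to keep both steps in one transaction; a sketch under that assumption (connection parameters copied from above, the insert left as a placeholder):

con = pymysql.connect(host='172.16.40.158', port=4000, user='root',
                      passwd='3SYz54LS9#^9sBvC', db='jerry_test')
try:
    with con.cursor() as cur:
        cur.execute(delete_str)   # drop stale rows for the incoming keys
        # ... executemany(...) the new queue rows here ...
    con.commit()                  # delete + insert become visible together
except Exception:
    con.rollback()
    raise
finally:
    con.close()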
......@@ -88,5 +87,7 @@ def main():
print("done")
if __name__ == '__main__':
path = "/home/gmuser/esmm"
main()
\ No newline at end of file
......@@ -4,13 +4,11 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import pandas as pd
import sys
import os
import glob
import tensorflow as tf
import numpy as np
import re
from multiprocessing import Pool as ThreadPool
flags = tf.app.flags
......
......@@ -6,12 +6,10 @@
#import argparse
import shutil
#import sys
import os
import json
import glob
from datetime import date, timedelta
from time import time
import random
import tensorflow as tf
......
dev.tidb.jdbcuri=jdbc:mysql://10.66.157.22:4000/jerry_test?user=root&password=3SYz54LS9#^9sBvC&rewriteBatchedStatements=true
dev.tispark.pd.addresses=10.66.157.22:2379
dev.mimas.jdbcuri= jdbc:mysql://rdsmaqevmuzj6jy.mysql.rds.aliyuncs.com/mimas_test?user=work&password=workwork&rewriteBatchedStatements=true
dev.tidb.jdbcuri=jdbc:mysql://192.168.15.12:4000/eagle?user=root&password=&rewriteBatchedStatements=true
dev.tispark.pd.addresses=192.168.15.11:2379
dev.mimas.jdbcuri= jdbc:mysql://rm-2zenowgrn4i5p0j7txo.mysql.rds.aliyuncs.com/mimas_test?user=work&password=Gengmei1&rewriteBatchedStatements=true
dev.gaia.jdbcuri=jdbc:mysql://rdsmaqevmuzj6jy.mysql.rds.aliyuncs.com/zhengxing_test?user=work&password=workwork&rewriteBatchedStatements=true
dev.gold.jdbcuri=jdbc:mysql://rdsmaqevmuzj6jy.mysql.rds.aliyuncs.com/doris_test?user=work&password=workwork&rewriteBatchedStatements=true
dev.redis.host=10.30.50.58
dev.redis.port=6379
dev.jerry.jdbcuri=jdbc:mysql://rdsmaqevmuzj6jy.mysql.rds.aliyuncs.com/jerry_test?user=work&password=workwork&rewriteBatchedStatements=true
dev.test.jdbcuri= jdbc:mysql://rm-2ze0v6uua2hl9he8edo.mysql.rds.aliyuncs.com/mimas_test?user=work&password=Gengmei1&rewriteBatchedStatements=true
pre.tidb.jdbcuri=jdbc:mysql://192.168.16.11:4000/eagle?user=root&password=&rewriteBatchedStatements=true
pre.tispark.pd.addresses=192.168.16.11:2379
pre.mimas.jdbcuri=jdbc:mysql://rdsmaqevmuzj6jy.mysql.rds.aliyuncs.com:3308/mimas_prod?user=mimas&password=workwork&rewriteBatchedStatements=true
<<<<<<< HEAD
#prod.tidb.jdbcuri=jdbc:mysql://10.66.157.22:4000/jerry_prod?user=root&password=3SYz54LS9#^9sBvC&rewriteBatchedStatements=true
#prod.gold.jdbcuri=jdbc:mysql://rm-m5e842126ng59jrv6.mysql.rds.aliyuncs.com/doris_prod?user=doris&password=o5gbA27hXHHm&rewriteBatchedStatements=true
#prod.mimas.jdbcuri=jdbc:mysql://rm-m5emg41za2w7l6au3.mysql.rds.aliyuncs.com/mimas_prod?user=mimas&password=GJL3UJe1Ck9ggL6aKnZCq4cRvM&rewriteBatchedStatements=true
......@@ -19,6 +19,22 @@ pre.mimas.jdbcuri=jdbc:mysql://rdsmaqevmuzj6jy.mysql.rds.aliyuncs.com:3308/mimas
#prod.redis.host=10.30.50.58
#prod.redis.port=6379
=======
# Aliyun production config (阿里云线上配置)
#prod.tidb.jdbcuri=jdbc:mysql://10.66.157.22:4000/eagle?user=root&password=3SYz54LS9#^9sBvC&rewriteBatchedStatements=true
#prod.gold.jdbcuri=jdbc:mysql://rm-m5ey2s823bq0lc616.mysql.rds.aliyuncs.com/doris_prod?user=doris&password=o5gbA27hXHHm&rewriteBatchedStatements=true
#prod.mimas.jdbcuri=jdbc:mysql://rm-m5emg41za2w7l6au3.mysql.rds.aliyuncs.com/mimas_prod?user=mimas&password=GJL3UJe1Ck9ggL6aKnZCq4cRvM&rewriteBatchedStatements=true
#prod.gaia.jdbcuri=jdbc:mysql://rdsfewzdmf0jfjp9un8xj.mysql.rds.aliyuncs.com/zhengxing?user=work&password=BJQaT9VzDcuPBqkd&rewriteBatchedStatements=true
#prod.jerry.jdbcuri=jdbc:mysql://10.66.157.22:4000/jerry_prod?user=root&password=3SYz54LS9#^9sBvC&rewriteBatchedStatements=true
#prod.tispark.pd.addresses=10.66.157.22:2379
#
#prod.tidb.jdbcuri_new=jdbc:mysql://152.136.44.138:4000/eagle?user=root&password=3SYz54LS9#^9sBvC&rewriteBatchedStatements=true
#prod.jerry.jdbcuri_new=jdbc:mysql://152.136.44.138:4000/jerry_prod?user=root&password=3SYz54LS9#^9sBvC&rewriteBatchedStatements=true
# Tencent Cloud production config (腾讯云线上配置)
>>>>>>> 6bb8533b68efef7c647251ef08479560d5e1216a
prod.gold.jdbcuri=jdbc:mysql://172.16.30.136/doris_prod?user=doris&password=o5gbA27hXHHm&rewriteBatchedStatements=true
prod.mimas.jdbcuri=jdbc:mysql://172.16.30.138/mimas_prod?user=mimas&password=GJL3UJe1Ck9ggL6aKnZCq4cRvM&rewriteBatchedStatements=true
prod.gaia.jdbcuri=jdbc:mysql://172.16.30.143/zhengxing?user=work&password=BJQaT9VzDcuPBqkd&rewriteBatchedStatements=true
......
......@@ -4,7 +4,7 @@ package com.gmei
import java.io.Serializable
import java.time.LocalDate
import org.apache.spark.sql.{DataFrame, SaveMode, SparkSession, TiContext}
import org.apache.spark.sql.{DataFrame, SaveMode, SparkSession}
import org.apache.log4j.{Level, Logger}
import scopt.OptionParser
import com.gmei.lib.AbstractParams
......@@ -51,17 +51,9 @@ object EsmmData {
val spark_env = GmeiConfig.getSparkSession()
val sc = spark_env._2
val ti = new TiContext(sc)
ti.tidbMapTable(dbName = "eagle",tableName = "src_mimas_prod_api_diary_tags")
ti.tidbMapTable(dbName = "eagle",tableName = "src_zhengxing_api_tag")
ti.tidbMapTable(dbName = "jerry_prod",tableName = "data_feed_click")
ti.tidbMapTable(dbName = "jerry_prod",tableName = "data_feed_exposure")
ti.tidbMapTable(dbName = "jerry_test", tableName = "esmm_train_data")
val max_stat_date = sc.sql(
s"""
|select max(stat_date) from esmm_train_data
|select max(stat_date) from jerry_test.esmm_train_data
""".stripMargin
)
val max_stat_date_str = max_stat_date.collect().map(s => s(0).toString).head
......@@ -74,7 +66,7 @@ object EsmmData {
// s"""
// |select distinct stat_date,device_id,city_id as ucity_id,
// | cid_id,diary_service_id
// |from data_feed_exposure
// |from jerry_prod.data_feed_exposure
// |where cid_type = 'diary'
// |and stat_date ='${stat_date}'
// """.stripMargin
......@@ -84,7 +76,7 @@ object EsmmData {
s"""
|select * from
|(select stat_date,device_id,city_id as ucity_id,cid_id,diary_service_id
|from data_feed_exposure
|from jerry_prod.data_feed_exposure
|where cid_type = 'diary'
|and stat_date ='${stat_date}'
|group by stat_date,device_id,city_id,cid_id,diary_service_id having count(*) > 1) a
......@@ -99,7 +91,7 @@ object EsmmData {
s"""
|select distinct stat_date,device_id,city_id as ucity_id,
| cid_id,diary_service_id
|from data_feed_click
|from jerry_prod.data_feed_click
|where cid_type = 'diary'
|and stat_date ='${stat_date}'
""".stripMargin
......@@ -190,8 +182,8 @@ object EsmmData {
|select a.stat_date,a.device_id,a.ucity_id,a.cid_id,a.diary_service_id,a.y,a.z,a.clevel1_id,a.slevel1_id,
| c.name as ccity_name
|from union_data_slabel a
|left join src_mimas_prod_api_diary_tags b on a.cid_id=b.diary_id
|left join src_zhengxing_api_tag c on b.tag_id=c.id
|left join eagle.src_mimas_prod_api_diary_tags b on a.cid_id=b.diary_id
|left join eagle.src_zhengxing_api_tag c on b.tag_id=c.id
| where c.tag_type=4
""".stripMargin
)
......@@ -222,12 +214,11 @@ object EsmmData {
|group by device_id,cid_id
""".stripMargin
)
union_data_scity_id2.persist()
GmeiConfig.writeToJDBCTable("jdbc:mysql://10.66.157.22:4000/jerry_test?user=root&password=3SYz54LS9#^9sBvC&rewriteBatchedStatements=true",union_data_scity_id2, table="esmm_train_data",SaveMode.Append)
GmeiConfig.writeToJDBCTable("jdbc:mysql://152.136.44.138:4000/jerry_test?user=root&password=3SYz54LS9#^9sBvC&rewriteBatchedStatements=true",union_data_scity_id2, table="esmm_train_data",SaveMode.Append)
union_data_scity_id2.unpersist()
} else {
println("esmm_train_data already have param.date data")
println("jerry_test.esmm_train_data already have param.date data")
}
sc.stop()
......@@ -368,18 +359,6 @@ object EsmmPredData {
val spark_env = GmeiConfig.getSparkSession()
val sc = spark_env._2
val ti = new TiContext(sc)
ti.tidbMapTable(dbName = "eagle",tableName = "src_mimas_prod_api_diary_tags")
ti.tidbMapTable(dbName = "eagle",tableName = "src_zhengxing_api_tag")
ti.tidbMapTable(dbName = "jerry_prod",tableName = "data_feed_exposure")
ti.tidbMapTable(dbName = "jerry_prod",tableName = "data_feed_click")
ti.tidbMapTable("jerry_prod", "nd_device_cid_similarity_matrix")
ti.tidbMapTable("eagle","ffm_diary_queue")
ti.tidbMapTable("eagle","search_queue")
ti.tidbMapTable(dbName = "jerry_test",tableName = "esmm_train_data")
ti.tidbMapTable("eagle","biz_feed_diary_queue")
ti.tidbMapTable("jerry_prod","data_feed_exposure_precise")
import sc.implicits._
val yesteday_have_seq = GmeiConfig.getMinusNDate(1)
......@@ -388,7 +367,7 @@ object EsmmPredData {
s"""
|select concat(t.device_id,",",t.city_id) from
|(select distinct device_id,city_id
|from data_feed_exposure where stat_date='${yesteday_have_seq}') t
|from jerry_prod.data_feed_exposure where stat_date='${yesteday_have_seq}') t
""".stripMargin).collect().map(x => x(0).toString)
println("target_user",target_user.length)
......@@ -396,11 +375,12 @@ object EsmmPredData {
val raw_data = sc.sql(
s"""
|select concat(tmp1.device_id,",",tmp1.city_id) as device_city, tmp1.merge_queue from
|(select device_id,if(city_id='world','worldwide',city_id) city_id,similarity_cid as merge_queue from nd_device_cid_similarity_matrix
|(select device_id,if(city_id='world','worldwide',city_id) city_id,similarity_cid as merge_queue
|from jerry_prod.nd_device_cid_similarity_matrix
|union
|select device_id,if(city_id='world','worldwide',city_id) city_id,native_queue as merge_queue from ffm_diary_queue
|select device_id,if(city_id='world','worldwide',city_id) city_id,native_queue as merge_queue from eagle.ffm_diary_queue
|union
|select device_id,city_id,search_queue as merge_queue from search_queue) as tmp1
|select device_id,city_id,search_queue as merge_queue from eagle.search_queue) as tmp1
""".stripMargin)
// raw_data.show()
......@@ -421,7 +401,7 @@ object EsmmPredData {
import sc.implicits._
val sql =
s"""
|select distinct device_id,cid_id from data_feed_exposure_precise
|select distinct device_id,cid_id from jerry_prod.data_feed_exposure_precise
|where stat_date >= "$start" and cid_type = "diary"
""".stripMargin
val history = sc.sql(sql).repartition(200).rdd
......@@ -458,8 +438,8 @@ object EsmmPredData {
// native_data
val native_data = sc.sql(
s"""
|select distinct a.device_id,a.city_id,b.native_queue from data_feed_exposure a
|left join (select if(city_id='world','worldwide',city_id) city_id,native_queue from biz_feed_diary_queue) b
|select distinct a.device_id,a.city_id,b.native_queue from jerry_prod.data_feed_exposure a
|left join (select if(city_id='world','worldwide',city_id) city_id,native_queue from eagle.biz_feed_diary_queue) b
|on a.city_id = b.city_id
|where a.stat_date='${yesteday_have_seq}' and b.native_queue != ""
""".stripMargin
......@@ -558,8 +538,8 @@ object EsmmPredData {
|select a.stat_date,a.device_id,a.ucity_id,a.cid_id,a.label,a.diary_service_id,a.y,a.z,a.clevel1_id,a.slevel1_id,
| c.name as ccity_name
|from union_data_slabel a
|left join src_mimas_prod_api_diary_tags b on a.cid_id=b.diary_id
|left join src_zhengxing_api_tag c on b.tag_id=c.id
|left join eagle.src_mimas_prod_api_diary_tags b on a.cid_id=b.diary_id
|left join eagle.src_zhengxing_api_tag c on b.tag_id=c.id
| where c.tag_type=4
""".stripMargin
)
......@@ -614,10 +594,9 @@ object EsmmPredData {
// union_data_scity_id.createOrReplaceTempView("union_data_scity_id")
// println(union_data_scity_id2.count())
union_data_scity_id2.persist()
GmeiConfig.writeToJDBCTable("jdbc:mysql://10.66.157.22:4000/jerry_test?user=root&password=3SYz54LS9#^9sBvC&rewriteBatchedStatements=true",union_data_scity_id2, table="esmm_pre_data",SaveMode.Overwrite)
GmeiConfig.writeToJDBCTable("jdbc:mysql://152.136.44.138:4000/jerry_test?user=root&password=3SYz54LS9#^9sBvC&rewriteBatchedStatements=true",union_data_scity_id2, table="esmm_pre_data",SaveMode.Overwrite)
union_data_scity_id2.unpersist()
......@@ -665,9 +644,6 @@ object GetDiaryPortrait {
val spark_env = GmeiConfig.getSparkSession()
val sc = spark_env._2
val ti = new TiContext(sc)
ti.tidbMapTable(dbName = "jerry_prod",tableName = "data_feed_click")
val stat_date = param.date.replace("-","")
val diary_tag = sc.sql(
......@@ -693,7 +669,7 @@ object GetDiaryPortrait {
|select diary_id,level1_ids,level2_ids,level3_ids,split(level2_ids,",")[0] as level2 from t
""".stripMargin
)
val jdbc = "jdbc:mysql://10.66.157.22:4000/jerry_test?user=root&password=3SYz54LS9#^9sBvC&rewriteBatchedStatements=true"
val jdbc = "jdbc:mysql://152.136.44.138:4000/jerry_test?user=root&password=3SYz54LS9#^9sBvC&rewriteBatchedStatements=true"
GmeiConfig.writeToJDBCTable(jdbc,result,"diary_feat",SaveMode.Overwrite)
......@@ -742,9 +718,6 @@ object GetDevicePortrait {
val spark_env = GmeiConfig.getSparkSession()
val sc = spark_env._2
val ti = new TiContext(sc)
ti.tidbMapTable(dbName = "jerry_prod",tableName = "data_feed_click")
ti.tidbMapTable(dbName = "jerry_prod",tableName = "diary_feat")
import sc.implicits._
val stat_date = param.date.replace("-","")
......@@ -757,7 +730,7 @@ object GetDevicePortrait {
| COALESCE(a.params['diary_id'], a.params['business_id'], 0) as cid_id,
| b.level1_ids as level1_id
| from online.tl_hdfs_maidian_view a
| left join diary_feat b
| left join jerry_prod.diary_feat b
| on COALESCE(a.params['diary_id'], a.params['business_id'], 0) = b.diary_id
| where
| b.level1_ids is not null and
......@@ -844,11 +817,6 @@ object GetLevelCount {
val spark_env = GmeiConfig.getSparkSession()
val sc = spark_env._2
val ti = new TiContext(sc)
ti.tidbMapTable(dbName = "jerry_prod",tableName = "data_feed_click")
ti.tidbMapTable(dbName = "jerry_prod",tableName = "diary_feat")
import sc.implicits._
val stat_date = GmeiConfig.getMinusNDate(1).replace("-","")
......@@ -856,7 +824,7 @@ object GetLevelCount {
val diary_queue = "16215222,16204965,15361235,16121397,16277565,15491159,16299587,16296887,15294642,16204934,15649199,16122580,16122580,16122580,16122580,16122580,16122580"
val diary_level1 = sc.sql(
s"""
|select diary_id,explode(split(level1_ids,';')) level1_id from diary_feat
|select diary_id,explode(split(level1_ids,';')) level1_id from jerry_prod.diary_feat
|where diary_id in (${diary_queue})
""".stripMargin
)
......@@ -924,9 +892,6 @@ object GetDeviceDuration {
val spark_env = GmeiConfig.getSparkSession()
val sc = spark_env._2
val ti = new TiContext(sc)
ti.tidbMapTable(dbName = "jerry_prod",tableName = "data_feed_click")
ti.tidbMapTable(dbName = "jerry_prod",tableName = "diary_feat")
import sc.implicits._
val stat_date = param.date
......@@ -935,8 +900,8 @@ object GetDeviceDuration {
s"""
|select a.device_id,coalesce(a.start_time,a.ndiary_in,0) in_time,coalesce(a.end_time,a.ndiary_out,0) out_time,
|explode(split(b.level1_ids,';')) level1_id
|from data_feed_click a
|left join diary_feat b on a.cid_id = b.diary_id
|from jerry_prod.data_feed_click a
|left join jerry_prod.diary_feat b on a.cid_id = b.diary_id
|where a.stat_date > '2018-12-12'
""".stripMargin
)
......@@ -973,8 +938,8 @@ object GetDeviceDuration {
| (select a.device_id,
| coalesce(a.end_time,a.ndiary_out,0)-coalesce(a.start_time,a.ndiary_in,0) as duration,
| explode(split(b.level1_ids,';')) level1_id
| from data_feed_click a
| left join diary_feat b on a.cid_id = b.diary_id where a.stat_date > '2018-12-12') c
| from jerry_prod.data_feed_click a
| left join jerry_prod.diary_feat b on a.cid_id = b.diary_id where a.stat_date > '2018-12-12') c
| group by c.device_id,c.level1_id) d
|group by d.device_id
""".stripMargin
......@@ -1026,18 +991,12 @@ object EsmmDataTest {
GmeiConfig.setup(param.env)
val spark_env = GmeiConfig.getSparkSession()
val sc = spark_env._2
val ti = new TiContext(sc)
ti.tidbMapTable(dbName = "eagle",tableName = "src_mimas_prod_api_diary_tags")
ti.tidbMapTable(dbName = "eagle",tableName = "src_zhengxing_api_tag")
ti.tidbMapTable(dbName = "jerry_test",tableName = "esmm_click")
ti.tidbMapTable(dbName = "jerry_prod",tableName = "data_feed_exposure_precise")
ti.tidbMapTable(dbName = "jerry_test", tableName = "train_data")
click(sc)
val max_stat_date = sc.sql(
s"""
|select max(stat_date) from train_data
|select max(stat_date) from jerry_test.train_data
""".stripMargin
)
val max_stat_date_str = max_stat_date.collect().map(s => s(0).toString).head
......@@ -1050,7 +1009,7 @@ object EsmmDataTest {
// s"""
// |select distinct stat_date,device_id,city_id as ucity_id,
// | cid_id,diary_service_id
// |from data_feed_exposure
// |from jerry_prod.data_feed_exposure
// |where cid_type = 'diary'
// |and stat_date ='${stat_date}'
// """.stripMargin
......@@ -1060,7 +1019,7 @@ object EsmmDataTest {
s"""
|select * from
|(select stat_date,device_id,city_id as ucity_id,cid_id,diary_service_id
|from data_feed_exposure_precise
|from jerry_prod.data_feed_exposure_precise
|where cid_type = 'diary'
|and stat_date ='${stat_date}'
|group by stat_date,device_id,city_id,cid_id,diary_service_id) a
......@@ -1074,7 +1033,7 @@ object EsmmDataTest {
val clk_data = sc.sql(
s"""
|select distinct stat_date,device_id,city_id as ucity_id,cid_id,diary_service_id
|from esmm_click
|from jerry_test.esmm_click
|where stat_date ='${stat_date}'
""".stripMargin
)
......@@ -1163,8 +1122,8 @@ object EsmmDataTest {
|select a.stat_date,a.device_id,a.ucity_id,a.cid_id,a.diary_service_id,a.y,a.z,a.clevel1_id,a.slevel1_id,
| c.name as ccity_name
|from union_data_slabel a
|left join src_mimas_prod_api_diary_tags b on a.cid_id=b.diary_id
|left join src_zhengxing_api_tag c on b.tag_id=c.id
|left join eagle.src_mimas_prod_api_diary_tags b on a.cid_id=b.diary_id
|left join eagle.src_zhengxing_api_tag c on b.tag_id=c.id
| where c.tag_type=4
""".stripMargin
)
......@@ -1213,7 +1172,7 @@ object EsmmDataTest {
val stat_yesterday = LocalDate.now().minusDays(1).toString
val max_stat_date = spark.sql(
s"""
|select max(stat_date) from esmm_click
|select max(stat_date) from jerry_test.esmm_click
""".stripMargin
)
val max = max_stat_date.collect().map(s => s(0).toString).head
......
......@@ -37,27 +37,17 @@ object GmeiConfig extends Serializable {
sparkConf.set("spark.debug.maxToStringFields", "130")
sparkConf.set("spark.sql.broadcastTimeout", "6000")
if (!sparkConf.contains("""spark.master""")) {
sparkConf.setMaster("local[3]")
}
if (!sparkConf.contains("spark.tispark.pd.addresses")) {
sparkConf.set("spark.tispark.pd.addresses", this.config.getString("tispark.pd.addresses"))
}
println(sparkConf.get("spark.tispark.pd.addresses"))
val spark = SparkSession
.builder()
// .config(sparkConf)
.appName("feededa")
.enableHiveSupport()
.config(sparkConf)
.config("spark.tispark.pd.addresses","172.16.40.158:2379")
.config("spark.sql.extensions","org.apache.spark.sql.TiExtensions")
.appName("feededa")
.enableHiveSupport()
.getOrCreate()
spark.sql("SET mapreduce.job.queuename=data")
spark.sql("SET mapred.input.dir.recursive=true")
spark.sql("SET hive.mapred.supports.subdirectories=true")
spark.sql("use online")
spark.sql("ADD JAR hdfs:///user/hive/share/lib/udf/brickhouse-0.7.1-SNAPSHOT.jar")
spark.sql("ADD JAR hdfs:///user/hive/share/lib/udf/hive-udf-1.0-SNAPSHOT.jar")
spark.sql("CREATE TEMPORARY FUNCTION json_map AS 'brickhouse.udf.json.JsonMapUDF'")
......
......@@ -52,7 +52,7 @@ object Recommendation_strategy_all {
val stat_date = GmeiConfig.getMinusNDate(1)
// val stat_date = param.date
// val stat_date = param.date
//println(param.date)
val partition_date = stat_date.replace("-","")
val decive_id_oldUser = sc.sql(
......@@ -83,7 +83,7 @@ object Recommendation_strategy_all {
|and jd.device_id not in (select device_id from blacklist)
|and jd.stat_date ='${stat_date}'
""".stripMargin
)
)
val imp_count_oldUser_Contrast = sc.sql(
s"""
......@@ -95,7 +95,7 @@ object Recommendation_strategy_all {
|and je.device_id not in (select device_id from blacklist)
|and je.stat_date ='${stat_date}'
""".stripMargin
)
)
val clk_count_oldUser_all = sc.sql(
s"""
......@@ -119,7 +119,7 @@ object Recommendation_strategy_all {
""".stripMargin
)
// Get the device_id of users hit by the strategy (获取策略命中用户device_id)
// Get the device_id of users hit by the strategy (获取策略命中用户device_id)
val device_id_cover = sc.sql(
s"""
|select distinct(device_id) as device_id
......@@ -183,13 +183,13 @@ object Recommendation_strategy_all {
""".stripMargin
)
val result1 = clk_count_oldUser_Contrast.join(imp_count_oldUser_Contrast,"stat_date")
.join(clk_count_oldUser_all,"stat_date")
.join(imp_count_oldUser_all,"stat_date")
.join(clk_count_oldUser_Cover,"stat_date")
.join(imp_count_oldUser_Cover,"stat_date")
.join(device_num_cover,"stat_date")
.join(device_num_1_hit,"stat_date")
.join(device_num_hit,"stat_date")
.join(clk_count_oldUser_all,"stat_date")
.join(imp_count_oldUser_all,"stat_date")
.join(clk_count_oldUser_Cover,"stat_date")
.join(imp_count_oldUser_Cover,"stat_date")
.join(device_num_cover,"stat_date")
.join(device_num_1_hit,"stat_date")
.join(device_num_hit,"stat_date")
result1.show()
GmeiConfig.writeToJDBCTable(result1, "Recommendation_strategy_temp", SaveMode.Append)
......@@ -287,7 +287,7 @@ object Recommendation_strategy_all {
GmeiConfig.writeToJDBCTable(result2, "strategy_other", SaveMode.Append)
// Compute the click-through rate of new users (统计新用户点击率)
// Compute the click-through rate of new users (统计新用户点击率)
val devicee_id_newUser = sc.sql(
s"""
|select distinct(device_id) as device_id
......@@ -442,7 +442,7 @@ object Gini_coefficient {
""".stripMargin
)
agency_id.createOrReplaceTempView("agency_id")
// Count occurrences (统计次数)
// Count occurrences (统计次数)
val diary_clk_num = sc.sql(
s"""
|select temp1.diary_id as diary_id,count(ov.cl_id) as diary_clk_num
......@@ -468,3 +468,4 @@ object Gini_coefficient {
......@@ -6,7 +6,7 @@ import java.time.LocalDate
import com.gmei.lib.AbstractParams
import org.apache.log4j.{Level, Logger}
import org.apache.spark.sql.{DataFrame, SaveMode, SparkSession, TiContext}
import org.apache.spark.sql.{DataFrame, SaveMode, SparkSession}
import scopt.OptionParser
import scala.util.parsing.json.JSON
......@@ -46,9 +46,6 @@ object esmm_feature {
GmeiConfig.setup(param.env)
val spark_env = GmeiConfig.getSparkSession()
val sc = spark_env._2
val ti = new TiContext(sc)
ti.tidbMapTable(dbName = "jerry_test",tableName = "device_app_list")
ti.tidbMapTable(dbName = "jerry_test",tableName = "user_feature")
user_feature(sc)
get_applist(sc)
......@@ -67,7 +64,7 @@ object esmm_feature {
""".stripMargin).dropDuplicates("device_id")
df.persist()
val old = spark.sql("select device_id from device_app_list").collect().map(x => x(0).toString)
val old = spark.sql("select device_id from jerry_test.device_app_list").collect().map(x => x(0).toString)
import spark.implicits._
val android = df.rdd.map(x => (x(0).toString,x(1).toString,x(2).toString))
......@@ -81,8 +78,6 @@ object esmm_feature {
val new_user = rdd.filter(x => old.indexOf(x._1)== -1)
.toDF("device_id","os","app_list","update_date")
if (new_user.take(1).nonEmpty){
val jdbc = "jdbc:mysql://10.66.157.22:4000/jerry_test?user=root&password=3SYz54LS9#^9sBvC&rewriteBatchedStatements=true"
GmeiConfig.writeToJDBCTable(jdbc, new_user,"device_app_list", SaveMode.Append)
val tecent_jdbc = "jdbc:mysql://152.136.44.138:4000/jerry_test?user=root&password=3SYz54LS9#^9sBvC&rewriteBatchedStatements=true"
GmeiConfig.writeToJDBCTable(tecent_jdbc, new_user,"device_app_list", SaveMode.Append)
......@@ -114,7 +109,7 @@ object esmm_feature {
def user_feature(spark:SparkSession): Unit ={
val yesterday = LocalDate.now().minusDays(1).toString.replace("-","")
println(yesterday)
val sql_exist = "select device_id from user_feature"
val sql_exist = "select device_id from jerry_test.user_feature"
val old = spark.sql(sql_exist)
.collect().map(x => x(0).toString)
val sql_yesterday =
......@@ -130,12 +125,8 @@ object esmm_feature {
val df_new = rdd.filter(x => old.indexOf(x._1)== -1)
.toDF("device_id","device_type","manufacturer","city_id","channel","date")
if (df_new.take(1).nonEmpty){
df_new.persist()
val jdbcuri = "jdbc:mysql://10.66.157.22:4000/jerry_test?user=root&password=3SYz54LS9#^9sBvC&rewriteBatchedStatements=true"
GmeiConfig.writeToJDBCTable(jdbcuri, df_new, "user_feature", SaveMode.Append)
val tecent_jdbc = "jdbc:mysql://152.136.44.138:4000/jerry_test?user=root&password=3SYz54LS9#^9sBvC&rewriteBatchedStatements=true"
GmeiConfig.writeToJDBCTable(tecent_jdbc, df_new, "user_feature", SaveMode.Append)
df_new.unpersist()
}else {
println("no need to insert into user feature")
}
......
......@@ -47,14 +47,14 @@ object temp_analysis {
val spark_env = GmeiConfig.getSparkSession()
val sc = spark_env._2
// val ti = new TiContext(sc)
// val ti = new TiContext(sc)
sc.sql("use jerry_prod")
// ti.tidbMapTable(dbName = "jerry_prod", tableName = "diary_video")
// ti.tidbMapTable(dbName = "jerry_prod", tableName = "data_feed_click")
// ti.tidbMapTable(dbName = "jerry_prod", tableName = "blacklist")
// ti.tidbMapTable(dbName = "jerry_test", tableName = "bl_device_list")
// ti.tidbMapTable(dbName = "jerry_prod", tableName = "data_feed_exposure")
// ti.tidbMapTable(dbName = "jerry_prod", tableName = "merge_queue_table")
// ti.tidbMapTable(dbName = "jerry_prod", tableName = "diary_video")
// ti.tidbMapTable(dbName = "jerry_prod", tableName = "data_feed_click")
// ti.tidbMapTable(dbName = "jerry_prod", tableName = "blacklist")
// ti.tidbMapTable(dbName = "jerry_test", tableName = "bl_device_list")
// ti.tidbMapTable(dbName = "jerry_prod", tableName = "data_feed_exposure")
// ti.tidbMapTable(dbName = "jerry_prod", tableName = "merge_queue_table")
import sc.implicits._
......@@ -81,23 +81,23 @@ object temp_analysis {
agency_id.createOrReplaceTempView("agency_id")
// // Daily new users (每日新用户)
// val device_id_newUser = sc.sql(
// s"""
// |select distinct(device_id) as device_id
// |from online.ml_device_day_active_status
// |where active_type != '4'
// |and first_channel_source_type not in ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
// | ,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang'
// | ,'js-az1','js-az2','js-az3','js-az4','js-az5','jfq-az1','jfq-az2','jfq-az3','jfq-az4','jfq-az5','toufang1'
// | ,'toufang2','toufang3','toufang4','toufang5','toufang6','TF-toufang1','TF-toufang2','TF-toufang3','TF-toufang4'
// | ,'TF-toufang5','tf-toufang1','tf-toufang2','tf-toufang3','tf-toufang4','tf-toufang5','benzhan','promotion_aso100'
// | ,'promotion_qianka','promotion_xiaoyu','promotion_dianru','promotion_malioaso','promotion_malioaso-shequ'
// | ,'promotion_shike','promotion_julang_jl03')
// |and partition_date ='${partition_date}'
// """.stripMargin
// )
// device_id_newUser.createOrReplaceTempView("device_id_new")
// // Daily new users (每日新用户)
// val device_id_newUser = sc.sql(
// s"""
// |select distinct(device_id) as device_id
// |from online.ml_device_day_active_status
// |where active_type != '4'
// |and first_channel_source_type not in ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
// | ,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang'
// | ,'js-az1','js-az2','js-az3','js-az4','js-az5','jfq-az1','jfq-az2','jfq-az3','jfq-az4','jfq-az5','toufang1'
// | ,'toufang2','toufang3','toufang4','toufang5','toufang6','TF-toufang1','TF-toufang2','TF-toufang3','TF-toufang4'
// | ,'TF-toufang5','tf-toufang1','tf-toufang2','tf-toufang3','tf-toufang4','tf-toufang5','benzhan','promotion_aso100'
// | ,'promotion_qianka','promotion_xiaoyu','promotion_dianru','promotion_malioaso','promotion_malioaso-shequ'
// | ,'promotion_shike','promotion_julang_jl03')
// |and partition_date ='${partition_date}'
// """.stripMargin
// )
// device_id_newUser.createOrReplaceTempView("device_id_new")
val blacklist_id = sc.sql(
s"""
......@@ -136,13 +136,13 @@ object temp_analysis {
diary_clk_all.show(80)
// Diary clicks (日记本点击)
// Diary clicks (日记本点击)
val referrer=List("about_me_message_list","all_case_service_comment","all_cases","diary_detail","diary_list"
,"diary_listof_related_service","answer_detail","community_home","conversation_detail","create_diary_title","diary_listof_related_service",
"doctor_all_cases","hospital_all_cases","my_favor","my_order","order_detail","personal_store_diary_list","received_votes",
"topic_detail","welfare_detail","welfare_list","welfare_special","wiki_detail","zone_detail",
"expert_detail","free_activity_detail","home","message_home","my_diary","organization_detail","other_homepage","question_detail",
"search_result_diary","search_result_more","welfare_detail","zone_v3")
"expert_detail","free_activity_detail","home","message_home","my_diary","organization_detail","other_homepage","question_detail",
"search_result_diary","search_result_more","welfare_detail","zone_v3")
for( a <- referrer ){
val diary_clk_temp = sc.sql(
s"""
......@@ -212,14 +212,14 @@ object ARPU_COM {
val spark_env = GmeiConfig.getSparkSession()
val sc = spark_env._2
// val ti = new TiContext(sc)
// val ti = new TiContext(sc)
sc.sql("use jerry_prod")
// ti.tidbMapTable(dbName = "jerry_prod", tableName = "diary_video")
// ti.tidbMapTable(dbName = "jerry_prod", tableName = "data_feed_click")
// ti.tidbMapTable(dbName = "jerry_prod", tableName = "blacklist")
// ti.tidbMapTable(dbName = "jerry_test", tableName = "bl_device_list")
// ti.tidbMapTable(dbName = "jerry_prod", tableName = "data_feed_exposure")
// ti.tidbMapTable(dbName = "jerry_prod", tableName = "merge_queue_table")
// ti.tidbMapTable(dbName = "jerry_prod", tableName = "diary_video")
// ti.tidbMapTable(dbName = "jerry_prod", tableName = "data_feed_click")
// ti.tidbMapTable(dbName = "jerry_prod", tableName = "blacklist")
// ti.tidbMapTable(dbName = "jerry_test", tableName = "bl_device_list")
// ti.tidbMapTable(dbName = "jerry_prod", tableName = "data_feed_exposure")
// ti.tidbMapTable(dbName = "jerry_prod", tableName = "merge_queue_table")
import sc.implicits._
......@@ -335,14 +335,14 @@ object hospital_gengmei {
val spark_env = GmeiConfig.getSparkSession()
val sc = spark_env._2
// val ti = new TiContext(sc)
// val ti = new TiContext(sc)
sc.sql("use jerry_prod")
// ti.tidbMapTable(dbName = "jerry_prod", tableName = "diary_video")
// ti.tidbMapTable(dbName = "jerry_prod", tableName = "data_feed_click")
// ti.tidbMapTable(dbName = "jerry_prod", tableName = "blacklist")
// ti.tidbMapTable(dbName = "jerry_test", tableName = "bl_device_list")
// ti.tidbMapTable(dbName = "jerry_prod", tableName = "data_feed_exposure")
// ti.tidbMapTable(dbName = "jerry_prod", tableName = "merge_queue_table")
// ti.tidbMapTable(dbName = "jerry_prod", tableName = "diary_video")
// ti.tidbMapTable(dbName = "jerry_prod", tableName = "data_feed_click")
// ti.tidbMapTable(dbName = "jerry_prod", tableName = "blacklist")
// ti.tidbMapTable(dbName = "jerry_test", tableName = "bl_device_list")
// ti.tidbMapTable(dbName = "jerry_prod", tableName = "data_feed_exposure")
// ti.tidbMapTable(dbName = "jerry_prod", tableName = "merge_queue_table")
import sc.implicits._
......@@ -407,19 +407,19 @@ object meigou_xiaofei_renshu {
val spark_env = GmeiConfig.getSparkSession()
val sc = spark_env._2
// val ti = new TiContext(sc)
// val ti = new TiContext(sc)
sc.sql("use jerry_prod")
// ti.tidbMapTable(dbName = "jerry_prod", tableName = "diary_video")
// ti.tidbMapTable(dbName = "jerry_prod", tableName = "data_feed_click")
// ti.tidbMapTable(dbName = "jerry_prod", tableName = "blacklist")
// ti.tidbMapTable(dbName = "jerry_test", tableName = "bl_device_list")
// ti.tidbMapTable(dbName = "jerry_prod", tableName = "data_feed_exposure")
// ti.tidbMapTable(dbName = "jerry_prod", tableName = "merge_queue_table")
// ti.tidbMapTable(dbName = "jerry_prod", tableName = "diary_video")
// ti.tidbMapTable(dbName = "jerry_prod", tableName = "data_feed_click")
// ti.tidbMapTable(dbName = "jerry_prod", tableName = "blacklist")
// ti.tidbMapTable(dbName = "jerry_test", tableName = "bl_device_list")
// ti.tidbMapTable(dbName = "jerry_prod", tableName = "data_feed_exposure")
// ti.tidbMapTable(dbName = "jerry_prod", tableName = "merge_queue_table")
import sc.implicits._
// val stat_date = GmeiConfig.getMinusNDate(1)
// val stat_date = GmeiConfig.getMinusNDate(1)
val stat_date=param.date
//println(param.date)
val partition_date = stat_date.replace("-","")
......@@ -461,21 +461,21 @@ object meigou_xiaofei_renshu {
final_id.createOrReplaceTempView("final_id")
// val meigou_price = sc.sql(
// s"""
// |select md.user_id,sum(md.gengmei_price) as pay_all
// |from online.ml_meigou_order_detail md left join final_id
// |on md.device_id = final_id.device_id
// |where md.status= 2
// |and final_id.device_id is null
// |and md.partition_date = '20181223'
// |and md.pay_time is not null
// |and md.validate_time>'2017-01-01 00:00:00.0'
// |group by md.user_id
// |order by sum(md.gengmei_price)
// """.stripMargin
// )
// meigou_price.show(80)
// val meigou_price = sc.sql(
// s"""
// |select md.user_id,sum(md.gengmei_price) as pay_all
// |from online.ml_meigou_order_detail md left join final_id
// |on md.device_id = final_id.device_id
// |where md.status= 2
// |and final_id.device_id is null
// |and md.partition_date = '20181223'
// |and md.pay_time is not null
// |and md.validate_time>'2017-01-01 00:00:00.0'
// |group by md.user_id
// |order by sum(md.gengmei_price)
// """.stripMargin
// )
// meigou_price.show(80)
val meigou_price = sc.sql(
......@@ -500,9 +500,9 @@ object meigou_xiaofei_renshu {
|order by sum(md.gengmei_price)
""".stripMargin
)
// meigou_price.show(80)
// meigou_price.show(80)
// GmeiConfig.writeToJDBCTable(meigou_price, "meigou_price", SaveMode.Overwrite)
// GmeiConfig.writeToJDBCTable(meigou_price, "meigou_price", SaveMode.Overwrite)
}
......@@ -549,18 +549,18 @@ object alpha_ctr {
val spark_env = GmeiConfig.getSparkSession()
val sc = spark_env._2
// val ti = new TiContext(sc)
// val ti = new TiContext(sc)
sc.sql("use jerry_prod")
// ti.tidbMapTable(dbName = "jerry_prod", tableName = "diary_video")
// ti.tidbMapTable(dbName = "jerry_prod", tableName = "data_feed_click")
// ti.tidbMapTable(dbName = "jerry_prod", tableName = "blacklist")
// ti.tidbMapTable(dbName = "jerry_test", tableName = "bl_device_list")
// ti.tidbMapTable(dbName = "jerry_prod", tableName = "data_feed_exposure")
// ti.tidbMapTable(dbName = "jerry_prod", tableName = "merge_queue_table")
// ti.tidbMapTable(dbName = "jerry_prod", tableName = "diary_video")
// ti.tidbMapTable(dbName = "jerry_prod", tableName = "data_feed_click")
// ti.tidbMapTable(dbName = "jerry_prod", tableName = "blacklist")
// ti.tidbMapTable(dbName = "jerry_test", tableName = "bl_device_list")
// ti.tidbMapTable(dbName = "jerry_prod", tableName = "data_feed_exposure")
// ti.tidbMapTable(dbName = "jerry_prod", tableName = "merge_queue_table")
import sc.implicits._
// val stat_date = GmeiConfig.getMinusNDate(1)
// val stat_date = GmeiConfig.getMinusNDate(1)
val stat_date = param.date
//println(param.date)
val partition_date = stat_date.replace("-","")
......@@ -638,12 +638,17 @@ object alpha_ctr {
<<<<<<< HEAD
// GmeiConfig.writeToJDBCTable(result, "alpha_ctr", SaveMode.Append)
// GmeiConfig.writeToJDBCTable("jdbc:mysql://152.136.44.138:4000/jerry_prod?user=root&password=3SYz54LS9#^9sBvC&rewriteBatchedStatements=true",result, table="alpha_ctr",SaveMode.Append)
println("开始写入")
GmeiConfig.writeToJDBCTable("jerry.jdbcuri",result, table="alpha_ctr",SaveMode.Append)
println("写入完成")
=======
// GmeiConfig.writeToJDBCTable(result, "alpha_ctr", SaveMode.Append)
GmeiConfig.writeToJDBCTable("jdbc:mysql://152.136.44.138:4000/jerry_prod?user=root&password=3SYz54LS9#^9sBvC&rewriteBatchedStatements=true",result, table="alpha_ctr",SaveMode.Append)
>>>>>>> 6bb8533b68efef7c647251ef08479560d5e1216a
......@@ -667,12 +672,17 @@ object alpha_ctr {
)
val result3=device_num_count.join(duration_device,"stat_date")
<<<<<<< HEAD
// GmeiConfig.writeToJDBCTable(result3, "alpha_duration", SaveMode.Append)
// GmeiConfig.writeToJDBCTable("jdbc:mysql://152.136.44.138:4000/jerry_prod?user=root&password=3SYz54LS9#^9sBvC&rewriteBatchedStatements=true",result3, table="alpha_duration",SaveMode.Append)
println("开始写入")
GmeiConfig.writeToJDBCTable("jerry.jdbcuri",result3, table="alpha_duration",SaveMode.Append)
println("写入完成")
=======
// GmeiConfig.writeToJDBCTable(result3, "alpha_duration", SaveMode.Append)
GmeiConfig.writeToJDBCTable("jdbc:mysql://152.136.44.138:4000/jerry_prod?user=root&password=3SYz54LS9#^9sBvC&rewriteBatchedStatements=true",result3, table="alpha_duration",SaveMode.Append)
>>>>>>> 6bb8533b68efef7c647251ef08479560d5e1216a
......@@ -723,19 +733,19 @@ object copy_database {
val spark_env = GmeiConfig.getSparkSession()
val sc = spark_env._2
// val ti = new TiContext(sc)
// val ti = new TiContext(sc)
sc.sql("use jerry_prod")
// ti.tidbMapTable(dbName = "jerry_prod", tableName = "diary_video")
// ti.tidbMapTable(dbName = "jerry_prod", tableName = "data_feed_click")
// ti.tidbMapTable(dbName = "jerry_prod", tableName = "blacklist")
// ti.tidbMapTable(dbName = "jerry_test", tableName = "tl_hdfs_wiki_item_tag_view")
// ti.tidbMapTable(dbName = "jerry_test", tableName = "Knowledge_network")
// ti.tidbMapTable(dbName = "eagle", tableName = "src_mimas_prod_api_diary")
// ti.tidbMapTable(dbName = "jerry_prod", tableName = "diary_video")
// ti.tidbMapTable(dbName = "jerry_prod", tableName = "data_feed_click")
// ti.tidbMapTable(dbName = "jerry_prod", tableName = "blacklist")
// ti.tidbMapTable(dbName = "jerry_test", tableName = "tl_hdfs_wiki_item_tag_view")
// ti.tidbMapTable(dbName = "jerry_test", tableName = "Knowledge_network")
// ti.tidbMapTable(dbName = "eagle", tableName = "src_mimas_prod_api_diary")
import sc.implicits._
val stat_date = GmeiConfig.getMinusNDate(1)
// val stat_date=param.date
// val stat_date=param.date
val partition_date = stat_date.replace("-","")
val new_data = sc.sql(
......@@ -753,7 +763,7 @@ object copy_database {
""".stripMargin
)
GmeiConfig.writeToJDBCTable(new_data, "train_Knowledge_network_data", SaveMode.Overwrite)
GmeiConfig.writeToJDBCTable("jdbc:mysql://152.136.44.138:4000/jerry_test?user=root&password=3SYz54LS9#^9sBvC&rewriteBatchedStatements=true",new_data, "train_Knowledge_network_data", SaveMode.Overwrite)
}
......@@ -763,4 +773,3 @@ object copy_database {
}