Commit c211e323 authored by 赵建伟

update codes

parent 73fbf112
......@@ -14,7 +14,7 @@ nohup $FLINK_HOME/bin/flink run \
-p 6 \
-yjm 1024 \
-ytm 2048 \
-c com.gmei.data.ctr.ProdCtrEstimateMainClk \
-c com.gmei.data.ctr.main.ProdCtrEstimateMainClk \
$JAR_DIR/ctr-estimate-1.0-SNAPSHOT.jar \
--inBrokers '172.16.44.25:9092,172.16.44.31:9092,172.16.44.45:9092' \
--batchSize 1000 \
......
......@@ -14,7 +14,7 @@ nohup $FLINK_HOME/bin/flink run \
-p 6 \
-yjm 1024 \
-ytm 2048 \
-c com.gmei.data.ctr.ProdCtrEstimateMainTag \
-c com.gmei.data.ctr.main.ProdCtrEstimateMainTag \
$JAR_DIR/ctr-estimate-1.0-SNAPSHOT.jar \
--inBrokers '172.16.44.25:9092,172.16.44.31:9092,172.16.44.45:9092' \
--batchSize 1000 \
......
......@@ -8,6 +8,15 @@ CREATE TABLE `device_current_estimate_clk` (
`content_card_click` bigint(20) DEFAULT NULL COMMENT '日记贴点击量',
`tractate_card_click` bigint(20) DEFAULT NULL COMMENT '用户贴点击量',
`answer_card_click` bigint(20) DEFAULT NULL COMMENT '问答贴点击量',
`like_diary_count` bigint(20) DEFAULT NULL COMMENT '点赞日记数量',
`like_card_count` bigint(20) DEFAULT NULL COMMENT '点赞帖子数量',
`like_answer_count` bigint(20) DEFAULT NULL COMMENT '点赞回答数量',
`discuss_diary_count` bigint(20) DEFAULT NULL COMMENT '评论日记数量',
`discuss_card_count` bigint(20) DEFAULT NULL COMMENT '评论帖子数量',
`discuss_answer_count` bigint(20) DEFAULT NULL COMMENT '评论回答数量',
`collect_diary_count` bigint(20) DEFAULT NULL COMMENT '收藏日记数量',
`collect_card_count` bigint(20) DEFAULT NULL COMMENT '收藏帖子数量',
`collect_answer_count` bigint(20) DEFAULT NULL COMMENT '收藏问答数量',
`partition_date` varchar(45) DEFAULT NULL COMMENT '日期',
`last_update_time` varchar(45) DEFAULT NULL COMMENT '上一次更改的时间',
PRIMARY KEY (`id`)
......@@ -52,35 +61,49 @@ CREATE TABLE `device_current_estimate_tag_unplat` (
PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
-- CTR feature-estimation table holding per-device content preferences
-- (one free-text column each for diary, Q&A, post and service preferences).
CREATE TABLE `device_recent_estimate_view_pfr` (
    `id`                 BIGINT(20)   NOT NULL AUTO_INCREMENT COMMENT '自增ID',
    `device_id`          VARCHAR(150) DEFAULT NULL COMMENT '设备ID',
    `diary_preference`   TEXT COMMENT '日记偏好',
    `qa_preference`      TEXT COMMENT '问答偏好',
    `card_preference`    TEXT COMMENT '帖子偏好',
    `service_preference` TEXT COMMENT '美购偏好',
    -- Date and update time are stored as strings, not DATE/DATETIME.
    `partition_date`     VARCHAR(45)  DEFAULT NULL COMMENT '日期',
    `last_update_time`   VARCHAR(45)  DEFAULT NULL COMMENT '上一次更改的时间',
    PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
-- CTR feature-estimation tag table: per-device click counters plus first- and
-- second-level preference tags for the platform, content and commodity dimensions.
--
-- Column COMMENT fixes relative to the previous definition:
--   * plat_first_demands / plat_project had each other's descriptions;
--   * commodity_first_demands / commodity_project were described as "content"
--     (内容) columns instead of "commodity" (商品) columns.
CREATE TABLE `device_current_estimate` (
    `id` bigint(20) NOT NULL AUTO_INCREMENT COMMENT '自增ID',
    `device_id` varchar(150) DEFAULT NULL COMMENT '设备ID',
    -- Click counters per card type
    `content_card_click` bigint(20) DEFAULT NULL COMMENT '日记贴点击量',
    `tractate_card_click` bigint(20) DEFAULT NULL COMMENT '用户贴点击量',
    `answer_card_click` bigint(20) DEFAULT NULL COMMENT '问答贴点击量',
    -- First-level preferences: platform
    `plat_first_positions` text COMMENT '当日平台一级部位偏好',
    `plat_first_solutions` text COMMENT '当日平台一级方式偏好',
    `plat_first_demands` text COMMENT '当日平台一级诉求偏好',
    `plat_project` text COMMENT '当日平台项目偏好',
    -- First-level preferences: content
    `content_first_positions` text COMMENT '当日内容一级部位偏好',
    `content_first_solutions` text COMMENT '当日内容一级方式偏好',
    `content_first_demands` text COMMENT '当日内容一级诉求偏好',
    `content_project` text COMMENT '当日内容项目偏好',
    -- First-level preferences: commodity
    `commodity_first_positions` text COMMENT '当日商品一级部位偏好',
    `commodity_first_solutions` text COMMENT '当日商品一级方式偏好',
    `commodity_first_demands` text COMMENT '当日商品一级诉求偏好',
    `commodity_project` text COMMENT '当日商品项目偏好',
    -- Second-level preferences: platform / content / commodity
    `plat_second_positions` text COMMENT '当日平台二级部位偏好',
    `plat_second_solutions` text COMMENT '当日平台二级方式偏好',
    `plat_second_demands` text COMMENT '当日平台二级诉求偏好',
    `content_second_positions` text COMMENT '当日内容二级部位偏好',
    `content_second_solutions` text COMMENT '当日内容二级方式偏好',
    `content_second_demands` text COMMENT '当日内容二级诉求偏好',
    `commodity_second_positions` text COMMENT '当日商品二级部位偏好',
    `commodity_second_solutions` text COMMENT '当日商品二级方式偏好',
    `commodity_second_demands` text COMMENT '当日商品二级诉求偏好',
    -- Date and update time are stored as strings, not DATE/DATETIME.
    `partition_date` varchar(45) DEFAULT NULL COMMENT '日期',
    `last_update_time` varchar(45) DEFAULT NULL COMMENT '上一次更改的时间',
    PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
\ No newline at end of file
# CREATE TABLE `device_current_estimate` (
# `id` bigint(20) NOT NULL AUTO_INCREMENT COMMENT '自增ID',
# `device_id` varchar(150) DEFAULT NULL COMMENT '设备ID',
# `content_card_click` bigint(20) DEFAULT NULL COMMENT '日记贴点击量',
# `tractate_card_click` bigint(20) DEFAULT NULL COMMENT '用户贴点击量',
# `answer_card_click` bigint(20) DEFAULT NULL COMMENT '问答贴点击量',
# `plat_first_positions` text COMMENT '当日平台一级部位偏好',
# `plat_first_solutions` text COMMENT '当日平台一级方式偏好',
# `plat_first_demands` text COMMENT '当日平台一级诉求偏好',
# `plat_project` text COMMENT '当日平台项目偏好',
# `content_first_positions` text COMMENT '当日内容一级部位偏好',
# `content_first_solutions` text COMMENT '当日内容一级方式偏好',
# `content_first_demands` text COMMENT '当日内容一级诉求偏好',
# `content_project` text COMMENT '当日内容项目偏好',
# `commodity_first_positions` text COMMENT '当日商品一级部位偏好',
# `commodity_first_solutions` text COMMENT '当日商品一级方式偏好',
# `commodity_first_demands` text COMMENT '当日商品一级诉求偏好',
# `commodity_project` text COMMENT '当日商品项目偏好',
# `plat_second_positions` text COMMENT '当日平台二级部位偏好',
# `plat_second_solutions` text COMMENT '当日平台二级方式偏好',
# `plat_second_demands` text COMMENT '当日平台二级诉求偏好',
# `content_second_positions` text COMMENT '当日内容二级部位偏好',
# `content_second_solutions` text COMMENT '当日内容二级方式偏好',
# `content_second_demands` text COMMENT '当日内容二级诉求偏好',
# `commodity_second_positions` text COMMENT '当日商品二级部位偏好',
# `commodity_second_solutions` text COMMENT '当日商品二级方式偏好',
# `commodity_second_demands` text COMMENT '当日商品二级诉求偏好',
# `partition_date` varchar(45) DEFAULT NULL COMMENT '日期',
# `last_update_time` varchar(45) DEFAULT NULL COMMENT '上一次更改的时间',
# PRIMARY KEY (`id`)
# ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
\ No newline at end of file
#!/usr/local/bin/python2.7
# -*- coding:utf-8 -*-
import pymysql
import logging
import datetime
import sys
# Python 2 idiom: reload `sys` so that setdefaultencoding is callable, then make
# UTF-8 the interpreter-wide default string encoding.
reload(sys)
sys.setdefaultencoding('utf8')
# Wall-clock time at module load, formatted "YYYY-MM-DD HH:MM:SS".
# NOTE(review): not referenced anywhere in the visible part of this script.
current_time_str = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
# Calendar date three days before module load, formatted "YYYY-MM-DD"; used by
# surplus_del as the inclusive upper bound on partition_date for deletion.
three_days_ago_date_str = (datetime.datetime.now() - datetime.timedelta(days=3)).strftime("%Y-%m-%d")
# MySQL helper class
class MysqlOperator:
    """Thin wrapper around a single pymysql connection.

    The connection is opened in the constructor and stays open until
    close_connect() is called.
    """

    def __init__(self, host, port, user, password, db, charset='utf8'):
        """Open the connection.

        :param host: server host name or IP address
        :param port: server TCP port
        :param user: login user
        :param password: login password
        :param db: schema to select after connecting
        :param charset: connection character set (defaults to 'utf8')
        """
        self.connect = pymysql.connect(
            host=host,
            port=port,
            user=user,
            password=password,
            db=db,
            charset=charset, )

    def __execute_sql(self, sql, args=None):
        """Execute one statement, commit, and return whatever fetchall() yields."""
        with self.connect.cursor() as cursor:
            # With args=None the statement is sent as-is; otherwise the driver
            # binds and escapes the values for the %s placeholders in `sql`.
            cursor.execute(sql, args)
            data = cursor.fetchall()
        self.connect.commit()
        return data

    def execute_sql(self, sql, args=None):
        """Execute `sql` and commit.

        :param sql: statement text; may contain %s placeholders when `args` is given
        :param args: optional sequence/dict of values to bind (default: none)
        :return: the rows fetched for the statement (previously discarded)
        """
        return self.__execute_sql(sql, args)

    def close_connect(self):
        """Close the underlying connection."""
        self.connect.close()
# Delete or update data
def del_or_update(sql):
    """Run one data-modifying statement on a short-lived connection.

    A new MysqlOperator is created for every call, the statement is executed
    (and committed by the operator), and the connection is closed afterwards,
    including when the statement raises.

    :param sql: complete SQL statement text to execute
    """
    # NOTE(review): host and root credentials are hard-coded in source control
    # (including the commented-out alternative below); consider moving them to
    # configuration or environment variables and rotating the passwords.
    # operator = MysqlOperator('172.16.40.170', 4000, 'data_user', 'YPEzp78HQBuhByWPpefQu6X3D6hEPfD6', 'jerry_test')
    operator = MysqlOperator('172.18.44.3', 3306, 'root', '5OqYM^zLwotJ3oSo', 'jerry_test')
    try:
        operator.execute_sql(sql)
    finally:
        # Always release the connection, even if the statement failed.
        operator.close_connect()
# Delete surplus (expired) rows from the CTR-estimate result tables
def surplus_del():
    """Purge old partitions from the three CTR-estimate tables.

    For each table, rows whose partition_date is on or before
    three_days_ago_date_str are deleted via del_or_update; after each delete a
    success message is printed and written to the cleanup log file.
    """
    logging.basicConfig(
        filename='/data/log/ctr-estimate/ctr-estimate-del.log',
        filemode='a',
        level=logging.INFO,
        format='%(asctime)s - %(pathname)s[line:%(lineno)d] - %(levelname)s: %(message)s',
    )
    # (table, success message) pairs, processed in the original order.
    cleanup_plan = (
        ('device_current_estimate_clk',
         "ctr-estimate-clk surplus result del success!"),
        ('device_current_estimate_tag_plat',
         "ctr-estimate-tag-plat surplus result del success!"),
        ('device_current_estimate_tag_unplat',
         "ctr-estimate-tag-unplat surplus result del success!"),
    )
    for table_name, done_msg in cleanup_plan:
        delete_stmt = "delete from " + table_name + " where partition_date <= '" + three_days_ago_date_str + "'"
        del_or_update(delete_stmt)
        print(done_msg)
        logging.info(done_msg)
# Script entry point: run the cleanup only when executed directly.
if "__main__" == __name__:
    surplus_del()
......@@ -205,11 +205,11 @@
</excludes>
</filter>
</filters>
<!-- <transformers>-->
<!-- <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">-->
<!-- <mainClass>com.gmei.data.ctr.CtrEstimateMainTagDev</mainClass>-->
<!-- </transformer>-->
<!-- </transformers>-->
<transformers>
<transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
<mainClass>com.gmei.data.ctr.main.TestCtrEstimateMainTag</mainClass>
</transformer>
</transformers>
<createDependencyReducedPom>false</createDependencyReducedPom>
</configuration>
</execution>
......
package com.gmei.data.ctr;
package com.gmei.data.ctr.main;
import com.gmei.data.ctr.operator.CtrEstimateClkOperator;
import com.gmei.data.ctr.source.MaidianKafkaSource;
......
package com.gmei.data.ctr;
package com.gmei.data.ctr.main;
import com.gmei.data.ctr.operator.CtrEstimateClkOperator;
import com.gmei.data.ctr.operator.CtrEstimatePfrOperator;
import com.gmei.data.ctr.source.MaidianKafkaSource;
import org.apache.flink.api.common.restartstrategy.RestartStrategies;
import org.apache.flink.api.java.utils.ParameterTool;
import org.apache.flink.runtime.state.filesystem.FsStateBackend;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.CheckpointConfig;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
/**
* @ClassName DevCtrEstimateMainClk
* @Description: CTR预估特征实时处理入口
* @ClassName DevCtrEstimateMainPfr
* @Description: CTR预估特征实时偏好处理入口
* @Author apple
* @Date 2020/3/30
* @Version V1.0
**/
public class DevCtrEstimateMainClk {
public class ProdCtrEstimateMainPfr {
public static void main(String[] args) throws Exception{
// 获取运行参数
......@@ -69,7 +67,7 @@ public class DevCtrEstimateMainClk {
).getInstance();
// 执行处理核心逻辑
new CtrEstimateClkOperator(MaidianDataStream,outJdbcUrl,maxRetry,retryInteral,parallelism,windowSize,slideSize).run();
new CtrEstimatePfrOperator(MaidianDataStream,outJdbcUrl,maxRetry,retryInteral,parallelism,windowSize,slideSize).run();
// 常驻执行
env.execute("ctr-estimate-clk");
......
package com.gmei.data.ctr;
package com.gmei.data.ctr.main;
import com.gmei.data.ctr.operator.CtrEstimateTagOperator;
import com.gmei.data.ctr.source.MaidianKafkaSource;
......
package com.gmei.data.ctr;
package com.gmei.data.ctr.main;
import com.gmei.data.ctr.operator.CtrEstimateClkOperator;
import com.gmei.data.ctr.source.MaidianKafkaSource;
......
package com.gmei.data.ctr;
package com.gmei.data.ctr.main;
import com.gmei.data.ctr.operator.CtrEstimateTagOperator;
import com.gmei.data.ctr.operator.CtrEstimatePfrOperator;
import com.gmei.data.ctr.source.MaidianKafkaSource;
import org.apache.flink.api.common.restartstrategy.RestartStrategies;
import org.apache.flink.api.java.utils.ParameterTool;
import org.apache.flink.runtime.state.filesystem.FsStateBackend;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.CheckpointConfig;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
/**
* @ClassName DevCtrEstimateMainTag
* @Description: CTR预估特征实时处理入口
* @ClassName DevCtrEstimateMainPfr
* @Description: CTR预估特征实时偏好处理入口
* @Author apple
* @Date 2020/3/30
* @Version V1.0
**/
public class DevCtrEstimateMainTag {
public class TestCtrEstimateMainPfr {
public static void main(String[] args) throws Exception{
// 获取运行参数
......@@ -24,26 +21,19 @@ public class DevCtrEstimateMainTag {
String inBrokers = parameterTool.get("inBrokers","test003:9092");
String batchSize = parameterTool.get("batchSize","1000");
String maidianInTopic = parameterTool.get("maidianInTopic", "test11");
String maidianInGroupId = parameterTool.get("maidianInGroupId","ctr-estimate-tag");
Integer windowSize = parameterTool.getInt("windowSize",5);
Integer slideSize = parameterTool.getInt("slideSize",5);
String maidianInGroupId = parameterTool.get("maidianInGroupId","ctr-estimate-clk");
Integer windowSize = parameterTool.getInt("windowSize",60);
Integer slideSize = parameterTool.getInt("slideSize",60);
String outJdbcUrl = parameterTool.get("outJdbcUrl",
"jdbc:mysql://172.18.44.3:3306/jerry_test?user=root&password=5OqYM^zLwotJ3oSo&autoReconnect=true&useSSL=false");
Integer maxRetry = parameterTool.getInt("maxRetry",3);
Long retryInteral = parameterTool.getLong("retryInteral",3000);
String checkpointPath = parameterTool.get("checkpointPath","hdfs://bj-gmei-hdfs/user/data/flink/ctr-estimate/checkpoint");
Boolean isStartFromEarliest = parameterTool.getBoolean("isStartFromEarliest",true);
Boolean isStartFromEarliest = parameterTool.getBoolean("isStartFromEarliest",false);
Boolean isStartFromLatest = parameterTool.getBoolean("isStartFromLatest",false);
String startTime = parameterTool.get("startTime");
Integer parallelism = parameterTool.getInt("parallelism",2);
String inZxJdbcUrl = parameterTool.get("inZxJdbcUrl","jdbc:mysql://172.16.30.141:3306/zhengxing?characterEncoding=UTF-8&autoReconnect=true&useSSL=false");
String inZxUsername = parameterTool.get("inZxUsername","work");
String inZxPassword = parameterTool.get("inZxPassword","BJQaT9VzDcuPBqkd");
String inJerryJdbcUrl = parameterTool.get("inJerryJdbcUrl","jdbc:mysql://172.16.40.170:4000/jerry_test?characterEncoding=UTF-8&autoReconnect=true&useSSL=false");
String inJerryUsername = parameterTool.get("inJerryUsername","data_user");
String inJerryPassword = parameterTool.get("inJerryPassword","YPEzp78HQBuhByWPpefQu6X3D6hEPfD6");
// 核心参数打印
System.out.println("**********************************************************");
System.out.println("*** inBrokers: " + inBrokers);
System.out.println("*** maidianInTopic: "+ maidianInTopic);
......@@ -53,12 +43,6 @@ public class DevCtrEstimateMainTag {
System.out.println("*** startTime: " + startTime);
System.out.println("*** windowSize: " + windowSize);
System.out.println("*** slideSize: " + slideSize);
System.out.println("*** inZxJdbcUrl: " + inZxJdbcUrl);
System.out.println("*** inZxUsername: " + inZxUsername);
System.out.println("*** inZxPassword: " + inZxPassword);
System.out.println("*** inJerryJdbcUrl: " + inJerryJdbcUrl);
System.out.println("*** inJerryUsername: " + inJerryUsername);
System.out.println("*** inJerryPassword: " + inJerryPassword);
System.out.println("**********************************************************");
// 获得流处理环境对象
......@@ -82,23 +66,9 @@ public class DevCtrEstimateMainTag {
).getInstance();
// 执行处理核心逻辑
new CtrEstimateTagOperator(
MaidianDataStream,
outJdbcUrl,
maxRetry,
retryInteral,
parallelism,
windowSize,
slideSize,
inZxJdbcUrl,
inZxUsername,
inZxPassword,
inJerryJdbcUrl,
inJerryUsername,
inJerryPassword
).run();
new CtrEstimatePfrOperator(MaidianDataStream,outJdbcUrl,maxRetry,retryInteral,parallelism,windowSize,slideSize).run();
// 常驻执行
env.execute("ctr-estimate-tag");
env.execute("ctr-estimate-clk");
}
}
package com.gmei.data.ctr;
package com.gmei.data.ctr.main;
import com.gmei.data.ctr.operator.CtrEstimateTagOperator;
import com.gmei.data.ctr.source.MaidianKafkaSource;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment