package com.gmei.streaming;

import com.gmei.bean.bl.BlPreciseExposureBean;
import com.gmei.function.*;
import com.gmei.sink.BlPreciseExposureMysqlSink;
import com.gmei.source.BlMaiDianKafkaSource;
import org.apache.flink.api.common.restartstrategy.RestartStrategies;
import org.apache.flink.api.java.utils.ParameterTool;
import org.apache.flink.runtime.state.filesystem.FsStateBackend;
import org.apache.flink.streaming.api.CheckpointingMode;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.datastream.SplitStream;
import org.apache.flink.streaming.api.environment.CheckpointConfig;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer011;
import org.apache.flink.util.OutputTag;



/**
 * ClassName: PreciseExposureStreaming
 * Function: Flink main class. Reads tracking events from Kafka, keeps the {@code "et_pe"}
 * branch of the split stream, runs it through the BL-layer map/filter stages, keys it, and
 * feeds the keyed stream to both {@code BloomFilterFunction} and {@code HyperLogLogFunction}.
 * Date: 2019/12/5 15:54
 *
 * @author liuzhe
 * @since JDK 1.8
 */
public class PreciseExposureStreaming {

    /**
     * Job entry point: parses the command-line arguments, configures checkpointing and the
     * restart strategy, builds the streaming topology and executes it.
     *
     * <p>Required arguments: {@code inBrokers}, {@code inTopic}, {@code dimJdbcUrl},
     * {@code sinkJdbcUrl}, {@code sinkBFBlTableName}, {@code sinkHLLBlTableName},
     * {@code checkpointPath}. Optional arguments: {@code groupId}
     * (default {@code flink_preciseexposure_group}), {@code windowSize} (default 30),
     * {@code parallelism} (default 1), {@code startTime} (default {@code null}).
     *
     * <p>The job runs with {@link TimeCharacteristic#ProcessingTime}.
     *
     * @param args command-line arguments in the {@code --key value} form understood by
     *             {@link ParameterTool#fromArgs(String[])}
     * @throws Exception if an argument is missing or malformed (the usage text is printed
     *                   first), or if the job fails to execute
     * @author liuzhe
     * @since JDK 1.8
     */
    public static void main(String[] args) throws Exception {
        final String inBrokers;
        final String inTopic;
        final String groupId;
        final String startTime;
        final String checkpointPath;
        final int parallelism;

        try {
            ParameterTool parameterTool = ParameterTool.fromArgs(args);
            inBrokers = parameterTool.getRequired("inBrokers");
            inTopic = parameterTool.getRequired("inTopic");
            groupId = parameterTool.get("groupId", "flink_preciseexposure_group");
            parallelism = parameterTool.getInt("parallelism", 1);
            startTime = parameterTool.get("startTime", null);
            checkpointPath = parameterTool.getRequired("checkpointPath");
            // Echoes the effective arguments. It also reads every required argument
            // (dimJdbcUrl, sinkJdbcUrl, sinkBFBlTableName, sinkHLLBlTableName), so a missing
            // one is still rejected here even though the topology below does not use it yet.
            printUsage(parameterTool);
        } catch (RuntimeException e) {
            // Fail fast: continuing would build the job with null brokers/topic/checkpoint
            // path and fail later with a far less helpful error.
            printUsage();
            throw e;
        }

        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        /*
        Checkpoint settings
         */
        // start a checkpoint every 1000 ms
        env.enableCheckpointing(1000);
        CheckpointConfig checkpointConfig = env.getCheckpointConfig();
        // set mode to exactly-once (this is the default)
        checkpointConfig.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
        // make sure 500 ms of progress happen between checkpoints
        checkpointConfig.setMinPauseBetweenCheckpoints(500);
        // checkpoints have to complete within one minute, or are discarded
        checkpointConfig.setCheckpointTimeout(60000);
        // allow only one checkpoint to be in progress at the same time
        checkpointConfig.setMaxConcurrentCheckpoints(1);
        // enable externalized checkpoints which are retained after job cancellation
        checkpointConfig.enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);
        // fail the task if an error occurs in the execution of its checkpoint procedure
        checkpointConfig.setFailOnCheckpointingErrors(true);
        // state backend: file-system backend rooted at checkpointPath
        env.setStateBackend(new FsStateBackend(checkpointPath, true));
        // 1 restart attempt with a 3000 ms (3 s) delay between attempts.
        // NOTE(review): confirm 3 s is intended; 30 s would be 30000.
        env.setRestartStrategy(RestartStrategies.fixedDelayRestart(1, 3000));

        // Time characteristic (EventTime / IngestionTime / ProcessingTime)
        env.setStreamTimeCharacteristic(TimeCharacteristic.ProcessingTime);

        BlMaiDianKafkaSource blMaiDianKafkaSource = new BlMaiDianKafkaSource(inBrokers, inTopic, groupId, startTime);
        FlinkKafkaConsumer011<String> flinkKafkaConsumer = blMaiDianKafkaSource.addSource();

        // Split the raw tracking stream into named branches
        SplitStream<String> blMaiDianDataStream = env
                .addSource(flinkKafkaConsumer).uid("id_blmaidiandata_source").setParallelism(parallelism)
                .split(new BlMaiDianOutputSelector());

        /*
        BL-layer cleaning, transformation and keying of the "et_pe" branch
         */
        @SuppressWarnings("unchecked")
        KeyedStream<BlPreciseExposureBean, String> blPreciseExposureKeyedStream = blMaiDianDataStream
                .select("et_pe")
                .map(new BlPreciseExposureMapFunction()).uid("id_blpreciseexposure_map").setParallelism(parallelism)
                .filter(new BlPreciseExposureFilterFunction()).uid("id_blpreciseexposure_filter").setParallelism(parallelism)
                .keyBy(new BlPreciseExposureKeySelector());

        // The same keyed stream is processed by both operators.
        SingleOutputStreamOperator<BlPreciseExposureBean> blPreciseExposureStreamBF =
                blPreciseExposureKeyedStream.process(new BloomFilterFunction());
        SingleOutputStreamOperator<BlPreciseExposureBean> blPreciseExposureStreamHLL =
                blPreciseExposureKeyedStream.process(new HyperLogLogFunction());

        /*
        BL-layer output.
        NOTE(review): both streams are only printed to stdout. The BlPreciseExposureMysqlSink
        sinks (sinkJdbcUrl with sinkBFBlTableName / sinkHLLBlTableName) are not attached;
        confirm whether that is intended before deploying.
         */
        blPreciseExposureStreamBF.print();
        blPreciseExposureStreamHLL.print();

        env.execute("bl_et_pe_preciseexposure_inc_d_distinct");
    }

    /**
     * Function: printUsage
     * Prints the list of supported command-line arguments to standard output. Called when
     * argument parsing fails.
     * Date: 2019/12/26 14:39
     *
     * @author liuzhe
     * @since JDK 1.8
     */
    public static void printUsage(){
        // NOTE(review): --startFromLatest and --windowSize are listed here, but main does not
        // pass either of them to the topology.
        System.out.println("Missing parameters!\n" +
                "Usage:\n" +
                "    --inBrokers <source kafka brokers> \n" +
                "    --inTopic <source kafka topic>\n" +
                "    --groupId <source kafka groupid, default: flink_preciseexposure_group> \n" +
                "    --startFromLatest <start from the latest kafka record, default: false> \n" +
                "    --windowSize <window size(second), default: 30 (s)> \n" +
                "    --dimJdbcUrl <dim database url> \n" +
                "    --sinkJdbcUrl <target database url> \n" +
                "    --sinkBFBlTableName <target bl table name> \n" +
                "    --sinkHLLBlTableName <target ml table name> \n" +
                "    --parallelism <parallelism, default 1> \n" +
                "    --startTime <kafka startTime, default null> \n" +
                "    --checkpointPath <checkpointPath, hdfs> \n"
        );
    }

    /**
     * Function: printUsage
     * Prints the effective value of each command-line argument to standard output.
     * Date: 2019/12/25 17:00
     *
     * @param args parsed arguments; every required key is read via
     *             {@link ParameterTool#getRequired(String)}, so this method throws a
     *             {@link RuntimeException} if one of them is missing
     * @author liuzhe
     * @since JDK 1.8
     */
    public static void printUsage(ParameterTool args){
        System.out.println("Print parameters!\n" +
                "Usage:\n" +
                "    --inBrokers " + args.getRequired("inBrokers") + " \n" +
                "    --inTopic " + args.getRequired("inTopic") + "\n" +
                "    --groupId " + args.get("groupId", "flink_preciseexposure_group") + " \n" +
                "    --startFromLatest <start from the latest kafka record, default: false> \n" +
                "    --windowSize " + args.getInt("windowSize", 30) + " \n" +
                "    --dimJdbcUrl " + args.getRequired("dimJdbcUrl") + " \n" +
                "    --sinkJdbcUrl " + args.getRequired("sinkJdbcUrl") + " \n" +
                "    --sinkBFBlTableName " + args.getRequired("sinkBFBlTableName") + " \n" +
                "    --sinkHLLBlTableName " + args.getRequired("sinkHLLBlTableName") + " \n" +
                "    --parallelism "+ args.getInt("parallelism", 1) + " \n" +
                "    --startTime " + args.get("startTime", null) + " \n" +
                "    --checkpointPath " + args.getRequired("checkpointPath") + " \n"
        );
    }

}
