Import the MySQL table api_cashbacklock into Hive

--***************************************************************
--*Script name: create_test_20190720.sql
--*Purpose: create the Hive mapping table for api_cashbacklock
--*Business name: tl
--*Input data: hdfs://bj-gmei-hdfs/data/log/thirdparty/test_20190720/partition_date=${partition_date}/
--*Author: data-exchange
--*Updated: 2019-07-15 11:41:38
--***************************************************************
--Set global variables & UDFs
SET mapreduce.job.queuename=data;
--Declare the database
USE default;
--Create the HDFS-mapped external table
CREATE EXTERNAL TABLE IF NOT EXISTS test_20190720 (
id BIGINT COMMENT '{"chs_name":"ID","description":"","etl":"","value":"","remark":""}',
lock_at STRING COMMENT '{"chs_name":"cashback lock time","description":"","etl":"","value":"","remark":""}',
order_id STRING COMMENT '{"chs_name":"1","description":"","etl":"","value":"","remark":""}'
) COMMENT 'api_cashbacklock'
PARTITIONED BY (partition_date STRING COMMENT 'partition date')
ROW FORMAT
DELIMITED FIELDS TERMINATED BY '\001'
COLLECTION ITEMS TERMINATED BY '\002'
MAP KEYS TERMINATED BY '\003'
LINES TERMINATED BY '\n'
NULL DEFINED AS ''
STORED AS TEXTFILE
LOCATION '/data/log/thirdparty/test_20190720';
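For reference, a quick way to sanity-check the mapping once a partition has been loaded (the partition value below is an example; any loaded date works):

SHOW PARTITIONS test_20190720;
SELECT id, lock_at, order_id FROM test_20190720 WHERE partition_date = '20190715' LIMIT 10;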
--***************************************************************
--*Script name: test_20190720.sql
--*Purpose: load the daily partition of the api_cashbacklock mapping table
--*Business name: tl
--*Input data: hdfs://bj-gmei-hdfs/data/log/thirdparty/test_20190720/partition_date=${partition_date}/
--*Author: data-exchange
--*Updated: 2019-07-15 11:41:38
--***************************************************************
--Set global variables & UDFs
SET mapred.input.dir.recursive=true;
SET hive.mapred.supports.subdirectories=true;
--Declare the database
USE ${dbname};
--Load the external HDFS data partition
ALTER TABLE test_20190720 DROP IF EXISTS PARTITION (partition_date = '${partition_date}');
ALTER TABLE test_20190720 ADD IF NOT EXISTS PARTITION (partition_date = '${partition_date}') LOCATION 'hdfs://bj-gm-test-data001:8020/data/log/thirdparty/test_20190720/partition_date=${partition_date}/';
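For illustration, with dbname=default (as set in the job.properties below) and a partition_date of 20190715, the hive2 action resolves this script to:

USE default;
ALTER TABLE test_20190720 DROP IF EXISTS PARTITION (partition_date = '20190715');
ALTER TABLE test_20190720 ADD IF NOT EXISTS PARTITION (partition_date = '20190715') LOCATION 'hdfs://bj-gm-test-data001:8020/data/log/thirdparty/test_20190720/partition_date=20190715/';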
<coordinator-app name="test_20190720"
frequency="0 1 * * *"
start="${start_date}" end="${end_date}" timezone="${timeZone}"
xmlns="uri:oozie:coordinator:0.2"
>
<controls>
<execution>FIFO</execution>
</controls>
<action>
<workflow>
<app-path>${wf_application_path}</app-path>
<configuration>
<property>
<name>jobTracker</name>
<value>${jobTracker}</value>
</property>
<property>
<name>nameNode</name>
<value>${nameNode}</value>
</property>
<property>
<name>queueName</name>
<value>${queueName}</value>
</property>
<property>
<name>checkClient</name>
<value>${checkClient}</value>
</property>
<property>
<name>checkEXEC</name>
<value>${checkEXEC}</value>
</property>
<property>
<name>zxURL</name>
<value>${zxURL}</value>
</property>
<property>
<name>userName</name>
<value>${userName}</value>
</property>
<property>
<name>passWord</name>
<value>${passWord}</value>
</property>
<property>
<name>sourceTableName</name>
<value>${sourceTableName}</value>
</property>
<property>
<name>columns</name>
<value>${columns}</value>
</property>
<property>
<name>targetTableName</name>
<value>${targetTableName}</value>
</property>
<property>
<name>fields_terminated</name>
<value>${fields_terminated}</value>
</property>
<property>
<name>lines_terminated</name>
<value>${lines_terminated}</value>
</property>
<property>
<name>num_mappers</name>
<value>${num_mappers}</value>
</property>
<property>
<name>dbname</name>
<value>${dbname}</value>
</property>
<property>
<name>jdbcURL</name>
<value>${jdbcURL}</value>
</property>
<property>
<name>pwd</name>
<value>${pwd}</value>
</property>
<property>
<name>partition_date</name>
<value>${coord:formatTime(coord:dateOffset(coord:nominalTime(), -1, 'DAY'), 'yyyyMMdd')}</value>
</property>
<property>
<name>year</name>
<value>${coord:formatTime(coord:dateOffset(coord:nominalTime(), -1, 'DAY'), 'yyyy')}</value>
</property>
<property>
<name>day</name>
<value>${coord:formatTime(coord:dateOffset(coord:nominalTime(), -1, 'DAY'), 'dd')}</value>
</property>
<property>
<name>month</name>
<value>${coord:formatTime(coord:dateOffset(coord:nominalTime(), -1, 'DAY'), 'MM')}</value>
</property>
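<!-- The four properties above (partition_date, year, day, month) are all derived from the same instant, the coordinator's nominal time minus one day, formatted as yyyyMMdd, yyyy, dd and MM respectively. -->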
<property>
<name>oozie.use.system.libpath</name>
<value>true</value>
</property>
<property>
<name>start_date</name>
<value>${start_date}</value>
</property>
<property>
<name>end_date</name>
<value>${end_date}</value>
</property>
</configuration>
</workflow>
</action>
</coordinator-app>
#Template job.properties file for the full data import
oozie.use.system.libpath=true
security_enabled=false
#appName
#The name must be identical to the Hive table name
appName=test_20190720
dbname=default
nameNode=hdfs://bj-gm-test-data001:8020
jobTracker=bj-gm-test-data001:8032
queueName=data
timeZone=GMT+0800
#Job execution start time
start_date=2019-07-15T00:00+0800
#Job execution end time
end_date=2019-07-26T23:59+0800
#HiveServer2 URL
jdbcURL=jdbc:hive2://bj-gm-test-data002:10000
#HiveServer2 password
pwd=data
checkClient=${nameNode}/user/hive/project/utils/data_ready_checker/client
checkEXEC=./checkclient/bin/checker.sh
#MySQL URL
#This is only an example; the actual URL depends on the database being imported from
zxURL=jdbc:mysql://172.22.30.12:3306/zhengxing
#Database username
userName=work
#Database password
passWord=zJnxVEhyyxeC7ciqxdMITVyWqOFc2mew
#Source table
sourceTableName=api_cashbacklock
#Output (target) table
targetTableName=test_20190720
#Source table columns; the order must match the column order in the table
columns="id,lock_at,order_id"
num_mappers=1
fields_terminated=\\001
lines_terminated=\\n
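#Note: the backslashes above are escaped once for properties parsing. \\001 is read from
#this file as the four-character string \001, which Sqoop interprets as an octal escape
#for byte 0x01, the same field delimiter the Hive table declares with FIELDS TERMINATED
#BY '\001'; \\n likewise becomes \n, matching LINES TERMINATED BY '\n'.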
oozie.coord.application.path=${nameNode}/user/hive/project/workflow/${appName}
wf_application_path=${nameNode}/user/hive/project/workflow/${appName}
<workflow-app name="test_20190720" xmlns="uri:oozie:workflow:0.5">
<start to="test_20190720_sqoop"/>
<kill name="Kill">
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
</kill>
<action name="test_20190720_sqoop">
<sqoop xmlns="uri:oozie:sqoop-action:0.2">
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<prepare>
<delete path="${nameNode}/data/log/thirdparty/${targetTableName}/partition_date=${partition_date}"/>
</prepare>
<configuration>
<property>
<name>mapred.job.queue.name</name>
<value>${queueName}</value>
</property>
</configuration>
<command>import --connect ${zxURL} --username ${userName} --password ${passWord} --table ${sourceTableName} --columns ${columns} --escaped-by '\\' --target-dir /data/log/thirdparty/${targetTableName}/partition_date=${partition_date} --fields-terminated-by ${fields_terminated} --lines-terminated-by ${lines_terminated} --num-mappers ${num_mappers} --hive-drop-import-delims --null-string "" --null-non-string ""</command>
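<!-- Note: Oozie splits the command string above on whitespace and does not apply shell quoting rules, so the quote characters around the escaped-by and null-string values reach Sqoop verbatim; arg elements are the usual alternative when an argument needs quoting. -->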
</sqoop>
<ok to="test_20190720_check"/>
<error to="Kill"/>
</action>
<action name="test_20190720_check" retry-max="3" retry-interval="5">
<shell xmlns="uri:oozie:shell-action:0.2">
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<configuration>
<property>
<name>mapred.job.queue.name</name>
<value>${queueName}</value>
</property>
</configuration>
<exec>/bin/bash</exec>
<argument>${checkEXEC}</argument>
<env-var>TYPE=hdfs</env-var>
<env-var>URI=/data/log/thirdparty/${targetTableName}/partition_date=${partition_date}/</env-var>
<archive>${checkClient}#checkclient</archive>
<capture-output/>
</shell>
<ok to="test_20190720_job"/>
<error to="Kill"/>
</action>
<action name="test_20190720_job" cred="hive2" retry-max="3" retry-interval="5">
<hive2 xmlns="uri:oozie:hive2-action:0.1">
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<configuration>
<property>
<name>mapred.job.queue.name</name>
<value>${queueName}</value>
</property>
</configuration>
<jdbc-url>${jdbcURL}</jdbc-url>
<password>${pwd}</password>
<script>/user/hive/project/etl/test_20190720/test_20190720.sql</script>
<param>partition_date=${partition_date}</param>
<param>dbname=${dbname}</param>
</hive2>
<ok to="End"/>
<error to="Kill"/>
</action>
<end name="End"/>
</workflow-app>
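Taken together, the pieces above form one daily pipeline: the coordinator triggers the workflow, whose Sqoop action imports the MySQL table api_cashbacklock into the partition directory on HDFS, whose shell action verifies the data landed (retrying up to three times), and whose hive2 action runs the ALTER TABLE script above to register the partition; any failed action routes to the Kill node.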