Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
W
warehouse
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
1
Merge Requests
1
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
徐伟
warehouse
Commits
f9106e18
Commit
f9106e18
authored
Jul 15, 2019
by
data@bj-gm-test-data001
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
删除错误的导表文件
parent
e0f0d397
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
0 additions
and
268 deletions
+0
-268
create_test_20190720.sql
etl/test_20190720/create_test_20190720.sql
+0
-32
test_20190720.sql
etl/test_20190720/test_20190720.sql
+0
-20
coordinator.xml
workflow/test_20190720/coordinator.xml
+0
-112
job.properties
workflow/test_20190720/job.properties
+0
-39
workflow.xml
workflow/test_20190720/workflow.xml
+0
-65
No files found.
etl/test_20190720/create_test_20190720.sql
deleted
100644 → 0
View file @
e0f0d397
-- ***************************************************************
-- * Script   : create_test_20190720.sql
-- * Purpose  : Create the external Hive mapping table for the
-- *            api_cashbacklock export (table test_20190720).
-- * Business : tl
-- * Input    : hdfs://bj-gmei-hdfs/data/log/thirdparty/test_20190720/partition_date=${partition_date}/
-- * Author   : data-exchange
-- * Updated  : 2019-07-15 11:30:33
-- ***************************************************************

-- Global settings: submit MapReduce work to the "data" YARN queue.
SET mapreduce.job.queuename = data;

-- Target database for the mapping table.
USE default;

-- External table mapped onto the raw HDFS export directory; the data
-- files are Sqoop text exports using \001/\002/\003 delimiters.
CREATE EXTERNAL TABLE IF NOT EXISTS test_20190720 (
    id       BIGINT COMMENT '{"chs_name":"ID","description":"","etl":"","value":"","remark":""}',
    lock_at  STRING COMMENT '{"chs_name":"返现锁定时间","description":"","etl":"","value":"","remark":""}',
    order_id STRING COMMENT '{"chs_name":"1","description":"","etl":"","value":"","remark":""}'
)
COMMENT 'api_cashbacklock'
PARTITIONED BY (partition_date STRING COMMENT '分区日期')
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\001'
    COLLECTION ITEMS TERMINATED BY '\002'
    MAP KEYS TERMINATED BY '\003'
    LINES TERMINATED BY '\n'
NULL DEFINED AS ''
STORED AS TEXTFILE
LOCATION '/data/log/thirdparty/test_20190720';
\ No newline at end of file
etl/test_20190720/test_20190720.sql
deleted
100644 → 0
View file @
e0f0d397
-- ***************************************************************
-- * Script   : test_20190720.sql
-- *            (header previously misnamed this file create_test_20190720.sql)
-- * Purpose  : (Re)attach the daily HDFS partition of the
-- *            api_cashbacklock export to table test_20190720.
-- * Business : tl
-- * Input    : hdfs://bj-gmei-hdfs/data/log/thirdparty/test_20190720/partition_date=${partition_date}/
-- * Author   : data-exchange
-- * Updated  : 2019-07-15 11:30:33
-- ***************************************************************

-- Allow Hive to pick up data files placed in subdirectories of the
-- partition path (Sqoop may write part files under nested dirs).
SET mapred.input.dir.recursive = true;
SET hive.mapred.supports.subdirectories = true;

-- Database name is injected by the Oozie workflow (param: dbname).
USE ${dbname};

-- Drop-then-add makes the daily load idempotent: re-running the job for
-- the same partition_date simply re-registers the same HDFS location.
ALTER TABLE test_20190720 DROP IF EXISTS PARTITION (partition_date = '${partition_date}');

ALTER TABLE test_20190720 ADD IF NOT EXISTS PARTITION (partition_date = '${partition_date}')
LOCATION 'hdfs://bj-gm-test-data001:8020/data/log/thirdparty/test_20190720/partition_date=${partition_date}/';
\ No newline at end of file
workflow/test_20190720/coordinator.xml
deleted
100644 → 0
View file @
e0f0d397
<!-- Oozie coordinator: triggers the test_20190720 import workflow daily
     at 01:00 (cron "0 1 * * *"), materializing actions FIFO between
     ${start_date} and ${end_date}. All values are forwarded verbatim
     from job.properties into the workflow configuration. -->
<coordinator-app name="test_20190720"
                 frequency="0 1 * * *"
                 start="${start_date}"
                 end="${end_date}"
                 timezone="${timeZone}"
                 xmlns="uri:oozie:coordinator:0.2">
    <controls>
        <execution>FIFO</execution>
    </controls>
    <action>
        <workflow>
            <app-path>${wf_application_path}</app-path>
            <configuration>
                <!-- Cluster endpoints and queue. -->
                <property><name>jobTracker</name><value>${jobTracker}</value></property>
                <property><name>nameNode</name><value>${nameNode}</value></property>
                <property><name>queueName</name><value>${queueName}</value></property>
                <!-- Data-ready checker client and entry script. -->
                <property><name>checkClient</name><value>${checkClient}</value></property>
                <property><name>checkEXEC</name><value>${checkEXEC}</value></property>
                <!-- Source MySQL connection. -->
                <property><name>zxURL</name><value>${zxURL}</value></property>
                <property><name>userName</name><value>${userName}</value></property>
                <property><name>passWord</name><value>${passWord}</value></property>
                <!-- Sqoop import parameters. -->
                <property><name>sourceTableName</name><value>${sourceTableName}</value></property>
                <property><name>columns</name><value>${columns}</value></property>
                <property><name>targetTableName</name><value>${targetTableName}</value></property>
                <property><name>fields_terminated</name><value>${fields_terminated}</value></property>
                <property><name>lines_terminated</name><value>${lines_terminated}</value></property>
                <property><name>num_mappers</name><value>${num_mappers}</value></property>
                <!-- Hive target. -->
                <property><name>dbname</name><value>${dbname}</value></property>
                <property><name>jdbcURL</name><value>${jdbcURL}</value></property>
                <property><name>pwd</name><value>${pwd}</value></property>
                <!-- Date parts of the previous day (the partition being loaded). -->
                <property>
                    <name>partition_date</name>
                    <value>${coord:formatTime(coord:dateOffset(coord:nominalTime(), -1, 'DAY'), 'yyyyMMdd')}</value>
                </property>
                <property>
                    <name>year</name>
                    <value>${coord:formatTime(coord:dateOffset(coord:nominalTime(), -1, 'DAY'), 'yyyy')}</value>
                </property>
                <property>
                    <name>day</name>
                    <value>${coord:formatTime(coord:dateOffset(coord:nominalTime(), -1, 'DAY'), 'dd')}</value>
                </property>
                <property>
                    <name>month</name>
                    <value>${coord:formatTime(coord:dateOffset(coord:nominalTime(), -1, 'DAY'), 'MM')}</value>
                </property>
                <property><name>oozie.use.system.libpath</name><value>True</value></property>
                <property><name>start_date</name><value>${start_date}</value></property>
                <property><name>end_date</name><value>${end_date}</value></property>
            </configuration>
        </workflow>
    </action>
</coordinator-app>
workflow/test_20190720/job.properties
deleted
100644 → 0
View file @
e0f0d397
# job.properties template for the full daily import of api_cashbacklock.
oozie.use.system.libpath=True
security_enabled=False

# appName must match the Hive table name exactly.
appName=test_20190720
dbname=default

# Cluster endpoints and YARN queue.
nameNode=hdfs://bj-gm-test-data001:8020
jobTracker=bj-gm-test-data001:8032
queueName=data
timeZone=GMT+0800

# Coordinator schedule window (start / end of daily materialization).
start_date=2019-07-15T00:00+0800
end_date=2019-07-26T23:59+0800

# HiveServer2 JDBC URL and password.
jdbcURL=jdbc:hive2://bj-gm-test-data002:10000
pwd=data

# Data-ready checker: archive on HDFS and its entry script.
checkClient=${nameNode}/user/hive/project/utils/data_ready_checker/client
checkEXEC=./checkclient/bin/checker.sh

# Source MySQL URL (example only; adjust per target database).
zxURL=jdbc:mysql://172.22.30.12:3306/zhengxing
# Database user name.
userName=work
# SECURITY: plaintext credential committed to version control — move this
# to an Oozie credential store / secrets manager and rotate the password.
passWord=zJnxVEhyyxeC7ciqxdMITVyWqOFc2mew

# Source table and target Hive table.
sourceTableName=api_cashbacklock
targetTableName=test_20190720
# Column list must match the Hive table's column order.
columns="id,lock_at,order_id"
num_mappers=1

# Sqoop delimiters: \001 field separator, \n line terminator
# (double backslash survives java.util.Properties escaping).
fields_terminated=\\001
lines_terminated=\\n

# Deployment paths derived from appName.
oozie.coord.application.path=${nameNode}/user/hive/project/workflow/${appName}
wf_application_path=${nameNode}/user/hive/project/workflow/${appName}
workflow/test_20190720/workflow.xml
deleted
100644 → 0
View file @
e0f0d397
<!-- Oozie workflow: Sqoop-import api_cashbacklock from MySQL into HDFS,
     verify the partition directory is ready, then attach the partition
     in Hive via HiveServer2. Any failed action routes to Kill. -->
<workflow-app name="test_20190720" xmlns="uri:oozie:workflow:0.5">

    <start to="test_20190720_sqoop"/>

    <kill name="Kill">
        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>

    <!-- Step 1: import the MySQL table into HDFS with Sqoop.
         NOTE(review): the password is passed on the command line
         (visible in job logs) — consider a password-file or credential. -->
    <action name="test_20190720_sqoop">
        <sqoop xmlns="uri:oozie:sqoop-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <prepare>
                <!-- Remove any partial output from a previous run. -->
                <delete path="${nameNode}/data/log/thirdparty/${targetTableName}/${partition_date}"/>
            </prepare>
            <configuration>
                <property>
                    <name>mapred.job.queue.name</name>
                    <value>${queueName}</value>
                </property>
            </configuration>
            <command>import --connect ${zxURL} --username ${userName} --password ${passWord} --table ${sourceTableName} --columns ${columns} --escaped-by '\\' --target-dir /data/log/thirdparty/${targetTableName}/partition_date=${partition_date} --fields-terminated-by ${fields_terminated} --lines-terminated-by ${lines_terminated} --num-mappers ${num_mappers} --hive-drop-import-delims --null-string "" --null-non-string ""</command>
        </sqoop>
        <ok to="test_20190720_check"/>
        <error to="Kill"/>
    </action>

    <!-- Step 2: run the data-ready checker against the imported directory. -->
    <action name="test_20190720_check" retry-max="3" retry-interval="5">
        <shell xmlns="uri:oozie:shell-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <configuration>
                <property>
                    <name>mapred.job.queue.name</name>
                    <value>${queueName}</value>
                </property>
            </configuration>
            <exec>/bin/bash</exec>
            <argument>${checkEXEC}</argument>
            <env-var>TYPE=hdfs</env-var>
            <env-var>URI=/data/log/thirdparty/${targetTableName}/partition_date=${partition_date}/</env-var>
            <archive>${checkClient}#checkclient</archive>
            <capture-output/>
        </shell>
        <ok to="test_20190720_job"/>
        <error to="Kill"/>
    </action>

    <!-- Step 3: attach the new partition in Hive via HiveServer2. -->
    <action name="test_20190720_job" cred="hive2" retry-max="3" retry-interval="5">
        <hive2 xmlns="uri:oozie:hive2-action:0.1">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <configuration>
                <property>
                    <name>mapred.job.queue.name</name>
                    <value>${queueName}</value>
                </property>
            </configuration>
            <jdbc-url>${jdbcURL}</jdbc-url>
            <password>${pwd}</password>
            <script>/user/hive/project/etl/test_20190720/test_20190720.sql</script>
            <param>partition_date=${partition_date}</param>
            <param>dbname=${dbname}</param>
        </hive2>
        <ok to="End"/>
        <error to="Kill"/>
    </action>

    <end name="End"/>
</workflow-app>
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment