Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
W
warehouse
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
1
Merge Requests
1
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
徐伟
warehouse
Commits
15fbd240
Commit
15fbd240
authored
Jul 12, 2019
by
data@bj-gm-test-data001
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
删除错误的导表文件
parent
68b4ea90
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
0 additions
and
268 deletions
+0
-268
create_test_20190712.sql
etl/test_20190712/create_test_20190712.sql
+0
-32
test_20190712.sql
etl/test_20190712/test_20190712.sql
+0
-19
coordinator.xml
workflow/test_20190712/coordinator.xml
+0
-112
job.properties
workflow/test_20190712/job.properties
+0
-39
workflow.xml
workflow/test_20190712/workflow.xml
+0
-66
No files found.
etl/test_20190712/create_test_20190712.sql
deleted
100644 → 0
View file @
68b4ea90
-- ---------------------------------------------------------------
-- Script   : create_test_20190712.sql
-- Purpose  : test
-- Business : tl
-- Input    : hdfs://bj-gmei-hdfs/user/hive/warehouse/tl.db/test_20190712/partition_day=${partition_day}/
-- Author   : data-exchange
-- Updated  : 2019-07-12 15:20:08
-- ---------------------------------------------------------------
-- NOTE(review): the Input line above mentions partition_day, while the
-- table below is partitioned by partition_date and located under
-- /data/log/thirdparty; the header looks like template text, confirm.

-- Session setup: submit to the "data" queue and work in the default database.
SET mapreduce.job.queuename=data;
USE default;

-- External, delimited-text table over the files under LOCATION.
-- One string partition column (partition_date) is declared.
CREATE EXTERNAL TABLE IF NOT EXISTS test_20190712 (
    account       STRING  COMMENT '支付宝账户',
    created_time  STRING  COMMENT '创建时间',
    id            BIGINT  COMMENT 'ID',
    name          STRING  COMMENT '用户名',
    updated_time  STRING  COMMENT '最后更新时间'
)
COMMENT 'test'
PARTITIONED BY (
    partition_date STRING COMMENT '分区日期'
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\001'
    COLLECTION ITEMS TERMINATED BY '\002'
    MAP KEYS TERMINATED BY '\003'
    LINES TERMINATED BY '\n'
    -- Empty strings in the files are read back as NULL.
    NULL DEFINED AS ''
STORED AS TEXTFILE
LOCATION '/data/log/thirdparty/test_20190712';
\ No newline at end of file
etl/test_20190712/test_20190712.sql
deleted
100644 → 0
View file @
68b4ea90
-- ---------------------------------------------------------------
-- Script   : test_20190712.sql
-- Purpose  : test
-- Business : tl
-- Input    : hdfs://bj-gmei-hdfs/user/hive/warehouse/tl.db/test_20190712/partition_day=${partition_day}/
-- Author   : data-exchange
-- Updated  : 2019-07-12 15:20:08
-- ---------------------------------------------------------------
-- Parameters substituted by the caller: ${dbname}, ${partition_date}.

-- Allow input directories to be read recursively.
SET mapred.input.dir.recursive=true;
SET hive.mapred.supports.subdirectories=true;

-- Select the target database.
USE ${dbname};

-- Re-register the partition for ${partition_date}: drop any existing
-- definition first, then add it again pointing at that day's HDFS directory.
ALTER TABLE test_20190712
    DROP IF EXISTS PARTITION (partition_date = '${partition_date}');

ALTER TABLE test_20190712
    ADD IF NOT EXISTS PARTITION (partition_date = '${partition_date}')
    LOCATION 'hdfs://bj-gm-test-data001:8020/data/log/thirdparty/test_20190712/${partition_date}';
\ No newline at end of file
workflow/test_20190712/coordinator.xml
deleted
100644 → 0
View file @
68b4ea90
<coordinator-app
    name="test_20190712"
    frequency="0 1 * * *"
    start="${start_date}"
    end="${end_date}"
    timezone="${timeZone}"
    xmlns="uri:oozie:coordinator:0.2">
  <!-- Coordinator for the test_20190712 workflow. The start/end window and
       the time zone are taken from job parameters. -->
  <controls>
    <execution>FIFO</execution>
  </controls>
  <action>
    <workflow>
      <app-path>${wf_application_path}</app-path>
      <configuration>
        <!-- Cluster endpoints and queue, passed through unchanged. -->
        <property><name>jobTracker</name><value>${jobTracker}</value></property>
        <property><name>nameNode</name><value>${nameNode}</value></property>
        <property><name>queueName</name><value>${queueName}</value></property>

        <!-- Data-ready checker archive and entry script. -->
        <property><name>checkClient</name><value>${checkClient}</value></property>
        <property><name>checkEXEC</name><value>${checkEXEC}</value></property>

        <!-- Source database connection and import settings. -->
        <property><name>zxURL</name><value>${zxURL}</value></property>
        <property><name>userName</name><value>${userName}</value></property>
        <property><name>passWord</name><value>${passWord}</value></property>
        <property><name>sourceTableName</name><value>${sourceTableName}</value></property>
        <property><name>columns</name><value>${columns}</value></property>
        <property><name>targetTableName</name><value>${targetTableName}</value></property>
        <property><name>fields_terminated</name><value>${fields_terminated}</value></property>
        <property><name>lines_terminated</name><value>${lines_terminated}</value></property>
        <property><name>num_mappers</name><value>${num_mappers}</value></property>

        <!-- Hive database and HiveServer2 connection. -->
        <property><name>dbname</name><value>${dbname}</value></property>
        <property><name>jdbcURL</name><value>${jdbcURL}</value></property>
        <property><name>pwd</name><value>${pwd}</value></property>

        <!-- Date values derived from the nominal time minus one day. -->
        <property>
          <name>partition_date</name>
          <value>${coord:formatTime(coord:dateOffset(coord:nominalTime(), -1, 'DAY'), 'yyyyMMdd')}</value>
        </property>
        <property>
          <name>year</name>
          <value>${coord:formatTime(coord:dateOffset(coord:nominalTime(), -1, 'DAY'), 'yyyy')}</value>
        </property>
        <property>
          <name>day</name>
          <value>${coord:formatTime(coord:dateOffset(coord:nominalTime(), -1, 'DAY'), 'dd')}</value>
        </property>
        <property>
          <name>month</name>
          <value>${coord:formatTime(coord:dateOffset(coord:nominalTime(), -1, 'DAY'), 'MM')}</value>
        </property>

        <!-- Remaining pass-through settings. -->
        <property><name>oozie.use.system.libpath</name><value>True</value></property>
        <property><name>start_date</name><value>${start_date}</value></property>
        <property><name>end_date</name><value>${end_date}</value></property>
      </configuration>
    </workflow>
  </action>
</coordinator-app>
workflow/test_20190712/job.properties
deleted
100644 → 0
View file @
68b4ea90
# Template job.properties for a full-table import.
oozie.use.system.libpath=True
security_enabled=False

# appName: by convention it must be identical to the Hive table name.
appName=test_20190712
dbname=default

# Cluster endpoints.
# NOTE(review): nameNode is a "test" host while jobTracker is a "prod" host - confirm this mix is intended.
nameNode=hdfs://bj-gm-test-data001:8020
jobTracker=bj-gm-prod-cos-datacenter005:8032
queueName=data
timeZone=GMT+0800

# Start time of the job schedule.
start_date=2019-07-09T00:00+0800
# End time of the job schedule.
end_date=2019-08-28T23:59+0800

# HiveServer2 URL.
jdbcURL=jdbc:hive2://bj-gm-prod-cos-datacenter006:2181,bj-gm-prod-cos-datacenter007:2181,bj-gm-prod-cos-datacenter008:2181/;serviceDiscoveryMode=zookeeper
# HiveServer2 password.
pwd=data

# Data-ready checker: archive location and entry script.
checkClient=hdfs://bj-gm-test-data001:8020/user/hive/project/utils/data_ready_checker/client
checkEXEC=./checkclient/bin/checker.sh

# MySQL URL.
# This is only an example; the actual URL depends on the database being imported.
zxURL=jdbc:mysql://172.22.30.12:3306/zhengxing?tinyInt1isBit=false
# Database user name.
userName=work
# Database password.
# NOTE(review): plaintext credential kept in version control - consider supplying it from outside the repository.
passWord=zJnxVEhyyxeC7ciqxdMITVyWqOFc2mew

# Source table.
sourceTableName=api_alipayaccout
# Output (target) table.
targetTableName=test_20190712
# Columns of the source table; must be listed in the same order as in the table.
columns="account,created_time,id,name,updated_time"
num_mappers=1

# Field and line terminators (the backslash is escaped for the properties format).
fields_terminated=\\001
lines_terminated=\\n

# HDFS locations of the coordinator and workflow applications.
oozie.coord.application.path=hdfs://bj-gm-test-data001:8020/user/hive/project/workflow/test_20190712
wf_application_path=hdfs://bj-gm-test-data001:8020/user/hive/project/workflow/test_20190712
workflow/test_20190712/workflow.xml
deleted
100644 → 0
View file @
68b4ea90
<workflow-app name="test_20190712" xmlns="uri:oozie:workflow:0.5">
  <!-- Three-step workflow:
         1. test_20190712_start : Sqoop import of the source table into HDFS
         2. test_20190712_check : shell action running the data-ready checker
         3. test_20190712_job   : Hive2 action running test_20190712.sql
       Any failing step transitions to the Kill node. -->
  <start to="test_20190712_start"/>

  <kill name="Kill">
    <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
  </kill>

  <!-- Step 1: Sqoop import into the per-day target directory. -->
  <action name="test_20190712_start">
    <sqoop xmlns="uri:oozie:sqoop-action:0.2">
      <job-tracker>${jobTracker}</job-tracker>
      <name-node>${nameNode}</name-node>
      <prepare>
        <!-- Remove any previous output for this day before importing. -->
        <delete path="${nameNode}/data/log/thirdparty/${targetTableName}/${partition_date}"/>
        <!--<mkdir path="${nameNode}/data/log/thirdparty/${targetTableName}/${partition_date}"/>-->
      </prepare>
      <configuration>
        <property>
          <name>mapred.job.queue.name</name>
          <value>${queueName}</value>
        </property>
      </configuration>
      <!-- Fix: a space was missing between the escaped-by value and the
           target-dir option, so the two were fused into one token and the
           target directory was left as a stray argument. The command text
           is split on spaces into arguments, so the separator is required.
           NOTE(review): quote characters in this element are presumably
           passed through literally rather than interpreted; confirm the
           quoted values behave as intended. -->
      <command>import --connect ${zxURL} --username ${userName} --password ${passWord} --table ${sourceTableName} --columns ${columns} --escaped-by '\\' --target-dir /data/log/thirdparty/${targetTableName}/${partition_date} --fields-terminated-by ${fields_terminated} --lines-terminated-by ${lines_terminated} --num-mappers ${num_mappers} --hive-drop-import-delims --null-string "" --null-non-string ""</command>
    </sqoop>
    <ok to="test_20190712_check"/>
    <error to="Kill"/>
  </action>

  <!-- Step 2: run the checker script from the shipped archive against the
       imported HDFS directory (retried up to 3 times). -->
  <action name="test_20190712_check" retry-max="3" retry-interval="5">
    <shell xmlns="uri:oozie:shell-action:0.2">
      <job-tracker>${jobTracker}</job-tracker>
      <name-node>${nameNode}</name-node>
      <configuration>
        <property>
          <name>mapred.job.queue.name</name>
          <value>${queueName}</value>
        </property>
      </configuration>
      <exec>/bin/bash</exec>
      <argument>${checkEXEC}</argument>
      <env-var>TYPE=hdfs</env-var>
      <env-var>URI=/data/log/thirdparty/${targetTableName}/${partition_date}/</env-var>
      <!-- The archive is exposed under the name "checkclient". -->
      <archive>${checkClient}#checkclient</archive>
      <capture-output/>
    </shell>
    <ok to="test_20190712_job"/>
    <error to="Kill"/>
  </action>

  <!-- Step 3: run the Hive script with partition_date and dbname as
       parameters (retried up to 3 times). -->
  <action name="test_20190712_job" cred="hive2" retry-max="3" retry-interval="5">
    <hive2 xmlns="uri:oozie:hive2-action:0.1">
      <job-tracker>${jobTracker}</job-tracker>
      <name-node>${nameNode}</name-node>
      <configuration>
        <property>
          <name>mapred.job.queue.name</name>
          <value>${queueName}</value>
        </property>
      </configuration>
      <jdbc-url>${jdbcURL}</jdbc-url>
      <password>${pwd}</password>
      <script>/user/hive/project/etl/test_20190712/test_20190712.sql</script>
      <param>partition_date=${partition_date}</param>
      <param>dbname=${dbname}</param>
    </hive2>
    <ok to="End"/>
    <error to="Kill"/>
  </action>

  <end name="End"/>
</workflow-app>
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment