徐伟 / warehouse · Commits

Commit 0118cd8c
authored Jun 12, 2019 by test01
parent 6d2e4e8e

Import the MySQL table user_push_tag into Hive
Showing 5 changed files with 253 additions and 0 deletions

etl/user_push_tag_test/creareTable.sql         +15  -0
etl/user_push_tag_test/user_push_tag_test.sql  +10  -0
workflow/user_push_tag_test/coordinator.xml    +112 -0
workflow/user_push_tag_test/job.properties     +48  -0
workflow/user_push_tag_test/workflow.xml       +68  -0
etl/user_push_tag_test/creareTable.sql · 0 → 100644

CREATE EXTERNAL TABLE IF NOT EXISTS default.user_push_tag_test (
    device_id string COMMENT 'device_id',
    cl_type   string COMMENT 'cl_type',
    tag_id    bigint COMMENT 'tag_id',  -- bigint assumed; the committed file declares no valid Hive type here
    id        bigint COMMENT 'id'       -- bigint assumed, as above
)
COMMENT ''
PARTITIONED BY (partition_date STRING COMMENT 'partition date')
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\001'
    COLLECTION ITEMS TERMINATED BY '\002'
    MAP KEYS TERMINATED BY '\003'
    LINES TERMINATED BY '\n'
STORED AS TEXTFILE
LOCATION '/data/log/thirdparty/user_push_tag_test';
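Since the table is stored as TEXTFILE with Hive's default control-character delimiters, each record in HDFS is one line whose fields are separated by the \001 byte. A minimal shell sketch of what such a line looks like; the values are made up purely for illustration:

# Write one illustrative record; \001 is the field separator declared above.
printf 'dev-abc-123\001android\00142\0017\n' > part-m-00000
# Render the non-printing delimiter visibly (shown as ^A):
cat -v part-m-00000
# -> dev-abc-123^Aandroid^A42^A7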
etl/user_push_tag_test/user_push_tag_test.sql · 0 → 100644

SET mapred.input.dir.recursive=true;
SET hive.mapred.supports.subdirectories=true;
-- Select the database
USE ${dbname};
-- Attach the external HDFS data partition
ALTER TABLE user_push_tag_test DROP IF EXISTS PARTITION (partition_date='${partition_date}');
ALTER TABLE user_push_tag_test ADD IF NOT EXISTS PARTITION (partition_date='${partition_date}')
LOCATION 'hdfs://bj-gmei-hdfs/data/log/thirdparty/user_push_tag_test/${partition_date}';
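The hive2 action in workflow.xml (further down) runs this script and passes partition_date and dbname as <param> entries, which reach Beeline as --hivevar definitions. The same run can be reproduced by hand; a sketch, where the JDBC URL and password come from job.properties, the login user is an assumption, and 20190511 is a hypothetical partition date:

# Sketch: run the ETL script manually with the same variables the
# Oozie hive2 action would supply. 20190511 is only an example value.
JDBC_URL='jdbc:hive2://bj-gm-prod-cos-datacenter006:2181,bj-gm-prod-cos-datacenter007:2181,bj-gm-prod-cos-datacenter008:2181/;serviceDiscoveryMode=zookeeper'
beeline -u "$JDBC_URL" -n "$USER" -p data \
    --hivevar dbname=default \
    --hivevar partition_date=20190511 \
    -f user_push_tag_test.sql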
workflow/user_push_tag_test/coordinator.xml · 0 → 100644

<coordinator-app name="user_push_tag_test" frequency="0 1 * * *"
                 start="${start_date}" end="${end_date}" timezone="${timeZone}"
                 xmlns="uri:oozie:coordinator:0.2">
    <controls>
        <execution>FIFO</execution>
    </controls>
    <action>
        <workflow>
            <app-path>${wf_application_path}</app-path>
            <configuration>
                <property><name>jobTracker</name><value>${jobTracker}</value></property>
                <property><name>nameNode</name><value>${nameNode}</value></property>
                <property><name>queueName</name><value>${queueName}</value></property>
                <property><name>checkClient</name><value>${checkClient}</value></property>
                <property><name>checkEXEC</name><value>${checkEXEC}</value></property>
                <property><name>zxURL</name><value>${zxURL}</value></property>
                <property><name>userName</name><value>${userName}</value></property>
                <property><name>passWord</name><value>${passWord}</value></property>
                <property><name>sourceTableName</name><value>${sourceTableName}</value></property>
                <property><name>columns</name><value>${columns}</value></property>
                <property><name>targetTableName</name><value>${targetTableName}</value></property>
                <property><name>fields_terminated</name><value>${fields_terminated}</value></property>
                <property><name>lines_terminated</name><value>${lines_terminated}</value></property>
                <property><name>num_mappers</name><value>${num_mappers}</value></property>
                <property><name>dbname</name><value>${dbname}</value></property>
                <property><name>jdbcURL</name><value>${jdbcURL}</value></property>
                <property><name>pwd</name><value>${pwd}</value></property>
                <property><name>partition_date</name><value>${coord:formatTime(coord:dateOffset(coord:nominalTime(), -1, 'DAY'), 'yyyyMMdd')}</value></property>
                <property><name>year</name><value>${coord:formatTime(coord:dateOffset(coord:nominalTime(), -1, 'DAY'), 'yyyy')}</value></property>
                <property><name>day</name><value>${coord:formatTime(coord:dateOffset(coord:nominalTime(), -1, 'DAY'), 'dd')}</value></property>
                <property><name>month</name><value>${coord:formatTime(coord:dateOffset(coord:nominalTime(), -1, 'DAY'), 'MM')}</value></property>
                <property><name>oozie.use.system.libpath</name><value>True</value></property>
                <property><name>start_date</name><value>${start_date}</value></property>
                <property><name>end_date</name><value>${end_date}</value></property>
            </configuration>
        </workflow>
    </action>
</coordinator-app>
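The partition_date, year, day, and month properties are all derived from the coordinator's nominal time shifted back one day, formatted with different patterns. For a run nominally scheduled on 2019-05-12, the values resolve as in this shell sketch (GNU date assumed, purely for illustration):

NOMINAL=2019-05-12
date -d "$NOMINAL -1 day" +%Y%m%d   # partition_date -> 20190511
date -d "$NOMINAL -1 day" +%Y       # year           -> 2019
date -d "$NOMINAL -1 day" +%d       # day            -> 11
date -d "$NOMINAL -1 day" +%m       # month          -> 05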
workflow/user_push_tag_test/job.properties · 0 → 100644

# job.properties template for a full data import
oozie.use.system.libpath=True
security_enabled=False

# appName: must be identical to the Hive table name
appName=user_push_tag_test
dbname=default
nameNode=hdfs://bj-gmei-hdfs
jobTracker=bj-gm-prod-cos-datacenter005:8032
queueName=data
timeZone=GMT+0800

# Start time of job execution
start_date=2019-05-12
# End time of job execution
end_date=2019-05-13

# HiveServer2 URL
jdbcURL=jdbc:hive2://bj-gm-prod-cos-datacenter006:2181,bj-gm-prod-cos-datacenter007:2181,bj-gm-prod-cos-datacenter008:2181/;serviceDiscoveryMode=zookeeper
# HiveServer2 password
pwd=data

checkClient=hdfs://bj-gmei-hdfs/user/hive/project/utils/data_ready_checker/client
checkEXEC=./checkclient/bin/checker.sh

# MySQL URL
# This is only an example; the actual URL depends on the database being imported
zxURL=jdbc:mysql:///zhengxing?tinyInt1isBit=false
# Username for the database
userName=work
# Password for the database
passWord=zJnxVEhyyxeC7ciqxdMITVyWqOFc2mew

# Source table
sourceTableName=user_push_tag
# Target table for the data
targetTableName=user_push_tag_test
# Columns of the source table; the order must match the column order in the table
columns="device_id,cl_type,tag_id,id"
num_mappers=1
fields_terminated=\\001
lines_terminated=\\n

oozie.coord.application.path=hdfs://bj-gmei-hdfs/user/hive/project/workflow/user_push_tag_test
wf_application_path=hdfs://bj-gmei-hdfs/user/hive/project/workflow/user_push_tag_test

# Execution schedule of the workflow
# The rule is specified in crontab syntax
frequency=

# The workflow consists of three actions, executed in the order
# startAction -> checkAction -> jobAction; these three fields are the action names
startAction=user_push_tag_test_sqoop
checkAction=user_push_tag_test_check
jobAction=user_push_tag_test_job
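With these properties filled in, the coordinator is submitted through the Oozie CLI. A sketch; the Oozie server URL is an assumption about the deployment, and the job id is a placeholder:

# Sketch: validate and submit the coordinator application.
export OOZIE_URL=http://oozie.example.com:11000/oozie   # assumed endpoint
oozie job -dryrun -config job.properties   # validate without scheduling
oozie job -run    -config job.properties   # submit; prints the coordinator job id
oozie job -info   <coordinator-job-id>     # poll status (placeholder id)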
workflow/user_push_tag_test/workflow.xml · 0 → 100644

<workflow-app name="user_push_tag_test" xmlns="uri:oozie:workflow:0.5">
    <start to="user_push_tag_test_start"/>
    <kill name="Kill">
        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>
    <action name="user_push_tag_test_start">
        <sqoop xmlns="uri:oozie:sqoop-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <prepare>
                <delete path="${nameNode}/data/log/thirdparty/${targetTableName}/${partition_date}"/>
                <!--<mkdir path="${nameNode}/data/log/thirdparty/${targetTableName}/${partition_date}"/>-->
            </prepare>
            <configuration>
                <property>
                    <name>mapred.job.queue.name</name>
                    <value>${queueName}</value>
                </property>
            </configuration>
            <command>import --connect ${zxURL} --username ${userName} --password ${passWord} --table ${sourceTableName} --columns ${columns} --target-dir /data/log/thirdparty/${targetTableName}/${partition_date} --fields-terminated-by ${fields_terminated} --lines-terminated-by ${lines_terminated} --num-mappers ${num_mappers} --hive-drop-import-delims --null-string \\N --null-non-string \\N</command>
        </sqoop>
        <ok to="user_push_tag_test_check"/>
        <error to="Kill"/>
    </action>
    <action name="user_push_tag_test_check" retry-max="3" retry-interval="5">
        <shell xmlns="uri:oozie:shell-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <configuration>
                <property>
                    <name>mapred.job.queue.name</name>
                    <value>${queueName}</value>
                </property>
            </configuration>
            <exec>/bin/bash</exec>
            <argument>${checkEXEC}</argument>
            <env-var>TYPE=hdfs</env-var>
            <env-var>URI=/data/log/thirdparty/${targetTableName}/${partition_date}/</env-var>
            <archive>${checkClient}#checkclient</archive>
            <capture-output/>
        </shell>
        <ok to="user_push_tag_test_job"/>
        <error to="Kill"/>
    </action>
    <action name="user_push_tag_test_job" cred="hive2" retry-max="3" retry-interval="5">
        <hive2 xmlns="uri:oozie:hive2-action:0.1">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <configuration>
                <property>
                    <name>mapred.job.queue.name</name>
                    <value>${queueName}</value>
                </property>
            </configuration>
            <jdbc-url>${jdbcURL}</jdbc-url>
            <password>${pwd}</password>
            <script>/user/hive/project/etl/user_push_tag_test/user_push_tag_test.sql</script>
            <param>partition_date=${partition_date}</param>
            <param>dbname=${dbname}</param>
        </hive2>
        <ok to="End"/>
        <error to="Kill"/>
    </action>
    <end name="End"/>
</workflow-app>
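For reference, this is roughly what the sqoop action's command expands to once the job.properties values are substituted. The MySQL host is a placeholder because the template zxURL deliberately omits it, and 20190511 is again an illustrative partition date:

sqoop import \
    --connect 'jdbc:mysql://<mysql-host>/zhengxing?tinyInt1isBit=false' \
    --username work --password 'zJnxVEhyyxeC7ciqxdMITVyWqOFc2mew' \
    --table user_push_tag \
    --columns "device_id,cl_type,tag_id,id" \
    --target-dir /data/log/thirdparty/user_push_tag_test/20190511 \
    --fields-terminated-by '\001' \
    --lines-terminated-by '\n' \
    --num-mappers 1 \
    --hive-drop-import-delims \
    --null-string '\\N' --null-non-string '\\N'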