Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
W
warehouse
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
1
Merge Requests
1
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
徐伟
warehouse
Commits
e1e29c80
Commit
e1e29c80
authored
Jun 12, 2019
by
data@bj-gm-test-data001
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
mysql表user_push_tag导入到hive
parent
c459f0dc
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
253 additions
and
0 deletions
+253
-0
create_user_push_tag_test.sql
etl/user_push_tag_test/create_user_push_tag_test.sql
+15
-0
user_push_tag_test.sql
etl/user_push_tag_test/user_push_tag_test.sql
+10
-0
coordinator.xml
workflow/user_push_tag_test/coordinator.xml
+112
-0
job.properties
workflow/user_push_tag_test/job.properties
+48
-0
workflow.xml
workflow/user_push_tag_test/workflow.xml
+68
-0
No files found.
etl/user_push_tag_test/create_user_push_tag_test.sql
0 → 100644
View file @
e1e29c80
-- External Hive table over the daily Sqoop import of MySQL table `user_push_tag`.
-- Data lands as \001-delimited TEXTFILE under /data/log/thirdparty/user_push_tag_test,
-- one subdirectory per partition_date (see workflow.xml sqoop action).
CREATE EXTERNAL TABLE IF NOT EXISTS default.user_push_tag_test (
    device_id string COMMENT 'device_id',
    cl_type   string COMMENT 'cl_type',
    -- FIX: original DDL declared `tag_id null` and `id null`; `null` is not a
    -- valid Hive column type, so the statement could not execute. Sqoop writes
    -- all fields as delimited text, so `string` is safe for both.
    -- NOTE(review): if `id`/`tag_id` are integral in MySQL, bigint/int would be
    -- tighter — confirm against the source schema before changing.
    tag_id    string COMMENT 'tag_id',
    id        string COMMENT 'id'
)
COMMENT 'user_push_tag_test'
PARTITIONED BY (partition_date STRING COMMENT '分区日期')
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\001'
    COLLECTION ITEMS TERMINATED BY '\002'
    MAP KEYS TERMINATED BY '\003'
    LINES TERMINATED BY '\n'
STORED AS TEXTFILE
LOCATION '/data/log/thirdparty/user_push_tag_test';
\ No newline at end of file
etl/user_push_tag_test/user_push_tag_test.sql
0 → 100644
View file @
e1e29c80
-- Daily ETL: (re)register the external partition for the day's Sqoop import.
-- Allow the table to read data placed in subdirectories of the partition path.
SET mapred.input.dir.recursive=true;
SET hive.mapred.supports.subdirectories=true;

-- Select the target database (passed in by the Oozie hive2 action as ${dbname}).
USE ${dbname};

-- Drop the partition if it already exists (idempotent re-runs), then re-add it
-- pointing at the directory the Sqoop action imported into.
ALTER TABLE user_push_tag_test DROP IF EXISTS PARTITION (partition_date='${partition_date}');
ALTER TABLE user_push_tag_test ADD IF NOT EXISTS PARTITION (partition_date='${partition_date}')
-- FIX: original location was 'hdfs://bj-gmei-hdfsbj-gmei-hdfs/user_push_tag_test/${partition_date}':
-- the HDFS authority was doubled and the '/data/log/thirdparty' prefix was missing.
-- The Sqoop action writes to ${nameNode}/data/log/thirdparty/${targetTableName}/${partition_date}
-- with nameNode=hdfs://bj-gmei-hdfs (see workflow.xml / job.properties), so the
-- partition must point there.
LOCATION 'hdfs://bj-gmei-hdfs/data/log/thirdparty/user_push_tag_test/${partition_date}';
\ No newline at end of file
workflow/user_push_tag_test/coordinator.xml
0 → 100644
View file @
e1e29c80
<!--
  Oozie coordinator for the daily user_push_tag_test import.
  Fires at 01:00 every day between start_date and end_date and submits the
  workflow at ${wf_application_path}, passing through connection, Sqoop and
  date parameters. Date values are derived from the nominal time minus one
  day, i.e. each run processes the previous day's data.
-->
<coordinator-app name="user_push_tag_test"
                 frequency="0 1 * * *"
                 start="${start_date}" end="${end_date}" timezone="${timeZone}"
                 xmlns="uri:oozie:coordinator:0.2">
    <controls>
        <!-- Run backlogged instances oldest-first. -->
        <execution>FIFO</execution>
    </controls>
    <action>
        <workflow>
            <app-path>${wf_application_path}</app-path>
            <configuration>
                <!-- Cluster endpoints. -->
                <property>
                    <name>jobTracker</name>
                    <value>${jobTracker}</value>
                </property>
                <property>
                    <name>nameNode</name>
                    <value>${nameNode}</value>
                </property>
                <property>
                    <name>queueName</name>
                    <value>${queueName}</value>
                </property>
                <!-- Data-ready checker (shell action). -->
                <property>
                    <name>checkClient</name>
                    <value>${checkClient}</value>
                </property>
                <property>
                    <name>checkEXEC</name>
                    <value>${checkEXEC}</value>
                </property>
                <!-- Source MySQL connection for the Sqoop action. -->
                <property>
                    <name>zxURL</name>
                    <value>${zxURL}</value>
                </property>
                <property>
                    <name>userName</name>
                    <value>${userName}</value>
                </property>
                <property>
                    <name>passWord</name>
                    <value>${passWord}</value>
                </property>
                <!-- Sqoop import parameters. -->
                <property>
                    <name>sourceTableName</name>
                    <value>${sourceTableName}</value>
                </property>
                <property>
                    <name>columns</name>
                    <value>${columns}</value>
                </property>
                <property>
                    <name>targetTableName</name>
                    <value>${targetTableName}</value>
                </property>
                <property>
                    <name>fields_terminated</name>
                    <value>${fields_terminated}</value>
                </property>
                <property>
                    <name>lines_terminated</name>
                    <value>${lines_terminated}</value>
                </property>
                <property>
                    <name>num_mappers</name>
                    <value>${num_mappers}</value>
                </property>
                <!-- Hive target for the hive2 action. -->
                <property>
                    <name>dbname</name>
                    <value>${dbname}</value>
                </property>
                <property>
                    <name>jdbcURL</name>
                    <value>${jdbcURL}</value>
                </property>
                <property>
                    <name>pwd</name>
                    <value>${pwd}</value>
                </property>
                <!-- Previous day's date, in the formats the workflow needs. -->
                <property>
                    <name>partition_date</name>
                    <value>${coord:formatTime(coord:dateOffset(coord:nominalTime(), -1, 'DAY'), 'yyyyMMdd')}</value>
                </property>
                <property>
                    <name>year</name>
                    <value>${coord:formatTime(coord:dateOffset(coord:nominalTime(), -1, 'DAY'), 'yyyy')}</value>
                </property>
                <property>
                    <name>day</name>
                    <value>${coord:formatTime(coord:dateOffset(coord:nominalTime(), -1, 'DAY'), 'dd')}</value>
                </property>
                <property>
                    <name>month</name>
                    <value>${coord:formatTime(coord:dateOffset(coord:nominalTime(), -1, 'DAY'), 'MM')}</value>
                </property>
                <property>
                    <name>oozie.use.system.libpath</name>
                    <value>True</value>
                </property>
                <property>
                    <name>start_date</name>
                    <value>${start_date}</value>
                </property>
                <property>
                    <name>end_date</name>
                    <value>${end_date}</value>
                </property>
            </configuration>
        </workflow>
    </action>
</coordinator-app>
workflow/user_push_tag_test/job.properties
0 → 100644
View file @
e1e29c80
# job.properties template for a full (non-incremental) MySQL -> Hive import job.
oozie.use.system.libpath=True
security_enabled=False
# appName
# Naming rule: must be identical to the Hive table name.
appName=user_push_tag_test
dbname=default
nameNode=hdfs://bj-gmei-hdfs
jobTracker=bj-gm-prod-cos-datacenter005:8032
queueName=data
timeZone=GMT+0800
# Coordinator start date.
start_date=2019-05-12
# Coordinator end date.
end_date=2019-05-13
# HiveServer2 JDBC URL (ZooKeeper service discovery).
jdbcURL=jdbc:hive2://bj-gm-prod-cos-datacenter006:2181,bj-gm-prod-cos-datacenter007:2181,bj-gm-prod-cos-datacenter008:2181/;serviceDiscoveryMode=zookeeper
# HiveServer2 password.
pwd=data
checkClient=hdfs://bj-gmei-hdfs/user/hive/project/utils/data_ready_checker/client
checkEXEC=./checkclient/bin/checker.sh
# MySQL JDBC URL.
# This is only an example; the actual URL depends on the target database to import from.
# NOTE(review): URL has no host component (jdbc:mysql:///) — confirm this resolves
# to the intended server in this environment.
zxURL=jdbc:mysql:///zhengxing?tinyInt1isBit=false
# Database user name.
userName=work
# Database password.
# NOTE(review): plaintext credential committed to version control — move to a
# secret store / protected variable and rotate this password.
passWord=zJnxVEhyyxeC7ciqxdMITVyWqOFc2mew
# Source (MySQL) table.
sourceTableName=user_push_tag
# Target (Hive) table.
targetTableName=user_push_tag_test
# Source table columns; order must match the column order in the table.
columns="device_id,cl_type,tag_id,id"
num_mappers=1
fields_terminated=\\001
lines_terminated=\\n
oozie.coord.application.path=hdfs://bj-gmei-hdfs/user/hive/project/workflow/user_push_tag_test
wf_application_path=hdfs://bj-gmei-hdfs/user/hive/project/workflow/user_push_tag_test
# Workflow execution schedule.
# Same syntax as crontab.
# NOTE(review): left empty — coordinator.xml hardcodes frequency="0 1 * * *",
# so this key appears unused; confirm and remove if so.
frequency=
# The workflow has three actions, executed as startAction -> checkAction -> jobAction.
# These three keys hold the three action names.
startAction=user_push_tag_test_sqoop
checkAction=user_push_tag_test_check
jobAction=user_push_tag_test_job
\ No newline at end of file
workflow/user_push_tag_test/workflow.xml
0 → 100644
View file @
e1e29c80
<!--
  Oozie workflow for the daily user_push_tag_test import.
  Three actions run in sequence:
    1. user_push_tag_test_start  - Sqoop full import from MySQL into HDFS
    2. user_push_tag_test_check  - shell data-ready check on the imported directory
    3. user_push_tag_test_job    - hive2 script registering the day's partition
  Any failure routes to the Kill node.
-->
<workflow-app name="user_push_tag_test" xmlns="uri:oozie:workflow:0.5">
    <start to="user_push_tag_test_start"/>
    <kill name="Kill">
        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>
    <!-- Step 1: Sqoop import of ${sourceTableName} into the day's HDFS directory. -->
    <action name="user_push_tag_test_start">
        <sqoop xmlns="uri:oozie:sqoop-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <prepare>
                <!-- Delete any partial output from a previous attempt so re-runs are clean. -->
                <delete path="${nameNode}/data/log/thirdparty/${targetTableName}/${partition_date}"/>
                <!--<mkdir path="${nameNode}/data/log/thirdparty/${targetTableName}/${partition_date}"/>-->
            </prepare>
            <configuration>
                <property>
                    <name>mapred.job.queue.name</name>
                    <value>${queueName}</value>
                </property>
            </configuration>
            <command>import --connect ${zxURL} --username ${userName} --password ${passWord} --table ${sourceTableName} --columns ${columns} --target-dir /data/log/thirdparty/${targetTableName}/${partition_date} --fields-terminated-by ${fields_terminated} --lines-terminated-by ${lines_terminated} --num-mappers ${num_mappers} --hive-drop-import-delims --null-string \\N --null-non-string \\N</command>
        </sqoop>
        <ok to="user_push_tag_test_check"/>
        <error to="Kill"/>
    </action>
    <!-- Step 2: verify the imported data directory is present/ready before loading. -->
    <action name="user_push_tag_test_check" retry-max="3" retry-interval="5">
        <shell xmlns="uri:oozie:shell-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <configuration>
                <property>
                    <name>mapred.job.queue.name</name>
                    <value>${queueName}</value>
                </property>
            </configuration>
            <exec>/bin/bash</exec>
            <argument>${checkEXEC}</argument>
            <env-var>TYPE=hdfs</env-var>
            <env-var>URI=/data/log/thirdparty/${targetTableName}/${partition_date}/</env-var>
            <!-- Checker client is shipped as an archive, unpacked as ./checkclient. -->
            <archive>${checkClient}#checkclient</archive>
            <capture-output/>
        </shell>
        <ok to="user_push_tag_test_job"/>
        <error to="Kill"/>
    </action>
    <!-- Step 3: run the Hive script that swaps in the day's partition.
         NOTE(review): cred="hive2" references a credentials definition that is
         not declared in this file — confirm it is provided by the runtime config
         or unnecessary (job.properties sets security_enabled=False). -->
    <action name="user_push_tag_test_job" cred="hive2" retry-max="3" retry-interval="5">
        <hive2 xmlns="uri:oozie:hive2-action:0.1">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <configuration>
                <property>
                    <name>mapred.job.queue.name</name>
                    <value>${queueName}</value>
                </property>
            </configuration>
            <jdbc-url>${jdbcURL}</jdbc-url>
            <password>${pwd}</password>
            <script>/user/hive/project/etl/user_push_tag_test/user_push_tag_test.sql</script>
            <param>partition_date=${partition_date}</param>
            <param>dbname=${dbname}</param>
        </hive2>
        <ok to="End"/>
        <error to="Kill"/>
    </action>
    <end name="End"/>
</workflow-app>
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment