warehouse · commit e8a0eea5
Authored Jun 19, 2019 by data@bj-gm-test-data001

Import the MySQL table api_user_change_log into Hive

parent a8132e5c
Showing 70 changed files with 3525 additions and 0 deletions (+3525, −0).
etl/api_advertisedoctor1/api_advertisedoctor1.sql +10 −0
etl/api_advertisedoctor1/create_api_advertisedoctor1.sql +14 −0
etl/api_advertisedoctor2/api_advertisedoctor2.sql +10 −0
etl/api_advertisedoctor2/create_api_advertisedoctor2.sql +14 −0
etl/api_advertisedoctor3/api_advertisedoctor3.sql +10 −0
etl/api_advertisedoctor3/create_api_advertisedoctor3.sql +14 −0
etl/api_advertisedoctor4/api_advertisedoctor4.sql +10 −0
etl/api_advertisedoctor4/create_api_advertisedoctor4.sql +14 −0
etl/api_advertisedoctor5/api_advertisedoctor5.sql +10 −0
etl/api_advertisedoctor5/create_api_advertisedoctor5.sql +14 −0
etl/api_advertisedoctor6/api_advertisedoctor6.sql +10 −0
etl/api_advertisedoctor6/create_api_advertisedoctor6.sql +14 −0
etl/api_advertisedoctor7/api_advertisedoctor7.sql +10 −0
etl/api_advertisedoctor7/create_api_advertisedoctor7.sql +14 −0
etl/fffffffffffffff/create_fffffffffffffff.sql +19 −0
etl/fffffffffffffff/fffffffffffffff.sql +10 −0
etl/fffffffffffffff2/create_fffffffffffffff2.sql +19 −0
etl/fffffffffffffff2/fffffffffffffff2.sql +10 −0
etl/test_api_advertisehospital1/create_test_api_advertisehospital1.sql +15 −0
etl/test_api_advertisehospital1/test_api_advertisehospital1.sql +10 −0
etl/test_api_area1/create_test_api_area1.sql +14 −0
etl/test_api_area1/test_api_area1.sql +10 −0
etl/test_api_area12/create_test_api_area12.sql +14 −0
etl/test_api_area12/test_api_area12.sql +10 −0
etl/test_api_user_change_log/create_test_api_user_change_log.sql +17 −0
etl/test_api_user_change_log/test_api_user_change_log.sql +10 −0
etl/xuwei_test/create_xuwei_test.sql +17 −0
etl/xuwei_test/xuwei_test.sql +10 −0
workflow/api_advertisedoctor1/coordinator.xml +112 −0
workflow/api_advertisedoctor1/job.properties +48 −0
workflow/api_advertisedoctor1/workflow.xml +66 −0
workflow/api_advertisedoctor2/coordinator.xml +112 −0
workflow/api_advertisedoctor2/job.properties +48 −0
workflow/api_advertisedoctor2/workflow.xml +66 −0
workflow/api_advertisedoctor3/coordinator.xml +112 −0
workflow/api_advertisedoctor3/job.properties +48 −0
workflow/api_advertisedoctor3/workflow.xml +66 −0
workflow/api_advertisedoctor4/coordinator.xml +112 −0
workflow/api_advertisedoctor4/job.properties +48 −0
workflow/api_advertisedoctor4/workflow.xml +66 −0
workflow/api_advertisedoctor5/coordinator.xml +112 −0
workflow/api_advertisedoctor5/job.properties +48 −0
workflow/api_advertisedoctor5/workflow.xml +68 −0
workflow/api_advertisedoctor6/coordinator.xml +112 −0
workflow/api_advertisedoctor6/job.properties +48 −0
workflow/api_advertisedoctor6/workflow.xml +68 −0
workflow/api_advertisedoctor7/coordinator.xml +112 −0
workflow/api_advertisedoctor7/job.properties +48 −0
workflow/api_advertisedoctor7/workflow.xml +66 −0
workflow/fffffffffffffff/coordinator.xml +112 −0
workflow/fffffffffffffff/job.properties +48 −0
workflow/fffffffffffffff/workflow.xml +68 −0
workflow/fffffffffffffff2/coordinator.xml +112 −0
workflow/fffffffffffffff2/job.properties +48 −0
workflow/fffffffffffffff2/workflow.xml +68 −0
workflow/test_api_advertisehospital1/coordinator.xml +112 −0
workflow/test_api_advertisehospital1/job.properties +48 −0
workflow/test_api_advertisehospital1/workflow.xml +66 −0
workflow/test_api_area1/coordinator.xml +112 −0
workflow/test_api_area1/job.properties +48 −0
workflow/test_api_area1/workflow.xml +66 −0
workflow/test_api_area12/coordinator.xml +112 −0
workflow/test_api_area12/job.properties +48 −0
workflow/test_api_area12/workflow.xml +66 −0
workflow/test_api_user_change_log/coordinator.xml +112 −0
workflow/test_api_user_change_log/job.properties +48 −0
workflow/test_api_user_change_log/workflow.xml +66 −0
workflow/xuwei_test/coordinator.xml +112 −0
workflow/xuwei_test/job.properties +48 −0
workflow/xuwei_test/workflow.xml +66 −0
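The commit follows one pattern per table: an etl/<table>/create_<table>.sql DDL, an etl/<table>/<table>.sql partition-load script, and a workflow/<table>/ directory holding the Oozie coordinator.xml, workflow.xml and job.properties. The properties files reference these artifacts at fixed HDFS paths, so a deployment step is implied. Below is a hedged sketch (not part of the commit; the local checkout layout and use of the stock hdfs CLI are assumptions) of pushing one table's files to the paths its job.properties points at:

# Hedged deployment sketch for one table (api_advertisedoctor1); the target
# paths come from job.properties (wf_application_path and the hive2 <script>
# path), everything else here is an assumption.
hdfs dfs -mkdir -p /user/hive/project/etl/api_advertisedoctor1 \
                   /user/hive/project/workflow/api_advertisedoctor1
hdfs dfs -put -f etl/api_advertisedoctor1/*.sql \
                 /user/hive/project/etl/api_advertisedoctor1/
hdfs dfs -put -f workflow/api_advertisedoctor1/coordinator.xml \
                 workflow/api_advertisedoctor1/workflow.xml \
                 /user/hive/project/workflow/api_advertisedoctor1/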
etl/api_advertisedoctor1/api_advertisedoctor1.sql (new file, mode 100644)

SET mapred.input.dir.recursive=true;
SET hive.mapred.supports.subdirectories=true;

-- Declare the database
USE ${dbname};

-- Load the HDFS external data partition
ALTER TABLE api_advertisedoctor1 DROP IF EXISTS PARTITION (partition_date = '${partition_date}');
ALTER TABLE api_advertisedoctor1 ADD IF NOT EXISTS PARTITION (partition_date = '${partition_date}')
LOCATION 'hdfs://bj-gm-test-data001:8020/data/log/thirdparty/api_advertisedoctor1/${partition_date}';
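Because the partition is dropped before it is re-added, re-running the script for the same date is idempotent. Outside Oozie the same script can be exercised by hand; a hedged beeline invocation follows (the JDBC URL and password come from this commit's job.properties; the date value is made up, and the --hivevar bindings mirror what the workflow's <param> entries do):

# Minimal manual run, assuming beeline is on PATH; 20190618 is illustrative.
beeline -u 'jdbc:hive2://bj-gm-prod-cos-datacenter006:2181,bj-gm-prod-cos-datacenter007:2181,bj-gm-prod-cos-datacenter008:2181/;serviceDiscoveryMode=zookeeper' \
        -p data \
        --hivevar dbname=default \
        --hivevar partition_date=20190618 \
        -f etl/api_advertisedoctor1/api_advertisedoctor1.sql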
etl/api_advertisedoctor1/create_api_advertisedoctor1.sql (new file, mode 100644)

CREATE EXTERNAL TABLE IF NOT EXISTS default.api_advertisedoctor1 (
    id INT COMMENT '1',
    doctor_id STRING COMMENT '1',
    rank INT COMMENT '1'
)
COMMENT 'api_advertisedoctor1'
PARTITIONED BY (partition_date STRING COMMENT 'partition date')
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\001'
    COLLECTION ITEMS TERMINATED BY '\002'
    MAP KEYS TERMINATED BY '\003'
    LINES TERMINATED BY '\n'
STORED AS TEXTFILE
LOCATION '/data/log/thirdparty/api_advertisedoctor1';
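The \001 field delimiter matches the fields_terminated value the Sqoop import below is configured with, so the external table reads the imported text files directly. A couple of hedged sanity checks after running the DDL ($HIVE2_URL is a stand-in for the full JDBC URL above, not something the commit defines):

# Confirm the table exists and see which partitions have been attached.
beeline -u "$HIVE2_URL" -p data -e 'DESCRIBE FORMATTED default.api_advertisedoctor1;'
beeline -u "$HIVE2_URL" -p data -e 'SHOW PARTITIONS default.api_advertisedoctor1;'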
etl/api_advertisedoctor2/api_advertisedoctor2.sql (new file, mode 100644)

SET mapred.input.dir.recursive=true;
SET hive.mapred.supports.subdirectories=true;

-- Declare the database
USE ${dbname};

-- Load the HDFS external data partition
ALTER TABLE api_advertisedoctor2 DROP IF EXISTS PARTITION (partition_date = '${partition_date}');
ALTER TABLE api_advertisedoctor2 ADD IF NOT EXISTS PARTITION (partition_date = '${partition_date}')
LOCATION 'hdfs://bj-gm-test-data001:8020/data/log/thirdparty/api_advertisedoctor2/${partition_date}';
etl/api_advertisedoctor2/create_api_advertisedoctor2.sql (new file, mode 100644)

CREATE EXTERNAL TABLE IF NOT EXISTS default.api_advertisedoctor2 (
    id INT COMMENT '1',
    doctor_id STRING COMMENT '11',
    rank INT COMMENT '1'
)
COMMENT 'api_advertisedoctor2'
PARTITIONED BY (partition_date STRING COMMENT 'partition date')
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\001'
    COLLECTION ITEMS TERMINATED BY '\002'
    MAP KEYS TERMINATED BY '\003'
    LINES TERMINATED BY '\n'
STORED AS TEXTFILE
LOCATION '/data/log/thirdparty/api_advertisedoctor2';
etl/api_advertisedoctor3/api_advertisedoctor3.sql (new file, mode 100644)

SET mapred.input.dir.recursive=true;
SET hive.mapred.supports.subdirectories=true;

-- Declare the database
USE ${dbname};

-- Load the HDFS external data partition
ALTER TABLE api_advertisedoctor3 DROP IF EXISTS PARTITION (partition_date = '${partition_date}');
ALTER TABLE api_advertisedoctor3 ADD IF NOT EXISTS PARTITION (partition_date = '${partition_date}')
LOCATION 'hdfs://bj-gm-test-data001:8020/data/log/thirdparty/api_advertisedoctor3/${partition_date}';
etl/api_advertisedoctor3/create_api_advertisedoctor3.sql (new file, mode 100644)

CREATE EXTERNAL TABLE IF NOT EXISTS default.api_advertisedoctor3 (
    id INT COMMENT '1',
    doctor_id STRING COMMENT '11',
    rank INT COMMENT '1'
)
COMMENT 'api_advertisedoctor3'
PARTITIONED BY (partition_date STRING COMMENT 'partition date')
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\001'
    COLLECTION ITEMS TERMINATED BY '\002'
    MAP KEYS TERMINATED BY '\003'
    LINES TERMINATED BY '\n'
STORED AS TEXTFILE
LOCATION '/data/log/thirdparty/api_advertisedoctor3';
etl/api_advertisedoctor4/api_advertisedoctor4.sql (new file, mode 100644)

SET mapred.input.dir.recursive=true;
SET hive.mapred.supports.subdirectories=true;

-- Declare the database
USE ${dbname};

-- Load the HDFS external data partition
ALTER TABLE api_advertisedoctor4 DROP IF EXISTS PARTITION (partition_date = '${partition_date}');
ALTER TABLE api_advertisedoctor4 ADD IF NOT EXISTS PARTITION (partition_date = '${partition_date}')
LOCATION 'hdfs://bj-gm-test-data001:8020/data/log/thirdparty/api_advertisedoctor4/${partition_date}';
etl/api_advertisedoctor4/create_api_advertisedoctor4.sql (new file, mode 100644)

CREATE EXTERNAL TABLE IF NOT EXISTS default.api_advertisedoctor4 (
    id INT COMMENT '1',
    doctor_id STRING COMMENT '1',
    rank INT COMMENT '1'
)
COMMENT 'api_advertisedoctor4'
PARTITIONED BY (partition_date STRING COMMENT 'partition date')
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\001'
    COLLECTION ITEMS TERMINATED BY '\002'
    MAP KEYS TERMINATED BY '\003'
    LINES TERMINATED BY '\n'
STORED AS TEXTFILE
LOCATION '/data/log/thirdparty/api_advertisedoctor4';
etl/api_advertisedoctor5/api_advertisedoctor5.sql (new file, mode 100644)

SET mapred.input.dir.recursive=true;
SET hive.mapred.supports.subdirectories=true;

-- Declare the database
USE ${dbname};

-- Load the HDFS external data partition
ALTER TABLE api_advertisedoctor5 DROP IF EXISTS PARTITION (partition_date = '${partition_date}');
ALTER TABLE api_advertisedoctor5 ADD IF NOT EXISTS PARTITION (partition_date = '${partition_date}')
LOCATION 'hdfs://bj-gm-test-data001:8020/data/log/thirdparty/api_advertisedoctor5/${partition_date}';
etl/api_advertisedoctor5/create_api_advertisedoctor5.sql (new file, mode 100644)

CREATE EXTERNAL TABLE IF NOT EXISTS default.api_advertisedoctor5 (
    id INT COMMENT '1',
    doctor_id STRING COMMENT '1',
    rank INT COMMENT '1'
)
COMMENT 'api_advertisedoctor5'
PARTITIONED BY (partition_date STRING COMMENT 'partition date')
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\001'
    COLLECTION ITEMS TERMINATED BY '\002'
    MAP KEYS TERMINATED BY '\003'
    LINES TERMINATED BY '\n'
STORED AS TEXTFILE
LOCATION '/data/log/thirdparty/api_advertisedoctor5';
etl/api_advertisedoctor6/api_advertisedoctor6.sql (new file, mode 100644)

SET mapred.input.dir.recursive=true;
SET hive.mapred.supports.subdirectories=true;

-- Declare the database
USE ${dbname};

-- Load the HDFS external data partition
ALTER TABLE api_advertisedoctor6 DROP IF EXISTS PARTITION (partition_date = '${partition_date}');
ALTER TABLE api_advertisedoctor6 ADD IF NOT EXISTS PARTITION (partition_date = '${partition_date}')
LOCATION 'hdfs://bj-gm-test-data001:8020/data/log/thirdparty/api_advertisedoctor6/${partition_date}';
etl/api_advertisedoctor6/create_api_advertisedoctor6.sql (new file, mode 100644)

CREATE EXTERNAL TABLE IF NOT EXISTS default.api_advertisedoctor6 (
    id INT COMMENT '1',
    doctor_id STRING COMMENT '1',
    rank INT COMMENT '1'
)
COMMENT 'api_advertisedoctor6'
PARTITIONED BY (partition_date STRING COMMENT 'partition date')
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\001'
    COLLECTION ITEMS TERMINATED BY '\002'
    MAP KEYS TERMINATED BY '\003'
    LINES TERMINATED BY '\n'
STORED AS TEXTFILE
LOCATION '/data/log/thirdparty/api_advertisedoctor6';
etl/api_advertisedoctor7/api_advertisedoctor7.sql (new file, mode 100644)

SET mapred.input.dir.recursive=true;
SET hive.mapred.supports.subdirectories=true;

-- Declare the database
USE ${dbname};

-- Load the HDFS external data partition
ALTER TABLE api_advertisedoctor7 DROP IF EXISTS PARTITION (partition_date = '${partition_date}');
ALTER TABLE api_advertisedoctor7 ADD IF NOT EXISTS PARTITION (partition_date = '${partition_date}')
LOCATION 'hdfs://bj-gm-test-data001:8020/data/log/thirdparty/api_advertisedoctor7/${partition_date}';
etl/api_advertisedoctor7/create_api_advertisedoctor7.sql (new file, mode 100644)

CREATE EXTERNAL TABLE IF NOT EXISTS default.api_advertisedoctor7 (
    id INT COMMENT '1',
    doctor_id STRING COMMENT '1',
    rank INT COMMENT '1'
)
COMMENT 'api_advertisedoctor7'
PARTITIONED BY (partition_date STRING COMMENT 'partition date')
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\001'
    COLLECTION ITEMS TERMINATED BY '\002'
    MAP KEYS TERMINATED BY '\003'
    LINES TERMINATED BY '\n'
STORED AS TEXTFILE
LOCATION '/data/log/thirdparty/api_advertisedoctor7';
etl/fffffffffffffff/create_fffffffffffffff.sql (new file, mode 100644)

CREATE EXTERNAL TABLE IF NOT EXISTS default.fffffffffffffff (
    id STRING COMMENT '1',
    month_at STRING COMMENT '1',
    total_amount STRING COMMENT '1',
    should_pay STRING COMMENT '1',
    already_pay STRING COMMENT '1',
    is_finished STRING COMMENT '1',
    created_time STRING COMMENT '1',
    doctor_id STRING COMMENT '1'
)
COMMENT 'fffffffffffffff'
PARTITIONED BY (partition_date STRING COMMENT 'partition date')
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\001'
    COLLECTION ITEMS TERMINATED BY '\002'
    MAP KEYS TERMINATED BY '\003'
    LINES TERMINATED BY '\n'
STORED AS TEXTFILE
LOCATION '/data/log/thirdparty/fffffffffffffff';
etl/fffffffffffffff/fffffffffffffff.sql (new file, mode 100644)

SET mapred.input.dir.recursive=true;
SET hive.mapred.supports.subdirectories=true;

-- Declare the database
USE ${dbname};

-- Load the HDFS external data partition
ALTER TABLE fffffffffffffff DROP IF EXISTS PARTITION (partition_date = '${partition_date}');
ALTER TABLE fffffffffffffff ADD IF NOT EXISTS PARTITION (partition_date = '${partition_date}')
LOCATION 'hdfs://bj-gm-test-data001:8020/data/log/thirdparty/fffffffffffffff/${partition_date}';
etl/fffffffffffffff2/create_fffffffffffffff2.sql (new file, mode 100644)

CREATE EXTERNAL TABLE IF NOT EXISTS default.fffffffffffffff2 (
    id STRING COMMENT '1',
    month_at STRING COMMENT '1',
    total_amount STRING COMMENT '1',
    should_pay STRING COMMENT '1',
    already_pay STRING COMMENT '1',
    is_finished STRING COMMENT '1',
    created_time STRING COMMENT '1',
    doctor_id STRING COMMENT '1'
)
COMMENT 'fffffffffffffff2'
PARTITIONED BY (partition_date STRING COMMENT 'partition date')
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\001'
    COLLECTION ITEMS TERMINATED BY '\002'
    MAP KEYS TERMINATED BY '\003'
    LINES TERMINATED BY '\n'
STORED AS TEXTFILE
LOCATION '/data/log/thirdparty/fffffffffffffff2';
etl/fffffffffffffff2/fffffffffffffff2.sql (new file, mode 100644)

SET mapred.input.dir.recursive=true;
SET hive.mapred.supports.subdirectories=true;

-- Declare the database
USE ${dbname};

-- Load the HDFS external data partition
ALTER TABLE fffffffffffffff2 DROP IF EXISTS PARTITION (partition_date = '${partition_date}');
ALTER TABLE fffffffffffffff2 ADD IF NOT EXISTS PARTITION (partition_date = '${partition_date}')
LOCATION 'hdfs://bj-gm-test-data001:8020/data/log/thirdparty/fffffffffffffff2/${partition_date}';
etl/test_api_advertisehospital1/create_test_api_advertisehospital1.sql (new file, mode 100644)

CREATE EXTERNAL TABLE IF NOT EXISTS default.test_api_advertisehospital1 (
    id INT COMMENT '1',
    hospital_id STRING COMMENT '1',
    rank INT COMMENT '1',
    portrait STRING COMMENT '1'
)
COMMENT 'test_api_advertisehospital1'
PARTITIONED BY (partition_date STRING COMMENT 'partition date')
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\001'
    COLLECTION ITEMS TERMINATED BY '\002'
    MAP KEYS TERMINATED BY '\003'
    LINES TERMINATED BY '\n'
STORED AS TEXTFILE
LOCATION '/data/log/thirdparty/test_api_advertisehospital1';
etl/test_api_advertisehospital1/test_api_advertisehospital1.sql (new file, mode 100644)

SET mapred.input.dir.recursive=true;
SET hive.mapred.supports.subdirectories=true;

-- Declare the database
USE ${dbname};

-- Load the HDFS external data partition
ALTER TABLE test_api_advertisehospital1 DROP IF EXISTS PARTITION (partition_date = '${partition_date}');
ALTER TABLE test_api_advertisehospital1 ADD IF NOT EXISTS PARTITION (partition_date = '${partition_date}')
LOCATION 'hdfs://bj-gm-test-data001:8020/data/log/thirdparty/test_api_advertisehospital1/${partition_date}';
etl/test_api_area1/create_test_api_area1.sql (new file, mode 100644)

CREATE EXTERNAL TABLE IF NOT EXISTS default.test_api_area1 (
    id INT COMMENT '1',
    area_name STRING COMMENT '1',
    phone_prefix STRING COMMENT '1'
)
COMMENT 'test_api_area1'
PARTITIONED BY (partition_date STRING COMMENT 'partition date')
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\001'
    COLLECTION ITEMS TERMINATED BY '\002'
    MAP KEYS TERMINATED BY '\003'
    LINES TERMINATED BY '\n'
STORED AS TEXTFILE
LOCATION '/data/log/thirdparty/test_api_area1';
etl/test_api_area1/test_api_area1.sql (new file, mode 100644)

SET mapred.input.dir.recursive=true;
SET hive.mapred.supports.subdirectories=true;

-- Declare the database
USE ${dbname};

-- Load the HDFS external data partition
ALTER TABLE test_api_area1 DROP IF EXISTS PARTITION (partition_date = '${partition_date}');
ALTER TABLE test_api_area1 ADD IF NOT EXISTS PARTITION (partition_date = '${partition_date}')
LOCATION 'hdfs://bj-gm-test-data001:8020/data/log/thirdparty/test_api_area1/${partition_date}';
etl/test_api_area12/create_test_api_area12.sql (new file, mode 100644)

CREATE EXTERNAL TABLE IF NOT EXISTS default.test_api_area12 (
    id INT COMMENT '1',
    area_name STRING COMMENT '1',
    phone_prefix STRING COMMENT '1'
)
COMMENT 'test_api_area12'
PARTITIONED BY (partition_date STRING COMMENT 'partition date')
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\001'
    COLLECTION ITEMS TERMINATED BY '\002'
    MAP KEYS TERMINATED BY '\003'
    LINES TERMINATED BY '\n'
STORED AS TEXTFILE
LOCATION '/data/log/thirdparty/test_api_area12';
etl/test_api_area12/test_api_area12.sql (new file, mode 100644)

SET mapred.input.dir.recursive=true;
SET hive.mapred.supports.subdirectories=true;

-- Declare the database
USE ${dbname};

-- Load the HDFS external data partition
ALTER TABLE test_api_area12 DROP IF EXISTS PARTITION (partition_date = '${partition_date}');
ALTER TABLE test_api_area12 ADD IF NOT EXISTS PARTITION (partition_date = '${partition_date}')
LOCATION 'hdfs://bj-gm-test-data001:8020/data/log/thirdparty/test_api_area12/${partition_date}';
etl/test_api_user_change_log/create_test_api_user_change_log.sql (new file, mode 100644)

CREATE EXTERNAL TABLE IF NOT EXISTS default.test_api_user_change_log (
    id INT COMMENT '1',
    user_id INT COMMENT '1',
    change_type INT COMMENT '1',
    operate_type INT COMMENT '1',
    comment STRING COMMENT '1',
    create_time STRING COMMENT '1'
)
COMMENT 'test_api_user_change_log'
PARTITIONED BY (partition_date STRING COMMENT 'partition date')
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\001'
    COLLECTION ITEMS TERMINATED BY '\002'
    MAP KEYS TERMINATED BY '\003'
    LINES TERMINATED BY '\n'
STORED AS TEXTFILE
LOCATION '/data/log/thirdparty/test_api_user_change_log';
etl/test_api_user_change_log/test_api_user_change_log.sql (new file, mode 100644)

SET mapred.input.dir.recursive=true;
SET hive.mapred.supports.subdirectories=true;

-- Declare the database
USE ${dbname};

-- Load the HDFS external data partition
ALTER TABLE test_api_user_change_log DROP IF EXISTS PARTITION (partition_date = '${partition_date}');
ALTER TABLE test_api_user_change_log ADD IF NOT EXISTS PARTITION (partition_date = '${partition_date}')
LOCATION 'hdfs://bj-gm-test-data001:8020/data/log/thirdparty/test_api_user_change_log/${partition_date}';
etl/xuwei_test/create_xuwei_test.sql (new file, mode 100644)

CREATE EXTERNAL TABLE IF NOT EXISTS default.xuwei_test (
    id STRING COMMENT '1',
    name STRING COMMENT '1',
    is_online STRING COMMENT '1',
    created_time STRING COMMENT '1',
    is_doctor_create STRING COMMENT '1',
    tag_attr_id STRING COMMENT '1'
)
COMMENT 'xuwei_test'
PARTITIONED BY (partition_date STRING COMMENT 'partition date')
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\001'
    COLLECTION ITEMS TERMINATED BY '\002'
    MAP KEYS TERMINATED BY '\003'
    LINES TERMINATED BY '\n'
STORED AS TEXTFILE
LOCATION '/data/log/thirdparty/xuwei_test';
etl/xuwei_test/xuwei_test.sql (new file, mode 100644)

SET mapred.input.dir.recursive=true;
SET hive.mapred.supports.subdirectories=true;

-- Declare the database
USE ${dbname};

-- Load the HDFS external data partition
ALTER TABLE xuwei_test DROP IF EXISTS PARTITION (partition_date = '${partition_date}');
ALTER TABLE xuwei_test ADD IF NOT EXISTS PARTITION (partition_date = '${partition_date}')
LOCATION 'hdfs://bj-gm-test-data001:8020/data/log/thirdparty/xuwei_test/${partition_date}';
workflow/api_advertisedoctor1/coordinator.xml (new file, mode 100644)

<coordinator-app name="api_advertisedoctor1" frequency="0 1 * * *" start="${start_date}" end="${end_date}"
                 timezone="${timeZone}" xmlns="uri:oozie:coordinator:0.2">
    <controls>
        <execution>FIFO</execution>
    </controls>
    <action>
        <workflow>
            <app-path>${wf_application_path}</app-path>
            <configuration>
                <property><name>jobTracker</name><value>${jobTracker}</value></property>
                <property><name>nameNode</name><value>${nameNode}</value></property>
                <property><name>queueName</name><value>${queueName}</value></property>
                <property><name>checkClient</name><value>${checkClient}</value></property>
                <property><name>checkEXEC</name><value>${checkEXEC}</value></property>
                <property><name>zxURL</name><value>${zxURL}</value></property>
                <property><name>userName</name><value>${userName}</value></property>
                <property><name>passWord</name><value>${passWord}</value></property>
                <property><name>sourceTableName</name><value>${sourceTableName}</value></property>
                <property><name>columns</name><value>${columns}</value></property>
                <property><name>targetTableName</name><value>${targetTableName}</value></property>
                <property><name>fields_terminated</name><value>${fields_terminated}</value></property>
                <property><name>lines_terminated</name><value>${lines_terminated}</value></property>
                <property><name>num_mappers</name><value>${num_mappers}</value></property>
                <property><name>dbname</name><value>${dbname}</value></property>
                <property><name>jdbcURL</name><value>${jdbcURL}</value></property>
                <property><name>pwd</name><value>${pwd}</value></property>
                <property><name>partition_date</name><value>${coord:formatTime(coord:dateOffset(coord:nominalTime(), -1, 'DAY'), 'yyyyMMdd')}</value></property>
                <property><name>year</name><value>${coord:formatTime(coord:dateOffset(coord:nominalTime(), -1, 'DAY'), 'yyyy')}</value></property>
                <property><name>day</name><value>${coord:formatTime(coord:dateOffset(coord:nominalTime(), -1, 'DAY'), 'dd')}</value></property>
                <property><name>month</name><value>${coord:formatTime(coord:dateOffset(coord:nominalTime(), -1, 'DAY'), 'MM')}</value></property>
                <property><name>oozie.use.system.libpath</name><value>True</value></property>
                <property><name>start_date</name><value>${start_date}</value></property>
                <property><name>end_date</name><value>${end_date}</value></property>
            </configuration>
        </workflow>
    </action>
</coordinator-app>
workflow/api_advertisedoctor1/job.properties (new file, mode 100644)

# job.properties template for a full data import
oozie.use.system.libpath=True
security_enabled=False
# appName
# Must be named identically to the Hive table
appName=api_advertisedoctor1
dbname=default
nameNode=hdfs://bj-gm-test-data001:8020
jobTracker=bj-gm-test-data001:8032
queueName=data
timeZone=GMT+0800
# Start time of the job
start_date=2019-06-05T14:25+0800
# End time of the job
end_date=2019-06-21T14:25+0800
# HiveServer2 URL
jdbcURL=jdbc:hive2://bj-gm-prod-cos-datacenter006:2181,bj-gm-prod-cos-datacenter007:2181,bj-gm-prod-cos-datacenter008:2181/;serviceDiscoveryMode=zookeeper
# HiveServer2 password
pwd=data
checkClient=hdfs://bj-gm-test-data001:8020/user/hive/project/utils/data_ready_checker/client
checkEXEC=./checkclient/bin/checker.sh
# MySQL URL
# This is only an example; the actual URL depends on the source database being imported
zxURL=jdbc:mysql://172.22.30.12:3306/zhengxing?tinyInt1isBit=false
# Database user name
userName=work
# Database password
passWord=zJnxVEhyyxeC7ciqxdMITVyWqOFc2mew
# Source table
sourceTableName=api_advertisedoctor
# Target (output) table
targetTableName=api_advertisedoctor1
# Columns of the source table; must match the column order in the table
columns="id,doctor_id,rank"
num_mappers=1
fields_terminated=\\001
lines_terminated=\\n
oozie.coord.application.path=hdfs://bj-gm-test-data001:8020/user/hive/project/workflow/api_advertisedoctor1
wf_application_path=hdfs://bj-gm-test-data001:8020/user/hive/project/workflow/api_advertisedoctor1
# Execution schedule of the workflow
# Uses the same syntax as crontab
frequency=
# The workflow consists of three actions, executed in the order startAction -> checkAction -> jobAction
# These three fields hold the three action names
startAction=api_advertisedoctor1_sqoop
checkAction=api_advertisedoctor1_check
jobAction=api_advertisedoctor1_job
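With the files in place on HDFS, the coordinator is driven by this properties file. A hedged submission sketch with the stock Oozie CLI follows; the server URL (port 11000 is the Oozie default) is an assumption, not something this commit specifies:

# Submit the coordinator and verify it was accepted.
oozie job -oozie http://bj-gm-test-data001:11000/oozie \
          -config workflow/api_advertisedoctor1/job.properties -run
oozie jobs -oozie http://bj-gm-test-data001:11000/oozie -jobtype coordinator -len 10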
workflow/api_advertisedoctor1/workflow.xml (new file, mode 100644)

<workflow-app name="api_advertisedoctor1" xmlns="uri:oozie:workflow:0.5">
    <start to="api_advertisedoctor1_start"/>
    <kill name="Kill">
        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>
    <action name="api_advertisedoctor1_start">
        <sqoop xmlns="uri:oozie:sqoop-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <prepare>
                <delete path="${nameNode}/data/log/thirdparty/${targetTableName}/${partition_date}"/>
                <!--<mkdir path="${nameNode}/data/log/thirdparty/${targetTableName}/${partition_date}"/>-->
            </prepare>
            <configuration>
                <property><name>mapred.job.queue.name</name><value>${queueName}</value></property>
            </configuration>
            <command>import --connect ${zxURL} --username ${userName} --password ${passWord} --table ${sourceTableName} --columns ${columns} --target-dir /data/log/thirdparty/${targetTableName}/${partition_date} --fields-terminated-by ${fields_terminated} --lines-terminated-by ${lines_terminated} --num-mappers ${num_mappers} --hive-drop-import-delims --null-string \\N --null-non-string \\N</command>
        </sqoop>
        <ok to="api_advertisedoctor1_check"/>
        <error to="Kill"/>
    </action>
    <action name="api_advertisedoctor1_check" retry-max="3" retry-interval="5">
        <shell xmlns="uri:oozie:shell-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <configuration>
                <property><name>mapred.job.queue.name</name><value>${queueName}</value></property>
            </configuration>
            <exec>/bin/bash</exec>
            <argument>${checkEXEC}</argument>
            <env-var>TYPE=hdfs</env-var>
            <env-var>URI=/data/log/thirdparty/${targetTableName}/${partition_date}/</env-var>
            <archive>${checkClient}#checkclient</archive>
            <capture-output/>
        </shell>
        <ok to="api_advertisedoctor1_job"/>
        <error to="Kill"/>
    </action>
    <action name="api_advertisedoctor1_job" cred="hive2" retry-max="3" retry-interval="5">
        <hive2 xmlns="uri:oozie:hive2-action:0.1">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <configuration>
                <property><name>mapred.job.queue.name</name><value>${queueName}</value></property>
            </configuration>
            <jdbc-url>${jdbcURL}</jdbc-url>
            <password>${pwd}</password>
            <script>/user/hive/project/etl/api_advertisedoctor1/api_advertisedoctor1.sql</script>
            <param>partition_date=${partition_date}</param>
            <param>dbname=${dbname}</param>
        </hive2>
        <ok to="End"/>
        <error to="Kill"/>
    </action>
    <end name="End"/>
</workflow-app>
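For reference, substituting this commit's job.properties values into the <command> template above yields roughly the following Sqoop invocation (the date is illustrative and the password is elided):

sqoop import \
  --connect 'jdbc:mysql://172.22.30.12:3306/zhengxing?tinyInt1isBit=false' \
  --username work --password '***' \
  --table api_advertisedoctor --columns "id,doctor_id,rank" \
  --target-dir /data/log/thirdparty/api_advertisedoctor1/20190618 \
  --fields-terminated-by '\001' --lines-terminated-by '\n' \
  --num-mappers 1 --hive-drop-import-delims \
  --null-string '\\N' --null-non-string '\\N'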
workflow/api_advertisedoctor2/coordinator.xml (new file, mode 100644)

<coordinator-app name="api_advertisedoctor2" frequency="0 1 * * *" start="${start_date}" end="${end_date}"
                 timezone="${timeZone}" xmlns="uri:oozie:coordinator:0.2">
    <controls>
        <execution>FIFO</execution>
    </controls>
    <action>
        <workflow>
            <app-path>${wf_application_path}</app-path>
            <configuration>
                <property><name>jobTracker</name><value>${jobTracker}</value></property>
                <property><name>nameNode</name><value>${nameNode}</value></property>
                <property><name>queueName</name><value>${queueName}</value></property>
                <property><name>checkClient</name><value>${checkClient}</value></property>
                <property><name>checkEXEC</name><value>${checkEXEC}</value></property>
                <property><name>zxURL</name><value>${zxURL}</value></property>
                <property><name>userName</name><value>${userName}</value></property>
                <property><name>passWord</name><value>${passWord}</value></property>
                <property><name>sourceTableName</name><value>${sourceTableName}</value></property>
                <property><name>columns</name><value>${columns}</value></property>
                <property><name>targetTableName</name><value>${targetTableName}</value></property>
                <property><name>fields_terminated</name><value>${fields_terminated}</value></property>
                <property><name>lines_terminated</name><value>${lines_terminated}</value></property>
                <property><name>num_mappers</name><value>${num_mappers}</value></property>
                <property><name>dbname</name><value>${dbname}</value></property>
                <property><name>jdbcURL</name><value>${jdbcURL}</value></property>
                <property><name>pwd</name><value>${pwd}</value></property>
                <property><name>partition_date</name><value>${coord:formatTime(coord:dateOffset(coord:nominalTime(), -1, 'DAY'), 'yyyyMMdd')}</value></property>
                <property><name>year</name><value>${coord:formatTime(coord:dateOffset(coord:nominalTime(), -1, 'DAY'), 'yyyy')}</value></property>
                <property><name>day</name><value>${coord:formatTime(coord:dateOffset(coord:nominalTime(), -1, 'DAY'), 'dd')}</value></property>
                <property><name>month</name><value>${coord:formatTime(coord:dateOffset(coord:nominalTime(), -1, 'DAY'), 'MM')}</value></property>
                <property><name>oozie.use.system.libpath</name><value>True</value></property>
                <property><name>start_date</name><value>${start_date}</value></property>
                <property><name>end_date</name><value>${end_date}</value></property>
            </configuration>
        </workflow>
    </action>
</coordinator-app>
workflow/api_advertisedoctor2/job.properties (new file, mode 100644)

# job.properties template for a full data import
oozie.use.system.libpath=True
security_enabled=False
# appName
# Must be named identically to the Hive table
appName=api_advertisedoctor2
dbname=default
nameNode=hdfs://bj-gm-test-data001:8020
jobTracker=bj-gm-test-data001:8032
queueName=data
timeZone=GMT+0800
# Start time of the job
start_date=2019-06-05T14:28+0800
# End time of the job
end_date=2019-06-26T14:28+0800
# HiveServer2 URL
jdbcURL=jdbc:hive2://bj-gm-prod-cos-datacenter006:2181,bj-gm-prod-cos-datacenter007:2181,bj-gm-prod-cos-datacenter008:2181/;serviceDiscoveryMode=zookeeper
# HiveServer2 password
pwd=data
checkClient=hdfs://bj-gm-test-data001:8020/user/hive/project/utils/data_ready_checker/client
checkEXEC=./checkclient/bin/checker.sh
# MySQL URL
# This is only an example; the actual URL depends on the source database being imported
zxURL=jdbc:mysql://172.22.30.12:3306/zhengxing?tinyInt1isBit=false
# Database user name
userName=work
# Database password
passWord=zJnxVEhyyxeC7ciqxdMITVyWqOFc2mew
# Source table
sourceTableName=api_advertisedoctor
# Target (output) table
targetTableName=api_advertisedoctor2
# Columns of the source table; must match the column order in the table
columns="id,doctor_id,rank"
num_mappers=1
fields_terminated=\\001
lines_terminated=\\n
oozie.coord.application.path=hdfs://bj-gm-test-data001:8020/user/hive/project/workflow/api_advertisedoctor2
wf_application_path=hdfs://bj-gm-test-data001:8020/user/hive/project/workflow/api_advertisedoctor2
# Execution schedule of the workflow
# Uses the same syntax as crontab
frequency=
# The workflow consists of three actions, executed in the order startAction -> checkAction -> jobAction
# These three fields hold the three action names
startAction=api_advertisedoctor2_sqoop
checkAction=api_advertisedoctor2_check
jobAction=api_advertisedoctor2_job
workflow/api_advertisedoctor2/workflow.xml (new file, mode 100644)

<workflow-app name="api_advertisedoctor2" xmlns="uri:oozie:workflow:0.5">
    <start to="api_advertisedoctor2_start"/>
    <kill name="Kill">
        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>
    <action name="api_advertisedoctor2_start">
        <sqoop xmlns="uri:oozie:sqoop-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <prepare>
                <delete path="${nameNode}/data/log/thirdparty/${targetTableName}/${partition_date}"/>
                <!--<mkdir path="${nameNode}/data/log/thirdparty/${targetTableName}/${partition_date}"/>-->
            </prepare>
            <configuration>
                <property><name>mapred.job.queue.name</name><value>${queueName}</value></property>
            </configuration>
            <command>import --connect ${zxURL} --username ${userName} --password ${passWord} --table ${sourceTableName} --columns ${columns} --target-dir /data/log/thirdparty/${targetTableName}/${partition_date} --fields-terminated-by ${fields_terminated} --lines-terminated-by ${lines_terminated} --num-mappers ${num_mappers} --hive-drop-import-delims --null-string \\N --null-non-string \\N</command>
        </sqoop>
        <ok to="api_advertisedoctor2_check"/>
        <error to="Kill"/>
    </action>
    <action name="api_advertisedoctor2_check" retry-max="3" retry-interval="5">
        <shell xmlns="uri:oozie:shell-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <configuration>
                <property><name>mapred.job.queue.name</name><value>${queueName}</value></property>
            </configuration>
            <exec>/bin/bash</exec>
            <argument>${checkEXEC}</argument>
            <env-var>TYPE=hdfs</env-var>
            <env-var>URI=/data/log/thirdparty/${targetTableName}/${partition_date}/</env-var>
            <archive>${checkClient}#checkclient</archive>
            <capture-output/>
        </shell>
        <ok to="api_advertisedoctor2_job"/>
        <error to="Kill"/>
    </action>
    <action name="api_advertisedoctor2_job" cred="hive2" retry-max="3" retry-interval="5">
        <hive2 xmlns="uri:oozie:hive2-action:0.1">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <configuration>
                <property><name>mapred.job.queue.name</name><value>${queueName}</value></property>
            </configuration>
            <jdbc-url>${jdbcURL}</jdbc-url>
            <password>${pwd}</password>
            <script>/user/hive/project/etl/api_advertisedoctor2/api_advertisedoctor2.sql</script>
            <param>partition_date=${partition_date}</param>
            <param>dbname=${dbname}</param>
        </hive2>
        <ok to="End"/>
        <error to="Kill"/>
    </action>
    <end name="End"/>
</workflow-app>
workflow/api_advertisedoctor3/coordinator.xml (new file, mode 100644)

<coordinator-app name="api_advertisedoctor3" frequency="0 1 * * *" start="${start_date}" end="${end_date}"
                 timezone="${timeZone}" xmlns="uri:oozie:coordinator:0.2">
    <controls>
        <execution>FIFO</execution>
    </controls>
    <action>
        <workflow>
            <app-path>${wf_application_path}</app-path>
            <configuration>
                <property><name>jobTracker</name><value>${jobTracker}</value></property>
                <property><name>nameNode</name><value>${nameNode}</value></property>
                <property><name>queueName</name><value>${queueName}</value></property>
                <property><name>checkClient</name><value>${checkClient}</value></property>
                <property><name>checkEXEC</name><value>${checkEXEC}</value></property>
                <property><name>zxURL</name><value>${zxURL}</value></property>
                <property><name>userName</name><value>${userName}</value></property>
                <property><name>passWord</name><value>${passWord}</value></property>
                <property><name>sourceTableName</name><value>${sourceTableName}</value></property>
                <property><name>columns</name><value>${columns}</value></property>
                <property><name>targetTableName</name><value>${targetTableName}</value></property>
                <property><name>fields_terminated</name><value>${fields_terminated}</value></property>
                <property><name>lines_terminated</name><value>${lines_terminated}</value></property>
                <property><name>num_mappers</name><value>${num_mappers}</value></property>
                <property><name>dbname</name><value>${dbname}</value></property>
                <property><name>jdbcURL</name><value>${jdbcURL}</value></property>
                <property><name>pwd</name><value>${pwd}</value></property>
                <property><name>partition_date</name><value>${coord:formatTime(coord:dateOffset(coord:nominalTime(), -1, 'DAY'), 'yyyyMMdd')}</value></property>
                <property><name>year</name><value>${coord:formatTime(coord:dateOffset(coord:nominalTime(), -1, 'DAY'), 'yyyy')}</value></property>
                <property><name>day</name><value>${coord:formatTime(coord:dateOffset(coord:nominalTime(), -1, 'DAY'), 'dd')}</value></property>
                <property><name>month</name><value>${coord:formatTime(coord:dateOffset(coord:nominalTime(), -1, 'DAY'), 'MM')}</value></property>
                <property><name>oozie.use.system.libpath</name><value>True</value></property>
                <property><name>start_date</name><value>${start_date}</value></property>
                <property><name>end_date</name><value>${end_date}</value></property>
            </configuration>
        </workflow>
    </action>
</coordinator-app>
workflow/api_advertisedoctor3/job.properties (new file, mode 100644)

# job.properties template for a full data import
oozie.use.system.libpath=True
security_enabled=False
# appName
# Must be named identically to the Hive table
appName=api_advertisedoctor3
dbname=default
nameNode=hdfs://bj-gm-test-data001:8020
jobTracker=bj-gm-test-data001:8032
queueName=data
timeZone=GMT+0800
# Start time of the job
start_date=2019-06-05T14:28+0800
# End time of the job
end_date=2019-06-26T14:28+0800
# HiveServer2 URL
jdbcURL=jdbc:hive2://bj-gm-prod-cos-datacenter006:2181,bj-gm-prod-cos-datacenter007:2181,bj-gm-prod-cos-datacenter008:2181/;serviceDiscoveryMode=zookeeper
# HiveServer2 password
pwd=data
checkClient=hdfs://bj-gm-test-data001:8020/user/hive/project/utils/data_ready_checker/client
checkEXEC=./checkclient/bin/checker.sh
# MySQL URL
# This is only an example; the actual URL depends on the source database being imported
zxURL=jdbc:mysql://172.22.30.12:3306/zhengxing?tinyInt1isBit=false
# Database user name
userName=work
# Database password
passWord=zJnxVEhyyxeC7ciqxdMITVyWqOFc2mew
# Source table
sourceTableName=api_advertisedoctor
# Target (output) table
targetTableName=api_advertisedoctor3
# Columns of the source table; must match the column order in the table
columns="id,doctor_id,rank"
num_mappers=1
fields_terminated=\\001
lines_terminated=\\n
oozie.coord.application.path=hdfs://bj-gm-test-data001:8020/user/hive/project/workflow/api_advertisedoctor3
wf_application_path=hdfs://bj-gm-test-data001:8020/user/hive/project/workflow/api_advertisedoctor3
# Execution schedule of the workflow
# Uses the same syntax as crontab
frequency=
# The workflow consists of three actions, executed in the order startAction -> checkAction -> jobAction
# These three fields hold the three action names
startAction=api_advertisedoctor3_sqoop
checkAction=api_advertisedoctor3_check
jobAction=api_advertisedoctor3_job
workflow/api_advertisedoctor3/workflow.xml (new file, mode 100644)

<workflow-app name="api_advertisedoctor3" xmlns="uri:oozie:workflow:0.5">
    <start to="api_advertisedoctor3_start"/>
    <kill name="Kill">
        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>
    <action name="api_advertisedoctor3_start">
        <sqoop xmlns="uri:oozie:sqoop-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <prepare>
                <delete path="${nameNode}/data/log/thirdparty/${targetTableName}/${partition_date}"/>
                <!--<mkdir path="${nameNode}/data/log/thirdparty/${targetTableName}/${partition_date}"/>-->
            </prepare>
            <configuration>
                <property><name>mapred.job.queue.name</name><value>${queueName}</value></property>
            </configuration>
            <command>import --connect ${zxURL} --username ${userName} --password ${passWord} --table ${sourceTableName} --columns ${columns} --target-dir /data/log/thirdparty/${targetTableName}/${partition_date} --fields-terminated-by ${fields_terminated} --lines-terminated-by ${lines_terminated} --num-mappers ${num_mappers} --hive-drop-import-delims --null-string \\N --null-non-string \\N --where '3333'</command>
        </sqoop>
        <ok to="api_advertisedoctor3_check"/>
        <error to="Kill"/>
    </action>
    <action name="api_advertisedoctor3_check" retry-max="3" retry-interval="5">
        <shell xmlns="uri:oozie:shell-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <configuration>
                <property><name>mapred.job.queue.name</name><value>${queueName}</value></property>
            </configuration>
            <exec>/bin/bash</exec>
            <argument>${checkEXEC}</argument>
            <env-var>TYPE=hdfs</env-var>
            <env-var>URI=/data/log/thirdparty/${targetTableName}/${partition_date}/</env-var>
            <archive>${checkClient}#checkclient</archive>
            <capture-output/>
        </shell>
        <ok to="api_advertisedoctor3_job"/>
        <error to="Kill"/>
    </action>
    <action name="api_advertisedoctor3_job" cred="hive2" retry-max="3" retry-interval="5">
        <hive2 xmlns="uri:oozie:hive2-action:0.1">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <configuration>
                <property><name>mapred.job.queue.name</name><value>${queueName}</value></property>
            </configuration>
            <jdbc-url>${jdbcURL}</jdbc-url>
            <password>${pwd}</password>
            <script>/user/hive/project/etl/api_advertisedoctor3/api_advertisedoctor3.sql</script>
            <param>partition_date=${partition_date}</param>
            <param>dbname=${dbname}</param>
        </hive2>
        <ok to="End"/>
        <error to="Kill"/>
    </action>
    <end name="End"/>
</workflow-app>
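One difference from the doctor1/doctor2 workflows: this command appends --where '3333' (doctor4's appends --where "333"), a bare literal rather than a predicate, which looks like leftover test input. For reference, Sqoop's --where expects a SQL condition; a hypothetical well-formed fragment:

# Illustrative fragment only, not from this commit; the remaining
# import arguments would stay as in the <command> above.
#   --where "id > 3333"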
workflow/api_advertisedoctor4/coordinator.xml (new file, mode 100644)

<coordinator-app name="api_advertisedoctor4" frequency="0 1 * * *" start="${start_date}" end="${end_date}"
                 timezone="${timeZone}" xmlns="uri:oozie:coordinator:0.2">
    <controls>
        <execution>FIFO</execution>
    </controls>
    <action>
        <workflow>
            <app-path>${wf_application_path}</app-path>
            <configuration>
                <property><name>jobTracker</name><value>${jobTracker}</value></property>
                <property><name>nameNode</name><value>${nameNode}</value></property>
                <property><name>queueName</name><value>${queueName}</value></property>
                <property><name>checkClient</name><value>${checkClient}</value></property>
                <property><name>checkEXEC</name><value>${checkEXEC}</value></property>
                <property><name>zxURL</name><value>${zxURL}</value></property>
                <property><name>userName</name><value>${userName}</value></property>
                <property><name>passWord</name><value>${passWord}</value></property>
                <property><name>sourceTableName</name><value>${sourceTableName}</value></property>
                <property><name>columns</name><value>${columns}</value></property>
                <property><name>targetTableName</name><value>${targetTableName}</value></property>
                <property><name>fields_terminated</name><value>${fields_terminated}</value></property>
                <property><name>lines_terminated</name><value>${lines_terminated}</value></property>
                <property><name>num_mappers</name><value>${num_mappers}</value></property>
                <property><name>dbname</name><value>${dbname}</value></property>
                <property><name>jdbcURL</name><value>${jdbcURL}</value></property>
                <property><name>pwd</name><value>${pwd}</value></property>
                <property><name>partition_date</name><value>${coord:formatTime(coord:dateOffset(coord:nominalTime(), -1, 'DAY'), 'yyyyMMdd')}</value></property>
                <property><name>year</name><value>${coord:formatTime(coord:dateOffset(coord:nominalTime(), -1, 'DAY'), 'yyyy')}</value></property>
                <property><name>day</name><value>${coord:formatTime(coord:dateOffset(coord:nominalTime(), -1, 'DAY'), 'dd')}</value></property>
                <property><name>month</name><value>${coord:formatTime(coord:dateOffset(coord:nominalTime(), -1, 'DAY'), 'MM')}</value></property>
                <property><name>oozie.use.system.libpath</name><value>True</value></property>
                <property><name>start_date</name><value>${start_date}</value></property>
                <property><name>end_date</name><value>${end_date}</value></property>
            </configuration>
        </workflow>
    </action>
</coordinator-app>
workflow/api_advertisedoctor4/job.properties (new file, mode 100644)

# job.properties template for a full data import
oozie.use.system.libpath=True
security_enabled=False
# appName
# Must be named identically to the Hive table
appName=api_advertisedoctor4
dbname=default
nameNode=hdfs://bj-gm-test-data001:8020
jobTracker=bj-gm-test-data001:8032
queueName=data
timeZone=GMT+0800
# Start time of the job
start_date=2019-06-04T14:32+0800
# End time of the job
end_date=2019-06-19T14:32+0800
# HiveServer2 URL
jdbcURL=jdbc:hive2://bj-gm-prod-cos-datacenter006:2181,bj-gm-prod-cos-datacenter007:2181,bj-gm-prod-cos-datacenter008:2181/;serviceDiscoveryMode=zookeeper
# HiveServer2 password
pwd=data
checkClient=hdfs://bj-gm-test-data001:8020/user/hive/project/utils/data_ready_checker/client
checkEXEC=./checkclient/bin/checker.sh
# MySQL URL
# This is only an example; the actual URL depends on the source database being imported
zxURL=jdbc:mysql://172.22.30.12:3306/zhengxing?tinyInt1isBit=false
# Database user name
userName=work
# Database password
passWord=zJnxVEhyyxeC7ciqxdMITVyWqOFc2mew
# Source table
sourceTableName=api_advertisedoctor
# Target (output) table
targetTableName=api_advertisedoctor4
# Columns of the source table; must match the column order in the table
columns="id,doctor_id,rank"
num_mappers=1
fields_terminated=\\001
lines_terminated=\\n
oozie.coord.application.path=hdfs://bj-gm-test-data001:8020/user/hive/project/workflow/api_advertisedoctor4
wf_application_path=hdfs://bj-gm-test-data001:8020/user/hive/project/workflow/api_advertisedoctor4
# Execution schedule of the workflow
# Uses the same syntax as crontab
frequency=
# The workflow consists of three actions, executed in the order startAction -> checkAction -> jobAction
# These three fields hold the three action names
startAction=api_advertisedoctor4_sqoop
checkAction=api_advertisedoctor4_check
jobAction=api_advertisedoctor4_job
workflow/api_advertisedoctor4/workflow.xml (new file, mode 100644)

<workflow-app name="api_advertisedoctor4" xmlns="uri:oozie:workflow:0.5">
    <start to="api_advertisedoctor4_start"/>
    <kill name="Kill">
        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>
    <action name="api_advertisedoctor4_start">
        <sqoop xmlns="uri:oozie:sqoop-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <prepare>
                <delete path="${nameNode}/data/log/thirdparty/${targetTableName}/${partition_date}"/>
                <!--<mkdir path="${nameNode}/data/log/thirdparty/${targetTableName}/${partition_date}"/>-->
            </prepare>
            <configuration>
                <property><name>mapred.job.queue.name</name><value>${queueName}</value></property>
            </configuration>
            <command>import --connect ${zxURL} --username ${userName} --password ${passWord} --table ${sourceTableName} --columns ${columns} --target-dir /data/log/thirdparty/${targetTableName}/${partition_date} --fields-terminated-by ${fields_terminated} --lines-terminated-by ${lines_terminated} --num-mappers ${num_mappers} --hive-drop-import-delims --null-string \\N --null-non-string \\N --where "333"</command>
        </sqoop>
        <ok to="api_advertisedoctor4_check"/>
        <error to="Kill"/>
    </action>
    <action name="api_advertisedoctor4_check" retry-max="3" retry-interval="5">
        <shell xmlns="uri:oozie:shell-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <configuration>
                <property><name>mapred.job.queue.name</name><value>${queueName}</value></property>
            </configuration>
            <exec>/bin/bash</exec>
            <argument>${checkEXEC}</argument>
            <env-var>TYPE=hdfs</env-var>
            <env-var>URI=/data/log/thirdparty/${targetTableName}/${partition_date}/</env-var>
            <archive>${checkClient}#checkclient</archive>
            <capture-output/>
        </shell>
        <ok to="api_advertisedoctor4_job"/>
        <error to="Kill"/>
    </action>
    <action name="api_advertisedoctor4_job" cred="hive2" retry-max="3" retry-interval="5">
        <hive2 xmlns="uri:oozie:hive2-action:0.1">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <configuration>
                <property><name>mapred.job.queue.name</name><value>${queueName}</value></property>
            </configuration>
            <jdbc-url>${jdbcURL}</jdbc-url>
            <password>${pwd}</password>
            <script>/user/hive/project/etl/api_advertisedoctor4/api_advertisedoctor4.sql</script>
            <param>partition_date=${partition_date}</param>
            <param>dbname=${dbname}</param>
        </hive2>
        <ok to="End"/>
        <error to="Kill"/>
    </action>
    <end name="End"/>
</workflow-app>
workflow/api_advertisedoctor5/coordinator.xml (new file, mode 100644)

<coordinator-app name="api_advertisedoctor5" frequency="0 1 * * *" start="${start_date}" end="${end_date}"
                 timezone="${timeZone}" xmlns="uri:oozie:coordinator:0.2">
    <controls>
        <execution>FIFO</execution>
    </controls>
    <action>
        <workflow>
            <app-path>${wf_application_path}</app-path>
            <configuration>
                <property><name>jobTracker</name><value>${jobTracker}</value></property>
                <property><name>nameNode</name><value>${nameNode}</value></property>
                <property><name>queueName</name><value>${queueName}</value></property>
                <property><name>checkClient</name><value>${checkClient}</value></property>
                <property><name>checkEXEC</name><value>${checkEXEC}</value></property>
                <property><name>zxURL</name><value>${zxURL}</value></property>
                <property><name>userName</name><value>${userName}</value></property>
                <property><name>passWord</name><value>${passWord}</value></property>
                <property><name>sourceTableName</name><value>${sourceTableName}</value></property>
                <property><name>columns</name><value>${columns}</value></property>
                <property><name>targetTableName</name><value>${targetTableName}</value></property>
                <property><name>fields_terminated</name><value>${fields_terminated}</value></property>
                <property><name>lines_terminated</name><value>${lines_terminated}</value></property>
                <property><name>num_mappers</name><value>${num_mappers}</value></property>
                <property><name>dbname</name><value>${dbname}</value></property>
                <property><name>jdbcURL</name><value>${jdbcURL}</value></property>
                <property><name>pwd</name><value>${pwd}</value></property>
                <property><name>partition_date</name><value>${coord:formatTime(coord:dateOffset(coord:nominalTime(), -1, 'DAY'), 'yyyyMMdd')}</value></property>
                <property><name>year</name><value>${coord:formatTime(coord:dateOffset(coord:nominalTime(), -1, 'DAY'), 'yyyy')}</value></property>
                <property><name>day</name><value>${coord:formatTime(coord:dateOffset(coord:nominalTime(), -1, 'DAY'), 'dd')}</value></property>
                <property><name>month</name><value>${coord:formatTime(coord:dateOffset(coord:nominalTime(), -1, 'DAY'), 'MM')}</value></property>
                <property><name>oozie.use.system.libpath</name><value>True</value></property>
                <property><name>start_date</name><value>${start_date}</value></property>
                <property><name>end_date</name><value>${end_date}</value></property>
            </configuration>
        </workflow>
    </action>
</coordinator-app>
workflow/api_advertisedoctor5/job.properties (new file, mode 100644)

# job.properties template for a full data import
oozie.use.system.libpath=True
security_enabled=False
# appName
# Must be named identically to the Hive table
appName=api_advertisedoctor5
dbname=default
nameNode=hdfs://bj-gm-test-data001:8020
jobTracker=bj-gm-test-data001:8032
queueName=data
timeZone=GMT+0800
# Start time of the job
start_date=2019-06-04T14:39+0800
# End time of the job
end_date=2019-06-08T14:39+0800
# HiveServer2 URL
jdbcURL=jdbc:hive2://bj-gm-prod-cos-datacenter006:2181,bj-gm-prod-cos-datacenter007:2181,bj-gm-prod-cos-datacenter008:2181/;serviceDiscoveryMode=zookeeper
# HiveServer2 password
pwd=data
checkClient=hdfs://bj-gm-test-data001:8020/user/hive/project/utils/data_ready_checker/client
checkEXEC=./checkclient/bin/checker.sh
# MySQL URL
# This is only an example; the actual URL depends on the source database being imported
zxURL=jdbc:mysql://172.22.30.12:3306/zhengxing?tinyInt1isBit=false
# Database user name
userName=work
# Database password
passWord=zJnxVEhyyxeC7ciqxdMITVyWqOFc2mew
# Source table
sourceTableName=api_advertisedoctor
# Target (output) table
targetTableName=api_advertisedoctor5
# Columns of the source table; must match the column order in the table
columns="id,doctor_id,rank"
num_mappers=1
fields_terminated=\\001
lines_terminated=\\n
oozie.coord.application.path=hdfs://bj-gm-test-data001:8020/user/hive/project/workflow/api_advertisedoctor5
wf_application_path=hdfs://bj-gm-test-data001:8020/user/hive/project/workflow/api_advertisedoctor5
# Execution schedule of the workflow
# Uses the same syntax as crontab
frequency=
# The workflow consists of three actions, executed in the order startAction -> checkAction -> jobAction
# These three fields hold the three action names
startAction=api_advertisedoctor5_sqoop
checkAction=api_advertisedoctor5_check
jobAction=api_advertisedoctor5_job
workflow/api_advertisedoctor5/workflow.xml (new file, mode 100644)

<workflow-app name="api_advertisedoctor5" xmlns="uri:oozie:workflow:0.5">
    <start to="api_advertisedoctor5_start"/>
    <kill name="Kill">
        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>
    <action name="api_advertisedoctor5_start">
        <sqoop xmlns="uri:oozie:sqoop-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <prepare>
                <delete path="${nameNode}/data/log/thirdparty/${targetTableName}/${partition_date}"/>
                <!--<mkdir path="${nameNode}/data/log/thirdparty/${targetTableName}/${partition_date}"/>-->
            </prepare>
            <configuration>
                <property><name>mapred.job.queue.name</name><value>${queueName}</value></property>
            </configuration>
            <command><![CDATA[${sqoopCmd}]]></command>
        </sqoop>
        <ok to="api_advertisedoctor5_check"/>
        <error to="Kill"/>
    </action>
    <action name="api_advertisedoctor5_check" retry-max="3" retry-interval="5">
        <shell xmlns="uri:oozie:shell-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <configuration>
                <property><name>mapred.job.queue.name</name><value>${queueName}</value></property>
            </configuration>
            <exec>/bin/bash</exec>
            <argument>${checkEXEC}</argument>
            <env-var>TYPE=hdfs</env-var>
            <env-var>URI=/data/log/thirdparty/${targetTableName}/${partition_date}/</env-var>
            <archive>${checkClient}#checkclient</archive>
            <capture-output/>
        </shell>
        <ok to="api_advertisedoctor5_job"/>
        <error to="Kill"/>
    </action>
    <action name="api_advertisedoctor5_job" cred="hive2" retry-max="3" retry-interval="5">
        <hive2 xmlns="uri:oozie:hive2-action:0.1">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <configuration>
                <property><name>mapred.job.queue.name</name><value>${queueName}</value></property>
            </configuration>
            <jdbc-url>${jdbcURL}</jdbc-url>
            <password>${pwd}</password>
            <script>/user/hive/project/etl/api_advertisedoctor5/api_advertisedoctor5.sql</script>
            <param>partition_date=${partition_date}</param>
            <param>dbname=${dbname}</param>
        </hive2>
        <ok to="End"/>
        <error to="Kill"/>
    </action>
    <end name="End"/>
</workflow-app>
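This workflow differs from the earlier ones: the Sqoop command is not assembled inline but taken wholesale from a ${sqoopCmd} property wrapped in CDATA. The accompanying job.properties does not define sqoopCmd, so the value has to arrive from elsewhere; one hedged possibility is an override at submission time (-D is the standard Oozie CLI property override, and the command body here is illustrative only):

oozie job -oozie http://bj-gm-test-data001:11000/oozie \
          -config workflow/api_advertisedoctor5/job.properties \
          -D 'sqoopCmd=import --connect jdbc:mysql://172.22.30.12:3306/zhengxing --table api_advertisedoctor --target-dir /data/log/thirdparty/api_advertisedoctor5/20190618' \
          -run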
workflow/api_advertisedoctor6/coordinator.xml
0 → 100644
View file @
e8a0eea5
<coordinator-app name="api_advertisedoctor6" frequency="0 1 * * *" start="${start_date}" end="${end_date}" timezone="${timeZone}" xmlns="uri:oozie:coordinator:0.2">
    <controls>
        <execution>FIFO</execution>
    </controls>
    <action>
        <workflow>
            <app-path>${wf_application_path}</app-path>
            <configuration>
                <property><name>jobTracker</name><value>${jobTracker}</value></property>
                <property><name>nameNode</name><value>${nameNode}</value></property>
                <property><name>queueName</name><value>${queueName}</value></property>
                <property><name>checkClient</name><value>${checkClient}</value></property>
                <property><name>checkEXEC</name><value>${checkEXEC}</value></property>
                <property><name>zxURL</name><value>${zxURL}</value></property>
                <property><name>userName</name><value>${userName}</value></property>
                <property><name>passWord</name><value>${passWord}</value></property>
                <property><name>sourceTableName</name><value>${sourceTableName}</value></property>
                <property><name>columns</name><value>${columns}</value></property>
                <property><name>targetTableName</name><value>${targetTableName}</value></property>
                <property><name>fields_terminated</name><value>${fields_terminated}</value></property>
                <property><name>lines_terminated</name><value>${lines_terminated}</value></property>
                <property><name>num_mappers</name><value>${num_mappers}</value></property>
                <property><name>dbname</name><value>${dbname}</value></property>
                <property><name>jdbcURL</name><value>${jdbcURL}</value></property>
                <property><name>pwd</name><value>${pwd}</value></property>
                <property><name>partition_date</name><value>${coord:formatTime(coord:dateOffset(coord:nominalTime(), -1, 'DAY'), 'yyyyMMdd')}</value></property>
                <property><name>year</name><value>${coord:formatTime(coord:dateOffset(coord:nominalTime(), -1, 'DAY'), 'yyyy')}</value></property>
                <property><name>day</name><value>${coord:formatTime(coord:dateOffset(coord:nominalTime(), -1, 'DAY'), 'dd')}</value></property>
                <property><name>month</name><value>${coord:formatTime(coord:dateOffset(coord:nominalTime(), -1, 'DAY'), 'MM')}</value></property>
                <property><name>oozie.use.system.libpath</name><value>True</value></property>
                <property><name>start_date</name><value>${start_date}</value></property>
                <property><name>end_date</name><value>${end_date}</value></property>
            </configuration>
        </workflow>
    </action>
</coordinator-app>
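The partition_date property above shifts the coordinator's nominal time back one day and formats it as yyyyMMdd, so each 01:00 run (frequency "0 1 * * *") loads the previous day's partition. Worked through for the materialization with nominal time 2019-06-19T01:00+0800:

${coord:formatTime(coord:dateOffset(coord:nominalTime(), -1, 'DAY'), 'yyyyMMdd')}  ->  20190618

year, month and day are derived from the same shifted date (2019, 06, 18).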
workflow/api_advertisedoctor6/job.properties
#job.properties template for a full data import
oozie.use.system.libpath = True
security_enabled = False
#appName
#Must be named exactly like the Hive table
appName = api_advertisedoctor6
dbname = default
nameNode = hdfs://bj-gm-test-data001:8020
jobTracker = bj-gm-test-data001:8032
queueName = data
timeZone = GMT+0800
#Start time of the job
start_date = 2019-06-03T14:42+0800
#End time of the job
end_date = 2019-06-21T14:42+0800
#HiveServer2 URL
jdbcURL = jdbc:hive2://bj-gm-prod-cos-datacenter006:2181,bj-gm-prod-cos-datacenter007:2181,bj-gm-prod-cos-datacenter008:2181/;serviceDiscoveryMode=zookeeper
#HiveServer2 password
pwd = data
checkClient = hdfs://bj-gm-test-data001:8020/user/hive/project/utils/data_ready_checker/client
checkEXEC = ./checkclient/bin/checker.sh
#MySQL URL
#Example only; the actual URL depends on the source database to import from
zxURL = jdbc:mysql://172.22.30.12:3306/zhengxing?tinyInt1isBit=false
#Database user name
userName = work
#Database password
passWord = zJnxVEhyyxeC7ciqxdMITVyWqOFc2mew
#Source table
sourceTableName = api_advertisedoctor
#Output (target) table
targetTableName = api_advertisedoctor6
#Columns of the source table; the order must match the table definition
columns = "id,doctor_id,rank"
num_mappers = 1
fields_terminated = \\001
lines_terminated = \\n
oozie.coord.application.path = hdfs://bj-gm-test-data001:8020/user/hive/project/workflow/api_advertisedoctor6
wf_application_path = hdfs://bj-gm-test-data001:8020/user/hive/project/workflow/api_advertisedoctor6
#Schedule of the workflow
#Same syntax as crontab
frequency =
#The workflow consists of three actions, executed in the order startAction -> checkAction -> jobAction
#These three fields hold the three action names
startAction = api_advertisedoctor6_sqoop
checkAction = api_advertisedoctor6_check
jobAction = api_advertisedoctor6_job
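For reference, a properties file like this one is normally submitted with the Oozie CLI; a minimal sketch, assuming the Oozie server listens on its default port 11000 on bj-gm-test-data001 (the server URL itself is not given anywhere in this commit):

oozie job -oozie http://bj-gm-test-data001:11000/oozie -config job.properties -run

Because oozie.coord.application.path is set, this starts the coordinator, which then materializes the workflow at wf_application_path once per day between start_date and end_date.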
workflow/api_advertisedoctor6/workflow.xml
<workflow-app name="api_advertisedoctor6" xmlns="uri:oozie:workflow:0.5">
    <start to="api_advertisedoctor6_start"/>
    <kill name="Kill">
        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>
    <action name="api_advertisedoctor6_start">
        <sqoop xmlns="uri:oozie:sqoop-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <prepare>
                <delete path="${nameNode}/data/log/thirdparty/${targetTableName}/${partition_date}"/>
                <!--<mkdir path="${nameNode}/data/log/thirdparty/${targetTableName}/${partition_date}"/>-->
            </prepare>
            <configuration>
                <property><name>mapred.job.queue.name</name><value>${queueName}</value></property>
            </configuration>
            <command><![CDATA[import --connect ${zxURL} --username ${userName} --password ${passWord} --table ${sourceTableName} --columns ${columns} --target-dir /data/log/thirdparty/${targetTableName}/${partition_date} --fields-terminated-by ${fields_terminated} --lines-terminated-by ${lines_terminated} --num-mappers ${num_mappers} --hive-drop-import-delims --null-string \\N --null-non-string \\N --where "33"]]></command>
        </sqoop>
        <ok to="api_advertisedoctor6_check"/>
        <error to="Kill"/>
    </action>
    <action name="api_advertisedoctor6_check" retry-max="3" retry-interval="5">
        <shell xmlns="uri:oozie:shell-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <configuration>
                <property><name>mapred.job.queue.name</name><value>${queueName}</value></property>
            </configuration>
            <exec>/bin/bash</exec>
            <argument>${checkEXEC}</argument>
            <env-var>TYPE=hdfs</env-var>
            <env-var>URI=/data/log/thirdparty/${targetTableName}/${partition_date}/</env-var>
            <archive>${checkClient}#checkclient</archive>
            <capture-output/>
        </shell>
        <ok to="api_advertisedoctor6_job"/>
        <error to="Kill"/>
    </action>
    <action name="api_advertisedoctor6_job" cred="hive2" retry-max="3" retry-interval="5">
        <hive2 xmlns="uri:oozie:hive2-action:0.1">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <configuration>
                <property><name>mapred.job.queue.name</name><value>${queueName}</value></property>
            </configuration>
            <jdbc-url>${jdbcURL}</jdbc-url>
            <password>${pwd}</password>
            <script>/user/hive/project/etl/api_advertisedoctor6/api_advertisedoctor6.sql</script>
            <param>partition_date=${partition_date}</param>
            <param>dbname=${dbname}</param>
        </hive2>
        <ok to="End"/>
        <error to="Kill"/>
    </action>
    <end name="End"/>
</workflow-app>
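The _check action above ships the data_ready_checker client as an <archive> and runs checker.sh under /bin/bash with TYPE and URI exported, so the Hive load only starts once the Sqoop output is in place. The checker's own code is not part of this commit; a rough hand-run equivalent of what it is given to verify (partition 20190618 used as an example):

TYPE=hdfs URI=/data/log/thirdparty/api_advertisedoctor6/20190618/ bash ./checkclient/bin/checker.sh
# comparable manual probe, assuming the check amounts to directory existence:
hdfs dfs -test -e /data/log/thirdparty/api_advertisedoctor6/20190618/ && echo ready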
workflow/api_advertisedoctor7/coordinator.xml
<coordinator-app name="api_advertisedoctor7" frequency="0 1 * * *" start="${start_date}" end="${end_date}" timezone="${timeZone}" xmlns="uri:oozie:coordinator:0.2">
    <controls>
        <execution>FIFO</execution>
    </controls>
    <action>
        <workflow>
            <app-path>${wf_application_path}</app-path>
            <configuration>
                <property><name>jobTracker</name><value>${jobTracker}</value></property>
                <property><name>nameNode</name><value>${nameNode}</value></property>
                <property><name>queueName</name><value>${queueName}</value></property>
                <property><name>checkClient</name><value>${checkClient}</value></property>
                <property><name>checkEXEC</name><value>${checkEXEC}</value></property>
                <property><name>zxURL</name><value>${zxURL}</value></property>
                <property><name>userName</name><value>${userName}</value></property>
                <property><name>passWord</name><value>${passWord}</value></property>
                <property><name>sourceTableName</name><value>${sourceTableName}</value></property>
                <property><name>columns</name><value>${columns}</value></property>
                <property><name>targetTableName</name><value>${targetTableName}</value></property>
                <property><name>fields_terminated</name><value>${fields_terminated}</value></property>
                <property><name>lines_terminated</name><value>${lines_terminated}</value></property>
                <property><name>num_mappers</name><value>${num_mappers}</value></property>
                <property><name>dbname</name><value>${dbname}</value></property>
                <property><name>jdbcURL</name><value>${jdbcURL}</value></property>
                <property><name>pwd</name><value>${pwd}</value></property>
                <property><name>partition_date</name><value>${coord:formatTime(coord:dateOffset(coord:nominalTime(), -1, 'DAY'), 'yyyyMMdd')}</value></property>
                <property><name>year</name><value>${coord:formatTime(coord:dateOffset(coord:nominalTime(), -1, 'DAY'), 'yyyy')}</value></property>
                <property><name>day</name><value>${coord:formatTime(coord:dateOffset(coord:nominalTime(), -1, 'DAY'), 'dd')}</value></property>
                <property><name>month</name><value>${coord:formatTime(coord:dateOffset(coord:nominalTime(), -1, 'DAY'), 'MM')}</value></property>
                <property><name>oozie.use.system.libpath</name><value>True</value></property>
                <property><name>start_date</name><value>${start_date}</value></property>
                <property><name>end_date</name><value>${end_date}</value></property>
            </configuration>
        </workflow>
    </action>
</coordinator-app>
workflow/api_advertisedoctor7/job.properties
#job.properties template for a full data import
oozie.use.system.libpath = True
security_enabled = False
#appName
#Must be named exactly like the Hive table
appName = api_advertisedoctor7
dbname = default
nameNode = hdfs://bj-gm-test-data001:8020
jobTracker = bj-gm-test-data001:8032
queueName = data
timeZone = GMT+0800
#Start time of the job
start_date = 2019-06-03T14:45+0800
#End time of the job
end_date = 2019-06-19T14:45+0800
#HiveServer2 URL
jdbcURL = jdbc:hive2://bj-gm-prod-cos-datacenter006:2181,bj-gm-prod-cos-datacenter007:2181,bj-gm-prod-cos-datacenter008:2181/;serviceDiscoveryMode=zookeeper
#HiveServer2 password
pwd = data
checkClient = hdfs://bj-gm-test-data001:8020/user/hive/project/utils/data_ready_checker/client
checkEXEC = ./checkclient/bin/checker.sh
#MySQL URL
#Example only; the actual URL depends on the source database to import from
zxURL = jdbc:mysql://172.22.30.12:3306/zhengxing?tinyInt1isBit=false
#Database user name
userName = work
#Database password
passWord = zJnxVEhyyxeC7ciqxdMITVyWqOFc2mew
#Source table
sourceTableName = api_advertisedoctor
#Output (target) table
targetTableName = api_advertisedoctor7
#Columns of the source table; the order must match the table definition
columns = "id,doctor_id,rank"
num_mappers = 1
fields_terminated = \\001
lines_terminated = \\n
oozie.coord.application.path = hdfs://bj-gm-test-data001:8020/user/hive/project/workflow/api_advertisedoctor7
wf_application_path = hdfs://bj-gm-test-data001:8020/user/hive/project/workflow/api_advertisedoctor7
#Schedule of the workflow
#Same syntax as crontab
frequency =
#The workflow consists of three actions, executed in the order startAction -> checkAction -> jobAction
#These three fields hold the three action names
startAction = api_advertisedoctor7_sqoop
checkAction = api_advertisedoctor7_check
jobAction = api_advertisedoctor7_job
workflow/api_advertisedoctor7/workflow.xml
<workflow-app name="api_advertisedoctor7" xmlns="uri:oozie:workflow:0.5">
    <start to="api_advertisedoctor7_start"/>
    <kill name="Kill">
        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>
    <action name="api_advertisedoctor7_start">
        <sqoop xmlns="uri:oozie:sqoop-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <prepare>
                <delete path="${nameNode}/data/log/thirdparty/${targetTableName}/${partition_date}"/>
                <!--<mkdir path="${nameNode}/data/log/thirdparty/${targetTableName}/${partition_date}"/>-->
            </prepare>
            <configuration>
                <property><name>mapred.job.queue.name</name><value>${queueName}</value></property>
            </configuration>
            <command>import --connect ${zxURL} --username ${userName} --password ${passWord} --table ${sourceTableName} --columns ${columns} --target-dir /data/log/thirdparty/${targetTableName}/${partition_date} --fields-terminated-by ${fields_terminated} --lines-terminated-by ${lines_terminated} --num-mappers ${num_mappers} --hive-drop-import-delims --null-string \\N --null-non-string \\N --where "33333"</command>
        </sqoop>
        <ok to="api_advertisedoctor7_check"/>
        <error to="Kill"/>
    </action>
    <action name="api_advertisedoctor7_check" retry-max="3" retry-interval="5">
        <shell xmlns="uri:oozie:shell-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <configuration>
                <property><name>mapred.job.queue.name</name><value>${queueName}</value></property>
            </configuration>
            <exec>/bin/bash</exec>
            <argument>${checkEXEC}</argument>
            <env-var>TYPE=hdfs</env-var>
            <env-var>URI=/data/log/thirdparty/${targetTableName}/${partition_date}/</env-var>
            <archive>${checkClient}#checkclient</archive>
            <capture-output/>
        </shell>
        <ok to="api_advertisedoctor7_job"/>
        <error to="Kill"/>
    </action>
    <action name="api_advertisedoctor7_job" cred="hive2" retry-max="3" retry-interval="5">
        <hive2 xmlns="uri:oozie:hive2-action:0.1">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <configuration>
                <property><name>mapred.job.queue.name</name><value>${queueName}</value></property>
            </configuration>
            <jdbc-url>${jdbcURL}</jdbc-url>
            <password>${pwd}</password>
            <script>/user/hive/project/etl/api_advertisedoctor7/api_advertisedoctor7.sql</script>
            <param>partition_date=${partition_date}</param>
            <param>dbname=${dbname}</param>
        </hive2>
        <ok to="End"/>
        <error to="Kill"/>
    </action>
    <end name="End"/>
</workflow-app>
workflow/fffffffffffffff/coordinator.xml
<coordinator-app name="fffffffffffffff" frequency="0 1 * * *" start="${start_date}" end="${end_date}" timezone="${timeZone}" xmlns="uri:oozie:coordinator:0.2">
    <controls>
        <execution>FIFO</execution>
    </controls>
    <action>
        <workflow>
            <app-path>${wf_application_path}</app-path>
            <configuration>
                <property><name>jobTracker</name><value>${jobTracker}</value></property>
                <property><name>nameNode</name><value>${nameNode}</value></property>
                <property><name>queueName</name><value>${queueName}</value></property>
                <property><name>checkClient</name><value>${checkClient}</value></property>
                <property><name>checkEXEC</name><value>${checkEXEC}</value></property>
                <property><name>zxURL</name><value>${zxURL}</value></property>
                <property><name>userName</name><value>${userName}</value></property>
                <property><name>passWord</name><value>${passWord}</value></property>
                <property><name>sourceTableName</name><value>${sourceTableName}</value></property>
                <property><name>columns</name><value>${columns}</value></property>
                <property><name>targetTableName</name><value>${targetTableName}</value></property>
                <property><name>fields_terminated</name><value>${fields_terminated}</value></property>
                <property><name>lines_terminated</name><value>${lines_terminated}</value></property>
                <property><name>num_mappers</name><value>${num_mappers}</value></property>
                <property><name>dbname</name><value>${dbname}</value></property>
                <property><name>jdbcURL</name><value>${jdbcURL}</value></property>
                <property><name>pwd</name><value>${pwd}</value></property>
                <property><name>partition_date</name><value>${coord:formatTime(coord:dateOffset(coord:nominalTime(), -1, 'DAY'), 'yyyyMMdd')}</value></property>
                <property><name>year</name><value>${coord:formatTime(coord:dateOffset(coord:nominalTime(), -1, 'DAY'), 'yyyy')}</value></property>
                <property><name>day</name><value>${coord:formatTime(coord:dateOffset(coord:nominalTime(), -1, 'DAY'), 'dd')}</value></property>
                <property><name>month</name><value>${coord:formatTime(coord:dateOffset(coord:nominalTime(), -1, 'DAY'), 'MM')}</value></property>
                <property><name>oozie.use.system.libpath</name><value>True</value></property>
                <property><name>start_date</name><value>${start_date}</value></property>
                <property><name>end_date</name><value>${end_date}</value></property>
            </configuration>
        </workflow>
    </action>
</coordinator-app>
workflow/fffffffffffffff/job.properties
#job.properties template for a full data import
oozie.use.system.libpath = True
security_enabled = False
#appName
#Must be named exactly like the Hive table
appName = fffffffffffffff
dbname = default
nameNode = hdfs://bj-gm-test-data001:8020
jobTracker = bj-gm-test-data001:8032
queueName = data
timeZone = GMT+0800
#Start time of the job
start_date = 2019-06-18T11:21+0800
#End time of the job
end_date = 2019-06-19T11:21+0800
#HiveServer2 URL
jdbcURL = jdbc:hive2://bj-gm-prod-cos-datacenter006:2181,bj-gm-prod-cos-datacenter007:2181,bj-gm-prod-cos-datacenter008:2181/;serviceDiscoveryMode=zookeeper
#HiveServer2 password
pwd = data
checkClient = hdfs://bj-gm-test-data001:8020/user/hive/project/utils/data_ready_checker/client
checkEXEC = ./checkclient/bin/checker.sh
#MySQL URL
#Example only; the actual URL depends on the source database to import from
zxURL = jdbc:mysql://172.22.30.12:3306/zhengxing?tinyInt1isBit=false
#Database user name
userName = work
#Database password
passWord = zJnxVEhyyxeC7ciqxdMITVyWqOFc2mew
#Source table
sourceTableName = api_bdtransfermonth
#Output (target) table
targetTableName = fffffffffffffff
#Columns of the source table; the order must match the table definition
columns = "id,month_at,total_amount,should_pay,already_pay,is_finished,created_time,doctor_id"
num_mappers = 1
fields_terminated = \\001
lines_terminated = \\n
oozie.coord.application.path = hdfs://bj-gm-test-data001:8020/user/hive/project/workflow/fffffffffffffff
wf_application_path = hdfs://bj-gm-test-data001:8020/user/hive/project/workflow/fffffffffffffff
#Schedule of the workflow
#Same syntax as crontab
frequency =
#The workflow consists of three actions, executed in the order startAction -> checkAction -> jobAction
#These three fields hold the three action names
startAction = fffffffffffffff_sqoop
checkAction = fffffffffffffff_check
jobAction = fffffffffffffff_job
workflow/fffffffffffffff/workflow.xml
<workflow-app name="fffffffffffffff" xmlns="uri:oozie:workflow:0.5">
    <start to="fffffffffffffff_start"/>
    <kill name="Kill">
        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>
    <action name="fffffffffffffff_start">
        <sqoop xmlns="uri:oozie:sqoop-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <prepare>
                <delete path="${nameNode}/data/log/thirdparty/${targetTableName}/${partition_date}"/>
                <!--<mkdir path="${nameNode}/data/log/thirdparty/${targetTableName}/${partition_date}"/>-->
            </prepare>
            <configuration>
                <property><name>mapred.job.queue.name</name><value>${queueName}</value></property>
            </configuration>
            <command>import --connect ${zxURL} --username ${userName} --password ${passWord} --table ${sourceTableName} --columns ${columns} --target-dir /data/log/thirdparty/${targetTableName}/${partition_date} --fields-terminated-by ${fields_terminated} --lines-terminated-by ${lines_terminated} --num-mappers ${num_mappers} --hive-drop-import-delims --null-string \\N --null-non-string \\N</command>
        </sqoop>
        <ok to="fffffffffffffff_check"/>
        <error to="Kill"/>
    </action>
    <action name="fffffffffffffff_check" retry-max="3" retry-interval="5">
        <shell xmlns="uri:oozie:shell-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <configuration>
                <property><name>mapred.job.queue.name</name><value>${queueName}</value></property>
            </configuration>
            <exec>/bin/bash</exec>
            <argument>${checkEXEC}</argument>
            <env-var>TYPE=hdfs</env-var>
            <env-var>URI=/data/log/thirdparty/${targetTableName}/${partition_date}/</env-var>
            <archive>${checkClient}#checkclient</archive>
            <capture-output/>
        </shell>
        <ok to="fffffffffffffff_job"/>
        <error to="Kill"/>
    </action>
    <action name="fffffffffffffff_job" cred="hive2" retry-max="3" retry-interval="5">
        <hive2 xmlns="uri:oozie:hive2-action:0.1">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <configuration>
                <property><name>mapred.job.queue.name</name><value>${queueName}</value></property>
            </configuration>
            <jdbc-url>${jdbcURL}</jdbc-url>
            <password>${pwd}</password>
            <script>/user/hive/project/etl/fffffffffffffff/fffffffffffffff.sql</script>
            <param>partition_date=${partition_date}</param>
            <param>dbname=${dbname}</param>
        </hive2>
        <ok to="End"/>
        <error to="Kill"/>
    </action>
    <end name="End"/>
</workflow-app>
workflow/fffffffffffffff2/coordinator.xml
<coordinator-app name="fffffffffffffff2" frequency="0 1 * * *" start="${start_date}" end="${end_date}" timezone="${timeZone}" xmlns="uri:oozie:coordinator:0.2">
    <controls>
        <execution>FIFO</execution>
    </controls>
    <action>
        <workflow>
            <app-path>${wf_application_path}</app-path>
            <configuration>
                <property><name>jobTracker</name><value>${jobTracker}</value></property>
                <property><name>nameNode</name><value>${nameNode}</value></property>
                <property><name>queueName</name><value>${queueName}</value></property>
                <property><name>checkClient</name><value>${checkClient}</value></property>
                <property><name>checkEXEC</name><value>${checkEXEC}</value></property>
                <property><name>zxURL</name><value>${zxURL}</value></property>
                <property><name>userName</name><value>${userName}</value></property>
                <property><name>passWord</name><value>${passWord}</value></property>
                <property><name>sourceTableName</name><value>${sourceTableName}</value></property>
                <property><name>columns</name><value>${columns}</value></property>
                <property><name>targetTableName</name><value>${targetTableName}</value></property>
                <property><name>fields_terminated</name><value>${fields_terminated}</value></property>
                <property><name>lines_terminated</name><value>${lines_terminated}</value></property>
                <property><name>num_mappers</name><value>${num_mappers}</value></property>
                <property><name>dbname</name><value>${dbname}</value></property>
                <property><name>jdbcURL</name><value>${jdbcURL}</value></property>
                <property><name>pwd</name><value>${pwd}</value></property>
                <property><name>partition_date</name><value>${coord:formatTime(coord:dateOffset(coord:nominalTime(), -1, 'DAY'), 'yyyyMMdd')}</value></property>
                <property><name>year</name><value>${coord:formatTime(coord:dateOffset(coord:nominalTime(), -1, 'DAY'), 'yyyy')}</value></property>
                <property><name>day</name><value>${coord:formatTime(coord:dateOffset(coord:nominalTime(), -1, 'DAY'), 'dd')}</value></property>
                <property><name>month</name><value>${coord:formatTime(coord:dateOffset(coord:nominalTime(), -1, 'DAY'), 'MM')}</value></property>
                <property><name>oozie.use.system.libpath</name><value>True</value></property>
                <property><name>start_date</name><value>${start_date}</value></property>
                <property><name>end_date</name><value>${end_date}</value></property>
            </configuration>
        </workflow>
    </action>
</coordinator-app>
workflow/fffffffffffffff2/job.properties
#job.properties template for a full data import
oozie.use.system.libpath = True
security_enabled = False
#appName
#Must be named exactly like the Hive table
appName = fffffffffffffff2
dbname = default
nameNode = hdfs://bj-gm-test-data001:8020
jobTracker = bj-gm-test-data001:8032
queueName = data
timeZone = GMT+0800
#Start time of the job
start_date = 2019-06-18T11:21+0800
#End time of the job
end_date = 2019-06-19T11:21+0800
#HiveServer2 URL
jdbcURL = jdbc:hive2://bj-gm-prod-cos-datacenter006:2181,bj-gm-prod-cos-datacenter007:2181,bj-gm-prod-cos-datacenter008:2181/;serviceDiscoveryMode=zookeeper
#HiveServer2 password
pwd = data
checkClient = hdfs://bj-gm-test-data001:8020/user/hive/project/utils/data_ready_checker/client
checkEXEC = ./checkclient/bin/checker.sh
#MySQL URL
#Example only; the actual URL depends on the source database to import from
zxURL = jdbc:mysql://172.22.30.12:3306/zhengxing?tinyInt1isBit=false
#Database user name
userName = work
#Database password
passWord = zJnxVEhyyxeC7ciqxdMITVyWqOFc2mew
#Source table
sourceTableName = api_bdtransfermonth
#Output (target) table
targetTableName = fffffffffffffff2
#Columns of the source table; the order must match the table definition
columns = "id,month_at,total_amount,should_pay,already_pay,is_finished,created_time,doctor_id"
num_mappers = 1
fields_terminated = \\001
lines_terminated = \\n
oozie.coord.application.path = hdfs://bj-gm-test-data001:8020/user/hive/project/workflow/fffffffffffffff2
wf_application_path = hdfs://bj-gm-test-data001:8020/user/hive/project/workflow/fffffffffffffff2
#Schedule of the workflow
#Same syntax as crontab
frequency =
#The workflow consists of three actions, executed in the order startAction -> checkAction -> jobAction
#These three fields hold the three action names
startAction = fffffffffffffff2_sqoop
checkAction = fffffffffffffff2_check
jobAction = fffffffffffffff2_job
workflow/fffffffffffffff2/workflow.xml
<workflow-app name="fffffffffffffff2" xmlns="uri:oozie:workflow:0.5">
    <start to="fffffffffffffff2_start"/>
    <kill name="Kill">
        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>
    <action name="fffffffffffffff2_start">
        <sqoop xmlns="uri:oozie:sqoop-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <prepare>
                <delete path="${nameNode}/data/log/thirdparty/${targetTableName}/${partition_date}"/>
                <!--<mkdir path="${nameNode}/data/log/thirdparty/${targetTableName}/${partition_date}"/>-->
            </prepare>
            <configuration>
                <property><name>mapred.job.queue.name</name><value>${queueName}</value></property>
            </configuration>
            <command>import --connect ${zxURL} --username ${userName} --password ${passWord} --table ${sourceTableName} --columns ${columns} --target-dir /data/log/thirdparty/${targetTableName}/${partition_date} --fields-terminated-by ${fields_terminated} --lines-terminated-by ${lines_terminated} --num-mappers ${num_mappers} --hive-drop-import-delims --null-string \\N --null-non-string \\N</command>
        </sqoop>
        <ok to="fffffffffffffff2_check"/>
        <error to="Kill"/>
    </action>
    <action name="fffffffffffffff2_check" retry-max="3" retry-interval="5">
        <shell xmlns="uri:oozie:shell-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <configuration>
                <property><name>mapred.job.queue.name</name><value>${queueName}</value></property>
            </configuration>
            <exec>/bin/bash</exec>
            <argument>${checkEXEC}</argument>
            <env-var>TYPE=hdfs</env-var>
            <env-var>URI=/data/log/thirdparty/${targetTableName}/${partition_date}/</env-var>
            <archive>${checkClient}#checkclient</archive>
            <capture-output/>
        </shell>
        <ok to="fffffffffffffff2_job"/>
        <error to="Kill"/>
    </action>
    <action name="fffffffffffffff2_job" cred="hive2" retry-max="3" retry-interval="5">
        <hive2 xmlns="uri:oozie:hive2-action:0.1">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <configuration>
                <property><name>mapred.job.queue.name</name><value>${queueName}</value></property>
            </configuration>
            <jdbc-url>${jdbcURL}</jdbc-url>
            <password>${pwd}</password>
            <script>/user/hive/project/etl/fffffffffffffff2/fffffffffffffff2.sql</script>
            <param>partition_date=${partition_date}</param>
            <param>dbname=${dbname}</param>
        </hive2>
        <ok to="End"/>
        <error to="Kill"/>
    </action>
    <end name="End"/>
</workflow-app>
workflow/test_api_advertisehospital1/coordinator.xml
<coordinator-app name="test_api_advertisehospital1" frequency="0 1 * * *" start="${start_date}" end="${end_date}" timezone="${timeZone}" xmlns="uri:oozie:coordinator:0.2">
    <controls>
        <execution>FIFO</execution>
    </controls>
    <action>
        <workflow>
            <app-path>${wf_application_path}</app-path>
            <configuration>
                <property><name>jobTracker</name><value>${jobTracker}</value></property>
                <property><name>nameNode</name><value>${nameNode}</value></property>
                <property><name>queueName</name><value>${queueName}</value></property>
                <property><name>checkClient</name><value>${checkClient}</value></property>
                <property><name>checkEXEC</name><value>${checkEXEC}</value></property>
                <property><name>zxURL</name><value>${zxURL}</value></property>
                <property><name>userName</name><value>${userName}</value></property>
                <property><name>passWord</name><value>${passWord}</value></property>
                <property><name>sourceTableName</name><value>${sourceTableName}</value></property>
                <property><name>columns</name><value>${columns}</value></property>
                <property><name>targetTableName</name><value>${targetTableName}</value></property>
                <property><name>fields_terminated</name><value>${fields_terminated}</value></property>
                <property><name>lines_terminated</name><value>${lines_terminated}</value></property>
                <property><name>num_mappers</name><value>${num_mappers}</value></property>
                <property><name>dbname</name><value>${dbname}</value></property>
                <property><name>jdbcURL</name><value>${jdbcURL}</value></property>
                <property><name>pwd</name><value>${pwd}</value></property>
                <property><name>partition_date</name><value>${coord:formatTime(coord:dateOffset(coord:nominalTime(), -1, 'DAY'), 'yyyyMMdd')}</value></property>
                <property><name>year</name><value>${coord:formatTime(coord:dateOffset(coord:nominalTime(), -1, 'DAY'), 'yyyy')}</value></property>
                <property><name>day</name><value>${coord:formatTime(coord:dateOffset(coord:nominalTime(), -1, 'DAY'), 'dd')}</value></property>
                <property><name>month</name><value>${coord:formatTime(coord:dateOffset(coord:nominalTime(), -1, 'DAY'), 'MM')}</value></property>
                <property><name>oozie.use.system.libpath</name><value>True</value></property>
                <property><name>start_date</name><value>${start_date}</value></property>
                <property><name>end_date</name><value>${end_date}</value></property>
            </configuration>
        </workflow>
    </action>
</coordinator-app>
workflow/test_api_advertisehospital1/job.properties
#job.properties template for a full data import
oozie.use.system.libpath = True
security_enabled = False
#appName
#Must be named exactly like the Hive table
appName = test_api_advertisehospital1
dbname = default
nameNode = hdfs://bj-gm-test-data001:8020
jobTracker = bj-gm-test-data001:8032
queueName = data
timeZone = GMT+0800
#Start time of the job
start_date = 2019-06-19T14:23+0800
#End time of the job
end_date = 2019-06-28T14:23+0800
#HiveServer2 URL
jdbcURL = jdbc:hive2://bj-gm-prod-cos-datacenter006:2181,bj-gm-prod-cos-datacenter007:2181,bj-gm-prod-cos-datacenter008:2181/;serviceDiscoveryMode=zookeeper
#HiveServer2 password
pwd = data
checkClient = hdfs://bj-gm-test-data001:8020/user/hive/project/utils/data_ready_checker/client
checkEXEC = ./checkclient/bin/checker.sh
#MySQL URL
#Example only; the actual URL depends on the source database to import from
zxURL = jdbc:mysql://172.22.30.12:3306/zhengxing?tinyInt1isBit=false
#Database user name
userName = work
#Database password
passWord = zJnxVEhyyxeC7ciqxdMITVyWqOFc2mew
#Source table
sourceTableName = api_advertisehospital
#Output (target) table
targetTableName = test_api_advertisehospital1
#Columns of the source table; the order must match the table definition
columns = "id,hospital_id,rank,portrait"
num_mappers = 1
fields_terminated = \\001
lines_terminated = \\n
oozie.coord.application.path = hdfs://bj-gm-test-data001:8020/user/hive/project/workflow/test_api_advertisehospital1
wf_application_path = hdfs://bj-gm-test-data001:8020/user/hive/project/workflow/test_api_advertisehospital1
#Schedule of the workflow
#Same syntax as crontab
frequency =
#The workflow consists of three actions, executed in the order startAction -> checkAction -> jobAction
#These three fields hold the three action names
startAction = test_api_advertisehospital1_sqoop
checkAction = test_api_advertisehospital1_check
jobAction = test_api_advertisehospital1_job
workflow/test_api_advertisehospital1/workflow.xml
<workflow-app name="test_api_advertisehospital1" xmlns="uri:oozie:workflow:0.5">
    <start to="test_api_advertisehospital1_start"/>
    <kill name="Kill">
        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>
    <action name="test_api_advertisehospital1_start">
        <sqoop xmlns="uri:oozie:sqoop-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <prepare>
                <delete path="${nameNode}/data/log/thirdparty/${targetTableName}/${partition_date}"/>
                <!--<mkdir path="${nameNode}/data/log/thirdparty/${targetTableName}/${partition_date}"/>-->
            </prepare>
            <configuration>
                <property><name>mapred.job.queue.name</name><value>${queueName}</value></property>
            </configuration>
            <command>import --connect ${zxURL} --username ${userName} --password ${passWord} --table ${sourceTableName} --columns ${columns} --target-dir /data/log/thirdparty/${targetTableName}/${partition_date} --fields-terminated-by ${fields_terminated} --lines-terminated-by ${lines_terminated} --num-mappers ${num_mappers} --hive-drop-import-delims --null-string \\N --null-non-string \\N</command>
        </sqoop>
        <ok to="test_api_advertisehospital1_check"/>
        <error to="Kill"/>
    </action>
    <action name="test_api_advertisehospital1_check" retry-max="3" retry-interval="5">
        <shell xmlns="uri:oozie:shell-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <configuration>
                <property><name>mapred.job.queue.name</name><value>${queueName}</value></property>
            </configuration>
            <exec>/bin/bash</exec>
            <argument>${checkEXEC}</argument>
            <env-var>TYPE=hdfs</env-var>
            <env-var>URI=/data/log/thirdparty/${targetTableName}/${partition_date}/</env-var>
            <archive>${checkClient}#checkclient</archive>
            <capture-output/>
        </shell>
        <ok to="test_api_advertisehospital1_job"/>
        <error to="Kill"/>
    </action>
    <action name="test_api_advertisehospital1_job" cred="hive2" retry-max="3" retry-interval="5">
        <hive2 xmlns="uri:oozie:hive2-action:0.1">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <configuration>
                <property><name>mapred.job.queue.name</name><value>${queueName}</value></property>
            </configuration>
            <jdbc-url>${jdbcURL}</jdbc-url>
            <password>${pwd}</password>
            <script>/user/hive/project/etl/test_api_advertisehospital1/test_api_advertisehospital1.sql</script>
            <param>partition_date=${partition_date}</param>
            <param>dbname=${dbname}</param>
        </hive2>
        <ok to="End"/>
        <error to="Kill"/>
    </action>
    <end name="End"/>
</workflow-app>
workflow/test_api_area1/coordinator.xml
<coordinator-app name="test_api_area1" frequency="0 1 * * *" start="${start_date}" end="${end_date}" timezone="${timeZone}" xmlns="uri:oozie:coordinator:0.2">
    <controls>
        <execution>FIFO</execution>
    </controls>
    <action>
        <workflow>
            <app-path>${wf_application_path}</app-path>
            <configuration>
                <property><name>jobTracker</name><value>${jobTracker}</value></property>
                <property><name>nameNode</name><value>${nameNode}</value></property>
                <property><name>queueName</name><value>${queueName}</value></property>
                <property><name>checkClient</name><value>${checkClient}</value></property>
                <property><name>checkEXEC</name><value>${checkEXEC}</value></property>
                <property><name>zxURL</name><value>${zxURL}</value></property>
                <property><name>userName</name><value>${userName}</value></property>
                <property><name>passWord</name><value>${passWord}</value></property>
                <property><name>sourceTableName</name><value>${sourceTableName}</value></property>
                <property><name>columns</name><value>${columns}</value></property>
                <property><name>targetTableName</name><value>${targetTableName}</value></property>
                <property><name>fields_terminated</name><value>${fields_terminated}</value></property>
                <property><name>lines_terminated</name><value>${lines_terminated}</value></property>
                <property><name>num_mappers</name><value>${num_mappers}</value></property>
                <property><name>dbname</name><value>${dbname}</value></property>
                <property><name>jdbcURL</name><value>${jdbcURL}</value></property>
                <property><name>pwd</name><value>${pwd}</value></property>
                <property><name>partition_date</name><value>${coord:formatTime(coord:dateOffset(coord:nominalTime(), -1, 'DAY'), 'yyyyMMdd')}</value></property>
                <property><name>year</name><value>${coord:formatTime(coord:dateOffset(coord:nominalTime(), -1, 'DAY'), 'yyyy')}</value></property>
                <property><name>day</name><value>${coord:formatTime(coord:dateOffset(coord:nominalTime(), -1, 'DAY'), 'dd')}</value></property>
                <property><name>month</name><value>${coord:formatTime(coord:dateOffset(coord:nominalTime(), -1, 'DAY'), 'MM')}</value></property>
                <property><name>oozie.use.system.libpath</name><value>True</value></property>
                <property><name>start_date</name><value>${start_date}</value></property>
                <property><name>end_date</name><value>${end_date}</value></property>
            </configuration>
        </workflow>
    </action>
</coordinator-app>
workflow/test_api_area1/job.properties
#job.properties template for a full data import
oozie.use.system.libpath = True
security_enabled = False
#appName
#Must be named exactly like the Hive table
appName = test_api_area1
dbname = default
nameNode = hdfs://bj-gm-test-data001:8020
jobTracker = bj-gm-test-data001:8032
queueName = data
timeZone = GMT+0800
#Start time of the job
start_date = 2019-06-17T14:17+0800
#End time of the job
end_date = 2019-06-18T14:17+0800
#HiveServer2 URL
jdbcURL = jdbc:hive2://bj-gm-prod-cos-datacenter006:2181,bj-gm-prod-cos-datacenter007:2181,bj-gm-prod-cos-datacenter008:2181/;serviceDiscoveryMode=zookeeper
#HiveServer2 password
pwd = data
checkClient = hdfs://bj-gm-test-data001:8020/user/hive/project/utils/data_ready_checker/client
checkEXEC = ./checkclient/bin/checker.sh
#MySQL URL
#Example only; the actual URL depends on the source database to import from
zxURL = jdbc:mysql://172.22.30.12:3306/zhengxing?tinyInt1isBit=false
#Database user name
userName = work
#Database password
passWord = zJnxVEhyyxeC7ciqxdMITVyWqOFc2mew
#Source table
sourceTableName = api_area
#Output (target) table
targetTableName = test_api_area1
#Columns of the source table; the order must match the table definition
columns = "id,area_name,phone_prefix"
num_mappers = 1
fields_terminated = \\001
lines_terminated = \\n
oozie.coord.application.path = hdfs://bj-gm-test-data001:8020/user/hive/project/workflow/test_api_area1
wf_application_path = hdfs://bj-gm-test-data001:8020/user/hive/project/workflow/test_api_area1
#Schedule of the workflow
#Same syntax as crontab
frequency =
#The workflow consists of three actions, executed in the order startAction -> checkAction -> jobAction
#These three fields hold the three action names
startAction = test_api_area1_sqoop
checkAction = test_api_area1_check
jobAction = test_api_area1_job
workflow/test_api_area1/workflow.xml
<workflow-app name="test_api_area1" xmlns="uri:oozie:workflow:0.5">
    <start to="test_api_area1_start"/>
    <kill name="Kill">
        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>
    <action name="test_api_area1_start">
        <sqoop xmlns="uri:oozie:sqoop-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <prepare>
                <delete path="${nameNode}/data/log/thirdparty/${targetTableName}/${partition_date}"/>
                <!--<mkdir path="${nameNode}/data/log/thirdparty/${targetTableName}/${partition_date}"/>-->
            </prepare>
            <configuration>
                <property><name>mapred.job.queue.name</name><value>${queueName}</value></property>
            </configuration>
            <command>import --connect ${zxURL} --username ${userName} --password ${passWord} --table ${sourceTableName} --columns ${columns} --target-dir /data/log/thirdparty/${targetTableName}/${partition_date} --fields-terminated-by ${fields_terminated} --lines-terminated-by ${lines_terminated} --num-mappers ${num_mappers} --hive-drop-import-delims --null-string \\N --null-non-string \\N</command>
        </sqoop>
        <ok to="test_api_area1_check"/>
        <error to="Kill"/>
    </action>
    <action name="test_api_area1_check" retry-max="3" retry-interval="5">
        <shell xmlns="uri:oozie:shell-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <configuration>
                <property><name>mapred.job.queue.name</name><value>${queueName}</value></property>
            </configuration>
            <exec>/bin/bash</exec>
            <argument>${checkEXEC}</argument>
            <env-var>TYPE=hdfs</env-var>
            <env-var>URI=/data/log/thirdparty/${targetTableName}/${partition_date}/</env-var>
            <archive>${checkClient}#checkclient</archive>
            <capture-output/>
        </shell>
        <ok to="test_api_area1_job"/>
        <error to="Kill"/>
    </action>
    <action name="test_api_area1_job" cred="hive2" retry-max="3" retry-interval="5">
        <hive2 xmlns="uri:oozie:hive2-action:0.1">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <configuration>
                <property><name>mapred.job.queue.name</name><value>${queueName}</value></property>
            </configuration>
            <jdbc-url>${jdbcURL}</jdbc-url>
            <password>${pwd}</password>
            <script>/user/hive/project/etl/test_api_area1/test_api_area1.sql</script>
            <param>partition_date=${partition_date}</param>
            <param>dbname=${dbname}</param>
        </hive2>
        <ok to="End"/>
        <error to="Kill"/>
    </action>
    <end name="End"/>
</workflow-app>
workflow/test_api_area12/coordinator.xml
<coordinator-app name="test_api_area12" frequency="0 1 * * *" start="${start_date}" end="${end_date}" timezone="${timeZone}" xmlns="uri:oozie:coordinator:0.2">
    <controls>
        <execution>FIFO</execution>
    </controls>
    <action>
        <workflow>
            <app-path>${wf_application_path}</app-path>
            <configuration>
                <property><name>jobTracker</name><value>${jobTracker}</value></property>
                <property><name>nameNode</name><value>${nameNode}</value></property>
                <property><name>queueName</name><value>${queueName}</value></property>
                <property><name>checkClient</name><value>${checkClient}</value></property>
                <property><name>checkEXEC</name><value>${checkEXEC}</value></property>
                <property><name>zxURL</name><value>${zxURL}</value></property>
                <property><name>userName</name><value>${userName}</value></property>
                <property><name>passWord</name><value>${passWord}</value></property>
                <property><name>sourceTableName</name><value>${sourceTableName}</value></property>
                <property><name>columns</name><value>${columns}</value></property>
                <property><name>targetTableName</name><value>${targetTableName}</value></property>
                <property><name>fields_terminated</name><value>${fields_terminated}</value></property>
                <property><name>lines_terminated</name><value>${lines_terminated}</value></property>
                <property><name>num_mappers</name><value>${num_mappers}</value></property>
                <property><name>dbname</name><value>${dbname}</value></property>
                <property><name>jdbcURL</name><value>${jdbcURL}</value></property>
                <property><name>pwd</name><value>${pwd}</value></property>
                <property><name>partition_date</name><value>${coord:formatTime(coord:dateOffset(coord:nominalTime(), -1, 'DAY'), 'yyyyMMdd')}</value></property>
                <property><name>year</name><value>${coord:formatTime(coord:dateOffset(coord:nominalTime(), -1, 'DAY'), 'yyyy')}</value></property>
                <property><name>day</name><value>${coord:formatTime(coord:dateOffset(coord:nominalTime(), -1, 'DAY'), 'dd')}</value></property>
                <property><name>month</name><value>${coord:formatTime(coord:dateOffset(coord:nominalTime(), -1, 'DAY'), 'MM')}</value></property>
                <property><name>oozie.use.system.libpath</name><value>True</value></property>
                <property><name>start_date</name><value>${start_date}</value></property>
                <property><name>end_date</name><value>${end_date}</value></property>
            </configuration>
        </workflow>
    </action>
</coordinator-app>
workflow/test_api_area12/job.properties
#job.properties template for a full data import
oozie.use.system.libpath = True
security_enabled = False
#appName
#Must be named exactly like the Hive table
appName = test_api_area12
dbname = default
nameNode = hdfs://bj-gm-test-data001:8020
jobTracker = bj-gm-test-data001:8032
queueName = data
timeZone = GMT+0800
#Start time of the job
start_date = 2019-06-17T14:17+0800
#End time of the job
end_date = 2019-06-18T14:17+0800
#HiveServer2 URL
jdbcURL = jdbc:hive2://bj-gm-prod-cos-datacenter006:2181,bj-gm-prod-cos-datacenter007:2181,bj-gm-prod-cos-datacenter008:2181/;serviceDiscoveryMode=zookeeper
#HiveServer2 password
pwd = data
checkClient = hdfs://bj-gm-test-data001:8020/user/hive/project/utils/data_ready_checker/client
checkEXEC = ./checkclient/bin/checker.sh
#MySQL URL
#Example only; the actual URL depends on the source database to import from
zxURL = jdbc:mysql://172.22.30.12:3306/zhengxing?tinyInt1isBit=false
#Database user name
userName = work
#Database password
passWord = zJnxVEhyyxeC7ciqxdMITVyWqOFc2mew
#Source table
sourceTableName = api_area
#Output (target) table
targetTableName = test_api_area12
#Columns of the source table; the order must match the table definition
columns = "id,area_name,phone_prefix"
num_mappers = 1
fields_terminated = \\001
lines_terminated = \\n
oozie.coord.application.path = hdfs://bj-gm-test-data001:8020/user/hive/project/workflow/test_api_area12
wf_application_path = hdfs://bj-gm-test-data001:8020/user/hive/project/workflow/test_api_area12
#Schedule of the workflow
#Same syntax as crontab
frequency =
#The workflow consists of three actions, executed in the order startAction -> checkAction -> jobAction
#These three fields hold the three action names
startAction = test_api_area12_sqoop
checkAction = test_api_area12_check
jobAction = test_api_area12_job
workflow/test_api_area12/workflow.xml
<workflow-app name="test_api_area12" xmlns="uri:oozie:workflow:0.5">
    <start to="test_api_area12_start"/>
    <kill name="Kill">
        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>
    <action name="test_api_area12_start">
        <sqoop xmlns="uri:oozie:sqoop-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <prepare>
                <delete path="${nameNode}/data/log/thirdparty/${targetTableName}/${partition_date}"/>
                <!--<mkdir path="${nameNode}/data/log/thirdparty/${targetTableName}/${partition_date}"/>-->
            </prepare>
            <configuration>
                <property><name>mapred.job.queue.name</name><value>${queueName}</value></property>
            </configuration>
            <command>import --connect ${zxURL} --username ${userName} --password ${passWord} --table ${sourceTableName} --columns ${columns} --target-dir /data/log/thirdparty/${targetTableName}/${partition_date} --fields-terminated-by ${fields_terminated} --lines-terminated-by ${lines_terminated} --num-mappers ${num_mappers} --hive-drop-import-delims --null-string \\N --null-non-string \\N</command>
        </sqoop>
        <ok to="test_api_area12_check"/>
        <error to="Kill"/>
    </action>
    <action name="test_api_area12_check" retry-max="3" retry-interval="5">
        <shell xmlns="uri:oozie:shell-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <configuration>
                <property><name>mapred.job.queue.name</name><value>${queueName}</value></property>
            </configuration>
            <exec>/bin/bash</exec>
            <argument>${checkEXEC}</argument>
            <env-var>TYPE=hdfs</env-var>
            <env-var>URI=/data/log/thirdparty/${targetTableName}/${partition_date}/</env-var>
            <archive>${checkClient}#checkclient</archive>
            <capture-output/>
        </shell>
        <ok to="test_api_area12_job"/>
        <error to="Kill"/>
    </action>
    <action name="test_api_area12_job" cred="hive2" retry-max="3" retry-interval="5">
        <hive2 xmlns="uri:oozie:hive2-action:0.1">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <configuration>
                <property><name>mapred.job.queue.name</name><value>${queueName}</value></property>
            </configuration>
            <jdbc-url>${jdbcURL}</jdbc-url>
            <password>${pwd}</password>
            <script>/user/hive/project/etl/test_api_area12/test_api_area12.sql</script>
            <param>partition_date=${partition_date}</param>
            <param>dbname=${dbname}</param>
        </hive2>
        <ok to="End"/>
        <error to="Kill"/>
    </action>
    <end name="End"/>
</workflow-app>
workflow/test_api_user_change_log/coordinator.xml
<coordinator-app name="test_api_user_change_log" frequency="0 1 * * *" start="${start_date}" end="${end_date}" timezone="${timeZone}" xmlns="uri:oozie:coordinator:0.2">
    <controls>
        <execution>FIFO</execution>
    </controls>
    <action>
        <workflow>
            <app-path>${wf_application_path}</app-path>
            <configuration>
                <property><name>jobTracker</name><value>${jobTracker}</value></property>
                <property><name>nameNode</name><value>${nameNode}</value></property>
                <property><name>queueName</name><value>${queueName}</value></property>
                <property><name>checkClient</name><value>${checkClient}</value></property>
                <property><name>checkEXEC</name><value>${checkEXEC}</value></property>
                <property><name>zxURL</name><value>${zxURL}</value></property>
                <property><name>userName</name><value>${userName}</value></property>
                <property><name>passWord</name><value>${passWord}</value></property>
                <property><name>sourceTableName</name><value>${sourceTableName}</value></property>
                <property><name>columns</name><value>${columns}</value></property>
                <property><name>targetTableName</name><value>${targetTableName}</value></property>
                <property><name>fields_terminated</name><value>${fields_terminated}</value></property>
                <property><name>lines_terminated</name><value>${lines_terminated}</value></property>
                <property><name>num_mappers</name><value>${num_mappers}</value></property>
                <property><name>dbname</name><value>${dbname}</value></property>
                <property><name>jdbcURL</name><value>${jdbcURL}</value></property>
                <property><name>pwd</name><value>${pwd}</value></property>
                <property><name>partition_date</name><value>${coord:formatTime(coord:dateOffset(coord:nominalTime(), -1, 'DAY'), 'yyyyMMdd')}</value></property>
                <property><name>year</name><value>${coord:formatTime(coord:dateOffset(coord:nominalTime(), -1, 'DAY'), 'yyyy')}</value></property>
                <property><name>day</name><value>${coord:formatTime(coord:dateOffset(coord:nominalTime(), -1, 'DAY'), 'dd')}</value></property>
                <property><name>month</name><value>${coord:formatTime(coord:dateOffset(coord:nominalTime(), -1, 'DAY'), 'MM')}</value></property>
                <property><name>oozie.use.system.libpath</name><value>True</value></property>
                <property><name>start_date</name><value>${start_date}</value></property>
                <property><name>end_date</name><value>${end_date}</value></property>
            </configuration>
        </workflow>
    </action>
</coordinator-app>
workflow/test_api_user_change_log/job.properties
#job.properties template for a full data import
oozie.use.system.libpath = True
security_enabled = False
#appName
#Must be named exactly like the Hive table
appName = test_api_user_change_log
dbname = default
nameNode = hdfs://bj-gm-test-data001:8020
jobTracker = bj-gm-test-data001:8032
queueName = data
timeZone = GMT+0800
#Start time of the job
start_date = 2019-06-05T14:48+0800
#End time of the job
end_date = 2019-06-20T14:48+0800
#HiveServer2 URL
jdbcURL = jdbc:hive2://bj-gm-prod-cos-datacenter006:2181,bj-gm-prod-cos-datacenter007:2181,bj-gm-prod-cos-datacenter008:2181/;serviceDiscoveryMode=zookeeper
#HiveServer2 password
pwd = data
checkClient = hdfs://bj-gm-test-data001:8020/user/hive/project/utils/data_ready_checker/client
checkEXEC = ./checkclient/bin/checker.sh
#MySQL URL
#Example only; the actual URL depends on the source database to import from
zxURL = jdbc:mysql://172.22.30.12:3306/zhengxing?tinyInt1isBit=false
#Database user name
userName = work
#Database password
passWord = zJnxVEhyyxeC7ciqxdMITVyWqOFc2mew
#Source table
sourceTableName = api_user_change_log
#Output (target) table
targetTableName = test_api_user_change_log
#Columns of the source table; the order must match the table definition
columns = "id,user_id,change_type,operate_type,comment,create_time"
num_mappers = 1
fields_terminated = \\001
lines_terminated = \\n
oozie.coord.application.path = hdfs://bj-gm-test-data001:8020/user/hive/project/workflow/test_api_user_change_log
wf_application_path = hdfs://bj-gm-test-data001:8020/user/hive/project/workflow/test_api_user_change_log
#Schedule of the workflow
#Same syntax as crontab
frequency =
#The workflow consists of three actions, executed in the order startAction -> checkAction -> jobAction
#These three fields hold the three action names
startAction = test_api_user_change_log_sqoop
checkAction = test_api_user_change_log_check
jobAction = test_api_user_change_log_job
workflow/test_api_user_change_log/workflow.xml
<workflow-app name="test_api_user_change_log" xmlns="uri:oozie:workflow:0.5">
    <start to="test_api_user_change_log_start"/>
    <kill name="Kill">
        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>
    <action name="test_api_user_change_log_start">
        <sqoop xmlns="uri:oozie:sqoop-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <prepare>
                <delete path="${nameNode}/data/log/thirdparty/${targetTableName}/${partition_date}"/>
                <!--<mkdir path="${nameNode}/data/log/thirdparty/${targetTableName}/${partition_date}"/>-->
            </prepare>
            <configuration>
                <property><name>mapred.job.queue.name</name><value>${queueName}</value></property>
            </configuration>
            <command>import --connect ${zxURL} --username ${userName} --password ${passWord} --table ${sourceTableName} --columns ${columns} --target-dir /data/log/thirdparty/${targetTableName}/${partition_date} --fields-terminated-by ${fields_terminated} --lines-terminated-by ${lines_terminated} --num-mappers ${num_mappers} --hive-drop-import-delims --null-string \\N --null-non-string \\N --where "date_format(create_time,'%Y%m%d')=date_format('${partition_date}','%Y%m%d')"</command>
        </sqoop>
        <ok to="test_api_user_change_log_check"/>
        <error to="Kill"/>
    </action>
    <action name="test_api_user_change_log_check" retry-max="3" retry-interval="5">
        <shell xmlns="uri:oozie:shell-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <configuration>
                <property><name>mapred.job.queue.name</name><value>${queueName}</value></property>
            </configuration>
            <exec>/bin/bash</exec>
            <argument>${checkEXEC}</argument>
            <env-var>TYPE=hdfs</env-var>
            <env-var>URI=/data/log/thirdparty/${targetTableName}/${partition_date}/</env-var>
            <archive>${checkClient}#checkclient</archive>
            <capture-output/>
        </shell>
        <ok to="test_api_user_change_log_job"/>
        <error to="Kill"/>
    </action>
    <action name="test_api_user_change_log_job" cred="hive2" retry-max="3" retry-interval="5">
        <hive2 xmlns="uri:oozie:hive2-action:0.1">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <configuration>
                <property><name>mapred.job.queue.name</name><value>${queueName}</value></property>
            </configuration>
            <jdbc-url>${jdbcURL}</jdbc-url>
            <password>${pwd}</password>
            <script>/user/hive/project/etl/test_api_user_change_log/test_api_user_change_log.sql</script>
            <param>partition_date=${partition_date}</param>
            <param>dbname=${dbname}</param>
        </hive2>
        <ok to="End"/>
        <error to="Kill"/>
    </action>
    <end name="End"/>
</workflow-app>
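This is the only workflow in the commit whose --where clause filters by date: it keeps just the source rows whose create_time falls on the partition day, so each daily run imports one day of the change log instead of the whole table. Resolved by hand for partition_date=20190618 (credentials replaced with a placeholder), the action runs the equivalent of:

sqoop import --connect 'jdbc:mysql://172.22.30.12:3306/zhengxing?tinyInt1isBit=false' \
  --username work --password '***' \
  --table api_user_change_log \
  --columns "id,user_id,change_type,operate_type,comment,create_time" \
  --target-dir /data/log/thirdparty/test_api_user_change_log/20190618 \
  --fields-terminated-by '\001' --lines-terminated-by '\n' \
  --num-mappers 1 --hive-drop-import-delims \
  --null-string '\\N' --null-non-string '\\N' \
  --where "date_format(create_time,'%Y%m%d')=date_format('20190618','%Y%m%d')"

MySQL parses the literal '20190618' as a date, so the right-hand side reduces to '20190618' and the predicate matches exactly the rows created on 2019-06-18.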
workflow/xuwei_test/coordinator.xml
<coordinator-app name="xuwei_test" frequency="0 1 * * *" start="${start_date}" end="${end_date}" timezone="${timeZone}" xmlns="uri:oozie:coordinator:0.2">
    <controls>
        <execution>FIFO</execution>
    </controls>
    <action>
        <workflow>
            <app-path>${wf_application_path}</app-path>
            <configuration>
                <property><name>jobTracker</name><value>${jobTracker}</value></property>
                <property><name>nameNode</name><value>${nameNode}</value></property>
                <property><name>queueName</name><value>${queueName}</value></property>
                <property><name>checkClient</name><value>${checkClient}</value></property>
                <property><name>checkEXEC</name><value>${checkEXEC}</value></property>
                <property><name>zxURL</name><value>${zxURL}</value></property>
                <property><name>userName</name><value>${userName}</value></property>
                <property><name>passWord</name><value>${passWord}</value></property>
                <property><name>sourceTableName</name><value>${sourceTableName}</value></property>
                <property><name>columns</name><value>${columns}</value></property>
                <property><name>targetTableName</name><value>${targetTableName}</value></property>
                <property><name>fields_terminated</name><value>${fields_terminated}</value></property>
                <property><name>lines_terminated</name><value>${lines_terminated}</value></property>
                <property><name>num_mappers</name><value>${num_mappers}</value></property>
                <property><name>dbname</name><value>${dbname}</value></property>
                <property><name>jdbcURL</name><value>${jdbcURL}</value></property>
                <property><name>pwd</name><value>${pwd}</value></property>
                <property><name>partition_date</name><value>${coord:formatTime(coord:dateOffset(coord:nominalTime(), -1, 'DAY'), 'yyyyMMdd')}</value></property>
                <property><name>year</name><value>${coord:formatTime(coord:dateOffset(coord:nominalTime(), -1, 'DAY'), 'yyyy')}</value></property>
                <property><name>day</name><value>${coord:formatTime(coord:dateOffset(coord:nominalTime(), -1, 'DAY'), 'dd')}</value></property>
                <property><name>month</name><value>${coord:formatTime(coord:dateOffset(coord:nominalTime(), -1, 'DAY'), 'MM')}</value></property>
                <property><name>oozie.use.system.libpath</name><value>true</value></property>
                <property><name>start_date</name><value>${start_date}</value></property>
                <property><name>end_date</name><value>${end_date}</value></property>
            </configuration>
        </workflow>
    </action>
</coordinator-app>
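The partition_date/year/month/day properties all shift the coordinator's nominal run time back one day before formatting, so a run whose nominal time is 2019-06-19 01:00 loads the previous day's snapshot: partition_date=20190618, year=2019, month=06, day=18. The same arithmetic can be sanity-checked in Hive itself (a hypothetical check, not part of this commit):

-- Hypothetical Hive query mirroring the coord:dateOffset/coord:formatTime expressions above.
SELECT date_format(date_sub('2019-06-19', 1), 'yyyyMMdd') AS partition_date, -- 20190618
       date_format(date_sub('2019-06-19', 1), 'yyyy')     AS year,           -- 2019
       date_format(date_sub('2019-06-19', 1), 'MM')       AS month,          -- 06
       date_format(date_sub('2019-06-19', 1), 'dd')       AS day;            -- 18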
workflow/xuwei_test/job.properties 0 → 100644
# job.properties template for a full data import
oozie.use.system.libpath=true
security_enabled=false
# appName
# Naming rule: must be identical to the Hive table name
appName=xuwei_test
dbname=default
nameNode=hdfs://bj-gm-test-data001:8020
jobTracker=bj-gm-test-data001:8032
queueName=data
timeZone=GMT+0800
# Start time of the job's execution
start_date=2019-06-17T11:36+0800
# End time of the job's execution
end_date=2019-06-19T11:36+0800
# HiveServer2 URL
jdbcURL=jdbc:hive2://bj-gm-prod-cos-datacenter006:2181,bj-gm-prod-cos-datacenter007:2181,bj-gm-prod-cos-datacenter008:2181/;serviceDiscoveryMode=zookeeper
# HiveServer2 password
pwd=data
checkClient=hdfs://bj-gm-test-data001:8020/user/hive/project/utils/data_ready_checker/client
checkEXEC=./checkclient/bin/checker.sh
# MySQL URL
# This is only an example; the actual URL depends on the source database being imported
zxURL=jdbc:mysql://172.22.30.12:3306/zhengxing?tinyInt1isBit=false
# Database username
userName=work
# Database password
passWord=zJnxVEhyyxeC7ciqxdMITVyWqOFc2mew
# Source table
sourceTableName=api_attroptions
# Target (output) table
targetTableName=xuwei_test
# Columns of the source table; order must match the column order in the table
columns="id,name,is_online,created_time,is_doctor_create,tag_attr_id"
num_mappers=1
fields_terminated=\\001
lines_terminated=\\n
oozie.coord.application.path=hdfs://bj-gm-test-data001:8020/user/hive/project/workflow/xuwei_test
wf_application_path=hdfs://bj-gm-test-data001:8020/user/hive/project/workflow/xuwei_test
# Workflow schedule
# Uses the same syntax as crontab
frequency=
# The workflow consists of three actions, executed in the order startAction -> checkAction -> jobAction
# These three fields hold the three action names
startAction=xuwei_test_sqoop
checkAction=xuwei_test_check
jobAction=xuwei_test_job
\ No newline at end of file
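The delimiters configured above (fields_terminated=\001, lines_terminated=\n) must agree with the target table's row format, and the columns property lists the source fields in table order. A plausible DDL for the target table under these settings (column types here are assumptions; the actual create script in the repo is authoritative):

-- Hypothetical DDL sketch; column names come from the columns property, types are guessed.
CREATE TABLE IF NOT EXISTS default.xuwei_test (
    id BIGINT,
    name STRING,
    is_online INT,
    created_time STRING,
    is_doctor_create INT,
    tag_attr_id BIGINT
)
PARTITIONED BY (partition_date STRING)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\001'
    LINES TERMINATED BY '\n'
STORED AS TEXTFILE;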
workflow/xuwei_test/workflow.xml 0 → 100644
<workflow-app name="xuwei_test" xmlns="uri:oozie:workflow:0.5">
    <start to="xuwei_test_start"/>
    <kill name="Kill">
        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>
    <action name="xuwei_test_start">
        <sqoop xmlns="uri:oozie:sqoop-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <prepare>
                <delete path="${nameNode}/data/log/thirdparty/${targetTableName}/${partition_date}"/>
                <!--<mkdir path="${nameNode}/data/log/thirdparty/${targetTableName}/${partition_date}"/>-->
            </prepare>
            <configuration>
                <property>
                    <name>mapred.job.queue.name</name>
                    <value>${queueName}</value>
                </property>
            </configuration>
            <command>import --connect ${zxURL} --username ${userName} --password ${passWord} --table ${sourceTableName} --columns ${columns} --target-dir /data/log/thirdparty/${targetTableName}/${partition_date} --fields-terminated-by ${fields_terminated} --lines-terminated-by ${lines_terminated} --num-mappers ${num_mappers} --hive-drop-import-delims --null-string \\N --null-non-string \\N</command>
        </sqoop>
        <ok to="xuwei_test_check"/>
        <error to="Kill"/>
    </action>
    <action name="xuwei_test_check" retry-max="3" retry-interval="5">
        <shell xmlns="uri:oozie:shell-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <configuration>
                <property>
                    <name>mapred.job.queue.name</name>
                    <value>${queueName}</value>
                </property>
            </configuration>
            <exec>/bin/bash</exec>
            <argument>${checkEXEC}</argument>
            <env-var>TYPE=hdfs</env-var>
            <env-var>URI=/data/log/thirdparty/${targetTableName}/${partition_date}/</env-var>
            <archive>${checkClient}#checkclient</archive>
            <capture-output/>
        </shell>
        <ok to="xuwei_test_job"/>
        <error to="Kill"/>
    </action>
    <action name="xuwei_test_job" cred="hive2" retry-max="3" retry-interval="5">
        <hive2 xmlns="uri:oozie:hive2-action:0.1">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <configuration>
                <property>
                    <name>mapred.job.queue.name</name>
                    <value>${queueName}</value>
                </property>
            </configuration>
            <jdbc-url>${jdbcURL}</jdbc-url>
            <password>${pwd}</password>
            <script>/user/hive/project/etl/xuwei_test/xuwei_test.sql</script>
            <param>partition_date=${partition_date}</param>
            <param>dbname=${dbname}</param>
        </hive2>
        <ok to="End"/>
        <error to="Kill"/>
    </action>
    <end name="End"/>
</workflow-app>
\ No newline at end of file
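Note the sqoop command's --hive-drop-import-delims strips \001, \n, and \r from string fields so a row cannot break the delimited layout, while --null-string/--null-non-string write NULLs as \N, which Hive's text serde reads back as SQL NULL. After a load, that round trip can be verified (a hypothetical check, not part of the commit):

-- Hypothetical post-load check: \N cells should surface as SQL NULL.
SELECT count(*) AS null_names
FROM default.xuwei_test
WHERE partition_date = '${partition_date}' AND name IS NULL;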