Commit 9e124d90 authored by 赵建伟's avatar 赵建伟

refactor codes

parent 830645d1
mazhimo
bi
sjxuwei
zhaojianwei
zhaofei
weiyimin
yindanlei
zhanghuixin
wangzhuo
wangyan
root
hadoop
\ No newline at end of file
# bi-report
BI report project init.
部署节点:
bj-gm-prod-cos-dataweb005
临时文件目录
/data/bi-report/tmp
结果目录
/data/bi-report/result
日志目录
/data/bi-report/logs
项目部署
#创建目录
sudo su -
mkdir -p /data/bi-report/tmp
mkdir -p /data/bi-report/result
mkdir -p /data/bi-report/logs
chown -R bi:bi /data/bi-report
#step2.job
type=command
dependencies=step1
command=sh /home/bi/bi-report/lib/shell/hive2csv.sh demo_hive
\ No newline at end of file
#step2.job
type=command
dependencies=step1_1,step1_2,step1_3,step1_4
command=curl -X GET http://localhost:8553/api/report/sendEmail/zhaojianwei@igengmei.com/jianweizhao@yeah.net/demo_java
\ No newline at end of file
#step2.job
type=command
dependencies=step1_1,step1_2,step1_3,step1_4
command=curl -X GET http://localhost:8553/api/report/genExcel?name=demo_java
\ No newline at end of file
#step1_1.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online tl_hdfs_api_tag_3_0_view
\ No newline at end of file
#step1_2.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online tl_hdfs_api_tag_aggregate_view
\ No newline at end of file
#step1_4.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online tl_hdfs_api_tag_attr_view
\ No newline at end of file
#step1_5.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online tl_hdfs_api_tag_map_oldtag_view
\ No newline at end of file
#step2.job
type=command
dependencies=step1_1,step1_2,step1_3,step1_4,step1_5,step1_6
command=sh /home/bi/bi-report/lib/shell/sparksql2email.sh demo_spark_param zhaojianwei zhaojianwei
\ No newline at end of file
#step1_1.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online bl_hdfs_maidian_updates
\ No newline at end of file
#step1_2.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online ml_device_day_active_status
\ No newline at end of file
#step1_4.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online ml_device_day_active_status
\ No newline at end of file
#step1_4.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online ml_hospital_spam_pv_day
\ No newline at end of file
#step2.job
type=command
dependencies=step1_1,step1_2,step1_3,step1_4
command=curl -X GET http://localhost:8553/api/report/sendEmail/zhanghuixin@igengmei.com/mazhimo@igengmei.com,zhaojianwei@igengmei.com/meigou-detail-page
\ No newline at end of file
#coding: utf-8
#author:
#date:
#desc:send email with attach
import smtplib
import sys
import getopt
import os
import getpass
import time
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from email.header import Header
#email info
to_address = ""
subject = ""
content = ""
attach = ""
memail = ""
htable = ""
#userinfo
authfile = "/home/bi/bi-report/conf/auth.cf"
def readallow(filepath):
    """Return the entries of the allow-list file, one per line.

    Every line of *filepath* is stripped of surrounding whitespace and
    returned in file order. The file is opened in a ``with`` block so the
    handle is closed before returning (it used to be left open).
    """
    with open(filepath, "r") as auth_file:
        return [line.strip() for line in auth_file]
def checkauth():
    """Tell whether the current OS user is listed in the allow-list file.

    The allow-list path comes from the module-level ``authfile``.
    """
    allowed_users = readallow(authfile)
    current_user = getpass.getuser()
    return current_user in allowed_users
def isfile(filestr):
    """Check that *filestr* is an existing regular file with at least two lines.

    Returns True in that case. Otherwise a diagnostic naming the file is
    appended to the log through ``write2file``, echoed to stdout, and False
    is returned.
    """
    if os.path.isfile(filestr):
        # Stream the file and stop at the second line instead of loading it
        # all; the ``with`` block also closes the handle, which used to leak.
        with open(filestr, 'r') as handle:
            for line_number, _ in enumerate(handle, 1):
                if line_number >= 2:
                    return True
    message = "%s file is less than 2 lines or does not exist...." % getfilename(filestr)
    write2file(message)
    print("--->" + message)
    return False
def getfilename(filestr):
    """Return the last path component of *filestr* (empty for a trailing slash)."""
    _, tail = os.path.split(filestr)
    return tail
def makeaddress():
    """Build the ';'-separated "To" list from the global ``to_address``.

    ``to_address`` is a comma-separated list. An entry that contains "@"
    after its first character is used as given; any other entry is treated
    as a local user name and gets "@igengmei.com" appended. Entries are
    stripped and blank ones are skipped, so a trailing comma or "a, b" no
    longer produces a bogus "@igengmei.com" or space-prefixed address.
    """
    recipients = []
    for entry in to_address.split(","):
        entry = entry.strip()
        if not entry:
            continue
        if entry.find("@") > 0:
            recipients.append(entry)
        else:
            recipients.append("%s@igengmei.com" % entry)
    return ";".join(recipients)
def addattch(msgRoot):
    """Attach every file named in the global ``attach`` to *msgRoot*.

    ``attach`` is a comma-separated list of paths. Returns 0 when it is
    blank (nothing to attach), -1 as soon as one path fails the ``isfile``
    check (a message is printed), and None after all files were attached.
    """
    if attach.strip() == "":
        return 0
    for item in attach.split(","):
        if not isfile(item):
            print("--->send mail failed, %s file is not exist or file row num less than 2" % getfilename(item))
            return -1
        # Read inside a ``with`` block so the handle is closed right away;
        # the previous open(...).read() left it to the garbage collector.
        with open('%s' % item, 'rb') as payload:
            data = payload.read()
        att = MIMEText(data, 'base64', 'utf-8')
        att["Content-Type"] = 'application/octet-stream'
        att["Content-Disposition"] = 'attachment; filename="%s"' % getfilename(item)
        msgRoot.attach(att)
def makecc():
    """Build the ';'-separated "Cc" list from the global ``memail``.

    ``memail`` is a comma-separated list. An entry that contains "@" after
    its first character is used as given; any other entry is treated as a
    local user name and gets "@igengmei.com" appended. Entries are stripped
    and blank ones are skipped, so when no ``-m`` value was supplied the
    result is an empty string instead of the invalid address
    "@igengmei.com". The resulting list is printed for the job log.
    """
    cc_recipients = []
    for entry in memail.split(","):
        entry = entry.strip()
        if not entry:
            continue
        if entry.find("@") > 0:
            cc_recipients.append(entry)
        else:
            cc_recipients.append("%s@igengmei.com" % entry)
    cc_line = ";".join(cc_recipients)
    print(cc_line)
    return cc_line
def addsubject(msgRoot):
    """Fill in the headers and the HTML body of *msgRoot*.

    Sets Subject, To and Cc from the module globals. When ``htable`` is
    non-empty, each comma-separated file in it must pass ``isfile``; its
    HTML table is appended to the global ``content``. A fixed footer is then
    appended and the result is attached as a UTF-8 HTML part.

    Returns -1 if a table file fails the check, True otherwise. Note that
    the global ``content`` is modified in place.
    """
    global content
    msgRoot['Subject'] = Header(subject, 'utf-8')
    msgRoot['To'] = makeaddress()
    msgRoot['Cc'] = makecc()
    table_files = htable.split(",") if htable != "" else []
    for table_file in table_files:
        if not isfile(table_file):
            return -1
        content += trans2table(table_file)
    content += "<br><br><br><br><font color=red size='3'>这是一封自动发出的邮件,邮件发送自大数据BI组。<br>请使用wps打开,appstore地址:https://appsto.re/cn/MAqWJ.i</font>"
    msgRoot.attach(MIMEText(content, 'html', 'utf-8'))
    return True
def trans2table(path):
    """Render comma-separated text files as HTML tables.

    *path* is one file path or several separated by commas. Each file
    becomes one ``<table>``: blank lines are skipped, every remaining line
    is split on "," into cells, and the first non-blank line of each file is
    rendered with the header background colour.

    The tables of all files are concatenated in order. Previously the
    accumulator was reset for every file, so only a single table survived
    when several paths were given. Files are closed as soon as they are read.
    """
    header_cell = "<td align='left' bgcolor='#8EA9DB' style='font-family:Microsoft YaHei;padding:2px 5px'>%s</td>"
    body_cell = "<td align='left' style='font-family:Microsoft YaHei;padding:2px 5px'>%s</td>"
    tables = []
    for csv_path in path.split(","):
        parts = ["<br><br><table border='1' style='border-collapse:collapse' bordercolor='#9D9D9D'>\n"]
        row_index = 0
        with open(csv_path, 'r') as source:
            for eachline in source:
                stripped = eachline.strip()
                if len(stripped) == 0:
                    continue
                # Only the first non-blank row gets the header styling.
                cell = header_cell if row_index == 0 else body_cell
                parts.append(" <tr>")
                parts.extend(cell % item for item in stripped.split(","))
                parts.append("</tr>\n")
                row_index += 1
        parts.append("</table>")
        tables.append("".join(parts))
    return "".join(tables)
def sendmail():
    """Compose the message from the module globals and send it over SMTP.

    Returns 0 without sending when the required parameters are missing, when
    a table or attachment file fails its check, or when no usable recipient
    remains. Returns None after a successful send, which is also recorded in
    the log file.
    """
    # account info
    sender = 'youjiantongji@wanmeizhensuo.com'
    username = 'youjiantongji@wanmeizhensuo.com'
    # SECURITY NOTE(review): this credential is committed to source control.
    # It is kept as the fallback so existing deployments keep working, but
    # it should be rotated and supplied via BI_REPORT_SMTP_PASSWORD instead.
    password = os.environ.get('BI_REPORT_SMTP_PASSWORD') or 'mq37tgk8nGi6eg1Hgq388oro3itvdmPl'
    smtpserver = 'smtp.exmail.qq.com'
    server_port = 25
    if not checkparam():
        return 0
    msgRoot = MIMEMultipart()
    if addsubject(msgRoot) == -1:
        return 0
    if addattch(msgRoot) == -1:
        return 0
    # Drop blank and domain-only entries (e.g. "@igengmei.com" produced by an
    # empty cc list) so the server is not handed invalid envelope recipients.
    candidates = makeaddress().split(";") + makecc().split(";")
    recipients = []
    for candidate in candidates:
        candidate = candidate.strip()
        if candidate and not candidate.startswith("@"):
            recipients.append(candidate)
    if not recipients:
        print("--->send mail failed, no valid recipient address")
        return 0
    smtp = smtplib.SMTP(smtpserver, server_port)
    try:
        smtp.ehlo()
        smtp.starttls()
        smtp.login(username, password)
        smtp.sendmail(sender, recipients, msgRoot.as_string())
        smtp.quit()
    finally:
        # Release the socket even when a step above raised; close() is
        # harmless after a successful quit().
        smtp.close()
    print("send mail success....")
    write2file("send mail success")
    write2file("--------------------------------------------------------------------------------------")
def processcmd(cmd):
    """Parse the command-line options in *cmd* into the module globals.

    Recognised options (each takes a value): -t to_address, -s subject,
    -c content, -a attach, -m memail, -f htable.

    Returns -1 after printing the error and the usage text when getopt
    rejects the arguments; returns None on success.
    """
    # Exact option -> global-name mapping. The old ``name in ("-t")`` tests
    # were substring checks on a plain string, not tuple membership.
    option_targets = {
        "-t": "to_address",
        "-s": "subject",
        "-c": "content",
        "-a": "attach",
        "-m": "memail",
        "-f": "htable",
    }
    try:
        options, _ = getopt.getopt(cmd, "c:a:t:s:m:f:")
    except getopt.GetoptError as e:
        print("--->%s, remove this param" % str(e))
        usage()
        return -1
    module_vars = globals()
    for name, value in options:
        # getopt only yields options named in the option string above.
        module_vars[option_targets[name]] = value
def checkparam():
    """Verify that both required values, recipient and subject, are non-blank.

    Returns True when they are; otherwise prints an error plus the usage
    text and returns False.
    """
    if to_address.strip() != "" and subject.strip() != "":
        return True
    print("--->must input -t to_address -s subject, and the value can not be empty")
    usage()
    return False
def loginlog(command):
    """Append a timestamped record of the invoking user and *command* to the log."""
    stamp = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
    who = getpass.getuser()
    write2file("%s\tuser:%s\tcmd:%s" % (stamp, who, command))
def processlog():
    """Append the current mail parameters to the log, followed by a separator.

    Records timestamp, OS user, recipients, subject, content and attachment
    list as one tab-separated line.
    """
    stamp = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
    who = getpass.getuser()
    fields = (stamp, who, to_address, subject, content, attach)
    write2file("%s\tuser:%s\ttoaddredd:%s\tsubject:%s\tcontent:%s\tattach:%s" % fields)
    write2file("--------------------------------------------------------------------------------------")
def write2file(sss):
    """Append ``str(sss)`` plus a newline to /data/bi-report/logs/sendmail.log.

    The ``with`` block closes the log file even if the write raises.
    """
    logfile = "/data/bi-report/logs/sendmail.log"
    with open(logfile, 'a') as output:
        output.write(str(sss) + "\n")
def usage():
    """Print the command-line help text, one option per line."""
    help_lines = (
        'sendmail usage:',
        ' -t: mail to (Required)',
        ' -s: subject (Required)',
        ' -m: cc address (Required)',
        ' -c: mail content (Optional)',
        ' -a: attach (Optional)',
        ' -f: trans file content to html table(Optional)',
    )
    for help_line in help_lines:
        print(help_line)
if __name__ == '__main__':
    # Entry point: log the invocation, check the caller against the
    # allow-list, parse the options and send the mail.
    # Every failure path exits non-zero so that callers relying on the exit
    # status (e.g. a shell wrapper ending in `exit $?`) can see the failure;
    # previously all of them exited 0.
    loginlog(sys.argv)
    if not checkauth():
        print("--->You do not have permission to send email, please contact fangguoqing@igengmei.com")
        sys.exit(1)
    if len(sys.argv) < 3:
        usage()
        sys.exit(1)
    if processcmd(sys.argv[1:]) == -1:
        sys.exit(1)
    # sendmail() returns 0 when it gave up before sending.
    if sendmail() == 0:
        sys.exit(1)
\ No newline at end of file
#!/bin/bash
# Renders /home/bi/bi-report/sqls/<name>/<name>.sql with the date placeholders
# substituted, runs it through the Hive beeline endpoint and writes the result
# as CSV to /data/bi-report/result/<name>_<today>.csv.
# Usage: <script> <name> [date]
# Exit status: 1 on bad arguments or preparation failure, otherwise beeline's.
source ~/.bashrc
source /etc/profile
if [ $# -lt 1 ]; then
    echo "Param error, please check again!"
    exit 1
fi
# Accounting date defaults to the one passed as the second argument
V_DATE=$2
partition_date=`date +"%Y%m%d"`
timestamp=`date +"%Y%m%d%H%M%S"`
# The PID suffix keeps runs started within the same second from sharing a temp file
tmp_file_name="/data/bi-report/tmp/tmp_${timestamp}_$$.sql"
result_file_name="/data/bi-report/result/${1}_${partition_date}.csv"
# No date given: fall back to yesterday (T+1 accounting date)
if [ -z "${V_DATE}" ]; then
    V_DATE=`date --date="-1 day" +%Y-%m-%d`
fi
# Supported date placeholders; an unparsable date aborts instead of
# substituting empty strings into the SQL
V_PARYMD=`date --date="$V_DATE" +%Y%m%d` || { echo "Invalid date: $V_DATE"; exit 1; }
V_PARY_M_D=`date --date="$V_DATE" +%Y-%m-%d`
V_1DAYMD_AGO=`date --date="$V_PARYMD -1 day" +%Y%m%d`
V_7DAYMD_AGO=`date --date="$V_PARYMD -7 day" +%Y%m%d`
V_1DAY_M_D_AGO=`date --date="$V_PARYMD -1 day" +%Y-%m-%d`
V_7DAY_M_D_AGO=`date --date="$V_PARYMD -7 day" +%Y-%m-%d`
# Year, month and day of the accounting date
V_PARYEAR=`date --date="$V_DATE" +%Y`
V_PARMONTH=`date --date="$V_DATE" +%m`
V_PARDAY=`date --date="$V_DATE" +%d`
# Print the accounting dates
echo ""
echo "************************************"
echo "V_PARYMD: "$V_PARYMD
echo "V_PARY_M_D: "$V_PARY_M_D
echo "V_1DAYMD_AGO: "$V_1DAYMD_AGO
echo "V_7DAYMD_AGO: "$V_7DAYMD_AGO
echo "V_1DAY_M_D_AGO: "$V_1DAY_M_D_AGO
echo "V_7DAY_M_D_AGO: "$V_7DAY_M_D_AGO
echo "************************************"
echo ""
cp "/home/bi/bi-report/sqls/$1/$1.sql" "$tmp_file_name" || exit 1
# Substitute every placeholder in one pass; any sed failure aborts
sed -i \
    -e "s/\$V_PARYMD/$V_PARYMD/g" \
    -e "s/\$V_PARY_M_D/$V_PARY_M_D/g" \
    -e "s/\$V_1DAYMD_AGO/$V_1DAYMD_AGO/g" \
    -e "s/\$V_7DAYMD_AGO/$V_7DAYMD_AGO/g" \
    -e "s/\$V_1DAY_M_D_AGO/$V_1DAY_M_D_AGO/g" \
    -e "s/\$V_7DAY_M_D_AGO/$V_7DAY_M_D_AGO/g" \
    "$tmp_file_name" || { rm -f "$tmp_file_name"; exit 1; }
# Run the rendered SQL and store the CSV output
/opt/hive/hive-1.1.0-cdh5.16.1/bin/beeline -u jdbc:hive2://bj-gm-prod-cos-datacenter004:10000 -n data --outputformat=csv2 --showHeader=true -e "`cat "$tmp_file_name"`" > "$result_file_name"
# Remember beeline's status: `exit $?` after rm would report rm's status
# and hide a failed query
beeline_status=$?
rm -f "$tmp_file_name"
exit $beeline_status
\ No newline at end of file
#!/bin/bash
# Renders /home/bi/bi-report/sqls/<name>/<name>.sql with the date placeholders
# substituted and runs it through the Hive beeline endpoint (output goes to
# stdout).
# Usage: <script> <name> [date]
# Exit status: 1 on bad arguments or preparation failure, otherwise beeline's.
source ~/.bashrc
source /etc/profile
if [ $# -lt 1 ]; then
    echo "Param error, please check again!"
    exit 1
fi
# Accounting date defaults to the one passed as the second argument
V_DATE=$2
timestamp=`date +"%Y%m%d%H%M%S"`
# The PID suffix keeps runs started within the same second from sharing a temp file
tmp_file_name="/data/bi-report/tmp/tmp_${timestamp}_$$.sql"
# No date given: fall back to yesterday (T+1 accounting date)
if [ -z "${V_DATE}" ]; then
    V_DATE=`date --date="-1 day" +%Y-%m-%d`
fi
# Supported date placeholders; an unparsable date aborts instead of
# substituting empty strings into the SQL
V_PARYMD=`date --date="$V_DATE" +%Y%m%d` || { echo "Invalid date: $V_DATE"; exit 1; }
V_PARY_M_D=`date --date="$V_DATE" +%Y-%m-%d`
V_1DAYMD_AGO=`date --date="$V_PARYMD -1 day" +%Y%m%d`
V_7DAYMD_AGO=`date --date="$V_PARYMD -7 day" +%Y%m%d`
V_1DAY_M_D_AGO=`date --date="$V_PARYMD -1 day" +%Y-%m-%d`
V_7DAY_M_D_AGO=`date --date="$V_PARYMD -7 day" +%Y-%m-%d`
# Year, month and day of the accounting date
V_PARYEAR=`date --date="$V_DATE" +%Y`
V_PARMONTH=`date --date="$V_DATE" +%m`
V_PARDAY=`date --date="$V_DATE" +%d`
# Print the accounting dates
echo ""
echo "************************************"
echo "V_PARYMD: "$V_PARYMD
echo "V_PARY_M_D: "$V_PARY_M_D
echo "V_1DAYMD_AGO: "$V_1DAYMD_AGO
echo "V_7DAYMD_AGO: "$V_7DAYMD_AGO
echo "V_1DAY_M_D_AGO: "$V_1DAY_M_D_AGO
echo "V_7DAY_M_D_AGO: "$V_7DAY_M_D_AGO
echo "************************************"
echo ""
cp "/home/bi/bi-report/sqls/$1/$1.sql" "$tmp_file_name" || exit 1
# Substitute every placeholder in one pass; any sed failure aborts
sed -i \
    -e "s/\$V_PARYMD/$V_PARYMD/g" \
    -e "s/\$V_PARY_M_D/$V_PARY_M_D/g" \
    -e "s/\$V_1DAYMD_AGO/$V_1DAYMD_AGO/g" \
    -e "s/\$V_7DAYMD_AGO/$V_7DAYMD_AGO/g" \
    -e "s/\$V_1DAY_M_D_AGO/$V_1DAY_M_D_AGO/g" \
    -e "s/\$V_7DAY_M_D_AGO/$V_7DAY_M_D_AGO/g" \
    "$tmp_file_name" || { rm -f "$tmp_file_name"; exit 1; }
# Run the rendered SQL
/opt/hive/hive-1.1.0-cdh5.16.1/bin/beeline -u jdbc:hive2://bj-gm-prod-cos-datacenter004:10000 -n data -e "`cat "$tmp_file_name"`"
# Remember beeline's status: `exit $?` after rm would report rm's status
# and hide a failed query
beeline_status=$?
rm -f "$tmp_file_name"
exit $beeline_status
\ No newline at end of file
#!/bin/bash
# Renders the SQL file given as the first argument with the date placeholders
# substituted, runs it through the Spark beeline endpoint and writes the result
# as CSV to the file given as the second argument.
# Usage: <script> <sql_file> <output_file> [date]
# Exit status: 1 on bad arguments or preparation failure, otherwise beeline's.
source ~/.bashrc
source /etc/profile
if [ $# -lt 2 ]; then
    echo "Param error, please check again!"
    exit 1
fi
# Accounting date defaults to the one passed as the third argument
V_DATE=$3
# No date given: fall back to yesterday (T+1 accounting date)
if [ -z "${V_DATE}" ]; then
    V_DATE=`date --date="-1 day" +%Y-%m-%d`
fi
# Supported date placeholders; an unparsable date aborts instead of
# substituting empty strings into the SQL
V_PARYMD=`date --date="$V_DATE" +%Y%m%d` || { echo "Invalid date: $V_DATE"; exit 1; }
V_PARY_M_D=`date --date="$V_DATE" +%Y-%m-%d`
V_1DAYMD_AGO=`date --date="$V_PARYMD -1 day" +%Y%m%d`
V_7DAYMD_AGO=`date --date="$V_PARYMD -7 day" +%Y%m%d`
V_1DAY_M_D_AGO=`date --date="$V_PARYMD -1 day" +%Y-%m-%d`
V_7DAY_M_D_AGO=`date --date="$V_PARYMD -7 day" +%Y-%m-%d`
# Year, month and day of the accounting date
V_PARYEAR=`date --date="$V_DATE" +%Y`
V_PARMONTH=`date --date="$V_DATE" +%m`
V_PARDAY=`date --date="$V_DATE" +%d`
# Print the accounting dates
echo ""
echo "************************************"
echo "V_PARYMD: "$V_PARYMD
echo "V_PARY_M_D: "$V_PARY_M_D
echo "V_1DAYMD_AGO: "$V_1DAYMD_AGO
echo "V_7DAYMD_AGO: "$V_7DAYMD_AGO
echo "V_1DAY_M_D_AGO: "$V_1DAY_M_D_AGO
echo "V_7DAY_M_D_AGO: "$V_7DAY_M_D_AGO
echo "************************************"
echo ""
# Per-process temp file in the working directory; a shared "tmp.sql" would be
# overwritten by another run started from the same directory
tmp_file_name="tmp_$$.sql"
cp "$1" "$tmp_file_name" || exit 1
# Substitute every placeholder in one pass; any sed failure aborts
sed -i \
    -e "s/\$V_PARYMD/$V_PARYMD/g" \
    -e "s/\$V_PARY_M_D/$V_PARY_M_D/g" \
    -e "s/\$V_1DAYMD_AGO/$V_1DAYMD_AGO/g" \
    -e "s/\$V_7DAYMD_AGO/$V_7DAYMD_AGO/g" \
    -e "s/\$V_1DAY_M_D_AGO/$V_1DAY_M_D_AGO/g" \
    -e "s/\$V_7DAY_M_D_AGO/$V_7DAY_M_D_AGO/g" \
    "$tmp_file_name" || { rm -f "$tmp_file_name"; exit 1; }
# Run the rendered SQL and store the CSV output
/opt/spark/spark-2.4.5-bin-hadoop2.7/bin/beeline -u jdbc:hive2://bj-gm-prod-cos-datacenter003:10010/online -n data --outputformat=csv2 --showHeader=true -e "`cat "$tmp_file_name"`" > "$2"
# Remember beeline's status: `exit $?` after rm would report rm's status
# and hide a failed query
beeline_status=$?
rm -f "$tmp_file_name"
exit $beeline_status
\ No newline at end of file
#!/bin/bash
# Renders /home/bi/bi-report/sqls/<name>/<name>.sql with the date placeholders
# substituted, runs it through the Spark beeline endpoint, stores the CSV in
# /data/bi-report/result/<name>_<today>.csv and mails that file as an
# attachment via sendmail.py.
# Usage: <script> <name> <to> <cc> [date]
# Exit status: 1 on bad arguments, preparation or query failure, otherwise
# the status of sendmail.py.
source ~/.bashrc
source /etc/profile
if [ $# -lt 3 ]; then
    echo "Param error, please check again!"
    exit 1
fi
# Accounting date defaults to the one passed as the fourth argument
V_DATE=$4
partition_date=`date +"%Y%m%d"`
timestamp=`date +"%Y%m%d%H%M%S"`
# The PID suffix keeps runs started within the same second from sharing a temp file
tmp_file_name="/data/bi-report/tmp/tmp_${timestamp}_$$.sql"
result_file_name="/data/bi-report/result/${1}_${partition_date}.csv"
# No date given: fall back to yesterday (T+1 accounting date)
if [ -z "${V_DATE}" ]; then
    V_DATE=`date --date="-1 day" +%Y-%m-%d`
fi
# Supported date placeholders; an unparsable date aborts instead of
# substituting empty strings into the SQL
V_PARYMD=`date --date="$V_DATE" +%Y%m%d` || { echo "Invalid date: $V_DATE"; exit 1; }
V_PARY_M_D=`date --date="$V_DATE" +%Y-%m-%d`
V_1DAYMD_AGO=`date --date="$V_PARYMD -1 day" +%Y%m%d`
V_7DAYMD_AGO=`date --date="$V_PARYMD -7 day" +%Y%m%d`
V_1DAY_M_D_AGO=`date --date="$V_PARYMD -1 day" +%Y-%m-%d`
V_7DAY_M_D_AGO=`date --date="$V_PARYMD -7 day" +%Y-%m-%d`
# Year, month and day of the accounting date
V_PARYEAR=`date --date="$V_DATE" +%Y`
V_PARMONTH=`date --date="$V_DATE" +%m`
V_PARDAY=`date --date="$V_DATE" +%d`
# Print the accounting dates
echo ""
echo "************************************"
echo "V_PARYMD: "$V_PARYMD
echo "V_PARY_M_D: "$V_PARY_M_D
echo "V_1DAYMD_AGO: "$V_1DAYMD_AGO
echo "V_7DAYMD_AGO: "$V_7DAYMD_AGO
echo "V_1DAY_M_D_AGO: "$V_1DAY_M_D_AGO
echo "V_7DAY_M_D_AGO: "$V_7DAY_M_D_AGO
echo "************************************"
echo ""
cp "/home/bi/bi-report/sqls/$1/$1.sql" "$tmp_file_name" || exit 1
# Substitute every placeholder in one pass; any sed failure aborts
sed -i \
    -e "s/\$V_PARYMD/$V_PARYMD/g" \
    -e "s/\$V_PARY_M_D/$V_PARY_M_D/g" \
    -e "s/\$V_1DAYMD_AGO/$V_1DAYMD_AGO/g" \
    -e "s/\$V_7DAYMD_AGO/$V_7DAYMD_AGO/g" \
    -e "s/\$V_1DAY_M_D_AGO/$V_1DAY_M_D_AGO/g" \
    -e "s/\$V_7DAY_M_D_AGO/$V_7DAY_M_D_AGO/g" \
    "$tmp_file_name" || { rm -f "$tmp_file_name"; exit 1; }
# Run the rendered SQL and store the CSV output
/opt/spark/spark-2.4.5-bin-hadoop2.7/bin/beeline -u jdbc:hive2://bj-gm-prod-cos-datacenter003:10010/online -n data --outputformat=csv2 --showHeader=true -e "`cat "$tmp_file_name"`" > "$result_file_name"
beeline_status=$?
# Remove the temp file before acting on the status so that a failed query
# no longer leaves it behind
rm -f "$tmp_file_name"
[ $beeline_status -eq 0 ] || exit 1
to=$2
cc=$3
echo "to: "$to
echo "cc: "$cc
python2.7 /home/bi/bi-report/lib/python/sendmail.py -t "$to" -s 自动发送 -m "$cc" -a "$result_file_name"
exit $?
\ No newline at end of file
daily_content_data=内容日报-新
\ No newline at end of file
This diff is collapsed.
This diff is collapsed.
#step1_1.job #step1_1.job
type=command type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online bl_hdfs_maidian_updates command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online ml_device_day_active_status
\ No newline at end of file \ No newline at end of file
#step1_4.job #step1_10.job
type=command type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online bl_hdfs_maidian_updates command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online tl_hdfs_answer_view
\ No newline at end of file \ No newline at end of file
#step1_11.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online tl_hdfs_topicreply_view
\ No newline at end of file
#step1.job #step1_12.job
type=command type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online ml_device_day_active_status command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online tl_hdfs_answer_reply_view
\ No newline at end of file \ No newline at end of file
#step1_5.job #step1_13.job
type=command type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online ml_device_day_active_status command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online tl_hdfs_api_tractate_reply_view
\ No newline at end of file \ No newline at end of file
#step1_2.job #step1_2.job
type=command type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online ml_hospital_spam_pv_day command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online bl_hdfs_maidian_updates
\ No newline at end of file \ No newline at end of file
#step1_3.job #step1_3.job
type=command type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online tl_hdfs_api_tag_attr_tag_view command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online ml_device_updates
\ No newline at end of file \ No newline at end of file
#step1_4.job #step1_4.job
type=command type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online bl_hdfs_maidian_updates command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online ml_user_updates
\ No newline at end of file \ No newline at end of file
#step1_5.job #step1_5.job
type=command type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online ml_device_day_active_status command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online bl_hdfs_operation_updates
\ No newline at end of file \ No newline at end of file
#step1_6.job #step1_6.job
type=command type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online tl_hdfs_api_tag_view command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online tl_hdfs_api_tractate_view
\ No newline at end of file \ No newline at end of file
#step1_1.job #step1_7.job
type=command type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online bl_hdfs_maidian_updates command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online tl_hdfs_diary_view
\ No newline at end of file \ No newline at end of file
#step1_2.job #step1_8.job
type=command type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online ml_hospital_spam_pv_day command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online tl_hdfs_problem_view
\ No newline at end of file \ No newline at end of file
#step1_9.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online tl_hdfs_question_view
\ No newline at end of file
#step2.job #step2.job
type=command type=command
command=curl -X GET http://localhost:8553/api/report/execSql?name=hive_insert dependencies=step1_1,step1_2,step1_3,step1_4,step1_5,step1_6,step1_7,step1_8,step1_9,step1_10,step1_11,step1_12,step1_13
\ No newline at end of file command=curl -X GET http://localhost:8553/api/report/execSql?name=daily_content_data_insert
\ No newline at end of file
#step3.job
type=command
dependencies=step2
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive pm tl_pm_content_d
\ No newline at end of file
#step4.job
type=command
dependencies=step3
command=curl -X GET http://localhost:8553/api/report/sendEmail/liudi@igengmei.com,wangxin@igengmei.com,malinxi@igengmei.com,dengguangyu@igengmei.com/weiyimin@igengmei.com,zhaojianwei@igengmei.com,zhaofei@igengmei.com,yindanlei@igengmei.com/daily_content_data
\ No newline at end of file
-- Daily content report.
-- Selects the rows of pm.tl_pm_content_d for yesterday's partition
-- (partition_day = current_date - 1 formatted as yyyyMMdd) and aliases every
-- column to the Chinese header that is shown in the report.
SELECT
day_id AS `日期`
,device_os_type AS `设备类型`
,active_type AS `活跃类型`
,is_ai_channel AS `是否AI渠道`
,dau AS `DAU`
,content_uv AS `内容详情页UV`
,content_pv AS `内容详情页PV`
,per_content_uv AS `内容UV/DAU`
,per_content_pv AS `内容PV/内容UV`
,retention_1 AS `内容用户APP次留`
,retention_7 AS `内容用户APP7留`
,retention_30 AS `内容用户APP30留`
,avg_app_duration AS `内容用户单设备App时长(m)`
,avg_content_stay AS `内容用户单设备内容时长(m)`
,avg_open_times AS `内容用户单设备打开次数`
,search_related_stay AS `内容用户搜索相关页面单设备页面时长(m)`
,welfare_stay AS `内容用户美购详情页单设备页面时长(m)`
,content_question_stay AS `内容用户问题详情页单设备页面时长(m)`
,ai_related_stay AS `内容用户AI相关页面单设备页面时长(m)`
,content_diary_stay AS `内容用户日记详情页单设备页面时长(m)`
,home_stay AS `内容用户首页单设备页面时长(m)`
,conv_related_stay AS `内容用户咨询相关页面单设备页面时长(m)`
,recommend_rate AS `首页feeds推荐进入内容UV/内容UV`
,per_recommend_pv AS `首页feeds推荐进入内容PV/UV`
,feeds_rate AS `首页feeds非推荐进入内容UV/内容UV`
,per_feeds_pv AS `首页feeds非推荐进入内容PV/UV`
,search_rate AS `搜索进入内容UV/内容UV`
,per_search_pv AS `搜索进入内容PV/UV`
,zone_rate AS `内容聚合页进入内容UV/内容UV`
,per_zone_pv AS `内容聚合页进入内容PV/UV`
,content_rate AS `内容详情页推荐板块进入内容UV/内容UV`
,per_from_content_pv AS `内容详情页推荐板块进入内容PV/UV`
,blank_rate AS `无来源页面(大多数为push)进入内容UV/内容UV`
,per_blank_pv AS `无来源页面(大多数为push)进入内容PV/UV`
,comment_rate AS `评论列表页进入内容UV/内容UV`
,per_comment_pv AS `评论列表页进入内容PV/UV`
,org_rate AS `医生医院主页进入内容UV/内容UV`
,per_org_pv AS `医生医院主页进入内容PV/UV`
,category_rate AS `品类聚合页进入内容UV/内容UV`
,per_category_pv AS `品类聚合页进入内容PV/UV`
,my_diary_rate AS `我的日记页进入内容UV/内容UV`
,per_my_diary_pv AS `我的日记页进入内容PV/UV`
,ai_rate AS `AI报告页进入内容UV/内容UV`
,per_ai_pv AS `AI报告页进入内容PV/UV`
,diary_uv AS `日记UV`
,diary_pv AS `日记PV`
,diary_rate AS `日记UV/内容UV`
,per_diary_pv AS `日记PV/日记UV`
,diary_stay AS `日记单设备时长(m)`
,post_uv AS `帖子UV`
,post_pv AS `帖子PV`
,post_rate AS `帖子UV/内容UV`
,per_post_pv AS `帖子PV/帖子UV`
,post_stay AS `帖子单设备时长(m)`
,question_uv AS `问题UV`
,question_pv AS `问题PV`
,question_rate AS `问题UV/内容UV`
,per_question_pv AS `问题PV/问题UV`
,question_stay AS `问题单设备时长(m)`
,question_answer_uv AS `问答UV`
,question_answer_pv AS `问答PV`
,question_answer_rate AS `问答UV/内容UV`
,per_question_answer_pv AS `问答PV/问答UV`
,question_answer_stay AS `问答单设备时长(m)`
,answer_uv AS `回答UV`
,answer_pv AS `回答PV`
,answer_rate AS `回答UV/内容UV`
,per_answer_pv AS `回答PV/回答UV`
,answer_stay AS `回答单设备时长(m)`
,video_uv AS `视频UV`
,video_pv AS `视频PV`
,video_rate AS `视频UV/内容UV`
,per_video_pv AS `视频PV/视频UV`
,video_stay AS `视频单设备时长(m)`
,wiki_uv AS `百科UV`
,wiki_pv AS `百科PV`
,wiki_rate AS `百科UV/内容UV`
,per_wiki_pv AS `百科PV/百科UV`
,wiki_stay AS `百科单设备时长(m)`
,article_uv AS `专栏UV`
,article_pv AS `专栏PV`
,article_rate AS `专栏UV/内容UV`
,per_article_pv AS `专栏PV/专栏UV`
,article_stay AS `专栏单设备时长(m)`
FROM pm.tl_pm_content_d
where partition_day=regexp_replace(DATE_SUB(current_date,1) ,'-','')
# bi-report
BI report project init.
部署节点:
bj-gm-prod-cos-dataweb005
临时文件目录
/data/bi-report/tmp
结果目录
/data/bi-report/result
日志目录
/data/bi-report/logs
项目部署
#创建目录
sudo su -
mkdir -p /data/bi-report/tmp
mkdir -p /data/bi-report/result
mkdir -p /data/bi-report/logs
chown -R bi:bi /data/bi-report
开发过程中需要注意的事项: 开发过程中需要注意的事项:
1.在en-cn.properties中维护映射关系,则显示中文名,否则显示英文名 1.在en-cn.properties中维护映射关系,则显示中文名,否则显示英文名
2.在编写类似insert into tableA select * from tableB语句时,不需要添加set role admin以及set mapreduce.job.queuename=data语句,而且不能带有";"符号 2.在编写类似insert into tableA select * from tableB语句时,不需要添加set role admin以及set mapreduce.job.queuename=data语句,而且不能带有";"符号
......
-- Sample query over online.ml_device_day_active_status.
-- Returns at most 10 rows (partition date, device OS type, active-type label,
-- device id) for the fixed partition range 20200401..20200408.
-- active_type '4' is labelled '老活' and '1'/'2' are labelled '新增'.
-- Rows whose first_channel_source_type is in the exclusion list below, or
-- matches the pattern 'promotion\_jf\_%', are filtered out.
SELECT partition_date
,device_os_type AS device_type
,CASE WHEN active_type = '4' THEN '老活'
WHEN active_type IN ('1','2') THEN '新增' END AS active_type
,device_id
FROM online.ml_device_day_active_status
WHERE partition_date between '20200401' and '20200408'
AND active_type IN ('1','2','4')
AND first_channel_source_type not IN ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang'
,'js-az1','js-az2','js-az3','js-az4','js-az5','jfq-az1','jfq-az2','jfq-az3','jfq-az4','jfq-az5','toufang1'
,'toufang2','toufang3','toufang4','toufang5','toufang6','TF-toufang1','TF-toufang2','TF-toufang3','TF-toufang4'
,'TF-toufang5','tf-toufang1','tf-toufang2','tf-toufang3','tf-toufang4','tf-toufang5','benzhan','promotion_aso100'
,'promotion_qianka','promotion_xiaoyu','promotion_dianru','promotion_malioaso','promotion_malioaso-shequ'
,'promotion_shike','promotion_julang_jl03','promotion_zuimei')
AND first_channel_source_type not LIKE 'promotion\_jf\_%'
limit 10
\ No newline at end of file
This diff is collapsed.
This diff is collapsed.
-- Sample query over online.ml_device_day_active_status.
-- Returns at most 10 rows (partition date, device OS type, active-type label,
-- device id) for the fixed partition range 20200401..20200408.
-- active_type '4' is labelled '老活' and '1'/'2' are labelled '新增'.
-- Rows whose first_channel_source_type is in the exclusion list below, or
-- matches the pattern 'promotion\_jf\_%', are filtered out.
SELECT partition_date
,device_os_type AS device_type
,CASE WHEN active_type = '4' THEN '老活'
WHEN active_type IN ('1','2') THEN '新增' END AS active_type
,device_id
FROM online.ml_device_day_active_status
WHERE partition_date between '20200401' and '20200408'
AND active_type IN ('1','2','4')
AND first_channel_source_type not IN ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang'
,'js-az1','js-az2','js-az3','js-az4','js-az5','jfq-az1','jfq-az2','jfq-az3','jfq-az4','jfq-az5','toufang1'
,'toufang2','toufang3','toufang4','toufang5','toufang6','TF-toufang1','TF-toufang2','TF-toufang3','TF-toufang4'
,'TF-toufang5','tf-toufang1','tf-toufang2','tf-toufang3','tf-toufang4','tf-toufang5','benzhan','promotion_aso100'
,'promotion_qianka','promotion_xiaoyu','promotion_dianru','promotion_malioaso','promotion_malioaso-shequ'
,'promotion_shike','promotion_julang_jl03','promotion_zuimei')
AND first_channel_source_type not LIKE 'promotion\_jf\_%'
limit 10
\ No newline at end of file
-- Tag mapping report: one row per new tag (name, type, id, display flag,
-- online flag) with its related values collected into sets.
-- '$V_PARYMD' is a placeholder; the runner scripts in this project replace it
-- with the accounting date (yyyyMMdd) before the query is executed.
select
t6.new_tag_name, -- new tag name
t6.new_tag_type, -- new tag type
t6.new_tag_id, -- new tag id
t6.is_display, -- whether shown in the app
t6.is_online, -- whether in effect
collect_set(t6.jianyi_word) as jianyi_word, -- near-synonyms
collect_set(t6.like_new_tag) as tongyi_word, -- synonyms
collect_set(t6.fangshi_1f) as fangshi_1st, -- level-1 method name
collect_set(t6.fangshi_2d) as fangshi_2nd, -- level-2 method name
collect_set(t6.suqiu_1f) as suqiu_1st, -- level-1 demand name
collect_set(t6.suqiu_2d) as suqiu_2nd, -- level-2 demand name
collect_set(t6.buwei_if) as buwei_1st, -- level-1 body part name
collect_set(t6.buwei_2d) as buwei_2nd, -- level-2 body part name
collect_set(t6.old_tag_name) as old_tag_name, -- old tag name
collect_set(t6.old_tag_type) as old_tag_type, -- old tag type
collect_set(t6.old_tag_id) as old_tag_id, -- old tag id
collect_set(t6.fangshi_1f_id) as fangshi_1st_id, -- level-1 method id
collect_set(t6.fangshi_2d_id) as fangshi_2nd_id, -- level-2 method id
collect_set(t6.suqiu_1f_id) as suqiu_1st_id, -- level-1 demand id
collect_set(t6.suqiu_2d_id) as suqiu_2nd_id, -- level-2 demand id
collect_set(t6.buwei_if_id) as buwei_1st_id, -- level-1 body part id
collect_set(t6.buwei_2d_id) as buwei_2nd_id -- level-2 body part id
from
(
select
t1.name as new_tag_name, -- new tag name
t1.tag_type as new_tag_type, -- new tag type
t1.id as new_tag_id, -- new tag id
t1.is_display as is_display, -- whether shown in the app
t1.is_online as is_online, -- whether in effect
t1.homoionym as jianyi_word, -- near-synonyms
t2.name as like_new_tag, -- synonyms
t4.aggregate_type, -- enum value of the attribute type
(case when t4.aggregate_type='6' then t4.name else NULL end) as fangshi_1f, -- level-1 method name
(case when t4.aggregate_type='2' then t4.name else NULL end) as fangshi_2d, -- level-2 method name
(case when t4.aggregate_type='7' then t4.name else NULL end) as suqiu_1f, -- level-1 demand name
(case when t4.aggregate_type='8' then t4.name else NULL end) as suqiu_2d, -- level-2 demand name
(case when t4.aggregate_type='10' then t4.name else NULL end) as buwei_if, -- level-1 body part name
(case when t4.aggregate_type='3' then t4.name else NULL end) as buwei_2d, -- level-2 body part name
t7.old_tag_name as old_tag_name, -- old tag name
t7.old_tag_type as old_tag_type, -- old tag type
t5.old_tag_id as old_tag_id, -- old tag id
(case when t4.aggregate_type='6' then t4.id else NULL end) as fangshi_1f_id, -- level-1 method id
(case when t4.aggregate_type='2' then t4.id else NULL end) as fangshi_2d_id, -- level-2 method id
(case when t4.aggregate_type='7' then t4.id else NULL end) as suqiu_1f_id, -- level-1 demand id
(case when t4.aggregate_type='8' then t4.id else NULL end) as suqiu_2d_id, -- level-2 demand id
(case when t4.aggregate_type='10' then t4.id else NULL end) as buwei_if_id, -- level-1 body part id
(case when t4.aggregate_type='3' then t4.id else NULL end) as buwei_2d_id -- level-2 body part id
from
(select name,id,is_display,is_online,
case when tag_type in ('1') then '普通分类'
when tag_type in ('2') then '一级分类'
when tag_type in ('3') then '二级分类'
end as tag_type,
collect_list(t1.homoionym_detail) as homoionym
from online.tl_hdfs_api_tag_3_0_view -- new tag id, near-synonyms
-- NOTE(review): a plain lateral view explode yields no row when the exploded
-- array is empty, so tags without near-synonyms may be missing; confirm
-- whether "lateral view outer" is wanted.
lateral view explode(json_split(homoionym)) t1 as homoionym_detail -- json_split (UDF, not shown here) presumably unescapes values such as ["\u8138\u578b\u77eb\u6b63"]
where partition_date='$V_PARYMD'
group by name,id,is_display,is_online,
case when tag_type in ('1') then '普通分类'
when tag_type in ('2') then '一级分类'
when tag_type in ('3') then '二级分类'
end
) t1
left join
(select name,tag_id from online.tl_hdfs_api_tag_aggregate_view where partition_date='$V_PARYMD') t2 -- new tag id, synonyms
on (t1.id=t2.tag_id)
left join
(select tag_attr_id as tag_ids,tag_id from online.tl_hdfs_api_tag_attr_tag_view where partition_date='$V_PARYMD') t3 -- new tag id, attribute ids
on (t1.id=t3.tag_id)
left join
(select name,id,aggregate_type from online.tl_hdfs_api_tag_attr_view where partition_date='$V_PARYMD') t4 -- attribute id, attribute category
on (t3.tag_ids=t4.id)
left join
(select old_tag_id,tag_id from online.tl_hdfs_api_tag_map_oldtag_view where partition_date='$V_PARYMD') t5
on (t1.id=t5.tag_id)
left join
(select id,name as old_tag_name,
case when tag_type in ('1') then '一级分类'
when tag_type in ('2') then '二级分类'
when tag_type in ('3') then '三级分类'
when tag_type in ('4') then '城市'
when tag_type in ('5') then '自由添加'
when tag_type in ('6') then '医生'
when tag_type in ('7') then '医院'
when tag_type in ('8') then '频道'
when tag_type in ('9') then '省份'
when tag_type in ('10') then '国家'
when tag_type in ('11') then '运营标签'
end as old_tag_type
from online.tl_hdfs_api_tag_view where partition_date='$V_PARYMD') t7
on (t7.id=t5.old_tag_id)
)t6
group by t6.new_tag_name,t6.new_tag_type,t6.new_tag_id,t6.is_display,t6.is_online
\ No newline at end of file
-- Tag mapping report: one row per new tag (name, type, id, display flag,
-- online flag) with its related values collected into sets.
-- '$V_PARYMD' is a placeholder; the runner scripts in this project replace it
-- with the accounting date (yyyyMMdd) before the query is executed.
select
t6.new_tag_name, -- new tag name
t6.new_tag_type, -- new tag type
t6.new_tag_id, -- new tag id
t6.is_display, -- whether shown in the app
t6.is_online, -- whether in effect
collect_set(t6.jianyi_word) as jianyi_word, -- near-synonyms
collect_set(t6.like_new_tag) as tongyi_word, -- synonyms
collect_set(t6.fangshi_1f) as fangshi_1st, -- level-1 method name
collect_set(t6.fangshi_2d) as fangshi_2nd, -- level-2 method name
collect_set(t6.suqiu_1f) as suqiu_1st, -- level-1 demand name
collect_set(t6.suqiu_2d) as suqiu_2nd, -- level-2 demand name
collect_set(t6.buwei_if) as buwei_1st, -- level-1 body part name
collect_set(t6.buwei_2d) as buwei_2nd, -- level-2 body part name
collect_set(t6.old_tag_name) as old_tag_name, -- old tag name
collect_set(t6.old_tag_type) as old_tag_type, -- old tag type
collect_set(t6.old_tag_id) as old_tag_id, -- old tag id
collect_set(t6.fangshi_1f_id) as fangshi_1st_id, -- level-1 method id
collect_set(t6.fangshi_2d_id) as fangshi_2nd_id, -- level-2 method id
collect_set(t6.suqiu_1f_id) as suqiu_1st_id, -- level-1 demand id
collect_set(t6.suqiu_2d_id) as suqiu_2nd_id, -- level-2 demand id
collect_set(t6.buwei_if_id) as buwei_1st_id, -- level-1 body part id
collect_set(t6.buwei_2d_id) as buwei_2nd_id -- level-2 body part id
from
(
select
t1.name as new_tag_name, -- new tag name
t1.tag_type as new_tag_type, -- new tag type
t1.id as new_tag_id, -- new tag id
t1.is_display as is_display, -- whether shown in the app
t1.is_online as is_online, -- whether in effect
t1.homoionym as jianyi_word, -- near-synonyms
t2.name as like_new_tag, -- synonyms
t4.aggregate_type, -- enum value of the attribute type
(case when t4.aggregate_type='6' then t4.name else NULL end) as fangshi_1f, -- level-1 method name
(case when t4.aggregate_type='2' then t4.name else NULL end) as fangshi_2d, -- level-2 method name
(case when t4.aggregate_type='7' then t4.name else NULL end) as suqiu_1f, -- level-1 demand name
(case when t4.aggregate_type='8' then t4.name else NULL end) as suqiu_2d, -- level-2 demand name
(case when t4.aggregate_type='10' then t4.name else NULL end) as buwei_if, -- level-1 body part name
(case when t4.aggregate_type='3' then t4.name else NULL end) as buwei_2d, -- level-2 body part name
t7.old_tag_name as old_tag_name, -- old tag name
t7.old_tag_type as old_tag_type, -- old tag type
t5.old_tag_id as old_tag_id, -- old tag id
(case when t4.aggregate_type='6' then t4.id else NULL end) as fangshi_1f_id, -- level-1 method id
(case when t4.aggregate_type='2' then t4.id else NULL end) as fangshi_2d_id, -- level-2 method id
(case when t4.aggregate_type='7' then t4.id else NULL end) as suqiu_1f_id, -- level-1 demand id
(case when t4.aggregate_type='8' then t4.id else NULL end) as suqiu_2d_id, -- level-2 demand id
(case when t4.aggregate_type='10' then t4.id else NULL end) as buwei_if_id, -- level-1 body part id
(case when t4.aggregate_type='3' then t4.id else NULL end) as buwei_2d_id -- level-2 body part id
from
(select name,id,is_display,is_online,
case when tag_type in ('1') then '普通分类'
when tag_type in ('2') then '一级分类'
when tag_type in ('3') then '二级分类'
end as tag_type,
collect_list(t1.homoionym_detail) as homoionym
from online.tl_hdfs_api_tag_3_0_view -- new tag id, near-synonyms
-- NOTE(review): a plain lateral view explode yields no row when the exploded
-- array is empty, so tags without near-synonyms may be missing; confirm
-- whether "lateral view outer" is wanted.
lateral view explode(json_split(homoionym)) t1 as homoionym_detail -- json_split (UDF, not shown here) presumably unescapes values such as ["\u8138\u578b\u77eb\u6b63"]
where partition_date='$V_PARYMD'
group by name,id,is_display,is_online,
case when tag_type in ('1') then '普通分类'
when tag_type in ('2') then '一级分类'
when tag_type in ('3') then '二级分类'
end
) t1
left join
(select name,tag_id from online.tl_hdfs_api_tag_aggregate_view where partition_date='$V_PARYMD') t2 -- new tag id, synonyms
on (t1.id=t2.tag_id)
left join
(select tag_attr_id as tag_ids,tag_id from online.tl_hdfs_api_tag_attr_tag_view where partition_date='$V_PARYMD') t3 -- new tag id, attribute ids
on (t1.id=t3.tag_id)
left join
(select name,id,aggregate_type from online.tl_hdfs_api_tag_attr_view where partition_date='$V_PARYMD') t4 -- attribute id, attribute category
on (t3.tag_ids=t4.id)
left join
(select old_tag_id,tag_id from online.tl_hdfs_api_tag_map_oldtag_view where partition_date='$V_PARYMD') t5
on (t1.id=t5.tag_id)
left join
(select id,name as old_tag_name,
case when tag_type in ('1') then '一级分类'
when tag_type in ('2') then '二级分类'
when tag_type in ('3') then '三级分类'
when tag_type in ('4') then '城市'
when tag_type in ('5') then '自由添加'
when tag_type in ('6') then '医生'
when tag_type in ('7') then '医院'
when tag_type in ('8') then '频道'
when tag_type in ('9') then '省份'
when tag_type in ('10') then '国家'
when tag_type in ('11') then '运营标签'
end as old_tag_type
from online.tl_hdfs_api_tag_view where partition_date='$V_PARYMD') t7
on (t7.id=t5.old_tag_id)
)t6
group by t6.new_tag_name,t6.new_tag_type,t6.new_tag_id,t6.is_display,t6.is_online
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment