Commit 42546eff authored by 赵建伟's avatar 赵建伟

add init codes.

parent e36fb243
# bi-report
BI report project init.
\ No newline at end of file
BI report project init.
##部署节点
###bj-gm-prod-cos-dataweb005
##临时文件目录
###/data/bi-report/tmp
##结果目录
###/data/bi-report/result
##日志目录
###/data/bi-report/logs
##项目部署
###创建目录
####sudo su -
####mkdir -p /data/bi-report/tmp
####mkdir -p /data/bi-report/result
####mkdir -p /data/bi-report/logs
####chown -R hadoop:hadoop /data/bi-report
#!/bin/bash
mazhimo
bi
sjxuwei
zhaojianwei
zhaofei
weiyimin
yindanlei
zhanghuixin
wangzhuo
wangyan
root
\ No newline at end of file
#step1_1.job
type=command
command=sh /srv/apps/ib-report/lib/shell/waitsuccess.sh hive online tl_hdfs_api_tag_3_0_view 20200423
\ No newline at end of file
#step1_2.job
type=command
command=sh /srv/apps/ib-report/lib/shell/waitsuccess.sh hive online tl_hdfs_api_tag_aggregate_view 20200423
\ No newline at end of file
#step1_3.job
type=command
command=sh /srv/apps/ib-report/lib/shell/waitsuccess.sh hive online tl_hdfs_api_tag_attr_tag_view 20200423
\ No newline at end of file
#step1_4.job
type=command
command=sh /srv/apps/ib-report/lib/shell/waitsuccess.sh hive online tl_hdfs_api_tag_attr_view 20200423
\ No newline at end of file
#step1_5.job
type=command
command=sh /srv/apps/ib-report/lib/shell/waitsuccess.sh hive online tl_hdfs_api_tag_map_oldtag_view 20200423
\ No newline at end of file
#step1_6.job
type=command
command=sh /srv/apps/ib-report/lib/shell/waitsuccess.sh hive online tl_hdfs_api_tag_view 20200423
\ No newline at end of file
#step2.job
type=command
dependencies=step1_1,step1_2,step1_3,step1_4,step1_5,step1_6
command=sh /srv/apps/bi-report/lib/shell/sparksql2email.sh /srv/apps/bi-report/sqls/demo_param.sql /data/bi-report/result/demo_param.csv zhaojianwei zhaojianwei
\ No newline at end of file
#coding: utf-8
#author:
#date:
#desc:send email with attach
import smtplib
import sys
import getopt
import os
import getpass
import time
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from email.header import Header
#email info
to_address = ""
subject = ""
content = ""
attach = ""
memail = ""
htable = ""
#userinfo
authfile = "/srv/apps/bi-report/conf/auth.cf"
def readallow(filepath):
usernames = []
f = open(filepath,"r")
lines = f.readlines()
for line in lines:
usernames.append(line.strip())
return usernames
def checkauth():
global authfile
usernames = readallow(authfile)
username = getpass.getuser()
return username in usernames
def isfile(filestr):
if os.path.isfile(filestr):
file = open(filestr,'r')
lines = file.readlines()
if len(lines) >= 2:
return True
write2file("%s file is less than 2 lines or does not exist...." % getfilename(filestr))
print "--->%s file is less than 2 lines or does not exist...." % getfilename(filestr)
return False
def getfilename(filestr):
return os.path.basename(filestr)
def makeaddress():
global to_address
address_str = ""
for address in to_address.split(","):
if address.find("@") > 0:
address_str += address + ";"
else:
address_str += "%s@igengmei.com;" % address
return address_str[:-1]
def addattch(msgRoot):
global attach
if attach.strip() == "":
return 0
for item in attach.split(","):
if isfile(item):
att = MIMEText(open('%s' % item, 'rb').read(), 'base64', 'utf-8')
att["Content-Type"] = 'application/octet-stream'
att["Content-Disposition"] = 'attachment; filename="%s"'% getfilename(item)
msgRoot.attach(att)
else:
print "--->send mail failed, %s file is not exist or file row num less than 2" % getfilename(item)
return -1
def makecc():
global memail
address_str = ""
for address in memail.split(","):
if address.find("@") > 0:
address_str += address + ";"
else:
address_str += "%s@igengmei.com;" % address
print address_str
return address_str[:-1]
def addsubject(msgRoot):
global subject
global content
global htable
msgRoot['Subject'] = Header(subject, 'utf-8')
msgRoot['To'] = makeaddress()
msgRoot['Cc'] = makecc()
if htable != "":
for ff in htable.split(","):
if not isfile(ff):
return -1
hcontent = trans2table(ff)
content += hcontent
content += "<br><br><br><br><font color=red size='3'>这是一封自动发出的邮件,邮件发送自大数据BI组。<br>请使用wps打开,appstore地址:https://appsto.re/cn/MAqWJ.i</font>"
msg = MIMEText(content,'html','utf-8')
msgRoot.attach(msg)
return True
def trans2table(path):
for ff in path.split(","):
index = 0
result = "<br><br><table border='1' style='border-collapse:collapse' bordercolor='#9D9D9D'>\n"
for eachline in open(ff,'r'):
if len(eachline.strip()) == 0:
continue
result += " <tr>"
for item in eachline.strip().split(","):
if index == 0:
result += "<td align='left' bgcolor='#8EA9DB' style='font-family:Microsoft YaHei;padding:2px 5px'>%s</td>" % item
else:
result += "<td align='left' style='font-family:Microsoft YaHei;padding:2px 5px'>%s</td>" % item
index += 1
result += "</tr>\n"
result += "</table>"
return result
def sendmail():
global to_address
#account info
sender = 'youjiantongji@wanmeizhensuo.com'
username = 'youjiantongji@wanmeizhensuo.com'
password = 'mq37tgk8nGi6eg1Hgq388oro3itvdmPl'
smtpserver = 'smtp.exmail.qq.com'
server_port = 25
if not checkparam():
return 0
msgRoot = MIMEMultipart()
if addsubject(msgRoot) == -1:
return 0
if addattch(msgRoot) == -1:
return 0
smtp = smtplib.SMTP(smtpserver, server_port)
smtp.ehlo()
smtp.starttls()
smtp.login(username, password)
to_receiver = makeaddress()
to_cc = makecc()
smtp.sendmail(sender, to_receiver.split(";") + to_cc.split(";"), msgRoot.as_string())
smtp.quit()
print "send mail success...."
write2file("send mail success")
write2file("--------------------------------------------------------------------------------------")
def processcmd(cmd):
global to_address
global subject
global content
global attach
global memail
global htable
try:
options, args = getopt.getopt(cmd, "c:a:t:s:m:f:")
for name,value in options:
if name in ("-t"):
to_address = value
elif name in ("-s"):
subject = value
elif name in ("-c"):
content = value
elif name in ("-a"):
attach = value
elif name in ("-m"):
memail = value
elif name in ("-f"):
htable = value
except Exception, e:
print "--->%s, remove this param" % str(e)
usage()
return -1
def checkparam():
global to_address
global subject
if to_address.strip() == "" or subject.strip() == "":
print "--->must input -t to_address -s subject, and the value can not be empty"
usage()
return False
else:
return True
def loginlog(command):
day = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
user = getpass.getuser()
logstr = "%s\tuser:%s\tcmd:%s" % (day, user, command)
write2file(logstr)
def processlog():
global to_address
global subject
global content
global attach
day = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
user = getpass.getuser()
logstr = "%s\tuser:%s\ttoaddredd:%s\tsubject:%s\tcontent:%s\tattach:%s" % (day, user, to_address, subject, content, attach)
write2file(logstr)
write2file("--------------------------------------------------------------------------------------")
def write2file(sss):
logfile = "/home/bi/mazhimo/py/sendmail.log"
output=open(logfile,'a')
output.write(str(sss) + "\n")
output.close()
def usage():
print 'sendmail usage:'
print ' -t: mail to (Required)'
print ' -s: subject (Required)'
print ' -m: cc address (Required)'
print ' -c: mail content (Optional)'
print ' -a: attach (Optional)'
print ' -f: trans file content to html table(Optional)'
if __name__ == '__main__':
loginlog(sys.argv)
if not checkauth():
print "--->You do not have permission to send email, please contact fangguoqing@igengmei.com"
sys.exit(0)
if len(sys.argv) >= 3:
if processcmd(sys.argv[1:]) != -1:
sendmail()
else:
usage()
\ No newline at end of file
#!/bin/bash
source ~/.bashrc
source /etc/profile
if [ $# -lt 1 ]; then
echo "Param error, please check again!"
exit 0
fi
#默认取输入的账期
V_DATE=$2
#输入的账期为空,则取T+1默认账期
if [ -z ${V_DATE} ];then
V_DATE=`date --date="-1 day" +%Y-%m-%d`
fi
#默认支持的日期函数
V_PARYMD=`date --date="$V_DATE" +%Y%m%d`
V_PARY_M_D=`date --date="$V_DATE" +%Y-%m-%d`
V_1DAYMD_AGO=`date --date="$V_PARYMD -1 day" +%Y%m%d`
V_7DAYMD_AGO=`date --date="$V_PARYMD -7 day" +%Y%m%d`
V_1DAY_M_D_AGO=`date --date="$V_PARYMD -1 day" +%Y-%m-%d`
V_7DAY_M_D_AGO=`date --date="$V_PARYMD -7 day" +%Y-%m-%d`
#获取当前时间对应的年、月、日
V_PARYEAR=`date --date="$V_DATE" +%Y`
V_PARMONTH=`date --date="$V_DATE" +%m`
V_PARDAY=`date --date="$V_DATE" +%d`
#打印账期
echo ""
echo "************************************"
echo "V_PARYMD: "$V_PARYMD
echo "V_PARY_M_D: "$V_PARY_M_D
echo "V_1DAYMD_AGO: "$V_1DAYMD_AGO
echo "V_7DAYMD_AGO: "$V_7DAYMD_AGO
echo "V_1DAY_M_D_AGO: "$V_1DAY_M_D_AGO
echo "V_7DAY_M_D_AGO: "$V_7DAY_M_D_AGO
echo "************************************"
echo ""
#cp sqls/$1".sql" tmp.sql
cp $1 tmp.sql
sed -i "s/\$V_PARYMD/$V_PARYMD/g" tmp.sql
sed -i "s/\$V_PARY_M_D/$V_PARY_M_D/g" tmp.sql
sed -i "s/\$V_1DAYMD_AGO/$V_1DAYMD_AGO/g" tmp.sql
sed -i "s/\$V_7DAYMD_AGO/$V_7DAYMD_AGO/g" tmp.sql
sed -i "s/\$V_1DAY_M_D_AGO/$V_1DAY_M_D_AGO/g" tmp.sql
sed -i "s/\$V_7DAY_M_D_AGO/$V_7DAY_M_D_AGO/g" tmp.sql
# 新标签与新标签对应集合类型 的对应关系
/opt/hive/hive-1.1.0-cdh5.16.1/bin/beeline -u jdbc:hive2://bj-gm-prod-cos-datacenter004:10000 -n data -e "`cat tmp.sql`"
rm -rf tmp.sql
exit 0
\ No newline at end of file
#!/bin/bash
source ~/.bashrc
source /etc/profile
if [ $# -lt 2 ]; then
echo "Param error, please check again!"
exit 0
fi
#默认取输入的账期
V_DATE=$3
#输入的账期为空,则取T+1默认账期
if [ -z ${V_DATE} ];then
V_DATE=`date --date="-1 day" +%Y-%m-%d`
fi
#默认支持的日期函数
V_PARYMD=`date --date="$V_DATE" +%Y%m%d`
V_PARY_M_D=`date --date="$V_DATE" +%Y-%m-%d`
V_1DAYMD_AGO=`date --date="$V_PARYMD -1 day" +%Y%m%d`
V_7DAYMD_AGO=`date --date="$V_PARYMD -7 day" +%Y%m%d`
V_1DAY_M_D_AGO=`date --date="$V_PARYMD -1 day" +%Y-%m-%d`
V_7DAY_M_D_AGO=`date --date="$V_PARYMD -7 day" +%Y-%m-%d`
#获取当前时间对应的年、月、日
V_PARYEAR=`date --date="$V_DATE" +%Y`
V_PARMONTH=`date --date="$V_DATE" +%m`
V_PARDAY=`date --date="$V_DATE" +%d`
#打印账期
echo ""
echo "************************************"
echo "V_PARYMD: "$V_PARYMD
echo "V_PARY_M_D: "$V_PARY_M_D
echo "V_1DAYMD_AGO: "$V_1DAYMD_AGO
echo "V_7DAYMD_AGO: "$V_7DAYMD_AGO
echo "V_1DAY_M_D_AGO: "$V_1DAY_M_D_AGO
echo "V_7DAY_M_D_AGO: "$V_7DAY_M_D_AGO
echo "************************************"
echo ""
#cp sqls/$1".sql" tmp.sql
cp $1 tmp.sql
sed -i "s/\$V_PARYMD/$V_PARYMD/g" tmp.sql
sed -i "s/\$V_PARY_M_D/$V_PARY_M_D/g" tmp.sql
sed -i "s/\$V_1DAYMD_AGO/$V_1DAYMD_AGO/g" tmp.sql
sed -i "s/\$V_7DAYMD_AGO/$V_7DAYMD_AGO/g" tmp.sql
sed -i "s/\$V_1DAY_M_D_AGO/$V_1DAY_M_D_AGO/g" tmp.sql
sed -i "s/\$V_7DAY_M_D_AGO/$V_7DAY_M_D_AGO/g" tmp.sql
# 新标签与新标签对应集合类型 的对应关系
/opt/spark/spark-2.4.5-bin-hadoop2.7/bin/beeline -u jdbc:hive2://bj-gm-prod-cos-datacenter003:10010/online -n data --outputformat=csv2 --showHeader=true -e "`cat tmp.sql`">$2
rm -rf tmp.sql
exit 0
\ No newline at end of file
#!/bin/bash
source ~/.bashrc
source /etc/profile
if [ $# -lt 4 ]; then
echo "Param error, please check again!"
exit 0
fi
#默认取输入的账期
V_DATE=$5
#输入的账期为空,则取T+1默认账期
if [ -z ${V_DATE} ];then
V_DATE=`date --date="-1 day" +%Y-%m-%d`
fi
#默认支持的日期函数
V_PARYMD=`date --date="$V_DATE" +%Y%m%d`
V_PARY_M_D=`date --date="$V_DATE" +%Y-%m-%d`
V_1DAYMD_AGO=`date --date="$V_PARYMD -1 day" +%Y%m%d`
V_7DAYMD_AGO=`date --date="$V_PARYMD -7 day" +%Y%m%d`
V_1DAY_M_D_AGO=`date --date="$V_PARYMD -1 day" +%Y-%m-%d`
V_7DAY_M_D_AGO=`date --date="$V_PARYMD -7 day" +%Y-%m-%d`
#获取当前时间对应的年、月、日
V_PARYEAR=`date --date="$V_DATE" +%Y`
V_PARMONTH=`date --date="$V_DATE" +%m`
V_PARDAY=`date --date="$V_DATE" +%d`
#打印账期
echo ""
echo "************************************"
echo "V_PARYMD: "$V_PARYMD
echo "V_PARY_M_D: "$V_PARY_M_D
echo "V_1DAYMD_AGO: "$V_1DAYMD_AGO
echo "V_7DAYMD_AGO: "$V_7DAYMD_AGO
echo "V_1DAY_M_D_AGO: "$V_1DAY_M_D_AGO
echo "V_7DAY_M_D_AGO: "$V_7DAY_M_D_AGO
echo "************************************"
echo ""
#cp sqls/$1".sql" tmp.sql
cp $1 tmp.sql
sed -i "s/\$V_PARYMD/$V_PARYMD/g" tmp.sql
sed -i "s/\$V_PARY_M_D/$V_PARY_M_D/g" tmp.sql
sed -i "s/\$V_1DAYMD_AGO/$V_1DAYMD_AGO/g" tmp.sql
sed -i "s/\$V_7DAYMD_AGO/$V_7DAYMD_AGO/g" tmp.sql
sed -i "s/\$V_1DAY_M_D_AGO/$V_1DAY_M_D_AGO/g" tmp.sql
sed -i "s/\$V_7DAY_M_D_AGO/$V_7DAY_M_D_AGO/g" tmp.sql
# 新标签与新标签对应集合类型 的对应关系
/opt/spark/spark-2.4.5-bin-hadoop2.7/bin/beeline -u jdbc:hive2://bj-gm-prod-cos-datacenter003:10010/online -n data --outputformat=csv2 --showHeader=true -e "`cat tmp.sql`">$2
rm -rf tmp.sql
to=$3
cc=$4
echo "to: "$to
echo "cc: "$cc
python2.7 /srv/apps/bi-report/lib/python/sendmail.py -t $to -s 自动发送 -m $cc -a $2
exit 0
\ No newline at end of file
#!/bin/bash
if [ $# -lt 4 ]; then
echo "Param error, please check again!"
exit 0
fi
export TYPE=$1
export DB=$2
export TABLE=$3
export PARTITIONS=$4
res=`/bin/bash /srv/apps/data-ready-checker/client/bin/checker.sh`
#echo $res
\ No newline at end of file
SELECT partition_date
,device_os_type AS device_type
,CASE WHEN active_type = '4' THEN '老活'
WHEN active_type IN ('1','2') THEN '新增' END AS active_type
,device_id
FROM online.ml_device_day_active_status
WHERE partition_date between '20200401' and '20200408'
AND active_type IN ('1','2','4')
AND first_channel_source_type not IN ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang'
,'js-az1','js-az2','js-az3','js-az4','js-az5','jfq-az1','jfq-az2','jfq-az3','jfq-az4','jfq-az5','toufang1'
,'toufang2','toufang3','toufang4','toufang5','toufang6','TF-toufang1','TF-toufang2','TF-toufang3','TF-toufang4'
,'TF-toufang5','tf-toufang1','tf-toufang2','tf-toufang3','tf-toufang4','tf-toufang5','benzhan','promotion_aso100'
,'promotion_qianka','promotion_xiaoyu','promotion_dianru','promotion_malioaso','promotion_malioaso-shequ'
,'promotion_shike','promotion_julang_jl03','promotion_zuimei')
AND first_channel_source_type not LIKE 'promotion\_jf\_%'
limit 10
\ No newline at end of file
select
t6.new_tag_name, --新标签name
t6.new_tag_type, --新标签type
t6.new_tag_id, --新标签id
t6.is_display, --是否在app展示
t6.is_online, --是否生效
collect_set(t6.jianyi_word) as jianyi_word, --近义词
collect_set(t6.like_new_tag) as tongyi_word, --同义词
collect_set(t6.fangshi_1f) as fangshi_1st, --一级方式_name
collect_set(t6.fangshi_2d) as fangshi_2nd, --二级方式_name
collect_set(t6.suqiu_1f) as suqiu_1st, --一级诉求_name
collect_set(t6.suqiu_2d) as suqiu_2nd, --二级诉求_name
collect_set(t6.buwei_if) as buwei_1st, --一级部位_name
collect_set(t6.buwei_2d) as buwei_2nd, --二级部位_name
collect_set(t6.old_tag_name) as old_tag_name, --老标签_name
collect_set(t6.old_tag_type) as old_tag_type, --老标签_type
collect_set(t6.old_tag_id) as old_tag_id, --老标签_id
collect_set(t6.fangshi_1f_id) as fangshi_1st_id, --一级方式_id
collect_set(t6.fangshi_2d_id) as fangshi_2nd_id, --二级方式_id
collect_set(t6.suqiu_1f_id) as suqiu_1st_id, --一级诉求_id
collect_set(t6.suqiu_2d_id) as suqiu_2nd_id, --二级诉求_id
collect_set(t6.buwei_if_id) as buwei_1st_id, --一级部位_id
collect_set(t6.buwei_2d_id) as buwei_2nd_id --二级部位_id
from
(
select
t1.name as new_tag_name, --新标签name
t1.tag_type as new_tag_type, --新标签type
t1.id as new_tag_id, --新标签id
t1.is_display as is_display, --是否在app展示
t1.is_online as is_online, --是否生效
t1.homoionym as jianyi_word, --近义词
t2.name as like_new_tag, --同义词
t4.aggregate_type, --对应类型的枚举值
(case when t4.aggregate_type='6' then t4.name else NULL end) as fangshi_1f, --一级方式_name
(case when t4.aggregate_type='2' then t4.name else NULL end) as fangshi_2d, --二级方式_name
(case when t4.aggregate_type='7' then t4.name else NULL end) as suqiu_1f, --一级诉求_name
(case when t4.aggregate_type='8' then t4.name else NULL end) as suqiu_2d, --二级诉求_name
(case when t4.aggregate_type='10' then t4.name else NULL end) as buwei_if, --一级部位_name
(case when t4.aggregate_type='3' then t4.name else NULL end) as buwei_2d, --二级部位_name
t7.old_tag_name as old_tag_name, --老标签_name
t7.old_tag_type as old_tag_type, --老标签_type
t5.old_tag_id as old_tag_id, --老标签_id
(case when t4.aggregate_type='6' then t4.id else NULL end) as fangshi_1f_id, --一级方式_id
(case when t4.aggregate_type='2' then t4.id else NULL end) as fangshi_2d_id, --二级方式_id
(case when t4.aggregate_type='7' then t4.id else NULL end) as suqiu_1f_id, --一级诉求_id
(case when t4.aggregate_type='8' then t4.id else NULL end) as suqiu_2d_id, --二级诉求_id
(case when t4.aggregate_type='10' then t4.id else NULL end) as buwei_if_id, --一级部位_id
(case when t4.aggregate_type='3' then t4.id else NULL end) as buwei_2d_id --二级部位_id
from
(select name,id,is_display,is_online,
case when tag_type in ('1') then '普通分类'
when tag_type in ('2') then '一级分类'
when tag_type in ('3') then '二级分类'
end as tag_type,
collect_list(t1.homoionym_detail) as homoionym
from online.tl_hdfs_api_tag_3_0_view --新标签id,近义词
lateral view explode(json_split(homoionym)) t1 as homoionym_detail --["\u8138\u578b\u77eb\u6b63"]转义 json_split
where partition_date='$V_PARYMD'
group by name,id,is_display,is_online,
case when tag_type in ('1') then '普通分类'
when tag_type in ('2') then '一级分类'
when tag_type in ('3') then '二级分类'
end
) t1
left join
(select name,tag_id from online.tl_hdfs_api_tag_aggregate_view where partition_date='$V_PARYMD') t2 --新标签id,同义词
on (t1.id=t2.tag_id)
left join
(select tag_attr_id as tag_ids,tag_id from online.tl_hdfs_api_tag_attr_tag_view where partition_date='$V_PARYMD') t3 --新标签id,属性ids
on (t1.id=t3.tag_id)
left join
(select name,id,aggregate_type from online.tl_hdfs_api_tag_attr_view where partition_date='$V_PARYMD') t4 --属性id,属性分类
on (t3.tag_ids=t4.id)
left join
(select old_tag_id,tag_id from online.tl_hdfs_api_tag_map_oldtag_view where partition_date='$V_PARYMD') t5
on (t1.id=t5.tag_id)
left join
(select id,name as old_tag_name,
case when tag_type in ('1') then '一级分类'
when tag_type in ('2') then '二级分类'
when tag_type in ('3') then '三级分类'
when tag_type in ('4') then '城市'
when tag_type in ('5') then '自由添加'
when tag_type in ('6') then '医生'
when tag_type in ('7') then '医院'
when tag_type in ('8') then '频道'
when tag_type in ('9') then '省份'
when tag_type in ('10') then '国家'
when tag_type in ('11') then '运营标签'
end as old_tag_type
from online.tl_hdfs_api_tag_view where partition_date='$V_PARYMD') t7
on (t7.id=t5.old_tag_id)
)t6
group by t6.new_tag_name,t6.new_tag_type,t6.new_tag_id,t6.is_display,t6.is_online
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment