Commit d59042fc authored by 魏艺敏's avatar 魏艺敏

Merge branch 'master' of http://git.wanmeizhensuo.com/zhaojianwei/bi-report into weiyimin

 Conflicts:
	jobs/meigou_detail_page/step1_1.job
	jobs/meigou_detail_page/step1_2.job
	jobs/search_related/step1_3.job
	jobs/search_related/step1_5.job
parents 67abc2c0 2a1cb0de
No preview for this file type
<?xml version="1.0" encoding="UTF-8"?>
<!-- IDE project settings: SQL dialect assigned to individual SQL files of the project. -->
<project version="4">
  <component name="SqlDialectMappings">
    <file url="file://$PROJECT_DIR$/sqls/bak/demo_spark_param/demo_spark_param.sql"
          dialect="SQL92" />
  </component>
</project>
\ No newline at end of file
mazhimo
bi
sjxuwei
zhaojianwei
zhaofei
weiyimin
yindanlei
zhanghuixin
wangzhuo
wangyan
root
hadoop
\ No newline at end of file
# bi-report
BI report project init.
部署节点:
bj-gm-prod-cos-dataweb005
临时文件目录
/data/bi-report/tmp
结果目录
/data/bi-report/result
日志目录
/data/bi-report/logs
项目部署
#创建目录
sudo su -
mkdir -p /data/bi-report/tmp
mkdir -p /data/bi-report/result
mkdir -p /data/bi-report/logs
chown -R bi:bi /data/bi-report
开发过程中需要注意的事项: 开发过程中需要注意的事项:
1.在en-cn.properties中维护映射关系,则显示中文名,否则显示英文名 1.在en-cn.properties中维护映射关系,则显示中文名,否则显示英文名
2.在编写类似insert into tableA select * from tableB语句时,不需要添加set role admin以及set mapreduce.job.queuename=data语句,而且不能带有";"符号 2.在编写类似insert into tableA select * from tableB语句时,不需要添加set role admin以及set mapreduce.job.queuename=data语句,而且不能带有";"符号
...@@ -10,3 +34,10 @@ ...@@ -10,3 +34,10 @@
4.优化邮件内容,wps打开去掉 样式格式化【metabase中包含内容简略视图,而且有metabaselogo】 邮件内容格式化 python实现 ok 4.优化邮件内容,wps打开去掉 样式格式化【metabase中包含内容简略视图,而且有metabaselogo】 邮件内容格式化 python实现 ok
5.一个附件多个sheet 或者 一封邮件 多个附件【metabase默认支持指定多个问题、多个附件】 python发送邮件实现,一个文件多个sheet实现起来相对比较难 发送邮件时需要指定附件名称(带账期) ok 5.一个附件多个sheet 或者 一封邮件 多个附件【metabase默认支持指定多个问题、多个附件】 python发送邮件实现,一个文件多个sheet实现起来相对比较难 发送邮件时需要指定附件名称(带账期) ok
6.excel文件内容格式化 优先级比较低 【指定每个字段的最大长度】 使用python实现,尝试 ok 6.excel文件内容格式化 优先级比较低 【指定每个字段的最大长度】 使用python实现,尝试 ok
规范约定:
1.job文件的命名
必须以job1_01的方式命名,以便在azkaban中可以有序查看
# bi-report
BI report project init.
部署节点:
bj-gm-prod-cos-dataweb005
临时文件目录
/data/bi-report/tmp
结果目录
/data/bi-report/result
日志目录
/data/bi-report/logs
项目部署
#创建目录
sudo su -
mkdir -p /data/bi-report/tmp
mkdir -p /data/bi-report/result
mkdir -p /data/bi-report/logs
chown -R bi:bi /data/bi-report
No preview for this file type
#step1.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online ml_device_day_active_status
\ No newline at end of file
#step2.job
type=command
dependencies=step1
command=sh /home/bi/bi-report/lib/shell/hive2csv.sh demo_hive
\ No newline at end of file
#step1_2.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online ml_hospital_spam_pv_day
\ No newline at end of file
#step1_5.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online ml_device_day_active_status
\ No newline at end of file
#step2.job
type=command
dependencies=step1_1,step1_2,step1_3,step1_4
command=curl -X GET http://localhost:8553/api/report/sendEmail/zhaojianwei@igengmei.com/jianweizhao@yeah.net/demo_java
\ No newline at end of file
#step1_4.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online bl_hdfs_maidian_updates
\ No newline at end of file
#step2.job
type=command
dependencies=step1_1,step1_2,step1_3,step1_4
command=curl -X GET http://localhost:8553/api/report/genExcel?name=demo_java
\ No newline at end of file
#step1_1.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online tl_hdfs_api_tag_3_0_view
\ No newline at end of file
#step1_2.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online tl_hdfs_api_tag_aggregate_view
\ No newline at end of file
#step1_4.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online tl_hdfs_api_tag_attr_view
\ No newline at end of file
#step1_5.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online tl_hdfs_api_tag_map_oldtag_view
\ No newline at end of file
#step2.job
type=command
dependencies=step1_1,step1_2,step1_3,step1_4,step1_5,step1_6
command=sh /home/bi/bi-report/lib/shell/sparksql2email.sh demo_spark_param zhaojianwei zhaojianwei
\ No newline at end of file
#step2.job
type=command
command=curl -X GET http://localhost:8553/api/report/execSql?name=hive_insert
\ No newline at end of file
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
<project version="4"> <project version="4">
<component name="ChangeListManager"> <component name="ChangeListManager">
<list default="true" id="4ef7a995-babe-48ad-a058-4897dd73703a" name="Default Changelist" comment=""> <list default="true" id="4ef7a995-babe-48ad-a058-4897dd73703a" name="Default Changelist" comment="">
<change afterPath="$PROJECT_DIR$/../../shell/hive.sh" afterDir="false" /> <change beforePath="$PROJECT_DIR$/../../../pm/daily_recommend_strategy/job/daily_recommend_strategy.zip" beforeDir="false" />
</list> </list>
<option name="SHOW_DIALOG" value="false" /> <option name="SHOW_DIALOG" value="false" />
<option name="HIGHLIGHT_CONFLICTS" value="true" /> <option name="HIGHLIGHT_CONFLICTS" value="true" />
...@@ -17,9 +17,30 @@ ...@@ -17,9 +17,30 @@
<executable /> <executable />
</execution> </execution>
</component> </component>
<component name="DatabaseView">
<option name="SHOW_INTERMEDIATE" value="true" />
<option name="GROUP_DATA_SOURCES" value="true" />
<option name="GROUP_SCHEMA" value="true" />
<option name="GROUP_CONTENTS" value="false" />
<option name="SORT_POSITIONED" value="false" />
<option name="SHOW_EMPTY_GROUPS" value="false" />
<option name="AUTO_SCROLL_FROM_SOURCE" value="false" />
<option name="HIDDEN_KINDS">
<set />
</option>
<expand />
<select />
</component>
<component name="FavoritesManager"> <component name="FavoritesManager">
<favorites_list name="bi-report-service" /> <favorites_list name="bi-report-service" />
</component> </component>
<component name="FileTemplateManagerImpl">
<option name="RECENT_TEMPLATES">
<list>
<option value="Class" />
</list>
</option>
</component>
<component name="Git.Settings"> <component name="Git.Settings">
<option name="RECENT_GIT_ROOT_PATH" value="$PROJECT_DIR$/../../.." /> <option name="RECENT_GIT_ROOT_PATH" value="$PROJECT_DIR$/../../.." />
</component> </component>
...@@ -36,6 +57,7 @@ ...@@ -36,6 +57,7 @@
<option name="showLibraryContents" value="true" /> <option name="showLibraryContents" value="true" />
</component> </component>
<component name="PropertiesComponent"> <component name="PropertiesComponent">
<property name="ASKED_ADD_EXTERNAL_FILES" value="true" />
<property name="RequestMappingsPanelOrder0" value="0" /> <property name="RequestMappingsPanelOrder0" value="0" />
<property name="RequestMappingsPanelOrder1" value="1" /> <property name="RequestMappingsPanelOrder1" value="1" />
<property name="RequestMappingsPanelWidth0" value="75" /> <property name="RequestMappingsPanelWidth0" value="75" />
...@@ -45,7 +67,7 @@ ...@@ -45,7 +67,7 @@
<property name="aspect.path.notification.shown" value="true" /> <property name="aspect.path.notification.shown" value="true" />
<property name="last_opened_file_path" value="$PROJECT_DIR$" /> <property name="last_opened_file_path" value="$PROJECT_DIR$" />
</component> </component>
<component name="RunManager" selected="JUnit.ExcelServiceTest.testGenExcel"> <component name="RunManager" selected="Application.Test">
<configuration name="ExcelServiceImpl" type="Application" factoryName="Application" temporary="true" nameIsGenerated="true"> <configuration name="ExcelServiceImpl" type="Application" factoryName="Application" temporary="true" nameIsGenerated="true">
<option name="MAIN_CLASS_NAME" value="com.gmei.data.gateway.server.service.impl.ExcelServiceImpl" /> <option name="MAIN_CLASS_NAME" value="com.gmei.data.gateway.server.service.impl.ExcelServiceImpl" />
<module name="service-server" /> <module name="service-server" />
...@@ -59,6 +81,19 @@ ...@@ -59,6 +81,19 @@
<option name="Make" enabled="true" /> <option name="Make" enabled="true" />
</method> </method>
</configuration> </configuration>
<configuration name="Test" type="Application" factoryName="Application" temporary="true" nameIsGenerated="true">
<option name="MAIN_CLASS_NAME" value="com.gmei.data.gateway.server.Test" />
<module name="service-server" />
<extension name="coverage">
<pattern>
<option name="PATTERN" value="com.gmei.data.gateway.server.*" />
<option name="ENABLED" value="true" />
</pattern>
</extension>
<method v="2">
<option name="Make" enabled="true" />
</method>
</configuration>
<configuration default="true" type="ClojureREPL" factoryName="Local" activateToolWindowBeforeRun="false"> <configuration default="true" type="ClojureREPL" factoryName="Local" activateToolWindowBeforeRun="false">
<setting name="replType" value="NREPL" /> <setting name="replType" value="NREPL" />
<setting name="execution" value="LEININGEN" /> <setting name="execution" value="LEININGEN" />
...@@ -125,6 +160,7 @@ ...@@ -125,6 +160,7 @@
</configuration> </configuration>
<recent_temporary> <recent_temporary>
<list> <list>
<item itemvalue="Application.Test" />
<item itemvalue="JUnit.ExcelServiceTest.testGenExcel" /> <item itemvalue="JUnit.ExcelServiceTest.testGenExcel" />
<item itemvalue="Application.ExcelServiceImpl" /> <item itemvalue="Application.ExcelServiceImpl" />
<item itemvalue="JUnit.EmailServiceTest.testSendAttachmentsMail" /> <item itemvalue="JUnit.EmailServiceTest.testSendAttachmentsMail" />
...@@ -146,32 +182,40 @@ ...@@ -146,32 +182,40 @@
<workItem from="1591668349091" duration="1455000" /> <workItem from="1591668349091" duration="1455000" />
<workItem from="1592214013610" duration="2379000" /> <workItem from="1592214013610" duration="2379000" />
<workItem from="1592568026557" duration="772000" /> <workItem from="1592568026557" duration="772000" />
<workItem from="1593835574199" duration="4438000" />
</task> </task>
<servers /> <servers />
</component> </component>
<component name="TypeScriptGeneratedFilesManager"> <component name="TypeScriptGeneratedFilesManager">
<option name="version" value="1" /> <option name="version" value="1" />
</component> </component>
<component name="VcsManagerConfiguration">
<option name="ADD_EXTERNAL_FILES_SILENTLY" value="true" />
</component>
<component name="WindowStateProjectService"> <component name="WindowStateProjectService">
<state width="1878" height="313" key="GridCell.Tab.0.bottom" timestamp="1592276447004"> <state x="773" y="303" key="#com.intellij.ide.util.MemberChooser" timestamp="1593835959317">
<screen x="0" y="22" width="1920" height="1129" />
</state>
<state x="773" y="303" key="#com.intellij.ide.util.MemberChooser/0.22.1920.1129/-981.-1200.1920.1200/939.-1080.1920.1080@0.22.1920.1129" timestamp="1593835959317" />
<state width="1878" height="313" key="GridCell.Tab.0.bottom" timestamp="1593836935231">
<screen x="0" y="22" width="1920" height="1129" /> <screen x="0" y="22" width="1920" height="1129" />
</state> </state>
<state width="1878" height="313" key="GridCell.Tab.0.bottom/0.22.1920.1129/-981.-1200.1920.1200/939.-1080.1920.1080@0.22.1920.1129" timestamp="1592276447004" /> <state width="1878" height="313" key="GridCell.Tab.0.bottom/0.22.1920.1129/-981.-1200.1920.1200/939.-1080.1920.1080@0.22.1920.1129" timestamp="1593836935231" />
<state width="1878" height="243" key="GridCell.Tab.0.bottom/1752.1200.1920.1151/0.22.1920.1178/1920.120.1920.1080@1752.1200.1920.1151" timestamp="1590742520783" /> <state width="1878" height="243" key="GridCell.Tab.0.bottom/1752.1200.1920.1151/0.22.1920.1178/1920.120.1920.1080@1752.1200.1920.1151" timestamp="1590742520783" />
<state width="1878" height="313" key="GridCell.Tab.0.center" timestamp="1592276447002"> <state width="1878" height="313" key="GridCell.Tab.0.center" timestamp="1593836935230">
<screen x="0" y="22" width="1920" height="1129" /> <screen x="0" y="22" width="1920" height="1129" />
</state> </state>
<state width="1878" height="313" key="GridCell.Tab.0.center/0.22.1920.1129/-981.-1200.1920.1200/939.-1080.1920.1080@0.22.1920.1129" timestamp="1592276447002" /> <state width="1878" height="313" key="GridCell.Tab.0.center/0.22.1920.1129/-981.-1200.1920.1200/939.-1080.1920.1080@0.22.1920.1129" timestamp="1593836935230" />
<state width="1878" height="243" key="GridCell.Tab.0.center/1752.1200.1920.1151/0.22.1920.1178/1920.120.1920.1080@1752.1200.1920.1151" timestamp="1590742520781" /> <state width="1878" height="243" key="GridCell.Tab.0.center/1752.1200.1920.1151/0.22.1920.1178/1920.120.1920.1080@1752.1200.1920.1151" timestamp="1590742520781" />
<state width="1878" height="313" key="GridCell.Tab.0.left" timestamp="1592276447001"> <state width="1878" height="313" key="GridCell.Tab.0.left" timestamp="1593836935229">
<screen x="0" y="22" width="1920" height="1129" /> <screen x="0" y="22" width="1920" height="1129" />
</state> </state>
<state width="1878" height="313" key="GridCell.Tab.0.left/0.22.1920.1129/-981.-1200.1920.1200/939.-1080.1920.1080@0.22.1920.1129" timestamp="1592276447001" /> <state width="1878" height="313" key="GridCell.Tab.0.left/0.22.1920.1129/-981.-1200.1920.1200/939.-1080.1920.1080@0.22.1920.1129" timestamp="1593836935229" />
<state width="1878" height="243" key="GridCell.Tab.0.left/1752.1200.1920.1151/0.22.1920.1178/1920.120.1920.1080@1752.1200.1920.1151" timestamp="1590742520781" /> <state width="1878" height="243" key="GridCell.Tab.0.left/1752.1200.1920.1151/0.22.1920.1178/1920.120.1920.1080@1752.1200.1920.1151" timestamp="1590742520781" />
<state width="1878" height="313" key="GridCell.Tab.0.right" timestamp="1592276447003"> <state width="1878" height="313" key="GridCell.Tab.0.right" timestamp="1593836935230">
<screen x="0" y="22" width="1920" height="1129" /> <screen x="0" y="22" width="1920" height="1129" />
</state> </state>
<state width="1878" height="313" key="GridCell.Tab.0.right/0.22.1920.1129/-981.-1200.1920.1200/939.-1080.1920.1080@0.22.1920.1129" timestamp="1592276447003" /> <state width="1878" height="313" key="GridCell.Tab.0.right/0.22.1920.1129/-981.-1200.1920.1200/939.-1080.1920.1080@0.22.1920.1129" timestamp="1593836935230" />
<state width="1878" height="243" key="GridCell.Tab.0.right/1752.1200.1920.1151/0.22.1920.1178/1920.120.1920.1080@1752.1200.1920.1151" timestamp="1590742520782" /> <state width="1878" height="243" key="GridCell.Tab.0.right/1752.1200.1920.1151/0.22.1920.1178/1920.120.1920.1080@1752.1200.1920.1151" timestamp="1590742520782" />
<state width="1878" height="386" key="GridCell.Tab.1.bottom" timestamp="1591671347527"> <state width="1878" height="386" key="GridCell.Tab.1.bottom" timestamp="1591671347527">
<screen x="0" y="22" width="1920" height="1129" /> <screen x="0" y="22" width="1920" height="1129" />
......
...@@ -34,4 +34,10 @@ public class BiReportEndpoint { ...@@ -34,4 +34,10 @@ public class BiReportEndpoint {
public int sendEmail(@PathVariable("to") String to,@PathVariable("cc") String cc,@PathVariable("name") String name){ public int sendEmail(@PathVariable("to") String to,@PathVariable("cc") String cc,@PathVariable("name") String name){
return biReportService.sendEmail(to,cc,name); return biReportService.sendEmail(to,cc,name);
} }
/**
 * GET /email/{name}/{to}/{cc}: hands the three path segments to
 * {@code biReportService.email} and returns its status code as the response body.
 *
 * @param name report name taken from the URL path
 * @param to   recipient value taken from the URL path
 * @param cc   carbon-copy value taken from the URL path
 * @return the status code returned by the service
 */
@RequestMapping(method = RequestMethod.GET, value = "/email/{name}/{to}/{cc}")
@ResponseBody
public int email(@PathVariable("name") String name,
                 @PathVariable("to") String to,
                 @PathVariable("cc") String cc) {
    final int status = biReportService.email(name, to, cc);
    return status;
}
} }
...@@ -2,7 +2,8 @@ package com.gmei.data.gateway.server.service; ...@@ -2,7 +2,8 @@ package com.gmei.data.gateway.server.service;
public interface BiReportService { public interface BiReportService {
int execSql(String name); int execSql(String name);
int execSqlByHive(String name);
int genExcel(String name); int genExcel(String name);
int execSqlByHive(String name);
int email(String name, String to, String cc);
int sendEmail(String to,String cc,String name); int sendEmail(String to,String cc,String name);
} }
...@@ -4,5 +4,6 @@ import com.gmei.data.gateway.server.dto.ExcelGenDto; ...@@ -4,5 +4,6 @@ import com.gmei.data.gateway.server.dto.ExcelGenDto;
public interface SparksqlService { public interface SparksqlService {
int executeSql(String name); int executeSql(String name);
ExcelGenDto getResult(String name);
ExcelGenDto getResultDto(String name); ExcelGenDto getResultDto(String name);
} }
...@@ -34,6 +34,21 @@ public class BiReportServiceImpl implements BiReportService { ...@@ -34,6 +34,21 @@ public class BiReportServiceImpl implements BiReportService {
return hivesqlService.executeSql(name); return hivesqlService.executeSql(name);
} }
/**
 * Queries the report data for {@code name}, writes it to an Excel file and mails that file.
 *
 * @param name report name; passed to the query, the Excel generation and the mail step
 * @param to   recipient value forwarded to the mail service
 * @param cc   carbon-copy value forwarded to the mail service
 * @return {@code Constants.FAIL} when the query yields no result or the Excel file cannot be
 *         generated; otherwise the status returned by {@code emailService.sendAttachmentsMail}
 */
@Override
public int email(String name, String to, String cc) {
    ExcelGenDto resultDto = sparksqlService.getResult(name);
    if (null == resultDto) {
        return Constants.FAIL;
    }
    // A failed Excel generation must be reported as a failure (it used to fall through and
    // return SUCCESS) and must not trigger a mail without a fresh attachment.
    if (Constants.SUCCESS != excelService.genExcel(resultDto, name)) {
        return Constants.FAIL;
    }
    return emailService.sendAttachmentsMail(to, cc, name);
}
@Override @Override
public int genExcel(String name){ public int genExcel(String name){
ExcelGenDto resultDto = sparksqlService.getResultDto(name); ExcelGenDto resultDto = sparksqlService.getResultDto(name);
......
...@@ -29,10 +29,14 @@ public class SparksqlServiceImpl implements SparksqlService { ...@@ -29,10 +29,14 @@ public class SparksqlServiceImpl implements SparksqlService {
@Value("${sql.input.path}") @Value("${sql.input.path}")
private String sqlPath; private String sqlPath;
@Value("${sql.base.path}")
private String sqlBasePath;
@Value("${datasource.sparkSql.jdbcUrl}") @Value("${datasource.sparkSql.jdbcUrl}")
private String sparkSqlJdbcUrl; private String sparkSqlJdbcUrl;
@Value("${proper.file.path}") @Value("${proper.file.path}")
private String properFilePath; private String properFilePath;
@Value("${proper.base.path}")
private String properBasePath;
private static final Logger logger = LoggerFactory.getLogger(SparksqlServiceImpl.class); private static final Logger logger = LoggerFactory.getLogger(SparksqlServiceImpl.class);
@Override @Override
...@@ -72,8 +76,8 @@ public class SparksqlServiceImpl implements SparksqlService { ...@@ -72,8 +76,8 @@ public class SparksqlServiceImpl implements SparksqlService {
try{ try{
conn = DriverManager.getConnection(sparkSqlJdbcUrl,"data",""); conn = DriverManager.getConnection(sparkSqlJdbcUrl,"data","");
Statement statement = conn.createStatement(); Statement statement = conn.createStatement();
statement.executeUpdate("ADD JAR hdfs:///user/hive/share/lib/udf/hive-udf-1.0-SNAPSHOT.jar"); //statement.executeUpdate("ADD JAR hdfs:///user/hive/share/lib/udf/hive-udf-1.0-SNAPSHOT.jar");
statement.executeUpdate("CREATE TEMPORARY FUNCTION convup AS 'com.gmei.hive.common.udf.UDFConvUpgrade'"); //statement.executeUpdate("CREATE TEMPORARY FUNCTION convup AS 'com.gmei.hive.common.udf.UDFConvUpgrade'");
int updateCount = statement.executeUpdate(hql); int updateCount = statement.executeUpdate(hql);
if(updateCount > 0){ if(updateCount > 0){
logger.info("*** Execute By Sparksql success, hql: {}",hql); logger.info("*** Execute By Sparksql success, hql: {}",hql);
...@@ -89,6 +93,85 @@ public class SparksqlServiceImpl implements SparksqlService { ...@@ -89,6 +93,85 @@ public class SparksqlServiceImpl implements SparksqlService {
return Constants.SUCCESS; return Constants.SUCCESS;
} }
/**
 * Executes every file found in {@code sqlBasePath + "pm/" + name + "/report"} as a Spark SQL
 * query and collects one sheet (title, column headers, rows) per file.
 *
 * <p>The sheet title is looked up in {@code properBasePath + "pm/" + name + "/en-cn.properties"}
 * under the file name without ".sql"; when no mapping exists the file name itself is used.
 * Before execution the placeholders {@code ${partition_date}}, {@code ${partition_day}},
 * {@code $partition_date} and {@code $partition_day} are replaced by yesterday's date in
 * single quotes, and every ";" is removed.
 *
 * @param name report name, used to locate the SQL directory and the properties file
 * @return the collected sheets, or {@code null} when the report directory cannot be listed
 *         or any query fails; files that cannot be read are skipped
 */
@Override
public ExcelGenDto getResult(String name) {
    List<String> sheetNameList = new ArrayList<String>();
    List<List<String>> headersList = new ArrayList<List<String>>();
    List<List<List>> rowsList = new ArrayList<List<List>>();

    File sqlDir = new File(sqlBasePath + "pm/" + name + "/report");
    File[] files = sqlDir.listFiles();
    // listFiles() returns null (not an empty array) for a missing or unreadable directory.
    if (null == files) {
        logger.error("*** Report sql directory missing or unreadable: " + sqlDir.getPath());
        return null;
    }

    String properPath = properBasePath + "pm/" + name + "/en-cn.properties";
    String quotedYesterday = String.format("'%s'", DateUtils.getYesterdayDateSimpleStr());

    for (File file : files) {
        String nameStr = file.getName().replace(".sql", "");
        String value = ProperUtils.getValue(properPath, nameStr);
        String subject = (null != value) ? value : nameStr;

        // Read the whole SQL file; try-with-resources closes the reader exactly once.
        StringBuffer sbf = new StringBuffer();
        try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
            String tempStr;
            while ((tempStr = reader.readLine()) != null) {
                sbf.append("\n").append(tempStr);
            }
        } catch (IOException e) {
            // Skip this file entirely; nothing has been added to the result lists yet,
            // so sheet names, headers and rows stay aligned.
            logger.error("*** Read sql file failed: " + file.getPath(), e);
            continue;
        }

        String hql = sbf.toString()
                .replace("${partition_date}", quotedYesterday)
                .replace("${partition_day}", quotedYesterday)
                .replace("$partition_date", quotedYesterday)
                .replace("$partition_day", quotedYesterday)
                .replace(";", "");

        List<String> headers = new ArrayList<String>();
        List<List> rows = new ArrayList<List>();
        // Connection, statement and result set are released even when the query fails.
        try (Connection conn = DriverManager.getConnection(sparkSqlJdbcUrl, "data", "");
             Statement statement = conn.createStatement();
             ResultSet rs = statement.executeQuery(hql)) {
            ResultSetMetaData metaData = rs.getMetaData();
            int columnCount = metaData.getColumnCount();
            for (int i = 1; i <= columnCount; i++) {
                headers.add(metaData.getColumnName(i));
            }
            while (rs.next()) {
                List<String> rowVals = new ArrayList<String>();
                for (int i = 1; i <= columnCount; i++) {
                    rowVals.add(rs.getString(i));
                }
                rows.add(rowVals);
            }
        } catch (Exception e) {
            logger.error("*** Execute By Sparksql failed, hql: " + hql, e);
            return null;
        }

        // Register the sheet only once all three parts are available.
        sheetNameList.add(subject);
        headersList.add(headers);
        rowsList.add(rows);
    }
    return new ExcelGenDto(sheetNameList, headersList, rowsList);
}
@Override @Override
public ExcelGenDto getResultDto(String name){ public ExcelGenDto getResultDto(String name){
List<String> sheetNameList = new ArrayList<String>(); List<String> sheetNameList = new ArrayList<String>();
......
...@@ -27,6 +27,8 @@ mail: ...@@ -27,6 +27,8 @@ mail:
sql: sql:
input: input:
path: /home/bi/bi-report/sqls/ path: /home/bi/bi-report/sqls/
base:
path: /home/bi/bi-report/
#---excel config--- #---excel config---
excel: excel:
...@@ -41,3 +43,5 @@ proper: ...@@ -41,3 +43,5 @@ proper:
file: file:
path: /home/bi/bi-report/conf/en-cn.properties path: /home/bi/bi-report/conf/en-cn.properties
#path: /Users/apple/git-repo/bi-report/conf/en-cn.properties #path: /Users/apple/git-repo/bi-report/conf/en-cn.properties
base:
path: /home/bi/bi-report/
package com.gmei.data.gateway.server;
import com.gmei.data.gateway.server.utils.DateUtils;
/**
* @ClassName Test
* @Author apple
* @Date 2020/7/4
* @Version V1.0
**/
public class Test {
public static void main(String[] args) {
// String str = "select a from bb;";
// System.out.println(str.replace(";",""));
String str01 = "select a from bb where aa = ${partition_date};";
String str02 = "select a from bb where aa = ${partition_day};";
String str03 = "select a from bb where aa = $partition_date;";
String str04 = "select a from bb where aa = $partition_day;";
System.out.println(str01.replace("${partition_date}",String.format("'%s'", DateUtils.getYesterdayDateSimpleStr())).replace(";",""));
System.out.println(str02.replace("${partition_day}",String.format("'%s'", DateUtils.getYesterdayDateSimpleStr())).replace(";",""));
System.out.println(str03.replace("$partition_date",String.format("'%s'", DateUtils.getYesterdayDateSimpleStr())).replace(";",""));
System.out.println(str04.replace("$partition_day",String.format("'%s'", DateUtils.getYesterdayDateSimpleStr())).replace(";",""));
}
}
# HTTP port of the service; the Azkaban job files call it via http://localhost:8553/...
server:
port: 8553
spring:
#---email config---
# NOTE(review): the SMTP password below is committed in plain text. Consider moving it to an
# environment variable or external secret store and rotating it.
mail:
host: smtp.exmail.qq.com
username: youjiantongji@wanmeizhensuo.com
password: mq37tgk8nGi6eg1Hgq388oro3itvdmPl
default-encoding: UTF-8
# JDBC URLs; datasource.sparkSql.jdbcUrl is injected into SparksqlServiceImpl.
datasource:
sparkSql:
jdbcUrl: jdbc:hive2://bj-gm-prod-cos-datacenter003:10010
hiveSql:
jdbcUrl: jdbc:hive2://bj-gm-prod-cos-datacenter006:10000
#---email send config---
# Sender address, mail body text and blind-copy address.
mail:
fromMail:
addr: youjiantongji@wanmeizhensuo.com
content: BI报表见附件,请查收。
toMail:
bcc: zhaojianwei@igengmei.com
#---sql path---
# sql.input.path and sql.base.path are injected into SparksqlServiceImpl; the base path is
# the prefix of "pm/<name>/report".
sql:
input:
path: /home/bi/bi-report/sqls/
base:
path: /home/bi/bi-report/
#---excel config---
excel:
output:
path: /data/bi-report/result/
#---log config---
logging:
config: classpath:logback.xml
# Locations of the en-cn.properties files that map English names to Chinese display names.
proper:
  file:
    path: /home/bi/bi-report/conf/en-cn.properties
    #path: /Users/apple/git-repo/bi-report/conf/en-cn.properties
  # Prefix of "pm/<name>/en-cn.properties". SparksqlServiceImpl injects ${proper.base.path},
  # so the key must be present in this file as well.
  base:
    path: /home/bi/bi-report/
\ No newline at end of file
__ .__
_______ ____ ______ ____________/ |_ ______ ______________ _|__| ____ ____
\_ __ \_/ __ \\____ \ / _ \_ __ \ __\ ______ / ___// __ \_ __ \ \/ / |/ ___\/ __ \
| | \/\ ___/| |_> > <_> ) | \/| | /_____/ \___ \\ ___/| | \/\ /| \ \__\ ___/
|__| \___ > __/ \____/|__| |__| /____ >\___ >__| \_/ |__|\___ >___ >
\/|__| \/ \/ \/ \/
<?xml version="1.0" encoding="UTF-8"?>
<!-- Logback setup: everything at level info and above goes to the console and to a
     daily-rolled file. -->
<configuration debug="false" scan="true" scanPeriod="1 seconds">
  <contextName>logback</contextName>

  <!-- Active log file; rolled files reuse this path as their prefix. -->
  <property name="log.path" value="/data/bi-report/logs/bi-report-service.log"/>

  <!-- Console output -->
  <appender name="console" class="ch.qos.logback.core.ConsoleAppender">
    <encoder>
      <pattern>%d{HH:mm:ss.SSS} %contextName [%thread] %-5level %logger{36} - %msg%n</pattern>
    </encoder>
  </appender>

  <!-- File output, one zipped archive per day -->
  <appender name="file" class="ch.qos.logback.core.rolling.RollingFileAppender">
    <file>${log.path}</file>
    <rollingPolicy class="ch.qos.logback.core.rolling.TimeBasedRollingPolicy">
      <fileNamePattern>${log.path}.%d{yyyy-MM-dd}.zip</fileNamePattern>
    </rollingPolicy>
    <encoder>
      <pattern>%date %level [%thread] %logger{36} [%file : %line] %msg%n
      </pattern>
    </encoder>
  </appender>

  <root level="info">
    <appender-ref ref="console"/>
    <appender-ref ref="file"/>
  </root>
</configuration>
\ No newline at end of file
#coding: utf-8
#author:
#date:
#desc:send email with attach
import smtplib
import sys
import getopt
import os
import getpass
import time
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from email.header import Header
#email info
# Module-level settings; processcmd() overwrites them from the command-line options.
to_address = ""  # -t: comma-separated recipients
subject = ""  # -s: mail subject
content = ""  # -c: mail body (HTML); addsubject() appends tables and a footer to it
attach = ""  # -a: comma-separated attachment file paths
memail = ""  # -m: comma-separated cc recipients
htable = ""  # -f: comma-separated files rendered into the body as HTML tables
#userinfo
# File listing, one per line, the OS user names checkauth() accepts.
authfile = "/home/bi/bi-report/conf/auth.cf"
def readallow(filepath):
    """Return the entries of the allow-list file, one per line.

    filepath -- path of a text file with one user name per line.
    Returns a list with every line stripped of surrounding whitespace (blank lines
    yield empty strings). Raises IOError/OSError when the file cannot be opened.
    """
    # "with" guarantees the handle is closed; it used to stay open until garbage collection.
    with open(filepath, "r") as f:
        return [line.strip() for line in f.readlines()]
def checkauth():
    """Tell whether the OS user running the script is listed in the file named by authfile."""
    allowed = readallow(authfile)
    return getpass.getuser() in allowed
def isfile(filestr):
    """Check that filestr is an existing regular file with at least two lines.

    Returns True in that case. Otherwise a message is written to the log file and to
    stdout and False is returned.
    """
    if os.path.isfile(filestr):
        # Close the handle as soon as the lines are counted (it was never closed before).
        with open(filestr, 'r') as fh:
            if len(fh.readlines()) >= 2:
                return True
    write2file("%s file is less than 2 lines or does not exist...." % getfilename(filestr))
    print("--->%s file is less than 2 lines or does not exist...." % getfilename(filestr))
    return False
def getfilename(filestr):
    """Return the last component of the path filestr (empty when it ends with a separator)."""
    _directory, filename = os.path.split(filestr)
    return filename
def makeaddress():
    """Build the ";"-separated recipient string from the comma-separated to_address.

    An entry that has no "@" after its first character gets "@igengmei.com" appended.
    """
    recipients = []
    for entry in to_address.split(","):
        # find() > 0: an "@" in the very first position does not count as a full address
        if entry.find("@") > 0:
            recipients.append(entry)
        else:
            recipients.append("%s@igengmei.com" % entry)
    return ";".join(recipients)
def addattch(msgRoot):
    """Attach every file listed in the comma-separated attach setting to msgRoot.

    Returns 0 when no attachment is configured, -1 as soon as one file fails the
    isfile() check (a message is printed), and None after all files were attached.
    """
    if attach.strip() == "":
        return 0
    for item in attach.split(","):
        if not isfile(item):
            print("--->send mail failed, %s file is not exist or file row num less than 2" % getfilename(item))
            return -1
        # Read through "with" so the file handle is released right away instead of leaking.
        with open(item, 'rb') as fh:
            payload = fh.read()
        att = MIMEText(payload, 'base64', 'utf-8')
        att["Content-Type"] = 'application/octet-stream'
        att["Content-Disposition"] = 'attachment; filename="%s"' % getfilename(item)
        msgRoot.attach(att)
def makecc():
    """Build the ";"-separated cc string from the comma-separated memail setting.

    An entry that has no "@" after its first character gets "@igengmei.com" appended.
    The list is also printed to stdout, each address followed by ";".
    """
    parts = []
    for entry in memail.split(","):
        if entry.find("@") > 0:
            parts.append(entry)
        else:
            parts.append("%s@igengmei.com" % entry)
    joined = ";".join(parts)
    print(joined + ";")
    return joined
def addsubject(msgRoot):
    """Fill in Subject, To and Cc of msgRoot and attach the HTML body.

    Every file listed in htable is rendered with trans2table() and appended to the global
    content, followed by a fixed footer. Returns -1 when one of those files fails the
    isfile() check, True otherwise.
    """
    global content
    msgRoot['Subject'] = Header(subject, 'utf-8')
    msgRoot['To'] = makeaddress()
    msgRoot['Cc'] = makecc()
    table_files = htable.split(",") if htable != "" else []
    for table_file in table_files:
        if not isfile(table_file):
            return -1
        content += trans2table(table_file)
    content += "<br><br><br><br><font color=red size='3'>这是一封自动发出的邮件,邮件发送自大数据BI组。<br>请使用wps打开,appstore地址:https://appsto.re/cn/MAqWJ.i</font>"
    msgRoot.attach(MIMEText(content, 'html', 'utf-8'))
    return True
# Render comma-separated text file(s) as an HTML <table> string.
# path: one file path, or several separated by ","; "result" is re-initialised for every path.
# Blank lines are skipped; every remaining line becomes a <tr> and every comma-separated
# field a <td>. Cells written while index == 0 get the blue header background.
# NOTE(review): indentation is not visible here; presumably "index += 1" runs once per line
# (so only the first row is styled as header) and "return result" follows the loop - confirm.
# NOTE(review): the file opened in the for statement is never closed explicitly and field
# values are inserted into the HTML without escaping.
def trans2table(path):
for ff in path.split(","):
index = 0
result = "<br><br><table border='1' style='border-collapse:collapse' bordercolor='#9D9D9D'>\n"
for eachline in open(ff,'r'):
if len(eachline.strip()) == 0:
continue
result += " <tr>"
for item in eachline.strip().split(","):
if index == 0:
result += "<td align='left' bgcolor='#8EA9DB' style='font-family:Microsoft YaHei;padding:2px 5px'>%s</td>" % item
else:
result += "<td align='left' style='font-family:Microsoft YaHei;padding:2px 5px'>%s</td>" % item
index += 1
result += "</tr>\n"
result += "</table>"
return result
def sendmail():
    """Validate the parameters, build the message and send it over SMTP with STARTTLS.

    Returns 0 when parameter validation, body creation or attachment handling fails;
    returns None after the mail was sent and the success was logged.
    """
    # NOTE(review): the account password is hard-coded here; consider loading it from a
    # protected configuration source and rotating it.
    account = 'youjiantongji@wanmeizhensuo.com'
    secret = 'mq37tgk8nGi6eg1Hgq388oro3itvdmPl'
    smtp_host = 'smtp.exmail.qq.com'
    smtp_port = 25

    if not checkparam():
        return 0
    msgRoot = MIMEMultipart()
    if addsubject(msgRoot) == -1:
        return 0
    if addattch(msgRoot) == -1:
        return 0

    smtp = smtplib.SMTP(smtp_host, smtp_port)
    smtp.ehlo()
    smtp.starttls()
    smtp.login(account, secret)
    # Envelope recipients: the "to" addresses followed by the cc addresses.
    recipients = makeaddress().split(";") + makecc().split(";")
    smtp.sendmail(account, recipients, msgRoot.as_string())
    smtp.quit()

    print("send mail success....")
    write2file("send mail success")
    write2file("--------------------------------------------------------------------------------------")
def processcmd(cmd):
    """Parse the option list cmd (-t -s -c -a -m -f, each with a value) into the globals.

    Returns -1 after printing the error and the usage text when parsing fails,
    None otherwise.
    """
    global to_address
    global subject
    global content
    global attach
    global memail
    global htable
    try:
        options, _args = getopt.getopt(cmd, "c:a:t:s:m:f:")
        for flag, value in options:
            if flag == "-t":
                to_address = value
            elif flag == "-s":
                subject = value
            elif flag == "-c":
                content = value
            elif flag == "-a":
                attach = value
            elif flag == "-m":
                memail = value
            elif flag == "-f":
                htable = value
    except Exception as e:
        print("--->%s, remove this param" % str(e))
        usage()
        return -1
def checkparam():
    """Return True when both to_address and subject are non-blank.

    Otherwise print a hint plus the usage text and return False.
    """
    if to_address.strip() != "" and subject.strip() != "":
        return True
    print("--->must input -t to_address -s subject, and the value can not be empty")
    usage()
    return False
def loginlog(command):
    """Append a tab-separated record (local timestamp, OS user, command) to the log file."""
    stamp = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
    who = getpass.getuser()
    write2file("%s\tuser:%s\tcmd:%s" % (stamp, who, command))
def processlog():
    """Append the current mail parameters and a separator line to the log file."""
    stamp = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
    who = getpass.getuser()
    fields = (stamp, who, to_address, subject, content, attach)
    write2file("%s\tuser:%s\ttoaddredd:%s\tsubject:%s\tcontent:%s\tattach:%s" % fields)
    write2file("--------------------------------------------------------------------------------------")
def write2file(sss):
    """Append str(sss) followed by a newline to /data/bi-report/logs/sendmail.log."""
    logfile = "/data/bi-report/logs/sendmail.log"
    # "with" closes the handle even when the write itself raises.
    with open(logfile, 'a') as output:
        output.write(str(sss) + "\n")
def usage():
    """Print the command-line help, one option per line."""
    help_lines = (
        'sendmail usage:',
        ' -t: mail to (Required)',
        ' -s: subject (Required)',
        ' -m: cc address (Required)',
        ' -c: mail content (Optional)',
        ' -a: attach (Optional)',
        ' -f: trans file content to html table(Optional)',
    )
    for help_line in help_lines:
        print(help_line)
# Script entry point: log the invocation, refuse users that checkauth() rejects, then parse
# the options and send the mail.
# NOTE(review): indentation is not visible here; presumably the final "else" belongs to
# "if len(sys.argv) >= 3" (processcmd() already prints the usage on a parse error) - confirm.
if __name__ == '__main__':
# Record who ran the script and with which arguments.
loginlog(sys.argv)
if not checkauth():
print "--->You do not have permission to send email, please contact fangguoqing@igengmei.com"
# Exits with status 0 even though nothing was sent.
sys.exit(0)
if len(sys.argv) >= 3:
if processcmd(sys.argv[1:]) != -1:
sendmail()
else:
usage()
\ No newline at end of file
#!/bin/bash
# Runs the ETL SQL of one project through beeline.
# Usage: <script> <project_name> [partition_date]
#   project_name   (required) selects /home/bi/bi-report/pm/<project_name>/etl/<project_name>.sql
#   partition_date (optional) value substituted for ${partition_day}; defaults to yesterday
# Exit status: 1 on a missing argument or missing SQL file, otherwise the status of beeline.

# Kept in a variable so the client and endpoint are easy to change.
hive="/opt/hive/hive-1.1.0-cdh5.16.1/bin/beeline -u jdbc:hive2://bj-gm-prod-cos-datacenter005:10000 -n data"

# At least one argument is needed: the project name.
if [ $# -lt 1 ]; then
    echo "Param project_name lost, please check again!"
    # Non-zero so the scheduler sees the failure.
    exit 1
fi

# Make sure the SQL script exists.
project_name=$1
sql_path="/home/bi/bi-report/pm/$project_name/etl/$project_name.sql"
if [ ! -f "$sql_path" ]; then
    echo "Param project_name error, please check again!"
    # Was: exit `$sql_path`, which tried to execute the SQL path as a command.
    exit 1
fi

# Use the given date when present, otherwise the day before today.
if [ -n "$2" ]; then
    partition_date=$2
else
    partition_date=`date --date="-1 day" +%Y%m%d`
fi

# Placeholder substitution.
tmp_sql=`cat "${sql_path}"`
sql=${tmp_sql//'${partition_day}'/${partition_date}}
echo -e "${sql}"

# Execute the SQL.
$hive -e "$sql"
\ No newline at end of file
#!/bin/bash
# Render a report SQL template and export the Hive result as CSV.
#
# Usage: <script> <report_name> [account_date]
#   report_name   selects /home/bi/bi-report/sqls/<report_name>/<report_name>.sql
#   account_date  optional, any form accepted by `date --date`; defaults to
#                 yesterday
#
# Output file: /data/bi-report/result/<report_name>_<today as YYYYMMDD>.csv
# Exit status: 1 on bad input or a failed template step, otherwise the status
# of beeline (the status of the trailing rm used to be returned instead, which
# hid query failures).
source ~/.bashrc
source /etc/profile

if [ $# -lt 1 ]; then
    echo "Param error, please check again!"
    exit 1
fi

# Account date as passed by the caller (may be empty).
V_DATE=$2
partition_date=$(date +"%Y%m%d")
timestamp=$(date +"%Y%m%d%H%M%S")
# The PID suffix keeps two runs started within the same second apart.
tmp_file_name="/data/bi-report/tmp/tmp_${timestamp}_$$.sql"
result_file_name="/data/bi-report/result/${1}_${partition_date}.csv"

# No account date given: fall back to the default T+1 period (yesterday).
if [ -z "${V_DATE}" ]; then
    V_DATE=$(date --date="-1 day" +%Y-%m-%d)
fi

# Date values offered to the SQL templates. Stop early on an unparsable date
# instead of substituting empty strings into the SQL.
V_PARYMD=$(date --date="$V_DATE" +%Y%m%d) || { echo "Invalid date: $V_DATE"; exit 1; }
V_PARY_M_D=$(date --date="$V_DATE" +%Y-%m-%d)
V_1DAYMD_AGO=$(date --date="$V_PARYMD -1 day" +%Y%m%d)
V_7DAYMD_AGO=$(date --date="$V_PARYMD -7 day" +%Y%m%d)
V_1DAY_M_D_AGO=$(date --date="$V_PARYMD -1 day" +%Y-%m-%d)
V_7DAY_M_D_AGO=$(date --date="$V_PARYMD -7 day" +%Y-%m-%d)

# Year, month and day of the account date (not substituted into the SQL here).
V_PARYEAR=$(date --date="$V_DATE" +%Y)
V_PARMONTH=$(date --date="$V_DATE" +%m)
V_PARDAY=$(date --date="$V_DATE" +%d)

# Print the resolved account period.
echo ""
echo "************************************"
echo "V_PARYMD: ${V_PARYMD}"
echo "V_PARY_M_D: ${V_PARY_M_D}"
echo "V_1DAYMD_AGO: ${V_1DAYMD_AGO}"
echo "V_7DAYMD_AGO: ${V_7DAYMD_AGO}"
echo "V_1DAY_M_D_AGO: ${V_1DAY_M_D_AGO}"
echo "V_7DAY_M_D_AGO: ${V_7DAY_M_D_AGO}"
echo "************************************"
echo ""

# Work on a private copy of the template and replace the $V_* placeholders.
cp "/home/bi/bi-report/sqls/$1/$1.sql" "$tmp_file_name" || exit 1
sed -i "s/\$V_PARYMD/$V_PARYMD/g" "$tmp_file_name"
sed -i "s/\$V_PARY_M_D/$V_PARY_M_D/g" "$tmp_file_name"
sed -i "s/\$V_1DAYMD_AGO/$V_1DAYMD_AGO/g" "$tmp_file_name"
sed -i "s/\$V_7DAYMD_AGO/$V_7DAYMD_AGO/g" "$tmp_file_name"
sed -i "s/\$V_1DAY_M_D_AGO/$V_1DAY_M_D_AGO/g" "$tmp_file_name"
sed -i "s/\$V_7DAY_M_D_AGO/$V_7DAY_M_D_AGO/g" "$tmp_file_name"
[ $? -eq 0 ] || { rm -f "$tmp_file_name"; exit 1; }

# Run the rendered SQL and write the result, with header, as CSV.
/opt/hive/hive-1.1.0-cdh5.16.1/bin/beeline -u jdbc:hive2://bj-gm-prod-cos-datacenter004:10000 -n data --outputformat=csv2 --showHeader=true -e "$(cat "$tmp_file_name")" > "$result_file_name"
# Remember the query status before the cleanup overwrites $?.
rc=$?
rm -rf "$tmp_file_name"
exit $rc
\ No newline at end of file
#!/bin/bash
# Render a report SQL template and run it through Hive beeline; beeline's
# output goes to stdout.
#
# Usage: <script> <report_name> [account_date]
#   report_name   selects /home/bi/bi-report/sqls/<report_name>/<report_name>.sql
#   account_date  optional, any form accepted by `date --date`; defaults to
#                 yesterday
#
# Exit status: 1 on bad input or a failed template step, otherwise the status
# of beeline (the status of the trailing rm used to be returned instead, which
# hid query failures).
source ~/.bashrc
source /etc/profile

if [ $# -lt 1 ]; then
    echo "Param error, please check again!"
    exit 1
fi

# Account date as passed by the caller (may be empty).
V_DATE=$2
timestamp=$(date +"%Y%m%d%H%M%S")
# The PID suffix keeps two runs started within the same second apart.
tmp_file_name="/data/bi-report/tmp/tmp_${timestamp}_$$.sql"

# No account date given: fall back to the default T+1 period (yesterday).
if [ -z "${V_DATE}" ]; then
    V_DATE=$(date --date="-1 day" +%Y-%m-%d)
fi

# Date values offered to the SQL templates. Stop early on an unparsable date
# instead of substituting empty strings into the SQL.
V_PARYMD=$(date --date="$V_DATE" +%Y%m%d) || { echo "Invalid date: $V_DATE"; exit 1; }
V_PARY_M_D=$(date --date="$V_DATE" +%Y-%m-%d)
V_1DAYMD_AGO=$(date --date="$V_PARYMD -1 day" +%Y%m%d)
V_7DAYMD_AGO=$(date --date="$V_PARYMD -7 day" +%Y%m%d)
V_1DAY_M_D_AGO=$(date --date="$V_PARYMD -1 day" +%Y-%m-%d)
V_7DAY_M_D_AGO=$(date --date="$V_PARYMD -7 day" +%Y-%m-%d)

# Year, month and day of the account date (not substituted into the SQL here).
V_PARYEAR=$(date --date="$V_DATE" +%Y)
V_PARMONTH=$(date --date="$V_DATE" +%m)
V_PARDAY=$(date --date="$V_DATE" +%d)

# Print the resolved account period.
echo ""
echo "************************************"
echo "V_PARYMD: ${V_PARYMD}"
echo "V_PARY_M_D: ${V_PARY_M_D}"
echo "V_1DAYMD_AGO: ${V_1DAYMD_AGO}"
echo "V_7DAYMD_AGO: ${V_7DAYMD_AGO}"
echo "V_1DAY_M_D_AGO: ${V_1DAY_M_D_AGO}"
echo "V_7DAY_M_D_AGO: ${V_7DAY_M_D_AGO}"
echo "************************************"
echo ""

# Work on a private copy of the template and replace the $V_* placeholders.
cp "/home/bi/bi-report/sqls/$1/$1.sql" "$tmp_file_name" || exit 1
sed -i "s/\$V_PARYMD/$V_PARYMD/g" "$tmp_file_name"
sed -i "s/\$V_PARY_M_D/$V_PARY_M_D/g" "$tmp_file_name"
sed -i "s/\$V_1DAYMD_AGO/$V_1DAYMD_AGO/g" "$tmp_file_name"
sed -i "s/\$V_7DAYMD_AGO/$V_7DAYMD_AGO/g" "$tmp_file_name"
sed -i "s/\$V_1DAY_M_D_AGO/$V_1DAY_M_D_AGO/g" "$tmp_file_name"
sed -i "s/\$V_7DAY_M_D_AGO/$V_7DAY_M_D_AGO/g" "$tmp_file_name"
[ $? -eq 0 ] || { rm -f "$tmp_file_name"; exit 1; }

# Run the rendered SQL.
/opt/hive/hive-1.1.0-cdh5.16.1/bin/beeline -u jdbc:hive2://bj-gm-prod-cos-datacenter004:10000 -n data -e "$(cat "$tmp_file_name")"
# Remember the query status before the cleanup overwrites $?.
rc=$?
rm -rf "$tmp_file_name"
exit $rc
\ No newline at end of file
#!/bin/bash
# Render an SQL template and run it through the Spark thrift server with
# beeline, writing the CSV result to a caller-chosen file.
#
# Usage: <script> <sql_file> <output_file> [account_date]
#   sql_file      path of the SQL template
#   output_file   path that receives the CSV result (with header)
#   account_date  optional, any form accepted by `date --date`; defaults to
#                 yesterday
#
# Exit status: 1 on bad input or a failed template step, otherwise the status
# of beeline (the status of the trailing rm used to be returned instead, which
# hid query failures).
source ~/.bashrc
source /etc/profile

if [ $# -lt 2 ]; then
    echo "Param error, please check again!"
    exit 1
fi

# Account date as passed by the caller (may be empty).
V_DATE=$3

# No account date given: fall back to the default T+1 period (yesterday).
if [ -z "${V_DATE}" ]; then
    V_DATE=$(date --date="-1 day" +%Y-%m-%d)
fi

# Date values offered to the SQL templates. Stop early on an unparsable date
# instead of substituting empty strings into the SQL.
V_PARYMD=$(date --date="$V_DATE" +%Y%m%d) || { echo "Invalid date: $V_DATE"; exit 1; }
V_PARY_M_D=$(date --date="$V_DATE" +%Y-%m-%d)
V_1DAYMD_AGO=$(date --date="$V_PARYMD -1 day" +%Y%m%d)
V_7DAYMD_AGO=$(date --date="$V_PARYMD -7 day" +%Y%m%d)
V_1DAY_M_D_AGO=$(date --date="$V_PARYMD -1 day" +%Y-%m-%d)
V_7DAY_M_D_AGO=$(date --date="$V_PARYMD -7 day" +%Y-%m-%d)

# Year, month and day of the account date (not substituted into the SQL here).
V_PARYEAR=$(date --date="$V_DATE" +%Y)
V_PARMONTH=$(date --date="$V_DATE" +%m)
V_PARDAY=$(date --date="$V_DATE" +%d)

# Print the resolved account period.
echo ""
echo "************************************"
echo "V_PARYMD: ${V_PARYMD}"
echo "V_PARY_M_D: ${V_PARY_M_D}"
echo "V_1DAYMD_AGO: ${V_1DAYMD_AGO}"
echo "V_7DAYMD_AGO: ${V_7DAYMD_AGO}"
echo "V_1DAY_M_D_AGO: ${V_1DAY_M_D_AGO}"
echo "V_7DAY_M_D_AGO: ${V_7DAY_M_D_AGO}"
echo "************************************"
echo ""

# Work on a private copy in the current directory. The PID in the name keeps
# concurrent runs from overwriting each other, which a fixed tmp.sql allowed.
tmp_file_name="tmp_$$.sql"
cp "$1" "$tmp_file_name" || exit 1
sed -i "s/\$V_PARYMD/$V_PARYMD/g" "$tmp_file_name"
sed -i "s/\$V_PARY_M_D/$V_PARY_M_D/g" "$tmp_file_name"
sed -i "s/\$V_1DAYMD_AGO/$V_1DAYMD_AGO/g" "$tmp_file_name"
sed -i "s/\$V_7DAYMD_AGO/$V_7DAYMD_AGO/g" "$tmp_file_name"
sed -i "s/\$V_1DAY_M_D_AGO/$V_1DAY_M_D_AGO/g" "$tmp_file_name"
sed -i "s/\$V_7DAY_M_D_AGO/$V_7DAY_M_D_AGO/g" "$tmp_file_name"
[ $? -eq 0 ] || { rm -f "$tmp_file_name"; exit 1; }

# Run the rendered SQL and write the result, with header, as CSV.
/opt/spark/spark-2.4.5-bin-hadoop2.7/bin/beeline -u jdbc:hive2://bj-gm-prod-cos-datacenter003:10010/online -n data --outputformat=csv2 --showHeader=true -e "$(cat "$tmp_file_name")" > "$2"
# Remember the query status before the cleanup overwrites $?.
rc=$?
rm -rf "$tmp_file_name"
exit $rc
\ No newline at end of file
#!/bin/bash
# Render a report SQL template, export the result as CSV through the Spark
# thrift server, and mail the CSV as an attachment.
#
# Usage: <script> <report_name> <to> <cc> [account_date]
#   report_name   selects /home/bi/bi-report/sqls/<report_name>/<report_name>.sql
#   to            recipient address, passed to sendmail.py as -t
#   cc            cc address, passed to sendmail.py as -m
#   account_date  optional, any form accepted by `date --date`; defaults to
#                 yesterday
#
# Output file: /data/bi-report/result/<report_name>_<today as YYYYMMDD>.csv
# Exit status: 1 on bad input, a failed template step or a failed query,
# otherwise the status of sendmail.py.
source ~/.bashrc
source /etc/profile

if [ $# -lt 3 ]; then
    echo "Param error, please check again!"
    exit 1
fi

# Account date as passed by the caller (may be empty).
V_DATE=$4
partition_date=$(date +"%Y%m%d")
timestamp=$(date +"%Y%m%d%H%M%S")
# The PID suffix keeps two runs started within the same second apart.
tmp_file_name="/data/bi-report/tmp/tmp_${timestamp}_$$.sql"
result_file_name="/data/bi-report/result/${1}_${partition_date}.csv"

# No account date given: fall back to the default T+1 period (yesterday).
if [ -z "${V_DATE}" ]; then
    V_DATE=$(date --date="-1 day" +%Y-%m-%d)
fi

# Date values offered to the SQL templates. Stop early on an unparsable date
# instead of substituting empty strings into the SQL.
V_PARYMD=$(date --date="$V_DATE" +%Y%m%d) || { echo "Invalid date: $V_DATE"; exit 1; }
V_PARY_M_D=$(date --date="$V_DATE" +%Y-%m-%d)
V_1DAYMD_AGO=$(date --date="$V_PARYMD -1 day" +%Y%m%d)
V_7DAYMD_AGO=$(date --date="$V_PARYMD -7 day" +%Y%m%d)
V_1DAY_M_D_AGO=$(date --date="$V_PARYMD -1 day" +%Y-%m-%d)
V_7DAY_M_D_AGO=$(date --date="$V_PARYMD -7 day" +%Y-%m-%d)

# Year, month and day of the account date (not substituted into the SQL here).
V_PARYEAR=$(date --date="$V_DATE" +%Y)
V_PARMONTH=$(date --date="$V_DATE" +%m)
V_PARDAY=$(date --date="$V_DATE" +%d)

# Print the resolved account period.
echo ""
echo "************************************"
echo "V_PARYMD: ${V_PARYMD}"
echo "V_PARY_M_D: ${V_PARY_M_D}"
echo "V_1DAYMD_AGO: ${V_1DAYMD_AGO}"
echo "V_7DAYMD_AGO: ${V_7DAYMD_AGO}"
echo "V_1DAY_M_D_AGO: ${V_1DAY_M_D_AGO}"
echo "V_7DAY_M_D_AGO: ${V_7DAY_M_D_AGO}"
echo "************************************"
echo ""

# Work on a private copy of the template and replace the $V_* placeholders.
cp "/home/bi/bi-report/sqls/$1/$1.sql" "$tmp_file_name" || exit 1
sed -i "s/\$V_PARYMD/$V_PARYMD/g" "$tmp_file_name"
sed -i "s/\$V_PARY_M_D/$V_PARY_M_D/g" "$tmp_file_name"
sed -i "s/\$V_1DAYMD_AGO/$V_1DAYMD_AGO/g" "$tmp_file_name"
sed -i "s/\$V_7DAYMD_AGO/$V_7DAYMD_AGO/g" "$tmp_file_name"
sed -i "s/\$V_1DAY_M_D_AGO/$V_1DAY_M_D_AGO/g" "$tmp_file_name"
sed -i "s/\$V_7DAY_M_D_AGO/$V_7DAY_M_D_AGO/g" "$tmp_file_name"
[ $? -eq 0 ] || { rm -f "$tmp_file_name"; exit 1; }

# Run the rendered SQL and write the result, with header, as CSV.
/opt/spark/spark-2.4.5-bin-hadoop2.7/bin/beeline -u jdbc:hive2://bj-gm-prod-cos-datacenter003:10010/online -n data --outputformat=csv2 --showHeader=true -e "$(cat "$tmp_file_name")" > "$result_file_name"
query_rc=$?
# Always remove the rendered SQL, also after a failed query, so failed runs
# do not pile up temp files.
rm -rf "$tmp_file_name"
cleanup_rc=$?
[ $query_rc -eq 0 ] || exit 1
[ $cleanup_rc -eq 0 ] || exit 1

to=$2
cc=$3
echo "to: ${to}"
echo "cc: ${cc}"
# Quoted so each address and the attachment path reach sendmail.py as one argument.
python2.7 /home/bi/bi-report/lib/python/sendmail.py -t "$to" -s 自动发送 -m "$cc" -a "$result_file_name"
exit $?
\ No newline at end of file
daily_recommend_strategy=首页推荐策略日报
\ No newline at end of file
--***************************************************************
--* Script name: create_pm_c_op_co_content_dimen_d.sql
--* Purpose: daily content report
--* Business name: pm
--* Input data:
--* Author: weiyimin@igengmei.com
--* Last updated: 2020-5-25 11:00
--***************************************************************
-- Set global variables & UDFs (only the job queue is set here)
SET mapreduce.job.queuename=data;
-- Switch to the pm database (the original comment said bl, but the statement selects pm)
USE pm;
-- Create the internal (managed) table of the daily content report - the original note calls it a BL-layer table.
-- Each column comment is a JSON object whose chs_name field holds the Chinese display name.
-- The table is partitioned by PARTITION_DAY and stored as tab-delimited text.
-- Fix: the chs_name of question_answer_rate read 问答UV/问答UV - it now follows the other *_rate columns (UV of the type over content UV).
CREATE TABLE IF NOT EXISTS pm.tl_pm_content_d
(
day_id string comment '{"chs_name":"当天日期","description":"","etl":"","value":"","remark":""}',
device_os_type string comment '{"chs_name":"设备类型","description":"","etl":"","value":"","remark":""}',
active_type string comment '{"chs_name":"活跃类型","description":"","etl":"","value":"","remark":""}',
is_ai_channel string comment '{"chs_name":"是否AI渠道","description":"","etl":"","value":"","remark":""}',
dau int comment '{"chs_name":"DAU","description":"","etl":"","value":"","remark":""}',
content_uv int comment '{"chs_name":"内容详情页uv","description":"","etl":"","value":"","remark":""}',
content_pv int comment '{"chs_name":"内容详情页PV","description":"","etl":"","value":"","remark":""}',
per_content_uv string comment '{"chs_name":"内容详情页uv/dau","description":"","etl":"","value":"","remark":""}',
per_content_pv string comment '{"chs_name":"内容详情页pv/dau","description":"","etl":"","value":"","remark":""}',
retention_1 string comment '{"chs_name":"内容用户APP次留","description":"","etl":"","value":"","remark":""}',
retention_7 string comment '{"chs_name":"内容用户APP7留","description":"","etl":"","value":"","remark":""}',
retention_30 string comment '{"chs_name":"内容用户APP30留","description":"","etl":"","value":"","remark":""}',
avg_app_duration double comment '{"chs_name":"内容用户单设备App时长(m)","description":"","etl":"","value":"","remark":""}',
avg_content_stay double comment '{"chs_name":"内容用户单设备内容时长(m)","description":"","etl":"","value":"","remark":""}',
avg_open_times double comment '{"chs_name":"内容用户单设备打开次数","description":"","etl":"","value":"","remark":""}',
search_related_stay double comment '{"chs_name":"内容用户搜索相关页面单设备页面时长(m)","description":"","etl":"","value":"","remark":""}',
welfare_stay double comment '{"chs_name":"内容用户美购详情页单设备页面时长(m)","description":"","etl":"","value":"","remark":""}',
content_question_stay double comment '{"chs_name":"内容用户问题详情页单设备页面时长(m)","description":"","etl":"","value":"","remark":""}',
ai_related_stay double comment '{"chs_name":"内容用户AI相关页面单设备页面时长(m)","description":"","etl":"","value":"","remark":""}',
content_diary_stay double comment '{"chs_name":"内容用户日记详情页单设备页面时长(m)","description":"","etl":"","value":"","remark":""}',
home_stay double comment '{"chs_name":"内容用户首页单设备页面时长(m)","description":"","etl":"","value":"","remark":""}',
conv_related_stay double comment '{"chs_name":"内容用户咨询相关页面单设备页面时长(m)","description":"","etl":"","value":"","remark":""}',
recommend_rate double comment '{"chs_name":"首页feeds推荐进入内容UV/内容UV","description":"","etl":"","value":"","remark":""}',
per_recommend_pv double comment '{"chs_name":"首页feeds推荐进入内容PV/UV","description":"","etl":"","value":"","remark":""}',
feeds_rate double comment '{"chs_name":"首页feeds非推荐进入内容UV/内容UV","description":"","etl":"","value":"","remark":""}',
per_feeds_pv double comment '{"chs_name":"首页feeds非推荐进入内容PV/UV","description":"","etl":"","value":"","remark":""}',
search_rate double comment '{"chs_name":"搜索进入内容UV/内容UV","description":"","etl":"","value":"","remark":""}',
per_search_pv double comment '{"chs_name":"搜索进入内容PV/UV","description":"","etl":"","value":"","remark":""}',
zone_rate double comment '{"chs_name":"内容聚合页进入内容UV/内容UV","description":"","etl":"","value":"","remark":""}',
per_zone_pv double comment '{"chs_name":"内容聚合页进入内容PV/UV","description":"","etl":"","value":"","remark":""}',
content_rate double comment '{"chs_name":"内容详情页推荐板块进入内容UV/内容UV","description":"","etl":"","value":"","remark":""}',
per_from_content_pv double comment '{"chs_name":"内容详情页推荐板块进入内容PV/UV","description":"","etl":"","value":"","remark":""}',
blank_rate double comment '{"chs_name":"无来源页面(大多数为push)进入内容UV/内容UV","description":"","etl":"","value":"","remark":""}',
per_blank_pv double comment '{"chs_name":"无来源页面(大多数为push)进入内容PV/UV","description":"","etl":"","value":"","remark":""}',
comment_rate double comment '{"chs_name":"评论列表页进入内容UV/内容UV","description":"","etl":"","value":"","remark":""}',
per_comment_pv double comment '{"chs_name":"评论列表页进入内容PV/UV","description":"","etl":"","value":"","remark":""}',
org_rate double comment '{"chs_name":"医生医院主页进入内容UV/内容UV","description":"","etl":"","value":"","remark":""}',
per_org_pv double comment '{"chs_name":"医生医院主页进入内容PV/UV","description":"","etl":"","value":"","remark":""}',
category_rate double comment '{"chs_name":"品类聚合页进入内容UV/内容UV","description":"","etl":"","value":"","remark":""}',
per_category_pv double comment '{"chs_name":"品类聚合页进入内容PV/UV","description":"","etl":"","value":"","remark":""}',
my_diary_rate double comment '{"chs_name":"我的日记页进入内容UV/内容UV","description":"","etl":"","value":"","remark":""}',
per_my_diary_pv double comment '{"chs_name":"我的日记页进入内容PV/UV","description":"","etl":"","value":"","remark":""}',
ai_rate double comment '{"chs_name":"AI报告页进入内容UV/内容UV","description":"","etl":"","value":"","remark":""}',
per_ai_pv double comment '{"chs_name":"AI报告页进入内容PV/UV","description":"","etl":"","value":"","remark":""}',
create_topic_num int comment '{"chs_name":"真实发帖数","description":"","etl":"","value":"","remark":""}',
create_reply_num int comment '{"chs_name":"真实评论数","description":"","etl":"","value":"","remark":""}',
diary_uv int comment '{"chs_name":"日记UV","description":"","etl":"","value":"","remark":""}',
diary_pv int comment '{"chs_name":"日记PV","description":"","etl":"","value":"","remark":""}',
diary_rate double comment '{"chs_name":"日记UV/内容UV","description":"","etl":"","value":"","remark":""}',
per_diary_pv double comment '{"chs_name":"日记PV/日记UV","description":"","etl":"","value":"","remark":""}',
diary_stay double comment '{"chs_name":"日记单设备时长(m)","description":"","etl":"","value":"","remark":""}',
post_uv int comment '{"chs_name":"帖子UV","description":"","etl":"","value":"","remark":""}',
post_pv int comment '{"chs_name":"帖子PV","description":"","etl":"","value":"","remark":""}',
post_rate double comment '{"chs_name":"帖子UV/内容UV","description":"","etl":"","value":"","remark":""}',
per_post_pv double comment '{"chs_name":"帖子PV/帖子UV","description":"","etl":"","value":"","remark":""}',
post_stay double comment '{"chs_name":"帖子单设备时长(m)","description":"","etl":"","value":"","remark":""}',
question_uv int comment '{"chs_name":"问题UV","description":"","etl":"","value":"","remark":""}',
question_pv int comment '{"chs_name":"问题PV","description":"","etl":"","value":"","remark":""}',
question_rate double comment '{"chs_name":"问题UV/内容UV","description":"","etl":"","value":"","remark":""}',
per_question_pv double comment '{"chs_name":"问题PV/问题UV","description":"","etl":"","value":"","remark":""}',
question_stay double comment '{"chs_name":"问题单设备时长(m)","description":"","etl":"","value":"","remark":""}',
question_answer_uv int comment '{"chs_name":"问答UV","description":"","etl":"","value":"","remark":""}',
question_answer_pv int comment '{"chs_name":"问答PV","description":"","etl":"","value":"","remark":""}',
question_answer_rate double comment '{"chs_name":"问答UV/内容UV","description":"","etl":"","value":"","remark":""}',
per_question_answer_pv double comment '{"chs_name":"问答PV/问答UV","description":"","etl":"","value":"","remark":""}',
question_answer_stay double comment '{"chs_name":"问答单设备时长(m)","description":"","etl":"","value":"","remark":""}',
answer_uv int comment '{"chs_name":"回答UV","description":"","etl":"","value":"","remark":""}',
answer_pv int comment '{"chs_name":"回答PV","description":"","etl":"","value":"","remark":""}',
answer_rate double comment '{"chs_name":"回答UV/内容UV","description":"","etl":"","value":"","remark":""}',
per_answer_pv double comment '{"chs_name":"回答PV/回答UV","description":"","etl":"","value":"","remark":""}',
answer_stay double comment '{"chs_name":"回答单设备时长(m)","description":"","etl":"","value":"","remark":""}',
video_uv int comment '{"chs_name":"视频UV","description":"","etl":"","value":"","remark":""}',
video_pv int comment '{"chs_name":"视频PV","description":"","etl":"","value":"","remark":""}',
video_rate double comment '{"chs_name":"视频UV/内容UV","description":"","etl":"","value":"","remark":""}',
per_video_pv double comment '{"chs_name":"视频PV/视频UV","description":"","etl":"","value":"","remark":""}',
video_stay double comment '{"chs_name":"视频单设备时长(m)","description":"","etl":"","value":"","remark":""}',
wiki_uv int comment '{"chs_name":"百科UV","description":"","etl":"","value":"","remark":""}',
wiki_pv int comment '{"chs_name":"百科PV","description":"","etl":"","value":"","remark":""}',
wiki_rate double comment '{"chs_name":"百科UV/内容UV","description":"","etl":"","value":"","remark":""}',
per_wiki_pv double comment '{"chs_name":"百科PV/百科UV","description":"","etl":"","value":"","remark":""}',
wiki_stay double comment '{"chs_name":"百科单设备时长(m)","description":"","etl":"","value":"","remark":""}',
article_uv int comment '{"chs_name":"专栏UV","description":"","etl":"","value":"","remark":""}',
article_pv int comment '{"chs_name":"专栏PV","description":"","etl":"","value":"","remark":""}',
article_rate double comment '{"chs_name":"专栏UV/内容UV","description":"","etl":"","value":"","remark":""}',
per_article_pv double comment '{"chs_name":"专栏PV/专栏UV","description":"","etl":"","value":"","remark":""}',
article_stay double comment '{"chs_name":"专栏单设备时长(m)","description":"","etl":"","value":"","remark":""}'
)comment '内容日报'
PARTITIONED BY (PARTITION_DAY STRING comment '分区日期')
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t'
COLLECTION ITEMS TERMINATED BY '\002'
MAP KEYS TERMINATED BY '\003'
LINES TERMINATED BY '\n'
STORED AS TEXTFILE;
\ No newline at end of file
-- Session setup for the INSERT into pm.tl_pm_recommend_strategy_d that follows.
-- Job queue, mapper and reducer memory, map-join conversion and a fixed reducer count.
SET mapreduce.job.queuename=data;
SET mapreduce.map.memory.mb=8192;
SET mapreduce.map.java.opts=-Xmx8000m;
SET mapreduce.reduce.memory.mb=8192;
SET mapreduce.reduce.java.opts=-Xmx8000m;
set hive.auto.convert.join=true;
SET mapred.reduce.tasks=20;
SET role admin;
-- Register the convup UDF from the shared UDF jar.
-- NOTE(review): no call to convup is visible in this script, and json_split (which the query does call) is not registered here - confirm both.
ADD JAR hdfs:///user/hive/share/lib/udf/hive-udf-1.0-SNAPSHOT.jar;
CREATE TEMPORARY FUNCTION convup AS 'com.gmei.hive.common.udf.UDFConvUpgrade';
-- Overwrite one PARTITION_DAY partition of pm.tl_pm_recommend_strategy_d with
-- card exposure, click and follow-up metrics grouped by OS, active type,
-- card content type and recommend type.
-- NOTE(review): the target partition comes from the partition_day placeholder, while every
-- source filter in this statement is derived from current_date minus one day. A run for an
-- older partition_day would write the data of yesterday into that partition - confirm intent.
INSERT OVERWRITE TABLE pm.tl_pm_recommend_strategy_d PARTITION (PARTITION_DAY = ${partition_day})
SELECT
t1.partition_date as day_id,
t1.device_os_type as device_os_type,
t1.active_type as active_type,
t2.card_content_type as card_content_type,
t2.recommend_type as recommend_type,
NVL(sum(t3.session_pv),0) as card_click,
NVL(sum(t2.session_pv),0) as card_exposure,
-- page_stay summed over distinct t4 devices, divided by 60 and rounded to 2 decimals
NVL(round(sum(page_stay)/count(distinct t4.cl_id)/60,2),0) as avg_page_stay,
NVL(sum(navbar_pv),0) as navbar_search,
NVL(sum(highlight_pv),0) as highlight_word,
NVL(sum(self_wel_pv),0) as self_welfare_card,
NVL(sum(recom_wel_pv),0)-NVL(sum(self_wel_pv),0) as recommend_welfare_card,-- clicks on the associated product cards must be excluded
NVL(sum(recom_content_pv),0) as recommend_content_card,
-- The next three metrics are not computed by this query and are written as NULL.
NULL as recommend_special_card,
NULL as transfer_card,
NULL as video_consultation
FROM
-- t1: device rows of online.ml_device_day_active_status for yesterday
-- (partition_date from yesterday inclusive up to today exclusive), restricted to
-- active_type 1, 2 and 4 and with the listed first_channel_source_type values removed.
-- active_type 4 is relabelled 老活 and 1 or 2 is relabelled 新增
-- (presumably returning versus new devices - confirm).
-- NOTE(review): rows whose first_channel_source_type is NULL do not pass a NOT IN / NOT LIKE
-- predicate, so they are dropped as well - confirm this is intended.
(
SELECT partition_date
,device_os_type
,CASE WHEN active_type = '4' THEN '老活'
WHEN active_type IN ('1','2') THEN '新增' END AS active_type
,device_id
FROM online.ml_device_day_active_status
WHERE partition_date>=regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND partition_date<regexp_replace((current_date),'-','')
AND active_type IN ('1','2','4')
AND first_channel_source_type not IN ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang'
,'js-az1','js-az2','js-az3','js-az4','js-az5','jfq-az1','jfq-az2','jfq-az3','jfq-az4','jfq-az5','toufang1'
,'toufang2','toufang3','toufang4','toufang5','toufang6','TF-toufang1','TF-toufang2','TF-toufang3','TF-toufang4'
,'TF-toufang5','tf-toufang1','tf-toufang2','tf-toufang3','tf-toufang4','tf-toufang5','benzhan','promotion_aso100'
,'promotion_qianka','promotion_xiaoyu','promotion_dianru','promotion_malioaso','promotion_malioaso-shequ'
,'promotion_shike','promotion_julang_jl03','promotion_zuimei')
AND first_channel_source_type not LIKE 'promotion\_jf\_%'
)t1
-- t2: card exposures on the home page tab 精选 for yesterday.
-- The inner query de-duplicates to one row per device, card and app session, the outer
-- query counts distinct sessions per device, card, content type and recommend type.
-- Content types qa and answer are merged into qa, and transaction_type is mapped to the
-- recommend_type label. Inner join: only devices present in t1 are kept.
JOIN
(-- Precise exposure, de-duplicated by card id and session id
SELECT partition_date,
card_content_type,
cl_id,
recommend_type,
card_id,
count(distinct app_session_id) as session_pv
FROM
(
SELECT partition_date,
cl_id,
case when card_content_type in ('qa','answer') then 'qa' else card_content_type end as card_content_type,
CASE WHEN transaction_type in ('ctr') THEN 'ctr预估'
WHEN transaction_type in ('cvr') THEN 'cvr预估'
WHEN transaction_type in ('-1','smr') THEN 'smr'
when transaction_type in ('pgc','hotspot') then '热点卡片'
when transaction_type in ('newdata') then '保量卡片'
END AS recommend_type,
card_id,
app_session_id
from online.ml_community_precise_exposure_detail
WHERE partition_date>=regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND partition_date<regexp_replace((current_date),'-','')
AND action in ('page_precise_exposure','home_choiceness_card_exposure') -- from version 7745 on the action is page_precise_exposure
AND is_exposure = '1' ---- precise exposure
AND page_name ='home'
AND tab_name = '精选'
AND transaction_type in ('-1','ctr','smr','cvr','hotspot','pgc','newdata')
AND card_content_type in ('qa','diary','user_post','answer')
group by partition_date,
case when card_content_type in ('qa','answer') then 'qa' else card_content_type end,
cl_id,
CASE WHEN transaction_type in ('ctr') THEN 'ctr预估'
WHEN transaction_type in ('cvr') THEN 'cvr预估'
WHEN transaction_type in ('-1','smr') THEN 'smr'
when transaction_type in ('pgc','hotspot') then '热点卡片'
when transaction_type in ('newdata') then '保量卡片' END,
card_id,
app_session_id
)a
group by partition_date,card_content_type,cl_id,recommend_type,card_id
)t2
on t1.device_id=t2.cl_id and t1.partition_date=t2.partition_date
-- t3: card clicks (action on_click_card) on the home page tab 精选 for yesterday, read from
-- the params map of online.bl_hdfs_maidian_updates. Same de-duplication and label mapping
-- as t2. Joined to t2 on date, device, card, content type and recommend type, so a click
-- only counts for an exposed card.
LEFT JOIN
(-- Card clicks, de-duplicated by card id and session id
SELECT partition_date,
card_content_type,
cl_id,
recommend_type,
card_id,
count(distinct app_session_id) as session_pv
FROM
(
SELECT partition_date,
cl_id,
case when params['card_content_type'] in ('qa','answer') then 'qa' else params['card_content_type'] end as card_content_type,
CASE WHEN params['transaction_type'] in ('ctr') THEN 'ctr预估'
WHEN params['transaction_type'] in ('cvr') THEN 'cvr预估'
WHEN params['transaction_type'] in ('-1','smr') THEN 'smr'
when params['transaction_type'] in ('pgc','hotspot') then '热点卡片'
when params['transaction_type'] in ('newdata') then '保量卡片'
END AS recommend_type,
params['card_id'] as card_id,
app_session_id
from online.bl_hdfs_maidian_updates
WHERE partition_date>=regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND partition_date<regexp_replace((current_date),'-','')
AND action='on_click_card'
AND params['page_name'] ='home'
AND params['tab_name'] = '精选'
AND params['transaction_type'] in ('-1','ctr','smr','cvr','hotspot','pgc','newdata')
AND params['card_content_type'] in ('qa','diary','user_post','answer')
GROUP BY partition_date,
cl_id,
case when params['card_content_type'] in ('qa','answer') then 'qa' else params['card_content_type'] end,
CASE WHEN params['transaction_type'] in ('ctr') THEN 'ctr预估'
WHEN params['transaction_type'] in ('cvr') THEN 'cvr预估'
WHEN params['transaction_type'] in ('-1','smr') THEN 'smr'
when params['transaction_type'] in ('pgc','hotspot') then '热点卡片'
when params['transaction_type'] in ('newdata') then '保量卡片' END,
params['card_id'],
app_session_id
)a
group by partition_date,card_content_type,cl_id,recommend_type,card_id
)t3
on t2.partition_date=t3.partition_date
and t2.cl_id=t3.cl_id
and t2.card_id=t3.card_id
and t2.card_content_type=t3.card_content_type
and t2.recommend_type=t3.recommend_type
-- t4: total page_stay per device, business_id and content type for page_view events on
-- detail pages whose referrer is home. Detail page names are folded into diary, user_post
-- or qa so they can be matched against the card content type of t3.
-- Only page_stay values from 0 inclusive to 1000 exclusive are summed.
LEFT JOIN
(-- Page view duration
SELECT partition_date,cl_id,business_id,
case when page_name in ('diary_detail','topic_detail') then 'diary'
when page_name in ('post_detail','user_post_detail','doctor_post_detail') then 'user_post'
when page_name in ('question_detail','answer_detail','question_answer_detail') then 'qa' else null end as page_name,
sum(page_stay) as page_stay
FROM online.bl_hdfs_maidian_updates
WHERE partition_date>=regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND partition_date<regexp_replace((current_date),'-','')
AND action='page_view'
AND page_name IN ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail')
AND referrer='home'
AND page_stay>=0 AND page_stay<1000
GROUP BY partition_date,cl_id,business_id,
case when page_name in ('diary_detail','topic_detail') then 'diary'
when page_name in ('post_detail','user_post_detail','doctor_post_detail') then 'user_post'
when page_name in ('question_detail','answer_detail','question_answer_detail') then 'qa' else null end
)t4
on t4.partition_date=t3.partition_date
and t4.cl_id=t3.cl_id
and t4.business_id=t3.card_id
and t4.page_name=t3.card_content_type
-- t5: number of on_click_navbar_search and do_search events per device, business_id and
-- content type on detail pages reached from home. A page counts as reached from home when
-- referrer is home or when the last element of the referrer_link array is home.
LEFT JOIN
(-- Search box and click behaviour
SELECT partition_date,cl_id,business_id,
case when page_name in ('diary_detail','topic_detail') then 'diary'
when page_name in ('post_detail','user_post_detail','doctor_post_detail') then 'user_post'
when page_name in ('question_detail','answer_detail','question_answer_detail') then 'qa' else null end as page_name,
count(1) as navbar_pv
FROM online.bl_hdfs_maidian_updates
WHERE partition_date>=regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND partition_date<regexp_replace((current_date),'-','')
AND action in ('on_click_navbar_search','do_search')
AND page_name IN ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail')
AND (referrer='home' or
(params['referrer_link'] like '%[%' and
json_split(params['referrer_link'])[size(json_split(params['referrer_link']))-1]='home'))
group by partition_date,cl_id,business_id,
case when page_name in ('diary_detail','topic_detail') then 'diary'
when page_name in ('post_detail','user_post_detail','doctor_post_detail') then 'user_post'
when page_name in ('question_detail','answer_detail','question_answer_detail') then 'qa' else null end
)t5
on t5.partition_date=t3.partition_date
and t5.cl_id=t3.cl_id
and t5.business_id=t3.card_id
and t5.page_name=t3.card_content_type
-- t6: number of on_click_card events with card_type highlight_word per device, business_id
-- and content type on detail pages reached from home (referrer is home, or the last
-- element of the referrer_link array is home).
LEFT JOIN
(-- Clicks on highlighted words
SELECT partition_date,cl_id,business_id,
case when page_name in ('diary_detail','topic_detail') then 'diary'
when page_name in ('post_detail','user_post_detail','doctor_post_detail') then 'user_post'
when page_name in ('question_detail','answer_detail','question_answer_detail') then 'qa' else null end as page_name,
count(1) as highlight_pv
FROM online.bl_hdfs_maidian_updates
WHERE partition_date>=regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND partition_date<regexp_replace((current_date),'-','')
AND action='on_click_card'
and params['card_type']='highlight_word'
AND page_name IN ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail')
AND (referrer='home' or
(params['referrer_link'] like '%[%' and
json_split(params['referrer_link'])[size(json_split(params['referrer_link']))-1]='home'))
group by partition_date,cl_id,business_id,
case when page_name in ('diary_detail','topic_detail') then 'diary'
when page_name in ('post_detail','user_post_detail','doctor_post_detail') then 'user_post'
when page_name in ('question_detail','answer_detail','question_answer_detail') then 'qa' else null end
)t6
on t6.partition_date=t3.partition_date
and t6.cl_id=t3.cl_id
and t6.business_id=t3.card_id
and t6.page_name=t3.card_content_type
-- t7: distinct sessions with a click on a service card (card_content_type service) that sits
-- in the interaction bar, comes from a msg_link jump, or is at the top or bottom page
-- position, on diary_detail and topic_detail pages reached from home.
-- The page filter only admits diary_detail and topic_detail, so only the diary branch of
-- the CASE expression can match here.
LEFT JOIN
(-- Associated meigou (service) cards
SELECT partition_date,cl_id,business_id,page_name,count(distinct app_session_id) as self_wel_pv
FROM
(
SELECT partition_date,cl_id,business_id,app_session_id,params['card_id'] as card_id,
case when page_name in ('diary_detail','topic_detail') then 'diary'
when page_name in ('post_detail','user_post_detail','doctor_post_detail') then 'user_post'
when page_name in ('question_detail','answer_detail','question_answer_detail') then 'qa' else null end as page_name,
count(1) as pv
FROM online.bl_hdfs_maidian_updates
WHERE partition_date>=regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND partition_date<regexp_replace((current_date),'-','')
AND (get_json_object(params['extra_param'], '$.type')='交互栏'
or get_json_object(params['extra_param'], '$.jump_from')='msg_link'
or params['in_page_pos']='top' or params['in_page_pos']='bottom')
AND action='on_click_card'
and params['card_content_type']='service'
AND page_name IN ('diary_detail','topic_detail')
AND (referrer='home' or
(params['referrer_link'] like '%[%' and
json_split(params['referrer_link'])[size(json_split(params['referrer_link']))-1]='home'))
group by partition_date,cl_id,business_id,app_session_id,params['card_id'],
case when page_name in ('diary_detail','topic_detail') then 'diary'
when page_name in ('post_detail','user_post_detail','doctor_post_detail') then 'user_post'
when page_name in ('question_detail','answer_detail','question_answer_detail') then 'qa' else null end
)a
group by partition_date,cl_id,business_id,page_name
)t7
on t7.partition_date=t3.partition_date
and t7.cl_id=t3.cl_id
and t7.business_id=t3.card_id
and t7.page_name=t3.card_content_type
-- t8: distinct sessions with any service card click, or with a click on the unfold button,
-- on detail pages reached from home. The outer select list subtracts self_wel_pv (t7)
-- from this count to obtain recommend_welfare_card.
LEFT JOIN
(-- Recommended meigou cards (meigou consumed by the author must be excluded)
SELECT partition_date,cl_id,business_id,page_name,count(distinct app_session_id) as recom_wel_pv
FROM
(
SELECT partition_date,cl_id,business_id,app_session_id,params['card_id'] as card_id,
case when page_name in ('diary_detail','topic_detail') then 'diary'
when page_name in ('post_detail','user_post_detail','doctor_post_detail') then 'user_post'
when page_name in ('question_detail','answer_detail','question_answer_detail') then 'qa' else null end as page_name,
count(1) as service_pv
FROM online.bl_hdfs_maidian_updates
WHERE partition_date>=regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND partition_date<regexp_replace((current_date),'-','')
AND (action='on_click_card'and params['card_content_type']='service'
or action='on_click_button' and params['button_name']='unfold')
AND page_name IN ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail')
AND (referrer='home' or
(params['referrer_link'] like '%[%' and
json_split(params['referrer_link'])[size(json_split(params['referrer_link']))-1]='home'))
group by partition_date,cl_id,business_id,app_session_id,params['card_id'],
case when page_name in ('diary_detail','topic_detail') then 'diary'
when page_name in ('post_detail','user_post_detail','doctor_post_detail') then 'user_post'
when page_name in ('question_detail','answer_detail','question_answer_detail') then 'qa' else null end
)a
group by partition_date,cl_id,business_id,page_name
)t8
on t8.partition_date=t3.partition_date
and t8.cl_id=t3.cl_id
and t8.business_id=t3.card_id
and t8.page_name=t3.card_content_type
-- t9: distinct sessions with a click on a content card (qa, diary, user_post or answer)
-- on detail pages reached from home.
LEFT JOIN
(-- Recommended content cards
SELECT partition_date,cl_id,business_id,page_name,count(distinct app_session_id) as recom_content_pv
FROM
(
SELECT partition_date,cl_id,business_id,app_session_id,params['card_id'] as card_id,
case when page_name in ('diary_detail','topic_detail') then 'diary'
when page_name in ('post_detail','user_post_detail','doctor_post_detail') then 'user_post'
when page_name in ('question_detail','answer_detail','question_answer_detail') then 'qa' else null end as page_name,
count(1) as service_pv
FROM online.bl_hdfs_maidian_updates
WHERE partition_date>=regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND partition_date<regexp_replace((current_date),'-','')
AND action='on_click_card'
and params['card_content_type'] in ('qa','diary','user_post','answer')
AND page_name IN ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail')
AND (referrer='home' or
(params['referrer_link'] like '%[%' and
json_split(params['referrer_link'])[size(json_split(params['referrer_link']))-1]='home'))
group by partition_date,cl_id,business_id,app_session_id,params['card_id'],
case when page_name in ('diary_detail','topic_detail') then 'diary'
when page_name in ('post_detail','user_post_detail','doctor_post_detail') then 'user_post'
when page_name in ('question_detail','answer_detail','question_answer_detail') then 'qa' else null end
)a
group by partition_date,cl_id,business_id,page_name
)t9
on t9.partition_date=t3.partition_date
and t9.cl_id=t3.cl_id
and t9.business_id=t3.card_id
and t9.page_name=t3.card_content_type
-- spam_pv: device ids to exclude - the spam device list of yesterday plus staff devices.
-- Used as an anti-join: the final WHERE keeps only rows with no match here.
LEFT JOIN
(
select distinct device_id
from ml.ml_d_ct_dv_devicespam_d -- exclude devices used by institutions for fake orders, i.e. cheating devices (removed from view and exposure events)
WHERE partition_day=regexp_replace(DATE_SUB(current_date,1) ,'-','')
union all
select distinct device_id
from dim.dim_device_user_staff -- exclude intranet users
)spam_pv
on spam_pv.device_id=t2.cl_id
LEFT JOIN
(
SELECT partition_date,device_id
FROM
(--找出user_id当天活跃的第一个设备id
SELECT user_id,partition_date,
if(size(device_list) > 0, device_list [ 0 ], '') AS device_id
FROM online.ml_user_updates
WHERE partition_date>=regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND partition_date<regexp_replace((current_date),'-','')
)t1
JOIN
( --医生账号
SELECT distinct user_id
FROM online.tl_hdfs_doctor_view
WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
--马甲账号/模特用户
UNION ALL
SELECT user_id
FROM ml.ml_c_ct_ui_user_dimen_d
WHERE partition_day = regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND (is_puppet = 'true' or is_classifyuser = 'true')
UNION ALL
--公司内网覆盖用户
select distinct user_id
from dim.dim_device_user_staff
UNION ALL
--登陆过医生设备
SELECT distinct t1.user_id
FROM
(
SELECT user_id, v.device_id as device_id
FROM online.ml_user_history_detail
LATERAL VIEW EXPLODE(device_history_list) v AS device_id
WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
) t1
JOIN
(
SELECT device_id
FROM online.ml_device_history_detail
WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND is_login_doctor = '1'
) t2
ON t1.device_id = t2.device_id
)t2
on t1.user_id=t2.user_id
group by partition_date,device_id
)dev
on t2.partition_date=dev.partition_date and t2.cl_id=dev.device_id
WHERE spam_pv.device_id IS NULL
and dev.device_id is null
GROUP BY t1.partition_date,t1.device_os_type,t1.active_type,t2.card_content_type,t2.recommend_type
order by day_id,device_os_type,active_type,card_content_type,recommend_type;
\ No newline at end of file
#step1_1.job
# Azkaban command job, listed in the step2 dependencies.
# Runs waitsuccess.sh for Hive table online.ml_device_day_active_status
# (presumably blocks until that table is ready - confirm in waitsuccess.sh).
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online ml_device_day_active_status
\ No newline at end of file \ No newline at end of file
#step1_2.job
# Azkaban command job, listed in the step2 dependencies.
# Runs waitsuccess.sh for Hive table online.ml_community_precise_exposure_detail
# (presumably blocks until that table is ready - confirm in waitsuccess.sh).
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online ml_community_precise_exposure_detail
\ No newline at end of file \ No newline at end of file
#step1_3.job
# Azkaban command job, listed in the step2 dependencies.
# Runs waitsuccess.sh for Hive table online.bl_hdfs_maidian_updates
# (presumably blocks until that table is ready - confirm in waitsuccess.sh).
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online bl_hdfs_maidian_updates
\ No newline at end of file \ No newline at end of file
#step1_4.job
# Azkaban command job, listed in the step2 dependencies.
# Invokes waitsuccess.sh with arguments: hive ml ml_d_ct_dv_devicespam_d
# (presumably a readiness check for that table - confirm in waitsuccess.sh).
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive ml ml_d_ct_dv_devicespam_d
type=command
\ No newline at end of file
#step1_5.job
# Azkaban command job, listed in the step2 dependencies.
# Runs waitsuccess.sh for Hive table online.ml_user_updates
# (presumably blocks until that table is ready - confirm in waitsuccess.sh).
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online ml_user_updates
\ No newline at end of file \ No newline at end of file
#step1_6.job
# Azkaban command job, listed in the step2 dependencies.
# Runs waitsuccess.sh for Hive table online.tl_hdfs_doctor_view
# (presumably blocks until that table is ready - confirm in waitsuccess.sh).
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online tl_hdfs_doctor_view
\ No newline at end of file \ No newline at end of file
#step1_7.job
# Azkaban command job, listed in the step2 dependencies.
# Invokes waitsuccess.sh with arguments: hive ml ml_c_ct_ui_user_dimen_d
# (presumably a readiness check for that table - confirm in waitsuccess.sh).
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive ml ml_c_ct_ui_user_dimen_d
type=command
\ No newline at end of file
#step1_8.job
# Azkaban command job, listed in the step2 dependencies.
# Runs waitsuccess.sh for Hive table online.ml_user_history_detail
# (presumably blocks until that table is ready - confirm in waitsuccess.sh).
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online ml_user_history_detail
\ No newline at end of file \ No newline at end of file
#step1_9.job
# Azkaban command job, listed in the step2 dependencies.
# Runs waitsuccess.sh for Hive table online.ml_device_history_detail
# (presumably blocks until that table is ready - confirm in waitsuccess.sh).
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online ml_device_history_detail
\ No newline at end of file \ No newline at end of file
#step2.job
# Runs the project hive wrapper for the daily_recommend_strategy report
# once all nine step1_* jobs have finished.
type=command
command=/home/bi/bi-report/lib/shell/hive daily_recommend_strategy
dependencies=step1_1,step1_2,step1_3,step1_4,step1_5,step1_6,step1_7,step1_8,step1_9
\ No newline at end of file
#step3.job
# Calls the local report service to email the daily_recommend_strategy report
# to the recipients encoded in the URL path. Runs after step2.
# --fail makes curl exit non-zero on HTTP 4xx/5xx so a failed send fails the job
# instead of being reported as success. --silent --show-error keeps the job log
# free of the progress meter while still printing errors.
type=command
dependencies=step2
command=curl --fail --silent --show-error -X GET http://localhost:8553/api/report/email/daily_recommend_strategy/zhaojianwei@igengmei.com/jianweizhao@yeah.net
\ No newline at end of file
-- Daily recommendation strategy report read from pm.tl_pm_recommend_strategy_d,
-- covering partitions from 20200627 up to and including yesterday.
-- The valid second-hop PV is
--   navbar_search + highlight_word + self_welfare_card + recommend_welfare_card + recommend_content_card/2
-- and is computed once in the derived table, then reused by the ratio columns.
-- A ratio falls back to 0 when its CONCAT result is NULL.
SELECT
    s.day_id                  AS `日期`,
    s.device_os_type          AS `设备类型`,
    s.active_type             AS `活跃类型`,
    s.card_content_type       AS `卡片类型`,
    s.recommend_type          AS `推荐类型`,
    NVL(CONCAT(ROUND(s.second_hop_pv/s.card_exposure*100,2),'%'),0) AS `来自首页推荐内容卡片的的有效二跳pv/首页卡片精准曝光PV`,
    NVL(CONCAT(ROUND(s.card_click/s.card_exposure*100,2),'%'),0)    AS `首页卡片点击PV/首页卡片精准曝光PV`,
    NVL(CONCAT(ROUND(s.second_hop_pv/s.card_click*100,2),'%'),0)    AS `来自首页推荐内容卡片的的有效二跳pv/首页卡片点击PV`,
    s.card_click              AS `首页卡片点击PV`,
    s.card_exposure           AS `首页卡片精准曝光PV`,
    s.second_hop_pv           AS `有效二跳pv`,
    s.avg_page_stay           AS `来自I的单PV平均浏览时长`,
    s.navbar_search           AS `来自I的搜索框+搜索按钮点击PV`,
    s.highlight_word          AS `来自I的文内搜索点击PV`,
    s.self_welfare_card       AS `来自I的商品卡片点击PV`,
    s.recommend_welfare_card  AS `来自I的推荐商品+查看全部商品点击pv`,
    s.recommend_content_card  AS `来自I的推荐内容点击pv`,
    -- Constant placeholder columns
    '未配置'                   AS `来自I的推荐专题点击pv`,
    '未上线'                   AS `来自I的转诊点击pv`,
    '未上线'                   AS `来自I的视频面诊点击pv`
FROM
(
    SELECT day_id,
           device_os_type,
           active_type,
           card_content_type,
           recommend_type,
           card_click,
           card_exposure,
           avg_page_stay,
           navbar_search,
           highlight_word,
           self_welfare_card,
           recommend_welfare_card,
           recommend_content_card,
           (navbar_search+highlight_word+self_welfare_card+recommend_welfare_card+recommend_content_card/2) AS second_hop_pv
    FROM pm.tl_pm_recommend_strategy_d
    WHERE partition_day >= '20200627'
      AND partition_day <= regexp_replace(DATE_SUB(current_date,1) ,'-','')
) s
ORDER BY `日期` DESC, `设备类型`, `活跃类型`, `卡片类型`, `推荐类型`;
\ No newline at end of file
-- Sample of 10 rows from online.ml_device_day_active_status for partitions
-- 20200401 through 20200408 (inclusive), limited to active_type 1, 2 and 4.
-- Devices whose first channel is in the exclusion list, or matches promotion_jf_*,
-- are filtered out.
SELECT d.partition_date
      ,d.device_os_type AS device_type
      ,CASE d.active_type
            WHEN '4' THEN '老活'
            WHEN '1' THEN '新增'
            WHEN '2' THEN '新增'
       END AS active_type
      ,d.device_id
FROM online.ml_device_day_active_status d
WHERE d.partition_date >= '20200401'
  AND d.partition_date <= '20200408'
  AND d.active_type IN ('1','2','4')
  AND d.first_channel_source_type NOT IN (
         'yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
        ,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang'
        ,'js-az1','js-az2','js-az3','js-az4','js-az5','jfq-az1','jfq-az2','jfq-az3','jfq-az4','jfq-az5','toufang1'
        ,'toufang2','toufang3','toufang4','toufang5','toufang6','TF-toufang1','TF-toufang2','TF-toufang3','TF-toufang4'
        ,'TF-toufang5','tf-toufang1','tf-toufang2','tf-toufang3','tf-toufang4','tf-toufang5','benzhan','promotion_aso100'
        ,'promotion_qianka','promotion_xiaoyu','promotion_dianru','promotion_malioaso','promotion_malioaso-shequ'
        ,'promotion_shike','promotion_julang_jl03','promotion_zuimei'
      )
  AND d.first_channel_source_type NOT LIKE 'promotion\_jf\_%'
LIMIT 10
\ No newline at end of file
set mapreduce.job.queuename= data;
-- Welfare detail page distribution report.
-- Output: one row per day, device type, active type and channel, with the page view
-- PV of the welfare_detail page, the click PV of each outbound action, and each
-- click PV as a percentage of the page view PV.
-- Window: partitions from (current_date - 90 days) up to but excluding today.
-- NOTE(review): on Hive, DATE minus a day INTERVAL may yield a TIMESTAMP. If so the
-- lower bound string carries a time suffix and the oldest day is dropped - confirm
-- on the deployed engine.
-- All click events are classified in ONE scan of online.bl_hdfs_maidian_updates.
-- The WHEN branches are mutually exclusive on action and params, so every event
-- row receives at most one type.
SELECT `日期`,`设备类型`,`活跃类型`,`渠道`
      ,`美购详情页浏览pv`
      ,`美购详情页分发pv加总`
      ,concat(round(`美购详情页分发pv加总`/`美购详情页浏览pv`*100,4),'%') as `美购详情页分发pv占比加总`
      ,`加购物车点击pv`
      ,concat(round(`加购物车点击pv`/`美购详情页浏览pv`*100,4),'%') as `加购物车点击pv占比`
      ,`评价列表页点击pv`
      ,concat(round(`评价列表页点击pv`/`美购详情页浏览pv`*100,4),'%') as `评价列表页点击pv占比`
      ,`立即支付点击pv`
      ,concat(round(`立即支付点击pv`/`美购详情页浏览pv`*100,4),'%') as `立即支付点击pv占比`
      ,`私信及电话咨询pv`
      ,concat(round(`私信及电话咨询pv`/`美购详情页浏览pv`*100,4),'%') as `私信及电话咨询pv占比`
      ,`选择美购项目点击pv`
      ,concat(round(`选择美购项目点击pv`/`美购详情页浏览pv`*100,4),'%') as `选择美购项目点击pv占比`
      ,`机构主页点击pv`
      ,concat(round(`机构主页点击pv`/`美购详情页浏览pv`*100,4),'%') as `机构主页点击pv占比`
      ,`机构的其他美购点击pv`
      ,concat(round(`机构的其他美购点击pv`/`美购详情页浏览pv`*100,4),'%') as `机构的其他美购点击pv占比`
      ,`推荐美购点击pv`
      ,concat(round(`推荐美购点击pv`/`美购详情页浏览pv`*100,4),'%') as `推荐美购点击pv占比`
      ,`收藏点击pv`
      ,concat(round(`收藏点击pv`/`美购详情页浏览pv`*100,4),'%') as `收藏点击pv占比`
      ,`右上角购物车点击pv`
      ,concat(round(`右上角购物车点击pv`/`美购详情页浏览pv`*100,4),'%') as `右上角购物车点击pv占比`
      ,`医生主页点击pv`
      ,concat(round(`医生主页点击pv`/`美购详情页浏览pv`*100,4),'%') as `医生主页点击pv占比`
      ,`分享点击pv`
      ,concat(round(`分享点击pv`/`美购详情页浏览pv`*100,4),'%') as `分享点击pv占比`
      ,`机构导航点击pv`
      ,concat(round(`机构导航点击pv`/`美购详情页浏览pv`*100,4),'%') as `机构导航点击pv占比`
FROM
(
    -- Adds the total of all outbound click PVs once, so the outer query can reuse it
    SELECT agg.*
          ,(`加购物车点击pv`+`评价列表页点击pv`+`立即支付点击pv`+`私信及电话咨询pv` +`选择美购项目点击pv`
            +`机构主页点击pv`+`机构的其他美购点击pv`+`推荐美购点击pv`+`收藏点击pv`
            +`右上角购物车点击pv`+`医生主页点击pv`+`分享点击pv`+`机构导航点击pv`) as `美购详情页分发pv加总`
    FROM
    (
        SELECT t1.partition_date as `日期`
              ,t1.device_type as `设备类型`
              ,t1.active_type as `活跃类型`
              ,t2.channel as `渠道`
              ,count(case when type='美购详情页' then cl_id end ) as `美购详情页浏览pv`
              ,count(case when type='加购物车' then cl_id end ) as `加购物车点击pv`
              ,count(case when type='评价列表页' then cl_id end ) as `评价列表页点击pv`
              ,count(case when type='立即支付' then cl_id end ) as `立即支付点击pv`
              ,count(case when type='私信及电话咨询' then cl_id end ) as `私信及电话咨询pv`
              ,count(case when type='选择美购项目' then cl_id end ) as `选择美购项目点击pv`
              ,count(case when type='机构主页' then cl_id end ) as `机构主页点击pv`
              ,count(case when type='机构的其他美购' then cl_id end ) as `机构的其他美购点击pv`
              ,count(case when type='推荐美购' then cl_id end ) as `推荐美购点击pv`
              ,count(case when type='收藏' then cl_id end ) as `收藏点击pv`
              ,count(case when type='右上角购物车' then cl_id end ) as `右上角购物车点击pv`
              ,count(case when type='医生主页' then cl_id end ) as `医生主页点击pv`
              ,count(case when type='分享' then cl_id end ) as `分享点击pv`
              ,count(case when type='机构导航' then cl_id end ) as `机构导航点击pv`
        FROM
        (
            SELECT click.partition_date,click.cl_id,click.type,mas.active_type,mas.device_type,mas.channel
            FROM
            (
                -- Page views: devices suspected of institution traffic inflation are removed
                SELECT pv.partition_date as partition_date,pv.cl_id as cl_id,type
                FROM
                (
                    SELECT partition_date,cl_id,'美购详情页' as type
                    FROM online.bl_hdfs_maidian_updates
                    WHERE partition_date >= regexp_replace((current_date - interval '90' day),'-','')
                    AND partition_date < regexp_replace((current_date),'-','')
                    AND page_name = 'welfare_detail'
                    AND action='page_view'
                )pv
                LEFT JOIN
                (   -- Devices flagged in the daily or monthly spam tables with pv_ratio >= 0.95
                    SELECT cl_id
                    FROM online.ml_hospital_spam_pv_day
                    WHERE partition_date>='20180402' AND partition_date<regexp_replace((current_date),'-','')
                    AND pv_ratio>=0.95
                    UNION ALL
                    SELECT cl_id
                    FROM online.ml_hospital_spam_pv_month
                    WHERE partition_date>='20171101' AND partition_date<regexp_replace((current_date),'-','')
                    AND pv_ratio>=0.95
                )spam_pv
                on pv.cl_id=spam_pv.cl_id
                WHERE spam_pv.cl_id IS NULL   -- anti-join: keep only devices with no spam match
                UNION ALL
                -- Click events: a single scan, classified by the CASE below
                SELECT ev.partition_date,ev.cl_id,ev.type
                FROM
                (
                    SELECT partition_date,cl_id
                          ,CASE
                               WHEN page_name = 'welfare_detail' AND action='welfare_multiattribute_click_add'
                                   THEN '加购物车'
                               WHEN page_name = 'welfare_detail' AND action='on_click_navbar_cart'
                                   THEN '右上角购物车'
                               -- view more comments (no page_name restriction)
                               WHEN action='welfare_detail_click_comment'
                                   THEN '评价列表页'
                               -- exposed comment tag, or exposed diary card
                               WHEN page_name = 'welfare_detail'
                                    AND action IN ('service_comment_click_tag','welfare_detail_comment_click_diary_card')
                                   THEN '评价列表页'
                               WHEN page_name = 'welfare_detail' AND action='welfare_multiattribute_click_buy'
                                   THEN '立即支付'
                               -- choose item button, plus the 7.22 release exposed item and view all buttons
                               WHEN page_name = 'welfare_detail' AND action='on_click_button'
                                    AND params['button_name'] IN ('sku_choose','service_sku','sku_all')
                                   THEN '选择美购项目'
                               -- private message button (no page_name restriction)
                               WHEN action='welfare_detail_click_message'
                                   THEN '私信及电话咨询'
                               -- quick consult and appointment buttons, gray released after 7.20
                               WHEN page_name = 'welfare_detail' AND action='on_click_button'
                                    AND params['button_name'] in ('question_tag','appointment')
                                   THEN '私信及电话咨询'
                               -- online consult and phone consult under the institution section
                               WHEN action='welfare_detail_click_curearea_contact'
                                    AND params['connect_type'] in ('onlineconsult','phone')
                                   THEN '私信及电话咨询'
                               WHEN action='welfare_detail_click_curearea' AND params['cure_type']='organization'
                                   THEN '机构主页'
                               WHEN action='welfare_detail_click_seller_service_item'
                                   THEN '机构的其他美购'
                               WHEN action='welfare_detail_click_address'
                                   THEN '机构导航'
                               WHEN page_name = 'welfare_detail' AND action='on_click_card'
                                    AND params['card_content_type']='service'
                                    AND params['tab_name']='推荐'
                                   THEN '推荐美购'
                               WHEN page_name = 'welfare_detail' AND action='on_click_favor'
                                    AND params['favor_type']='service'
                                    AND params['motion']='do'
                                   THEN '收藏'
                               WHEN action='welfare_detail_click_curearea' AND params['cure_type']='doctor'
                                   THEN '医生主页'
                               WHEN page_name = 'welfare_detail' AND action='page_click_share'
                                   THEN '分享'
                           END as type
                    FROM online.bl_hdfs_maidian_updates
                    WHERE partition_date >= regexp_replace((current_date - interval '90' day),'-','')
                    AND partition_date < regexp_replace((current_date),'-','')
                    -- coarse pre-filter on every action referenced by the CASE above
                    AND action IN ('welfare_multiattribute_click_add','on_click_navbar_cart'
                                  ,'welfare_detail_click_comment','service_comment_click_tag'
                                  ,'welfare_detail_comment_click_diary_card','welfare_multiattribute_click_buy'
                                  ,'on_click_button','welfare_detail_click_message'
                                  ,'welfare_detail_click_curearea_contact','welfare_detail_click_curearea'
                                  ,'welfare_detail_click_seller_service_item','welfare_detail_click_address'
                                  ,'on_click_card','on_click_favor','page_click_share')
                )ev
                WHERE ev.type IS NOT NULL   -- drop events that matched no branch
            )click
            JOIN
            (   -- Active devices per day with device type, active type and channel labels
                SELECT partition_date
                      ,device_os_type AS device_type
                      ,CASE WHEN active_type = '4' THEN '老活跃设备'
                            WHEN active_type IN ('1','2') THEN '新增设备' END AS active_type
                      -- each device is counted under its own channel label and under the total label
                      ,array(CASE WHEN tmp.time = 'AI' THEN 'AI' ELSE '其他' END , '合计') as channel
                      ,device_id
                FROM online.ml_device_day_active_status
                LEFT JOIN
                (SELECT phone,time
                 FROM offline.tmp_zhx_20191227
                 WHERE flag='0204_danlei_channel')tmp
                on first_channel_source_type=tmp.phone
                WHERE partition_date>=regexp_replace((current_date - interval '90' day),'-','')
                AND partition_date<regexp_replace((current_date),'-','')
                AND active_type IN ('1','2','4')
                AND first_channel_source_type not IN ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
                    ,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang'
                    ,'js-az1','js-az2','js-az3','js-az4','js-az5','jfq-az1','jfq-az2','jfq-az3','jfq-az4','jfq-az5','toufang1'
                    ,'toufang2','toufang3','toufang4','toufang5','toufang6','TF-toufang1','TF-toufang2','TF-toufang3','TF-toufang4'
                    ,'TF-toufang5','tf-toufang1','tf-toufang2','tf-toufang3','tf-toufang4','tf-toufang5','benzhan','promotion_aso100'
                    ,'promotion_qianka','promotion_xiaoyu','promotion_dianru','promotion_malioaso','promotion_malioaso-shequ'
                    ,'promotion_shike','promotion_julang_jl03','promotion_zuimei','','unknown')
                AND first_channel_source_type not LIKE 'promotion\_jf\_%'
            )mas
            ON click.cl_id=mas.device_id
            AND click.partition_date=mas.partition_date
        )t1
        LATERAL VIEW explode(t1.channel) t2 AS channel
        GROUP BY t1.partition_date,t1.device_type,t1.active_type,t2.channel
    )agg
)T1
ORDER BY `日期` desc,`设备类型`,`活跃类型`,`渠道`
-- Welfare detail page distribution report.
-- Output: one row per day, device type, active type and channel, with the page view
-- PV of the welfare_detail page, the click PV of each outbound action, and each
-- click PV as a percentage of the page view PV.
-- Window: partitions from (current_date - 90 days) up to but excluding today.
-- NOTE(review): on Hive, DATE minus a day INTERVAL may yield a TIMESTAMP. If so the
-- lower bound string carries a time suffix and the oldest day is dropped - confirm
-- on the deployed engine.
-- All click events are classified in ONE scan of online.bl_hdfs_maidian_updates.
-- The WHEN branches are mutually exclusive on action and params, so every event
-- row receives at most one type.
SELECT `日期`,`设备类型`,`活跃类型`,`渠道`
      ,`美购详情页浏览pv`
      ,`美购详情页分发pv加总`
      ,concat(round(`美购详情页分发pv加总`/`美购详情页浏览pv`*100,4),'%') as `美购详情页分发pv占比加总`
      ,`加购物车点击pv`
      ,concat(round(`加购物车点击pv`/`美购详情页浏览pv`*100,4),'%') as `加购物车点击pv占比`
      ,`评价列表页点击pv`
      ,concat(round(`评价列表页点击pv`/`美购详情页浏览pv`*100,4),'%') as `评价列表页点击pv占比`
      ,`立即支付点击pv`
      ,concat(round(`立即支付点击pv`/`美购详情页浏览pv`*100,4),'%') as `立即支付点击pv占比`
      ,`私信及电话咨询pv`
      ,concat(round(`私信及电话咨询pv`/`美购详情页浏览pv`*100,4),'%') as `私信及电话咨询pv占比`
      ,`选择美购项目点击pv`
      ,concat(round(`选择美购项目点击pv`/`美购详情页浏览pv`*100,4),'%') as `选择美购项目点击pv占比`
      ,`机构主页点击pv`
      ,concat(round(`机构主页点击pv`/`美购详情页浏览pv`*100,4),'%') as `机构主页点击pv占比`
      ,`机构的其他美购点击pv`
      ,concat(round(`机构的其他美购点击pv`/`美购详情页浏览pv`*100,4),'%') as `机构的其他美购点击pv占比`
      ,`推荐美购点击pv`
      ,concat(round(`推荐美购点击pv`/`美购详情页浏览pv`*100,4),'%') as `推荐美购点击pv占比`
      ,`收藏点击pv`
      ,concat(round(`收藏点击pv`/`美购详情页浏览pv`*100,4),'%') as `收藏点击pv占比`
      ,`右上角购物车点击pv`
      ,concat(round(`右上角购物车点击pv`/`美购详情页浏览pv`*100,4),'%') as `右上角购物车点击pv占比`
      ,`医生主页点击pv`
      ,concat(round(`医生主页点击pv`/`美购详情页浏览pv`*100,4),'%') as `医生主页点击pv占比`
      ,`分享点击pv`
      ,concat(round(`分享点击pv`/`美购详情页浏览pv`*100,4),'%') as `分享点击pv占比`
      ,`机构导航点击pv`
      ,concat(round(`机构导航点击pv`/`美购详情页浏览pv`*100,4),'%') as `机构导航点击pv占比`
FROM
(
    -- Adds the total of all outbound click PVs once, so the outer query can reuse it
    SELECT agg.*
          ,(`加购物车点击pv`+`评价列表页点击pv`+`立即支付点击pv`+`私信及电话咨询pv` +`选择美购项目点击pv`
            +`机构主页点击pv`+`机构的其他美购点击pv`+`推荐美购点击pv`+`收藏点击pv`
            +`右上角购物车点击pv`+`医生主页点击pv`+`分享点击pv`+`机构导航点击pv`) as `美购详情页分发pv加总`
    FROM
    (
        SELECT t1.partition_date as `日期`
              ,t1.device_type as `设备类型`
              ,t1.active_type as `活跃类型`
              ,t2.channel as `渠道`
              ,count(case when type='美购详情页' then cl_id end ) as `美购详情页浏览pv`
              ,count(case when type='加购物车' then cl_id end ) as `加购物车点击pv`
              ,count(case when type='评价列表页' then cl_id end ) as `评价列表页点击pv`
              ,count(case when type='立即支付' then cl_id end ) as `立即支付点击pv`
              ,count(case when type='私信及电话咨询' then cl_id end ) as `私信及电话咨询pv`
              ,count(case when type='选择美购项目' then cl_id end ) as `选择美购项目点击pv`
              ,count(case when type='机构主页' then cl_id end ) as `机构主页点击pv`
              ,count(case when type='机构的其他美购' then cl_id end ) as `机构的其他美购点击pv`
              ,count(case when type='推荐美购' then cl_id end ) as `推荐美购点击pv`
              ,count(case when type='收藏' then cl_id end ) as `收藏点击pv`
              ,count(case when type='右上角购物车' then cl_id end ) as `右上角购物车点击pv`
              ,count(case when type='医生主页' then cl_id end ) as `医生主页点击pv`
              ,count(case when type='分享' then cl_id end ) as `分享点击pv`
              ,count(case when type='机构导航' then cl_id end ) as `机构导航点击pv`
        FROM
        (
            SELECT click.partition_date,click.cl_id,click.type,mas.active_type,mas.device_type,mas.channel
            FROM
            (
                -- Page views: devices suspected of institution traffic inflation are removed
                SELECT pv.partition_date as partition_date,pv.cl_id as cl_id,type
                FROM
                (
                    SELECT partition_date,cl_id,'美购详情页' as type
                    FROM online.bl_hdfs_maidian_updates
                    WHERE partition_date >= regexp_replace((current_date - interval '90' day),'-','')
                    AND partition_date < regexp_replace((current_date),'-','')
                    AND page_name = 'welfare_detail'
                    AND action='page_view'
                )pv
                LEFT JOIN
                (   -- Devices flagged in the daily or monthly spam tables with pv_ratio >= 0.95
                    SELECT cl_id
                    FROM online.ml_hospital_spam_pv_day
                    WHERE partition_date>='20180402' AND partition_date<regexp_replace((current_date),'-','')
                    AND pv_ratio>=0.95
                    UNION ALL
                    SELECT cl_id
                    FROM online.ml_hospital_spam_pv_month
                    WHERE partition_date>='20171101' AND partition_date<regexp_replace((current_date),'-','')
                    AND pv_ratio>=0.95
                )spam_pv
                on pv.cl_id=spam_pv.cl_id
                WHERE spam_pv.cl_id IS NULL   -- anti-join: keep only devices with no spam match
                UNION ALL
                -- Click events: a single scan, classified by the CASE below
                SELECT ev.partition_date,ev.cl_id,ev.type
                FROM
                (
                    SELECT partition_date,cl_id
                          ,CASE
                               WHEN page_name = 'welfare_detail' AND action='welfare_multiattribute_click_add'
                                   THEN '加购物车'
                               WHEN page_name = 'welfare_detail' AND action='on_click_navbar_cart'
                                   THEN '右上角购物车'
                               -- view more comments (no page_name restriction)
                               WHEN action='welfare_detail_click_comment'
                                   THEN '评价列表页'
                               -- exposed comment tag, or exposed diary card
                               WHEN page_name = 'welfare_detail'
                                    AND action IN ('service_comment_click_tag','welfare_detail_comment_click_diary_card')
                                   THEN '评价列表页'
                               WHEN page_name = 'welfare_detail' AND action='welfare_multiattribute_click_buy'
                                   THEN '立即支付'
                               -- choose item button, plus the 7.22 release exposed item and view all buttons
                               WHEN page_name = 'welfare_detail' AND action='on_click_button'
                                    AND params['button_name'] IN ('sku_choose','service_sku','sku_all')
                                   THEN '选择美购项目'
                               -- private message button (no page_name restriction)
                               WHEN action='welfare_detail_click_message'
                                   THEN '私信及电话咨询'
                               -- quick consult and appointment buttons, gray released after 7.20
                               WHEN page_name = 'welfare_detail' AND action='on_click_button'
                                    AND params['button_name'] in ('question_tag','appointment')
                                   THEN '私信及电话咨询'
                               -- online consult and phone consult under the institution section
                               WHEN action='welfare_detail_click_curearea_contact'
                                    AND params['connect_type'] in ('onlineconsult','phone')
                                   THEN '私信及电话咨询'
                               WHEN action='welfare_detail_click_curearea' AND params['cure_type']='organization'
                                   THEN '机构主页'
                               WHEN action='welfare_detail_click_seller_service_item'
                                   THEN '机构的其他美购'
                               WHEN action='welfare_detail_click_address'
                                   THEN '机构导航'
                               WHEN page_name = 'welfare_detail' AND action='on_click_card'
                                    AND params['card_content_type']='service'
                                    AND params['tab_name']='推荐'
                                   THEN '推荐美购'
                               WHEN page_name = 'welfare_detail' AND action='on_click_favor'
                                    AND params['favor_type']='service'
                                    AND params['motion']='do'
                                   THEN '收藏'
                               WHEN action='welfare_detail_click_curearea' AND params['cure_type']='doctor'
                                   THEN '医生主页'
                               WHEN page_name = 'welfare_detail' AND action='page_click_share'
                                   THEN '分享'
                           END as type
                    FROM online.bl_hdfs_maidian_updates
                    WHERE partition_date >= regexp_replace((current_date - interval '90' day),'-','')
                    AND partition_date < regexp_replace((current_date),'-','')
                    -- coarse pre-filter on every action referenced by the CASE above
                    AND action IN ('welfare_multiattribute_click_add','on_click_navbar_cart'
                                  ,'welfare_detail_click_comment','service_comment_click_tag'
                                  ,'welfare_detail_comment_click_diary_card','welfare_multiattribute_click_buy'
                                  ,'on_click_button','welfare_detail_click_message'
                                  ,'welfare_detail_click_curearea_contact','welfare_detail_click_curearea'
                                  ,'welfare_detail_click_seller_service_item','welfare_detail_click_address'
                                  ,'on_click_card','on_click_favor','page_click_share')
                )ev
                WHERE ev.type IS NOT NULL   -- drop events that matched no branch
            )click
            JOIN
            (   -- Active devices per day with device type, active type and channel labels
                SELECT partition_date
                      ,device_os_type AS device_type
                      ,CASE WHEN active_type = '4' THEN '老活跃设备'
                            WHEN active_type IN ('1','2') THEN '新增设备' END AS active_type
                      -- each device is counted under its own channel label and under the total label
                      ,array(CASE WHEN tmp.time = 'AI' THEN 'AI' ELSE '其他' END , '合计') as channel
                      ,device_id
                FROM online.ml_device_day_active_status
                LEFT JOIN
                (SELECT phone,time
                 FROM offline.tmp_zhx_20191227
                 WHERE flag='0204_danlei_channel')tmp
                on first_channel_source_type=tmp.phone
                WHERE partition_date>=regexp_replace((current_date - interval '90' day),'-','')
                AND partition_date<regexp_replace((current_date),'-','')
                AND active_type IN ('1','2','4')
                AND first_channel_source_type not IN ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
                    ,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang'
                    ,'js-az1','js-az2','js-az3','js-az4','js-az5','jfq-az1','jfq-az2','jfq-az3','jfq-az4','jfq-az5','toufang1'
                    ,'toufang2','toufang3','toufang4','toufang5','toufang6','TF-toufang1','TF-toufang2','TF-toufang3','TF-toufang4'
                    ,'TF-toufang5','tf-toufang1','tf-toufang2','tf-toufang3','tf-toufang4','tf-toufang5','benzhan','promotion_aso100'
                    ,'promotion_qianka','promotion_xiaoyu','promotion_dianru','promotion_malioaso','promotion_malioaso-shequ'
                    ,'promotion_shike','promotion_julang_jl03','promotion_zuimei','','unknown')
                AND first_channel_source_type not LIKE 'promotion\_jf\_%'
            )mas
            ON click.cl_id=mas.device_id
            AND click.partition_date=mas.partition_date
        )t1
        LATERAL VIEW explode(t1.channel) t2 AS channel
        GROUP BY t1.partition_date,t1.device_type,t1.active_type,t2.channel
    )agg
)T1
ORDER BY `日期` desc,`设备类型`,`活跃类型`,`渠道`
-- Sample of 10 rows from online.ml_device_day_active_status for partitions
-- 20200401 through 20200408 (inclusive), limited to active_type 1, 2 and 4.
-- Devices whose first channel is in the exclusion list, or matches promotion_jf_*,
-- are filtered out.
SELECT d.partition_date
      ,d.device_os_type AS device_type
      ,CASE d.active_type
            WHEN '4' THEN '老活'
            WHEN '1' THEN '新增'
            WHEN '2' THEN '新增'
       END AS active_type
      ,d.device_id
FROM online.ml_device_day_active_status d
WHERE d.partition_date >= '20200401'
  AND d.partition_date <= '20200408'
  AND d.active_type IN ('1','2','4')
  AND d.first_channel_source_type NOT IN (
         'yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
        ,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang'
        ,'js-az1','js-az2','js-az3','js-az4','js-az5','jfq-az1','jfq-az2','jfq-az3','jfq-az4','jfq-az5','toufang1'
        ,'toufang2','toufang3','toufang4','toufang5','toufang6','TF-toufang1','TF-toufang2','TF-toufang3','TF-toufang4'
        ,'TF-toufang5','tf-toufang1','tf-toufang2','tf-toufang3','tf-toufang4','tf-toufang5','benzhan','promotion_aso100'
        ,'promotion_qianka','promotion_xiaoyu','promotion_dianru','promotion_malioaso','promotion_malioaso-shequ'
        ,'promotion_shike','promotion_julang_jl03','promotion_zuimei'
      )
  AND d.first_channel_source_type NOT LIKE 'promotion\_jf\_%'
LIMIT 10
\ No newline at end of file
-- Tag dictionary export for the partition supplied through the V_PARYMD parameter.
-- One output row per new tag (name, type, id, is_display, is_online). All related
-- values are gathered into de-duplicated arrays: near-synonyms, synonyms, the
-- attribute names and ids per attribute category, and the mapped old tags.
-- The joins fan out (synonyms x attributes x old tags) and collect_set removes the
-- resulting duplicates.
-- LATERAL VIEW OUTER is used when exploding the near-synonym list so that a tag
-- with an empty or NULL list is still exported, with an empty near-synonym array.
select
    t6.new_tag_name,                                   -- new tag name
    t6.new_tag_type,                                   -- new tag type
    t6.new_tag_id,                                     -- new tag id
    t6.is_display,                                     -- shown in the app or not
    t6.is_online,                                      -- in effect or not
    collect_set(t6.jianyi_word)   as jianyi_word,      -- near-synonyms
    collect_set(t6.like_new_tag)  as tongyi_word,      -- synonyms
    collect_set(t6.fangshi_1f)    as fangshi_1st,      -- level 1 method, name
    collect_set(t6.fangshi_2d)    as fangshi_2nd,      -- level 2 method, name
    collect_set(t6.suqiu_1f)      as suqiu_1st,        -- level 1 appeal, name
    collect_set(t6.suqiu_2d)      as suqiu_2nd,        -- level 2 appeal, name
    collect_set(t6.buwei_1f)      as buwei_1st,        -- level 1 body part, name
    collect_set(t6.buwei_2d)      as buwei_2nd,        -- level 2 body part, name
    collect_set(t6.old_tag_name)  as old_tag_name,     -- old tag name
    collect_set(t6.old_tag_type)  as old_tag_type,     -- old tag type
    collect_set(t6.old_tag_id)    as old_tag_id,       -- old tag id
    collect_set(t6.fangshi_1f_id) as fangshi_1st_id,   -- level 1 method, id
    collect_set(t6.fangshi_2d_id) as fangshi_2nd_id,   -- level 2 method, id
    collect_set(t6.suqiu_1f_id)   as suqiu_1st_id,     -- level 1 appeal, id
    collect_set(t6.suqiu_2d_id)   as suqiu_2nd_id,     -- level 2 appeal, id
    collect_set(t6.buwei_1f_id)   as buwei_1st_id,     -- level 1 body part, id
    collect_set(t6.buwei_2d_id)   as buwei_2nd_id      -- level 2 body part, id
from
(
    select
        t1.name        as new_tag_name,
        t1.tag_type    as new_tag_type,
        t1.id          as new_tag_id,
        t1.is_display  as is_display,
        t1.is_online   as is_online,
        t1.homoionym   as jianyi_word,                 -- near-synonym array
        t2.name        as like_new_tag,                -- synonym
        -- attribute name, routed to one column per aggregate_type value
        (case when t4.aggregate_type='6'  then t4.name else NULL end) as fangshi_1f,
        (case when t4.aggregate_type='2'  then t4.name else NULL end) as fangshi_2d,
        (case when t4.aggregate_type='7'  then t4.name else NULL end) as suqiu_1f,
        (case when t4.aggregate_type='8'  then t4.name else NULL end) as suqiu_2d,
        (case when t4.aggregate_type='10' then t4.name else NULL end) as buwei_1f,
        (case when t4.aggregate_type='3'  then t4.name else NULL end) as buwei_2d,
        t7.old_tag_name as old_tag_name,
        t7.old_tag_type as old_tag_type,
        t5.old_tag_id   as old_tag_id,
        -- attribute id, routed the same way
        (case when t4.aggregate_type='6'  then t4.id else NULL end) as fangshi_1f_id,
        (case when t4.aggregate_type='2'  then t4.id else NULL end) as fangshi_2d_id,
        (case when t4.aggregate_type='7'  then t4.id else NULL end) as suqiu_1f_id,
        (case when t4.aggregate_type='8'  then t4.id else NULL end) as suqiu_2d_id,
        (case when t4.aggregate_type='10' then t4.id else NULL end) as buwei_1f_id,
        (case when t4.aggregate_type='3'  then t4.id else NULL end) as buwei_2d_id
    from
    (   -- new tags with their near-synonyms
        select name,id,is_display,is_online,
               case when tag_type in ('1') then '普通分类'
                    when tag_type in ('2') then '一级分类'
                    when tag_type in ('3') then '二级分类'
               end as tag_type,
               collect_list(hv.homoionym_detail) as homoionym
        from online.tl_hdfs_api_tag_3_0_view
        -- homoionym holds a JSON array string with unicode escapes, json_split turns it into an array
        lateral view outer explode(json_split(homoionym)) hv as homoionym_detail
        where partition_date='$V_PARYMD'
        group by name,id,is_display,is_online,
                 case when tag_type in ('1') then '普通分类'
                      when tag_type in ('2') then '一级分类'
                      when tag_type in ('3') then '二级分类'
                 end
    ) t1
    left join
    (   -- new tag id to synonym
        select name,tag_id from online.tl_hdfs_api_tag_aggregate_view where partition_date='$V_PARYMD'
    ) t2
    on (t1.id=t2.tag_id)
    left join
    (   -- new tag id to attribute id
        select tag_attr_id as tag_ids,tag_id from online.tl_hdfs_api_tag_attr_tag_view where partition_date='$V_PARYMD'
    ) t3
    on (t1.id=t3.tag_id)
    left join
    (   -- attribute id to attribute name and category
        select name,id,aggregate_type from online.tl_hdfs_api_tag_attr_view where partition_date='$V_PARYMD'
    ) t4
    on (t3.tag_ids=t4.id)
    left join
    (   -- new tag id to old tag id
        select old_tag_id,tag_id from online.tl_hdfs_api_tag_map_oldtag_view where partition_date='$V_PARYMD'
    ) t5
    on (t1.id=t5.tag_id)
    left join
    (   -- old tag id to old tag name and type label
        select id,name as old_tag_name,
               case when tag_type in ('1') then '一级分类'
                    when tag_type in ('2') then '二级分类'
                    when tag_type in ('3') then '三级分类'
                    when tag_type in ('4') then '城市'
                    when tag_type in ('5') then '自由添加'
                    when tag_type in ('6') then '医生'
                    when tag_type in ('7') then '医院'
                    when tag_type in ('8') then '频道'
                    when tag_type in ('9') then '省份'
                    when tag_type in ('10') then '国家'
                    when tag_type in ('11') then '运营标签'
               end as old_tag_type
        from online.tl_hdfs_api_tag_view where partition_date='$V_PARYMD'
    ) t7
    on (t7.id=t5.old_tag_id)
)t6
group by t6.new_tag_name,t6.new_tag_type,t6.new_tag_id,t6.is_display,t6.is_online
\ No newline at end of file
select
    -- Tag mapping report: one output row per new-style tag (grouped by name, type,
    -- id, is_display and is_online). Every related value reached through the left
    -- joins below is folded into a de-duplicated array with collect_set, so the
    -- row fan-out caused by joining several one-to-many relations is harmless.
    -- $V_PARYMD is a partition-date placeholder - presumably substituted by the
    -- job runner before execution (TODO confirm).
    t6.new_tag_name,                                        -- new tag name
    t6.new_tag_type,                                        -- new tag type (label text)
    t6.new_tag_id,                                          -- new tag id
    t6.is_display,                                          -- whether shown in the app
    t6.is_online,                                           -- whether in effect
    collect_set(t6.jianyi_word)   as jianyi_word,           -- near-synonyms
    collect_set(t6.like_new_tag)  as tongyi_word,           -- synonyms
    collect_set(t6.fangshi_1f)    as fangshi_1st,           -- level-1 method names
    collect_set(t6.fangshi_2d)    as fangshi_2nd,           -- level-2 method names
    collect_set(t6.suqiu_1f)      as suqiu_1st,             -- level-1 demand names
    collect_set(t6.suqiu_2d)      as suqiu_2nd,             -- level-2 demand names
    collect_set(t6.buwei_1f)      as buwei_1st,             -- level-1 body-part names
    collect_set(t6.buwei_2d)      as buwei_2nd,             -- level-2 body-part names
    collect_set(t6.old_tag_name)  as old_tag_name,          -- old tag names
    collect_set(t6.old_tag_type)  as old_tag_type,          -- old tag types
    collect_set(t6.old_tag_id)    as old_tag_id,            -- old tag ids
    collect_set(t6.fangshi_1f_id) as fangshi_1st_id,        -- level-1 method ids
    collect_set(t6.fangshi_2d_id) as fangshi_2nd_id,        -- level-2 method ids
    collect_set(t6.suqiu_1f_id)   as suqiu_1st_id,          -- level-1 demand ids
    collect_set(t6.suqiu_2d_id)   as suqiu_2nd_id,          -- level-2 demand ids
    collect_set(t6.buwei_1f_id)   as buwei_1st_id,          -- level-1 body-part ids
    collect_set(t6.buwei_2d_id)   as buwei_2nd_id           -- level-2 body-part ids
from
(
    -- Detail rows: each new tag combined with every synonym, attribute and old tag
    -- it maps to. The attribute name and id are pivoted into one column per
    -- aggregate_type so the outer query can collect each category separately.
    -- A non-matching aggregate_type yields NULL, which collect_set skips.
    select
        t1.name       as new_tag_name,                      -- new tag name
        t1.tag_type   as new_tag_type,                      -- new tag type
        t1.id         as new_tag_id,                        -- new tag id
        t1.is_display as is_display,                        -- whether shown in the app
        t1.is_online  as is_online,                         -- whether in effect
        t1.homoionym  as jianyi_word,                       -- near-synonyms (array)
        t2.name       as like_new_tag,                      -- synonym
        (case when t4.aggregate_type='6'  then t4.name else NULL end) as fangshi_1f,    -- level-1 method name
        (case when t4.aggregate_type='2'  then t4.name else NULL end) as fangshi_2d,    -- level-2 method name
        (case when t4.aggregate_type='7'  then t4.name else NULL end) as suqiu_1f,      -- level-1 demand name
        (case when t4.aggregate_type='8'  then t4.name else NULL end) as suqiu_2d,      -- level-2 demand name
        (case when t4.aggregate_type='10' then t4.name else NULL end) as buwei_1f,      -- level-1 body-part name
        (case when t4.aggregate_type='3'  then t4.name else NULL end) as buwei_2d,      -- level-2 body-part name
        t7.old_tag_name as old_tag_name,                    -- old tag name
        t7.old_tag_type as old_tag_type,                    -- old tag type
        t5.old_tag_id   as old_tag_id,                      -- old tag id
        (case when t4.aggregate_type='6'  then t4.id else NULL end) as fangshi_1f_id,   -- level-1 method id
        (case when t4.aggregate_type='2'  then t4.id else NULL end) as fangshi_2d_id,   -- level-2 method id
        (case when t4.aggregate_type='7'  then t4.id else NULL end) as suqiu_1f_id,     -- level-1 demand id
        (case when t4.aggregate_type='8'  then t4.id else NULL end) as suqiu_2d_id,     -- level-2 demand id
        (case when t4.aggregate_type='10' then t4.id else NULL end) as buwei_1f_id,     -- level-1 body-part id
        (case when t4.aggregate_type='3'  then t4.id else NULL end) as buwei_2d_id      -- level-2 body-part id
    from
    (
        -- New tags with their near-synonym list rebuilt as a Hive array.
        -- homoionym holds an escaped JSON array such as ["\u8138\u578b\u77eb\u6b63"]
        -- json_split is presumably a project UDF that decodes it (TODO confirm).
        select name,id,is_display,is_online,
               case when tag_type in ('1') then '普通分类'
                    when tag_type in ('2') then '一级分类'
                    when tag_type in ('3') then '二级分类'
               end as tag_type,
               collect_list(hv.homoionym_detail) as homoionym
        from online.tl_hdfs_api_tag_3_0_view
        -- OUTER keeps tags whose near-synonym list is empty or NULL. A plain
        -- explode emits no row for them, which silently removed those tags from
        -- the report even though all later joins are left joins. collect_list
        -- ignores the NULL produced here, so such tags get an empty array.
        lateral view outer explode(json_split(homoionym)) hv as homoionym_detail
        where partition_date='$V_PARYMD'
        group by name,id,is_display,is_online,
                 case when tag_type in ('1') then '普通分类'
                      when tag_type in ('2') then '一级分类'
                      when tag_type in ('3') then '二级分类'
                 end
    ) t1
    left join
    (
        -- new tag id -> synonym
        select name,tag_id
        from online.tl_hdfs_api_tag_aggregate_view
        where partition_date='$V_PARYMD'
    ) t2
    on (t1.id=t2.tag_id)
    left join
    (
        -- new tag id -> attribute id
        select tag_attr_id as tag_ids,tag_id
        from online.tl_hdfs_api_tag_attr_tag_view
        where partition_date='$V_PARYMD'
    ) t3
    on (t1.id=t3.tag_id)
    left join
    (
        -- attribute id -> attribute name and category (aggregate_type)
        select name,id,aggregate_type
        from online.tl_hdfs_api_tag_attr_view
        where partition_date='$V_PARYMD'
    ) t4
    on (t3.tag_ids=t4.id)
    left join
    (
        -- new tag id -> old tag id
        select old_tag_id,tag_id
        from online.tl_hdfs_api_tag_map_oldtag_view
        where partition_date='$V_PARYMD'
    ) t5
    on (t1.id=t5.tag_id)
    left join
    (
        -- old tag id -> old tag name and type label
        select id,name as old_tag_name,
               case when tag_type in ('1')  then '一级分类'
                    when tag_type in ('2')  then '二级分类'
                    when tag_type in ('3')  then '三级分类'
                    when tag_type in ('4')  then '城市'
                    when tag_type in ('5')  then '自由添加'
                    when tag_type in ('6')  then '医生'
                    when tag_type in ('7')  then '医院'
                    when tag_type in ('8')  then '频道'
                    when tag_type in ('9')  then '省份'
                    when tag_type in ('10') then '国家'
                    when tag_type in ('11') then '运营标签'
               end as old_tag_type
        from online.tl_hdfs_api_tag_view
        where partition_date='$V_PARYMD'
    ) t7
    on (t7.id=t5.old_tag_id)
) t6
group by t6.new_tag_name,t6.new_tag_type,t6.new_tag_id,t6.is_display,t6.is_online
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment