Commit 70e16469 authored by litaolemo

update

parent a08b8172
...@@ -14,10 +14,10 @@ import redis ...@@ -14,10 +14,10 @@ import redis
# from pyhive import hive # from pyhive import hive
from maintenance.func_send_email_with_file import send_file_email from maintenance.func_send_email_with_file import send_file_email
from typing import Dict, List from typing import Dict, List
#from elasticsearch_7 import Elasticsearch from elasticsearch_7 import Elasticsearch
#from elasticsearch_7.helpers import scan from elasticsearch_7.helpers import scan
from elasticsearch import Elasticsearch # from elasticsearch import Elasticsearch
from elasticsearch.helpers import scan # from elasticsearch.helpers import scan
import sys import sys
import time import time
from pyspark import SparkConf from pyspark import SparkConf
...@@ -103,11 +103,12 @@ es_query_tractate = {"query": { ...@@ -103,11 +103,12 @@ es_query_tractate = {"query": {
{"term": {"show_by_index": 2}} {"term": {"show_by_index": 2}}
] ]
} }
} }
} }
tractate_res_scan = scan(client=es,query=es_query_tractate,index="gm-dbmw-tractate-read",doc_type="tractate") tractate_res_scan = scan(client=es, query=es_query_tractate, index="gm-dbmw-tractate-read", doc_type="tractate")
tractate_dict = {} tractate_dict = {}
for tractate_json in tractate_res_scan: for tractate_json in tractate_res_scan:
print(tractate_json)
_id = tractate_json["_id"] _id = tractate_json["_id"]
content_keyword_list = tractate_json["_source"]["content_keyword"] content_keyword_list = tractate_json["_source"]["content_keyword"]
for content_keyword in content_keyword_list: for content_keyword in content_keyword_list:
...@@ -125,9 +126,9 @@ es_query_answer = {"query": { ...@@ -125,9 +126,9 @@ es_query_answer = {"query": {
{"range": {"content_length": {"gte": 30}}} {"range": {"content_length": {"gte": 30}}}
] ]
} }
} }
} }
answer_res_scan = scan(client=es,query=es_query_tractate,index="gm-dbmw-answer-read",doc_type="answer") answer_res_scan = scan(client=es, query=es_query_tractate, index="gm-dbmw-answer-read", doc_type="answer")
answer_dict = {} answer_dict = {}
for answer_json in answer_res_scan: for answer_json in answer_res_scan:
_id = answer_json["_id"] _id = answer_json["_id"]
...@@ -138,7 +139,6 @@ for answer_json in answer_res_scan: ...@@ -138,7 +139,6 @@ for answer_json in answer_res_scan:
else: else:
answer_dict[content_keyword] = 1 answer_dict[content_keyword] = 1
for t in range(2, task_days): for t in range(2, task_days):
day_num = 0 - t day_num = 0 - t
now = (datetime.datetime.now() + datetime.timedelta(days=day_num)) now = (datetime.datetime.now() + datetime.timedelta(days=day_num))
...@@ -283,14 +283,15 @@ WHERE spam_pv.device_id IS NULL ...@@ -283,14 +283,15 @@ WHERE spam_pv.device_id IS NULL
partition_date = today_str partition_date = today_str
pid = hashlib.md5((partition_date + protratit_type).encode("utf8")).hexdigest() pid = hashlib.md5((partition_date + protratit_type).encode("utf8")).hexdigest()
action_count = portrait_dict["projects"][protratit_type] action_count = portrait_dict["projects"][protratit_type]
answer_count = answer_dict.get(protratit_type,0) answer_count = answer_dict.get(protratit_type, 0)
tractate_count = tractate_dict.get(protratit_type,0) tractate_count = tractate_dict.get(protratit_type, 0)
total_count = answer_count + tractate_count total_count = answer_count + tractate_count
instert_sql = """replace into new_user_project_count( instert_sql = """replace into new_user_project_count(
partition_day,pid,protratit_count,protratit_type,answer_count,tractate_count,total_count) VALUES('{partition_day}','{pid}',{protratit_count},'{protratit_type}',{answer_count},{tractate_count},{total_count});""".format( partition_day,pid,protratit_count,protratit_type,answer_count,tractate_count,total_count) VALUES('{partition_day}','{pid}',{protratit_count},'{protratit_type}',{answer_count},{tractate_count},{total_count});""".format(
partition_day=today_str, pid=pid, protratit_count=action_count partition_day=today_str, pid=pid, protratit_count=action_count
, protratit_type=protratit_type, answer_count=answer_count, tractate_count=tractate_count, total_count=total_count , protratit_type=protratit_type, answer_count=answer_count, tractate_count=tractate_count,
total_count=total_count
) )
print(instert_sql) print(instert_sql)
# cursor.execute("set names 'UTF8'") # cursor.execute("set names 'UTF8'")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment