Commit 70e16469 authored by litaolemo

update

parent a08b8172
......@@ -14,10 +14,10 @@ import redis
# from pyhive import hive
from maintenance.func_send_email_with_file import send_file_email
from typing import Dict, List
#from elasticsearch_7 import Elasticsearch
#from elasticsearch_7.helpers import scan
from elasticsearch import Elasticsearch
from elasticsearch.helpers import scan
from elasticsearch_7 import Elasticsearch
from elasticsearch_7.helpers import scan
# from elasticsearch import Elasticsearch
# from elasticsearch.helpers import scan
import sys
import time
from pyspark import SparkConf
......@@ -92,22 +92,23 @@ task_list = []
task_days = 3
es_query_tractate = {"query": {
"bool": {
"must": [
{"term": {"is_online": True}},
{"range": {"content_level": {"gte": 3}}},
{"terms": {"operators_add_tags": [3315]}}
"bool": {
"must": [
{"term": {"is_online": True}},
{"range": {"content_level": {"gte": 3}}},
{"terms": {"operators_add_tags": [3315]}}
],
"must_not": [{"term": {"status": 4}},
{"term": {"show_by_index": 2}}
]
}
}
}
tractate_res_scan = scan(client=es,query=es_query_tractate,index="gm-dbmw-tractate-read",doc_type="tractate")
],
"must_not": [{"term": {"status": 4}},
{"term": {"show_by_index": 2}}
]
}
}
}
tractate_res_scan = scan(client=es, query=es_query_tractate, index="gm-dbmw-tractate-read", doc_type="tractate")
tractate_dict = {}
for tractate_json in tractate_res_scan:
print(tractate_json)
_id = tractate_json["_id"]
content_keyword_list = tractate_json["_source"]["content_keyword"]
for content_keyword in content_keyword_list:
......@@ -117,17 +118,17 @@ for tractate_json in tractate_res_scan:
tractate_dict[content_keyword] = 1
es_query_answer = {"query": {
"bool": {
"must": [
{"term": {"is_online": True}},
{"range": {"content_level": {"gte": 3}}},
{"terms": {"operators_add_tags": [3315]}},
{"range": {"content_length": {"gte": 30}}}
]
}
}
}
answer_res_scan = scan(client=es,query=es_query_tractate,index="gm-dbmw-answer-read",doc_type="answer")
"bool": {
"must": [
{"term": {"is_online": True}},
{"range": {"content_level": {"gte": 3}}},
{"terms": {"operators_add_tags": [3315]}},
{"range": {"content_length": {"gte": 30}}}
]
}
}
}
# Scan the answer index with the answer-specific query built just above
# (es_query_answer, which adds the content_length >= 30 filter). The call
# previously passed es_query_tractate, which left es_query_answer unused in the
# visible code and applied the tractate must_not filters (status /
# show_by_index) to answer documents instead.
answer_res_scan = scan(client=es, query=es_query_answer, index="gm-dbmw-answer-read", doc_type="answer")
answer_dict = {}
for answer_json in answer_res_scan:
_id = answer_json["_id"]
......@@ -138,7 +139,6 @@ for answer_json in answer_res_scan:
else:
answer_dict[content_keyword] = 1
for t in range(2, task_days):
day_num = 0 - t
now = (datetime.datetime.now() + datetime.timedelta(days=day_num))
......@@ -283,14 +283,15 @@ WHERE spam_pv.device_id IS NULL
partition_date = today_str
pid = hashlib.md5((partition_date + protratit_type).encode("utf8")).hexdigest()
action_count = portrait_dict["projects"][protratit_type]
answer_count = answer_dict.get(protratit_type,0)
tractate_count = tractate_dict.get(protratit_type,0)
answer_count = answer_dict.get(protratit_type, 0)
tractate_count = tractate_dict.get(protratit_type, 0)
total_count = answer_count + tractate_count
instert_sql = """replace into new_user_project_count(
partition_day,pid,protratit_count,protratit_type,answer_count,tractate_count,total_count) VALUES('{partition_day}','{pid}',{protratit_count},'{protratit_type}',{answer_count},{tractate_count},{total_count});""".format(
partition_day=today_str, pid=pid, protratit_count=action_count
, protratit_type=protratit_type, answer_count=answer_count, tractate_count=tractate_count, total_count=total_count
, protratit_type=protratit_type, answer_count=answer_count, tractate_count=tractate_count,
total_count=total_count
)
print(instert_sql)
# cursor.execute("set names 'UTF8'")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment