Commit 27c7d4a2 authored by litaolemo's avatar litaolemo

update

parent f64c7300
......@@ -10,6 +10,7 @@ from email.mime.application import MIMEApplication
from email.utils import formataddr
from maintenance.func_send_email_with_file import send_file_email
import zipfile
es = Elasticsearch([
{
'host': '172.16.31.17',
......@@ -19,6 +20,7 @@ es = Elasticsearch([
'port': 9200,
}])
# def zipDir(dirpath,outFullName):
# """
# 压缩指定文件夹
......@@ -49,16 +51,15 @@ def send_email_tome():
# toaddrs5 = "malinxi@igengmei.com"
toaddrs6 = "litao@igengmei.com"
content = 'hi all:附件为' + str(date) + '的搜索词数据统计结果以及近一周的数据统计结果,请查收!'
zipFile = "/srv/apps/crawler/近一周数据统计结果.xls"
# out_path = "/srv/apps/crawler/近一周数据统计结果.zip"
# f = zipfile.ZipFile(zipFile, 'w', zipfile.ZIP_DEFLATED)
# f.write(out_path)
# f.close()
#zipFile = '昨日数据统计结果.xls'
send_file_email("","",email_group=["litao@igengmei.com"],title_str=content
,email_msg_body_str=content,file=zipFile)
# zipFile = '昨日数据统计结果.xls'
send_file_email("", "", email_group=["litao@igengmei.com"], title_str=content
, email_msg_body_str=content, file=zipFile)
except Exception as e:
print(e)
......@@ -77,7 +78,23 @@ def get_es_word(word):
"should": [{"match_phrase": {"title": {"query": word, "analyzer": "gm_default_index"}}},
{"match_phrase": {"desc": {"query": word, "analyzer": "gm_default_index"}}},
{"match_phrase": {"answer": {"query": word, "analyzer": "gm_default_index"}}}],
"must": [{"term": {"is_online": True}}]
"must": [
{
"term": {
"is_online": True
}
}, {
"terms": {
"content_level": [6, 5, 4, 3.5, 3]
}
}, {
"range": {
"content_length": {
"gte": 30
}
}
}],
}
},
}
......@@ -98,14 +115,17 @@ def get_es_word(word):
"match_phrase": {"tractate_tag_name": {"query": word, "analyzer": "gm_default_index"}}}, {
"match_phrase": {"tractate_tag_name_content": {"query": word,
"analyzer": "gm_default_index"}}}],
"must": [{"term": {"is_online": True}}]
"must": [{"term": {"is_online": True}}, {
"terms":
{"content_level": [6, 5, 4, 3.5, 3]}
}]
}
},
}
}
)
tractate_content_num = results["hits"]["total"]
###diary
###diary 日记
results = es.search(
index='gm-dbmw-diary-read',
doc_type='diary',
......@@ -118,13 +138,107 @@ def get_es_word(word):
"should": [{"match_phrase": {"tags": {"query": word, "analyzer": "gm_default_index"}}},
{"match_phrase": {"answer": {"query": word, "analyzer": "gm_default_index"}}},
{"match_phrase": {"service.name": {"query": word, "analyzer": "gm_default_index"}}}],
"must": [{"term": {"is_online": True}}, {"range": {"content_level": {"gte": "3"}}}]
"must": [{"term": {"is_online": True}}, {
"term": {
"has_cover": True
}
}, {"term": {
"is_sink": False
}
}, {
"term": {
"has_after_cover": True
}
}, {
"term": {
"has_before_cover": True
}
}, {"range": {"content_level": {"gte": "3"}}},
{
"term": {
"content_simi_bol_show": 0
}
}
]
}
},
}
)
diary_content_num = results["hits"]["total"]
diary_query = {
"query": {
"bool": {
"must": [{
"term": {
"is_online": True
}
}, {
"term": {
"has_cover": True
}
}, {
"term": {
"is_sink": False
}
}, {
"term": {
"has_after_cover": True
}
}, {
"term": {
"has_before_cover": True
}
}, {
"terms": {
"content_level": [6, 5, 4, 3.5, 3]
}
}, {
"term": {
"content_simi_bol_show": 0
}
}]
}
},
"_source": ["id"]
}
answer_query = {
"query": {
"bool": {
"must": [{
"term": {
"is_online": True
}
}, {
"terms": {
"content_level": [6, 5, 4, 3.5, 3]
}
}, {
"range": {
"content_length": {
"gte": 30
}
}
}]
}
}
}
tractate_query = {
"query": {
"bool": {
"must": [{
"term": {
"is_online": True
}
}, {
"terms": {
"content_level": [6, 5, 4, 3.5, 3]
}
}]
}
}
}
return answer_content_num, tractate_content_num, diary_content_num
......@@ -175,7 +289,7 @@ if __name__ == "__main__":
charset='utf8',
cursorclass=pymysql.cursors.DictCursor)
zhengxing_cursor = db_zhengxing_eagle.cursor()
date = datetime.datetime.now().date() - datetime.timedelta(days=1)
date = datetime.datetime.now().date() - datetime.timedelta(days=30)
sql = 'select keywords,sum(sorted) as nums,uv from api_search_words where is_delete = 0 and create_time = "' + str(
date) + '" group by keywords order by nums desc'
print(sql)
......@@ -183,24 +297,24 @@ if __name__ == "__main__":
zhengxing_cursor.execute(sql)
data = zhengxing_cursor.fetchall()
tup_title = ("关键词", "搜索次数","uv", "日记数量", "回答数量", "帖子数量")
tup_title = ("关键词", "搜索次数", "uv", "日记数量", "回答数量", "帖子数量")
for name in list(data):
word = name.get("keywords", None)
num = name.get("nums", 0)
uv = name.get("uv",0)
uv = name.get("uv", 0)
answer_content_num, tractate_content_num, diary_content_num = get_es_word(word)
tag_names_list.append([word, num,uv, diary_content_num, answer_content_num, tractate_content_num])
tag_names_list.append([word, num, uv, diary_content_num, answer_content_num, tractate_content_num])
all_data_day.append(tup_title)
for item in tag_names_list:
all_data_day.append(tuple(item))
path = str(date)+".xls"
path = str(date) + ".xls"
WritrExcel().write_excel(path, tuple(all_data_day))
print(u'创建demo.xls文件成功')
date = datetime.datetime.now().date() - datetime.timedelta(days=7)
date = datetime.datetime.now().date() - datetime.timedelta(days=30)
sql = 'select keywords,sum(sorted) as nums,sum(uv) as uvs from api_search_words where is_delete = 0 and create_time >= "' + str(
date) + '" group by keywords order by nums desc'
......@@ -209,22 +323,21 @@ if __name__ == "__main__":
zhengxing_cursor.execute(sql)
data = zhengxing_cursor.fetchall()
tup_title = ("关键词", "搜索次数", "uv","日记数量", "回答数量", "帖子数量")
tup_title = ("关键词", "搜索次数", "uv", "日记数量", "回答数量", "帖子数量")
for name in list(data):
word = name.get("keywords", None)
sorteds = name.get("nums", 0)
uv = name.get("uvs",0)
uv = name.get("uvs", 0)
answer_content_num, tractate_content_num, diary_content_num = get_es_word(word)
tag_names_list_week.append([word, sorteds,uv, diary_content_num, answer_content_num, tractate_content_num])
tag_names_list_week.append([word, sorteds, uv, diary_content_num, answer_content_num, tractate_content_num])
all_data_week.append(tup_title)
for item in tag_names_list_week:
all_data_week.append(tuple(item))
path = "近一数据统计结果.xls"
path = "近一数据统计结果.xls"
WritrExcel().write_excel(path, tuple(all_data_week))
print(u'创建demo.xls文件成功')
send_email_tome()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment