Commit 27c7d4a2 authored by litaolemo

update

parent f64c7300
@@ -10,6 +10,7 @@ from email.mime.application import MIMEApplication
from email.utils import formataddr
from maintenance.func_send_email_with_file import send_file_email
import zipfile
es = Elasticsearch([
    {
        'host': '172.16.31.17',
@@ -19,6 +20,7 @@ es = Elasticsearch([
        'port': 9200,
    }])
# def zipDir(dirpath, outFullName):
#     """
#     Compress the specified directory
@@ -49,16 +51,15 @@ def send_email_tome():
        # toaddrs5 = "malinxi@igengmei.com"
        toaddrs6 = "litao@igengmei.com"
        # "hi all: attached are the search-term stats for <date> and for the recent period, please check"
        content = 'hi all:附件为' + str(date) + '的搜索词数据统计结果以及近一周的数据统计结果,请查收!'
        zipFile = "/srv/apps/crawler/近一周数据统计结果.xls"
        # out_path = "/srv/apps/crawler/近一周数据统计结果.zip"
        # f = zipfile.ZipFile(zipFile, 'w', zipfile.ZIP_DEFLATED)
        # f.write(out_path)
        # f.close()
        # zipFile = '昨日数据统计结果.xls'
        send_file_email("", "", email_group=["litao@igengmei.com"], title_str=content,
                        email_msg_body_str=content, file=zipFile)
    except Exception as e:
        print(e)
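The commented-out zipfile lines above appear to pass the archive and source paths in reverse: the ZipFile is opened at the .xls path and out_path is added to it. A minimal sketch of compressing the spreadsheet before attaching it, reusing the paths from this function:

import zipfile

xls_path = "/srv/apps/crawler/近一周数据统计结果.xls"  # report written in __main__ below
zip_path = "/srv/apps/crawler/近一周数据统计结果.zip"  # archive to attach instead

# Open the archive for writing and add the spreadsheet under its base name.
with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zf:
    zf.write(xls_path, arcname="近一周数据统计结果.xls")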
@@ -77,7 +78,23 @@ def get_es_word(word):
"should": [{"match_phrase": {"title": {"query": word, "analyzer": "gm_default_index"}}}, "should": [{"match_phrase": {"title": {"query": word, "analyzer": "gm_default_index"}}},
{"match_phrase": {"desc": {"query": word, "analyzer": "gm_default_index"}}}, {"match_phrase": {"desc": {"query": word, "analyzer": "gm_default_index"}}},
{"match_phrase": {"answer": {"query": word, "analyzer": "gm_default_index"}}}], {"match_phrase": {"answer": {"query": word, "analyzer": "gm_default_index"}}}],
"must": [{"term": {"is_online": True}}] "must": [
{
"term": {
"is_online": True
}
}, {
"terms": {
"content_level": [6, 5, 4, 3.5, 3]
}
}, {
"range": {
"content_length": {
"gte": 30
}
}
}],
} }
}, },
} }
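One Elasticsearch subtlety worth flagging here: when a bool query contains must clauses, its should clauses default to optional (minimum_should_match is 0), so as written these totals count every online document at the given content levels whether or not it phrase-matches word. If the phrase match is meant to be required, the bool needs the setting made explicit; a sketch using one of the clauses from above:

body = {
    "query": {
        "bool": {
            "should": [{"match_phrase": {"title": {"query": word, "analyzer": "gm_default_index"}}}],
            "minimum_should_match": 1,  # require at least one should clause to match
            "must": [{"term": {"is_online": True}}],
        }
    },
}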
@@ -98,14 +115,17 @@ def get_es_word(word):
"match_phrase": {"tractate_tag_name": {"query": word, "analyzer": "gm_default_index"}}}, { "match_phrase": {"tractate_tag_name": {"query": word, "analyzer": "gm_default_index"}}}, {
"match_phrase": {"tractate_tag_name_content": {"query": word, "match_phrase": {"tractate_tag_name_content": {"query": word,
"analyzer": "gm_default_index"}}}], "analyzer": "gm_default_index"}}}],
"must": [{"term": {"is_online": True}}] "must": [{"term": {"is_online": True}}, {
"terms":
{"content_level": [6, 5, 4, 3.5, 3]}
}]
} }
}, }
} }
) )
tractate_content_num = results["hits"]["total"] tractate_content_num = results["hits"]["total"]
    ### diary
    results = es.search(
        index='gm-dbmw-diary-read',
        doc_type='diary',
@@ -118,13 +138,107 @@ def get_es_word(word):
                    "should": [{"match_phrase": {"tags": {"query": word, "analyzer": "gm_default_index"}}},
                               {"match_phrase": {"answer": {"query": word, "analyzer": "gm_default_index"}}},
                               {"match_phrase": {"service.name": {"query": word, "analyzer": "gm_default_index"}}}],
                    "must": [{"term": {"is_online": True}},
                             {"term": {"has_cover": True}},
                             {"term": {"is_sink": False}},
                             {"term": {"has_after_cover": True}},
                             {"term": {"has_before_cover": True}},
                             # note: the level floor is a string here, unlike the numeric
                             # content_level lists used in the other queries
                             {"range": {"content_level": {"gte": "3"}}},
                             {"term": {"content_simi_bol_show": 0}}],
                }
            },
        }
    )
    diary_content_num = results["hits"]["total"]
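The same is_online / content_level filters recur in every query in this function and in the dicts added below; a sketch of factoring them into one helper (the helper name and structure are mine, not part of this commit):

BASE_FILTERS = [{"term": {"is_online": True}},
                {"terms": {"content_level": [6, 5, 4, 3.5, 3]}}]

def phrase_query(fields, word, extra_must=None):
    # Any one of `fields` must phrase-match `word`; the base filters always apply.
    should = [{"match_phrase": {f: {"query": word, "analyzer": "gm_default_index"}}}
              for f in fields]
    return {"query": {"bool": {"should": should,
                               "minimum_should_match": 1,
                               "must": BASE_FILTERS + (extra_must or [])}}}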
    diary_query = {
        "query": {
            "bool": {
                "must": [{"term": {"is_online": True}},
                         {"term": {"has_cover": True}},
                         {"term": {"is_sink": False}},
                         {"term": {"has_after_cover": True}},
                         {"term": {"has_before_cover": True}},
                         {"terms": {"content_level": [6, 5, 4, 3.5, 3]}},
                         {"term": {"content_simi_bol_show": 0}}]
            }
        },
        "_source": ["id"]
    }
    answer_query = {
        "query": {
            "bool": {
                "must": [{"term": {"is_online": True}},
                         {"terms": {"content_level": [6, 5, 4, 3.5, 3]}},
                         {"range": {"content_length": {"gte": 30}}}]
            }
        }
    }
    tractate_query = {
        "query": {
            "bool": {
                "must": [{"term": {"is_online": True}},
                         {"terms": {"content_level": [6, 5, 4, 3.5, 3]}}]
            }
        }
    }
    return answer_content_num, tractate_content_num, diary_content_num
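Note that diary_query, answer_query, and tractate_query are defined above but never referenced before the return in this hunk. If the intent is to use them for the same totals, the count API is a lighter-weight option; a sketch reusing the index and doc_type names from the es.search calls above (only the "query" key is passed, since _source is not valid in a count body):

diary_count = es.count(index='gm-dbmw-diary-read', doc_type='diary',
                       body={"query": diary_query["query"]})["count"]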
@@ -175,7 +289,7 @@ if __name__ == "__main__":
                                        charset='utf8',
                                        cursorclass=pymysql.cursors.DictCursor)
    zhengxing_cursor = db_zhengxing_eagle.cursor()
    # widened from days=1 in the previous revision; note this still selects a single day
    date = datetime.datetime.now().date() - datetime.timedelta(days=30)
    sql = 'select keywords,sum(sorted) as nums,uv from api_search_words where is_delete = 0 and create_time = "' + str(
        date) + '" group by keywords order by nums desc'
    print(sql)
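Building SQL by string concatenation works here but leaves quoting to chance; a sketch of the same statement with pymysql parameter binding (same table and columns as above):

sql = ('select keywords, sum(sorted) as nums, uv '
       'from api_search_words '
       'where is_delete = 0 and create_time = %s '
       'group by keywords order by nums desc')
zhengxing_cursor.execute(sql, (date,))  # the driver quotes the date value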
@@ -183,24 +297,24 @@ if __name__ == "__main__":
    zhengxing_cursor.execute(sql)
    data = zhengxing_cursor.fetchall()
    # headers: keyword, searches, uv, diary count, answer count, post count
    tup_title = ("关键词", "搜索次数", "uv", "日记数量", "回答数量", "帖子数量")
    for name in list(data):
        word = name.get("keywords", None)
        num = name.get("nums", 0)
        uv = name.get("uv", 0)
        answer_content_num, tractate_content_num, diary_content_num = get_es_word(word)
        tag_names_list.append([word, num, uv, diary_content_num, answer_content_num, tractate_content_num])
    all_data_day.append(tup_title)
    for item in tag_names_list:
        all_data_day.append(tuple(item))
    path = str(date) + ".xls"
    WritrExcel().write_excel(path, tuple(all_data_day))
    print(u'创建' + path + u'文件成功')  # report the file actually written, not "demo.xls"
    # the window is now 30 days, though the email text and filename still say "近一周" (past week)
    date = datetime.datetime.now().date() - datetime.timedelta(days=30)
    sql = 'select keywords,sum(sorted) as nums,sum(uv) as uvs from api_search_words where is_delete = 0 and create_time >= "' + str(
        date) + '" group by keywords order by nums desc'
@@ -209,22 +323,21 @@ if __name__ == "__main__":
    zhengxing_cursor.execute(sql)
    data = zhengxing_cursor.fetchall()
    tup_title = ("关键词", "搜索次数", "uv", "日记数量", "回答数量", "帖子数量")
    for name in list(data):
        word = name.get("keywords", None)
        sorteds = name.get("nums", 0)
        uv = name.get("uvs", 0)
        answer_content_num, tractate_content_num, diary_content_num = get_es_word(word)
        tag_names_list_week.append([word, sorteds, uv, diary_content_num, answer_content_num, tractate_content_num])
    all_data_week.append(tup_title)
    for item in tag_names_list_week:
        all_data_week.append(tuple(item))
    path = "近一周数据统计结果.xls"  # was "近一数据统计结果.xls"; must match the attachment path in send_email_tome()
    WritrExcel().write_excel(path, tuple(all_data_week))
    print(u'创建' + path + u'文件成功')
    send_email_tome()