1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# -*- coding: utf-8 -*-
"""
Created on Fri Nov 23 14:44:07 2018
@author: fangyucheng
"""
import os

import elasticsearch.helpers
from elasticsearch import Elasticsearch

from crawler.crawler_sys.utils.trans_format import lst_to_csv
from crawler.crawler_sys.utils.trans_format import str_file_to_lst
from crawler.crawler_sys.utils.trans_format import str_lst_to_file
#from crawler.crawler_sys.utils.trans_format import csv_to_lst_with_headline
# Elasticsearch connection settings.
# Every value may be overridden through an environment variable so the script
# can be pointed at another cluster without editing the source; the literals
# below are the original values, kept as defaults for backward compatibility.
# NOTE(review): a real password is committed here -- rotate it and remove the
# default once ES_PASSWORD is provisioned in the runtime environment.
hosts = os.environ.get('ES_HOSTS', '192.168.17.11')
port = int(os.environ.get('ES_PORT', 80))
user_id = os.environ.get('ES_USER', 'fangyucheng')
password = os.environ.get('ES_PASSWORD', 'VK0FkWf1fV8f')
http_auth = (user_id, password)

# Module-level client shared by init_task_list() and the lookup loop below.
es_connection = Elasticsearch(hosts=hosts, port=port, http_auth=http_auth)
def init_task_list(file_path=None):
    """Build the list of tasks to process.

    Args:
        file_path: Optional path handed to ``str_file_to_lst``. When it is
            ``None`` the tasks are read from Elasticsearch instead.

    Returns:
        Whatever ``str_file_to_lst(file_path)`` returns when a path is given;
        otherwise a list holding the ``_source`` of every document yielded by
        scanning the ``album-play-count`` index.
    """
    if file_path is not None:
        return str_file_to_lst(file_path)

    # No file supplied: walk the whole index and keep each document body.
    scanned_docs = elasticsearch.helpers.scan(es_connection,
                                              index='album-play-count')
    return [doc['_source'] for doc in scanned_docs]
def _extract_url(task):
    """Return the url carried by one task entry, or None if it has none.

    A task is either a dict with a ``url`` key or the url string itself.
    """
    if isinstance(task, dict):
        return task.get('url')
    if isinstance(task, str):
        return task
    return None


# Look up every task url in the "test2" index and split the outcome into
# documents that were found (written as csv) and urls without any hit
# (written as a plain list file).
unsolve_lst = []
result_lst2 = []
task_list = str_file_to_lst('F:/add_target_releaser/album_play_count/dec')
# To take the tasks from Elasticsearch instead, use: task_list = init_task_list()
for line in task_list:
    url = _extract_url(line)
    if url is None:
        # Previously such an entry either raised (and was silently swallowed)
        # or re-used the url left over from the previous iteration.
        print("skip task without url: %r" % (line,))
        continue

    search_body = {"query": {"bool": {"filter": [{"term": {"url.keyword": url}}]}}}
    try:
        search = es_connection.search(index="test2", doc_type="dec", body=search_body)
        # Test the hit list rather than hits.total: the total is a plain int
        # on some server versions and an object on others, while an empty hit
        # list means "nothing found" on all of them.
        hits = search["hits"]["hits"]
        video_data = hits[0]["_source"] if hits else None
    except Exception as err:
        # Best effort: one failing lookup must not abort the whole batch, but
        # it should be visible instead of being dropped silently.
        print("search failed at %s: %s" % (url, err))
        continue

    if not hits:
        unsolve_lst.append(url)
        print("can not get video data at %s" % url)
    else:
        result_lst2.append(video_data)
        print("get playcount at %s" % url)

lst_to_csv(listname=result_lst2,
           csvname="F:/add_target_releaser/last_month/fix_play_count12242.csv")
str_lst_to_file(unsolve_lst,
                filename="F:/add_target_releaser/last_month/unsolved")