Commit aff367ce authored by 向万

add new function

parent b1359e35
...@@ -22,10 +22,10 @@ from crawler_sys.framework.es_ccr_index_defination import doc_type_url_register ...@@ -22,10 +22,10 @@ from crawler_sys.framework.es_ccr_index_defination import doc_type_url_register
from crawler_sys.framework.es_ccr_index_defination import fields_url_register from crawler_sys.framework.es_ccr_index_defination import fields_url_register
from write_data_into_es.func_cal_doc_id import cal_doc_id from write_data_into_es.func_cal_doc_id import cal_doc_id
from crawler_sys.utils.write_into_file import write_str_into_file from crawler_sys.utils.write_into_file import write_str_into_file
from crawler.crawler_sys.proxy_pool.func_get_proxy_form_kuaidaili import get_proxy from crawler_sys.proxy_pool.func_get_proxy_form_kuaidaili import get_proxy
from lxml import html from lxml import html
from lxml.html.clean import Cleaner from lxml.html.clean import Cleaner
from crawler.gm_upload.gm_upload import upload, upload_file from gm_upload.gm_upload import upload, upload_file
index_site_crawler = 'crawler-data-raw' index_site_crawler = 'crawler-data-raw'
doc_type_site_crawler = 'doc' doc_type_site_crawler = 'doc'
...@@ -344,6 +344,22 @@ def retry_get_url(url, retrys=3, proxies=None, timeout=10, **kwargs): ...@@ -344,6 +344,22 @@ def retry_get_url(url, retrys=3, proxies=None, timeout=10, **kwargs):
get_resp = requests.get(url, proxies=proxies_dic, timeout=timeout, **kwargs) get_resp = requests.get(url, proxies=proxies_dic, timeout=timeout, **kwargs)
else: else:
get_resp = requests.get(url, timeout=timeout, **kwargs) get_resp = requests.get(url, timeout=timeout, **kwargs)
return get_resp
except Exception as e:
retry_c += 1
time.sleep(1)
print(e)
print('Failed to get page %s after %d retries, %s'
% (url, retrys, datetime.datetime.now()))
return None
def retry_get_url_no_proxies(url, retrys=3, proxies=None, timeout=10, **kwargs):
retry_c = 0
while retry_c < retrys:
try:
get_resp = requests.get(url, timeout=timeout, **kwargs)
return get_resp return get_resp
except Exception as e: except Exception as e:
retry_c += 1 retry_c += 1
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment