Commit 699ae037 authored by litaolemo's avatar litaolemo

update

parent 2838ff4e
......@@ -5,4 +5,4 @@
3. source /root/anaconda3/bin/activate
4. 激活/退出虚拟环境 conda activate crawler_env / conda deactivate
5. 抓取程序 python /srv/apps/crawler/crawler_sys/framework/update_data_in_target_releasers_multi_process_by_date_from_redis.py
6. 写入抓取url程序 python /srv/apps/crawler/crawler_sys/framework/write_releasers_to_redis.py -p weibo -d 1 -proxies 5
\ No newline at end of file
6. 写入抓取url程序 python /srv/apps/crawler/crawler_sys/framework/write_releasers_to_redis.py -p weibo -d 1 -proxies 2
\ No newline at end of file
......@@ -33,6 +33,7 @@ def feed_url_into_redis(dict_Lst, platform,
print('Pushed data into redis')
return True
def pull_url_from_es(platform, release_time_lower_bdr=None):
"""
Just pull urls from es index crawler-url-register.
......
# -*- coding:UTF-8 -*-
# @Time : 2020/7/28 16:31
# @File : __init__.py
# @email : litao@igengmei.com
# @author : litao
\ No newline at end of file
......@@ -80,7 +80,7 @@ class CrawlerDouban():
comment_count = trans_play_count(page_json["comments_count"])
favorite_count = trans_play_count(page_json["like_count"])
collection_count = trans_play_count(page_json["collections_count"])
img_list = re.findall('img src=".*?"',content)
img_list = re.findall(r'"(http.*?[jpg|webp]{1}?)"',content)
dic = {
"content":content,
"repost_count":repost_count,
......@@ -185,7 +185,7 @@ class CrawlerDouban():
res_dic["doc_id"] = doc_id
res_dic.update(self.get_single_page(mid,proxies_num))
# print(res_dic)
yield res_dic
except Exception as e:
print("single data parse error %s " %e)
......
git+ssh://git@git.wanmeizhensuo.com/backend/gm-types.git@master
lxml==4.5.1
requests==2.23.0
tqdm==4.46.1
......@@ -5,7 +6,6 @@ absl-py==0.9.0
kdl==0.2.15
redis==3.5.3
elasticsearch==7.8.0
qiniu==7.2.8
aiohttp==3.6.2
bs4==4.9.1
selenium==3.141.0
......@@ -13,3 +13,4 @@ fontTools==4.13.0
numpy==1.19.1
pandas==1.0.5
pymysql==0.10.0
qiniu==7.1.4
\ No newline at end of file
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment