Commit e9a87029 authored by litaolemo

update

parent d23650e0
......@@ -163,7 +163,8 @@ class Crawler_xiaohongshu():
time.sleep(random.randint(1, 2))
data_list = res.json()
if not data_list["data"]:
if not data_list.get("data"):
print("data_list error",data_list)
break
if data_list:
print("get data at releaser: %s page: %s" % (releaser_id, count))
......@@ -176,11 +177,12 @@ class Crawler_xiaohongshu():
continue
time_ts = datetime.datetime.strptime(info_dic["time"], '%Y-%m-%d %H:%M').timestamp()
page_data = self.get_one_page_xiaochengxu(page_id, proxies=proxies_num)
print(page_data)
page_data['release_time'] = int(time_ts*1e3)
page_data['platform'] = 'xiaohongshu'
page_data['doc_id'] = page_id
# print(page_data)
# rds.hset("xiaohongshu", key=page_id, value=json.dumps(page_data))
rds.hset("xiaohongshu", key=page_id, value=json.dumps(page_data))
yield page_data
def releaser_page_by_pc(self, releaserUrl,
......@@ -273,6 +275,7 @@ class Crawler_xiaohongshu():
time_ts = datetime.datetime.strptime(info_dic["time"],'%Y-%m-%d %H:%M').timestamp()
if info_dic["type"] != "normal":
continue
time.sleep(0.5)
page_data = self.get_one_page_xiaochengxu(page_id,proxies=proxies_num)
print(page_data)
title = title
......@@ -313,43 +316,43 @@ if __name__ == '__main__':
test = Crawler_xiaohongshu()
releaserurl = 'https://www.xiaohongshu.com/user/profile/5abbb57211be1027a0c880ae'
url_list = [
"https://www.xiaohongshu.com/user/profile/5abbb57211be1027a0c880ae",
"https://www.xiaohongshu.com/user/profile/5ea6909900000000010057a3",
"https://www.xiaohongshu.com/user/profile/5a03b1f4b1da1412dd070a86",
"https://www.xiaohongshu.com/user/profile/5b6e76419276ee0001bd5740",
"https://www.xiaohongshu.com/user/profile/5c4140500000000006006cb7",
"https://www.xiaohongshu.com/user/profile/5bd2beff7da0890001b5408a",
"https://www.xiaohongshu.com/user/profile/5b5edc5211be1044bcce7824",
"https://www.xiaohongshu.com/user/profile/5b35cce84eacab52fbe15c0b",
"https://www.xiaohongshu.com/user/profile/5efec35c000000000101d75a",
"https://www.xiaohongshu.com/user/profile/5f91428a000000000101d909",
"https://www.xiaohongshu.com/user/profile/5ed49f1200000000010017f0",
"https://www.xiaohongshu.com/user/profile/5ae3f47b11be105fae4b854c",
"https://www.xiaohongshu.com/user/profile/5a9e10fb11be1006adc5b9d5",
"https://www.xiaohongshu.com/user/profile/5d0c3b900000000012013409",
"https://www.xiaohongshu.com/user/profile/5f1013a70000000001005b16",
"https://www.xiaohongshu.com/user/profile/5f5c6d860000000001001787",
"https://www.xiaohongshu.com/user/profile/5eeb18e600000000010062b6",
"https://www.xiaohongshu.com/user/profile/5bab62e9ee80fc0001505980",
"https://www.xiaohongshu.com/user/profile/5f262a610000000001004ea9",
"https://www.xiaohongshu.com/user/profile/5eb6779300000000010045f5",
"https://www.xiaohongshu.com/user/profile/5c855374000000001202ef0c",
"https://www.xiaohongshu.com/user/profile/5ecb6d7300000000010016a4",
"https://www.xiaohongshu.com/user/profile/5f100b2d000000000100138d",
"https://www.xiaohongshu.com/user/profile/5c14ae400000000006016f5d",
"https://www.xiaohongshu.com/user/profile/5bbd28de4c26220001881cbd",
"https://www.xiaohongshu.com/user/profile/5f86b6fc000000000100a5d8",
"https://www.xiaohongshu.com/user/profile/5db16ca20000000001004c02",
"https://www.xiaohongshu.com/user/profile/5ad553bb4eacab34ee9f7d4a",
"https://www.xiaohongshu.com/user/profile/5f12cffd000000000101da61",
"https://www.xiaohongshu.com/user/profile/596d7e4f5e87e722ff1bfd32",
"https://www.xiaohongshu.com/user/profile/5ef17ad00000000001005e1c",
"https://www.xiaohongshu.com/user/profile/5f75a5700000000001007679",
"https://www.xiaohongshu.com/user/profile/5c639f59000000001000c731",
"https://www.xiaohongshu.com/user/profile/5f865cbd0000000001002f01",
"https://www.xiaohongshu.com/user/profile/5eccc58f000000000100753e",
"https://www.xiaohongshu.com/user/profile/5fbe05b4000000000101c88d",
"https://www.xiaohongshu.com/user/profile/5b7d1da7e8ac2b471ee6fef3",
# "https://www.xiaohongshu.com/user/profile/5abbb57211be1027a0c880ae",
# "https://www.xiaohongshu.com/user/profile/5ea6909900000000010057a3",
# "https://www.xiaohongshu.com/user/profile/5a03b1f4b1da1412dd070a86",
# "https://www.xiaohongshu.com/user/profile/5b6e76419276ee0001bd5740",
# "https://www.xiaohongshu.com/user/profile/5c4140500000000006006cb7",
# "https://www.xiaohongshu.com/user/profile/5bd2beff7da0890001b5408a",
# "https://www.xiaohongshu.com/user/profile/5b5edc5211be1044bcce7824",
# "https://www.xiaohongshu.com/user/profile/5b35cce84eacab52fbe15c0b",
# "https://www.xiaohongshu.com/user/profile/5efec35c000000000101d75a",
# "https://www.xiaohongshu.com/user/profile/5f91428a000000000101d909",
# "https://www.xiaohongshu.com/user/profile/5ed49f1200000000010017f0",
# "https://www.xiaohongshu.com/user/profile/5ae3f47b11be105fae4b854c",
# "https://www.xiaohongshu.com/user/profile/5a9e10fb11be1006adc5b9d5",
# "https://www.xiaohongshu.com/user/profile/5d0c3b900000000012013409",
# "https://www.xiaohongshu.com/user/profile/5f1013a70000000001005b16",
# "https://www.xiaohongshu.com/user/profile/5f5c6d860000000001001787",
# "https://www.xiaohongshu.com/user/profile/5eeb18e600000000010062b6",
# "https://www.xiaohongshu.com/user/profile/5bab62e9ee80fc0001505980",
# "https://www.xiaohongshu.com/user/profile/5f262a610000000001004ea9",
# "https://www.xiaohongshu.com/user/profile/5eb6779300000000010045f5",
# "https://www.xiaohongshu.com/user/profile/5c855374000000001202ef0c",
# "https://www.xiaohongshu.com/user/profile/5ecb6d7300000000010016a4",
# "https://www.xiaohongshu.com/user/profile/5f100b2d000000000100138d",
# "https://www.xiaohongshu.com/user/profile/5c14ae400000000006016f5d",
# "https://www.xiaohongshu.com/user/profile/5bbd28de4c26220001881cbd",
# "https://www.xiaohongshu.com/user/profile/5f86b6fc000000000100a5d8",
# "https://www.xiaohongshu.com/user/profile/5db16ca20000000001004c02",
# "https://www.xiaohongshu.com/user/profile/5ad553bb4eacab34ee9f7d4a",
# "https://www.xiaohongshu.com/user/profile/5f12cffd000000000101da61",
# "https://www.xiaohongshu.com/user/profile/596d7e4f5e87e722ff1bfd32",
# "https://www.xiaohongshu.com/user/profile/5ef17ad00000000001005e1c",
# "https://www.xiaohongshu.com/user/profile/5f75a5700000000001007679",
# "https://www.xiaohongshu.com/user/profile/5c639f59000000001000c731",
# "https://www.xiaohongshu.com/user/profile/5f865cbd0000000001002f01",
# "https://www.xiaohongshu.com/user/profile/5eccc58f000000000100753e",
# "https://www.xiaohongshu.com/user/profile/5fbe05b4000000000101c88d",
# "https://www.xiaohongshu.com/user/profile/5b7d1da7e8ac2b471ee6fef3",
"https://www.xiaohongshu.com/user/profile/5a11b22211be101018ba7125",
"https://www.xiaohongshu.com/user/profile/5a76c3c611be107f08bd35b3",
"https://www.xiaohongshu.com/user/profile/5ecb6d7300000000010016a4",
......@@ -405,18 +408,18 @@ if __name__ == '__main__':
"https://www.xiaohongshu.com/user/profile/580e0bc36a6a69043935369d",
"https://www.xiaohongshu.com/user/profile/5d1a17670000000012021d8e",
"https://www.xiaohongshu.com/user/profile/59a830be82ec39155146f421",
"https://www.xiaohongshu.com/user/profile/55efc1b73397db0e969c8fbd",
"https://www.xiaohongshu.com/user/profile/5c8c55220000000010005810",
"https://www.xiaohongshu.com/user/profile/5f337df2000000000101e2b2",
"https://www.xiaohongshu.com/user/profile/5f2111500000000001009b7b",
"https://www.xiaohongshu.com/user/profile/59c840ff44363b497f335cd4",
"https://www.xiaohongshu.com/user/profile/5f8e8508000000000101d70e",
"https://www.xiaohongshu.com/user/profile/5a163e3511be10234e1abffd",
"https://www.xiaohongshu.com/user/profile/5e71f6870000000001005e52",
'https://www.xiaohongshu.com/user/profile/5cca9b3700000000120314c9',
'https://www.xiaohongshu.com/user/profile/5aa0f7bae8ac2b65bfcdaf0e',
'https://www.xiaohongshu.com/user/profile/5c20dd200000000007027c07',
'https://www.xiaohongshu.com/user/profile/5fe1c1ba0000000001006e65',
# "https://www.xiaohongshu.com/user/profile/55efc1b73397db0e969c8fbd",
# "https://www.xiaohongshu.com/user/profile/5c8c55220000000010005810",
# "https://www.xiaohongshu.com/user/profile/5f337df2000000000101e2b2",
# "https://www.xiaohongshu.com/user/profile/5f2111500000000001009b7b",
# "https://www.xiaohongshu.com/user/profile/59c840ff44363b497f335cd4",
# "https://www.xiaohongshu.com/user/profile/5f8e8508000000000101d70e",
# "https://www.xiaohongshu.com/user/profile/5a163e3511be10234e1abffd",
# "https://www.xiaohongshu.com/user/profile/5e71f6870000000001005e52",
# 'https://www.xiaohongshu.com/user/profile/5cca9b3700000000120314c9',
# 'https://www.xiaohongshu.com/user/profile/5aa0f7bae8ac2b65bfcdaf0e',
# 'https://www.xiaohongshu.com/user/profile/5c20dd200000000007027c07',
# 'https://www.xiaohongshu.com/user/profile/5fe1c1ba0000000001006e65',
]
print(len(url_list))
count =0
......
......@@ -131,7 +131,7 @@ def send_file_email(file_path, data_str, email_group=[],
if __name__ == '__main__':
send_file_email("",'',sender="litao@igengmei.com",email_group=["litao@igengmei.com"],email_msg_body_str="test",title_str="test",cc_group=["litao@igengmei.com"],file="/srv/apps/meta_base_code/tmp.log")
send_file_email("",'',sender="litao@igengmei.com",email_group=["litao@igengmei.com"],email_msg_body_str="test",title_str="test",cc_group=["litao@igengmei.com"],file="/srv/apps/readelf/shield.text")
......
......@@ -4,4 +4,5 @@
#conda activate crawler_env
source /srv/envs/crawler/bin/activate
python /srv/apps/crawler_old/crawler/crawler_sys/framework/write_releasers_to_redis.py -p weibo -d 3 -proxies 5 > /data/log/crawler/write_task.log &
python /srv/apps/crawler_old/crawler/crawler_sys/framework/write_releasers_to_redis.py -p xiaohongshu -d 2 -proxies 5 > /data/log/crawler/write_task.log &
#/home/gmuser/.virtualenvs/litao/bin/python3 /srv/apps/crawler/crawler_sys/framework/write_releasers_to_redis.py -p douban -d 1 -proxies 5 > /data/log/crawler/write_task.log &
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment