Commit e9a87029 authored by litaolemo

update

parent d23650e0
...@@ -163,7 +163,8 @@ class Crawler_xiaohongshu(): ...@@ -163,7 +163,8 @@ class Crawler_xiaohongshu():
time.sleep(random.randint(1, 2)) time.sleep(random.randint(1, 2))
data_list = res.json() data_list = res.json()
if not data_list["data"]: if not data_list.get("data"):
print("data_list error",data_list)
break break
if data_list: if data_list:
print("get data at releaser: %s page: %s" % (releaser_id, count)) print("get data at releaser: %s page: %s" % (releaser_id, count))
...@@ -176,11 +177,12 @@ class Crawler_xiaohongshu(): ...@@ -176,11 +177,12 @@ class Crawler_xiaohongshu():
continue continue
time_ts = datetime.datetime.strptime(info_dic["time"], '%Y-%m-%d %H:%M').timestamp() time_ts = datetime.datetime.strptime(info_dic["time"], '%Y-%m-%d %H:%M').timestamp()
page_data = self.get_one_page_xiaochengxu(page_id, proxies=proxies_num) page_data = self.get_one_page_xiaochengxu(page_id, proxies=proxies_num)
print(page_data)
page_data['release_time'] = int(time_ts*1e3) page_data['release_time'] = int(time_ts*1e3)
page_data['platform'] = 'xiaohongshu' page_data['platform'] = 'xiaohongshu'
page_data['doc_id'] = page_id page_data['doc_id'] = page_id
# print(page_data) # print(page_data)
# rds.hset("xiaohongshu", key=page_id, value=json.dumps(page_data)) rds.hset("xiaohongshu", key=page_id, value=json.dumps(page_data))
yield page_data yield page_data
def releaser_page_by_pc(self, releaserUrl, def releaser_page_by_pc(self, releaserUrl,
...@@ -273,6 +275,7 @@ class Crawler_xiaohongshu(): ...@@ -273,6 +275,7 @@ class Crawler_xiaohongshu():
time_ts = datetime.datetime.strptime(info_dic["time"],'%Y-%m-%d %H:%M').timestamp() time_ts = datetime.datetime.strptime(info_dic["time"],'%Y-%m-%d %H:%M').timestamp()
if info_dic["type"] != "normal": if info_dic["type"] != "normal":
continue continue
time.sleep(0.5)
page_data = self.get_one_page_xiaochengxu(page_id,proxies=proxies_num) page_data = self.get_one_page_xiaochengxu(page_id,proxies=proxies_num)
print(page_data) print(page_data)
title = title title = title
...@@ -313,43 +316,43 @@ if __name__ == '__main__': ...@@ -313,43 +316,43 @@ if __name__ == '__main__':
test = Crawler_xiaohongshu() test = Crawler_xiaohongshu()
releaserurl = 'https://www.xiaohongshu.com/user/profile/5abbb57211be1027a0c880ae' releaserurl = 'https://www.xiaohongshu.com/user/profile/5abbb57211be1027a0c880ae'
url_list = [ url_list = [
"https://www.xiaohongshu.com/user/profile/5abbb57211be1027a0c880ae", # "https://www.xiaohongshu.com/user/profile/5abbb57211be1027a0c880ae",
"https://www.xiaohongshu.com/user/profile/5ea6909900000000010057a3", # "https://www.xiaohongshu.com/user/profile/5ea6909900000000010057a3",
"https://www.xiaohongshu.com/user/profile/5a03b1f4b1da1412dd070a86", # "https://www.xiaohongshu.com/user/profile/5a03b1f4b1da1412dd070a86",
"https://www.xiaohongshu.com/user/profile/5b6e76419276ee0001bd5740", # "https://www.xiaohongshu.com/user/profile/5b6e76419276ee0001bd5740",
"https://www.xiaohongshu.com/user/profile/5c4140500000000006006cb7", # "https://www.xiaohongshu.com/user/profile/5c4140500000000006006cb7",
"https://www.xiaohongshu.com/user/profile/5bd2beff7da0890001b5408a", # "https://www.xiaohongshu.com/user/profile/5bd2beff7da0890001b5408a",
"https://www.xiaohongshu.com/user/profile/5b5edc5211be1044bcce7824", # "https://www.xiaohongshu.com/user/profile/5b5edc5211be1044bcce7824",
"https://www.xiaohongshu.com/user/profile/5b35cce84eacab52fbe15c0b", # "https://www.xiaohongshu.com/user/profile/5b35cce84eacab52fbe15c0b",
"https://www.xiaohongshu.com/user/profile/5efec35c000000000101d75a", # "https://www.xiaohongshu.com/user/profile/5efec35c000000000101d75a",
"https://www.xiaohongshu.com/user/profile/5f91428a000000000101d909", # "https://www.xiaohongshu.com/user/profile/5f91428a000000000101d909",
"https://www.xiaohongshu.com/user/profile/5ed49f1200000000010017f0", # "https://www.xiaohongshu.com/user/profile/5ed49f1200000000010017f0",
"https://www.xiaohongshu.com/user/profile/5ae3f47b11be105fae4b854c", # "https://www.xiaohongshu.com/user/profile/5ae3f47b11be105fae4b854c",
"https://www.xiaohongshu.com/user/profile/5a9e10fb11be1006adc5b9d5", # "https://www.xiaohongshu.com/user/profile/5a9e10fb11be1006adc5b9d5",
"https://www.xiaohongshu.com/user/profile/5d0c3b900000000012013409", # "https://www.xiaohongshu.com/user/profile/5d0c3b900000000012013409",
"https://www.xiaohongshu.com/user/profile/5f1013a70000000001005b16", # "https://www.xiaohongshu.com/user/profile/5f1013a70000000001005b16",
"https://www.xiaohongshu.com/user/profile/5f5c6d860000000001001787", # "https://www.xiaohongshu.com/user/profile/5f5c6d860000000001001787",
"https://www.xiaohongshu.com/user/profile/5eeb18e600000000010062b6", # "https://www.xiaohongshu.com/user/profile/5eeb18e600000000010062b6",
"https://www.xiaohongshu.com/user/profile/5bab62e9ee80fc0001505980", # "https://www.xiaohongshu.com/user/profile/5bab62e9ee80fc0001505980",
"https://www.xiaohongshu.com/user/profile/5f262a610000000001004ea9", # "https://www.xiaohongshu.com/user/profile/5f262a610000000001004ea9",
"https://www.xiaohongshu.com/user/profile/5eb6779300000000010045f5", # "https://www.xiaohongshu.com/user/profile/5eb6779300000000010045f5",
"https://www.xiaohongshu.com/user/profile/5c855374000000001202ef0c", # "https://www.xiaohongshu.com/user/profile/5c855374000000001202ef0c",
"https://www.xiaohongshu.com/user/profile/5ecb6d7300000000010016a4", # "https://www.xiaohongshu.com/user/profile/5ecb6d7300000000010016a4",
"https://www.xiaohongshu.com/user/profile/5f100b2d000000000100138d", # "https://www.xiaohongshu.com/user/profile/5f100b2d000000000100138d",
"https://www.xiaohongshu.com/user/profile/5c14ae400000000006016f5d", # "https://www.xiaohongshu.com/user/profile/5c14ae400000000006016f5d",
"https://www.xiaohongshu.com/user/profile/5bbd28de4c26220001881cbd", # "https://www.xiaohongshu.com/user/profile/5bbd28de4c26220001881cbd",
"https://www.xiaohongshu.com/user/profile/5f86b6fc000000000100a5d8", # "https://www.xiaohongshu.com/user/profile/5f86b6fc000000000100a5d8",
"https://www.xiaohongshu.com/user/profile/5db16ca20000000001004c02", # "https://www.xiaohongshu.com/user/profile/5db16ca20000000001004c02",
"https://www.xiaohongshu.com/user/profile/5ad553bb4eacab34ee9f7d4a", # "https://www.xiaohongshu.com/user/profile/5ad553bb4eacab34ee9f7d4a",
"https://www.xiaohongshu.com/user/profile/5f12cffd000000000101da61", # "https://www.xiaohongshu.com/user/profile/5f12cffd000000000101da61",
"https://www.xiaohongshu.com/user/profile/596d7e4f5e87e722ff1bfd32", # "https://www.xiaohongshu.com/user/profile/596d7e4f5e87e722ff1bfd32",
"https://www.xiaohongshu.com/user/profile/5ef17ad00000000001005e1c", # "https://www.xiaohongshu.com/user/profile/5ef17ad00000000001005e1c",
"https://www.xiaohongshu.com/user/profile/5f75a5700000000001007679", # "https://www.xiaohongshu.com/user/profile/5f75a5700000000001007679",
"https://www.xiaohongshu.com/user/profile/5c639f59000000001000c731", # "https://www.xiaohongshu.com/user/profile/5c639f59000000001000c731",
"https://www.xiaohongshu.com/user/profile/5f865cbd0000000001002f01", # "https://www.xiaohongshu.com/user/profile/5f865cbd0000000001002f01",
"https://www.xiaohongshu.com/user/profile/5eccc58f000000000100753e", # "https://www.xiaohongshu.com/user/profile/5eccc58f000000000100753e",
"https://www.xiaohongshu.com/user/profile/5fbe05b4000000000101c88d", # "https://www.xiaohongshu.com/user/profile/5fbe05b4000000000101c88d",
"https://www.xiaohongshu.com/user/profile/5b7d1da7e8ac2b471ee6fef3", # "https://www.xiaohongshu.com/user/profile/5b7d1da7e8ac2b471ee6fef3",
"https://www.xiaohongshu.com/user/profile/5a11b22211be101018ba7125", "https://www.xiaohongshu.com/user/profile/5a11b22211be101018ba7125",
"https://www.xiaohongshu.com/user/profile/5a76c3c611be107f08bd35b3", "https://www.xiaohongshu.com/user/profile/5a76c3c611be107f08bd35b3",
"https://www.xiaohongshu.com/user/profile/5ecb6d7300000000010016a4", "https://www.xiaohongshu.com/user/profile/5ecb6d7300000000010016a4",
...@@ -405,18 +408,18 @@ if __name__ == '__main__': ...@@ -405,18 +408,18 @@ if __name__ == '__main__':
"https://www.xiaohongshu.com/user/profile/580e0bc36a6a69043935369d", "https://www.xiaohongshu.com/user/profile/580e0bc36a6a69043935369d",
"https://www.xiaohongshu.com/user/profile/5d1a17670000000012021d8e", "https://www.xiaohongshu.com/user/profile/5d1a17670000000012021d8e",
"https://www.xiaohongshu.com/user/profile/59a830be82ec39155146f421", "https://www.xiaohongshu.com/user/profile/59a830be82ec39155146f421",
"https://www.xiaohongshu.com/user/profile/55efc1b73397db0e969c8fbd", # "https://www.xiaohongshu.com/user/profile/55efc1b73397db0e969c8fbd",
"https://www.xiaohongshu.com/user/profile/5c8c55220000000010005810", # "https://www.xiaohongshu.com/user/profile/5c8c55220000000010005810",
"https://www.xiaohongshu.com/user/profile/5f337df2000000000101e2b2", # "https://www.xiaohongshu.com/user/profile/5f337df2000000000101e2b2",
"https://www.xiaohongshu.com/user/profile/5f2111500000000001009b7b", # "https://www.xiaohongshu.com/user/profile/5f2111500000000001009b7b",
"https://www.xiaohongshu.com/user/profile/59c840ff44363b497f335cd4", # "https://www.xiaohongshu.com/user/profile/59c840ff44363b497f335cd4",
"https://www.xiaohongshu.com/user/profile/5f8e8508000000000101d70e", # "https://www.xiaohongshu.com/user/profile/5f8e8508000000000101d70e",
"https://www.xiaohongshu.com/user/profile/5a163e3511be10234e1abffd", # "https://www.xiaohongshu.com/user/profile/5a163e3511be10234e1abffd",
"https://www.xiaohongshu.com/user/profile/5e71f6870000000001005e52", # "https://www.xiaohongshu.com/user/profile/5e71f6870000000001005e52",
'https://www.xiaohongshu.com/user/profile/5cca9b3700000000120314c9', # 'https://www.xiaohongshu.com/user/profile/5cca9b3700000000120314c9',
'https://www.xiaohongshu.com/user/profile/5aa0f7bae8ac2b65bfcdaf0e', # 'https://www.xiaohongshu.com/user/profile/5aa0f7bae8ac2b65bfcdaf0e',
'https://www.xiaohongshu.com/user/profile/5c20dd200000000007027c07', # 'https://www.xiaohongshu.com/user/profile/5c20dd200000000007027c07',
'https://www.xiaohongshu.com/user/profile/5fe1c1ba0000000001006e65', # 'https://www.xiaohongshu.com/user/profile/5fe1c1ba0000000001006e65',
] ]
print(len(url_list)) print(len(url_list))
count =0 count =0
......
...@@ -131,7 +131,7 @@ def send_file_email(file_path, data_str, email_group=[], ...@@ -131,7 +131,7 @@ def send_file_email(file_path, data_str, email_group=[],
if __name__ == '__main__': if __name__ == '__main__':
send_file_email("",'',sender="litao@igengmei.com",email_group=["litao@igengmei.com"],email_msg_body_str="test",title_str="test",cc_group=["litao@igengmei.com"],file="/srv/apps/meta_base_code/tmp.log") send_file_email("",'',sender="litao@igengmei.com",email_group=["litao@igengmei.com"],email_msg_body_str="test",title_str="test",cc_group=["litao@igengmei.com"],file="/srv/apps/readelf/shield.text")
......
...@@ -4,4 +4,5 @@ ...@@ -4,4 +4,5 @@
#conda activate crawler_env #conda activate crawler_env
source /srv/envs/crawler/bin/activate source /srv/envs/crawler/bin/activate
python /srv/apps/crawler_old/crawler/crawler_sys/framework/write_releasers_to_redis.py -p weibo -d 3 -proxies 5 > /data/log/crawler/write_task.log & python /srv/apps/crawler_old/crawler/crawler_sys/framework/write_releasers_to_redis.py -p weibo -d 3 -proxies 5 > /data/log/crawler/write_task.log &
python /srv/apps/crawler_old/crawler/crawler_sys/framework/write_releasers_to_redis.py -p xiaohongshu -d 2 -proxies 5 > /data/log/crawler/write_task.log &
#/home/gmuser/.virtualenvs/litao/bin/python3 /srv/apps/crawler/crawler_sys/framework/write_releasers_to_redis.py -p douban -d 1 -proxies 5 > /data/log/crawler/write_task.log & #/home/gmuser/.virtualenvs/litao/bin/python3 /srv/apps/crawler/crawler_sys/framework/write_releasers_to_redis.py -p douban -d 1 -proxies 5 > /data/log/crawler/write_task.log &
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment