Commit addfcd7f authored by litaolemo

update

parent 3ee3e780
......@@ -79,7 +79,7 @@ class Crawler_xiaohongshu():
# self.chrome_options.add_argument('sec-fetch-user="?1"')
# self.chrome_options.add_argument('upgrade-insecure-requests="1"')
# self.chrome_options.add_experimental_option('excludeSwitches', ['enable-automation'])
self.chrome_options.add_experimental_option('excludeSwitches', ['enable-automation'])
self.timestamp = str(datetime.datetime.now().timestamp() * 1e3)
prefs = {"profile.managed_default_content_settings.images": 2}
self.chrome_options.add_experimental_option("prefs", prefs)
......@@ -150,18 +150,12 @@ class Crawler_xiaohongshu():
page_element = self.driver.find_element_by_xpath("//*").get_attribute("outerHTML")
print(page_element)
self.driver.find_element_by_xpath("/html/body/div/div/div/div/div[2]/div[1]/div/div/div[1]/div[1]/div[1]/div/div/div/img").click()
self.driver.find_element_by_xpath("/html[1]/body[1]/div[1]/div[1]/div[1]/div[1]/div[2]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]").click()
self.driver.implicitly_wait(2)
time.sleep(1)
page_element = self.driver.find_element_by_xpath("//*").get_attribute("outerHTML")
print(page_element)
self.driver.find_element_by_xpath(
"/html/body/div/div/div/div/div[2]/div[2]/div[1]/span/div[2]/h6/span").click()
self.driver.implicitly_wait(2)
time.sleep(1)
page_element = self.driver.find_element_by_xpath("//*").get_attribute("outerHTML")
print(page_element)
cookie = self.driver.get_cookies()
#
# return True
......@@ -211,12 +205,13 @@ class Crawler_xiaohongshu():
page_text = res.text
print(page_text)
data_list = re.findall("window.__INITIAL_SSR_STATE__=(.*?)</script>", page_text)[0]
data_json = json.loads(data_list)
# print(data_list)
data_json = json.loads(data_list.replace("undefined","null"))
# # print(data_list)
if data_json:
print("get data at releaser: %s page: %s" % (releaser_id, count))
count += 1
for info_dic in data_json["ProfileLayout"]["noteData"]:
for info_dic in data_json["Main"]["notesDetail"]:
video_dic = {}
page_id = info_dic["id"]
title = info_dic["title"]
......@@ -242,4 +237,4 @@ class Crawler_xiaohongshu():
if __name__ == '__main__':
test = Crawler_xiaohongshu()
releaserurl = 'https://www.xiaohongshu.com/user/profile/5abbb57211be1027a0c880ae'
res = test.releaser_page(releaserurl,proxies_num=1)
res = test.releaser_page(releaserurl,proxies_num=0)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment