# -*- coding: utf-8 -*-
#coding=gbk

import threading

__author__ = 'zkqiang'
__zhihu__ = 'https://www.zhihu.com/people/z-kqiang'
__github__ = 'https://github.com/zkqiang/Zhihu-Login'

from bs4 import BeautifulSoup
import base64
import hashlib
import hmac
import json
import re
import time
from http import cookiejar
from urllib.parse import urlencode

import execjs
import requests
from PIL import Image
import sys
from scrapy.selector import Selector
import brotli
import random
import time
import re
from gm_upload import upload, upload_file
from urllib.request import urlretrieve
import os
import cv2
import copy
import traceback
import pymysql
import jieba
import jieba.analyse
import os
from gevent import monkey; monkey.patch_socket()
import gevent
from threading import Thread, Lock
import urllib.parse



# Held by get_query_word() while it updates the shared query-word state below.
mutex_for_get_query_word = Lock()
# Held by ZhihuAccount.mutex_for_zhihu_save_question_info() while it updates a
# question -> answer-id mapping.
mutex_for_zhihu_save_question_info = Lock()
# Held by ZhihuAccount.mutex_for_zhihu_save_file_info() while it appends a line
# to an output file.
mutex_for_zhihu_save_file= Lock()
# Incremented on every get_query_word() call; the value is handed to the
# crawler, which uses it to name the per-word image directory ("./img_<index>").
g_cur_word_index = 0

# Fixed list of search terms. Within this file it is only referenced from
# commented-out code; the active path reads its words from a file in
# get_query_word().
top_query_list = [
"瘦脸针","双眼皮","水光针","手术面部提升","鼻翼缩小","玻尿酸","吸脂","线雕","鼻综合","光子嫩肤","瘦腿针","美白针","热玛吉","隆鼻","超声刀","脱毛","祛斑","果酸焕肤","面部吸脂","皮秒","瘦肩针","自体脂肪填充面部","牙齿矫正","微针","热拉提","鼻翼缩小","瘦脸","下巴","植发","溶脂针","点阵激光","双眼皮修复","小气泡","鼻基底","祛眼袋","隆胸","祛痘","开眼角","除皱","牙齿美白","埋线双眼皮","颧骨","下颌角","纹眉","激光脱毛","玻尿酸丰下巴","法令纹","玻尿酸隆鼻","洗牙","吸脂瘦大腿","溶脂","保妥适","黄金微针","自体脂肪填充","美白","黑眼圈","白瓷娃娃","祛疤","切开双眼皮","泪沟","光纤溶脂","磨骨","嗨体","肉毒素","丰胸（隆胸）","微针祛痘坑","激光祛斑","假体下巴","植发际线","面部提升","肋骨鼻","蜂巢皮秒","祛痘祛痘印","腰腹吸脂","瘦腿","面部填充","厚唇改薄术","下眼睑下至","溶解酶","私密","点痣","酒窝","女性私密紧致","艾莉薇","伊婉V","无针水光","自体脂肪","人中缩短","m22","激光点痣","丰唇","脸型","埋线隆鼻","埋线","收缩毛孔","黑脸娃娃","伊婉C","开外眼角","童颜针","妊娠纹"
]

# Sock-puppet ("majia") user IDs. The crawler picks one at random with
# random.choice() and stores it as the "user_id" of every record it writes.
majia_user_list = [
"32269952","32269956","32269962","32269966","32269973","32269978","32269980","32269982","32269987","32269989","32270003","32270004","32270007","32270012","32270015","32270017","32270020","32270024","32270027","32270031","32270041","32270044","32270047","32270050","32270054","32270055","32270057","32270059","32270063","32270066","32269913","32269918","32269920","32269927","32269933","32269939","32269943","32269948","32269957","32269965","32269972","32269979","32269983","32269988","32269995","32270002","32270005","32270011","32270016","32270022","32270029","32270036","32270040","32270051","32270061","32270065","32270071","32270075","32270081","32270085","32270094","32270096","32270110","32270116","32270121","32270141","32270147","32270152","32270156","32270161","32270114","32270119","32270122","32270125","32270129","32270131","32270133","32270134","32270137","32270167","32270068","32270070","32270076","32270078","32270083","32270087","32270093","32270095","32270099","32270105","32269992","32270018","32270023","32270030","32270034","32270043","32270048","32270052","32270056","32270060"
]

# Pool of query words still waiting to be crawled; get_query_word() fills it
# from a file on first use and pops one word per call.
g_query_word_set = set()
# Set to True by get_query_word() just before it reads the word file, so the
# file is loaded at most once per process.
g_if_get_query_word = False

# Connection settings for the "zhengxing" MySQL database opened by
# ZhihuAccount.__init__. Each value can be overridden with an environment
# variable of the same name; the literals are kept only as backward-compatible
# defaults.
# NOTE(review): a database password is committed to source control here. It
# should be rotated and supplied through the environment (or a secrets store),
# after which the literal default can be removed.
ZHENGXING_HOST = os.environ.get("ZHENGXING_HOST", "172.16.30.141")
ZHENGXING_USER = os.environ.get("ZHENGXING_USER", "work")
ZHENGXING_PWD = os.environ.get("ZHENGXING_PWD", "BJQaT9VzDcuPBqkd")
ZHENGXING_DATABASE = os.environ.get("ZHENGXING_DATABASE", "zhengxing")


class ZhihuAccount(object):

    def __init__(self, username: str = None, password: str = None):
        """
        Prepare the HTTP session, the login form and the database handles.

        :param username: account name (phone number or e-mail); may be empty,
            in which case it is requested interactively at login time
        :param password: account password; may be empty, same as above
        """
        self.username, self.password = username, password

        # Form fields posted to the sign-in API; username/password/lang are
        # filled in by login().
        self.login_data = dict(
            client_id='c3cef7c66a1843f8b3a9e6a1e3160e20',
            grant_type='password',
            source='com.zhihu.web',
            username='',
            password='',
            lang='en',
            ref_source='homepage',
            utm_source='',
        )

        user_agent = ('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
                      '(KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36')
        http_session = requests.session()
        http_session.headers = {
            'accept-encoding': 'gzip, deflate, br',
            'Host': 'www.zhihu.com',
            'Referer': 'https://www.zhihu.com/',
            'User-Agent': user_agent,
        }
        # Cookies are persisted to ./cookies.txt so a later run can reuse them.
        http_session.cookies = cookiejar.LWPCookieJar(filename='./cookies.txt')
        self.session = http_session

        # MySQL connection used by add_jieba_tag_word() to read the tag table.
        db_settings = {
            'host': ZHENGXING_HOST,
            'user': ZHENGXING_USER,
            'password': ZHENGXING_PWD,
            'database': ZHENGXING_DATABASE,
            'charset': "utf8",
        }
        self.zhengxing_conn = pymysql.connect(**db_settings)
        self.zhengxing_cursor = self.zhengxing_conn.cursor()
        # Tag names loaded from the database; used to filter extracted keywords.
        self.tag_words_set = set()

    def login(self, captcha_lang: str = 'en', load_cookies: bool = True):
        """
        Log in to Zhihu, reusing saved cookies when they are still valid.

        :param captcha_lang: captcha flavour, 'en' or 'cn'
        :param load_cookies: whether to try the cookies saved by a previous run
        :return: bool, True when the session is logged in

        If clicking does not work for the 'cn' captcha under PyCharm, untick
        Settings / Tools / Python Scientific / Show Plots in Toolwindow.
        """
        if load_cookies and self.load_cookies():
            print('读取 Cookies 文件')
            if self.check_login():
                print('登录成功')
                return True
            print('Cookies 已过期')

        self._check_user_pass()
        self.login_data['username'] = self.username
        self.login_data['password'] = self.password
        self.login_data['lang'] = captcha_lang

        # The captcha endpoint is queried before the signature is computed,
        # and both share the same millisecond timestamp.
        timestamp = int(time.time() * 1000)
        captcha = self._get_captcha(self.login_data['lang'])
        signature = self._get_signature(timestamp)
        self.login_data.update(captcha=captcha, timestamp=timestamp, signature=signature)

        request_headers = self.session.headers.copy()
        request_headers['content-type'] = 'application/x-www-form-urlencoded'
        request_headers['x-zse-83'] = '3_1.1'
        request_headers['x-xsrftoken'] = self._get_xsrf()

        encrypted_form = self._encrypt(self.login_data)
        resp = self.session.post('https://www.zhihu.com/api/v3/oauth/sign_in',
                                 data=encrypted_form, headers=request_headers)
        if 'error' in resp.text:
            print(json.loads(resp.text)['error'])

        logged_in = self.check_login()
        print('登录成功' if logged_in else '登录失败')
        return logged_in

    def load_cookies(self):
        """
        Load the saved cookie file into the session.

        A missing file and a file that cannot be parsed are both treated as
        "no usable cookies", so the caller falls back to a fresh login instead
        of crashing on a corrupt cookie file.

        :return: bool, True when the cookies were loaded
        """
        try:
            self.session.cookies.load(ignore_discard=True)
        except FileNotFoundError:
            return False
        except cookiejar.LoadError:
            # Raised by the cookie jar when the file exists but is not in the
            # expected format (e.g. truncated by an interrupted save).
            print('Cookies 文件格式错误')
            return False
        return True

    def check_login(self):
        """
        Check whether the session is logged in.

        The sign-up page is requested without following redirects; a 302
        response is taken to mean the session is already logged in, and in
        that case the current cookies are saved.

        :return: bool
        """
        resp = self.session.get('https://www.zhihu.com/signup', allow_redirects=False)
        if resp.status_code != 302:
            return False
        self.session.cookies.save()
        return True

    def _get_xsrf(self):
        """
        Fetch the home page and return the value of the ``_xsrf`` cookie.

        :return: str
        :raises AssertionError: when no ``_xsrf`` cookie is present afterwards
        """
        self.session.get('https://www.zhihu.com/', allow_redirects=False)
        xsrf_cookies = [cookie for cookie in self.session.cookies if cookie.name == '_xsrf']
        if not xsrf_cookies:
            raise AssertionError('获取 xsrf 失败')
        return xsrf_cookies[0].value

    def _get_captcha(self, lang: str):
        """
        Query the captcha API and, when a captcha is required, solve it by hand.

        The API is requested once whether or not a captcha turns out to be
        needed. When one is needed the image arrives base64-encoded, is written
        to ./captcha.jpg and shown to the operator, who answers interactively.

        :param lang: captcha language, 'cn' or anything else for 'en'
        :return: the captcha value to post with the login form ('' if none needed)
        """
        api = ('https://www.zhihu.com/api/v3/oauth/captcha?lang=cn' if lang == 'cn'
               else 'https://www.zhihu.com/api/v3/oauth/captcha?lang=en')
        resp = self.session.get(api)
        if 'true' not in resp.text:
            # No captcha required for this login attempt.
            return ''

        put_resp = self.session.put(api)
        encoded_image = json.loads(put_resp.text)['img_base64'].replace(r'\n', '')
        with open('./captcha.jpg', 'wb') as captcha_file:
            captcha_file.write(base64.b64decode(encoded_image))
        img = Image.open('./captcha.jpg')

        if lang == 'cn':
            # Chinese captcha: the operator clicks the upside-down characters.
            import matplotlib.pyplot as plt
            plt.imshow(img)
            print('点击所有倒立的汉字，在命令行中按回车提交')
            clicked = plt.ginput(7)
            half_points = [[point[0] / 2, point[1] / 2] for point in clicked]
            capt = json.dumps({'img_size': [200, 44],
                               'input_points': half_points})
        else:
            # English captcha: show the image in the background and read text.
            viewer = threading.Thread(target=img.show, daemon=True)
            viewer.start()
            capt = input('请输入图片里的验证码：')

        # The answer has to be posted to the captcha API before signing in.
        self.session.post(api, data={'input_text': capt})
        return capt

    def _get_signature(self, timestamp: int or str):
        """
        Compute the HMAC-SHA1 signature sent with the login form.

        The signed message is the concatenation of the form's grant_type,
        client_id and source followed by the timestamp.

        :param timestamp: timestamp used for this login attempt
        :return: hex digest of the signature
        """
        form = self.login_data
        message = ''.join([form['grant_type'], form['client_id'], form['source'], str(timestamp)])
        signer = hmac.new(b'd1b964811afb40118a12068ff74a12f4',
                          message.encode('utf-8'),
                          hashlib.sha1)
        return signer.hexdigest()

    def _check_user_pass(self):
        """
        Make sure a username and password are set, prompting for missing ones.

        An all-digit username is treated as a phone number and gets the '+86'
        prefix.
        """
        account = self.username or input('请输入手机号：')
        needs_prefix = account.isdigit() and '+86' not in account
        self.username = ('+86' + account) if needs_prefix else account

        self.password = self.password or input('请输入密码：')

    @staticmethod
    def _encrypt(form_data: dict):
        with open('./encrypt.js') as f:
            js = execjs.compile(f.read())
            return js.call('Q', urlencode(form_data))

    def add_jieba_tag_word(self):
        """
        Load the online tag names from the database into jieba and into
        ``self.tag_words_set``.

        Every tag name is registered as a jieba dictionary word and remembered
        so get_tfidf_words_from_content() can keep only keywords that are tags.
        Errors are reported on stdout and otherwise ignored (best effort).
        """
        tag_sql = """
                    select tag_type,name from api_tag where is_online=true;
            """
        try:
            self.zhengxing_cursor.execute(tag_sql)
            for tag_row in self.zhengxing_cursor.fetchall():
                # Row layout follows the select list: (tag_type, name).
                tag_name = tag_row[1]
                jieba.add_word(tag_name)
                self.tag_words_set.add(tag_name)
        except Exception:
            # Exception rather than a bare except, so KeyboardInterrupt and
            # greenlet termination are not swallowed.
            print(traceback.format_exc())

    def get_tfidf_words_from_content(self,content):
        """
        Return up to five keywords of ``content`` that are known tag names.

        jieba extracts the top 20 keywords; those present in
        ``self.tag_words_set`` are kept in extraction order.

        :param content: text (may contain HTML) to extract keywords from
        :return: list of at most five tag names; [] when extraction fails
        """
        try:
            keywords_list = jieba.analyse.extract_tags(content, topK=20, withWeight=True)
            # Each entry is (word, weight); only the word is needed here.
            matching_tags = [key_item[0] for key_item in keywords_list
                             if key_item[0] in self.tag_words_set]
            return matching_tags[:5]
        except Exception:
            # Exception rather than a bare except, so KeyboardInterrupt and
            # greenlet termination are not swallowed.
            print(traceback.format_exc())
            return []

    def mutex_for_zhihu_save_question_info(self,question_answer_dict,question_id,platform_id):
        """
        Record, under the module-level lock, that ``platform_id`` belongs to
        ``question_id``.

        :param question_answer_dict: dict mapping question id -> set of ids
        :param question_id: key to register the id under
        :param platform_id: id added to that question's set
        """
        # Inside the method body this name resolves to the module-level Lock,
        # not to this method. The with-block guarantees the release.
        with mutex_for_zhihu_save_question_info:
            try:
                question_answer_dict.setdefault(question_id, set()).add(platform_id)
            except Exception:
                print(traceback.format_exc())



    def mutex_for_zhihu_save_file_info(self,file_fd,item_dict):
        """
        Append ``item_dict`` to ``file_fd`` as one JSON line, under the
        module-level file lock.

        :param file_fd: writable text file object
        :param item_dict: JSON-serialisable record
        """
        # The with-block guarantees the lock is released on every exit path.
        with mutex_for_zhihu_save_file:
            try:
                file_fd.write(json.dumps(item_dict) + "\n")
            except Exception:
                print(traceback.format_exc())


    # Zhihu: article list of a single member
    def test_member_article(self):
        """
        Debug helper: fetch one page of a fixed member's articles and print
        each article's title and content.
        """
        member_article_url = "https://www.zhihu.com/api/v4/members/li-pei-rong-96/articles?include=data%5B*%5D.comment_count%2Csuggest_edit%2Cis_normal%2Cthumbnail_extra_info%2Cthumbnail%2Ccan_comment%2Ccomment_permission%2Cadmin_closed_comment%2Ccontent%2Cvoteup_count%2Ccreated%2Cupdated%2Cupvoted_followees%2Cvoting%2Creview_info%2Cis_labeled%2Clabel_info%3Bdata%5B*%5D.author.badge%5B%3F(type%3Dbest_answerer)%5D.topics&offset=40&limit=20&sort_by=created"

        resp = self.session.get(member_article_url, allow_redirects=False)
        print("*" * 10)
        # The response body is brotli-decompressed by hand before JSON parsing.
        decompressed = brotli.decompress(resp.content)
        print(type(decompressed))

        payload = json.loads(str(decompressed, encoding="utf-8"))
        for article in payload["data"]:
            print(article["title"])
            print(article["content"])
            print("*" * 50)


    def _dispose_content_url(self,content,img_url_list,cur_image_index,cur_word_index):
        """
        Re-host the .jpg/.png images referenced by ``content``.

        Each matching URL is downloaded into ``./img_<cur_word_index>``, the
        bottom 100 pixel rows are cropped off, the result is uploaded with
        upload_file() and the URL in ``content`` is replaced by the uploaded
        one. A failure on one image is reported and that image is skipped; the
        remaining images are still processed and temporary files are always
        removed.

        :param content: HTML text containing the image URLs
        :param img_url_list: candidate image URLs found in ``content``
        :param cur_image_index: running image counter used for file names
        :param cur_word_index: index of the current query word (directory name)
        :return: tuple (content with replaced URLs, updated cur_image_index)
        """
        img_dir = "./img_" + str(cur_word_index)
        try:
            os.makedirs(img_dir, exist_ok=True)
        except OSError:
            print(traceback.format_exc())
            return content,cur_image_index

        for ori_img_url in img_url_list:
            if ".jpg" not in ori_img_url and ".png" not in ori_img_url:
                continue

            cur_image_index += 1
            local_img_url_path = img_dir + "/img_" + str(cur_image_index) + ".png"
            local_cropped_img_url_path = img_dir + "/cropped_image_" + str(cur_image_index) + ".png"
            print(ori_img_url, local_img_url_path)

            try:
                urlretrieve(ori_img_url, local_img_url_path)
                local_cv2_img = cv2.imread(local_img_url_path)
                if local_cv2_img is None:
                    # cv2.imread signals an unreadable file by returning None.
                    print("skip unreadable image: %s" % ori_img_url)
                    continue

                height, width = local_cv2_img.shape[:2]
                if height <= 100:
                    # Cropping 100 rows would leave nothing (or, through a
                    # negative slice stop, the wrong rows), so leave this URL
                    # untouched.
                    print("skip image too small to crop: %s" % ori_img_url)
                    continue

                # Drop the bottom 100 rows before re-uploading.
                # NOTE(review): presumably removes a watermark strip — confirm.
                local_cropped_img = local_cv2_img[0:(height - 100), 0:width]
                cv2.imwrite(local_cropped_img_url_path, local_cropped_img)
                qiniu_url = upload_file(local_cropped_img_url_path)
                content = content.replace(ori_img_url, qiniu_url)
            except Exception:
                # Best effort: report the failure and move on to the next image.
                print(traceback.format_exc())
            finally:
                for tmp_path in (local_img_url_path, local_cropped_img_url_path):
                    if os.path.exists(tmp_path):
                        os.remove(tmp_path)

        return content,cur_image_index

    # Zhihu: search by query word
    def zhihu_query_by_word(self,query_word,zhihu_spider_fd,zhihu_spider_question_fd,question_answer_dict,cur_word_index):
        """
        Crawl the search results for ``query_word`` and write them as JSON lines.

        Ten result pages are requested (offsets 0, 10, ..., 90, ten results
        each). For every item of type "search_result" the images in its
        content are re-hosted through _dispose_content_url(), and a record is
        written to ``zhihu_spider_fd``. The first time an answer's question is
        seen, the question's other answers are crawled through
        zhihu_answers_list_by_question() and a question record is written to
        ``zhihu_spider_question_fd``. Answers already registered in
        ``question_answer_dict`` are not written again.

        :param query_word: search term
        :param zhihu_spider_fd: writable text file for article/answer records
        :param zhihu_spider_question_fd: writable text file for question records
        :param question_answer_dict: dict question id -> set of saved answer
            ids; mutated in place and used for de-duplication
        :param cur_word_index: index of the query word, forwarded to
            _dispose_content_url() to choose the image directory
        :return: None
        """

        # Running image counter, shared by every item of this query word.
        cur_image_index = 0
        for begin_index in range(0,100,10):
            # The query string is built with urlencode instead of hand-written
            # percent-escapes.
            query_by_word_url = "https://www.zhihu.com/api/v4/search_v3?"
            query_params_dict = {
                "q": query_word,
                "offset": begin_index,
                "limit": 10,
                "lc_idx": 22,
                "show_all_topics": 0,
                "search_hash_id": "dc4a11848e2540981cf28634ff3609c0",
                "vertical_info": "0,0,0,0,0,0,0,0,0,1",
                "correction": 1,
                "t": "general"
            }
            query_by_word_url += urllib.parse.urlencode(query_params_dict)

            res = self.session.get(query_by_word_url,allow_redirects=False)
            print(10*"*")
            print(query_by_word_url)
            print(res)
            # The body is brotli-decompressed by hand before JSON parsing.
            # NOTE(review): the HTTP status is not checked; a non-brotli or
            # non-JSON body raises here and propagates to the caller.
            raw_content = brotli.decompress(res.content)
            print(type(raw_content))

            raw_content_dict = json.loads(str(raw_content,encoding="utf-8"))

            if "data" in raw_content_dict:
                for data_item in raw_content_dict["data"]:
                    if data_item["type"] == "search_result":
                        try:
                            data_type = data_item["object"]["type"]
                            content = data_item["object"]["content"] if "content" in data_item["object"] else ""
                            platform_id = str(data_item["object"]["id"])
                            # Each record is attributed to a random sock-puppet user.
                            user_id = random.choice(majia_user_list)
                            question_id = ""
                            have_saved_this_answer = False

                            # Re-host images referenced through src="..." ...
                            img_url_list = re.findall('src="(.*?)"', content)
                            content,cur_image_index = self._dispose_content_url(content=content,img_url_list=img_url_list,cur_image_index=cur_image_index,cur_word_index=cur_word_index)

                            # ... and through data-original="..." (searched in
                            # the already rewritten content).
                            img_url_list = re.findall('data-original="(.*?)"', content)
                            content,cur_image_index = self._dispose_content_url(content=content,img_url_list=img_url_list,cur_image_index=cur_image_index,cur_word_index=cur_word_index)


                            if data_type == "article":
                                # Strip the <em> highlight markup from the title.
                                title = data_item["object"]["title"]
                                title = title.replace("<em>","")
                                title = title.replace("</em>", "")
                            elif data_type == "answer":
                                title = data_item["object"]["question"]["name"]
                                title = title.replace("<em>","")
                                title = title.replace("</em>", "")
                                question_id = str(data_item["object"]["question"]["id"])

                                if question_id not in question_answer_dict:
                                    # First time this question is seen: register
                                    # the answer, crawl the question's answer
                                    # list, then write the question record.
                                    question_answer_dict[question_id] = set()
                                    question_answer_dict[question_id].add(platform_id)
                                    cur_image_index = self.zhihu_answers_list_by_question(question_id,question_answer_dict,zhihu_spider_fd,cur_image_index,cur_word_index)

                                    # The question record reuses this answer's
                                    # content and type; its platform_id is the
                                    # question id.
                                    question_item_dict = {
                                        "user_id": user_id,
                                        "platform_id": question_id,
                                        "title": title,
                                        "content": content,
                                        "type": data_type,
                                        "question_id": "",
                                        "tags": self.get_tfidf_words_from_content(content)
                                    }
                                    zhihu_spider_question_fd.write(json.dumps(question_item_dict) + "\n")
                                elif platform_id not in question_answer_dict[question_id]:
                                    # Known question, new answer.
                                    question_answer_dict[question_id].add(platform_id)
                                else:
                                    # Answer already written earlier; skip it.
                                    have_saved_this_answer = True
                            else:
                                print("type is:%s" % data_type)
                                title = ""

                            if not have_saved_this_answer:
                                item_dict = {
                                    "user_id": user_id,
                                    "platform_id": platform_id,
                                    "title": title,
                                    "content": content,
                                    "type": data_type,
                                    "question_id": question_id,
                                    "tags": self.get_tfidf_words_from_content(content)
                                }
                                zhihu_spider_fd.write(json.dumps(item_dict) + "\n")
                        except:
                            # NOTE(review): a bare except also swallows
                            # KeyboardInterrupt; the failing item is printed
                            # and the loop continues with the next one.
                            print(traceback.format_exc())
                            print(str(data_item))

            # No delay between pages (a two-second sleep used to be here).


    # Zhihu: list of answers for a question
    def zhihu_answers_list_by_question(self,question_id,question_answer_dict,zhihu_spider_fd,cur_image_index,cur_word_index):
        """
        Crawl up to ten pages of answers for one question and write them as
        JSON lines.

        Ten pages are requested (offsets 0, 10, ..., 90, ten answers each).
        Images in every answer are re-hosted through _dispose_content_url().
        Answers already registered in ``question_answer_dict`` are not written
        again.

        The id read from each item is kept in its own local variable. The
        ``question_id`` parameter is never reassigned, so every page is
        requested for the question the caller asked for.

        :param question_id: id of the question whose answers are crawled
        :param question_answer_dict: dict question id -> set of saved answer
            ids; mutated in place and used for de-duplication
        :param zhihu_spider_fd: writable text file for the answer records
        :param cur_image_index: running image counter (see return value)
        :param cur_word_index: index of the current query word, forwarded to
            _dispose_content_url() to choose the image directory
        :return: the updated cur_image_index
        """
        answers_api_prefix = "https://www.zhihu.com/api/v4/questions/" + str(question_id) + "/answers?"

        for begin_index in range(0,100,10):
            # The query string is built with urlencode instead of hand-written
            # percent-escapes.
            query_params_dict = {
                    "include": "data[*].is_normal,admin_closed_comment,reward_info,is_collapsed,annotation_action,annotation_detail,collapse_reason,is_sticky,collapsed_by,suggest_edit,comment_count,can_comment,content,editable_content,voteup_count,reshipment_settings,comment_permission,created_time,updated_time,review_info,relevant_info,question,excerpt,relationship.is_authorized,is_author,voting,is_thanked,is_nothelp,is_labeled,is_recognized,paid_info,paid_info_content;data[*].mark_infos[*].url;data[*].author.follower_count,badge[*].topics",
                    "offset": begin_index,
                    "limit": 10,
                    "sort_by": "default",
                    "platform": "desktop"
                }
            answers_list_by_question_url = answers_api_prefix + urllib.parse.urlencode(query_params_dict)

            res = self.session.get(answers_list_by_question_url,allow_redirects=False)
            print(10*"*")
            # The body is brotli-decompressed by hand before JSON parsing.
            raw_content = brotli.decompress(res.content)
            print(type(raw_content))

            raw_content_dict = json.loads(str(raw_content,encoding="utf-8"))

            if "data" not in raw_content_dict:
                continue

            for data_item in raw_content_dict["data"]:
                try:
                    # Each record is attributed to a random sock-puppet user.
                    user_id = random.choice(majia_user_list)
                    data_type = data_item["type"]
                    platform_id = str(data_item["id"])
                    data_content = data_item["content"] if "content" in data_item else ""
                    item_question_id = ""
                    question_title = ""
                    have_saved_this_answer = False

                    # Re-host images referenced through src="..." ...
                    img_url_list = re.findall('src="(.*?)"', data_content)
                    data_content, cur_image_index = self._dispose_content_url(
                        content=data_content, img_url_list=img_url_list,
                        cur_image_index=cur_image_index, cur_word_index=cur_word_index)

                    # ... and through data-original="..." (searched in the
                    # already rewritten content).
                    img_url_list = re.findall('data-original="(.*?)"', data_content)
                    data_content, cur_image_index = self._dispose_content_url(
                        content=data_content, img_url_list=img_url_list,
                        cur_image_index=cur_image_index, cur_word_index=cur_word_index)

                    if data_type == "answer" and "question" in data_item:
                        item_question_id = str(data_item["question"]["id"])
                        question_title = data_item["question"]["title"]

                        saved_answer_ids = question_answer_dict.setdefault(item_question_id, set())
                        if platform_id in saved_answer_ids:
                            # Answer already written earlier; skip it.
                            have_saved_this_answer = True
                        else:
                            saved_answer_ids.add(platform_id)

                    if not have_saved_this_answer:
                        item_dict = {
                            "user_id": user_id,
                            "platform_id": platform_id,
                            "title": question_title,
                            "content": data_content,
                            "type": data_type,
                            "question_id": item_question_id,
                            "tags": self.get_tfidf_words_from_content(data_content)
                        }

                        zhihu_spider_fd.write(json.dumps(item_dict) + "\n")
                except Exception:
                    # Best effort per item: report it and continue with the
                    # next one. Exception (not bare except) lets
                    # KeyboardInterrupt and greenlet termination propagate.
                    print(traceback.format_exc())
                    print(str(data_item))

        return cur_image_index


def get_query_word(word_file="/data/log/spider/test_service/offi_query_word_from_20190101_20200115.txt"):
    """
    Hand out the next query word to crawl, together with a running call index.

    On the first call the word pool is loaded from ``word_file``. Each line is
    expected to hold a word and a count separated by a tab; only the word is
    used. Blank lines and lines without a word are skipped rather than
    aborting the load. Every call pops one arbitrary word from the pool.

    :param word_file: path of the tab-separated word/count file
    :return: tuple (word, call index); the word is "" when the pool is
        exhausted or the file could not be read
    """
    global g_cur_word_index
    global g_if_get_query_word

    ret_word = ""
    # The with-block guarantees the lock is released on every exit path.
    with mutex_for_get_query_word:
        g_cur_word_index += 1
        # Captured while the lock is held so the pair returned is consistent.
        cur_word_index = g_cur_word_index

        try:
            if not g_query_word_set and not g_if_get_query_word:
                # Flag first, so a failed load is not retried on every call.
                g_if_get_query_word = True
                with open(word_file, "r") as offi_query_word_fd:
                    for line in offi_query_word_fd:
                        query_word = line.rstrip("\r\n").partition("\t")[0].strip()
                        if query_word:
                            g_query_word_set.add(query_word)

            if g_query_word_set:
                ret_word = g_query_word_set.pop()
        except Exception:
            print(traceback.format_exc())

    return ret_word,cur_word_index


def concurrence_dispose_query_word(account_obj):
    """
    Worker loop: keep taking query words and crawling them until none is left.

    For every word two output files are created under ./data (one for
    article/answer records, one for question records) and passed to
    ``account_obj.zhihu_query_by_word``. The files are only opened once a
    non-empty word has been obtained, and they are closed even when the crawl
    raises. Any exception ends this worker after printing the traceback.

    :param account_obj: logged-in ZhihuAccount used for the crawl
    """
    try:
        # The output directory must exist before the per-word files are opened.
        os.makedirs("./data", exist_ok=True)

        while True:
            query_word,cur_word_index = get_query_word()
            print("query_word:%s" % query_word)

            if not query_word:
                # An empty word means the pool is exhausted (or failed to load).
                break

            zhihu_spider_data_file = "./data/zhihu_spider_data_for_query_word_" + str(query_word) + ".txt"
            zhihu_spider_question_data_file = "./data/zhihu_spider_question_data_for_query_word_" + str(query_word) + ".txt"

            # question id -> set of answer ids already written for this word
            question_answer_dict = dict()

            with open(zhihu_spider_data_file, "w") as zhihu_spider_fd, \
                    open(zhihu_spider_question_data_file, "w") as zhihu_spider_question_fd:
                account_obj.zhihu_query_by_word(query_word=query_word, zhihu_spider_fd=zhihu_spider_fd,
                                    zhihu_spider_question_fd=zhihu_spider_question_fd,
                                    question_answer_dict=question_answer_dict, cur_word_index=cur_word_index)

    except Exception:
        print(traceback.format_exc())


if __name__ == '__main__':
    # Script entry point: log in, load the tag dictionary, then crawl the
    # query words with a pool of gevent greenlets sharing one account.

    os.environ.setdefault("DJANGO_SETTINGS_MODULE", "settings.settings")

    account = ZhihuAccount('', '')
    account.login(captcha_lang='en', load_cookies=True)
    account.add_jieba_tag_word()

    # Each greenlet repeatedly pulls a query word and crawls it until the word
    # pool is empty.
    workers = [gevent.spawn(concurrence_dispose_query_word, account) for _ in range(500)]

    # Wait for every worker to finish.
    for worker in workers:
        worker.join()
