# -*- coding: utf-8 -*-
"""
Created on Mon Mar 19 10:32:52 2018

@author: fangyucheng
"""


import requests
from bs4 import BeautifulSoup
import json


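# Channel path fragments (usable as the `partofurl` argument of list_page):
# Two Sessions: lh/_cl/0/30/
# Interviews:   ft/_cl/6/30/
# News:         zx/_cl/29/20/
# Viewpoints:   sd/_cl/29/20/
# Global:       qq/_cl/7/30/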

class people_video():
    """Crawler for the JSON video feeds of mobilevideo.people.com.cn.

    ``list_page`` walks the numbered list files of one channel and calls
    ``video_page`` for every article link found there.
    """

    @staticmethod
    def _duration_to_seconds(duration_str):
        """Convert a colon-separated duration string to whole seconds.

        Fields are read most-significant first and each one is weighted by
        60 relative to the next, so ``"SS"``, ``"MM:SS"`` and ``"HH:MM:SS"``
        are all accepted.

        Raises:
            ValueError: if a field is not an integer (e.g. an empty string).
        """
        seconds = 0
        for field in duration_str.split(':'):
            seconds = seconds * 60 + int(field)
        return seconds

    def video_page(self, url):
        """Download one video article document and return its metadata.

        Args:
            url: address of the article's JSON document.

        Returns:
            dict with the keys ``title``, ``playcount``, ``releaser``,
            ``release_time``, ``duration`` (int, seconds), ``author`` and
            ``url`` (the address that was requested).

        Raises:
            ValueError: if the body is not valid JSON or the duration
                contains a non-integer field.
            KeyError: if an expected field is missing from the document.
        """
        response = requests.get(url)
        # Decode the body as UTF-8 regardless of what requests guessed.
        response.encoding = 'utf-8'
        article = json.loads(response.text)['data']['article']
        return {
            'title': article['title'],
            'playcount': article['playNum'],
            'releaser': article['publish']['name'],
            'release_time': article['publishTime'],
            'duration': self._duration_to_seconds(article['duration']),
            'author': article['author'],
            'url': url,
        }

    def list_page(self, partofurl, totalpage):
        """Collect the metadata of every video listed in a channel.

        Args:
            partofurl: channel path fragment appended to the publish-file
                base URL, e.g. ``"qq/_cl/7/30/"``.
            totalpage: exclusive upper bound of the list-page numbers;
                pages ``1 .. totalpage - 1`` are requested.
                NOTE(review): if the caller means "number of pages", the
                last page is never fetched - confirm the intent.

        Returns:
            list of the dicts produced by ``video_page``, in listing order.
        """
        base_url = ('http://mobilevideo.people.com.cn/movie_pub/News/publishfile/'
                    + partofurl)
        videos = []
        for page_no in range(1, totalpage):
            list_url = '{}list_{}.json'.format(base_url, page_no)
            response = requests.get(list_url)
            response.encoding = 'utf-8'
            try:
                listing = json.loads(response.text)
            except ValueError:
                # A body that is not JSON (an HTML page in practice) is
                # treated as "this list page does not exist".
                print('no more page')
                continue
            print(list_url)
            for entry in listing['data']['newsList']:
                videos.append(self.video_page(entry['articleLink']))
        return videos
            
    
    
if __name__ == '__main__':
    # Script entry point: crawl the list pages of the "qq" channel.
    people_crawler = people_video()
    # Example of fetching a single video document instead:
    # video_page = people_crawler.video_page(url='http://mobilevideo.people.com.cn/movie_pub/News/publishfile/spk/_cd/10/18/4154954.json')
    list_page2 = people_crawler.list_page(
        partofurl="qq/_cl/7/30/",
        totalpage=20,
    )