爬取音悦台MV信息(requests,BeautifulSoup,xlwt)----待完善-白红宇

爬取音悦台MV信息(requests,BeautifulSoup,xlwt)----待完善

阅读量：4571 次

发布时间：2019-06-08

本文共 1719 字，大约阅读时间需要 5 分钟。

import requests
from bs4 import BeautifulSoup
import xlwt   # library used to write Excel (.xls) files


def excel_write(MV_list):
    """Write the scraped MV records to the Excel file ``MV.xls``.

    The workbook contains a single sheet named ``MV_name``. Row 0 holds the
    bold column headers; every record is written on its own row below it.
    Each cell value is also printed to stdout as it is written.

    Args:
        MV_list: iterable of 5-item records in the order
            (title, link, artist name, play count, description).
    """
    newtable = 'MV.xls'                       # name of the output Excel file
    wb = xlwt.Workbook(encoding='utf-8')      # create the workbook
    ws = wb.add_sheet('MV_name')              # create the sheet
    headData = ['名称', '链接', '歌手名称', '播放次数', '简介']   # column headers
    bold = xlwt.easyxf('font:bold on')        # header cells are written in bold
    for colnum, header in enumerate(headData):
        ws.write(0, colnum, header, bold)

    # Iterate over the argument itself (the previous version ignored it and
    # read the module-level ``MV_lists`` instead). Data rows start at row 1,
    # directly below the header row.
    for index, record in enumerate(MV_list, start=1):
        for i in range(len(headData)):
            print(record[i])
            ws.write(index, i, record[i])

    # Save once, after all rows are in place, so the file is written exactly
    # one time and is still produced (header only) when there are no records.
    wb.save(newtable)


page = range(1, 3)   # listing pages to crawl: 1 and 2
MV_lists = []        # collected (title, link, artist, play count, description) tuples

for S in page:       # S is the page number substituted into the listing URL
    url = 'http://mv.yinyuetai.com/all?pageType=page&sort=weekViews&page=%d&tab=allmv&parenttab=mv' % S
    print(url)
    # A timeout keeps a stalled connection from hanging the script forever;
    # raise_for_status() stops us from parsing an HTTP error page as a listing.
    html = requests.get(url, timeout=10)
    html.raise_for_status()
    soup = BeautifulSoup(html.text, 'html.parser')

    # The MV entries are <div class="info"> elements inside <ul class="clearfix">.
    container = soup.find('ul', {'class': 'clearfix'})
    if container is None:
        # Listing container missing on this page: nothing to extract here.
        continue
    MV_s_html = container.find_all('div', {'class': 'info'})

    for MV in MV_s_html:
        MV_title = MV.a.string             # MV title (text of the first link)
        MV_href = MV.a['href']             # MV link
        MV_name = MV.find('p').a.string    # artist name (link inside the first <p>)
        MV_count = MV.find('span', {'class': 'c6'}).get_text()   # play count
        MV_description = MV.find('p', {'class': 'description hid J_id'}).get_text()   # description
        MV_list_tuple = (
            MV_title,
            MV_href,
            MV_name,
            # Drop embedded newlines and surrounding tabs left over from the markup.
            MV_count.replace('\n', '').strip('\t'),
            MV_description.replace('\n', '').strip('\t'),
        )
        MV_lists.append(MV_list_tuple)

excel_write(MV_lists)   # write everything that was collected to the Excel file

转载于:https://www.cnblogs.com/114811yayi/p/6757700.html

你可能感兴趣的文章