8wDlpd.png
8wDFp9.png
8wDEOx.png
8wDMfH.png
8wDKte.png
酷狗繁星主播MV下载 Python
admin 2021-12-12

爬取酷狗繁星的MV

#!/usr/bin/env python
# -*- coding:utf-8 -*-
from requests import get
import re
import json
from os import path
from time import sleep
import time
from sys import stdout
import requests

def write_json(id,obj):
    """
    Merge freshly crawled entries into ``{id}.json`` in the working directory.

    Entries already stored in the file are loaded first (duplicates inside
    the file are collapsed), then every entry of ``obj`` that is not yet
    known is appended. The merged list is always written back.

    :param id: Kugou UserId; used as the JSON file name stem
    :param obj: iterable of crawled entries to merge
    :return: 1 if the 10th already-known entry was hit (merging stops at
             that point), otherwise 0
    """
    json_file = f"{id}.json"
    merged = []
    if path.exists(json_file):
        with open(json_file, 'r', encoding='utf-8') as src:
            stored = json.load(src)
        # Rebuild the stored list without repeated entries, keeping order.
        for entry in stored:
            if entry not in merged:
                merged.append(entry)

    duplicates = 0  # how many incoming entries were already known
    status = 0      # becomes 1 once the duplicate limit is reached
    for entry in obj:
        if entry not in merged:
            merged.append(entry)
            continue
        duplicates += 1
        if duplicates == 10:
            print("重复爬取已经有10条了,停止爬取。。。")
            status = 1
            break

    # Persist the merged list, including on the early-stop path.
    with open(json_file, 'w', encoding='utf-8') as dst:
        json.dump(merged, dst, ensure_ascii=False)
    return status

def get_json(id):
    """
    Load ``{id}.json`` from the working directory and return its content.

    Also prints how many entries the file currently holds.

    :param id: Kugou UserId; used as the JSON file name stem
    :return: the parsed JSON object
    """
    with open(f"{id}.json", 'r', encoding='utf-8') as handle:
        records = json.load(handle)
    print(f"当前id:{id} 已经储存 {len(records)} 条数据。")
    return records

def down_jpg(url):
    """
    Download an MV cover image into the ``pics/`` directory.

    The local file name is the fourth "/"-separated piece of ``url``
    (for ``scheme://host/name`` that is ``name``).
    NOTE(review): assumes cover URLs carry the file name directly after
    the host; deeper paths would yield a directory name instead - confirm.

    A file that already exists locally is not downloaded again.

    :param url: address of the image
    :return: relative path the image is stored at
    """
    # Local import: the file only imports ``path`` from ``os``.
    from os import makedirs

    dist = "pics/" + url.split("/")[3]
    if not path.exists(dist):
        # Create the target directory on first use; without it open() fails
        # with FileNotFoundError.
        makedirs("pics", exist_ok=True)
        jpg_request = get(url)
        with open(dist,"wb") as f:
            f.write(jpg_request.content)
        # Short pause after each real download to throttle requests.
        sleep(0.3)
    return dist

def down_mp4(dist,url,name,title):
    """
    Stream an MV video to ``{dist}/{name}`` while drawing a progress bar.

    If the target file already exists nothing is requested from the network.
    The data is first written to ``{dist}/{name}.part`` and renamed once the
    download completed, so an interrupted run does not leave a truncated
    file that later looks like a finished download.

    :param dist: directory the video is stored in (created when missing)
    :param url: address of the video
    :param name: file name inside ``dist``
    :param title: MV title, only used in the "already exists" message
    :return: None
    """
    # Local import: the file only imports ``path`` from ``os``.
    from os import makedirs, replace

    target = f'{dist}/{name}'
    # Check before opening a connection so finished videos cost no request.
    if path.exists(target):
        print(f'{title} 已经存在')
        return

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.81 Safari/537.36',
    }
    # NOTE(security): verify=False turns off TLS certificate validation and
    # the call below silences the resulting warnings; kept as in the original.
    requests.packages.urllib3.disable_warnings()
    req = get(url, headers=headers, stream=True,verify=False,allow_redirects=True)
    try:
        # The header is absent on chunked responses; 0 means "size unknown".
        file_size = int(req.headers.get('content-length', 0))
        print(f"获取视频总长度:{file_size}")
        if dist:
            makedirs(dist, exist_ok=True)
        partial = target + ".part"
        with open(partial, 'wb') as f:
            dl = 0
            for chunk in req.iter_content(chunk_size=4096):
                if not chunk:
                    continue
                dl += len(chunk)
                f.write(chunk)
                # 50-character bar; stays empty when the total size is unknown.
                done = min(50, int(50 * dl / file_size)) if file_size > 0 else 0
                times = time.strftime("%H:%M:%S", time.localtime(int(time.time())))
                stdout.write("\r[%s%s]%.2f/%.2f M  %s" % (
                    '=' * done, ' ' * (50 - done), dl / float(1024 * 1024), file_size / float(1024 * 1024), times))
                stdout.flush()
            print("\n")
        # Only a fully written file gets the final name.
        replace(partial, target)
    finally:
        # Release the streamed connection on success and on failure alike.
        req.close()

def get_kugou(id,page=1):
    """
    Crawl the MV list of a Kugou user and store it through write_json.

    Pages 1..``page`` are requested one after another (20 entries are
    requested per page). Crawling stops early when a page comes back empty
    or when write_json reports that the entries are already stored.

    :param id: Kugou UserId
    :param page: maximum number of pages to crawl
    :return: the ``id`` that was passed in
    """
    header = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.71 Safari/537.36',
    }
    # The service answers with JSONP; capture the argument of the callback.
    pattern_obj = re.compile(r".*?back\((.*?)\);")
    # range() excludes its end, so page + 1 is needed to really fetch
    # ``page`` pages (the default of 1 used to fetch nothing).
    for i in range(1, page + 1):
        url = "https://fx.service.kugou.com/NServices/Video/OfflineVideoService/getVideoList?args=[%22{}%22,{},0,20]&jsonpcallback=jsonphttpsfxservicekugoucomNServicesVideoOfflineVideoServicegetVideoListargs29640634946020jsonpcallback".format(id,i)
        response = get(url,headers=header)
        # Cut the JSON payload out of the JSONP wrapper and parse it.
        payload = re.findall(pattern_obj, response.text)[0]
        json_list = json.loads(payload)["data"]["list"]
        print(i,json_list)
        # An empty (or null) list means the previous page was the last one.
        if not json_list:
            # Page i held nothing, so only i - 1 pages were crawled.
            print("已经爬取了:", (i - 1) * 20)
            print("已经结束了!")
            break
        # Merge this page into the JSON file; 1 means it was already known.
        retl = write_json(id,json_list)
        if retl == 1:
            print("已经爬取完最新的MV。。。")
            break
        # Pause between pages to throttle requests.
        sleep(1)
    return id
def down_mv_jpg(obj):
    """
    Download the cover image of every MV entry through down_jpg.

    :param obj: iterable of MV entries; each needs the keys ``imgUrl``
                and ``title``
    :return: None
    """
    for entry in obj:
        cover_url, mv_title = entry["imgUrl"], entry["title"]
        saved_to = down_jpg(cover_url)
        # "\r" with end="" keeps the progress on a single console line.
        print(f"\r {mv_title} 已经保存到 {saved_to} 目录中。",end="")
    print("\n所有图片已经保存完毕。。。")


def down_mv_mp4(obj):
    """
    Download the video of every MV entry through down_mp4.

    An entry is skipped when ``mp4/{hashValue}`` already exists. Otherwise
    the mvInfo service is asked for the video address, hash and title, and
    the video is stored in the ``mp4`` directory under its hash.

    :param obj: iterable of MV entries; each needs the keys ``id`` and
                ``hashValue``
    :return: None
    """
    target_dir = "mp4"
    for position, entry in enumerate(obj, start=1):
        video_id = entry["id"]
        file_hash = entry["hashValue"]
        if path.exists(f"mp4/{file_hash}"):
            print(f"\r {position} {video_id} 已经下过。。。",end="")
            continue
        reply = get(f"https://fx.service.kugou.com/mvcenter/bss/mvInfo?pid=7&videoId={video_id}&deviceId=")
        info = json.loads(reply.text)["data"]
        video_url = info["videoUrl"]
        file_name = info["hashValue"]
        mv_title = info["title"]
        print(f"\r 下载第 {position} 个 {mv_title} 中...",end="")
        down_mp4(target_dir, video_url, file_name, mv_title)
        # Short pause between downloads to throttle requests.
        sleep(0.2)
    print("\n所有MP4已经保存完。。。")

if __name__ == "__main__":
    # Kugou UserId of the streamer to crawl - change this before use.
    user_id = "296406349"
    # Filled by menu option 2; options 3 and 4 need it.
    mv_json = None
    while True:
        print("""
        欢迎使用屹铭繁星下载器。
        1.获取MV信息
        2.加载MV信息
        3.下载jpg
        4.下载mp4
        5.退出
        """)
        try:
            commod = int(input("请输入您的指令:"))
        except ValueError:
            # Non-numeric input used to crash the program; ask again instead.
            print("输入无效,请输入数字。")
            continue
        if commod == 1:
            print("MV爬中中。。。")
            get_kugou(user_id,500)
        elif commod == 2:
            print("加载json中。。。")
            mv_json = get_json(user_id)
            print("加载json完成")
        elif commod in (3, 4):
            # Downloading before loading used to raise NameError.
            if mv_json is None:
                print("请先选择 2 加载MV信息。")
                continue
            if commod == 3:
                print("下载jgg中...")
                down_mv_jpg(mv_json)
            else:
                print("下载mp4中...")
                down_mv_mp4(mv_json)
        else:
            # Any other number (including 5) leaves the program.
            print("欢迎下次使用,再见。")
            exit(0)

使用前请先将代码中的酷狗 UserId(示例中为 296406349)修改为要下载的主播的 id。

最后于 2022-8-25 被admin编辑 ,原因: 添加购买
最新回复 (2)
  • 追风少年
    2022-8-25 2 1
    哈哈,不错哦!
    这个老6
    • 朕弟分享 | 专注小众,乐于分享!
      4
          
返回
发新帖 搜索 反馈 回顶部