How to Build a Python Douyin Video Downloader Without Selenium
This article walks through creating a Python-based Douyin video downloader that extracts Chrome cookies, parses video URLs, and downloads single videos or entire collections using a simple GUI, complete with code snippets and usage screenshots.
Program Demonstration
Hello, I am XiaoXiaoMing. I developed a Douyin video downloader; the interface looks like this:
If the local Chrome browser has never visited Douyin, clicking the start download button shows the following output:
Click "Visit Douyin homepage" to let the program open Douyin in Chrome, then click download again:
The video is a collection video:
Select the first option to download the entire collection:
The whole collection is downloaded at once:
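Under the hood, the "Visit Douyin homepage" button simply launches the locally installed Chrome executable pointed at https://www.douyin.com so that fresh cookies get written to disk. A minimal sketch of the idea (the `chrome_path` shown here is a hypothetical example; the real path is read from the Windows registry later in the article):

```python
import subprocess

def build_open_command(chrome_path, url="https://www.douyin.com"):
    # Command line that asks the local Chrome install to open Douyin
    return [chrome_path, url]

# Hypothetical install location, for illustration only
cmd = build_open_command(r"C:\Program Files\Google\Chrome\Application\chrome.exe")
# subprocess.Popen(cmd) would launch the browser on Windows
```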
Development Process
First, based on the previous article "Five Levels of Extracting Chrome Cookies", we read Chrome's installation path and local Douyin cookies:
"""
小小明的代码
CSDN主页:https://blog.csdn.net/as604049322
"""
__author__ = '小小明'
__time__ = '2022/1/23'
import base64
import json
import os
import sqlite3
import winreg
import win32crypt
from cryptography.hazmat.primitives.ciphers.aead import AESGCM
def load_local_key(localStateFilePath):
"读取chrome保存在json文件中的key再进行base64解码和DPAPI解密得到真实的AESGCM key"
with open(localStateFilePath, encoding='u8') as f:
encrypted_key = json.load(f)['os_crypt']['encrypted_key']
encrypted_key_with_header = base64.b64decode(encrypted_key)
encrypted_key = encrypted_key_with_header[5:]
key = win32crypt.CryptUnprotectData(encrypted_key, None, None, None, 0)[1]
return key
def decrypt_value(key, data):
"AESGCM解密"
nonce, cipherbytes = data[3:15], data[15:]
aesgcm = AESGCM(key)
plaintext = aesgcm.decrypt(nonce, cipherbytes, None).decode('u8')
return plaintext
def fetch_host_cookie(host):
"获取指定域名下的所有cookie"
userDataDir = os.environ['LOCALAPPDATA'] + r'\Google\Chrome\User Data'
localStateFilePath = userDataDir + r'\Local State'
cookiepath = userDataDir + r'\Default\Cookies'
# 97版本已经将Cookies移动到Network目录下
if not os.path.exists(cookiepath) or os.stat(cookiepath).st_size == 0:
cookiepath = userDataDir + r'\Default\Network\Cookies'
sql = f"select name,encrypted_value from cookies where host_key like '%.{host}'"
cookies = {}
key = load_local_key(localStateFilePath)
with sqlite3.connect(cookiepath) as conn:
cu = conn.cursor()
for name, encrypted_value in cu.execute(sql).fetchall():
cookies[name] = decrypt_value(key, encrypted_value)
return cookies
def get_chrome_path():
try:
key = winreg.OpenKey(winreg.HKEY_CLASSES_ROOT, r"ChromeHTML\Application")
path = winreg.QueryValueEx(key, "ApplicationIcon")[0]
chrome_path = path[:path.rfind(",")]
return chrome_path
except FileNotFoundError as e:
return
if __name__ == '__main__':
print(fetch_host_cookie("douyin.com"))
print(get_chrome_path())With this utility class we no longer need Selenium.
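The slicing in `decrypt_value` reflects how Chrome 80+ stores each cookie value: a 3-byte `v10` version prefix, a 12-byte nonce, then the AES-GCM ciphertext and tag. A self-contained round-trip with a locally generated key (not a real Chrome key) illustrates the layout:

```python
import os
from cryptography.hazmat.primitives.ciphers.aead import AESGCM

def decrypt_value(key, data):
    # Layout: b'v10' version prefix, 12-byte nonce, ciphertext + 16-byte tag
    nonce, cipherbytes = data[3:15], data[15:]
    return AESGCM(key).decrypt(nonce, cipherbytes, None).decode('u8')

key = AESGCM.generate_key(bit_length=256)  # stand-in for the DPAPI-protected key
nonce = os.urandom(12)
blob = b'v10' + nonce + AESGCM(key).encrypt(nonce, b'sessionid=abc123', None)
print(decrypt_value(key, blob))  # → sessionid=abc123
```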
The core video parsing code is:
# additional imports used by the snippets below
import re
import time
import requests
from urllib.parse import unquote, urljoin


def get_video_url(url, cookies):
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36",
        "referer": "https://www.douyin.com"
    }
    res = requests.get(url, headers=headers, cookies=cookies)
    if res.status_code == 200:
        RENDER_DATA, = re.findall(r'<script id="RENDER_DATA" type="application/json">([^<>]+)</script>', res.text)
        data = json.loads(unquote(RENDER_DATA))
        # collection pages keep the detail under key '8', video pages under '34'
        key = '8' if url.find("collection") != -1 else '34'
        try:
            detail = data[key]['aweme']['detail']
            title = detail['desc']
        except Exception as e:
            print(f"{url} is invalid, failing key:", e)
            return
        if not title:
            title, = re.findall(r"<title[^>]+>\s*([^>]+)\s*</title>", res.text)
        video_url = urljoin(url, detail['video']['playApi'])
        collection_urls = set(re.findall(r"//www\.douyin\.com/collection/\d+/\d+", res.text))
        collection_urls = [urljoin("https://www.douyin.com", u) for u in collection_urls]
        collection_urls.sort(key=lambda s: int(s[s.rfind('/') + 1:]))
        collection_title = re.findall(r"<h2 [^>]+>([^<>]+)</h2>", res.text)[0]
        return video_url, title, collection_urls, collection_title
    else:
        print('Video link request failed!!!')

The core video download code is:
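The RENDER_DATA block is URL-encoded JSON embedded in a script tag, so the extraction step can be exercised offline against a synthetic page (the payload below is made up for illustration):

```python
import json
import re
from urllib.parse import quote, unquote

# Synthetic stand-in for a Douyin video page
payload = quote(json.dumps({"34": {"aweme": {"detail": {"desc": "demo title"}}}}))
html = f'<script id="RENDER_DATA" type="application/json">{payload}</script>'

# Same regex as in get_video_url above
RENDER_DATA, = re.findall(
    r'<script id="RENDER_DATA" type="application/json">([^<>]+)</script>', html)
data = json.loads(unquote(RENDER_DATA))
print(data['34']['aweme']['detail']['desc'])  # → demo title
```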
def download_video(video_url, title, folder):
    start_time = time.time()
    res = requests.get(url=video_url, stream=True)
    done_size, total = 0, int(res.headers['content-length'])
    chunk_size = 1024 * 1024
    title = format_filename(title)
    file_size = round(total / 1024 / 1024, 2)
    basename = f"{title}.mp4"
    filename = f"{folder}/{title}.mp4"
    if os.path.exists(filename):
        print(basename, "already exists, skipping...")
        return
    print("-----------------------------------")
    print(f'Downloading: {basename}\nFile size: {file_size}MB')
    with open(filename, 'wb') as f:
        for chunk in res.iter_content(chunk_size):
            f.write(chunk)
            done_size += len(chunk)
            cost_time = time.time() - start_time
            yield done_size, cost_time, total
    cost_time = time.time() - start_time
    print(f'File {basename} downloaded!\nTime taken: {cost_time:.2f} s')

For the analysis of video links, see the article "Using Python to Download Douyin Short Videos Without Watermark" at https://mp.weixin.qq.com/s?__biz=MzUzODA5MjM5NQ==&mid=2247497183&idx=1&sn=e68fb38785bf3f16070519f5679f3ebc&scene=21#wechat_redirect
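`download_video` calls a `format_filename` helper that is not shown in the excerpt. A plausible sketch (my own version, not the author's) that replaces characters Windows forbids in file names and caps the length:

```python
import re

def format_filename(title, max_len=100):
    # Replace characters that are invalid in Windows file names,
    # collapse runs of whitespace, and cap the length
    title = re.sub(r'[\\/:*?"<>|\r\n]', '_', title).strip()
    title = re.sub(r'\s+', ' ', title)
    return title[:max_len] or 'untitled'

print(format_filename('我的视频: part 1/3?'))  # → 我的视频_ part 1_3_
```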
UI Design Core Code
layout = [
    [sg.Text('Douyin video URL:', font=("楷体", 12)),
     sg.In(key='url', size=(70, 1), text_color="#bb8b59",
           default_text="https://www.douyin.com/video/6803929443069988103")],
    [sg.Checkbox('If this is a collection, download the whole collection', key="download_collection", default=False),
     sg.Button('Start download'),
     sg.Button('Clear output'),
     sg.Button('Visit Douyin homepage'),
     sg.Button('Visit current URL')],
    [sg.Output(size=(85, 10), key="out", text_color="#15d36a")],
    [sg.ProgressBar(1000, size=(20, 20), key='video_bar', bar_color=("#bb8b59", "#295273")),
     sg.Text('000.0MB,00/00\n00:00<00:00', key="message_video"),
     sg.ProgressBar(1000, size=(20, 20), key='progressbar', bar_color=("#15d36a", "#295273")),
     sg.Text('00.00MB/00.00MB\n00.00MB/s', key="message")],
    [sg.Text('Output directory:', font=("楷体", 12)), sg.In(size=(35, 1), key="save_dir"),
     sg.FolderBrowse('...', target='save_dir', initial_folder="."),
     sg.Checkbox(' Open the folder\nwhen done', key="open_folder", default=True),
     sg.Button('Open output directory')],
    [sg.Text("@小小明:https://blog.csdn.net/as604049322")]
]

Program Download
The complete code and packaged tool can be downloaded from:
https://gitcode.net/as604049322/python_gui/-/tree/master/douyin
This article has been distilled and summarized from source material and republished for learning and reference. If you believe it infringes your rights, please contact us and we will review it promptly.
Python Crawling & Data Mining
Life's short, I code in Python. This channel shares Python web crawling, data mining, analysis, processing, visualization, automated testing, DevOps, big data, AI, cloud computing, machine learning tools, resources, news, technical articles, tutorial videos and learning materials. Join us!
