Artificial Intelligence 12 min read

Python Project: Download Bilibili Video, Extract Frames, Perform Human Segmentation, Generate Word Cloud, and Compose Final Video

This tutorial walks through a complete Python workflow that downloads a Bilibili video, extracts frames with OpenCV, uses Baidu AI for human segmentation, crawls danmu comments, creates a masked word‑cloud animation, and finally merges the clips with audio into a polished video.

Python Programming Learning Circle
Python Programming Learning Circle
Python Programming Learning Circle
Python Project: Download Bilibili Video, Extract Frames, Perform Human Segmentation, Generate Word Cloud, and Compose Final Video

The article presents a step‑by‑step Python project aimed at beginners who want to practice video processing, computer‑vision, and AI techniques on a Bilibili video.

Import Required Modules

First, the script automatically installs the necessary libraries using os.system() and then imports them:

import os
import time

# Third-party libraries the rest of the pipeline needs, installed from a
# fast PyPI mirror before the imports below run.
libs = {"lxml","requests","pandas","numpy","you-get","opencv-python","fake_useragent","matplotlib","moviepy"}
for lib in libs:
    # BUG FIX: os.system() never raises an exception on a failed install,
    # so the original bare `except:` was dead code and "下载失败" could
    # never print. Inspect the shell exit status instead (0 == success).
    status = os.system(f"pip3 install -i https://pypi.doubanio.com/simple/ {lib}")
    if status == 0:
        print(lib + "下载成功")
    else:
        print("下载失败")

import os
import re
import cv2
import jieba
import requests
import moviepy
import pandas as pd
import numpy as np
from PIL import Image
from lxml import etree
from wordcloud import WordCloud
import matplotlib.pyplot as plt
from fake_useragent import UserAgent

Video Download

Using you-get the target Bilibili video is downloaded:

# Install the downloader CLI.
pip install you-get
# NOTE: `-i` only PRINTS the available streams/formats — it downloads nothing.
you-get -i https://www.bilibili.com/video/BV11C4y1h7nX
# Actually download the video (default/best quality stream):
you-get https://www.bilibili.com/video/BV11C4y1h7nX

Video Splitting (Frame Extraction)

OpenCV reads the video and saves each frame as an image file:

# -*- coding:utf-8 -*-
# Split the downloaded video into individual JPEG frames with OpenCV.
import cv2
import os

# BUG FIX: cv2.imwrite silently returns False when the target directory
# does not exist, producing zero frames with no error. Create it up front.
os.makedirs("./pictures", exist_ok=True)

cap = cv2.VideoCapture(r"无价之姐~让我乘风破浪~~~.flv")
num = 1  # 1-based frame counter; matches mask_{num}/wordcloud_{num} naming later
while True:
    ret, frame = cap.read()  # ret becomes False once the stream is exhausted
    if not ret:
        break
    cv2.imwrite(f"./pictures/img_{num}.jpg", frame)
    num += 1
cap.release()  # release the underlying video handle

Human Segmentation with Baidu AI

After creating a Baidu AI body‑analysis application, the script sends each extracted frame to the API and saves the segmented foreground:

# -*- coding:utf-8 -*-
# Send every extracted frame to the Baidu AI body-segmentation API and save
# the returned person mask (white person on black background) as a PNG.
import cv2, base64, numpy as np, os, time, random
from aip import AipBodyAnalysis

APP_ID = '******'
API_KEY = '*******************'
SECRET_KEY = '********************'
client = AipBodyAnalysis(APP_ID, API_KEY, SECRET_KEY)

path = './mask_img/'
os.makedirs(path, exist_ok=True)  # cv2.imwrite fails silently without it

img_files = os.listdir('./pictures')
for num in range(1, len(img_files)+1):
    img_path = f'./pictures/img_{num}.jpg'
    with open(img_path, 'rb') as fp:
        img_info = fp.read()
    # BUG FIX: `width`/`height` were undefined in the original (NameError on
    # the resize below). Take them from the source frame so the returned
    # label map is resized back to the frame's dimensions.
    height, width = cv2.imread(img_path).shape[:2]
    seg_res = client.bodySeg(img_info)
    # 'labelmap' is a base64-encoded image where person pixels have value 1.
    labelmap = base64.b64decode(seg_res['labelmap'])
    nparr = np.frombuffer(labelmap, np.uint8)
    labelimg = cv2.imdecode(nparr, 1)
    # INTER_NEAREST keeps label values exact — interpolation would blend
    # 0/1 labels into meaningless intermediate values.
    labelimg = cv2.resize(labelimg, (width, height), interpolation=cv2.INTER_NEAREST)
    # Promote label 1 (person) to white (255); background stays 0.
    new_img = np.where(labelimg == 1, 255, labelimg)
    mask_name = path + f'mask_{num}.png'
    cv2.imwrite(mask_name, new_img)
    print(f'======== 第{num}张图像分割完成 ========')

Danmu (Comment) Crawling

The script builds a date range, sends requests to Bilibili’s danmu API, and stores the results in an Excel file:

# Crawl the historical danmu (bullet comments) of one Bilibili video — one
# request per day in the date range — and save them all to an Excel file.
import requests, pandas as pd, re, csv, datetime
from fake_useragent import UserAgent
from concurrent.futures import ThreadPoolExecutor

ua = UserAgent()
url = "https://api.bilibili.com/x/v2/dm/history"
start, end = '20200808', '20200908'
date_list = [x for x in pd.date_range(start, end).strftime('%Y-%m-%d')]

df = []  # BUG FIX: the original appended to `df` without ever defining it
start_time = datetime.datetime.now()  # BUG FIX: used below but never defined

def Grab_barrage(date):
    """Fetch one day's danmu XML and append every comment's text to df."""
    headers = {
        "origin": "https://www.bilibili.com",
        "referer": "https://www.bilibili.com/video/...",
        # NOTE(review): this endpoint normally requires a logged-in SESSDATA
        # cookie — confirm before running.
        "cookie": "",
        # BUG FIX: UserAgent.random is a property, not a method; calling it
        # as ua.random() raised TypeError.
        "user-agent": ua.random,
    }
    params = {'type':1, 'oid':'222413092', 'date':date}
    r = requests.get(url, params=params, headers=headers)
    r.encoding = 'utf-8'
    # Danmu come back as XML: <d p="metadata">comment text</d>
    comments = re.findall('<d p=".*?">(.*?)</d>', r.text)
    for i in comments:
        # list.append is atomic in CPython, so appending from worker
        # threads is safe here.
        df.append(i)

# Fan the per-day requests out over a small thread pool (I/O-bound work).
with ThreadPoolExecutor(max_workers=4) as executor:
    executor.map(Grab_barrage, date_list)

pd.DataFrame(df).to_excel('danmu.xlsx')
print(f'用时:{(datetime.datetime.now()-start_time).total_seconds()}s')

Word Cloud Generation

After cleaning the comments (removing repeated characters) and loading custom stop‑words, the script creates a masked word‑cloud for each frame and saves the images:

# Build one word cloud per segmentation mask so the words fill the person
# silhouette in every frame of the video.
import collections, jieba, re, os, numpy as np  # BUG FIX: `os` was missing but used below
from wordcloud import WordCloud
from PIL import Image
import matplotlib.pyplot as plt

# Load the danmu text and keep only CJK character runs, joined with '/' so
# jieba treats them as separate segments.
with open('barrages.txt', 'r', encoding='utf-8') as f:
    data = f.read()
new_data = '/'.join(re.findall(r'[\u4e00-\u9fa5]+', data))
seg_list = jieba.cut(new_data, cut_all=True)

# Drop stop words and single characters, then count word frequencies.
with open('stoplist.txt', 'r', encoding='utf-8') as f:
    stop_words = set(f.read().split('\n'))
words = [w for w in seg_list if w not in stop_words and len(w)>1]
word_counts = collections.Counter(words)

os.makedirs('./wordcloud', exist_ok=True)  # ensure the output dir exists
for num in range(1, len(os.listdir('./mask_img'))+1):
    # Invert the mask: WordCloud fills low-value (dark) regions and avoids
    # 255, so the white-person-on-black mask becomes a fillable silhouette.
    mask = 255 - np.array(Image.open(f'./mask_img/mask_{num}.png'))
    wc = WordCloud(background_color='black', mask=mask, mode='RGBA', max_words=500,
                   font_path='simhei.ttf').generate_from_frequencies(word_counts)
    # BUG FIX: the original opened a new pyplot figure on every iteration and
    # never closed it, leaking hundreds of figures. wc.to_file() saves the
    # image directly — no pyplot needed.
    wc.to_file(f'./wordcloud/wordcloud_{num}.png')
    print(f'======== 第{num}张词云图生成 ========')

Video Composition

Using OpenCV, the generated word‑cloud images are stitched into a video file:

# Stitch the generated word-cloud images into an .mp4 with OpenCV.
import cv2, os

video_dir = 'result.mp4'
fps = 30                   # should match the source video's frame rate
img_size = (1920,1080)     # (width, height) expected by VideoWriter
fourcc = cv2.VideoWriter_fourcc('M','P','4','V')
videoWriter = cv2.VideoWriter(video_dir, fourcc, fps, img_size)

# NOTE(review): the frame range 88..887 is hard-coded — presumably the range
# of successfully generated word clouds; confirm against ./wordcloud.
for i in range(88, 888):
    img_path = f'./wordcloud/wordcloud_{i}.png'
    frame = cv2.imread(img_path)
    if frame is None:
        # BUG FIX: cv2.imread returns None for a missing/corrupt file, and
        # the original then crashed inside cv2.resize. Skip such frames.
        print(f'跳过缺失图片 {img_path}')
        continue
    frame = cv2.resize(frame, img_size)
    videoWriter.write(frame)
    print(f'======== 第{i}张图片合进视频 ========')
videoWriter.release()

Audio Adding

MoviePy adds a background music track to the composed video and writes the final output:

# Overlay background music on the (silent) composed video with MoviePy.
import moviepy.editor as mpy

my_clip = mpy.VideoFileClip('result.mp4')
# Trim the song to its first 25 seconds to match the clip.
audio_background = mpy.AudioFileClip('song.mp3').subclip(0,25)
final_clip = my_clip.set_audio(audio_background)
final_clip.write_videofile('final_video.mp4')
# BUG FIX: release the underlying ffmpeg readers; without close() the
# process can leak file handles or hang on exit on some platforms.
audio_background.close()
my_clip.close()

The resulting video shows a dancing word‑cloud animation synchronized with music, demonstrating a complete end‑to‑end pipeline from data acquisition to visual storytelling.

pythonAIvideo processingopencvWeb ScrapingMoviePyWordCloud
Python Programming Learning Circle
Written by

Python Programming Learning Circle

A global community of Chinese Python developers offering technical articles, columns, original video tutorials, and problem sets. Topics include web full‑stack development, web scraping, data analysis, natural language processing, image processing, machine learning, automated testing, DevOps automation, and big data.

0 followers
Reader feedback

How this landed with the community

login Sign in to like

Rate this article

Was this worth your time?

Sign in to rate
Discussion

0 Comments

Thoughtful readers leave field notes, pushback, and hard-won operational detail here.