What Types of Douyin Videos Really Capture Millions of Views? A Data‑Driven Deep Dive
This article analyzes a dataset of over 5,000 Douyin influencers, exploring gender balance, likes, fan counts, comments, shares, content categories, geographic distribution, graduation schools, verification status, and bio word clouds, and provides full Python code for each visualization.
01 Data Acquisition
Data was obtained from a third‑party monitoring service covering more than 5,000 Douyin "big V" accounts. The CSV includes fields such as nickname, gender, location, category, likes, followers, video count, comments, shares, following count, graduation school, verification, and biography.
02 Data Visualization
Python libraries (pandas, pyecharts, matplotlib, wordcloud) were used to visualize the dataset.
Gender Distribution
After removing unknown values, the gender ratio is roughly 1:1.
Likes – Top 10
The top 10 accounts by total likes are dominated by news media; many of them accumulated over 100 million likes during the pandemic.
Fans – Distribution
"People's Daily" and "CCTV News" each have close to 120 million followers, while many accounts have only a few thousand.
Comments – Top 10
Most commented videos belong to the news media category.
Shares – Top 10
Users prefer sharing news and food‑related videos.
Likes by Content Type
A treemap shows that beauty, humor, gaming, and drama videos attract the most likes.
Average Likes per Video – Top 10
Li Xian ranks first in average likes per video.
Geographic Distribution – Provinces
Guangdong, Zhejiang and Sichuan have the highest number of Douyin big V accounts.
Geographic Distribution – Cities
Beijing leads, followed by Hangzhou.
International Presence
The United States has the most foreign Douyin big V accounts.
Graduation Schools – Top 10
Beijing Film Academy, Communication University of China, Zhejiang Media College, and other performing‑arts schools dominate.
Verification Status
Most big V accounts have a verified status other than "unknown".
Bio Word Cloud
A word cloud of the biographies highlights keywords such as "business cooperation" and "content creator".
Code Samples
The following Python code snippets perform the data loading, processing, and visualizations described above.
from pyecharts.charts import Pie, Bar, TreeMap, Map, Geo
from wordcloud import WordCloud, ImageColorGenerator
from pyecharts import options as opts
import matplotlib.pyplot as plt
from PIL import Image
import pandas as pd
import numpy as np
import jieba
# Load the influencer dataset once at import time; every chart function below
# receives this DataFrame (or a filtered version of it) as its ``df`` argument.
df = pd.read_csv('douyin.csv', header=0, encoding='utf-8-sig')
print(df)


def create_gender(df):
    """Render a donut chart of the gender distribution to an HTML file.

    The ``gender`` column is expected to hold the codes 0 (unknown), 1 (male)
    and 2 (female).  Codes are matched by their string form so that both the
    text values ``'0'``/``'1'``/``'2'`` and numeric values (which is what
    ``pd.read_csv`` produces for a purely numeric column) are labelled.

    Args:
        df: DataFrame with a ``gender`` column.  It is not modified.
    """
    labels = {'0': '未知', '1': '男性', '2': '女性'}

    def to_label(code):
        key = str(code)
        # A numeric column containing missing values is read as float, so the
        # codes arrive as 1.0 / 2.0; strip the trailing ".0" before lookup.
        if key.endswith('.0'):
            key = key[:-2]
        return labels.get(key, code)

    gender = df['gender'].map(to_label)
    # groupby drops missing values and sorts by label, as the original did.
    counts = gender.groupby(gender).count()
    data_pair = [list(pair) for pair in zip(counts.index.tolist(), counts.tolist())]

    pie = Pie(init_opts=opts.InitOpts(width="800px", height="400px"))
    pie.add('', data_pair, radius=["40%", "75%"])
    pie.set_global_opts(
        title_opts=opts.TitleOpts(title="抖音大V性别分布情况", pos_left="center", pos_top="top"),
        legend_opts=opts.LegendOpts(orient="vertical", pos_left="left"),
        toolbox_opts=opts.ToolboxOpts(is_show=True, feature={"saveAsImage": {}}),
    )
    pie.set_series_opts(label_opts=opts.LabelOpts(is_show=True, formatter="{b}:{d}%"))
    pie.render("抖音大V性别分布情况.html")


def create_likes(df):
    """Render a horizontal bar chart of the ten accounts with the most likes.

    Values are shown in units of 100 million (亿), rounded to one decimal.

    Args:
        df: DataFrame with ``name`` and ``likes`` columns.
    """
    top = df.sort_values('likes', ascending=False).head(10)
    names = top['name'].tolist()
    likes_in_yi = [float('%.1f' % (total / 100000000)) for total in top['likes']]

    bar = Bar(init_opts=opts.InitOpts(width="800px", height="400px"))
    # The axes are swapped below, so the lists are reversed to keep the
    # largest value at the top of the chart.
    bar.add_xaxis(names[::-1])
    bar.add_yaxis('', likes_in_yi[::-1])
    bar.set_global_opts(
        title_opts=opts.TitleOpts(title="抖音大V点赞数TOP10(亿)", pos_left="center", pos_top="18"),
        toolbox_opts=opts.ToolboxOpts(is_show=True, feature={"saveAsImage": {}}),
        xaxis_opts=opts.AxisOpts(splitline_opts=opts.SplitLineOpts(is_show=True)),
    )
    bar.set_series_opts(label_opts=opts.LabelOpts(is_show=True, position="right", color="black"))
    bar.reversal_axis()
    bar.render("抖音大V点赞数TOP10(亿).html")


def create_type_likes(df):
    """Render a treemap of total likes per content category.

    Args:
        df: DataFrame with ``category`` and ``likes`` columns.
    """
    likes_by_category = df.groupby('category')['likes'].sum()
    nodes = [
        {'name': category, 'value': total}
        for category, total in zip(likes_by_category.index.tolist(), likes_by_category.tolist())
    ]

    treemap = TreeMap(init_opts=opts.InitOpts(width="800px", height="400px"))
    treemap.add('', nodes)
    treemap.set_global_opts(
        title_opts=opts.TitleOpts(title="各类型抖音大V点赞数汇总图", pos_left="center", pos_top="5"),
        toolbox_opts=opts.ToolboxOpts(is_show=True, feature={"saveAsImage": {}}),
        legend_opts=opts.LegendOpts(is_show=False),
    )
    treemap.render("各类型抖音大V点赞数汇总图.html")


def create_avg_likes(df):
    """Render a horizontal bar chart of the ten highest average likes per video.

    Accounts with no videos are excluded to avoid dividing by zero.  Values
    are shown in units of ten thousand (万), rounded to one decimal.

    Args:
        df: DataFrame with ``name``, ``likes`` and ``videos`` columns.  It is
            not modified.
    """
    # Work on an explicit copy: adding a column to a filtered slice would
    # otherwise trigger pandas' SettingWithCopy behaviour.
    active = df[df['videos'] > 0].copy()
    active['result'] = (active['likes'] / (active['videos'] * 10000)).round(1)
    top = active.sort_values('result', ascending=False).head(10)
    names = top['name'].tolist()
    averages = ['%.1f' % value for value in top['result']]

    bar = Bar(init_opts=opts.InitOpts(width="800px", height="400px"))
    bar.add_xaxis(names[::-1])
    bar.add_yaxis('', averages[::-1])
    bar.set_global_opts(
        title_opts=opts.TitleOpts(title="抖音大V平均视频点赞数TOP10(万)", pos_left="center", pos_top="18"),
        toolbox_opts=opts.ToolboxOpts(is_show=True, feature={"saveAsImage": {}}),
        xaxis_opts=opts.AxisOpts(splitline_opts=opts.SplitLineOpts(is_show=True)),
    )
    bar.set_series_opts(label_opts=opts.LabelOpts(is_show=True, position="right", color="black"))
    bar.reversal_axis()
    bar.render("抖音大V平均视频点赞数TOP10(万).html")


def create_province_map(df):
    """Render a choropleth map of account counts per Chinese province.

    Administrative suffixes are stripped from the province names before
    counting.  Compared with the earlier chained replacements this also
    handles "回族自治区" (previously "宁夏回族自治区" became "宁夏回族"),
    "市" and "特别行政区", and only strips a suffix at the end of the name.

    NOTE(review): this assumes the built-in "china" map is keyed by short
    region names such as 宁夏 / 北京 / 香港 - confirm against the pyecharts
    map assets in use.

    Args:
        df: DataFrame with ``country`` and ``province`` columns.
    """
    china = df[df["country"] == "中国"]
    suffix = r"(?:省|市|壮族自治区|回族自治区|维吾尔自治区|自治区|特别行政区)$"
    provinces = china["province"].str.replace(suffix, "", regex=True)
    province_counts = provinces.groupby(provinces).count()
    data_pair = [
        list(pair)
        for pair in zip(province_counts.index.tolist(), province_counts.tolist())
    ]

    # Named ``province_map`` rather than ``map`` to avoid shadowing the builtin.
    province_map = Map(init_opts=opts.InitOpts(width="800px", height="400px"))
    province_map.add('', data_pair, "china")
    province_map.set_global_opts(
        title_opts=opts.TitleOpts(title="抖音大V省份分布情况", pos_left="center", pos_top="0"),
        toolbox_opts=opts.ToolboxOpts(is_show=True, feature={"saveAsImage": {}}),
        visualmap_opts=opts.VisualMapOpts(max_=600, is_piecewise=False),
    )
    province_map.render("抖音大V省份分布情况.html")


def create_city(df):
    """Render a bar chart of the ten Chinese cities with the most accounts.

    Only a trailing "市" is removed from the city name, so names that contain
    the character elsewhere (for example "津市市") keep it.

    Args:
        df: DataFrame with ``country`` and ``city`` columns.
    """
    china = df[df["country"] == "中国"]
    cities = china["city"].str.replace(r"市$", "", regex=True)
    top = cities.groupby(cities).count().sort_values(ascending=False).head(10)

    bar = Bar(init_opts=opts.InitOpts(width="800px", height="400px"))
    bar.add_xaxis(top.index.tolist())
    bar.add_yaxis('', top.tolist())
    bar.set_global_opts(
        title_opts=opts.TitleOpts(title="抖音大V城市分布TOP10", pos_left="center", pos_top="18"),
        toolbox_opts=opts.ToolboxOpts(is_show=True, feature={"saveAsImage": {}}),
        yaxis_opts=opts.AxisOpts(splitline_opts=opts.SplitLineOpts(is_show=True)),
    )
    bar.set_series_opts(label_opts=opts.LabelOpts(is_show=True, position="top", color="black"))
    bar.render("抖音大V城市分布TOP10.html")


def create_wordcloud(df, picture, mask_path='douyin.png',
                     stopwords_path='chineseStopWords.txt',
                     font_path='方正兰亭刊黑.TTF'):
    """Build a word cloud from the account biographies and save it as an image.

    Args:
        df: DataFrame with a ``signature`` column holding the biographies.
        picture: Output path of the generated image.
        mask_path: Image used both as the shape mask and as the colour source.
        stopwords_path: GBK-encoded text file with one stop word per line.
        font_path: Font file used to draw the words.

    Raises:
        ValueError: If no biography contains any text to segment.
    """
    # Read the stop words as plain lines; a CSV parser would misinterpret
    # entries such as a lone quote character or NA-like tokens.
    with open(stopwords_path, encoding='gbk') as handle:
        stopwords = {line.strip() for line in handle if line.strip()}

    tokens = []
    # Missing biographies are NaN, not ""; dropping them keeps the literal
    # word "nan" out of the cloud.
    for signature in df['signature'].dropna():
        compact = str(signature).replace(' ', '')
        if compact:
            tokens.extend(jieba.cut(compact, cut_all=False))
    if not tokens:
        raise ValueError("no biography text available to build a word cloud")
    # Join once so that the last word of one biography and the first word of
    # the next stay separate tokens.
    text = ' '.join(tokens)

    # Load the image a single time as an integer array.  plt.imread returns
    # PNG data as floats in [0, 1], which WordCloud cannot use as a mask
    # because it treats pixels equal to 255 as the masked-out area.
    with Image.open(mask_path) as image:
        mask = np.array(image)
    image_colors = ImageColorGenerator(mask)

    wc = WordCloud(
        background_color='white', mask=mask, font_path=font_path,
        max_words=2000, max_font_size=70, min_font_size=1, prefer_horizontal=1,
        color_func=image_colors, random_state=50, stopwords=stopwords, margin=5,
    )
    wc.generate_from_text(text)
    plt.imshow(wc)
    plt.axis('off')
    wc.to_file(picture)
    print('生成词云成功!')


# NOTE: the sentence below is article footer text that was fused onto the last
# code line in the source; it is not part of the program.
# Signed-in readers can open the original source through BestHub's protected redirect.
This article has been distilled and summarized from source material, then republished for learning and reference. If you believe it infringes your rights, please contact us and we will review it promptly.
Python Crawling & Data Mining
Life's short, I code in Python. This channel shares Python web crawling, data mining, analysis, processing, visualization, automated testing, DevOps, big data, AI, cloud computing, machine learning tools, resources, news, technical articles, tutorial videos and learning materials. Join us!
How this landed with the community
Was this worth your time?
0 Comments
Thoughtful readers leave field notes, pushback, and hard-won operational detail here.
