How to Automate WeChat Public Account Metrics Scraping with Python and MitmProxy
This article explains how to use Python and MitmProxy to automatically capture reading counts, likes, and view numbers from WeChat public articles, covering installation, dynamic cookie/token handling, batch fetching, and full code examples for a streamlined backend scraping solution.
What is MitmProxy
Mitmproxy is an HTTP/HTTPS proxy with features similar to Fiddler, providing a command‑line interface (mitmdump) and a web UI (mitmweb) for inspecting traffic.
Installation and Setup
Install via pip install mitmproxy and start the web UI with mitmweb. Configure your mobile device to use the PC’s IP address and port 8080.
Scraping Read, Like, and View Counts
After installing the certificate on the phone, open any WeChat article. The request URL for metrics is https://mp.weixin.qq.com/mp/getappmsgext. The request requires the article URL, User‑Agent, cookie, and a body containing __biz, mid, idx, sn, plus fixed parameters is_only_read=1, is_temp_url=0, appmsg_type=9, and a time‑limited appmsg_token.
Python Implementation
The Articles class encapsulates the logic. Its read_like_nums method posts to the getappmsgext endpoint and returns read, old_like, and like numbers.
# articles.py
import html, requests, utils
class Articles(object):
    """Fetch read and like counters for WeChat public-account articles.

    Posts to ``https://mp.weixin.qq.com/mp/getappmsgext`` using an
    ``appmsg_token`` and cookie captured from a WeChat session.
    """

    def __init__(self, appmsg_token, cookie):
        """Store the credentials and the fixed request fields.

        Args:
            appmsg_token: Token placed in the endpoint's query string.
            cookie: Raw value sent as the ``Cookie`` request header.
        """
        self.appmsg_token = appmsg_token
        self.headers = {
            "User-Agent": "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/57.0.2987.132 MQQBrowser/6.2 Mobile",
            "Cookie": cookie
        }
        # Form fields that are identical for every article; per-article
        # fields are merged with these at request time.
        self.data = {
            "is_only_read": "1",
            "is_temp_url": "0",
            "appmsg_type": "9",
        }

    def read_like_nums(self, article_url):
        """Return ``(read_num, old_like_num, like_num)`` for ``article_url``.

        Raises:
            Exception: Propagated from ``get_appmsgext`` when the response
                carries no ``appmsgstat`` entry.
        """
        appmsgstat = self.get_appmsgext(article_url)["appmsgstat"]
        return appmsgstat["read_num"], appmsgstat["old_like_num"], appmsgstat["like_num"]

    def get_params(self, article_url):
        """Extract the query parameters of ``article_url``.

        HTML entities (for example ``&amp;``) are unescaped first. The query
        string is then handed to ``utils.str_to_dict`` with ``"&"`` and
        ``"="`` as separators.
        """
        # Imported here because the file header only imports html, requests
        # and utils; without it ``urlsplit`` is an undefined name.
        from urllib.parse import urlsplit

        article_url = html.unescape(article_url)
        # NOTE(review): utils is not visible here; presumably it returns a
        # dict of parameter name -> value.
        url_params = utils.str_to_dict(urlsplit(article_url).query, "&", "=")
        return url_params

    def get_appmsgext(self, article_url):
        """Request the statistics JSON for ``article_url``.

        Returns:
            The decoded JSON response, which contains an ``appmsgstat`` key.

        Raises:
            Exception: If the response has no ``appmsgstat`` key; the decoded
                response is passed as the exception argument.
        """
        url_params = self.get_params(article_url)
        appmsgext_url = f"https://mp.weixin.qq.com/mp/getappmsgext?appmsg_token={self.appmsg_token}&x5=0"
        # Build a per-request payload. Updating self.data in place would carry
        # one article's parameters over into the request for the next one.
        payload = dict(self.data)
        payload.update(url_params)
        appmsgext_json = requests.post(appmsgext_url, headers=self.headers, data=payload).json()
        if "appmsgstat" not in appmsgext_json:
            raise Exception(appmsgext_json)
        return appmsgext_json
if __name__ == '__main__':
    # Demo: replace the three placeholders with a captured appmsg_token,
    # the matching cookie string and the URL of the article to query.
    info = Articles('YOUR_APPMSG_TOKEN', 'YOUR_COOKIE')
    read_num, old_like_num, like_num = info.read_like_nums('ARTICLE_URL')
    print(read_num, old_like_num, like_num)

# Dynamic Cookie and Token Retrieval
Because appmsg_token and cookies expire, a Mitmproxy script (write_cookie.py) captures the getappmsgext URL and cookies, writes them to a file, and exits. ReadCookie parses this file to extract appmsg_token, biz, and the cookie string.
# write_cookie.py
import urllib, sys
from mitmproxy import http
class WriterCookie:
    """Mitmproxy script that writes URL and cookies to a file.

    The first response whose request URL contains
    ``mp.weixin.qq.com/mp/getappmsgext`` is recorded as two lines: the
    URL-decoded request URL, then ``str()`` of the request cookies. The
    process is then asked to exit.
    """

    def __init__(self, outfile: str) -> None:
        """Open ``outfile`` for writing; any existing content is truncated."""
        self.f = open(outfile, "w")

    # The annotation is quoted so it is not evaluated when the class is defined.
    def response(self, flow: "http.HTTPFlow") -> None:
        """Record the first getappmsgext request seen, then exit.

        Raises:
            SystemExit: After the URL and cookies have been written.
        """
        # Import the submodule explicitly: a bare ``import urllib`` does not
        # guarantee that ``urllib.parse`` is loaded.
        from urllib.parse import unquote

        url = unquote(flow.request.url)
        if "mp.weixin.qq.com/mp/getappmsgext" in url:
            self.f.write(url + "\n")
            self.f.write(str(flow.request.cookies))
            self.f.close()
            # sys.exit() raises SystemExit just like the site-provided exit(),
            # which is intended for interactive use only.
            sys.exit()
# Addon instances exposed to mitmproxy through the module-level ``addons`` list.
# sys.argv[4] is the output path when the script is started as
# "mitmdump -s <script> -w <outfile> <filter>", which is the command that
# ReadCookie.write_cookie builds.
addons = [WriterCookie(sys.argv[4])]

# read_cookie.py
import re, os
class ReadCookie(object):
    """Parse the file generated by write_cookie.py."""

    def __init__(self, outfile):
        """Remember the path of the capture file to parse."""
        self.outfile = outfile

    def parse_cookie(self):
        """Extract the token, biz id and cookie header from the capture file.

        The first line of the file is the captured request URL; the second
        line is the ``str()`` of the request cookies.

        Returns:
            A tuple ``(appmsg_token, biz, cookie_str)``.

        Raises:
            IndexError: If the file has fewer than two lines, or the URL has
                no ``appmsg_token=...&`` or ``__biz=...&`` parameter.
        """
        # Context manager so the handle is closed even if parsing fails.
        with open(self.outfile) as f:
            lines = f.readlines()
        request_url = lines[0]
        # Capture everything between "=" and the next "&". Splitting on "="
        # would truncate values that themselves contain "=".
        appmsg_token = re.findall(r"appmsg_token=(.*?)&", request_url)[0]
        biz = re.findall(r"__biz=(.*?)&", request_url)[0]
        # Drop the 15-character prefix and 2-character suffix of the cookie
        # repr, then turn "'name', 'value'" pairs into "name=value; ..." form.
        # NOTE(review): presumably the prefix/suffix are "MultiDictView[[" and
        # "]]"; confirm against the installed mitmproxy version. Cookie values
        # containing ", " or "'" would be altered by the replacements below.
        cookie_str = '; '.join(lines[1][15:-2].split('], ['))
        cookie_str = cookie_str.replace("'", '').replace(', ', '=')
        return appmsg_token, biz, cookie_str

    def write_cookie(self, outfile):
        """Run mitmdump with write_cookie.py and wait for it to exit.

        Args:
            outfile: Path passed to mitmdump after ``-w``.

        Raises:
            FileNotFoundError: If the ``mitmdump`` executable cannot be found.
        """
        import subprocess

        path = os.path.split(os.path.realpath(__file__))[0]
        # An argument list (no shell) keeps paths containing spaces or shell
        # metacharacters intact. The exit status is ignored, as it was with
        # os.system.
        command = [
            "mitmdump",
            "-s", os.path.join(path, "write_cookie.py"),
            "-w", outfile,
            "mp.weixin.qq.com/mp/getappmsgext",
        ]
        subprocess.run(command, check=False)

# Batch Fetching Articles
Pagination is performed by calling
https://mp.weixin.qq.com/mp/profile_ext?action=getmsg&__biz=…&offset=10&count=10. The WxCrawler class builds the request URL, converts the cookie to a dict, and iterates through pages, printing each article’s title and URL.
# wxCrawler.py
import os, requests, json, urllib3, utils
class WxCrawler(object):
    """Page through a public account's message history and print each article.

    Pages are requested from ``https://mp.weixin.qq.com/mp/profile_ext`` with
    ``action=getmsg``, ten messages per page.
    """

    # Runs once when the class is defined; the requests below are sent with
    # verify=False.
    urllib3.disable_warnings()

    def __init__(self, appmsg_token, biz, cookie, begin_page_index=0, end_page_index=100):
        """Store credentials and the page range to crawl.

        Args:
            appmsg_token: Token placed in the request URL.
            biz: Value of the ``__biz`` URL parameter.
            cookie: Cookie header string; it must contain a ``pass_ticket``
                entry, which ``run`` copies into the URL.
            begin_page_index: First page to fetch (offset = index * 10).
            end_page_index: Last page to fetch, inclusive.
        """
        self.begin_page_index = begin_page_index
        self.end_page_index = end_page_index
        self.num = 1
        self.appmsg_token = appmsg_token
        self.biz = biz
        self.headers = {
            "User-Agent": "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/57.0.2987.132 MQQBrowser/6.2 Mobile",
            "Cookie": cookie
        }
        self.cookie = cookie

    def article_list(self, context):
        """Decode the JSON-encoded ``general_msg_list`` field of a page response.

        Args:
            context: Response body text (a JSON document).

        Returns:
            The decoded ``general_msg_list`` value.
        """
        # general_msg_list is itself a JSON string nested inside the response.
        articles = json.loads(context).get('general_msg_list')
        return json.loads(articles)

    def run(self):
        """Fetch and print every page from the begin index to the end index.

        Does not return normally: ``is_exit_or_continue`` raises
        ``SystemExit(0)`` after the last page.
        """
        # The query string ends with "&x5=0&f=json"; without the "&" the last
        # two parameters collapse into a single x5 value.
        page_url = f"https://mp.weixin.qq.com/mp/profile_ext?action=getmsg&__biz={self.biz}&f=json&offset={{}}&count=10&is_ok=1&scene=&uin=777&key=777&pass_ticket={{}}&wxtoken=&appmsg_token={self.appmsg_token}&x5=0&f=json"
        wx_dict = utils.str_to_dict(self.cookie, '; ', '=')
        # A loop instead of one recursive call per page, so the page count is
        # not bounded by the interpreter's recursion limit.
        while True:
            response = requests.get(page_url.format(self.begin_page_index*10, wx_dict['pass_ticket']), headers=self.headers, verify=False)
            articles = self.article_list(response.text)
            for a in articles['list']:
                if 'app_msg_ext_info' in a:
                    info = a['app_msg_ext_info']
                    if info.get('content_url'):
                        print(f"{self.num}条", info['title'], info['content_url'])
                    # Each secondary article carries its own URL; the outer
                    # message `a` is not read for one.
                    for m in info.get('multi_app_msg_item_list', []):
                        print(f"{self.num}条", m['title'], m['content_url'])
                # NOTE(review): the counter advances once per message; the
                # original indentation was lost, so confirm this placement.
                self.num += 1
            self.is_exit_or_continue()

    def is_exit_or_continue(self):
        """Advance to the next page; end the program once past the last page.

        Raises:
            SystemExit: With status 0 when the page index exceeds
                ``end_page_index``.
        """
        self.begin_page_index += 1
        if self.begin_page_index > self.end_page_index:
            # SystemExit lets buffered output be flushed and cleanup handlers
            # run; os._exit() skips both.
            raise SystemExit(0)

# Running the Whole Pipeline
The main script launches ReadCookie to refresh the token, then starts WxCrawler to collect all historical articles.
# main.py
from read_cookie import ReadCookie
from wxCrawler import WxCrawler
if __name__ == '__main__':
    rc = ReadCookie('cookie.txt')
    # Start mitmdump and wait for it to exit, which it does after writing
    # a getappmsgext request's URL and cookies to cookie.txt.
    rc.write_cookie('cookie.txt')
    appmsg_token, biz, cookie_str = rc.parse_cookie()
    # Print the article history using the freshly captured credentials.
    wx = WxCrawler(appmsg_token, biz, cookie_str)
    wx.run()

# Conclusion
The method automates most of the data‑collection steps, though a manual refresh of the public account page is still required when the token expires.
Signed-in readers can open the original source through BestHub's protected redirect.
This article has been distilled and summarized from source material, then republished for learning and reference. If you believe it infringes your rights, please contact us and we will review it promptly.
Python Crawling & Data Mining
Life's short, I code in Python. This channel shares Python web crawling, data mining, analysis, processing, visualization, automated testing, DevOps, big data, AI, cloud computing, machine learning tools, resources, news, technical articles, tutorial videos and learning materials. Join us!
How this landed with the community
Was this worth your time?
0 Comments
Thoughtful readers leave field notes, pushback, and hard-won operational detail here.
