Python Office Automation Techniques: Document Conversion, Geocoding, Distance Calculation, Batch File Processing, and Outlook Email Automation
This article presents a collection of Python scripts for office automation, covering Word doc‑to‑docx conversion, batch address geocoding using Baidu API, distance computation with geopy, coordinate conversion between Baidu and Gaode, Excel merging, Word‑to‑PDF conversion, extracting tables from Word, and bulk Outlook email sending.
The article introduces several practical Python techniques for automating everyday office tasks.
1. Word document doc to docx
Using win32com to open each .doc file, save it as .docx , and handle conversion errors.
import os
from win32com import client as wc
path = "C:/Users/yyz/Desktop/python办公技巧/data/doc转docx/"
files = []
for file in os.listdir(path):
if file.endswith(".doc"):
files.append(path + file)
word = wc.Dispatch("Word.Application")
i = 0
for file in files:
try:
doc = word.Documents.Open(file)
doc.SaveAs("{}x".format(file), 12)
doc.Close()
print(file + ':转换成功')
i += 1
except:
print(file + ':转换[不成功]')
files.append(file)
pass
print('转换文件%i个' % i)
word.Quit()2. Batch address to latitude/longitude
Reads addresses from an Excel file, calls Baidu Geocoding API, and stores the resulting coordinates.
import pandas as pd
import json
from urllib.request import urlopen, quote
import requests
def getlnglat(address):
url = 'http://api.map.baidu.com/geocoding/v3/'
output = 'json'
ak = "<your_api_key>"
address = quote(address)
uri = f"{url}?address={address}&output={output}&ak={ak}&callback=showLocation "
res = requests.get(uri).text
temp = json.loads(res)
lat = temp['result']['location']['lat']
lng = temp['result']['location']['lng']
return lng, lat
# Single example
print(getlnglat('北京市朝阳区高碑店地区办事处高井村委会'))
# Batch example
data = pd.read_excel('C:/Users/yyz/Desktop/python办公技巧/data/地址信息.xlsx')
data['经度'] = ''
data['纬度'] = ''
for i in range(data.shape[0]):
try:
data.iloc[i, 2] = getlnglat(data.iloc[i, 1])[0]
data.iloc[i, 3] = getlnglat(data.iloc[i, 1])[1]
except:
pass3. Baidu to Gaode coordinate conversion
Defines a function to transform Baidu coordinates to Gaode (GCJ‑02) coordinates.
import math
def bdToGaoDe(lon, lat):
PI = 3.14159265358979324 * 3000.0 / 180.0
x = lon - 0.0065
y = lat - 0.006
z = math.sqrt(x * x + y * y) - 0.00002 * math.sin(y * PI)
theta = math.atan2(y, x) - 0.000003 * math.cos(x * PI)
lon = z * math.cos(theta)
lat = z * math.sin(theta)
return lon, lat
# Single conversion example
print(bdToGaoDe(116.512885, 39.847469))4. Batch coordinate conversion
data = pd.read_excel('C:/Users/yyz/Desktop/python办公技巧/data/百度经纬度转高德.xlsx')
wd = data['纬度'].tolist()
jd = data['经度'].tolist()
li1 = []
for i in range(len(data)):
li1.append(bdToGaoDe(jd[i], wd[i]))
data['经度_re'] = [i[0] for i in li1]
data['纬度_re'] = [i[1] for i in li1]
print(data.head())5. Excel files batch merge
import pandas as pd
import os
path = 'C:/Users/yyz/Desktop/python办公技巧/data/数据合并/'
files = [path + f for f in os.listdir(path) if f.endswith('.xlsx')]
data = pd.DataFrame()
for file in files:
datai = pd.read_excel(file)
data = data.append(datai)
print('读取%i行数据,合并后文件%i列, 名称:%s' % (len(datai), len(data.columns), file.split('/')[-1]))
data.reset_index(drop=True, inplace=True)6. Word to PDF batch conversion
pip install docx2pdf
from docx2pdf import convert
import os
path = 'C:/Users/yyz/Desktop/python办公技巧/data/word转pdf/'
files = [path + f for f in os.listdir(path) if f.endswith('.docx')]
for file in files:
convert(file, file.split('.')[0] + '.pdf')
print(file + '转换成功')7. Extract tables from Word documents
pip install python-docx
import docx
import pandas as pd
# Read a single Word file
doc = docx.Document('C:/Users/yyz/Desktop/python办公技巧/data/word信息.docx')
biaoges = doc.tables
# Example of processing the first table
rowi = len(biaoges[0].rows)
lis1 = []
for i in range(1, rowi):
lis1.append([
biaoges[0].cell(i, 0).text,
biaoges[0].cell(i, 1).text,
biaoges[0].cell(i, 2).text,
biaoges[0].cell(i, 3).text,
biaoges[0].cell(i, 4).text
])
data1 = pd.DataFrame(lis1, columns=['日期','品类','数量','价格','金额'])
print(data1.head())8. Bulk Outlook email sending
import win32com.client as win32
import pandas as pd
outlook = win32.Dispatch('outlook.Application')
data1 = pd.read_excel('C:/Users/yyz/Desktop/python批量发送邮件.xlsx', sheet_name='发送邮件')
for i in range(data1.shape[0]):
mail = outlook.CreateItem(0)
mail.To = data1.iloc[i, 0]
mail.CC = data1.iloc[i, 1]
mail.Subject = data1.iloc[i, 2]
mail.HTMLBody = data1.iloc[i, 3]
mail.Attachments.Add(data1.iloc[i, 4])
mail.Send()
print('发送邮件%i份' % i)All code snippets are wrapped in ... tags to preserve their original formatting.
Python Programming Learning Circle
A global community of Chinese Python developers offering technical articles, columns, original video tutorials, and problem sets. Topics include web full‑stack development, web scraping, data analysis, natural language processing, image processing, machine learning, automated testing, DevOps automation, and big data.
How this landed with the community
Was this worth your time?
0 Comments
Thoughtful readers leave field notes, pushback, and hard-won operational detail here.