Master Bulk Excel Automation with Python: Merge, Filter, Format & More
Learn a collection of Python scripts that automate common Excel operations—including merging multiple files, find‑and‑replace, filtering rows, adding formulas, adjusting formats, extracting images, and encrypting workbooks—providing step‑by‑step code examples for efficient data processing.
Bulk Merging Multiple Excel Files
This script scans a directory for all .xlsx files, reads each into a pandas DataFrame, concatenates them, and writes the combined data to a new workbook.
import pandas as pd
import os
def merge_excel_files(directory, output_file):
    """Merge every ``.xlsx`` workbook found in *directory* into one workbook.

    Each workbook is read with ``pd.read_excel`` (default sheet), the rows
    are stacked in file-name order, and the result is written to
    *output_file* without the index column.

    Args:
        directory: Folder scanned (non-recursively) for ``.xlsx`` files.
        output_file: Path of the combined workbook to write.
    """
    output_path = os.path.abspath(output_file)
    frames = []
    # Sorted for a reproducible row order; os.listdir order is arbitrary.
    for file_name in sorted(os.listdir(directory)):
        # "~$" files are Excel's lock files, not readable workbooks.
        if not file_name.endswith('.xlsx') or file_name.startswith('~$'):
            continue
        file_path = os.path.join(directory, file_name)
        # Do not fold a previous run's output back into the merge.
        if os.path.abspath(file_path) == output_path:
            continue
        print(f"Processing: {file_name}")
        frames.append(pd.read_excel(file_path))
    # Concatenate once: growing a DataFrame inside the loop re-copies all
    # previously merged rows on every iteration.
    all_data = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
    all_data.to_excel(output_file, index=False)
    print(f"Merging completed, result saved to: {output_file}")
# Usage example
merge_excel_files('input_directory', 'merged_output.xlsx')
Find and Replace Content in Excel
Using openpyxl, this function iterates every cell in every worksheet, replaces a target string with a new one, and saves the modified workbook.
from openpyxl import load_workbook
def find_and_replace_in_excel(file_path, search_text, replace_text):
    """Replace *search_text* with *replace_text* in every text cell.

    All worksheets of the workbook at *file_path* are scanned. The result is
    saved in the current working directory as ``replaced_<original name>``;
    the input file is left untouched.

    Args:
        file_path: Workbook to read.
        search_text: Substring to look for.
        replace_text: Text substituted for each occurrence.
    """
    wb = load_workbook(file_path)
    for sheet in wb.worksheets:
        print(f"Processing sheet: {sheet.title}")
        for row in sheet.iter_rows():
            for cell in row:
                value = cell.value
                # Only string cells can be rewritten: calling .replace on a
                # number or date whose str() happens to contain the search
                # text would raise AttributeError.
                if not isinstance(value, str) or search_text not in value:
                    continue
                cell.value = value.replace(search_text, replace_text)
                print(f"In cell {cell.coordinate} found '{search_text}', replaced with '{replace_text}'")
    wb.save('replaced_' + os.path.basename(file_path))
    print("Find and replace completed")
# Usage example
find_and_replace_in_excel('example.xlsx', 'old text', 'new text')
Filter Data by Condition and Save
Read an Excel file with pandas, keep rows where a specified column exceeds a threshold, and write the result to a new file.
import pandas as pd
def filter_excel_data(input_file, output_file, column_name, threshold):
    """Write the rows whose *column_name* value is greater than *threshold*.

    Args:
        input_file: Workbook to read.
        output_file: Workbook receiving the surviving rows (no index column).
        column_name: Column compared against the threshold.
        threshold: Exclusive lower bound for kept rows.
    """
    table = pd.read_excel(input_file)
    above_threshold = table[column_name] > threshold
    table[above_threshold].to_excel(output_file, index=False)
    print(f"Filtering completed, results saved to: {output_file}")
# Usage example
filter_excel_data('data.xlsx', 'filtered_data.xlsx', 'Age', 30)
Add Formula to a Cell
Loads a workbook, writes a formula string into a target cell, and saves the modified file.
from openpyxl import load_workbook
def add_formula_to_excel(file_path, formula, target_cell):
    """Write *formula* into *target_cell* of the workbook's active sheet.

    The modified workbook is saved in the current working directory as
    ``formulated_<original name>``.

    Args:
        file_path: Workbook to read.
        formula: Formula text, e.g. ``'=SUM(A1:A10)'``.
        target_cell: Cell coordinate such as ``'B1'``.
    """
    workbook = load_workbook(file_path)
    workbook.active[target_cell] = formula
    print(f"Formula '{formula}' added to cell {target_cell}")
    output_name = 'formulated_' + os.path.basename(file_path)
    workbook.save(output_name)
# Usage example
add_formula_to_excel('example.xlsx', '=SUM(A1:A10)', 'B1')
Batch Adjust Excel Formatting
Applies a red, bold Arial font and center alignment to every cell in the active worksheet.
from openpyxl import load_workbook
from openpyxl.styles import Font, Alignment
def format_excel_file(file_path):
    """Apply red bold 12pt Arial and centred alignment to the active sheet.

    Every cell yielded by ``iter_rows()`` on the active worksheet is styled.
    The result is saved in the current working directory as
    ``formatted_<original name>``.

    Args:
        file_path: Workbook to read.
    """
    wb = load_workbook(file_path)
    ws = wb.active
    # The style objects are identical for every cell, so build them once
    # instead of once per cell.
    font = Font(name='Arial', size=12, bold=True, color='FF0000')
    alignment = Alignment(horizontal='center', vertical='center')
    for row in ws.iter_rows():
        for cell in row:
            cell.font = font
            cell.alignment = alignment
    wb.save('formatted_' + os.path.basename(file_path))
    print("Formatting completed")
# Usage example
format_excel_file('example.xlsx')
Batch Rename Worksheets
Prepends a user‑defined prefix to each worksheet title in a workbook.
from openpyxl import load_workbook
def rename_sheets(file_path, prefix):
    """Prefix every worksheet title with ``<prefix>_``.

    The renamed workbook is saved in the current working directory as
    ``renamed_<original name>``.

    Args:
        file_path: Workbook to read.
        prefix: Text placed, followed by an underscore, before each title.
    """
    workbook = load_workbook(file_path)
    for worksheet in workbook.worksheets:
        previous_title = worksheet.title
        requested_title = f"{prefix}_{previous_title}"
        worksheet.title = requested_title
        print(f"Worksheet '{previous_title}' renamed to: {requested_title}")
    output_name = 'renamed_' + os.path.basename(file_path)
    workbook.save(output_name)
    print("All worksheets renamed")
# Usage example
rename_sheets('example.xlsx', 'New')
Extract Specific Columns from Multiple Files
Iterates over all Excel files in a folder, selects given columns, concatenates them, and writes the combined result.
import pandas as pd
import os
def extract_columns_from_multiple_files(directory, columns_to_extract, output_file):
    """Collect the given columns from every ``.xlsx`` file in *directory*.

    Files are processed in file-name order. The selected columns of each
    workbook are stacked and written to *output_file* without the index
    column.

    Args:
        directory: Folder scanned (non-recursively) for ``.xlsx`` files.
        columns_to_extract: List of column labels to keep, in output order.
        output_file: Path of the combined workbook to write.

    Raises:
        KeyError: If a workbook lacks one of the requested columns.
    """
    output_path = os.path.abspath(output_file)
    pieces = []
    # Sorted for a reproducible row order; os.listdir order is arbitrary.
    for filename in sorted(os.listdir(directory)):
        # "~$" files are Excel's lock files, not readable workbooks.
        if not filename.endswith('.xlsx') or filename.startswith('~$'):
            continue
        file_path = os.path.join(directory, filename)
        # Do not read back a previous run's output from the same folder.
        if os.path.abspath(file_path) == output_path:
            continue
        pieces.append(pd.read_excel(file_path)[columns_to_extract])
    # One concat at the end avoids re-copying accumulated rows per file.
    all_data = pd.concat(pieces, ignore_index=True) if pieces else pd.DataFrame()
    all_data.to_excel(output_file, index=False)
    print(f"Data extraction completed, result saved to: {output_file}")
# Usage example
extract_columns_from_multiple_files('input_directory', ['Name', 'Age'], 'extracted_output.xlsx')
Insert New Row or Column
Depending on the dimension argument, inserts a row or column at the specified index.
from openpyxl import load_workbook
def insert_row_or_column(file_path, location, dimension='row'):
    """Insert one empty row or column into the active sheet.

    The result is saved in the current working directory as
    ``inserted_<original name>``.

    Args:
        file_path: Workbook to read.
        location: 1-based index before which the row/column is inserted.
        dimension: ``'row'`` (default) or ``'column'``.

    Raises:
        ValueError: If *dimension* is neither ``'row'`` nor ``'column'``.
    """
    # Reject typos up front instead of silently saving an unchanged copy.
    if dimension not in ('row', 'column'):
        raise ValueError(
            f"dimension must be 'row' or 'column', got {dimension!r}"
        )
    wb = load_workbook(file_path)
    ws = wb.active
    if dimension == 'row':
        ws.insert_rows(location)
        print(f"Inserted a row before row {location}")
    else:
        ws.insert_cols(location)
        print(f"Inserted a column before column {location}")
    wb.save('inserted_' + os.path.basename(file_path))
# Usage examples
insert_row_or_column('example.xlsx', 5, 'row')
insert_row_or_column('example.xlsx', 3, 'column')
Remove Duplicate Rows
Loads a workbook with pandas, drops duplicate rows (optionally based on a subset of columns), and saves the cleaned data.
import pandas as pd
def remove_duplicates(input_file, output_file, subset=None):
    """Drop duplicate rows and write the remainder to *output_file*.

    Args:
        input_file: Workbook to read.
        output_file: Workbook receiving the de-duplicated rows (no index).
        subset: Optional list of column labels used to decide whether two
            rows are duplicates; ``None`` compares all columns.
    """
    source = pd.read_excel(input_file)
    deduplicated = source.drop_duplicates(subset=subset)
    deduplicated.to_excel(output_file, index=False)
    print(f"Duplicate removal completed, result saved to: {output_file}")
# Usage example
remove_duplicates('data_with_duplicates.xlsx', 'cleaned_data.xlsx', subset=['Name', 'Email'])
Copy a Worksheet Across Workbooks
Copies the contents of a specified sheet from a source workbook into a new sheet of a target workbook.
from openpyxl import load_workbook
def copy_sheet(source_file, target_file, sheet_name):
    """Copy the cell values of one sheet into a new sheet of another workbook.

    Only values are copied. *target_file* is overwritten in place with the
    added sheet. If *sheet_name* is missing from the source, a message is
    printed and nothing is saved.

    Args:
        source_file: Workbook containing the sheet to copy.
        target_file: Workbook that receives the new sheet.
        sheet_name: Title of the sheet to copy; also requested as the title
            of the new sheet.
    """
    src_book = load_workbook(source_file)
    dst_book = load_workbook(target_file)

    if sheet_name not in src_book.sheetnames:
        print(f"Sheet '{sheet_name}' not found in source file")
        return

    src_sheet = src_book[sheet_name]
    dst_sheet = dst_book.create_sheet(title=sheet_name)
    for cells in src_sheet.iter_rows():
        for src_cell in cells:
            dst_sheet[src_cell.coordinate].value = src_cell.value
    dst_book.save(target_file)
    print(f"Sheet '{sheet_name}' copied from {source_file} to {target_file}")
# Usage example
copy_sheet('source.xlsx', 'target.xlsx', 'Sheet1')
Adjust Column Width Automatically
Calculates the maximum string length in each column and sets the column width accordingly.
from openpyxl import load_workbook
def adjust_column_width(file_path):
    """Size each column of the active sheet to fit its longest value.

    The width is the longest ``str(value)`` in the column plus 2 characters
    of padding. Columns with no values keep their current width. The result
    is saved in the current working directory as ``adjusted_<original name>``.

    Args:
        file_path: Workbook to read.
    """
    # Local import so the function does not depend on import order elsewhere
    # in the file.
    from openpyxl.utils import get_column_letter

    wb = load_workbook(file_path)
    ws = wb.active
    for column in ws.columns:
        # Skip empty cells: str(None) is 'None' and would count as 4 chars.
        lengths = [len(str(cell.value)) for cell in column if cell.value is not None]
        if not lengths:
            continue
        # Derive the letter from the numeric index; a merged cell at the top
        # of the column has no `column_letter` attribute.
        column_letter = get_column_letter(column[0].column)
        ws.column_dimensions[column_letter].width = max(lengths) + 2
    wb.save('adjusted_' + os.path.basename(file_path))
    print("All column widths adjusted")
# Usage example
adjust_column_width('example.xlsx')
Create a Pivot Table
Generates a pivot table that aggregates values by specified index columns using pandas.
import pandas as pd
def create_pivot_table(input_file, output_file, index_columns, values_columns):
    """Sum *values_columns* grouped by *index_columns* and save the pivot.

    Args:
        input_file: Workbook to read.
        output_file: Workbook receiving the pivot table (index included).
        index_columns: Column labels used as the pivot's row index.
        values_columns: Column labels whose values are summed.
    """
    source = pd.read_excel(input_file)
    summary = source.pivot_table(
        values=values_columns,
        index=index_columns,
        aggfunc='sum',
    )
    summary.to_excel(output_file)
    print(f"Pivot table generated and saved to: {output_file}")
# Usage example
create_pivot_table('sales_data.xlsx', 'pivot_output.xlsx', ['Region'], ['Sales'])
Add a Bar Chart to a Worksheet
Creates a bar chart with titles and data range, then inserts it at cell E5.
from openpyxl import load_workbook
from openpyxl.chart import BarChart, Reference
def add_chart_to_excel(file_path, sheet_name, chart_title, x_axis_title, y_axis_title, data_range):
    """Add a bar chart anchored at cell E5 of the named worksheet.

    The result is saved in the current working directory as
    ``chart_added_<original name>``.

    Args:
        file_path: Workbook to read.
        sheet_name: Title of the worksheet that holds the data and chart.
        chart_title: Title shown on the chart.
        x_axis_title: Title of the x axis.
        y_axis_title: Title of the y axis.
        data_range: Dict with keys ``min_col``, ``min_row``, ``max_col``,
            ``max_row`` (value cells) and ``category_col``,
            ``category_min_row``, ``category_max_row`` (category labels).
    """
    workbook = load_workbook(file_path)
    sheet = workbook[sheet_name]

    bar_chart = BarChart()
    bar_chart.title = chart_title
    bar_chart.x_axis.title = x_axis_title
    bar_chart.y_axis.title = y_axis_title

    value_bounds = {
        key: data_range[key]
        for key in ('min_col', 'min_row', 'max_col', 'max_row')
    }
    values_ref = Reference(sheet, **value_bounds)
    labels_ref = Reference(
        sheet,
        min_col=data_range['category_col'],
        min_row=data_range['category_min_row'],
        max_row=data_range['category_max_row'],
    )

    # First row of the value range is used as the series titles.
    bar_chart.add_data(values_ref, titles_from_data=True)
    bar_chart.set_categories(labels_ref)
    sheet.add_chart(bar_chart, "E5")

    output_name = 'chart_added_' + os.path.basename(file_path)
    workbook.save(output_name)
    print("Chart addition completed")
# Usage example
add_chart_to_excel('example.xlsx', 'Sheet1', 'Sales Statistics', 'Product', 'Sales',
{'min_col': 1, 'min_row': 2, 'max_col': 2, 'max_row': 7,
'category_col': 1, 'category_min_row': 2, 'category_max_row': 7})
Merge Adjacent Cells with Identical Content
Scans each column, merges consecutive cells that share the same value, and saves the workbook.
from openpyxl import load_workbook
from openpyxl.utils import get_column_letter
def merge_cells_with_same_content(file_path):
    """Merge vertical runs of equal, non-empty cells in the active sheet.

    Each column is scanned top to bottom; every run of two or more
    consecutive cells holding the same value is merged into one range.
    Runs of empty cells are left alone. The result is saved in the current
    working directory as ``merged_<original name>``.

    Args:
        file_path: Workbook to read.
    """
    wb = load_workbook(file_path)
    ws = wb.active
    last_row = ws.max_row
    for col in range(1, ws.max_column + 1):
        # Snapshot the column before merging so later comparisons never read
        # a cell that a merge in this pass has already blanked.
        values = [ws.cell(row=r, column=col).value for r in range(1, last_row + 1)]
        run_start = 1
        for row in range(1, last_row + 1):
            # values is 0-based: values[row] is the cell below `row`.
            run_ends_here = row == last_row or values[row] != values[row - 1]
            if not run_ends_here:
                continue
            # Merge only real runs (2+ cells) that actually hold content.
            if row > run_start and values[row - 1] is not None:
                ws.merge_cells(
                    start_row=run_start, start_column=col,
                    end_row=row, end_column=col,
                )
            run_start = row + 1
    wb.save('merged_' + os.path.basename(file_path))
    print("Merging same-content cells completed")
# Usage example
merge_cells_with_same_content('example.xlsx')
Extract Images Embedded in an Excel File
Iterates over the worksheet's image objects, saves each as a PNG file in the specified output directory.
from openpyxl import load_workbook
from openpyxl.drawing.image import Image
import os
def extract_images_from_excel(file_path, output_dir):
    """Write the images embedded in the active sheet to *output_dir*.

    Each image's bytes are copied unchanged into a file named
    ``image_<anchor row>.<format>``. When the name is already taken or the
    anchor exposes no row, the image's position in the sheet's image list
    is added to keep names unique.

    Args:
        file_path: Workbook to read.
        output_dir: Folder for the extracted files; created if missing.
    """
    wb = load_workbook(file_path, keep_vba=True)
    ws = wb.active
    os.makedirs(output_dir, exist_ok=True)
    used_stems = set()
    # NOTE(review): `_images` is a private openpyxl attribute; re-check on
    # openpyxl upgrades.
    for index, img in enumerate(ws._images, start=1):
        marker = getattr(img.anchor, '_from', None)
        stem = f"image_{marker.row}" if marker is not None else f"image_n{index}"
        if stem in used_stems:
            # Two images anchored on the same row must not overwrite each other.
            stem = f"{stem}_{index}"
        used_stems.add(stem)
        extension = getattr(img, 'format', None) or 'png'
        image_path = os.path.join(output_dir, f"{stem}.{extension}")

        # The openpyxl Image wrapper has no save(); copy the underlying bytes
        # from `ref`, which is either a path or a file-like object.
        source = img.ref
        if isinstance(source, (str, os.PathLike)):
            with open(source, 'rb') as handle:
                payload = handle.read()
        else:
            source.seek(0)
            payload = source.read()
        with open(image_path, 'wb') as out:
            out.write(payload)
        print(f"Image saved to: {image_path}")
# Usage example
extract_images_from_excel('example_with_images.xlsx', 'output_images')
Batch Convert Date Formats
Converts a date column from one string format to another using pandas' datetime utilities.
import pandas as pd
def convert_date_format(input_file, output_file, date_column, current_format, target_format):
    """Re-format one date column and save the workbook.

    Args:
        input_file: Workbook to read.
        output_file: Workbook receiving the converted data (no index column).
        date_column: Label of the column holding the dates.
        current_format: ``strptime`` pattern describing the existing values.
        target_format: ``strftime`` pattern for the rewritten values.
    """
    frame = pd.read_excel(input_file)
    parsed = pd.to_datetime(frame[date_column], format=current_format)
    frame[date_column] = parsed.dt.strftime(target_format)
    frame.to_excel(output_file, index=False)
    print(f"Date format conversion completed, result saved to: {output_file}")
# Usage example
convert_date_format('dates.xlsx', 'formatted_dates.xlsx', 'Date', '%Y-%m-%d', '%d/%m/%Y')
Split an Excel File by a Condition
Separates rows that match a given value in a specific column from those that do not, saving each subset to its own workbook.
import pandas as pd
def split_excel_by_condition(input_file, column_name, condition_value):
    """Split a workbook into matching and non-matching rows.

    Rows where *column_name* equals *condition_value* go to
    ``matching_<condition_value>.xlsx``; all other rows go to
    ``non_matching.xlsx``. Both files are written to the current working
    directory without the index column.

    Args:
        input_file: Workbook to read.
        column_name: Column compared against *condition_value*.
        condition_value: Value that selects the "matching" rows.
    """
    table = pd.read_excel(input_file)
    key = table[column_name]
    hits = table[key == condition_value]
    misses = table[key != condition_value]

    hits.to_excel(f'matching_{condition_value}.xlsx', index=False)
    misses.to_excel('non_matching.xlsx', index=False)

    print(f"Data matching condition saved to: matching_{condition_value}.xlsx")
    print("Data not matching condition saved to: non_matching.xlsx")
# Usage example
split_excel_by_condition('data.xlsx', 'Category', 'A')
Add a Text Watermark (Conceptual)
This example shows how to create a text drawing with openpyxl and place it on the worksheet. In practice, additional adjustments may be required, or a library like Pillow might be used for more complex watermarks.
from openpyxl import load_workbook
from openpyxl.drawing.text import TextBlock, Paragraph, ParagraphProperties, CharacterProperties
from openpyxl.drawing.spreadsheet_drawing import OneCellAnchor, AnchorMarker
from openpyxl.utils.units import cm_to_EMU
import os
def add_watermark(file_path, watermark_text):
    """Build a text drawing for *watermark_text* and attach it to the sheet.

    Conceptual example: the result is saved in the current working directory
    as ``watermarked_<original name>``.

    NOTE(review): this relies on ``TextBlock`` being importable from
    ``openpyxl.drawing.text`` and on the worksheet exposing ``add_drawing``;
    confirm both against the installed openpyxl version before use.

    Args:
        file_path: Workbook to read.
        watermark_text: Text placed in the drawing.
    """
    workbook = load_workbook(file_path)
    sheet = workbook.active

    # Each paragraph gets its own freshly built property objects (sz=400).
    lead_props = ParagraphProperties(defRPr=CharacterProperties(sz=400))
    lead_paragraph = Paragraph(paragraphProperties=lead_props)
    text_props = ParagraphProperties(defRPr=CharacterProperties(sz=400))
    text_paragraph = Paragraph(watermark_text, paragraphProperties=text_props)
    text_drawing = TextBlock(lead_paragraph, [text_paragraph])

    offset = AnchorMarker(x=cm_to_EMU(2), y=cm_to_EMU(2))
    extent = AnchorMarker(x=cm_to_EMU(10), y=cm_to_EMU(1))
    placement = OneCellAnchor(anchorRow=5, anchorCol=5, _from=offset, ext=extent)

    sheet.add_drawing(text_drawing, placement)
    workbook.save('watermarked_' + os.path.basename(file_path))
    print("Watermark addition completed")
# Usage example
add_watermark('example.xlsx', 'CONFIDENTIAL')
Batch Encrypt Excel Workbooks
Applies a password to each workbook in a directory by setting the WorkbookProtection object.
from openpyxl import load_workbook
from openpyxl.workbook.protection import WorkbookProtection
import os
def encrypt_excel_files(directory, password):
    """Password-protect the structure of every ``.xlsx`` file in *directory*.

    For each workbook a copy named ``encrypted_<original name>`` is written
    to the same folder with ``WorkbookProtection`` set
    (``lockStructure=True``).

    NOTE(review): per openpyxl's protection docs this locks the workbook
    structure; it does not appear to encrypt the file contents — confirm
    before relying on it for confidentiality.

    Args:
        directory: Folder scanned (non-recursively) for ``.xlsx`` files.
        password: Password stored in the workbook protection settings.
    """
    prefix = 'encrypted_'
    for filename in sorted(os.listdir(directory)):
        if not filename.endswith('.xlsx'):
            continue
        # Skip Excel "~$" lock files, and this function's own output so a
        # re-run does not create encrypted_encrypted_... copies.
        if filename.startswith('~$') or filename.startswith(prefix):
            continue
        file_path = os.path.join(directory, filename)
        wb = load_workbook(file_path)
        wb.security = WorkbookProtection(workbookPassword=password, lockStructure=True)
        encrypted_filename = f'{prefix}{filename}'
        wb.save(os.path.join(directory, encrypted_filename))
        print(f"{filename} encrypted and saved as {encrypted_filename}")
# Usage example
encrypt_excel_files('input_directory', 'your_password')
Update Hyperlinks Across a Workbook
Searches for hyperlinks that start with a given old URL and replaces the domain with a new one.
from openpyxl import load_workbook
from openpyxl.worksheet.hyperlink import Hyperlink
def update_hyperlinks(file_path, old_url, new_url):
    """Rewrite hyperlinks that start with *old_url* to start with *new_url*.

    All worksheets are scanned. Only the leading *old_url* prefix of a
    matching target is replaced; the rest of the URL and the hyperlink's
    other attributes are kept. The result is saved in the current working
    directory as ``updated_links_<original name>``.

    Args:
        file_path: Workbook to read.
        old_url: URL prefix to look for.
        new_url: Replacement for that prefix.
    """
    wb = load_workbook(file_path)
    for sheet in wb.worksheets:
        for row in sheet.iter_rows():
            for cell in row:
                link = cell.hyperlink
                # A hyperlink may have no external target (target is None);
                # calling startswith on it would raise.
                target = link.target if link else None
                if not target or not target.startswith(old_url):
                    continue
                # Swap only the prefix: str.replace would also rewrite later
                # occurrences of old_url, e.g. inside a query string.
                # Updating in place keeps tooltip/display/location.
                link.target = new_url + target[len(old_url):]
                print(f"Hyperlink in cell {cell.coordinate} updated from {old_url} to {new_url}")
    wb.save('updated_links_' + os.path.basename(file_path))
    print("Hyperlink update completed")
# Usage example
update_hyperlinks('example.xlsx', 'http://oldwebsite.com', 'http://newwebsite.com')
These snippets collectively provide a practical toolbox for automating repetitive Excel tasks, improving productivity, and ensuring consistency across large datasets.
Signed-in readers can open the original source through BestHub's protected redirect.
This article has been distilled and summarized from source material, then republished for learning and reference. If you believe it infringes your rights, please contact us and we will review it promptly.
How this landed with the community
Was this worth your time?
0 Comments
Thoughtful readers leave field notes, pushback, and hard-won operational detail here.
