Building a Chrome Extension for Image OCR Using Python and Tesseract
This tutorial walks through creating a Chrome extension that captures images from web pages, sends them to a Python‑backed Tesseract OCR engine, and displays the recognized text, covering the plugin's file structure, manifest configuration, JavaScript code, HTML UI, CSS styling, and installation steps.
In this guide we demonstrate how to develop a Chrome extension that performs OCR on images using Python and the Tesseract library, providing a complete workflow from file structure to deployment.
The extension consists of the following core files:
manifest.json
content_script.js
background_script.js
popup.html
popup.js
icon.png

2.1 Create the plugin file structure
Place the files listed above in a dedicated directory.
2.2 Write manifest.json
{
"manifest_version": 2,
"name": "Smart Image OCR",
"version": "1.0",
"description": "A smart Chrome extension for OCR text recognition from images.",
"icons": {"16": "icon.png", "48": "icon.png", "128": "icon.png"},
"browser_action": {"default_icon": {"16": "icon.png", "48": "icon.png", "128": "icon.png"}, "default_popup": "popup.html"},
"permissions": ["activeTab", "storage"],
"content_scripts": [{"matches": ["http://*/*", "https://*/*"], "js": ["content_script.js"]}],
"background": {"scripts": ["background_script.js"], "persistent": false}
}

2.3 Write content_script.js
// Listen for messages from the popup
chrome.runtime.onMessage.addListener(function(request, sender, sendResponse) {
if (request.action === 'uploadImage') {
var imageData = request.imageData;
// Forward image data to background script for OCR
chrome.runtime.sendMessage({action: 'recognizeImage', imageData: imageData}, function(response) {
sendResponse({success: true, result: response.result});
});
return true;
}
});2.4 Write background_script.js
// Answer 'recognizeImage' messages with the text produced by recognizeText.
// Messages carrying any other action are left unanswered.
chrome.runtime.onMessage.addListener(function handleOcrRequest(message, sender, sendResponse) {
  if (message.action !== 'recognizeImage') {
    return;
  }
  var recognizedText = recognizeText(message.imageData);
  sendResponse({result: recognizedText});
});
// Simple wrapper for Tesseract OCR (replace with real implementation)
function recognizeText(imageData) {
// OCR logic goes here, calling Tesseract library
return 'Hello, OCR!';
}2.5 Write popup.html
<!DOCTYPE html>
<html>
<head>
<title>Smart Image OCR</title>
<link rel="stylesheet" href="popup.css">
</head>
<body>
<input type="file" id="imageUpload" accept="image/*">
<button id="uploadButton">Upload Image</button>
<div id="result"></div>
<script src="popup.js"></script>
</body>
</html>

2.6 Write popup.js
document.addEventListener('DOMContentLoaded', function() {
var imageUpload = document.getElementById('imageUpload');
var uploadButton = document.getElementById('uploadButton');
var resultDiv = document.getElementById('result');
uploadButton.addEventListener('click', function() {
imageUpload.click();
});
imageUpload.addEventListener('change', function() {
var file = imageUpload.files[0];
if (file) {
var reader = new FileReader();
reader.onload = function(e) {
var imageData = e.target.result;
chrome.tabs.query({active: true, currentWindow: true}, function(tabs) {
chrome.tabs.sendMessage(tabs[0].id, {action: 'uploadImage', imageData: imageData}, function(response) {
if (response && response.success) {
resultDiv.textContent = 'OCR Result: ' + response.result;
} else {
resultDiv.textContent = 'Failed to recognize image.';
}
});
});
};
reader.readAsDataURL(file);
}
});
});2.7 Add CSS in popup.css
body { width: 200px; padding: 10px; }
input[type="file"] { display: none; }
button { margin-top: 10px; }
#result { margin-top: 10px; }

2.8 Add an icon file named icon.png to the extension directory.
3. Install and debug the extension
Open chrome://extensions/ in Chrome, enable Developer Mode, click “Load unpacked”, select the extension folder, and then use the toolbar icon to open the popup, upload an image, and view the OCR result.
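By following these steps you will have a functional Chrome extension scaffold. Note that recognizeText in background_script.js is only a placeholder: replace it with a call to your Python and Tesseract backend so the extension can actually recognize text in the images you upload.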
Signed-in readers can open the original source through BestHub's protected redirect.
This article has been distilled and summarized from source material, then republished for learning and reference. If you believe it infringes your rights, please contact us and we will review it promptly.
How this landed with the community
Was this worth your time?
0 Comments
Thoughtful readers leave field notes, pushback, and hard-won operational detail here.
