Manual file conversion doesn't scale. Here's how to automate every common conversion type in Python with the best library for each job.
PDF to Images (PyMuPDF)
import fitz # pip install pymupdf
import os
def pdf_to_images(pdf_path, output_dir, dpi=150):
    """Render every page of a PDF to its own PNG file.

    Args:
        pdf_path: Path of the PDF document to open.
        output_dir: Directory that receives the PNG files; created if missing.
        dpi: Resolution passed to ``page.get_pixmap``.

    Returns:
        The written PNG paths in page order, named ``page_001.png``,
        ``page_002.png``, and so on.
    """
    os.makedirs(output_dir, exist_ok=True)
    paths = []
    # Open the document as a context manager so it is closed even when
    # rendering or saving a page raises part-way through.
    with fitz.open(pdf_path) as doc:
        for number, page in enumerate(doc, start=1):
            pix = page.get_pixmap(dpi=dpi)
            path = os.path.join(output_dir, f'page_{number:03d}.png')
            pix.save(path)
            paths.append(path)
    return paths
Images to WebP (Pillow)
from PIL import Image
import glob
def batch_to_webp(input_glob, quality=85):
    """Convert every image matched by a glob pattern to WebP.

    Each output is written next to its source with the extension replaced
    by ``.webp``, and one ``source → output`` line is printed per file.

    Args:
        input_glob: Glob pattern selecting the source images.
        quality: Quality value passed to Pillow's WebP encoder.
    """
    for path in glob.glob(input_glob):
        # splitext only looks at the final path component, so a dot in a
        # directory name (e.g. "./photos/image") is never mistaken for the
        # start of the file extension.
        out = os.path.splitext(path)[0] + '.webp'
        with Image.open(path) as img:
            img.save(out, 'WEBP', quality=quality)
        print(f'{path} → {out}')
Word to PDF (LibreOffice)
import subprocess, glob, os
def batch_docx_to_pdf(input_dir, output_dir):
    """Convert all ``.docx`` files in a directory to PDF with LibreOffice.

    Args:
        input_dir: Directory searched (non-recursively) for ``*.docx``.
        output_dir: Directory passed to ``--outdir``; created if missing.

    Raises:
        subprocess.CalledProcessError: If LibreOffice exits with a non-zero
            status (``check=True``).
    """
    os.makedirs(output_dir, exist_ok=True)
    files = glob.glob(os.path.join(input_dir, '*.docx'))
    # Only launch LibreOffice when there is something to convert; starting
    # it with no input files is wasted work at best.
    if files:
        subprocess.run([
            'libreoffice', '--headless', '--convert-to', 'pdf',
            '--outdir', output_dir, *files
        ], check=True)
    print(f'Converted {len(files)} files')
JSON to CSV (pandas)
import pandas as pd, json, glob
def batch_json_to_csv(input_glob):
    """Flatten every JSON file matched by a glob pattern into a CSV file.

    Each file is parsed, flattened with ``pd.json_normalize``, and written
    next to its source with the extension replaced by ``.csv``. One
    ``source → output`` line is printed per file.

    Args:
        input_glob: Glob pattern selecting the JSON files.
    """
    for path in glob.glob(input_glob):
        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open(path, encoding='utf-8') as f:
            data = json.load(f)
        # Replace only the trailing extension. str.replace would also rewrite
        # ".json" inside directory names and point at a non-existent folder.
        out = os.path.splitext(path)[0] + '.csv'
        pd.json_normalize(data).to_csv(out, index=False)
        print(f'{path} → {out}')
CSV to Excel (pandas + openpyxl)
import pandas as pd
def csv_to_excel(csv_path, excel_path):
    """Load a CSV file and write it out as an Excel workbook.

    Args:
        csv_path: Path of the CSV file to read.
        excel_path: Path of the workbook to write (via the openpyxl engine).
    """
    frame = pd.read_csv(csv_path)
    # index=False keeps the DataFrame's row index out of the sheet.
    frame.to_excel(excel_path, index=False, engine='openpyxl')
Watcher Pattern (Convert on File Drop)
from watchdog.observers import Observer
from watchdog.events import FileSystemEventHandler
class ConvertHandler(FileSystemEventHandler):
    """Watchdog handler that converts Word documents when one is created."""

    def on_created(self, event):
        """Run the DOCX-to-PDF batch conversion for a newly created file.

        The conversion is run over the whole folder containing the new file,
        and the PDFs are written to ``output/``.

        Args:
            event: The creation event delivered by the observer.
        """
        # Creation events are also delivered for directories; a folder whose
        # name happens to end in ".docx" must not trigger a conversion.
        if event.is_directory:
            return
        if event.src_path.endswith('.docx'):
            batch_docx_to_pdf(os.path.dirname(event.src_path), 'output/')
# Watch 'watch_folder/' (top level only) and convert files as they appear.
observer = Observer()
observer.schedule(ConvertHandler(), 'watch_folder/', recursive=False)
observer.start()
try:
    # start() returns immediately; keep the main thread alive so the
    # observer thread can keep delivering events until interrupted.
    while observer.is_alive():
        observer.join(timeout=1)
except KeyboardInterrupt:
    pass
finally:
    # Stop the observer thread and wait for it to finish shutting down.
    observer.stop()
    observer.join()
One-off conversions: free online file converter →