# PDF content extraction code with a UI
import fitz # PyMuPDF
import os
import tkinter as tk
from tkinter import filedialog, messagebox
class PDFPageExtractor:
    """Small tkinter front end that copies selected pages of a PDF to a new file.

    The user picks a PDF, types a page specification such as ``1-5,7,9-12``
    (1-based, inclusive ranges) and the selected pages are written, in
    ascending order and without duplicates, to a new PDF next to the source.
    """

    def __init__(self, root):
        """Attach the extractor UI to *root* (the Tk root window)."""
        self.root = root
        self.root.title("PDF Page Extractor")
        self.pdf_path = ""
        # GUI elements
        self.create_widgets()

    def create_widgets(self):
        """Create and grid all widgets of the main window."""
        # File selection row
        self.file_label = tk.Label(self.root, text="PDF File:")
        self.file_label.grid(row=0, column=0, sticky=tk.W, padx=10, pady=10)
        self.file_entry = tk.Entry(self.root, width=50)
        self.file_entry.grid(row=0, column=1, padx=10, pady=10)
        self.browse_button = tk.Button(
            self.root, text="Browse", command=self.browse_file
        )
        self.browse_button.grid(row=0, column=2, padx=10, pady=10)

        # Page specification row
        self.pages_label = tk.Label(
            self.root, text="Pages to Extract (e.g., 1-5,7,9-12):"
        )
        self.pages_label.grid(row=1, column=0, sticky=tk.W, padx=10, pady=10)
        self.pages_entry = tk.Entry(self.root, width=50)
        self.pages_entry.grid(row=1, column=1, padx=10, pady=10)

        # Extract button
        self.extract_button = tk.Button(
            self.root, text="Extract Pages", command=self.extract_pages
        )
        self.extract_button.grid(row=2, column=1, pady=20)

        # Label that reports where the output file was written
        self.output_label = tk.Label(self.root, text="")
        self.output_label.grid(row=3, column=0, columnspan=3, pady=10)

    def browse_file(self):
        """Let the user pick a PDF and put its path into the file entry."""
        selected = filedialog.askopenfilename(filetypes=[("PDF files", "*.pdf")])
        if not selected:
            # Dialog was cancelled: keep whatever path was already entered
            # instead of wiping it.
            return
        self.pdf_path = selected
        self.file_entry.delete(0, tk.END)
        self.file_entry.insert(0, self.pdf_path)

    @staticmethod
    def _parse_page_spec(spec: str) -> list:
        """Parse a page specification into 1-based inclusive ranges.

        Args:
            spec: Comma-separated page numbers and ``first-last`` ranges,
                e.g. ``"1-5,7,9-12"``. Whitespace around tokens and empty
                tokens (such as a trailing comma) are ignored.

        Returns:
            A list of ``(first, last)`` tuples in input order. Single pages
            are returned as ``(n, n)``. Ranges are NOT checked against any
            document length here.

        Raises:
            ValueError: If a token is not a number or range, a range is
                reversed, or the specification contains no pages at all.
        """
        ranges = []
        for raw_token in spec.split(","):
            token = raw_token.strip()
            if not token:
                continue
            first_text, dash, last_text = token.partition("-")
            try:
                first = int(first_text)
                last = int(last_text) if dash else first
            except ValueError:
                raise ValueError(
                    f"'{token}' is not a page number or range"
                ) from None
            if first > last:
                # The original code silently turned this into an empty range.
                raise ValueError(f"range '{token}' is reversed")
            ranges.append((first, last))
        if not ranges:
            raise ValueError("no page numbers given")
        return ranges

    @staticmethod
    def _merge_ranges(ranges) -> list:
        """Return *ranges* sorted with overlapping/adjacent ranges merged.

        Args:
            ranges: Iterable of inclusive ``(first, last)`` integer tuples.

        Returns:
            A list of disjoint, non-adjacent ``(first, last)`` tuples in
            ascending order covering exactly the same page numbers.
        """
        merged = []
        for first, last in sorted(ranges):
            if merged and first <= merged[-1][1] + 1:
                merged[-1][1] = max(merged[-1][1], last)
            else:
                merged.append([first, last])
        return [(first, last) for first, last in merged]

    @staticmethod
    def _format_ranges(runs) -> str:
        """Format merged ranges compactly for use in a file name.

        ``[(1, 5), (7, 7), (9, 12)]`` becomes ``"1-5_7_9-12"``.
        """
        return "_".join(
            str(first) if first == last else f"{first}-{last}"
            for first, last in runs
        )

    def extract_pages(self):
        """Copy the requested pages of the selected PDF into a new PDF.

        Reads the path and page specification from the entry widgets,
        validates them, and writes ``<name>_Pages_<pages>.pdf`` into the
        directory of the source file. Problems are reported through message
        boxes; nothing is raised to the caller.
        """
        pdf_path = self.file_entry.get()
        pages = self.pages_entry.get().strip()
        if not pdf_path:
            messagebox.showwarning("Warning", "Please select a PDF file.")
            return
        if not pages:
            messagebox.showwarning(
                "Warning", "Please enter page numbers to extract."
            )
            return

        try:
            requested = self._parse_page_spec(pages)
        except ValueError as exc:
            messagebox.showerror("Error", f"Invalid page specification: {exc}")
            return

        # Broad catch is deliberate: this is the UI boundary, and any failure
        # (unreadable file, write error, ...) should become a dialog.
        try:
            pdf_doc = fitz.open(pdf_path)
            try:
                total_pages = len(pdf_doc)
                # Validate on the (first, last) pairs so an absurd range is
                # rejected without expanding it page by page.
                if any(
                    first < 1 or last > total_pages
                    for first, last in requested
                ):
                    messagebox.showerror("Error", "Invalid page range.")
                    return

                runs = self._merge_ranges(requested)

                # Output goes next to the source; the page list is kept
                # compact so long ranges do not produce oversized file names.
                filename = os.path.splitext(os.path.basename(pdf_path))[0]
                pages_str = self._format_ranges(runs)
                outpath = os.path.join(
                    os.path.dirname(pdf_path),
                    f"{filename}_Pages_{pages_str}.pdf",
                )

                new_doc = fitz.open()
                try:
                    # One insert per contiguous run (0-based page indices).
                    for first, last in runs:
                        new_doc.insert_pdf(
                            pdf_doc, from_page=first - 1, to_page=last - 1
                        )
                    new_doc.save(outpath)
                finally:
                    new_doc.close()
            finally:
                # Always release the source document, including on the
                # early return above and on errors.
                pdf_doc.close()
            self.output_label.config(
                text=f"Extracted pages saved to: {outpath}"
            )
        except Exception as e:
            messagebox.showerror("Error", f"An error occurred: {e}")
if __name__ == "__main__":
    # Script entry point: create the Tk root window, attach the extractor
    # UI to it, and hand control to the tkinter event loop.
    window = tk.Tk()
    app = PDFPageExtractor(window)
    window.mainloop()