# 带UI界面的PDF内容提取代码 (PDF page extraction tool with a GUI)

import fitz  # PyMuPDF
import os
import tkinter as tk
from tkinter import filedialog, messagebox

class PDFPageExtractor:
    """Tkinter form that copies selected pages of a PDF into a new PDF file.

    The user picks a source PDF, types a page specification such as
    ``1-5,7,9-12`` (1-based, inclusive ranges), and the selected pages are
    written next to the source file as ``<name>_Pages_<ranges>.pdf``.
    """

    # Upper bound for the page-range part of the output filename, so that a
    # large scattered selection cannot push the name past filesystem limits.
    _MAX_LABEL_LENGTH = 100

    def __init__(self, root):
        """Attach the extractor UI to *root* (a Tk window) and build the widgets."""
        self.root = root
        self.root.title("PDF Page Extractor")

        # Path most recently chosen through the Browse dialog.
        self.pdf_path = ""

        # GUI Elements
        self.create_widgets()

    def create_widgets(self):
        """Create and grid the file row, the pages row, the button and the status label."""
        # File Selection
        self.file_label = tk.Label(self.root, text="PDF File:")
        self.file_label.grid(row=0, column=0, sticky=tk.W, padx=10, pady=10)

        self.file_entry = tk.Entry(self.root, width=50)
        self.file_entry.grid(row=0, column=1, padx=10, pady=10)

        self.browse_button = tk.Button(self.root, text="Browse", command=self.browse_file)
        self.browse_button.grid(row=0, column=2, padx=10, pady=10)

        # Pages Input
        self.pages_label = tk.Label(self.root, text="Pages to Extract (e.g., 1-5,7,9-12):")
        self.pages_label.grid(row=1, column=0, sticky=tk.W, padx=10, pady=10)

        self.pages_entry = tk.Entry(self.root, width=50)
        self.pages_entry.grid(row=1, column=1, padx=10, pady=10)

        # Extract Button
        self.extract_button = tk.Button(self.root, text="Extract Pages", command=self.extract_pages)
        self.extract_button.grid(row=2, column=1, pady=20)

        # Output Path Label
        self.output_label = tk.Label(self.root, text="")
        self.output_label.grid(row=3, column=0, columnspan=3, pady=10)

    def browse_file(self):
        """Ask for a PDF file and put the chosen path into the file entry."""
        chosen = filedialog.askopenfilename(filetypes=[("PDF files", "*.pdf")])
        if not chosen:
            # Dialog was cancelled: keep whatever path is already entered
            # instead of wiping it.
            return
        self.pdf_path = chosen
        self.file_entry.delete(0, tk.END)
        self.file_entry.insert(0, self.pdf_path)

    @staticmethod
    def _parse_page_spec(spec, total_pages):
        """Turn a page specification into a sorted list of zero-based indices.

        Args:
            spec: Comma-separated 1-based page numbers and inclusive ranges,
                e.g. ``"1-5,7,9-12"``. Surrounding whitespace and empty items
                (stray commas) are ignored.
            total_pages: Number of pages in the source document.

        Returns:
            Sorted list of unique zero-based page indices.

        Raises:
            ValueError: If an item is not a number or ``a-b`` pair, if a range
                is reversed or falls outside ``1..total_pages``, or if the
                specification selects no pages at all.
        """
        selected = set()
        for raw_part in spec.split(','):
            part = raw_part.strip()
            if not part:
                continue
            first_text, dash, last_text = part.partition('-')
            if not dash:
                # A single page number is a one-page range.
                last_text = first_text
            try:
                first = int(first_text)
                last = int(last_text)
            except ValueError:
                raise ValueError(f"Invalid page specification: '{part}'") from None
            if first < 1 or last < first or last > total_pages:
                raise ValueError("Invalid page range.")
            selected.update(range(first - 1, last))
        if not selected:
            raise ValueError("Please enter page numbers to extract.")
        return sorted(selected)

    @staticmethod
    def _group_runs(page_indices):
        """Collapse sorted page indices into ``(first, last)`` runs of consecutive pages."""
        runs = []
        for index in page_indices:
            if runs and index == runs[-1][1] + 1:
                runs[-1][1] = index
            else:
                runs.append([index, index])
        return [(first, last) for first, last in runs]

    @classmethod
    def _format_page_ranges(cls, page_indices):
        """Return a compact 1-based label such as ``"1-5_7_9-12"`` for a filename.

        Labels longer than ``_MAX_LABEL_LENGTH`` are cut and suffixed with
        ``_etc`` so the resulting filename stays within filesystem limits.
        """
        pieces = [
            str(first + 1) if first == last else f"{first + 1}-{last + 1}"
            for first, last in cls._group_runs(page_indices)
        ]
        label = "_".join(pieces)
        if len(label) > cls._MAX_LABEL_LENGTH:
            label = label[:cls._MAX_LABEL_LENGTH].rstrip("-_") + "_etc"
        return label

    @classmethod
    def _write_pages(cls, pdf_doc, pdf_path, page_indices):
        """Copy *page_indices* of *pdf_doc* into a new PDF beside *pdf_path*.

        Returns the path of the file that was written. The new document is
        closed even when copying or saving fails.
        """
        base_name = os.path.splitext(os.path.basename(pdf_path))[0]
        label = cls._format_page_ranges(page_indices)
        outpath = os.path.join(os.path.dirname(pdf_path), f"{base_name}_Pages_{label}.pdf")

        new_doc = fitz.open()
        try:
            # Copy each consecutive run with one call rather than page by page.
            for first, last in cls._group_runs(page_indices):
                new_doc.insert_pdf(pdf_doc, from_page=first, to_page=last)
            new_doc.save(outpath)
        finally:
            new_doc.close()
        return outpath

    def extract_pages(self):
        """Validate the form, extract the requested pages and report the result.

        Shows a warning when the file or page field is empty, an error dialog
        when the page specification is invalid or the extraction fails, and
        otherwise writes the output path into the status label.
        """
        pdf_path = self.file_entry.get()
        pages = self.pages_entry.get()

        if not pdf_path:
            messagebox.showwarning("Warning", "Please select a PDF file.")
            return
        if not pages.strip():
            messagebox.showwarning("Warning", "Please enter page numbers to extract.")
            return

        invalid_spec = None
        outpath = None
        try:
            pdf_doc = fitz.open(pdf_path)
            try:
                try:
                    page_indices = self._parse_page_spec(pages, len(pdf_doc))
                except ValueError as err:
                    # Remember the message; the dialog is shown after the
                    # source document has been closed.
                    invalid_spec = str(err)
                else:
                    outpath = self._write_pages(pdf_doc, pdf_path, page_indices)
            finally:
                pdf_doc.close()
        except Exception as e:
            # Top-level UI boundary: report any failure instead of crashing.
            messagebox.showerror("Error", f"An error occurred: {e}")
            return

        if invalid_spec is not None:
            messagebox.showerror("Error", invalid_spec)
            return
        self.output_label.config(text=f"Extracted pages saved to: {outpath}")

if __name__ == "__main__":
    # Script entry point: create the top-level Tk window, mount the
    # extractor UI on it, and hand control to the Tk event loop.
    window = tk.Tk()
    app = PDFPageExtractor(window)
    window.mainloop()