zdl0929
100 天前
找了半天没合适的,然后 gpt 半小时搞定😂
------
# 读取文件夹中的所有 word 文件,把每一个转换为图像文件,再将图像文件合并到一个同名的 pdf 文件中
import os
from docx2pdf import convert
from pdf2image import convert_from_path
import img2pdf
import shutil
def word_to_pdf(word_file, pdf_file):
    """Convert a single Word document to a PDF file.

    Thin wrapper that forwards both paths, unchanged, to ``docx2pdf.convert``.

    Args:
        word_file: Path of the Word document to convert.
        pdf_file: Path the resulting PDF is written to.
    """
    convert(word_file, pdf_file)
def pdf_to_images(pdf_file, image_prefix):
    """Render every page of a PDF to a PNG file and return the file paths.

    Pages are saved as ``imagetmp/<image_prefix>/page_<n>.png`` (relative to
    the current working directory), numbered from 1 in page order.

    Args:
        pdf_file: Path of the PDF to rasterize.
        image_prefix: Name of the sub-folder under ``imagetmp`` that receives
            this document's page images.

    Returns:
        A list with the path of each PNG written, in page order.
    """
    images = convert_from_path(pdf_file)

    # Build the output folder path once, so the folder that gets created and
    # the folder the pages are saved into can never drift apart (and no path
    # separator is hard-coded).
    output_dir = os.path.join("imagetmp", image_prefix)
    # Create the folder if it does not exist yet.
    os.makedirs(output_dir, exist_ok=True)

    image_paths = []
    for page_number, image in enumerate(images, start=1):
        image_path = os.path.join(output_dir, f"page_{page_number}.png")
        image.save(image_path, "PNG")
        image_paths.append(image_path)
    return image_paths
def images_to_pdf(images, pdf_file):
    """Combine PNG images into a single PDF file.

    Only entries whose path ends in ``.png`` are passed to ``img2pdf``; any
    other entry is silently ignored.

    Args:
        images: Iterable of image file paths (strings), in the desired page
            order.
        pdf_file: Path of the PDF to write. An existing file is overwritten.
    """
    png_paths = [path for path in images if path.endswith(".png")]
    # Convert before opening the output: opening with "wb" truncates pdf_file
    # immediately, so a failed conversion would otherwise leave an empty file
    # behind (or wipe an existing PDF stored at that path).
    pdf_bytes = img2pdf.convert(png_paths)
    with open(pdf_file, "wb") as f:
        f.write(pdf_bytes)
def convert_word_files_to_pdf(source_directory, target_directory):
    """Convert every ``.docx`` file under a folder into an image-based PDF.

    Walks ``source_directory`` recursively. Each ``.docx`` file is converted
    to a PDF, that PDF is rendered to page images, and the images are merged
    back into a PDF with the same base name. The result is moved into
    ``target_directory`` under the same relative sub-folder.

    Args:
        source_directory: Root folder searched for ``.docx`` files.
        target_directory: Root folder that receives the PDFs; sub-folders
            are created as needed.
    """
    extension = ".docx"
    for root, _dirs, files in os.walk(source_directory):
        for file in files:
            if not file.endswith(extension):
                continue
            # Strip only the trailing extension; str.replace would also
            # rewrite ".docx" occurring elsewhere in the file name.
            stem = file[: -len(extension)]

            source_file = os.path.join(root, file)
            # The intermediate PDF and the final image PDF share one path:
            # the page images are saved to disk before the image PDF is
            # written, so it simply replaces the intermediate file.
            pdf_file = os.path.join(root, stem + ".pdf")
            word_to_pdf(source_file, pdf_file)
            images = pdf_to_images(pdf_file, stem)
            images_to_pdf(images, pdf_file)

            # Mirror the sub-folder layout via a relative path. Substring
            # replacement on `root` breaks when source_directory has a
            # trailing separator or its text reappears deeper in the tree.
            relative_root = os.path.relpath(root, source_directory)
            target_dir = os.path.normpath(
                os.path.join(target_directory, relative_root)
            )
            os.makedirs(target_dir, exist_ok=True)
            shutil.move(pdf_file, target_dir)
if __name__ == "__main__":
    import sys

    # The source and target folders come from the command line:
    #     python <script> SOURCE_DIR TARGET_DIR
    # The guard keeps the conversion from running when the file is imported.
    if len(sys.argv) != 3:
        sys.exit(f"usage: {sys.argv[0]} SOURCE_DIR TARGET_DIR")
    convert_word_files_to_pdf(sys.argv[1], sys.argv[2])