我刚刚问了下 GPT-4，给的回答看起来挺靠谱的：
import os
import subprocess
from docx import Document
from docx.opc.constants import RELATIONSHIP_TYPE as RT
import matplotlib.pyplot as plt
from sympy import preview
def extract_equations(word_document):
    """Collect the payload of every math-typed relationship of a document.

    Opens ``word_document`` with ``Document``, looks at the relationships of
    the main document part, and keeps the ``_blob`` of each relationship
    target whose ``reltype`` equals ``RT.MATH``.

    Args:
        word_document: Value passed straight to ``Document``.

    Returns:
        A list with one blob per matching relationship, in iteration order
        of the relationship collection.
    """
    # NOTE(review): this depends on ``RT.MATH`` being defined by the installed
    # python-docx and on the private ``_target`` / ``_blob`` attributes.
    # Equations may instead be stored inline in the document body rather than
    # as separate related parts -- TODO confirm this lookup finds anything.
    relationships = Document(word_document).part.rels
    return [
        relationships[rel_id]._target._blob
        for rel_id in relationships
        if relationships[rel_id].reltype == RT.MATH
    ]
def equations_to_latex(equations, output_folder):
    """Convert equation payloads to LaTeX by running them through pandoc.

    Each payload is written to ``{output_folder}/temp_eq{idx}.omml`` and that
    file is handed to the ``pandoc`` executable with ``-s ... -t latex``. The
    captured standard output, decoded as UTF-8 and stripped of surrounding
    whitespace, becomes the LaTeX string for that equation.

    Args:
        equations: Iterable of bytes-like payloads, one per equation.
        output_folder: Directory that receives the temporary ``.omml`` files;
            it must already exist.

    Returns:
        A list of LaTeX strings in the same order as ``equations``.

    Raises:
        RuntimeError: If pandoc exits with a non-zero status for a payload.
    """
    latex_equations = []
    for idx, eq in enumerate(equations):
        # Build the path once so the file written and the file converted
        # can never drift apart.
        omml_path = f"{output_folder}/temp_eq{idx}.omml"
        with open(omml_path, "wb") as f:
            f.write(eq)

        # NOTE(review): pandoc picks its reader from the file extension;
        # confirm it can actually parse a bare ``.omml`` file, and that the
        # standalone (``-s``) output is what the renderer downstream expects.
        result = subprocess.run(
            ["pandoc", "-s", omml_path, "-t", "latex"],
            capture_output=True,
        )
        # Surface conversion failures instead of silently returning "".
        if result.returncode != 0:
            details = result.stderr.decode("utf-8", errors="replace").strip()
            raise RuntimeError(
                f"pandoc failed on {omml_path} "
                f"(exit code {result.returncode}): {details}"
            )

        latex_equations.append(result.stdout.decode("utf-8").strip())
    return latex_equations
def latex_equations_to_images(latex_equations, output_folder):
    """Render every LaTeX string to a PNG file via ``preview``.

    The equation at position ``idx`` is written to
    ``{output_folder}/equation{idx}.png``; ``preview`` is called with the
    ``"file"`` viewer, PNG output and the dvi options ``-D 300``.

    Args:
        latex_equations: Iterable of LaTeX source strings.
        output_folder: Directory in which the PNG files are created.

    Returns:
        None. The function is used for its file-writing side effect.
    """
    for position, latex_source in enumerate(latex_equations):
        target_png = f"{output_folder}/equation{position}.png"
        # A fresh option list is built for each call, as in the original.
        resolution_flags = ["-D", "300"]
        preview(
            latex_source,
            viewer="file",
            filename=target_png,
            output="png",
            dvioptions=resolution_flags,
        )
def main(word_document="input.docx", output_folder="output"):
    """Run the full pipeline: extract equations, convert to LaTeX, render PNGs.

    Args:
        word_document: Document to read equations from. Defaults to
            ``"input.docx"``.
        output_folder: Directory for temporary and rendered files. It is
            created (including missing parents) when absent. Defaults to
            ``"output"``.
    """
    # makedirs(exist_ok=True) avoids the check-then-create race of
    # ``os.path.exists`` + ``os.mkdir`` and also handles nested paths.
    os.makedirs(output_folder, exist_ok=True)

    equations = extract_equations(word_document)
    latex_equations = equations_to_latex(equations, output_folder)
    latex_equations_to_images(latex_equations, output_folder)


# Run the pipeline only when executed as a script, not when imported.
if __name__ == "__main__":
    main()