import os
import subprocess

# --- Transformation settings -------------------------------------------------
# All paths are relative, so they resolve against the current working
# directory of the process that runs this script.

# Root directory that is searched recursively for ".xml" source files.
input_directory = "./corpus"

# XSLT stylesheet applied to every XML file.
xsl_file = "./Stylesheets-master/start-txt.xsl"

# Root directory that receives the generated ".txt" files.
output_directory = "./txt"

# Saxon-HE jar executed through "java -jar" to run the stylesheet.
saxon_jar = "./SaxonHE12-5J/saxon-he-12.5.jar"


def transform_xml_files(input_dir, output_dir, *, xsl_path=None, jar_path=None):
    """Apply an XSLT stylesheet to every ``.xml`` file found under *input_dir*.

    The directory tree below *input_dir* is walked recursively. For each file
    whose name ends with ``.xml``, the matching sub-directory is created under
    *output_dir* and Saxon is invoked through ``java -jar`` to write a file
    with the same base name and a ``.txt`` extension.

    Args:
        input_dir: Root directory searched for XML files.
        output_dir: Root directory receiving the transformed files; the
            sub-directory layout of *input_dir* is mirrored below it.
        xsl_path: Stylesheet to apply. Defaults to the module-level
            ``xsl_file`` setting.
        jar_path: Saxon jar to execute. Defaults to the module-level
            ``saxon_jar`` setting.

    Returns:
        None. Progress and errors are reported on standard output.

    A transformation that exits with a non-zero status is reported and the
    remaining files are still processed. If the ``java`` process cannot be
    started at all, the error is reported once and processing stops, because
    every later file would fail the same way.
    """
    xml_suffix = ".xml"

    # Resolve the defaults at call time so the module-level settings are used
    # whenever the caller does not override them.
    if xsl_path is None:
        xsl_path = xsl_file
    if jar_path is None:
        jar_path = saxon_jar

    for root, _dirs, files in os.walk(input_dir):
        # Location of the current directory relative to the input root,
        # re-created under the output root.
        relative_path = os.path.relpath(root, input_dir)
        output_subdir = os.path.join(output_dir, relative_path)

        for name in files:
            if not name.endswith(xml_suffix):
                continue

            xml_file_path = os.path.join(root, name)

            # Created lazily: only directories that actually contain XML
            # files get a counterpart in the output tree.
            os.makedirs(output_subdir, exist_ok=True)

            # Swap only the trailing extension; str.replace would also
            # rewrite any ".xml" appearing earlier in the file name.
            txt_name = name[: -len(xml_suffix)] + ".txt"
            output_file_path = os.path.join(output_subdir, txt_name)

            # Argument list (no shell), so paths with spaces are safe.
            command = [
                "java", "-jar", jar_path,
                "-s:" + xml_file_path,
                "-xsl:" + xsl_path,
                "-o:" + output_file_path,
            ]

            try:
                subprocess.run(command, check=True)
            except subprocess.CalledProcessError as e:
                # Saxon ran but failed on this file; keep going with the rest.
                print(f"Erreur lors de la transformation du fichier {xml_file_path}: {e}")
            except OSError as e:
                # The java executable itself could not be launched.
                print(f"Erreur : impossible de lancer Java pour {xml_file_path}: {e}")
                return
            else:
                print(f"Transformation appliquée à {xml_file_path} -> {output_file_path}")


# Run the batch transformation only when executed as a script, so importing
# this module does not start any Java process.
if __name__ == "__main__":
    transform_xml_files(input_directory, output_directory)