123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156 |
- """odmlToRDF
- odmlToRDF searches for odML files within a provided SEARCHDIR
- and converts them to the newest odML format version and
- exports all found and resulting odML files to XML formatted RDF.
- Original files will never be overwritten. New files will be
- written either to a new directory at the current or a specified
- location.
- Usage: odmltordf [-r] [-o OUT] SEARCHDIR
- Arguments:
- SEARCHDIR Directory to search for odML files.
- Options:
- -o OUT Output directory. Must exist if specified.
- If not specified, output files will be
- written to the current directory.
- -r Search recursively. Directory structures
- will not be retained.
- -h --help Show this screen.
- --version Show version.
- """
- import os
- import pathlib
- import sys
- import tempfile
- from docopt import docopt
- try:
- from StringIO import StringIO
- except ImportError:
- from io import StringIO
- import odml
- from odml.tools.odmlparser import ODMLReader, ODMLWriter
- from odml.tools.version_converter import VersionConverter as VerConf
- try:
- unicode = unicode
- except NameError:
- unicode = str
def run_rdf_export(odml_file, export_dir, source_format="XML"):
    """
    Convert an odML file to an XML RDF file and
    export it to an export directory with the
    same name as the original file and a '.rdf' file
    ending.

    :param odml_file: odML file to be converted to RDF.
    :param export_dir: Existing directory the RDF file is written to.
    :param source_format: Format the odML file is stored in; it is handed
                          to the ODMLReader so the file is read with the
                          matching parser. Default is XML.
    """
    out_name = os.path.splitext(os.path.basename(odml_file))[0]
    out_file = os.path.join(export_dir, "%s.rdf" % out_name)

    # NOTE(review): a reader created without arguments is presumably an
    # XML reader; passing the format explicitly keeps JSON and YAML files
    # from being parsed as XML.
    doc = ODMLReader(source_format).from_file(odml_file)
    ODMLWriter("RDF").write_file(doc, out_file)


def run_conversion(file_list, output_dir, rdf_dir, report, source_format="XML"):
    """
    Convert a list of odML files to the latest odML version if required
    and export all files to XML RDF files in a specified output directory.

    Nothing is raised for files that cannot be handled; every failure is
    written to the report instead and the next file is processed.

    :param file_list: list of files to be exported to RDF. The entries
                      need to provide an 'absolute()' method, e.g.
                      pathlib.Path objects.
    :param output_dir: Directory where odML files converted to
                       the latest odML version will be saved.
    :param rdf_dir: Directory where exported RDF files will be saved.
    :param report: Reporting StringIO.
    :param source_format: Original file format of the odML source files.
                          XML, JSON and YAML are supported, default is XML.
    """
    # Exceptions are kept as broad as possible to ignore any non-odML or
    # invalid odML files and ensuring everything that can be will be converted.
    for curr_file in file_list:
        file_path = unicode(curr_file.absolute())
        report.write("[Info] Handling file '%s'\n" % file_path)

        # When loading the current file succeeds, it is
        # a recent odML format file and can be exported
        # to RDF right away. Otherwise it needs to be
        # converted to the latest odML version first.
        try:
            odml.load(file_path, source_format)
            report.write("[Info] RDF conversion of '%s'\n" % file_path)
            # Export with the same format the file was just loaded with.
            run_rdf_export(file_path, rdf_dir, source_format)
        except Exception:
            out_name = os.path.splitext(os.path.basename(file_path))[0]
            outfile = os.path.join(output_dir, "%s_conv.xml" % out_name)

            try:
                VerConf(file_path).write_to_file(outfile, source_format)
            except Exception as exc:
                # Ignore files we cannot parse or convert
                report.write("[Error] version converting file '%s': '%s'\n" %
                             (file_path, exc))
                continue

            # The version converted file is written with an '.xml' ending,
            # so it is exported with the default source format.
            try:
                report.write("[Info] RDF conversion of '%s'\n" % outfile)
                run_rdf_export(outfile, rdf_dir)
            except Exception as exc:
                report.write("[Error] converting '%s' to RDF: '%s'\n" %
                             (file_path, exc))
def main(args=None):
    """
    Convenience script to automatically convert odML files
    within a directory (tree) to RDF. Check the cli help
    for details.

    Exits with status 1 if the search directory or a specified
    output directory does not exist.

    :param args: Command line arguments
    """
    parser = docopt(__doc__, argv=args, version="0.1.0")

    root = parser['SEARCHDIR']
    if not os.path.isdir(root):
        # Print the help text directly. Asking docopt for "-h" makes docopt
        # terminate the process itself with exit status 0, so the error
        # exit code would never be reached.
        print("[Error] Could not find search directory '%s'" % root)
        print(__doc__.strip("\n"))
        sys.exit(1)

    # Handle all supported odML file formats. The recursive and the flat
    # search only differ in the Path method used to collect the files.
    search_dir = pathlib.Path(root)
    find = search_dir.rglob if parser['-r'] else search_dir.glob

    xfiles = list(find('*.odml'))
    xfiles.extend(find('*.xml'))
    jfiles = list(find('*.json'))
    yfiles = list(find('*.yaml'))

    out_root = os.getcwd()
    if parser["-o"]:
        if not os.path.isdir(parser["-o"]):
            print("[Error] Could not find output directory '%s'" % parser["-o"])
            # sys.exit is always available; the bare 'exit' helper is only
            # injected by the site module.
            sys.exit(1)

        out_root = parser["-o"]

    # Converted odML files end up in a fresh directory below the output
    # root, the RDF exports in a fresh directory nested inside of it.
    out_dir = tempfile.mkdtemp(prefix="odmlconv_", dir=out_root)
    rdf_dir = tempfile.mkdtemp(prefix="odmlrdf_", dir=out_dir)

    # Use this monkeypatch reporter until there is a way
    # to run the converters silently.
    report = StringIO()
    report.write("[Info] Files will be saved to '%s'\n" % out_dir)

    try:
        run_conversion(xfiles, out_dir, rdf_dir, report)
        run_conversion(jfiles, out_dir, rdf_dir, report, "JSON")
        run_conversion(yfiles, out_dir, rdf_dir, report, "YAML")
    finally:
        # Always show what has been collected so far, even if a
        # conversion run is interrupted unexpectedly.
        print(report.getvalue())
        report.close()


if __name__ == "__main__":
    main(sys.argv[1:])
|