odml_to_rdf.py 5.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156
  1. """odmlToRDF
  2. odmlToRDF searches for odML files within a provided SEARCHDIR
  3. and converts them to the newest odML format version and
  4. exports all found and resulting odML files to XML formatted RDF.
  5. Original files will never be overwritten. New files will be
  6. written either to a new directory at the current or a specified
  7. location.
  8. Usage: odmltordf [-r] [-o OUT] SEARCHDIR
  9. Arguments:
  10. SEARCHDIR Directory to search for odML files.
  11. Options:
  12. -o OUT Output directory. Must exist if specified.
  13. If not specified, output files will be
  14. written to the current directory.
  15. -r Search recursively. Directory structures
  16. will not be retained.
  17. -h --help Show this screen.
  18. --version Show version.
  19. """
  20. import os
  21. import pathlib
  22. import sys
  23. import tempfile
  24. from docopt import docopt
  25. try:
  26. from StringIO import StringIO
  27. except ImportError:
  28. from io import StringIO
  29. import odml
  30. from odml.tools.odmlparser import ODMLReader, ODMLWriter
  31. from odml.tools.converters import VersionConverter as VerConf
  32. try:
  33. unicode = unicode
  34. except NameError:
  35. unicode = str
  36. def run_rdf_export(odml_file, export_dir):
  37. """
  38. Convert an odML file to an XML RDF file and
  39. export it to an export directory with the
  40. same name as the original file and a '.rdf' file
  41. ending.
  42. :param odml_file: odML file to be converted to RDF.
  43. :param export_dir:
  44. """
  45. out_name = os.path.splitext(os.path.basename(odml_file))[0]
  46. out_file = os.path.join(export_dir, "%s.rdf" % out_name)
  47. doc = ODMLReader().from_file(odml_file)
  48. ODMLWriter("RDF").write_file(doc, out_file)
  49. def run_conversion(file_list, output_dir, rdf_dir, report, source_format="XML"):
  50. """
  51. Convert a list of odML files to the latest odML version if required
  52. and export all files to XML RDF files in a specified output directory.
  53. :param file_list: list of files to be exported to RDF.
  54. :param output_dir: Directory where odML files converted to
  55. the latest odML version will be saved.
  56. :param rdf_dir: Directory where exported RDF files will be saved.
  57. :param report: Reporting StringIO.
  58. :param source_format: Original file format of the odML source files.
  59. XML, JSON and YAML are supported, default is XML.
  60. """
  61. # Exceptions are kept as broad as possible to ignore any non-odML or
  62. # invalid odML files and ensuring everything that can be will be converted.
  63. for curr_file in file_list:
  64. file_path = unicode(curr_file.absolute())
  65. report.write("[Info] Handling file '%s'\n" % file_path)
  66. # When loading the current file succeeds, it is
  67. # a recent odML format file and can be exported
  68. # to RDF right away. Otherwise it needs to be
  69. # converted to the latest odML version first.
  70. try:
  71. odml.load(file_path, source_format)
  72. report.write("[Info] RDF conversion of '%s'\n" % file_path)
  73. run_rdf_export(file_path, rdf_dir)
  74. except Exception as exc:
  75. out_name = os.path.splitext(os.path.basename(file_path))[0]
  76. outfile = os.path.join(output_dir, "%s_conv.xml" % out_name)
  77. try:
  78. VerConf(file_path).write_to_file(outfile, source_format)
  79. try:
  80. report.write("[Info] RDF conversion of '%s'\n" % outfile)
  81. run_rdf_export(outfile, rdf_dir)
  82. except Exception as exc:
  83. report.write("[Error] converting '%s' to RDF: '%s'\n" %
  84. (file_path, exc))
  85. except Exception as exc:
  86. # Ignore files we cannot parse or convert
  87. report.write("[Error] version converting file '%s': '%s'\n" %
  88. (file_path, exc))
  89. def main(args=None):
  90. """
  91. Convenience script to automatically convert odML files
  92. within a directory (tree) to RDF. Check the cli help
  93. for details.
  94. :param args: Command line arguments
  95. """
  96. parser = docopt(__doc__, argv=args, version="0.1.0")
  97. root = parser['SEARCHDIR']
  98. if not os.path.isdir(root):
  99. print(docopt(__doc__, "-h"))
  100. exit(1)
  101. # Handle all supported odML file formats.
  102. if parser['-r']:
  103. xfiles = list(pathlib.Path(root).rglob('*.odml'))
  104. xfiles.extend(list(pathlib.Path(root).rglob('*.xml')))
  105. jfiles = list(pathlib.Path(root).rglob('*.json'))
  106. yfiles = list(pathlib.Path(root).rglob('*.yaml'))
  107. else:
  108. xfiles = list(pathlib.Path(root).glob('*.odml'))
  109. xfiles.extend(list(pathlib.Path(root).glob('*.xml')))
  110. jfiles = list(pathlib.Path(root).glob('*.json'))
  111. yfiles = list(pathlib.Path(root).glob('*.yaml'))
  112. out_root = os.getcwd()
  113. if parser["-o"]:
  114. if not os.path.isdir(parser["-o"]):
  115. print("[Error] Could not find output directory '%s'" % parser["-o"])
  116. exit(1)
  117. out_root = parser["-o"]
  118. out_dir = tempfile.mkdtemp(prefix="odmlconv_", dir=out_root)
  119. rdf_dir = tempfile.mkdtemp(prefix="odmlrdf_", dir=out_dir)
  120. # Use this monkeypatch reporter until there is a way
  121. # to run the converters silently.
  122. report = StringIO()
  123. report.write("[Info] Files will be saved to '%s'\n" % out_dir)
  124. run_conversion(xfiles, out_dir, rdf_dir, report)
  125. run_conversion(jfiles, out_dir, rdf_dir, report, "JSON")
  126. run_conversion(yfiles, out_dir, rdf_dir, report, "YAML")
  127. print(report.getvalue())
  128. report.close()
  129. if __name__ == "__main__":
  130. main(sys.argv[1:])