Scheduled service maintenance on November 22


On Friday, November 22, 2024, between 06:00 CET and 18:00 CET, GIN services will undergo planned maintenance. Extended service interruptions should be expected. We will try to keep downtimes to a minimum, but recommend that users avoid critical tasks, large data uploads, or DOI requests during this time.

We apologize for any inconvenience.

odmlparser.py 6.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199
  1. #!/usr/bin/env python
  2. """
  3. A generic odML parsing module.
  4. Parses odML files and documents.
  5. """
  6. import datetime
  7. import json
  8. import sys
  9. import yaml
  10. from os.path import basename
  11. from . import xmlparser
  12. from .dict_parser import DictWriter, DictReader
  13. from ..info import FORMAT_VERSION
  14. from .parser_utils import ParserException
  15. from .parser_utils import SUPPORTED_PARSERS
  16. from .rdf_converter import RDFReader, RDFWriter
  17. from ..validation import Validation
  18. try:
  19. unicode = unicode
  20. except NameError:
  21. unicode = str
  22. class ODMLWriter:
  23. """
  24. A generic odML document writer, for XML, YAML and JSON.
  25. Usage:
  26. xml_writer = ODMLWriter(parser='XML')
  27. xml_writer.write_file(odml_document, filepath)
  28. """
  29. def __init__(self, parser='XML'):
  30. self.parsed_doc = None # Python dictionary object equivalent
  31. parser = parser.upper()
  32. if parser not in SUPPORTED_PARSERS:
  33. raise NotImplementedError("'%s' odML parser does not exist!" % parser)
  34. self.parser = parser
  35. def write_file(self, odml_document, filename):
  36. # Write document only if it does not contain validation errors.
  37. validation = Validation(odml_document)
  38. msg = ""
  39. for err in validation.errors:
  40. if err.is_error:
  41. msg += "\n\t- %s %s: %s" % (err.obj, err.rank, err.msg)
  42. if msg != "":
  43. msg = "Resolve document validation errors before saving %s" % msg
  44. raise ParserException(msg)
  45. with open(filename, 'w') as file:
  46. # Add XML header to support odML stylesheets.
  47. if self.parser == 'XML':
  48. file.write(xmlparser.XMLWriter.header)
  49. file.write(self.to_string(odml_document))
  50. def to_string(self, odml_document):
  51. string_doc = ''
  52. if self.parser == 'XML':
  53. string_doc = unicode(xmlparser.XMLWriter(odml_document))
  54. elif self.parser == "RDF":
  55. # Use turtle as default output format for now.
  56. string_doc = RDFWriter(odml_document).get_rdf_str("xml")
  57. else:
  58. self.parsed_doc = DictWriter().to_dict(odml_document)
  59. odml_output = {'Document': self.parsed_doc,
  60. 'odml-version': FORMAT_VERSION}
  61. if self.parser == 'YAML':
  62. string_doc = yaml.dump(odml_output, default_flow_style=False)
  63. elif self.parser == 'JSON':
  64. string_doc = json.dumps(odml_output, indent=4,
  65. cls=JSONDateTimeSerializer)
  66. if sys.version_info.major < 3:
  67. string_doc = string_doc.encode("utf-8")
  68. return string_doc
  69. # Required to serialize datetime values with JSON.
  70. class JSONDateTimeSerializer(json.JSONEncoder):
  71. def default(self, o):
  72. if isinstance(o, (datetime.datetime, datetime.date, datetime.time)):
  73. return str(o)
  74. return json.JSONEncoder.default(self, o)
  75. class ODMLReader:
  76. """
  77. A reader to parse odML files or strings into odml documents,
  78. based on the given data exchange format, like XML, YAML, JSON or RDF.
  79. Usage:
  80. yaml_odml_doc = ODMLReader(parser='YAML').from_file("odml_doc.yaml")
  81. json_odml_doc = ODMLReader(parser='JSON').from_file("odml_doc.json")
  82. """
  83. def __init__(self, parser='XML', show_warnings=True):
  84. """
  85. :param parser: odml parser; supported are 'XML', 'JSON', 'YAML' and 'RDF'.
  86. :param show_warnings: Toggle whether to print warnings to the command line.
  87. """
  88. self.doc = None # odML document
  89. self.parsed_doc = None # Python dictionary object equivalent
  90. parser = parser.upper()
  91. if parser not in SUPPORTED_PARSERS:
  92. raise NotImplementedError("'%s' odML parser does not exist!" % parser)
  93. self.parser = parser
  94. self.show_warnings = show_warnings
  95. self.warnings = []
  96. def from_file(self, file, doc_format=None):
  97. if self.parser == 'XML':
  98. par = xmlparser.XMLReader(ignore_errors=True,
  99. show_warnings=self.show_warnings)
  100. self.warnings = par.warnings
  101. self.doc = par.from_file(file)
  102. return self.doc
  103. elif self.parser == 'YAML':
  104. with open(file) as yaml_data:
  105. try:
  106. self.parsed_doc = yaml.load(yaml_data)
  107. except yaml.parser.ParserError as err:
  108. print(err)
  109. return
  110. par = DictReader(show_warnings=self.show_warnings)
  111. self.doc = par.to_odml(self.parsed_doc)
  112. # Provide original file name via the in memory document
  113. self.doc._origin_file_name = basename(file)
  114. return self.doc
  115. elif self.parser == 'JSON':
  116. with open(file) as json_data:
  117. try:
  118. self.parsed_doc = json.load(json_data)
  119. except ValueError as err: # Python 2 does not support JSONDecodeError
  120. print("JSON Decoder Error: %s" % err)
  121. return
  122. par = DictReader(show_warnings=self.show_warnings)
  123. self.doc = par.to_odml(self.parsed_doc)
  124. # Provide original file name via the in memory document
  125. self.doc._origin_file_name = basename(file)
  126. return self.doc
  127. elif self.parser == 'RDF':
  128. if not doc_format:
  129. raise ValueError("Format of the rdf file was not specified")
  130. self.doc = RDFReader().from_file(file, doc_format)
  131. return self.doc
  132. def from_string(self, string, doc_format=None):
  133. if self.parser == 'XML':
  134. self.doc = xmlparser.XMLReader().from_string(string)
  135. return self.doc
  136. elif self.parser == 'YAML':
  137. try:
  138. self.parsed_doc = yaml.load(string)
  139. except yaml.parser.ParserError as err:
  140. print(err)
  141. return
  142. self.doc = DictReader().to_odml(self.parsed_doc)
  143. return self.doc
  144. elif self.parser == 'JSON':
  145. try:
  146. self.parsed_doc = json.loads(string)
  147. except ValueError as err: # Python 2 does not support JSONDecodeError
  148. print("JSON Decoder Error: %s" % err)
  149. return
  150. self.doc = DictReader().to_odml(self.parsed_doc)
  151. return self.doc
  152. elif self.parser == 'RDF':
  153. if not doc_format:
  154. raise ValueError("Format of the rdf file was not specified")
  155. self.doc = RDFReader().from_string(string, doc_format)
  156. return self.doc