terminology.py 4.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143
  1. """
  2. Handles (deferred) loading of terminology data and access to it for odML documents.
  3. """
  4. import datetime
  5. import os
  6. import sys
  7. import tempfile
  8. import threading
  9. try:
  10. import urllib.request as urllib2
  11. except ImportError:
  12. import urllib2
  13. from hashlib import md5
  14. from .tools.parser_utils import ParserException
  15. from .tools.xmlparser import XMLReader
# Base URL of the terminology repository (hosted under g-node.org).
REPOSITORY_BASE = 'https://terminologies.g-node.org'
# Full URL of the 'terminologies.xml' file in the 'v1.1' folder of the repository.
REPOSITORY = '/'.join([REPOSITORY_BASE, 'v1.1', 'terminologies.xml'])
# Cached files whose modification time is older than this are downloaded again
# by cache_load.
CACHE_AGE = datetime.timedelta(days=1)
  19. def cache_load(url, replace_file=False):
  20. """
  21. Loads the url and store it in a temporary cache directory
  22. subsequent requests for this url will use the cached version.
  23. :param url: URL from where to load an odML terminology file from.
  24. :param replace_file: True, if file should be reloaded
  25. """
  26. filename = '.'.join([md5(url.encode()).hexdigest(), os.path.basename(url)])
  27. cache_dir = os.path.join(tempfile.gettempdir(), "odml.cache")
  28. if not os.path.exists(cache_dir):
  29. try:
  30. os.makedirs(cache_dir)
  31. except OSError: # might happen due to concurrency
  32. if not os.path.exists(cache_dir):
  33. raise
  34. cache_file = os.path.join(cache_dir, filename)
  35. if not os.path.exists(cache_file) \
  36. or replace_file \
  37. or datetime.datetime.fromtimestamp(os.path.getmtime(cache_file)) < \
  38. datetime.datetime.now() - CACHE_AGE:
  39. try:
  40. data = urllib2.urlopen(url).read()
  41. if sys.version_info.major > 2:
  42. data = data.decode("utf-8")
  43. except Exception as exc:
  44. print("failed loading '%s': %s" % (url, exc))
  45. return
  46. file_obj = open(cache_file, "w")
  47. file_obj.write(str(data))
  48. file_obj.close()
  49. return open(cache_file)
  50. class Terminologies(dict):
  51. """
  52. Terminologies facilitates synchronous and deferred loading, caching,
  53. browsing and importing of full or partial odML terminologies.
  54. """
  55. loading = {}
  56. reload_cache = False
  57. def load(self, url):
  58. """
  59. Loads and caches an odML XML file from a URL.
  60. :param url: location of an odML XML file.
  61. :return: The odML document loaded from url.
  62. """
  63. if url in self:
  64. return self[url]
  65. if url in self.loading:
  66. self.loading[url].join()
  67. self.loading.pop(url, None)
  68. return self.load(url)
  69. return self._load(url)
  70. def _load(self, url):
  71. """
  72. Cache loads an odML XML file from a URL and returns
  73. the result as a parsed odML document.
  74. :param url: location of an odML XML file.
  75. :return: The odML document loaded from url.
  76. It will silently return None, if any exceptions
  77. occur to enable loading of nested odML files.
  78. """
  79. file_obj = cache_load(url, self.reload_cache)
  80. if file_obj is None:
  81. print("did not successfully load '%s'" % url)
  82. return
  83. try:
  84. term = XMLReader(filename=url, ignore_errors=True).from_file(file_obj)
  85. term.finalize()
  86. except ParserException as exc:
  87. print("Failed to load %s due to parser errors" % url)
  88. print(' "%s"' % exc)
  89. term = None
  90. self[url] = term
  91. return term
  92. def deferred_load(self, url):
  93. """
  94. Starts a background thread to load an odML XML file from a URL.
  95. :param url: location of an odML XML file.
  96. """
  97. if url in self or url in self.loading:
  98. return
  99. self.loading[url] = threading.Thread(target=self._load, args=(url,))
  100. self.loading[url].start()
  101. def refresh(self, url):
  102. """
  103. Deletes and reloads all cached odML XML files given in the
  104. terminology file from a URL.
  105. :param url: location of an odML XML file.
  106. """
  107. self.reload_cache = True
  108. self.clear()
  109. self.load(url)
  110. self.reload_cache = False
  111. terminologies = Terminologies()
  112. load = terminologies.load
  113. deferred_load = terminologies.deferred_load
  114. refresh = terminologies.refresh
  115. if __name__ == "__main__":
  116. FILE_OBJECT = cache_load(REPOSITORY)