123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143 |
- """
- Handles (deferred) loading of terminology data and access to it for odML documents.
- """
- import datetime
- import os
- import sys
- import tempfile
- import threading
- try:
- import urllib.request as urllib2
- except ImportError:
- import urllib2
- from hashlib import md5
- from .tools.parser_utils import ParserException
- from .tools.xmlparser import XMLReader
# Base URL of the server hosting the terminology files.
REPOSITORY_BASE = 'https://terminologies.g-node.org'

# Full URL of the v1.1 terminologies index file below REPOSITORY_BASE.
REPOSITORY = '/'.join((REPOSITORY_BASE, 'v1.1', 'terminologies.xml'))

# A cached file older than this is downloaded again by cache_load.
CACHE_AGE = datetime.timedelta(hours=24)
def cache_load(url, replace_file=False):
    """
    Loads the url and stores it in a temporary cache directory;
    subsequent requests for this url will use the cached version.

    The cache lives in the directory "odml.cache" below the system temp
    directory. The cached file name is the md5 hex digest of the url
    joined by a dot with the basename of the url.

    A download takes place if the cached file does not exist, if
    replace_file is True, or if the cached file's modification time is
    older than CACHE_AGE.

    :param url: URL from where to load an odML terminology file from.
    :param replace_file: True, if file should be reloaded
    :return: The cached file opened for reading in text mode; the caller
             is responsible for closing it. None is returned (after a
             message has been printed) if the download or the decoding
             of the downloaded data failed.
    """
    filename = '.'.join([md5(url.encode()).hexdigest(), os.path.basename(url)])
    cache_dir = os.path.join(tempfile.gettempdir(), "odml.cache")
    if not os.path.exists(cache_dir):
        try:
            os.makedirs(cache_dir)
        except OSError:  # might happen due to concurrency
            if not os.path.exists(cache_dir):
                raise

    cache_file = os.path.join(cache_dir, filename)

    # The age of the cached file is only inspected when the file exists
    # and no reload was explicitly requested.
    needs_download = replace_file or not os.path.exists(cache_file)
    if not needs_download:
        modified = datetime.datetime.fromtimestamp(os.path.getmtime(cache_file))
        needs_download = modified < datetime.datetime.now() - CACHE_AGE

    if needs_download:
        try:
            response = urllib2.urlopen(url)
            try:
                data = response.read()
            finally:
                # Always release the connection, even if reading failed.
                response.close()

            if sys.version_info.major > 2:
                data = data.decode("utf-8")
        except Exception as exc:
            # Deliberate best effort: report the problem and signal the
            # failure to the caller via None instead of raising.
            print("failed loading '%s': %s" % (url, exc))
            return None

        # The context manager closes the file even if writing fails.
        with open(cache_file, "w") as file_obj:
            file_obj.write(str(data))

    return open(cache_file)
class Terminologies(dict):
    """
    Terminologies facilitates synchronous and deferred loading, caching,
    browsing and importing of full or partial odML terminologies.

    The instance itself is the in-memory cache: it maps a URL to the
    document that was parsed from it, or to None if parsing failed.
    """
    # Background loader threads keyed by URL. The dict is defined on the
    # class and is therefore shared by all instances.
    # NOTE(review): sharing looks intentional for the single module-level
    # instance - confirm before creating further instances.
    loading = {}

    # Passed to cache_load as replace_file; True forces a fresh download.
    reload_cache = False

    def load(self, url):
        """
        Loads and caches an odML XML file from a URL.

        If a background load for the url is in progress, this call waits
        for it to finish before looking the url up again.

        :param url: location of an odML XML file.
        :return: The odML document loaded from url.
        """
        if url in self:
            return self[url]

        if url in self.loading:
            # Wait for the background thread, drop its bookkeeping entry
            # and retry; the retry either finds the result or loads
            # synchronously if the background load did not store one.
            self.loading[url].join()
            self.loading.pop(url, None)
            return self.load(url)

        return self._load(url)

    def _load(self, url):
        """
        Cache loads an odML XML file from a URL and returns
        the result as a parsed odML document.

        :param url: location of an odML XML file.
        :return: The odML document loaded from url.
                 None is returned if the file could not be fetched or
                 if a ParserException occurred; a message is printed in
                 both cases. A parser failure is stored as None for the
                 url, a fetch failure stores nothing.
        """
        file_obj = cache_load(url, self.reload_cache)
        if file_obj is None:
            print("did not successfully load '%s'" % url)
            return None

        try:
            term = XMLReader(filename=url, ignore_errors=True).from_file(file_obj)
            term.finalize()
        except ParserException as exc:
            print("Failed to load %s due to parser errors" % url)
            print(' "%s"' % exc)
            term = None
        finally:
            # Release the cache file handle on every path; closing an
            # already closed file is harmless.
            file_obj.close()

        self[url] = term
        return term

    def deferred_load(self, url):
        """
        Starts a background thread to load an odML XML file from a URL.

        Nothing is started if the url has already been loaded or is
        currently registered as loading.

        :param url: location of an odML XML file.
        """
        if url in self or url in self.loading:
            return

        self.loading[url] = threading.Thread(target=self._load, args=(url,))
        self.loading[url].start()

    def refresh(self, url):
        """
        Drops all documents held in memory and loads the odML XML file
        from the URL again, bypassing the file cache while doing so.

        :param url: location of an odML XML file.
        """
        self.reload_cache = True
        try:
            self.clear()
            self.load(url)
        finally:
            # Restore normal caching even if loading raised; otherwise
            # every later load would keep re-downloading.
            self.reload_cache = False
# Module-wide registry instance used by the shortcuts below.
terminologies = Terminologies()

# Module-level shortcuts bound to the methods of the shared registry.
load, deferred_load, refresh = (
    terminologies.load,
    terminologies.deferred_load,
    terminologies.refresh,
)

if __name__ == "__main__":
    # Running the module as a script cache-loads the default repository file.
    FILE_OBJECT = cache_load(REPOSITORY)
|