123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100 |
- """
- Handles (deferred) loading of terminology data and access to it
- for odML documents
- """
- import os
- import tempfile
- import datetime
- import odml.tools.xmlparser
- from hashlib import md5
- try:
- import urllib.request as urllib2
- except ImportError:
- import urllib2
- import threading
# Maximum age of a cached download; cache_load compares a cache file's
# modification time against this before deciding to fetch the url again.
CACHE_AGE = datetime.timedelta(hours=24)
def cache_load(url):
    """
    load the url and store it in a temporary cache directory
    subsequent requests for this url will use the cached version

    The cache lives in the directory "odml.cache" below the system temp
    directory. A cached file is fetched again once its modification time is
    older than CACHE_AGE.

    Returns an open (text mode) file object for the cached copy, or None if
    the url had to be fetched and fetching failed.
    Raises OSError if the cache directory cannot be created.
    """
    # hash the url so that equal basenames from different urls do not collide
    filename = '.'.join([md5(url.encode()).hexdigest(), os.path.basename(url)])
    cache_dir = os.path.join(tempfile.gettempdir(), "odml.cache")
    if not os.path.exists(cache_dir):
        try:
            os.makedirs(cache_dir)
        except OSError:  # might happen due to concurrency
            if not os.path.exists(cache_dir):
                raise
    cache_file = os.path.join(cache_dir, filename)

    if os.path.exists(cache_file):
        modified = datetime.datetime.fromtimestamp(os.path.getmtime(cache_file))
        stale = modified < datetime.datetime.now() - CACHE_AGE
    else:
        stale = True

    if stale:
        try:
            response = urllib2.urlopen(url)
            try:
                data = response.read()
            finally:
                # the response is not a context manager on Python 2
                response.close()
        except Exception as e:
            print("failed loading '%s': %s" % (url, e))
            return None

        # read() returns bytes on Python 3; str(data) would store the
        # "b'...'" repr instead of the document, so write the raw bytes.
        with open(cache_file, "wb") as fp:
            fp.write(data)

    return open(cache_file)
- class Terminologies(dict):
- loading = {}
- def load(self, url):
- """
- load and cache a terminology-url
- returns the odml-document for the url
- """
- if url in self:
- return self[url]
- if url in self.loading:
- self.loading[url].join()
- self.loading.pop(url, None)
- return self.load(url)
- return self._load(url)
- def _load(self, url):
- # TODO also cache the data locally on disk
- # if url.startswith("http"): return None
- fp = cache_load(url)
- if fp is None:
- print("did not successfully load '%s'" % url)
- return
- try:
- term = odml.tools.xmlparser.XMLReader(filename=url, ignore_errors=True).fromFile(fp)
- term.finalize()
- except odml.tools.xmlparser.ParserException as e:
- print("Failed to load %s due to parser errors" % url)
- print(' "%s"' % e.message)
- term = None
- self[url] = term
- return term
- def deferred_load(self, url):
- """
- start a thread to load the terminology in background
- """
- if url in self or url in self.loading:
- return
- self.loading[url] = threading.Thread(target=self._load, args=(url,))
- self.loading[url].start()
# Module-wide registry instance. Its bound methods are re-exported so that
# callers can use load(url) / deferred_load(url) directly on this module.
terminologies = Terminologies()
load, deferred_load = terminologies.load, terminologies.deferred_load
if __name__ == "__main__":
    # Manual smoke test: fetch one terminology into the local cache.
    f = cache_load('http://portal.g-node.org/odml/terminologies/v1.0/analysis/analysis.xml')
    if f is not None:
        # cache_load returns an open file (or None on failure); do not leak it
        f.close()
|