query_creator.py 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341
  1. """
  2. The module provides access to QueryParser and QueryCreator classes.
  3. QueryParsers parse search strings to odml query dictionaries that can be
  4. consumed by QueryCreators. QueryCreators create RDF queries from
  5. provided odml query dictionaries.
  6. """
  7. import re
  8. from abc import ABCMeta, abstractmethod
  9. from rdflib import RDF
  10. from rdflib.plugins.sparql import prepareQuery
  11. from ..format import Document
  12. from ..format import Property
  13. from ..format import Section
  14. from ..format import Format
  15. odmlns = Format.namespace()
  class BaseQueryCreator(ABCMeta("ABC", (object,), {})):
      """
      An abstract base class for odml specific QueryCreators.

      Concrete subclasses have to implement ``get_query`` and
      ``_prepare_query``; the base class itself cannot be instantiated.
      """

      # The base class above is created through ABCMeta directly: a class-level
      # ``__metaclass__ = ABCMeta`` attribute is ignored by Python 3 (leaving
      # the abstract methods unenforced), while the ``metaclass=`` keyword is
      # a syntax error on Python 2. This form works on both.

      # Names of the query variables and the odML entities they stand for.
      possible_query_variables = {"d": "Document", "s": "Section",
                                  "p": "Property", "v": "Bag URI",
                                  "value": "Value"}
      # Keys of a query dictionary that address odML entities.
      possible_q_dict_keys = ["Doc", "Sec", "Prop"]

      def __init__(self, q_dict=None):
          """
          :param q_dict: dictionary with query parameters. A missing or empty
                         value is replaced by a new empty dictionary.
          """
          self.q_dict = q_dict if q_dict else {}
          self.query = ""
          super(BaseQueryCreator, self).__init__()

      @abstractmethod
      def get_query(self, q_str, q_parser):
          """
          Constructs a SPARQL query from an input string.

          :param q_str: input string.
          :param q_parser: parser to use on the input string.
          :return SPARQL query.
          """
          pass

      @abstractmethod
      def _prepare_query(self):
          """
          Builds the query from the content of ``self.q_dict``.
          """
          pass
  class BaseQueryParser(ABCMeta("ABC", (object,), {})):
      """
      An abstract base class for QueryParsers.

      Concrete subclasses have to implement ``parse_query_string``; the base
      class itself cannot be instantiated.
      """

      # The base class above is created through ABCMeta directly: a class-level
      # ``__metaclass__ = ABCMeta`` attribute is ignored by Python 3 (leaving
      # the abstract method unenforced), while the ``metaclass=`` keyword is
      # a syntax error on Python 2. This form works on both.

      def __init__(self):
          # Result of the most recent parse; filled by parse_query_string.
          self.q_dict = {}

      @abstractmethod
      def parse_query_string(self, q_str):
          """
          Parses an input string and return a dictionary consumable by a QueryCreator.

          :param q_str: query string.
          :return: dict object.
          """
          pass
  class QueryParserFuzzy(BaseQueryParser):
      """
      This class parses an odml specific input string of the form
      'FIND ... HAVING ...' into a query dictionary: the FIND part names the
      Document, Section or Property attributes to look at, the HAVING part
      lists the search values to match against them.
      """

      # Attribute names accepted per odML object, as regex alternations.
      _DOC_ATTRS = "id|author|date|version|repository|sections"
      _SEC_ATTRS = "id|name|definition|type|repository|reference|sections|properties"
      _PROP_ATTRS = "id|name|definition|dtype|unit|uncertainty|reference|value_origin"

      def __init__(self):
          super(QueryParserFuzzy, self).__init__()

      def parse_query_string(self, q_str):
          """
          Parse query string and returns dict object with parameters.

          :param q_str: query string.
                        Example: FIND sec(name, type) prop(name) HAVING Stimulus, Contrast
          :return: dict object.
                   Example: {'Sec': ['name', 'type'],
                             'Prop': ['name'],
                             'Search': ['Stimulus', 'Contrast']}
          :raises ValueError: if the string does not contain 'FIND ... HAVING'
                              or if no search values follow HAVING.
          """
          self.q_dict = {}

          find_match = re.search(r"FIND(.*?)HAVING", q_str)
          if find_match is None:
              # Without this guard the failure surfaces as an AttributeError
              # on None, which gives no hint about the malformed query.
              raise ValueError("Query string has to follow the pattern "
                               "'FIND ... HAVING ...'")

          find_group = find_match.group(1).strip()
          if find_group:
              self._parse_find(find_group)

          # The FIND pattern only matches when HAVING is present,
          # so this search always succeeds.
          having_group = re.search(r"HAVING(.*)", q_str).group(1).strip()
          if not having_group:
              raise ValueError("Search values in having part were not specified")
          if "Search" in self.q_dict:
              raise ValueError("Search values are already parsed")
          self._parse_having(having_group)

          return self.q_dict

      def _parse_find(self, find_part):
          """
          Parses find string part into list of specific keys to which search values
          would be applied. e.g. 'sec(name, type) prop(name)'
          into {'Sec': ['name', 'type'], 'Prop': ['name']}.

          :param find_part: string which represent list of searchable odML data model
                            objects like document(doc), sections(sec) or properties(prop).
                            e.g. 'sec(name, type) prop(name)'
          """
          doc = re.search(r"(doc|document)[(].*?[)]", find_part)
          if doc:
              self._parse_doc(doc)

          sec = re.search(r"(sec|section)[(].*?[)]", find_part)
          if sec:
              self._parse_sec(sec)

          prop = re.search(r"(prop|property)[(].*?[)]", find_part)
          if prop:
              self._parse_prop(prop)

      @staticmethod
      def _find_attrs(attr_list, text):
          """
          Returns all attribute names from attr_list found in text.

          :param attr_list: '|' separated attribute names.
          :param text: object expression, e.g. 'sec(name, type)'.
          :return: list of the attribute names in order of appearance.
          """
          # The closing delimiter is only looked at, not consumed: it has to
          # stay available as the opening delimiter of the next attribute,
          # otherwise 'sec(name,type)' would lose 'type'.
          pattern = "[(, ](%s)(?=[),])" % attr_list
          return re.findall(pattern, text)

      def _parse_doc(self, doc):
          """
          Stores the document attributes of the matched expression as 'Doc'.

          :param doc: regex match of a 'doc(...)' expression or None.
          """
          if doc:
              self.q_dict["Doc"] = self._find_attrs(self._DOC_ATTRS, doc.group(0))

      def _parse_sec(self, sec):
          """
          Stores the section attributes of the matched expression as 'Sec'.

          :param sec: regex match of a 'sec(...)' expression or None.
          """
          if sec:
              self.q_dict["Sec"] = self._find_attrs(self._SEC_ATTRS, sec.group(0))

      def _parse_prop(self, prop):
          """
          Stores the property attributes of the matched expression as 'Prop'.

          :param prop: regex match of a 'prop(...)' expression or None.
          """
          if prop:
              self.q_dict["Prop"] = self._find_attrs(self._PROP_ATTRS, prop.group(0))

      def _parse_having(self, having_part):
          """
          Parses search value string into list of specific values.
          e.g. 'Stimulus, Contrast, Date' into list [Stimulus, Contrast, Date].

          :param having_part: string with search values, e.g. 'Stimulus, Contrast'
                              Also spaces errors in the string like 'Stimulus, , Contrast'
                              will be ignored.
          """
          if not having_part:
              return

          stripped = (val.strip() for val in having_part.split(","))
          self.q_dict["Search"] = [val for val in stripped if val]
  class QueryParser(BaseQueryParser):
      """
      This class parses an odml specific input string into an odml specific
      query dictionary of (attribute, value) pairs.
      """

      # Attribute names accepted per odML object, as regex alternations.
      _DOC_ATTRS = "id|author|date|version|repository|sections"
      _SEC_ATTRS = "id|name|definition|type|repository|reference|sections|properties"
      _PROP_ATTRS = "id|name|definition|dtype|unit|uncertainty|reference|value_origin"

      def __init__(self):
          super(QueryParser, self).__init__()

      def parse_query_string(self, q_str):
          """
          :param q_str: query string
                        Example: doc(author:D. N. Adams) section(name:Stimulus)
                                 prop(name:Contrast, value:[20, 25], unit:%)
          :return: dict object
                   Example: {'Sec': [('name', 'Stimulus')],
                             'Doc': [('author', 'D. N. Adams')],
                             'Prop': [('name', 'Contrast'), ('unit', '%'),
                                      ('value', ['20', '25'])]}
          """
          # Start from a clean dictionary, otherwise entries of a previously
          # parsed query would leak into the result of this one.
          self.q_dict = {}

          doc = re.search(r"(doc|document)[(].*?[)]", q_str)
          if doc:
              self._parse_doc(doc)

          sec = re.search(r"(sec|section)[(].*?[)]", q_str)
          if sec:
              self._parse_sec(sec)

          prop = re.search(r"(prop|property)[(].*?[)]", q_str)
          if prop:
              self._parse_prop(prop)

          return self.q_dict

      @staticmethod
      def _find_pairs(attr_list, text):
          """
          Returns all 'attribute:value' pairs for attributes from attr_list.

          :param attr_list: '|' separated attribute names.
          :param text: object expression, e.g. 'sec(name:Stimulus)'.
          :return: list of (attribute, value) tuples in order of appearance.
          """
          # The closing delimiter is only looked at, not consumed: it has to
          # stay available as the opening delimiter of the next pair,
          # otherwise 'prop(name:a,unit:b)' would lose the unit.
          pattern = "[, (](%s):(.*?)(?=[,)])" % attr_list
          return re.findall(pattern, text)

      def _parse_doc(self, doc):
          """
          Stores the document attribute pairs of the matched expression as 'Doc'.

          :param doc: regex match of a 'doc(...)' expression or None.
          """
          if doc:
              self.q_dict["Doc"] = self._find_pairs(self._DOC_ATTRS, doc.group(0))

      def _parse_sec(self, sec):
          """
          Stores the section attribute pairs of the matched expression as 'Sec'.

          :param sec: regex match of a 'sec(...)' expression or None.
          """
          if sec:
              self.q_dict["Sec"] = self._find_pairs(self._SEC_ATTRS, sec.group(0))

      def _parse_prop(self, prop):
          """
          Stores the property attribute pairs of the matched expression as 'Prop'.
          A bracketed value list, e.g. 'value:[20, 25]', is appended as
          ('value', ['20', '25']).

          :param prop: regex match of a 'prop(...)' expression or None.
          """
          if not prop:
              return

          prop_text = prop.group(0)
          self.q_dict["Prop"] = self._find_pairs(self._PROP_ATTRS, prop_text)

          value_group = re.findall(r"value:\[(.*)]", prop_text)
          if value_group:
              values = re.split(", ?", value_group[0])
              self.q_dict["Prop"].append(("value", values))
  class QueryCreator(BaseQueryCreator):
      """
      Class for simplifying the creation of prepared SPARQL queries.

      The query dictionary has to hold (attribute, value) pairs under the
      keys 'Doc', 'Sec' and 'Prop', as returned by QueryParser. Entries that
      are not such pairs, e.g. the plain attribute names returned by
      QueryParserFuzzy, raise a ValueError.

      Usage:
          query = "doc(author:D. N. Adams) section(name:Stimulus)
                   prop(name:Contrast, value:[20, 25], unit:%)"
          prepared_query = QueryCreator().get_query(query, QueryParser())
      """

      def __init__(self, q_dict=None):
          """
          :param q_dict: dictionary with query parameters
          """
          super(QueryCreator, self).__init__(q_dict)

      def get_query(self, q_str=None, q_parser=None):
          """
          Returns the prepared query for the query dictionary of this
          instance. q_str and q_parser are only used when the instance does
          not hold a query dictionary yet; the parsed dictionary is then kept
          on the instance.

          :param q_parser: one of possible query parsers.
          :param q_str: doc(author:D. N. Adams) section(name:Stimulus)
                        prop(name:Contrast, value:[20, 25], unit:%)
          :return rdflib prepared query.
          :raises AttributeError: if a query dictionary has to be parsed but
                                  q_str or q_parser is missing.
          """
          if not self.q_dict:
              if not q_str:
                  raise AttributeError("Please fulfill q_str param (query string)")
              if not q_parser:
                  raise AttributeError("Please fulfill q_parser param (query parser)")
              self.q_dict = q_parser.parse_query_string(q_str)

          self._prepare_query()

          use_ns = {"odml": odmlns, "rdf": RDF}
          return prepareQuery(self.query, initNs=use_ns)

      @staticmethod
      def _split_entry(entry):
          """
          Validates a query dictionary entry and returns it as (key, value).

          :param entry: an (attribute, value) tuple or list.
          :raises ValueError: if entry is not a sequence of exactly two items.
          """
          # Strings are rejected explicitly: a two character attribute name
          # like 'id' would otherwise pass as the pair ('i', 'd').
          if not isinstance(entry, (tuple, list)) or len(entry) != 2:
              msg = "Attributes in the query \"{}\" are not valid.".format(entry)
              raise ValueError(msg)
          return entry[0], entry[1]

      @staticmethod
      def _escape_literal(value):
          """
          Returns value as text that is safe inside a double quoted SPARQL
          string literal.
          """
          text = "{}".format(value)
          # The backslash has to be handled first, otherwise the escapes
          # added for the other characters would be escaped again.
          for char, escaped in (("\\", "\\\\"), ("\"", "\\\""),
                                ("\n", "\\n"), ("\r", "\\r")):
              text = text.replace(char, escaped)
          return text

      @staticmethod
      def _attr_triple(variable, odml_format, key, value, ns_pattern):
          """
          Returns the triple pattern matching value against the attribute key.

          :param variable: query variable of the odML entity, e.g. '?s'.
          :param odml_format: format object providing rdf_map for the entity.
          :param key: odML attribute name.
          :param value: value the attribute has to match.
          :param ns_pattern: regex matching the odml namespace URI.
          :return: list with one query line; empty if rdf_map returns
                   nothing for key.
          """
          attr = odml_format.rdf_map(key)
          if not attr:
              return []

          predicate = re.sub(ns_pattern, "odml:", attr)
          literal = QueryCreator._escape_literal(value)
          return ["{0} {1} \"{2}\" .".format(variable, predicate, literal)]

      def _prepare_query(self):
          """
          Creates rdflib query using parameters from self.q_dict.

          :return: string representing rdflib query.
          :raises ValueError: if an entry of self.q_dict is not an
                              (attribute, value) pair.
          """
          # The namespace is escaped since characters like "." must only
          # match themselves when the URI is replaced by its prefix.
          ns_pattern = re.escape(str(odmlns))
          lines = ["SELECT * WHERE {"]

          doc_attrs = self.q_dict.get("Doc")
          if doc_attrs:
              lines.append("?d rdf:type odml:Document .")
              for entry in doc_attrs:
                  key, value = self._split_entry(entry)
                  lines.extend(self._attr_triple("?d", Document, key, value,
                                                 ns_pattern))

          sec_attrs = self.q_dict.get("Sec")
          if sec_attrs:
              lines.append("?d odml:hasSection ?s .")
              lines.append("?s rdf:type odml:Section .")
              for entry in sec_attrs:
                  key, value = self._split_entry(entry)
                  lines.extend(self._attr_triple("?s", Section, key, value,
                                                 ns_pattern))

          prop_attrs = self.q_dict.get("Prop")
          if prop_attrs:
              lines.append("?s odml:hasProperty ?p .")
              lines.append("?p rdf:type odml:Property .")
              for entry in prop_attrs:
                  key, value = self._split_entry(entry)
                  if key != "value":
                      lines.extend(self._attr_triple("?p", Property, key, value,
                                                     ns_pattern))
                  elif value:
                      # Values are matched as members of the rdf:Bag
                      # attached to the property.
                      lines.append("?p odml:hasValue ?v .")
                      lines.append("?v rdf:type rdf:Bag .")
                      for val in value:
                          lines.append("?v rdf:li \"{}\" .".format(
                              self._escape_literal(val)))

          lines.append("}")
          self.query = "\n".join(lines) + "\n"
          return self.query