1
1

fuzzy_finder.py 8.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220
  1. """
  2. This module provides the FuzzyFinder class that enables querying
  3. an odML RDF graph using abstract parameters.
  4. """
  5. from .query_creator import QueryCreator, QueryParser, QueryParserFuzzy
  6. class FuzzyFinder(object):
  7. """
  8. FuzzyFinder tool for querying graph through 'fuzzy' queries. If the user does not
  9. know the exact attributes and structure of the odML data model, the finder executes
  10. multiple queries to better match the parameters and returns sets of triples.
  11. """
  12. def __init__(self, graph=None, q_params=None):
  13. self.graph = graph
  14. self.q_params = q_params
  15. self.prepared_queries_list = []
  16. self._subsets = []
  17. def find(self, mode="fuzzy", graph=None, q_str=None, q_params=None):
  18. """
  19. Applies set of queries to the graph and returns info
  20. that was retrieved from queries.
  21. :param mode: define the type of parser which will be used for parsing
  22. parameters or queries. Please find our more info about concrete
  23. parsers in odml/tool/query_creator.py or tutorials.
  24. :param graph: graph object.
  25. :param q_str: query string which used in QueryCreator class.
  26. Example for QueryParser: doc(author:D. N. Adams)
  27. section(name:Stimulus) prop(name:Contrast, value:20, unit:%)
  28. Example for QueryParserFuzzy: "FIND sec(name) prop(type)
  29. HAVING Stimulus, Contrast"
  30. :param q_params: dictionary object with set of parameters for a query
  31. Example for QueryParser:
  32. {'Sec': [('name', 'Stimulus')],
  33. Doc': [('author', 'D. N. Adams')],
  34. 'Prop': [('name', 'Contrast'), ('value':[20, 25]), ('unit':'%')]}
  35. Example for QueryParserFuzzy:
  36. {'Sec': ['name', 'type'], 'Doc': ['author'],
  37. 'Search': ['Stimulus','Contrast']}
  38. :return: string which contains set of triples.
  39. """
  40. if mode == "fuzzy":
  41. q_parser = QueryParserFuzzy()
  42. pairs_generator = self._generate_parameters_pairs_fuzzy
  43. elif mode == "match":
  44. q_parser = QueryParser()
  45. pairs_generator = self._generate_parameters_pairs
  46. else:
  47. raise ValueError("Parameter mode can be either 'fuzzy' or 'match'")
  48. self._validate_find_input_attributes(graph, q_str, q_params, q_parser)
  49. self._generate_parameters_subsets(pairs_generator())
  50. return self._output_query_results()
  51. def _validate_find_input_attributes(self, graph, q_str, q_params, q_parser):
  52. if not graph and not self.graph:
  53. raise ValueError("Please provide a RDF graph")
  54. if not self.graph:
  55. self.graph = graph
  56. if q_str and q_params:
  57. msg = "Please pass query parameters only as a string or a dict object"
  58. raise ValueError(msg)
  59. if q_str:
  60. self.q_params = q_parser.parse_query_string(q_str)
  61. elif q_params:
  62. self.q_params = q_params
  63. else:
  64. msg = "Please pass query parameters either as a string or a dict object"
  65. raise ValueError(msg)
  66. def _generate_parameters_pairs(self):
  67. """
  68. Example: {'Sec': [('name', 'some_name'), ('type', 'Stimulus')]}
  69. :return: [('Sec', ('name', 'some_name')), ('Sec', ('type', 'Stimulus'))]
  70. """
  71. parameters_pairs = []
  72. possible_keys = QueryCreator.possible_q_dict_keys
  73. for key in possible_keys:
  74. if key in self.q_params.keys():
  75. object_attrs = self.q_params[key]
  76. for object_attr in object_attrs:
  77. obj_pair = tuple([key, object_attr])
  78. parameters_pairs.append(obj_pair)
  79. return parameters_pairs
  80. def _generate_parameters_pairs_fuzzy(self):
  81. """
  82. Generates set of tuples matching search select and where parts of
  83. fuzzy finder query from dictionary of parameters.
  84. Example: {'Sec': ['name', 'type'],
  85. 'Doc': ['author'],
  86. 'Search': ['Stimulus', 'Contrast']}
  87. :return: [('Sec', ('name', 'Stimulus')), ('Sec', ('name', 'Contrast')),
  88. ('Sec', ('type', 'Stimulus')), ('Sec', ('name', 'Contrast')),
  89. ('Doc', ('author', 'Stimulus')), ('Doc', ('author', 'Contrast'))]
  90. """
  91. parameters_pairs = []
  92. search_values = []
  93. possible_keys = QueryCreator.possible_q_dict_keys
  94. if "Search" in self.q_params.keys():
  95. search_values = self.q_params["Search"]
  96. for key in possible_keys:
  97. if key in self.q_params.keys():
  98. object_attrs = self.q_params[key]
  99. for object_attr in object_attrs:
  100. for value in search_values:
  101. parameters_pairs.append(tuple([key, tuple([object_attr, value])]))
  102. return parameters_pairs
  103. def _generate_parameters_subsets(self, attrs):
  104. """
  105. Generates the set of parameters to create queries
  106. from specific to more broad ones.
  107. """
  108. self._subsets = []
  109. if len(attrs) > 0:
  110. self._subsets_util_dfs(0, [], self._subsets, sorted(attrs))
  111. self._subsets.sort(key=len, reverse=True)
  112. def _subsets_util_dfs(self, index, path, res, attrs):
  113. """
  114. Generates all subsets of attrs set using Depth-first search.
  115. Example (with numbers for explicity:
  116. [1,2,3] -> [[1], [2], [3], [1,2], [1,3], [2,3], [1,2,3]]
  117. :param index: help index for going through list.
  118. :param path: array for saving subsets.
  119. :param res: result subset.
  120. :param attrs: input list of attrs e.g. [('Sec', ('name', 'some_name')),
  121. ('Sec', ('type', 'Stimulus'))]
  122. """
  123. if path:
  124. res.append(path)
  125. for i in range(index, len(attrs)):
  126. if self._check_duplicate_attrs(path, attrs[i]):
  127. self._subsets_util_dfs(i + 1, path + [attrs[i]], res, attrs)
  128. @staticmethod
  129. def _check_duplicate_attrs(attrs_list, attr):
  130. for i in attrs_list:
  131. if attr[1][0] == i[1][0]:
  132. return False
  133. return True
  134. def _output_query_results(self):
  135. output_triples_string = ""
  136. for query in self._subsets:
  137. creator = self._prepare_query(query)
  138. curr_query = creator.get_query()
  139. triples = self._execute_query(curr_query)
  140. if triples:
  141. output_triples_string += creator.query
  142. output_triples_string += triples
  143. output_triples_string += "\n"
  144. return output_triples_string
  145. def _execute_query(self, query):
  146. """
  147. Execute prepared query on the graph.
  148. :param query: prepared query object
  149. :return: string with output triples
  150. """
  151. output_string = ""
  152. for row in self.graph.query(query):
  153. row_string = self._build_output_str(row)
  154. output_string += row_string
  155. return output_string
  156. @staticmethod
  157. def _build_output_str(row):
  158. """
  159. Build output string depending on the query variables.
  160. :param row: rdflib query row.
  161. :return: string with values.
  162. """
  163. out_str = ""
  164. possible_vars = QueryCreator.possible_query_variables
  165. for curr_key in possible_vars.keys():
  166. try:
  167. val = getattr(row, curr_key)
  168. out_str += "{0}: {1}\n".format(possible_vars[curr_key], val)
  169. except AttributeError:
  170. pass
  171. return out_str
  172. @staticmethod
  173. def _prepare_query(args):
  174. """
  175. Return a query for given parameters.
  176. :param args: dict with list of odML object attributes for creation query
  177. Example: {'Sec': [('name', 'some_name'), ('type', 'Stimulus')]}
  178. :return: QueryCreator object.
  179. """
  180. q_params = {}
  181. for arg in args:
  182. if arg[0] in q_params:
  183. q_params[arg[0]].append(arg[1])
  184. else:
  185. q_params[arg[0]] = [arg[1]]
  186. creator = QueryCreator(q_params)
  187. return creator