Scheduled service maintenance on November 22


On Friday, November 22, 2024, between 06:00 CET and 18:00 CET, GIN services will undergo planned maintenance. Extended service interruptions should be expected. We will try to keep downtimes to a minimum, but recommend that users avoid critical tasks, large data uploads, or DOI requests during this time.

We apologize for any inconvenience.

flags_search.py 4.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129
  1. from whoosh import index as windex
  2. from whoosh import fields as wfields
  3. from whoosh import qparser as wqparser
  4. import view
  5. from ..python_core.appdirs import get_app_usr_data_dir
  6. import pathlib as pl
  7. from ..python_core.flags import FlagsManager
  8. def get_index_dir():
  9. """
  10. Return the path of the directory used for storing indexing files
  11. :return: pathlib.Path object
  12. """
  13. return pl.Path(get_app_usr_data_dir()) / "flags_text_index"
  14. def get_indexname():
  15. """
  16. Returns the index name corresponding to the current version of VIEW
  17. """
  18. return view.__version__.replace("+", "_")
  19. def check_if_index_is_correct(index_dir: pl.Path):
  20. """
  21. Open the index if possible, check whether the index is of the current version of VIEW. If true return the
  22. index, else delete the index directory and return None
  23. :param index_dir: pathlib.Path object, pointing to a directory on file system
  24. :return: whoosh.index.FileIndex or None
  25. """
  26. if not index_dir.is_dir():
  27. return False
  28. elif windex.exists_in(str(index_dir), indexname=get_indexname()):
  29. return True
  30. else:
  31. return False
  32. def get_schema():
  33. """
  34. creates and returns the schema used for indexing flag data
  35. """
  36. return wfields.Schema(flag_name=wfields.KEYWORD(stored=True), flag_subgroup=wfields.STORED,
  37. flag_description=wfields.TEXT(stored=True))
  38. def create_new_index(index_dir, flags):
  39. """
  40. create a new index and return it, exluding deprecated flags
  41. """
  42. index_dir.mkdir(parents=True, exist_ok=True)
  43. ix = windex.create_in(dirname=str(index_dir), schema=get_schema(), indexname=get_indexname())
  44. index_writer = ix.writer()
  45. for flag_subgroup in flags.get_subgroups():
  46. subgroup_definition = flags.get_subgroup_definition(flag_subgroup)
  47. for flag_index, (flag_name, flag_default, flag_description, selectable_options, flag_value_type) \
  48. in subgroup_definition.iterrows():
  49. index_writer.add_document(flag_name=flag_name, flag_description=flag_description,
  50. flag_subgroup=flag_subgroup)
  51. index_writer.commit()
  52. return ix
  53. def get_flags_index(flags: FlagsManager):
  54. """
  55. Checks if an index for this version of VIEW exists. If it does returns. Else, creates one and returns it.
  56. :param flags: flags object
  57. :returns: index object
  58. """
  59. index_dir = get_index_dir()
  60. if check_if_index_is_correct(index_dir):
  61. return windex.open_dir(str(index_dir), indexname=get_indexname())
  62. else:
  63. return create_new_index(index_dir, flags)
  64. def query(index, query_str: str, max_results: int = 5):
  65. query_str = f"*{query_str}*"
  66. with index.searcher() as searcher:
  67. name_qp = wqparser.QueryParser("flag_name", schema=index.schema)
  68. description_qp = wqparser.QueryParser("flag_description", schema=index.schema)
  69. name_query = name_qp.parse(query_str)
  70. description_query = description_qp.parse(query_str)
  71. name_results = searcher.search(name_query, limit=20)
  72. desc_results = searcher.search(description_query, limit=20)
  73. highlights = []
  74. for hit in name_results:
  75. if len(highlights) < max_results:
  76. highlight = {}
  77. highlight["flag_name"] = hit.highlights("flag_name")
  78. highlight["flag_subgroup"] = hit["flag_subgroup"]
  79. highlight["flag_description"] = f"{hit['flag_description'][:100]}..."
  80. highlight["which"] = "flag_name"
  81. highlights.append(highlight)
  82. for hit in desc_results:
  83. if len(highlights) < max_results:
  84. highlight = {}
  85. highlight["flag_description"] = f"...{hit.highlights('flag_description')}..."
  86. highlight["flag_subgroup"] = hit["flag_subgroup"]
  87. highlight["flag_name"] = hit["flag_name"]
  88. highlight["which"] = "flag_desc"
  89. highlights.append(highlight)
  90. return highlights