diff --git a/CHANGELOG.md b/CHANGELOG.md index 201cf7b9..aede6fb6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -122,6 +122,7 @@ and this project adheres to - 🔧(project) add DJANGO_EMAIL_URL_APP environment variable #1825 - ✨(frontend) activate Find search #1834 - ✨ handle searching on subdocuments #1834 +- ✨(backend) add search feature flags #1897 ### Changed diff --git a/docs/search.md b/docs/search.md index f2de68ad..4b322096 100644 --- a/docs/search.md +++ b/docs/search.md @@ -39,3 +39,14 @@ OIDC_STORE_REFRESH_TOKEN_KEY="" `OIDC_STORE_REFRESH_TOKEN_KEY` must be a valid Fernet key (32 url-safe base64-encoded bytes). To create one, use the `bin/generate-oidc-store-refresh-token-key.sh` command. + +## Feature flags + +The Find search integration is controlled by two feature flags: +- `flag_find_hybrid_search` +- `flag_find_full_text_search` + +If a user has both flags activated the most advanced search is used (hybrid > full text > title). +A user with no flag will default to the basic title search. + +Feature flags can be activated through the admin interface. diff --git a/src/backend/core/api/viewsets.py b/src/backend/core/api/viewsets.py index 8911bfb9..7e7a2f18 100644 --- a/src/backend/core/api/viewsets.py +++ b/src/backend/core/api/viewsets.py @@ -33,6 +33,7 @@ from django.utils.translation import gettext_lazy as _ import requests import rest_framework as drf +import waffle from botocore.exceptions import ClientError from csp.constants import NONE from csp.decorators import csp_update @@ -71,6 +72,7 @@ from core.utils import ( users_sharing_documents_with, ) +from ..enums import FeatureFlag, SearchType from . import permissions, serializers, utils from .filters import ( DocumentFilter, @@ -1414,6 +1416,10 @@ class DocumentViewSet( """ params = serializers.SearchDocumentSerializer(data=request.query_params) params.is_valid(raise_exception=True) + search_type = self._get_search_type() + + if search_type == SearchType.TITLE: + return self._title_search(request, params.validated_data, *args, **kwargs) indexer = get_document_indexer() if indexer is None: @@ -1421,14 +1427,29 @@ class DocumentViewSet( return self._title_search(request, params.validated_data, *args, **kwargs) try: - return self._search_with_indexer(indexer, request, params=params) + return self._search_with_indexer( + indexer, request, params=params, search_type=search_type + ) except requests.exceptions.RequestException as e: logger.error("Error while searching documents with indexer: %s", e) # fallback on title search if the indexer is not reached return self._title_search(request, params.validated_data, *args, **kwargs) + def _get_search_type(self) -> SearchType: + """ + Returns the search type to use for the search endpoint based on feature flags. + If a user has both flags activated the most advanced search is used + (HYBRID > FULL_TEXT > TITLE). + A user with no flag will default to the basic title search. + """ + if waffle.flag_is_active(self.request, FeatureFlag.FLAG_FIND_HYBRID_SEARCH): + return SearchType.HYBRID + if waffle.flag_is_active(self.request, FeatureFlag.FLAG_FIND_FULL_TEXT_SEARCH): + return SearchType.FULL_TEXT + return SearchType.TITLE + @staticmethod - def _search_with_indexer(indexer, request, params): + def _search_with_indexer(indexer, request, params, search_type): """ Returns a list of documents matching the query (q) according to the configured indexer. """ @@ -1436,6 +1457,7 @@ class DocumentViewSet( results = indexer.search( q=params.validated_data["q"], + search_type=search_type, token=request.session.get("oidc_access_token"), path=( params.validated_data["path"] diff --git a/src/backend/core/enums.py b/src/backend/core/enums.py index 46e62b2c..c792cd23 100644 --- a/src/backend/core/enums.py +++ b/src/backend/core/enums.py @@ -3,7 +3,7 @@ Core application enums declaration """ import re -from enum import StrEnum +from enum import Enum, StrEnum from django.conf import global_settings, settings from django.db import models @@ -46,3 +46,24 @@ class DocumentAttachmentStatus(StrEnum): PROCESSING = "processing" READY = "ready" + + +class SearchType(str, Enum): + """ + Defines the possible search types for a document search query. + - TITLE: DRF based search in the title of the documents only. + - HYBRID and FULL_TEXT: more advanced search based on Find indexer. + """ + + TITLE = "title" + HYBRID = "hybrid" + FULL_TEXT = "full-text" + + +class FeatureFlag(str, Enum): + """ + Defines the possible feature flags for the application. + """ + + FLAG_FIND_HYBRID_SEARCH = "flag_find_hybrid_search" + FLAG_FIND_FULL_TEXT_SEARCH = "flag_find_full_text_search" diff --git a/src/backend/core/services/search_indexers.py b/src/backend/core/services/search_indexers.py index 1a364706..d2bf9ed0 100644 --- a/src/backend/core/services/search_indexers.py +++ b/src/backend/core/services/search_indexers.py @@ -13,6 +13,7 @@ from django.utils.module_loading import import_string import requests from core import models, utils +from core.enums import SearchType logger = logging.getLogger(__name__) @@ -68,7 +69,7 @@ def get_batch_accesses_by_users_and_teams(paths): return dict(access_by_document_path) -def get_visited_document_ids_of(queryset, user): +def get_visited_document_ids_of(queryset, user) -> tuple[str, ...]: """ Returns the ids of the documents that have a linktrace to the user and NOT owned. It will be use to limit the opensearch responses to the public documents already @@ -92,7 +93,7 @@ def get_visited_document_ids_of(queryset, user): .distinct("pk") ) - return [str(id) for id in docs.values_list("pk", flat=True)] + return tuple(str(id) for id in docs.values_list("pk", flat=True)) class BaseDocumentIndexer(ABC): @@ -181,8 +182,16 @@ class BaseDocumentIndexer(ABC): Must be implemented by subclasses. """ - # pylint: disable-next=too-many-arguments,too-many-positional-arguments - def search(self, q, token, visited=(), nb_results=None, path=None): + # pylint: disable=too-many-arguments, too-many-positional-arguments + def search( # noqa : PLR0913 + self, + q: str, + token: str, + visited: tuple[str, ...] = (), + nb_results: int = None, + path: str = None, + search_type: SearchType = None, + ): """ Search for documents in Find app. Ensure the same default ordering as "Docs" list : -updated_at @@ -200,6 +209,9 @@ class BaseDocumentIndexer(ABC): Defaults to 50 if not specified. path (str, optional): The parent path to search descendants of. + search_type (SearchType, optional): + Type of search to perform. Can be SearchType.HYBRID or SearchType.FULL_TEXT. + If None, the backend search service will use its default search behavior. """ nb_results = nb_results or self.search_limit results = self.search_query( @@ -211,6 +223,7 @@ class BaseDocumentIndexer(ABC): "order_by": "updated_at", "order_direction": "desc", "path": path, + "search_type": search_type, }, token=token, ) @@ -231,10 +244,25 @@ class FindDocumentIndexer(BaseDocumentIndexer): Document indexer that indexes and searches documents with La Suite Find app. """ - # pylint: disable=too-many-arguments,too-many-positional-arguments - def search(self, q, token, visited=(), nb_results=None, path=None): + # pylint: disable=too-many-arguments, too-many-positional-arguments + def search( # noqa : PLR0913 + self, + q: str, + token: str, + visited: tuple[()] = (), + nb_results: int = None, + path: str = None, + search_type: SearchType = None, + ): """format Find search results""" - search_results = super().search(q, token, visited, nb_results, path) + search_results = super().search( + q=q, + token=token, + visited=visited, + nb_results=nb_results, + path=path, + search_type=search_type, + ) return [ { **hit["_source"], diff --git a/src/backend/core/tests/documents/test_api_documents_search.py b/src/backend/core/tests/documents/test_api_documents_search.py index 5b8151f9..f6c8dd8b 100644 --- a/src/backend/core/tests/documents/test_api_documents_search.py +++ b/src/backend/core/tests/documents/test_api_documents_search.py @@ -9,14 +9,23 @@ import responses from faker import Faker from rest_framework import response as drf_response from rest_framework.test import APIClient +from waffle.testutils import override_flag from core import factories +from core.enums import FeatureFlag, SearchType from core.services.search_indexers import get_document_indexer fake = Faker() pytestmark = pytest.mark.django_db +@pytest.fixture(autouse=True) +def enable_flag_find_hybrid_search(): + """Enable flag_find_hybrid_search for all tests in this module.""" + with override_flag(FeatureFlag.FLAG_FIND_HYBRID_SEARCH, active=True): + yield + + @mock.patch("core.services.search_indexers.FindDocumentIndexer.search_query") @responses.activate def test_api_documents_search_anonymous(search_query, indexer_settings): @@ -46,6 +55,7 @@ def test_api_documents_search_anonymous(search_query, indexer_settings): "order_by": "updated_at", "order_direction": "desc", "path": None, + "search_type": SearchType.HYBRID, }, "token": None, } diff --git a/src/backend/core/tests/documents/test_api_documents_search_feature_flag.py b/src/backend/core/tests/documents/test_api_documents_search_feature_flag.py new file mode 100644 index 00000000..72119de0 --- /dev/null +++ b/src/backend/core/tests/documents/test_api_documents_search_feature_flag.py @@ -0,0 +1,90 @@ +""" +Tests for Find search feature flags +""" + +from unittest import mock + +from django.http import HttpResponse + +import pytest +import responses +from rest_framework.test import APIClient +from waffle.testutils import override_flag + +from core.enums import FeatureFlag, SearchType +from core.services.search_indexers import get_document_indexer + +pytestmark = pytest.mark.django_db + + +@responses.activate +@mock.patch("core.api.viewsets.DocumentViewSet._title_search") +@mock.patch("core.api.viewsets.DocumentViewSet._search_with_indexer") +@pytest.mark.parametrize( + "activated_flags," + "expected_search_type," + "expected_search_with_indexer_called," + "expected_title_search_called", + [ + ([], SearchType.TITLE, False, True), + ([FeatureFlag.FLAG_FIND_HYBRID_SEARCH], SearchType.HYBRID, True, False), + ( + [ + FeatureFlag.FLAG_FIND_HYBRID_SEARCH, + FeatureFlag.FLAG_FIND_FULL_TEXT_SEARCH, + ], + SearchType.HYBRID, + True, + False, + ), + ([FeatureFlag.FLAG_FIND_FULL_TEXT_SEARCH], SearchType.FULL_TEXT, True, False), + ], +) +# pylint: disable=too-many-arguments, too-many-positional-arguments +def test_api_documents_search_success( # noqa : PLR0913 + mock_search_with_indexer, + mock_title_search, + activated_flags, + expected_search_type, + expected_search_with_indexer_called, + expected_title_search_called, + indexer_settings, +): + """ + Test that the API endpoint for searching documents returns a successful response + with the expected search type according to the activated feature flags, + and that the appropriate search method is called. + """ + assert get_document_indexer() is not None + + mock_search_with_indexer.return_value = HttpResponse() + mock_title_search.return_value = HttpResponse() + + with override_flag( + FeatureFlag.FLAG_FIND_HYBRID_SEARCH, + active=FeatureFlag.FLAG_FIND_HYBRID_SEARCH in activated_flags, + ): + with override_flag( + FeatureFlag.FLAG_FIND_FULL_TEXT_SEARCH, + active=FeatureFlag.FLAG_FIND_FULL_TEXT_SEARCH in activated_flags, + ): + response = APIClient().get( + "/api/v1.0/documents/search/", data={"q": "alpha"} + ) + + assert response.status_code == 200 + + if expected_search_with_indexer_called: + mock_search_with_indexer.assert_called_once() + assert ( + mock_search_with_indexer.call_args.kwargs["search_type"] + == expected_search_type + ) + else: + assert not mock_search_with_indexer.called + + if expected_title_search_called: + assert SearchType.TITLE == expected_search_type + mock_title_search.assert_called_once() + else: + assert not mock_title_search.called diff --git a/src/backend/core/tests/test_services_find_document_indexer.py b/src/backend/core/tests/test_services_find_document_indexer.py index 074d0114..9a080cd7 100644 --- a/src/backend/core/tests/test_services_find_document_indexer.py +++ b/src/backend/core/tests/test_services_find_document_indexer.py @@ -12,6 +12,7 @@ from django.db import transaction import pytest from core import factories, models +from core.enums import SearchType from core.services.search_indexers import FindDocumentIndexer pytestmark = pytest.mark.django_db @@ -473,8 +474,14 @@ def test_find_document_indexer_search(mock_search_query): nb_results = 10 path = "/some/path/" visited = ["doc-123"] + search_type = SearchType.HYBRID results = FindDocumentIndexer().search( - q=q, token=token, nb_results=nb_results, path=path, visited=visited + q=q, + token=token, + nb_results=nb_results, + path=path, + visited=visited, + search_type=search_type, ) mock_search_query.assert_called_once() @@ -487,6 +494,7 @@ def test_find_document_indexer_search(mock_search_query): "order_by": "updated_at", "order_direction": "desc", "path": path, + "search_type": search_type, } assert len(results) == 2 diff --git a/src/backend/core/tests/test_services_search_indexers.py b/src/backend/core/tests/test_services_search_indexers.py index 18c90979..01131e39 100644 --- a/src/backend/core/tests/test_services_search_indexers.py +++ b/src/backend/core/tests/test_services_search_indexers.py @@ -494,7 +494,7 @@ def test_get_visited_document_ids_of(): factories.UserDocumentAccessFactory(user=user, document=doc2) # The second document have an access for the user - assert get_visited_document_ids_of(queryset, user) == [str(doc1.pk)] + assert get_visited_document_ids_of(queryset, user) == (str(doc1.pk),) @pytest.mark.usefixtures("indexer_settings") @@ -528,7 +528,7 @@ def test_get_visited_document_ids_of_deleted(): doc_deleted.soft_delete() # Only the first document is not deleted - assert get_visited_document_ids_of(queryset, user) == [str(doc.pk)] + assert get_visited_document_ids_of(queryset, user) == (str(doc.pk),) @responses.activate @@ -548,7 +548,7 @@ def test_services_search_indexers_search_errors(indexer_settings): ) with pytest.raises(HTTPError): - FindDocumentIndexer().search("alpha", token="mytoken") + FindDocumentIndexer().search(q="alpha", token="mytoken") @patch("requests.post") @@ -572,7 +572,7 @@ def test_services_search_indexers_search(mock_post, indexer_settings): visited = get_visited_document_ids_of(models.Document.objects.all(), user) - indexer.search("alpha", visited=visited, token="mytoken") + indexer.search(q="alpha", visited=visited, token="mytoken") args, kwargs = mock_post.call_args @@ -613,7 +613,7 @@ def test_services_search_indexers_search_nb_results(mock_post, indexer_settings) visited = get_visited_document_ids_of(models.Document.objects.all(), user) - indexer.search("alpha", visited=visited, token="mytoken") + indexer.search(q="alpha", visited=visited, token="mytoken") args, kwargs = mock_post.call_args @@ -621,7 +621,7 @@ def test_services_search_indexers_search_nb_results(mock_post, indexer_settings) assert kwargs.get("json")["nb_results"] == 25 # The argument overrides the setting value - indexer.search("alpha", visited=visited, token="mytoken", nb_results=109) + indexer.search(q="alpha", visited=visited, token="mytoken", nb_results=109) args, kwargs = mock_post.call_args diff --git a/src/backend/impress/settings.py b/src/backend/impress/settings.py index e8c66e87..800ecb3c 100755 --- a/src/backend/impress/settings.py +++ b/src/backend/impress/settings.py @@ -331,6 +331,7 @@ class Base(Configuration): "django.contrib.messages.middleware.MessageMiddleware", "dockerflow.django.middleware.DockerflowMiddleware", "csp.middleware.CSPMiddleware", + "waffle.middleware.WaffleMiddleware", ] AUTHENTICATION_BACKENDS = [ @@ -352,6 +353,7 @@ class Base(Configuration): "parler", "treebeard", "easy_thumbnails", + "waffle", # Django "django.contrib.admin", "django.contrib.auth", diff --git a/src/backend/pyproject.toml b/src/backend/pyproject.toml index 6a22fa58..65798a13 100644 --- a/src/backend/pyproject.toml +++ b/src/backend/pyproject.toml @@ -42,6 +42,7 @@ dependencies = [ "django<6.0.0", "django-treebeard<5.0.0", "djangorestframework==3.16.1", + "django-waffle==5.0.0", "drf_spectacular==0.29.0", "dockerflow==2026.1.26", "easy_thumbnails==2.10.1",