diff --git a/CHANGELOG.md b/CHANGELOG.md index d851101a..f5174dd8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ and this project adheres to ### Added - 🔧(backend) settings CONVERSION_UPLOAD_ENABLED to control usage of docspec +- ✨(backend) add a public_search API view to the Document viewset #2068 ### Changed diff --git a/src/backend/core/api/viewsets.py b/src/backend/core/api/viewsets.py index 7c27eb15..dbb69c83 100644 --- a/src/backend/core/api/viewsets.py +++ b/src/backend/core/api/viewsets.py @@ -512,6 +512,9 @@ class DocumentViewSet( 15. **AI Proxy**: Proxy an AI request to an external AI service. Example: POST /api/v1.0/documents//ai-proxy + 16. **Public Search**: Search within a public document and the related tree. + Example: GET /api/v1.0/documents/{id}/public_search/?q=search_text + ### Ordering: created_at, updated_at, is_favorite, title Example: @@ -1536,6 +1539,43 @@ class DocumentViewSet( queryset = filterset.qs return self.get_response_for_queryset(queryset) + @drf.decorators.action(detail=True, methods=["get"], url_path="public_search") + def public_search(self, request, *args, **kwargs): + """ + Returns a DRF response containing the filtered, annotated and ordered document list + for public search on the tree of a given public document. + + Applies filtering based on request parameter 'q' from `SearchDocumentSerializer`. + + The filtering is done on the model field 'title'; there is no full-text search. + + The ordering is always by the most recent first. + """ + document = self.get_object() + + params = serializers.SearchDocumentSerializer(data=request.query_params) + params.is_valid(raise_exception=True) + text = params.validated_data["q"] + + public_root = document.get_highest_public_ancestor() + + # We limit the queryset to the current public tree, filtering out deleted documents. 
+ queryset = public_root.get_descendants(include_self=True).filter( + ancestors_deleted_at__isnull=True + ) + + filterset = DocumentFilter({"title": text}, queryset=queryset) + + if not filterset.is_valid(): + raise drf.exceptions.ValidationError(filterset.errors) + + queryset = filterset.filter_queryset(queryset) + queryset = queryset.filter(ancestors_deleted_at__isnull=True) + + return self.get_response_for_queryset( + queryset.order_by("-updated_at"), + ) + @drf.decorators.action(detail=True, methods=["get"], url_path="versions") def versions_list(self, request, *args, **kwargs): """ diff --git a/src/backend/core/models.py b/src/backend/core/models.py index c6ed9ae1..c044e22e 100644 --- a/src/backend/core/models.py +++ b/src/backend/core/models.py @@ -1018,6 +1018,22 @@ class Document(MP_Node, BaseModel): self._content = content + def get_highest_public_ancestor(self): + """ + Get the highest ancestor of the document that has a public link reach. + If the document itself has a public link reach, it will be returned. + If there is no public ancestor, None will be returned. 
+ """ + if self.link_reach == LinkReachChoices.PUBLIC: + return self + + return ( + self.get_ancestors() + .filter(link_reach=LinkReachChoices.PUBLIC) + .order_by("-path") + .first() + ) + def get_content_response(self, version_id=""): """Get the content in a specific version of the document""" params = { @@ -1283,6 +1299,8 @@ class Document(MP_Node, BaseModel): else (is_owner_or_admin or (user.is_authenticated and self.creator == user)) ) and not is_deleted + is_public = link_reach == LinkReachChoices.PUBLIC + ai_allow_reach_from = settings.AI_ALLOW_REACH_FROM ai_access = any( [ @@ -1319,6 +1337,7 @@ class Document(MP_Node, BaseModel): "mask": can_get and user.is_authenticated, "move": is_owner_or_admin and not is_deleted, "partial_update": can_update, + "public_search": is_public and not is_deleted, "restore": is_owner, "retrieve": retrieve, "media_auth": can_get, diff --git a/src/backend/core/tests/documents/test_api_documents_public_search.py b/src/backend/core/tests/documents/test_api_documents_public_search.py new file mode 100644 index 00000000..619eed30 --- /dev/null +++ b/src/backend/core/tests/documents/test_api_documents_public_search.py @@ -0,0 +1,288 @@ +""" +Tests for Documents API endpoint: public_search action. 
+""" + +import datetime + +from django.utils import timezone + +import pytest +from rest_framework.test import APIClient + +from core import factories, models + +pytestmark = pytest.mark.django_db + + +def test_api_documents_public_search_missing_q(): + """Missing `q` param should return 400.""" + client = APIClient() + + document = factories.DocumentFactory(link_reach="public") + response = client.get( + f"/api/v1.0/documents/{document.id}/public_search/", + data={}, + ) + + assert response.status_code == 400 + assert response.json() == {"q": ["This field is required."]} + + +def test_api_documents_public_search_blank_q(): + """Blank `q` param should return all documents in the public tree.""" + client = APIClient() + document = factories.DocumentFactory(link_reach="public") + child = factories.DocumentFactory(parent=document) + + response = client.get( + f"/api/v1.0/documents/{document.id}/public_search/", + data={"q": " "}, + ) + + assert response.status_code == 200 + result_ids = {r["id"] for r in response.json()["results"]} + assert len(result_ids) == 2 + assert str(document.id) in result_ids + assert str(child.id) in result_ids + + +# --------------------------------------------------------------------------- +# Permissions +# --------------------------------------------------------------------------- + + +def test_api_documents_public_search_anonymous_on_public_document_tree(): + """Anonymous users can search within a public document's tree.""" + client = APIClient() + + document = factories.DocumentFactory(link_reach="public") + match = factories.DocumentFactory(parent=document, title="match me") + no_match = factories.DocumentFactory(parent=document, title="don't find me") + + response = client.get( + f"/api/v1.0/documents/{document.id}/public_search/", + data={"q": "match"}, + ) + assert response.status_code == 200 + + result_ids = {r["id"] for r in response.json()["results"]} + assert len(result_ids) == 1 + assert str(match.id) in result_ids + assert 
str(no_match.id) not in result_ids + + +def test_api_documents_public_search_anonymous_on_restricted_document(): + """Anonymous users cannot search on a restricted document.""" + client = APIClient() + document = factories.DocumentFactory(link_reach="restricted") + + response = client.get( + f"/api/v1.0/documents/{document.id}/public_search/", + data={"q": "anything"}, + ) + assert response.status_code == 401 + assert response.json() == { + "detail": "Authentication credentials were not provided." + } + + +def test_api_documents_public_search_anonymous_on_authenticated_document(): + """Anonymous users cannot search on an authenticated-only document.""" + client = APIClient() + document = factories.DocumentFactory(link_reach="authenticated") + + response = client.get( + f"/api/v1.0/documents/{document.id}/public_search/", + data={"q": "anything"}, + ) + assert response.status_code == 401 + assert response.json() == { + "detail": "Authentication credentials were not provided." + } + + +def test_api_documents_public_search_authenticated_on_restricted_document(): + """Authenticated users cannot search on a restricted document they don't own.""" + user = factories.UserFactory() + document = factories.DocumentFactory(link_reach="restricted") + + client = APIClient() + client.force_login(user) + response = client.get( + f"/api/v1.0/documents/{document.id}/public_search/", + data={"q": "anything"}, + ) + assert response.status_code == 403 + assert response.json() == { + "detail": "You do not have permission to perform this action." 
+ } + + +def test_api_documents_public_search_authenticated_on_authenticated_document(): + """Authenticated users cannot search on an authenticated document they don't own.""" + user = factories.UserFactory() + document = factories.DocumentFactory(link_reach="authenticated") + + client = APIClient() + client.force_login(user) + response = client.get( + f"/api/v1.0/documents/{document.id}/public_search/", + data={"q": "anything"}, + ) + assert response.status_code == 403 + assert response.json() == { + "detail": "You do not have permission to perform this action." + } + + +# --------------------------------------------------------------------------- +# Public via ancestor +# --------------------------------------------------------------------------- + + +def test_api_documents_public_search_document_public_via_ancestor(): + """ + A restricted child document whose ancestor is public is effectively public. + The search scope should be rooted at the highest public ancestor. + """ + client = APIClient() + + root = factories.DocumentFactory(link_reach="public", title="root") + child = factories.DocumentFactory( + parent=root, link_reach="restricted", title="child alpha" + ) + sibling = factories.DocumentFactory(parent=root, title="sibling alpha") + grand_child = factories.DocumentFactory(parent=child, title="grand alpha") + + # child is public via root + assert child.computed_link_reach == models.LinkReachChoices.PUBLIC + + response = client.get( + f"/api/v1.0/documents/{child.id}/public_search/", + data={"q": "alpha"}, + ) + assert response.status_code == 200 + + content = response.json() + result_ids = {r["id"] for r in content["results"]} + + # All descendants of root that match "alpha" should be returned + assert len(result_ids) == 3 + assert str(child.id) in result_ids + assert str(sibling.id) in result_ids + assert str(grand_child.id) in result_ids + + +def test_api_documents_public_search_scope_limited_to_public_tree(): + """ + Documents outside the public tree 
should not appear in results, even if they + match the query. + """ + client = APIClient() + + private_root = factories.DocumentFactory( + link_reach="restricted", title="private root" + ) + public_doc = factories.DocumentFactory( + parent=private_root, link_reach="public", title="public doc" + ) + inside = factories.DocumentFactory(parent=public_doc, title="alpha inside") + + # Separate tree — should never appear + other_root = factories.DocumentFactory(link_reach="public", title="other root") + outside = factories.DocumentFactory(parent=other_root, title="alpha outside") + + response = client.get( + f"/api/v1.0/documents/{public_doc.id}/public_search/", + data={"q": "alpha"}, + ) + assert response.status_code == 200 + + result_ids = {r["id"] for r in response.json()["results"]} + assert len(result_ids) == 1 + assert str(inside.id) in result_ids + assert str(outside.id) not in result_ids + + +def test_api_documents_public_search_excludes_deleted_documents(): + """Soft-deleted documents should not appear in results.""" + client = APIClient() + root = factories.DocumentFactory(link_reach="public") + alive = factories.DocumentFactory(parent=root, title="alive alpha") + deleted = factories.DocumentFactory( + parent=root, + title="deleted alpha", + deleted_at="2024-01-01T00:00:00Z", + ancestors_deleted_at="2024-01-01T00:00:00Z", + ) + + response = client.get( + f"/api/v1.0/documents/{root.id}/public_search/", + data={"q": "alpha"}, + ) + assert response.status_code == 200 + + result_ids = {r["id"] for r in response.json()["results"]} + assert len(result_ids) == 1 + assert str(alive.id) in result_ids + assert str(deleted.id) not in result_ids + + +def test_api_documents_public_search_excludes_documents_with_deleted_ancestor(): + """Documents whose ancestor is deleted should not appear in results.""" + client = APIClient() + root = factories.DocumentFactory(link_reach="public") + deleted_parent = factories.DocumentFactory( + parent=root, + title="deleted parent", + 
deleted_at="2024-01-01T00:00:00Z", + ancestors_deleted_at="2024-01-01T00:00:00Z", + ) + orphan = factories.DocumentFactory( + parent=deleted_parent, + title="orphan alpha", + ancestors_deleted_at="2024-01-01T00:00:00Z", + ) + alive = factories.DocumentFactory(parent=root, title="alive alpha") + + response = client.get( + f"/api/v1.0/documents/{root.id}/public_search/", + data={"q": "alpha"}, + ) + + assert response.status_code == 200 + + result_ids = {r["id"] for r in response.json()["results"]} + assert len(result_ids) == 1 + assert str(alive.id) in result_ids + assert str(orphan.id) not in result_ids + + +# --------------------------------------------------------------------------- +# Ordering +# --------------------------------------------------------------------------- + + +def test_api_documents_public_search_ordered_by_most_recent_first(): + """Results should be ordered by -updated_at.""" + client = APIClient() + + root_doc = factories.DocumentFactory(link_reach="public") + old = factories.DocumentFactory(parent=root_doc, title="old alpha") + new = factories.DocumentFactory(parent=root_doc, title="new alpha") + + # Force updated_at ordering + models.Document.objects.filter(pk=old.pk).update( + updated_at=timezone.now() - datetime.timedelta(days=10) + ) + models.Document.objects.filter(pk=new.pk).update(updated_at=timezone.now()) + + response = client.get( + f"/api/v1.0/documents/{root_doc.id}/public_search/", + data={"q": "alpha"}, + ) + assert response.status_code == 200 + + result_ids = [r["id"] for r in response.json()["results"]] + assert len(result_ids) == 2 + assert result_ids.index(str(new.id)) < result_ids.index(str(old.id)) diff --git a/src/backend/core/tests/documents/test_api_documents_retrieve.py b/src/backend/core/tests/documents/test_api_documents_retrieve.py index f4a4876c..e92cd572 100644 --- a/src/backend/core/tests/documents/test_api_documents_retrieve.py +++ b/src/backend/core/tests/documents/test_api_documents_retrieve.py @@ -57,6 +57,7 @@ 
def test_api_documents_retrieve_anonymous_public_standalone(): "media_check": True, "move": False, "partial_update": document.link_role == "editor", + "public_search": document.link_reach == "public", "restore": False, "retrieve": True, "search": True, @@ -135,6 +136,7 @@ def test_api_documents_retrieve_anonymous_public_parent(): "media_check": True, "move": False, "partial_update": grand_parent.link_role == "editor", + "public_search": True, "restore": False, "retrieve": True, "search": True, @@ -246,6 +248,7 @@ def test_api_documents_retrieve_authenticated_unrelated_public_or_authenticated( "media_check": True, "move": False, "partial_update": document.link_role == "editor", + "public_search": document.link_reach == "public", "restore": False, "retrieve": True, "search": True, @@ -331,6 +334,7 @@ def test_api_documents_retrieve_authenticated_public_or_authenticated_parent(rea "media_auth": True, "media_check": True, "partial_update": grand_parent.link_role == "editor", + "public_search": grand_parent.link_reach == "public", "restore": False, "retrieve": True, "search": True, @@ -531,6 +535,7 @@ def test_api_documents_retrieve_authenticated_related_parent(): "media_check": True, "move": access.role in ["administrator", "owner"], "partial_update": access.role not in ["reader", "commenter"], + "public_search": document.computed_link_reach == "public", "restore": access.role == "owner", "retrieve": True, "search": True, diff --git a/src/backend/core/tests/documents/test_api_documents_trashbin.py b/src/backend/core/tests/documents/test_api_documents_trashbin.py index a8ee8368..2e4b71e3 100644 --- a/src/backend/core/tests/documents/test_api_documents_trashbin.py +++ b/src/backend/core/tests/documents/test_api_documents_trashbin.py @@ -99,6 +99,7 @@ def test_api_documents_trashbin_format(): "media_check": False, "move": False, # Can't move a deleted document "partial_update": False, + "public_search": False, "restore": True, "retrieve": True, "search": False, diff --git 
a/src/backend/core/tests/test_models_documents.py b/src/backend/core/tests/test_models_documents.py index 82edf82d..08b13c7b 100644 --- a/src/backend/core/tests/test_models_documents.py +++ b/src/backend/core/tests/test_models_documents.py @@ -182,6 +182,7 @@ def test_models_documents_get_abilities_forbidden( "restricted": None, }, "partial_update": False, + "public_search": document.computed_link_reach == "public", "restore": False, "retrieve": False, "tree": False, @@ -249,6 +250,7 @@ def test_models_documents_get_abilities_reader( "media_check": True, "move": False, "partial_update": False, + "public_search": reach == "public", "restore": False, "retrieve": True, "tree": True, @@ -321,6 +323,7 @@ def test_models_documents_get_abilities_commenter( "media_check": True, "move": False, "partial_update": False, + "public_search": reach == "public", "restore": False, "retrieve": True, "tree": True, @@ -390,6 +393,7 @@ def test_models_documents_get_abilities_editor( "media_check": True, "move": False, "partial_update": True, + "public_search": reach == "public", "restore": False, "retrieve": True, "tree": True, @@ -448,6 +452,7 @@ def test_models_documents_get_abilities_owner(django_assert_num_queries): "media_check": True, "move": True, "partial_update": True, + "public_search": document.computed_link_reach == "public", "restore": True, "retrieve": True, "tree": True, @@ -492,6 +497,7 @@ def test_models_documents_get_abilities_owner(django_assert_num_queries): "media_check": False, "move": False, "partial_update": False, + "public_search": document.computed_link_reach == "public", "restore": True, "retrieve": True, "tree": True, @@ -540,6 +546,7 @@ def test_models_documents_get_abilities_administrator(django_assert_num_queries) "media_check": True, "move": True, "partial_update": True, + "public_search": document.computed_link_reach == "public", "restore": False, "retrieve": True, "tree": True, @@ -598,6 +605,7 @@ def 
test_models_documents_get_abilities_editor_user(django_assert_num_queries): "media_check": True, "move": False, "partial_update": True, + "public_search": document.computed_link_reach == "public", "restore": False, "retrieve": True, "tree": True, @@ -664,6 +672,7 @@ def test_models_documents_get_abilities_reader_user( "media_check": True, "move": False, "partial_update": access_from_link, + "public_search": document.computed_link_reach == "public", "restore": False, "retrieve": True, "tree": True, @@ -731,6 +740,7 @@ def test_models_documents_get_abilities_commenter_user( "media_check": True, "move": False, "partial_update": access_from_link, + "public_search": document.computed_link_reach == "public", "restore": False, "retrieve": True, "tree": True, @@ -794,6 +804,7 @@ def test_models_documents_get_abilities_preset_role(django_assert_num_queries): "media_check": True, "move": False, "partial_update": False, + "public_search": access.document.computed_link_reach == "public", "restore": False, "retrieve": True, "tree": True, @@ -1691,3 +1702,74 @@ def test_models_documents_compute_ancestors_links_paths_mapping_structure( {"link_reach": sibling.link_reach, "link_role": sibling.link_role}, ], } + + +# get_highest_public_ancestor method + + +def test_models_documents_get_highest_public_ancestor_root_public(): + """A root document with public link reach should return itself.""" + document = factories.DocumentFactory(link_reach="public") + assert document.get_highest_public_ancestor() == document + + +def test_models_documents_get_highest_public_ancestor_root_restricted(): + """A root document with restricted link reach should return None.""" + document = factories.DocumentFactory(link_reach="restricted") + assert document.get_highest_public_ancestor() is None + + +def test_models_documents_get_highest_public_ancestor_root_authenticated(): + """A root document with authenticated link reach should return None.""" + document = 
factories.DocumentFactory(link_reach="authenticated") + assert document.get_highest_public_ancestor() is None + + +def test_models_documents_get_highest_public_ancestor_child_is_public(): + """A child document that is itself public should return itself, not the parent.""" + parent = factories.DocumentFactory(link_reach="restricted") + child = factories.DocumentFactory(parent=parent, link_reach="public") + assert child.get_highest_public_ancestor() == child + + +def test_models_documents_get_highest_public_ancestor_parent_is_public(): + """A child with restricted reach whose parent is public should return the parent.""" + parent = factories.DocumentFactory(link_reach="public") + child = factories.DocumentFactory(parent=parent, link_reach="restricted") + assert child.get_highest_public_ancestor() == parent + + +def test_models_documents_get_highest_public_ancestor_neither_public(): + """A child with restricted reach and a restricted parent should return None.""" + parent = factories.DocumentFactory(link_reach="restricted") + child = factories.DocumentFactory(parent=parent, link_reach="restricted") + assert child.get_highest_public_ancestor() is None + + +def test_models_documents_get_highest_public_ancestor_grandparent_is_public(): + """ + Returns the highest public ancestor (grandparent) when only the grandparent is public. + """ + grandparent = factories.DocumentFactory(link_reach="public") + parent = factories.DocumentFactory(parent=grandparent, link_reach="restricted") + child = factories.DocumentFactory(parent=parent, link_reach="restricted") + assert child.get_highest_public_ancestor() == grandparent + + +def test_models_documents_get_highest_public_ancestor_deep_tree_only_middle_public(): + """ + When only a middle ancestor is public, it is returned as the highest public ancestor. 
+ """ + root_doc = factories.DocumentFactory(link_reach="restricted") + middle = factories.DocumentFactory(parent=root_doc, link_reach="public") + child = factories.DocumentFactory(parent=middle, link_reach="restricted") + grandchild = factories.DocumentFactory(parent=child, link_reach="restricted") + assert grandchild.get_highest_public_ancestor() == middle + + +def test_models_documents_get_highest_public_ancestor_all_restricted_deep(): + """A deeply nested document with no public ancestor should return None.""" + root_doc = factories.DocumentFactory(link_reach="restricted") + child = factories.DocumentFactory(parent=root_doc, link_reach="restricted") + grandchild = factories.DocumentFactory(parent=child, link_reach="restricted") + assert grandchild.get_highest_public_ancestor() is None