(backend) add a public_search API view to the Document viewset

This allows searching (on document titles only) within a public document
and the related public documents in the same tree.

A new get_highest_public_ancestor() helper method is also added to the
Document model.
This commit is contained in:
Sylvain Boissel 2026-03-18 12:10:04 +01:00
parent f166e75921
commit fc71424419
No known key found for this signature in database
7 changed files with 436 additions and 0 deletions

View file

@ -9,6 +9,7 @@ and this project adheres to
### Added
- 🔧(backend) settings CONVERSION_UPLOAD_ENABLED to control usage of docspec
- ✨(backend) add a public_search API view to the Document viewset #2068
### Changed

View file

@ -512,6 +512,9 @@ class DocumentViewSet(
15. **AI Proxy**: Proxy an AI request to an external AI service.
Example: POST /api/v1.0/documents/<resource_id>/ai-proxy
16. **Public Search**: Search within a public document and the related tree.
Example: GET /api/v1.0/documents/<resource_id>/public_search/?q=search_text
### Ordering: created_at, updated_at, is_favorite, title
Example:
@ -1536,6 +1539,43 @@ class DocumentViewSet(
queryset = filterset.qs
return self.get_response_for_queryset(queryset)
@drf.decorators.action(detail=True, methods=["get"], url_path="public_search")
def public_search(self, request, *args, **kwargs):
    """
    Search document titles within the public tree of a given public document.

    The search text is read from the request parameter 'q', validated by
    `SearchDocumentSerializer`, and applied to the model field 'title' only:
    there is no full text search. The search scope is the document returned
    by `get_highest_public_ancestor()` together with its descendants, with
    deleted documents filtered out.

    Returns:
        The DRF response built by `get_response_for_queryset` for the
        matching documents, always ordered by the most recent first.

    Raises:
        drf.exceptions.ValidationError: if the query parameters or the title
            filter are invalid.
        drf.exceptions.PermissionDenied: if no public document is found for
            the requested document.
    """
    document = self.get_object()

    params = serializers.SearchDocumentSerializer(data=request.query_params)
    params.is_valid(raise_exception=True)
    text = params.validated_data["q"]

    public_root = document.get_highest_public_ancestor()
    if public_root is None:
        # The helper returns None when nothing on the ancestry is public.
        # Refuse explicitly instead of failing with an AttributeError below.
        raise drf.exceptions.PermissionDenied()

    # We limit the queryset to the current public tree, filtering out deleted
    # documents. This filter is kept by `filter_queryset` below, so it only
    # needs to be applied once.
    queryset = public_root.get_descendants(include_self=True).filter(
        ancestors_deleted_at__isnull=True
    )

    filterset = DocumentFilter({"title": text}, queryset=queryset)
    if not filterset.is_valid():
        raise drf.exceptions.ValidationError(filterset.errors)

    queryset = filterset.filter_queryset(queryset)

    return self.get_response_for_queryset(
        queryset.order_by("-updated_at"),
    )
@drf.decorators.action(detail=True, methods=["get"], url_path="versions")
def versions_list(self, request, *args, **kwargs):
"""

View file

@ -1018,6 +1018,22 @@ class Document(MP_Node, BaseModel):
self._content = content
def get_highest_public_ancestor(self):
    """
    Get the highest document with a public link reach among the ancestors
    of the document and the document itself.

    Returns:
        The topmost ancestor whose `link_reach` is public if there is one,
        otherwise the document itself when its own `link_reach` is public,
        otherwise None.
    """
    # With materialized paths, an ancestor's path is a prefix of its
    # descendants' paths, so an ascending sort on "path" yields the topmost
    # ancestor first. A descending sort would return the closest public
    # ancestor instead of the highest one.
    highest_public_ancestor = (
        self.get_ancestors()
        .filter(link_reach=LinkReachChoices.PUBLIC)
        .order_by("path")
        .first()
    )
    if highest_public_ancestor is not None:
        return highest_public_ancestor

    # No public ancestor above: the document is the top of its public tree
    # only if it is public itself.
    if self.link_reach == LinkReachChoices.PUBLIC:
        return self

    return None
def get_content_response(self, version_id=""):
"""Get the content in a specific version of the document"""
params = {
@ -1283,6 +1299,8 @@ class Document(MP_Node, BaseModel):
else (is_owner_or_admin or (user.is_authenticated and self.creator == user))
) and not is_deleted
is_public = link_reach == LinkReachChoices.PUBLIC
ai_allow_reach_from = settings.AI_ALLOW_REACH_FROM
ai_access = any(
[
@ -1319,6 +1337,7 @@ class Document(MP_Node, BaseModel):
"mask": can_get and user.is_authenticated,
"move": is_owner_or_admin and not is_deleted,
"partial_update": can_update,
"public_search": is_public and not is_deleted,
"restore": is_owner,
"retrieve": retrieve,
"media_auth": can_get,

View file

@ -0,0 +1,288 @@
"""
Tests for Documents API endpoint: public_search action.
"""
import datetime
from django.utils import timezone
import pytest
from rest_framework.test import APIClient
from core import factories, models
pytestmark = pytest.mark.django_db
def test_api_documents_public_search_missing_q():
    """A request without the `q` query parameter is rejected with a 400."""
    public_document = factories.DocumentFactory(link_reach="public")
    url = f"/api/v1.0/documents/{public_document.id}/public_search/"

    response = APIClient().get(url, data={})

    assert response.status_code == 400
    assert response.json() == {"q": ["This field is required."]}
def test_api_documents_public_search_blank_q():
    """A blank `q` matches every document of the public tree."""
    root = factories.DocumentFactory(link_reach="public")
    descendant = factories.DocumentFactory(parent=root)
    url = f"/api/v1.0/documents/{root.id}/public_search/"

    response = APIClient().get(url, data={"q": " "})

    assert response.status_code == 200
    returned_ids = {item["id"] for item in response.json()["results"]}
    # Exactly the root and its child are expected, nothing else.
    assert returned_ids == {str(root.id), str(descendant.id)}
# ---------------------------------------------------------------------------
# Permissions
# ---------------------------------------------------------------------------
def test_api_documents_public_search_anonymous_on_public_document_tree():
    """An anonymous user may search the tree of a public document."""
    root = factories.DocumentFactory(link_reach="public")
    expected = factories.DocumentFactory(parent=root, title="match me")
    unexpected = factories.DocumentFactory(parent=root, title="don't find me")
    url = f"/api/v1.0/documents/{root.id}/public_search/"

    response = APIClient().get(url, data={"q": "match"})

    assert response.status_code == 200
    returned_ids = {item["id"] for item in response.json()["results"]}
    # Only the document whose title contains the query is returned.
    assert returned_ids == {str(expected.id)}
    assert str(unexpected.id) not in returned_ids
def test_api_documents_public_search_anonymous_on_restricted_document():
    """An anonymous user gets a 401 when searching on a restricted document."""
    restricted_document = factories.DocumentFactory(link_reach="restricted")
    url = f"/api/v1.0/documents/{restricted_document.id}/public_search/"

    response = APIClient().get(url, data={"q": "anything"})

    expected_body = {"detail": "Authentication credentials were not provided."}
    assert response.status_code == 401
    assert response.json() == expected_body
def test_api_documents_public_search_anonymous_on_authenticated_document():
    """An anonymous user gets a 401 when searching on an authenticated-only document."""
    authenticated_document = factories.DocumentFactory(link_reach="authenticated")
    url = f"/api/v1.0/documents/{authenticated_document.id}/public_search/"

    response = APIClient().get(url, data={"q": "anything"})

    expected_body = {"detail": "Authentication credentials were not provided."}
    assert response.status_code == 401
    assert response.json() == expected_body
def test_api_documents_public_search_authenticated_on_restricted_document():
    """A logged-in user gets a 403 on a restricted document they don't own."""
    restricted_document = factories.DocumentFactory(link_reach="restricted")
    url = f"/api/v1.0/documents/{restricted_document.id}/public_search/"

    api_client = APIClient()
    api_client.force_login(factories.UserFactory())

    response = api_client.get(url, data={"q": "anything"})

    expected_body = {"detail": "You do not have permission to perform this action."}
    assert response.status_code == 403
    assert response.json() == expected_body
def test_api_documents_public_search_authenticated_on_authenticated_document():
    """A logged-in user gets a 403 on an authenticated document they don't own."""
    authenticated_document = factories.DocumentFactory(link_reach="authenticated")
    url = f"/api/v1.0/documents/{authenticated_document.id}/public_search/"

    api_client = APIClient()
    api_client.force_login(factories.UserFactory())

    response = api_client.get(url, data={"q": "anything"})

    expected_body = {"detail": "You do not have permission to perform this action."}
    assert response.status_code == 403
    assert response.json() == expected_body
# ---------------------------------------------------------------------------
# Public via ancestor
# ---------------------------------------------------------------------------
def test_api_documents_public_search_document_public_via_ancestor():
    """
    Searching from a restricted child of a public document is allowed, and the
    search covers the whole tree under the highest public ancestor.
    """
    public_root = factories.DocumentFactory(link_reach="public", title="root")
    restricted_child = factories.DocumentFactory(
        parent=public_root, link_reach="restricted", title="child alpha"
    )
    sibling = factories.DocumentFactory(parent=public_root, title="sibling alpha")
    grand_child = factories.DocumentFactory(
        parent=restricted_child, title="grand alpha"
    )

    # The child inherits the public reach of its root.
    assert restricted_child.computed_link_reach == models.LinkReachChoices.PUBLIC

    response = APIClient().get(
        f"/api/v1.0/documents/{restricted_child.id}/public_search/",
        data={"q": "alpha"},
    )

    assert response.status_code == 200
    returned_ids = {item["id"] for item in response.json()["results"]}
    # Every descendant of the root whose title contains "alpha" is returned.
    assert returned_ids == {
        str(restricted_child.id),
        str(sibling.id),
        str(grand_child.id),
    }
def test_api_documents_public_search_scope_limited_to_public_tree():
    """
    Matching documents that live outside the searched public tree must not be
    part of the results.
    """
    restricted_root = factories.DocumentFactory(
        link_reach="restricted", title="private root"
    )
    searched_document = factories.DocumentFactory(
        parent=restricted_root, link_reach="public", title="public doc"
    )
    in_scope = factories.DocumentFactory(
        parent=searched_document, title="alpha inside"
    )

    # A second, unrelated tree whose documents should never appear.
    unrelated_root = factories.DocumentFactory(
        link_reach="public", title="other root"
    )
    out_of_scope = factories.DocumentFactory(
        parent=unrelated_root, title="alpha outside"
    )

    response = APIClient().get(
        f"/api/v1.0/documents/{searched_document.id}/public_search/",
        data={"q": "alpha"},
    )

    assert response.status_code == 200
    returned_ids = {item["id"] for item in response.json()["results"]}
    assert returned_ids == {str(in_scope.id)}
    assert str(out_of_scope.id) not in returned_ids
def test_api_documents_public_search_excludes_deleted_documents():
    """A soft-deleted document is never returned by the public search."""
    public_root = factories.DocumentFactory(link_reach="public")
    kept = factories.DocumentFactory(parent=public_root, title="alive alpha")
    removed = factories.DocumentFactory(
        parent=public_root,
        title="deleted alpha",
        deleted_at="2024-01-01T00:00:00Z",
        ancestors_deleted_at="2024-01-01T00:00:00Z",
    )
    url = f"/api/v1.0/documents/{public_root.id}/public_search/"

    response = APIClient().get(url, data={"q": "alpha"})

    assert response.status_code == 200
    returned_ids = {item["id"] for item in response.json()["results"]}
    assert returned_ids == {str(kept.id)}
    assert str(removed.id) not in returned_ids
def test_api_documents_public_search_excludes_documents_with_deleted_ancestor():
    """A document located under a deleted ancestor is never returned."""
    public_root = factories.DocumentFactory(link_reach="public")
    removed_parent = factories.DocumentFactory(
        parent=public_root,
        title="deleted parent",
        deleted_at="2024-01-01T00:00:00Z",
        ancestors_deleted_at="2024-01-01T00:00:00Z",
    )
    # Not deleted itself, but flagged through its deleted parent.
    under_removed_parent = factories.DocumentFactory(
        parent=removed_parent,
        title="orphan alpha",
        ancestors_deleted_at="2024-01-01T00:00:00Z",
    )
    kept = factories.DocumentFactory(parent=public_root, title="alive alpha")
    url = f"/api/v1.0/documents/{public_root.id}/public_search/"

    response = APIClient().get(url, data={"q": "alpha"})

    assert response.status_code == 200
    returned_ids = {item["id"] for item in response.json()["results"]}
    assert returned_ids == {str(kept.id)}
    assert str(under_removed_parent.id) not in returned_ids
# ---------------------------------------------------------------------------
# Ordering
# ---------------------------------------------------------------------------
def test_api_documents_public_search_ordered_by_most_recent_first():
    """The most recently updated documents come first in the results."""
    public_root = factories.DocumentFactory(link_reach="public")
    stale = factories.DocumentFactory(parent=public_root, title="old alpha")
    fresh = factories.DocumentFactory(parent=public_root, title="new alpha")

    # Write `updated_at` straight to the database to control the ordering.
    ten_days = datetime.timedelta(days=10)
    models.Document.objects.filter(pk=stale.pk).update(
        updated_at=timezone.now() - ten_days
    )
    models.Document.objects.filter(pk=fresh.pk).update(updated_at=timezone.now())

    response = APIClient().get(
        f"/api/v1.0/documents/{public_root.id}/public_search/",
        data={"q": "alpha"},
    )

    assert response.status_code == 200
    ordered_ids = [item["id"] for item in response.json()["results"]]
    # Two results, with the freshly updated document listed first.
    assert ordered_ids == [str(fresh.id), str(stale.id)]

View file

@ -57,6 +57,7 @@ def test_api_documents_retrieve_anonymous_public_standalone():
"media_check": True,
"move": False,
"partial_update": document.link_role == "editor",
"public_search": document.link_reach == "public",
"restore": False,
"retrieve": True,
"search": True,
@ -135,6 +136,7 @@ def test_api_documents_retrieve_anonymous_public_parent():
"media_check": True,
"move": False,
"partial_update": grand_parent.link_role == "editor",
"public_search": True,
"restore": False,
"retrieve": True,
"search": True,
@ -246,6 +248,7 @@ def test_api_documents_retrieve_authenticated_unrelated_public_or_authenticated(
"media_check": True,
"move": False,
"partial_update": document.link_role == "editor",
"public_search": document.link_reach == "public",
"restore": False,
"retrieve": True,
"search": True,
@ -331,6 +334,7 @@ def test_api_documents_retrieve_authenticated_public_or_authenticated_parent(rea
"media_auth": True,
"media_check": True,
"partial_update": grand_parent.link_role == "editor",
"public_search": grand_parent.link_reach == "public",
"restore": False,
"retrieve": True,
"search": True,
@ -531,6 +535,7 @@ def test_api_documents_retrieve_authenticated_related_parent():
"media_check": True,
"move": access.role in ["administrator", "owner"],
"partial_update": access.role not in ["reader", "commenter"],
"public_search": document.computed_link_reach == "public",
"restore": access.role == "owner",
"retrieve": True,
"search": True,

View file

@ -99,6 +99,7 @@ def test_api_documents_trashbin_format():
"media_check": False,
"move": False, # Can't move a deleted document
"partial_update": False,
"public_search": False,
"restore": True,
"retrieve": True,
"search": False,

View file

@ -182,6 +182,7 @@ def test_models_documents_get_abilities_forbidden(
"restricted": None,
},
"partial_update": False,
"public_search": document.computed_link_reach == "public",
"restore": False,
"retrieve": False,
"tree": False,
@ -249,6 +250,7 @@ def test_models_documents_get_abilities_reader(
"media_check": True,
"move": False,
"partial_update": False,
"public_search": reach == "public",
"restore": False,
"retrieve": True,
"tree": True,
@ -321,6 +323,7 @@ def test_models_documents_get_abilities_commenter(
"media_check": True,
"move": False,
"partial_update": False,
"public_search": reach == "public",
"restore": False,
"retrieve": True,
"tree": True,
@ -390,6 +393,7 @@ def test_models_documents_get_abilities_editor(
"media_check": True,
"move": False,
"partial_update": True,
"public_search": reach == "public",
"restore": False,
"retrieve": True,
"tree": True,
@ -448,6 +452,7 @@ def test_models_documents_get_abilities_owner(django_assert_num_queries):
"media_check": True,
"move": True,
"partial_update": True,
"public_search": document.computed_link_reach == "public",
"restore": True,
"retrieve": True,
"tree": True,
@ -492,6 +497,7 @@ def test_models_documents_get_abilities_owner(django_assert_num_queries):
"media_check": False,
"move": False,
"partial_update": False,
"public_search": document.computed_link_reach == "public",
"restore": True,
"retrieve": True,
"tree": True,
@ -540,6 +546,7 @@ def test_models_documents_get_abilities_administrator(django_assert_num_queries)
"media_check": True,
"move": True,
"partial_update": True,
"public_search": document.computed_link_reach == "public",
"restore": False,
"retrieve": True,
"tree": True,
@ -598,6 +605,7 @@ def test_models_documents_get_abilities_editor_user(django_assert_num_queries):
"media_check": True,
"move": False,
"partial_update": True,
"public_search": document.computed_link_reach == "public",
"restore": False,
"retrieve": True,
"tree": True,
@ -664,6 +672,7 @@ def test_models_documents_get_abilities_reader_user(
"media_check": True,
"move": False,
"partial_update": access_from_link,
"public_search": document.computed_link_reach == "public",
"restore": False,
"retrieve": True,
"tree": True,
@ -731,6 +740,7 @@ def test_models_documents_get_abilities_commenter_user(
"media_check": True,
"move": False,
"partial_update": access_from_link,
"public_search": document.computed_link_reach == "public",
"restore": False,
"retrieve": True,
"tree": True,
@ -794,6 +804,7 @@ def test_models_documents_get_abilities_preset_role(django_assert_num_queries):
"media_check": True,
"move": False,
"partial_update": False,
"public_search": access.document.computed_link_reach == "public",
"restore": False,
"retrieve": True,
"tree": True,
@ -1691,3 +1702,74 @@ def test_models_documents_compute_ancestors_links_paths_mapping_structure(
{"link_reach": sibling.link_reach, "link_role": sibling.link_role},
],
}
# get_highest_public_ancestor method
def test_models_documents_get_highest_public_ancestor_root_public():
    """A public root document is its own highest public ancestor."""
    public_root = factories.DocumentFactory(link_reach="public")

    found = public_root.get_highest_public_ancestor()

    assert found == public_root
def test_models_documents_get_highest_public_ancestor_root_restricted():
    """A restricted root document has no public ancestor: None is returned."""
    restricted_root = factories.DocumentFactory(link_reach="restricted")

    found = restricted_root.get_highest_public_ancestor()

    assert found is None
def test_models_documents_get_highest_public_ancestor_root_authenticated():
    """An authenticated-only root document has no public ancestor: None is returned."""
    authenticated_root = factories.DocumentFactory(link_reach="authenticated")

    found = authenticated_root.get_highest_public_ancestor()

    assert found is None
def test_models_documents_get_highest_public_ancestor_child_is_public():
    """A public child under a restricted parent returns itself, not the parent."""
    restricted_parent = factories.DocumentFactory(link_reach="restricted")
    public_child = factories.DocumentFactory(
        parent=restricted_parent, link_reach="public"
    )

    found = public_child.get_highest_public_ancestor()

    assert found == public_child
def test_models_documents_get_highest_public_ancestor_parent_is_public():
    """A restricted child under a public parent returns the parent."""
    public_parent = factories.DocumentFactory(link_reach="public")
    restricted_child = factories.DocumentFactory(
        parent=public_parent, link_reach="restricted"
    )

    found = restricted_child.get_highest_public_ancestor()

    assert found == public_parent
def test_models_documents_get_highest_public_ancestor_neither_public():
    """A restricted child under a restricted parent returns None."""
    restricted_parent = factories.DocumentFactory(link_reach="restricted")
    restricted_child = factories.DocumentFactory(
        parent=restricted_parent, link_reach="restricted"
    )

    found = restricted_child.get_highest_public_ancestor()

    assert found is None
def test_models_documents_get_highest_public_ancestor_grandparent_is_public():
    """
    When the grandparent is the only public document of the lineage, it is
    returned as the highest public ancestor.
    """
    public_grandparent = factories.DocumentFactory(link_reach="public")
    restricted_parent = factories.DocumentFactory(
        parent=public_grandparent, link_reach="restricted"
    )
    restricted_child = factories.DocumentFactory(
        parent=restricted_parent, link_reach="restricted"
    )

    found = restricted_child.get_highest_public_ancestor()

    assert found == public_grandparent
def test_models_documents_get_highest_public_ancestor_deep_tree_only_middle_public():
    """
    In a deep tree where only an intermediate ancestor is public, that
    ancestor is returned as the highest public ancestor.
    """
    restricted_root = factories.DocumentFactory(link_reach="restricted")
    public_middle = factories.DocumentFactory(
        parent=restricted_root, link_reach="public"
    )
    restricted_child = factories.DocumentFactory(
        parent=public_middle, link_reach="restricted"
    )
    restricted_grandchild = factories.DocumentFactory(
        parent=restricted_child, link_reach="restricted"
    )

    found = restricted_grandchild.get_highest_public_ancestor()

    assert found == public_middle
def test_models_documents_get_highest_public_ancestor_all_restricted_deep():
    """A deeply nested document with only restricted ancestors returns None."""
    restricted_root = factories.DocumentFactory(link_reach="restricted")
    restricted_child = factories.DocumentFactory(
        parent=restricted_root, link_reach="restricted"
    )
    restricted_grandchild = factories.DocumentFactory(
        parent=restricted_child, link_reach="restricted"
    )

    found = restricted_grandchild.get_highest_public_ancestor()

    assert found is None