⚡️(backend) stream s3 file content with a dedicated endpoint

We created a dedicated endpoint to retrieve a document's content. The
content of the S3 file is streamed when this endpoint is fetched.
This commit is contained in:
Manuel Raynaud 2026-04-07 16:22:59 +02:00 committed by Anthony LC
parent 75806cea41
commit d5a4468f96
No known key found for this signature in database
9 changed files with 253 additions and 5 deletions

View file

@ -9,6 +9,7 @@ and this project adheres to
### Added
- ✨(backend) create a dedicated endpoint to update document content
- ⚡️(backend) stream s3 file content with a dedicated endpoint
### Changed

View file

@ -12,7 +12,7 @@ from core.models import DocumentAccess, RoleChoices, get_trashbin_cutoff
ACTION_FOR_METHOD_TO_PERMISSION = {
"versions_detail": {"DELETE": "versions_destroy", "GET": "versions_retrieve"},
"children": {"GET": "children_list", "POST": "children_create"},
"content": {"PATCH": "content_patch"},
"content": {"PATCH": "content_patch", "GET": "content_retrieve"},
}

View file

@ -16,7 +16,7 @@ from django.utils.translation import gettext_lazy as _
import magic
from rest_framework import serializers
from core import choices, enums, models, utils, validators
from core import choices, enums, models, validators
from core.services import mime_types
from core.services.ai_services import AI_ACTIONS
from core.services.converter_services import (

View file

@ -1873,10 +1873,8 @@ class DocumentViewSet(
return drf.response.Response("authorized", headers=request.headers, status=200)
@drf.decorators.action(detail=True, methods=["patch"], url_path="content")
def content(self, request, *args, **kwargs):
def _content_patch(self, request, document):
"""Update the raw Yjs content of a document stored in S3."""
document = self.get_object()
serializer = serializers.DocumentContentSerializer(data=request.data)
serializer.is_valid(raise_exception=True)
@ -1931,6 +1929,47 @@ class DocumentViewSet(
return drf_response.Response(status=status.HTTP_204_NO_CONTENT)
def _content_retrieve(self, document):
    """
    Stream the raw content file of a document from the default storage.

    The response is a `StreamingHttpResponse` served as `text/plain`. When no
    file exists under the document's file key, an empty 200 response is
    returned instead of an error. The `Content-Length` header is set only
    when the storage backend is able to report the file size.
    """
    file_key = document.file_key

    if not default_storage.exists(file_key):
        return StreamingHttpResponse(
            b"", content_type="text/plain", status=status.HTTP_200_OK
        )

    file = default_storage.open(file_key, "rb")

    def iter_chunks():
        """Yield the file in 8192-byte chunks and close it once streaming stops."""
        # A generator exposes `close()`, which Django calls when the response
        # is closed; the `finally` clause then releases the file handle even
        # if the client disconnects before the end of the stream.
        try:
            yield from iter(lambda: file.read(8192), b"")
        finally:
            file.close()

    response = StreamingHttpResponse(
        streaming_content=iter_chunks(),
        content_type="text/plain",
        status=status.HTTP_200_OK,
    )

    try:
        response["Content-Length"] = default_storage.size(file_key)
    except NotImplementedError:
        # The storage backend cannot report a size: stream without the header.
        pass
    except Exception:
        # The stream will never be consumed, so release the handle right away.
        file.close()
        raise

    return response
@drf.decorators.action(detail=True, methods=["patch", "get"], url_path="content")
def content(self, request, *args, **kwargs):
    """
    Entry point for the document content stored in s3.

    GET streams the stored content, PATCH updates it. Any other method
    gets a 501 response.
    """
    document = self.get_object()
    http_method = request.method

    if http_method == "PATCH":
        return self._content_patch(request, document)

    if http_method != "GET":
        return drf_response.Response(status=status.HTTP_501_NOT_IMPLEMENTED)

    # The database is not needed anymore at this point. Fetching the file
    # from s3 can take time, and holding an idle connection meanwhile makes
    # the number of connections grow during websocket reconnection bursts,
    # so the connection is closed explicitly before streaming.
    connection.close()
    return self._content_retrieve(document)
@drf.decorators.action(detail=True, methods=["get"], url_path="media-check")
def media_check(self, request, *args, **kwargs):
"""

View file

@ -1310,6 +1310,7 @@ class Document(MP_Node, BaseModel):
"comment": can_comment,
"formatted_content": can_get,
"content_patch": can_update,
"content_retrieve": retrieve,
"cors_proxy": can_get,
"descendants": can_get,
"destroy": can_destroy,

View file

@ -0,0 +1,190 @@
"""
Tests for the GET /api/v1.0/documents/{id}/content/ endpoint.
"""
from uuid import uuid4
from django.core.files.storage import default_storage
import pytest
from rest_framework import status
from rest_framework.test import APIClient
from core import factories
from core.tests.conftest import TEAM, USER, VIA
pytestmark = pytest.mark.django_db
@pytest.mark.parametrize("reach", ["authenticated", "restricted"])
def test_api_documents_content_retrieve_anonymous_non_public(reach):
    """An anonymous request on a non-public document is answered with a 401."""
    document = factories.DocumentFactory(link_reach=reach)
    url = f"/api/v1.0/documents/{document.id!s}/content/"

    anonymous_client = APIClient()
    response = anonymous_client.get(url)

    assert response.status_code == status.HTTP_401_UNAUTHORIZED
def test_api_documents_content_retrieve_anonymous_public():
    """A public document can be streamed without being authenticated."""
    document = factories.DocumentFactory(link_reach="public")
    url = f"/api/v1.0/documents/{document.id!s}/content/"

    response = APIClient().get(url)

    assert response.status_code == status.HTTP_200_OK
    assert response["Content-Type"] == "text/plain"

    streamed_body = b"".join(response.streaming_content)
    expected_body = factories.YDOC_HELLO_WORLD_BASE64.encode("utf-8")
    assert streamed_body == expected_body
def test_api_documents_content_retrieve_authenticated_no_access():
    """A logged-in user with no access to a restricted document gets a 403."""
    stranger = factories.UserFactory()
    document = factories.DocumentFactory(link_reach="restricted")
    url = f"/api/v1.0/documents/{document.id!s}/content/"

    client = APIClient()
    client.force_login(stranger)
    response = client.get(url)

    assert response.status_code == status.HTTP_403_FORBIDDEN
@pytest.mark.parametrize("link_reach", ["authenticated", "public"])
def test_api_documents_content_retrieve_authenticated_not_restricted(link_reach):
    """
    A logged-in user can stream the content of a document whose link reach is
    "authenticated" or "public", even without an explicit access grant.
    """
    visitor = factories.UserFactory()
    document = factories.DocumentFactory(link_reach=link_reach)
    url = f"/api/v1.0/documents/{document.id!s}/content/"

    client = APIClient()
    client.force_login(visitor)
    response = client.get(url)

    assert response.status_code == status.HTTP_200_OK

    streamed_body = b"".join(response.streaming_content)
    expected_body = factories.YDOC_HELLO_WORLD_BASE64.encode("utf-8")
    assert streamed_body == expected_body
@pytest.mark.parametrize("via", VIA)
@pytest.mark.parametrize(
    "role", ["reader", "commenter", "editor", "administrator", "owner"]
)
def test_api_documents_content_retrieve_success(role, via, mock_user_teams):
    """Every role, granted to the user directly or through a team, can read the content."""
    member = factories.UserFactory()
    document = factories.DocumentFactory(link_reach="restricted")

    # Grant the role either to the user or to a team the user belongs to.
    if via == USER:
        factories.UserDocumentAccessFactory(document=document, user=member, role=role)
    elif via == TEAM:
        mock_user_teams.return_value = ["lasuite"]
        factories.TeamDocumentAccessFactory(
            document=document, team="lasuite", role=role
        )

    client = APIClient()
    client.force_login(member)
    url = f"/api/v1.0/documents/{document.id!s}/content/"
    response = client.get(url)

    assert response.status_code == status.HTTP_200_OK

    streamed_body = b"".join(response.streaming_content)
    expected_body = factories.YDOC_HELLO_WORLD_BASE64.encode("utf-8")
    assert streamed_body == expected_body
def test_api_documents_content_retrieve_nonexistent_document():
    """Asking for the content of an unknown document id yields a 404."""
    client = APIClient()
    client.force_login(factories.UserFactory())

    url = f"/api/v1.0/documents/{uuid4()!s}/content/"
    response = client.get(url)

    assert response.status_code == status.HTTP_404_NOT_FOUND
def test_api_documents_content_retrieve_file_not_in_storage():
    """An empty body is returned when the file does not exist in the storage."""
    reader = factories.UserFactory()
    document = factories.DocumentFactory(link_reach="restricted")
    factories.UserDocumentAccessFactory(document=document, user=reader, role="reader")

    client = APIClient()
    client.force_login(reader)

    # Remove the stored file and make sure it is really gone before the request.
    file_key = document.file_key
    default_storage.delete(file_key)
    assert not default_storage.exists(file_key)

    url = f"/api/v1.0/documents/{document.id!s}/content/"
    response = client.get(url)

    assert response.status_code == status.HTTP_200_OK
    streamed_body = b"".join(response.streaming_content)
    assert streamed_body == b""
def test_api_documents_content_retrieve_content_length_header():
    """The Content-Length header matches the size reported by the storage."""
    reader = factories.UserFactory()
    document = factories.DocumentFactory(link_reach="restricted")
    factories.UserDocumentAccessFactory(document=document, user=reader, role="reader")

    client = APIClient()
    client.force_login(reader)
    url = f"/api/v1.0/documents/{document.id!s}/content/"
    response = client.get(url)

    assert response.status_code == status.HTTP_200_OK

    stored_size = default_storage.size(document.file_key)
    announced_size = int(response["Content-Length"])
    assert announced_size == stored_size
@pytest.mark.parametrize("role", ["reader", "commenter", "editor", "administrator"])
def test_api_documents_content_retrieve_deleted_document_for_non_owners_all_roles(role):
    """
    For every non-owner role, the content of a soft-deleted document is
    answered with a 404.
    """
    member = factories.UserFactory()
    document = factories.DocumentFactory(link_reach="restricted")
    factories.UserDocumentAccessFactory(document=document, user=member, role=role)

    document.soft_delete()
    document.refresh_from_db()

    client = APIClient()
    client.force_login(member)
    url = f"/api/v1.0/documents/{document.id!s}/content/"
    response = client.get(url)

    assert response.status_code == status.HTTP_404_NOT_FOUND
def test_api_documents_content_retrieve_deleted_document_for_owner():
    """
    An owner keeps read access to the content after the document has been
    soft-deleted: the request succeeds and streams the stored content.
    """
    owner = factories.UserFactory()
    document = factories.DocumentFactory(link_reach="restricted")
    factories.UserDocumentAccessFactory(document=document, user=owner, role="owner")

    document.soft_delete()
    document.refresh_from_db()

    client = APIClient()
    client.force_login(owner)
    url = f"/api/v1.0/documents/{document.id!s}/content/"
    response = client.get(url)

    assert response.status_code == status.HTTP_200_OK

    streamed_body = b"".join(response.streaming_content)
    expected_body = factories.YDOC_HELLO_WORLD_BASE64.encode("utf-8")
    assert streamed_body == expected_body

View file

@ -54,6 +54,7 @@ def test_api_documents_retrieve_anonymous_public_standalone():
},
"mask": False,
"content_patch": document.link_role == "editor",
"content_retrieve": True,
"media_auth": True,
"media_check": True,
"move": False,
@ -132,6 +133,7 @@ def test_api_documents_retrieve_anonymous_public_parent():
),
"mask": False,
"content_patch": grand_parent.link_role == "editor",
"content_retrieve": True,
"media_auth": True,
"media_check": True,
"move": False,
@ -243,6 +245,7 @@ def test_api_documents_retrieve_authenticated_unrelated_public_or_authenticated(
},
"mask": True,
"content_patch": document.link_role == "editor",
"content_retrieve": True,
"media_auth": True,
"media_check": True,
"move": False,
@ -329,6 +332,7 @@ def test_api_documents_retrieve_authenticated_public_or_authenticated_parent(rea
"mask": True,
"move": False,
"content_patch": grand_parent.link_role == "editor",
"content_retrieve": True,
"media_auth": True,
"media_check": True,
"partial_update": grand_parent.link_role == "editor",
@ -527,6 +531,7 @@ def test_api_documents_retrieve_authenticated_related_parent():
),
"mask": True,
"content_patch": access.role not in ["reader", "commenter"],
"content_retrieve": True,
"media_auth": True,
"media_check": True,
"move": access.role in ["administrator", "owner"],

View file

@ -96,6 +96,7 @@ def test_api_documents_trashbin_format():
},
"mask": False,
"content_patch": False,
"content_retrieve": True,
"media_auth": False,
"media_check": False,
"move": False, # Can't move a deleted document

View file

@ -173,6 +173,7 @@ def test_models_documents_get_abilities_forbidden(
"invite_owner": False,
"mask": False,
"content_patch": False,
"content_retrieve": False,
"media_auth": False,
"media_check": False,
"move": False,
@ -247,6 +248,7 @@ def test_models_documents_get_abilities_reader(
},
"mask": is_authenticated,
"content_patch": False,
"content_retrieve": True,
"media_auth": True,
"media_check": True,
"move": False,
@ -320,6 +322,7 @@ def test_models_documents_get_abilities_commenter(
},
"mask": is_authenticated,
"content_patch": False,
"content_retrieve": True,
"media_auth": True,
"media_check": True,
"move": False,
@ -390,6 +393,7 @@ def test_models_documents_get_abilities_editor(
},
"mask": is_authenticated,
"content_patch": True,
"content_retrieve": True,
"media_auth": True,
"media_check": True,
"move": False,
@ -449,6 +453,7 @@ def test_models_documents_get_abilities_owner(django_assert_num_queries):
},
"mask": True,
"content_patch": True,
"content_retrieve": True,
"media_auth": True,
"media_check": True,
"move": True,
@ -494,6 +499,7 @@ def test_models_documents_get_abilities_owner(django_assert_num_queries):
},
"mask": False,
"content_patch": False,
"content_retrieve": True,
"media_auth": False,
"media_check": False,
"move": False,
@ -543,6 +549,7 @@ def test_models_documents_get_abilities_administrator(django_assert_num_queries)
},
"mask": True,
"content_patch": True,
"content_retrieve": True,
"media_auth": True,
"media_check": True,
"move": True,
@ -602,6 +609,7 @@ def test_models_documents_get_abilities_editor_user(django_assert_num_queries):
},
"mask": True,
"content_patch": True,
"content_retrieve": True,
"media_auth": True,
"media_check": True,
"move": False,
@ -669,6 +677,7 @@ def test_models_documents_get_abilities_reader_user(
},
"mask": True,
"content_patch": access_from_link,
"content_retrieve": True,
"media_auth": True,
"media_check": True,
"move": False,
@ -737,6 +746,7 @@ def test_models_documents_get_abilities_commenter_user(
},
"mask": True,
"content_patch": access_from_link,
"content_retrieve": True,
"media_auth": True,
"media_check": True,
"move": False,
@ -801,6 +811,7 @@ def test_models_documents_get_abilities_preset_role(django_assert_num_queries):
},
"mask": True,
"content_patch": False,
"content_retrieve": True,
"media_auth": True,
"media_check": True,
"move": False,