mirror of
https://github.com/suitenumerique/docs
synced 2026-04-21 13:37:20 +00:00
⚡️(backend) stream s3 file content with a dedicated endpoint
We created a dedicated endpoint to retrieve a document's content. The content of the S3 file is streamed when this endpoint is fetched.
This commit is contained in:
parent
75806cea41
commit
d5a4468f96
9 changed files with 253 additions and 5 deletions
|
|
@ -9,6 +9,7 @@ and this project adheres to
|
|||
### Added
|
||||
|
||||
- ✨(backend) create a dedicated endpoint to update document content
|
||||
- ⚡️(backend) stream s3 file content with a dedicated endpoint
|
||||
|
||||
### Changed
|
||||
|
||||
|
|
|
|||
|
|
@ -12,7 +12,7 @@ from core.models import DocumentAccess, RoleChoices, get_trashbin_cutoff
|
|||
ACTION_FOR_METHOD_TO_PERMISSION = {
|
||||
"versions_detail": {"DELETE": "versions_destroy", "GET": "versions_retrieve"},
|
||||
"children": {"GET": "children_list", "POST": "children_create"},
|
||||
"content": {"PATCH": "content_patch"},
|
||||
"content": {"PATCH": "content_patch", "GET": "content_retrieve"},
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -16,7 +16,7 @@ from django.utils.translation import gettext_lazy as _
|
|||
import magic
|
||||
from rest_framework import serializers
|
||||
|
||||
from core import choices, enums, models, utils, validators
|
||||
from core import choices, enums, models, validators
|
||||
from core.services import mime_types
|
||||
from core.services.ai_services import AI_ACTIONS
|
||||
from core.services.converter_services import (
|
||||
|
|
|
|||
|
|
@ -1873,10 +1873,8 @@ class DocumentViewSet(
|
|||
|
||||
return drf.response.Response("authorized", headers=request.headers, status=200)
|
||||
|
||||
@drf.decorators.action(detail=True, methods=["patch"], url_path="content")
|
||||
def content(self, request, *args, **kwargs):
|
||||
def _content_patch(self, request, document):
|
||||
"""Update the raw Yjs content of a document stored in S3."""
|
||||
document = self.get_object()
|
||||
serializer = serializers.DocumentContentSerializer(data=request.data)
|
||||
serializer.is_valid(raise_exception=True)
|
||||
|
||||
|
|
@ -1931,6 +1929,47 @@ class DocumentViewSet(
|
|||
|
||||
return drf_response.Response(status=status.HTTP_204_NO_CONTENT)
|
||||
|
||||
def _content_retrieve(self, document):
    """
    Stream the raw content file of a document from S3.

    Args:
        document: the document whose `file_key` locates the content in the
            default storage.

    Returns:
        A `text/plain` StreamingHttpResponse with status 200. The body is
        empty when no file exists under `document.file_key`; otherwise the
        file is sent in 8192-byte chunks, with a `Content-Length` header when
        the storage backend is able to report the file size.
    """
    if not default_storage.exists(document.file_key):
        return StreamingHttpResponse(
            b"", content_type="text/plain", status=status.HTTP_200_OK
        )

    file = default_storage.open(document.file_key, "rb")

    def iter_chunks():
        """Yield the file in fixed-size chunks and always release the handle."""
        try:
            yield from iter(lambda: file.read(8192), b"")
        finally:
            # A bare `iter(callable, sentinel)` exposes no `close()`, so the
            # storage handle was never released explicitly. A generator is
            # closed by the response when the request ends (exhausted stream,
            # read error or client disconnect), which triggers this cleanup.
            file.close()

    response = StreamingHttpResponse(
        streaming_content=iter_chunks(),
        content_type="text/plain",
        status=status.HTTP_200_OK,
    )

    try:
        response["Content-Length"] = default_storage.size(document.file_key)
    except NotImplementedError:
        # The storage backend cannot report sizes: stream without the header.
        pass

    return response
|
||||
|
||||
@drf.decorators.action(detail=True, methods=["patch", "get"], url_path="content")
def content(self, request, *args, **kwargs):
    """Stream (GET) or update (PATCH) the document content stored in S3."""
    document = self.get_object()
    http_method = request.method

    if http_method == "GET":
        # The document is already loaded, so the database is not needed while
        # the file is fetched from S3, which can take time. Release the
        # connection right away so that a burst of websocket reconnections
        # does not pile up idle database connections.
        connection.close()
        return self._content_retrieve(document)

    if http_method == "PATCH":
        return self._content_patch(request, document)

    # Any other verb routed to this action is not supported.
    return drf_response.Response(status=status.HTTP_501_NOT_IMPLEMENTED)
|
||||
|
||||
@drf.decorators.action(detail=True, methods=["get"], url_path="media-check")
|
||||
def media_check(self, request, *args, **kwargs):
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -1310,6 +1310,7 @@ class Document(MP_Node, BaseModel):
|
|||
"comment": can_comment,
|
||||
"formatted_content": can_get,
|
||||
"content_patch": can_update,
|
||||
"content_retrieve": retrieve,
|
||||
"cors_proxy": can_get,
|
||||
"descendants": can_get,
|
||||
"destroy": can_destroy,
|
||||
|
|
|
|||
|
|
@ -0,0 +1,190 @@
|
|||
"""
|
||||
Tests for the GET /api/v1.0/documents/{id}/content/ endpoint.
|
||||
"""
|
||||
|
||||
from uuid import uuid4
|
||||
|
||||
from django.core.files.storage import default_storage
|
||||
|
||||
import pytest
|
||||
from rest_framework import status
|
||||
from rest_framework.test import APIClient
|
||||
|
||||
from core import factories
|
||||
from core.tests.conftest import TEAM, USER, VIA
|
||||
|
||||
# Apply the `django_db` marker to every test of this module.
pytestmark = pytest.mark.django_db
|
||||
|
||||
|
||||
@pytest.mark.parametrize("reach", ["authenticated", "restricted"])
def test_api_documents_content_retrieve_anonymous_non_public(reach):
    """An anonymous request on a non-public document is rejected with a 401."""
    doc = factories.DocumentFactory(link_reach=reach)
    url = f"/api/v1.0/documents/{doc.id!s}/content/"

    anonymous_client = APIClient()
    response = anonymous_client.get(url)

    assert response.status_code == status.HTTP_401_UNAUTHORIZED
|
||||
|
||||
|
||||
def test_api_documents_content_retrieve_anonymous_public():
    """An anonymous request on a public document streams its content."""
    doc = factories.DocumentFactory(link_reach="public")
    url = f"/api/v1.0/documents/{doc.id!s}/content/"

    response = APIClient().get(url)

    assert response.status_code == status.HTTP_200_OK
    assert response["Content-Type"] == "text/plain"
    streamed = b"".join(response.streaming_content)
    assert streamed == factories.YDOC_HELLO_WORLD_BASE64.encode("utf-8")
|
||||
|
||||
|
||||
def test_api_documents_content_retrieve_authenticated_no_access():
    """A logged-in user without any access gets a 403 on a restricted document."""
    stranger = factories.UserFactory()
    doc = factories.DocumentFactory(link_reach="restricted")
    url = f"/api/v1.0/documents/{doc.id!s}/content/"

    api_client = APIClient()
    api_client.force_login(stranger)
    response = api_client.get(url)

    assert response.status_code == status.HTTP_403_FORBIDDEN
|
||||
|
||||
|
||||
@pytest.mark.parametrize("link_reach", ["authenticated", "public"])
def test_api_documents_content_retrieve_authenticated_not_restricted(link_reach):
    """
    A logged-in user can retrieve the content of a document whose link reach
    is "authenticated" or "public", without any explicit access grant.
    """
    visitor = factories.UserFactory()
    doc = factories.DocumentFactory(link_reach=link_reach)
    url = f"/api/v1.0/documents/{doc.id!s}/content/"

    api_client = APIClient()
    api_client.force_login(visitor)
    response = api_client.get(url)

    assert response.status_code == status.HTTP_200_OK
    streamed = b"".join(response.streaming_content)
    assert streamed == factories.YDOC_HELLO_WORLD_BASE64.encode("utf-8")
|
||||
|
||||
|
||||
@pytest.mark.parametrize("via", VIA)
@pytest.mark.parametrize(
    "role", ["reader", "commenter", "editor", "administrator", "owner"]
)
def test_api_documents_content_retrieve_success(role, via, mock_user_teams):
    """Every role, granted to the user directly or through a team, can read the content."""
    member = factories.UserFactory()
    doc = factories.DocumentFactory(link_reach="restricted")

    if via == USER:
        # Access granted to the user directly.
        factories.UserDocumentAccessFactory(document=doc, user=member, role=role)
    elif via == TEAM:
        # Access granted to a team the user is reported to belong to.
        team_name = "lasuite"
        mock_user_teams.return_value = [team_name]
        factories.TeamDocumentAccessFactory(
            document=doc, team=team_name, role=role
        )

    api_client = APIClient()
    api_client.force_login(member)
    url = f"/api/v1.0/documents/{doc.id!s}/content/"
    response = api_client.get(url)

    assert response.status_code == status.HTTP_200_OK
    streamed = b"".join(response.streaming_content)
    assert streamed == factories.YDOC_HELLO_WORLD_BASE64.encode("utf-8")
|
||||
|
||||
|
||||
def test_api_documents_content_retrieve_nonexistent_document():
    """Requesting the content of an unknown document id returns a 404."""
    visitor = factories.UserFactory()
    api_client = APIClient()
    api_client.force_login(visitor)

    unknown_id = uuid4()
    response = api_client.get(f"/api/v1.0/documents/{unknown_id!s}/content/")

    assert response.status_code == status.HTTP_404_NOT_FOUND
|
||||
|
||||
|
||||
def test_api_documents_content_retrieve_file_not_in_storage():
    """An empty body is returned when the file does not exist in the storage."""
    reader = factories.UserFactory()
    doc = factories.DocumentFactory(link_reach="restricted")
    factories.UserDocumentAccessFactory(document=doc, user=reader, role="reader")

    api_client = APIClient()
    api_client.force_login(reader)

    # Remove the stored file and make sure it is really gone before the call.
    storage_key = doc.file_key
    default_storage.delete(storage_key)
    assert not default_storage.exists(storage_key)

    response = api_client.get(f"/api/v1.0/documents/{doc.id!s}/content/")

    assert response.status_code == status.HTTP_200_OK
    assert b"".join(response.streaming_content) == b""
|
||||
|
||||
|
||||
def test_api_documents_content_retrieve_content_length_header():
    """The Content-Length header matches the size reported by the storage."""
    reader = factories.UserFactory()
    doc = factories.DocumentFactory(link_reach="restricted")
    factories.UserDocumentAccessFactory(document=doc, user=reader, role="reader")

    api_client = APIClient()
    api_client.force_login(reader)
    response = api_client.get(f"/api/v1.0/documents/{doc.id!s}/content/")

    assert response.status_code == status.HTTP_200_OK
    stored_size = default_storage.size(doc.file_key)
    announced_size = int(response["Content-Length"])
    assert announced_size == stored_size
|
||||
|
||||
|
||||
@pytest.mark.parametrize("role", ["reader", "commenter", "editor", "administrator"])
def test_api_documents_content_retrieve_deleted_document_for_non_owners_all_roles(role):
    """
    A soft-deleted document answers 404 on the content endpoint for every
    non-owner role.
    """
    member = factories.UserFactory()
    doc = factories.DocumentFactory(link_reach="restricted")
    factories.UserDocumentAccessFactory(document=doc, user=member, role=role)

    doc.soft_delete()
    doc.refresh_from_db()

    api_client = APIClient()
    api_client.force_login(member)
    url = f"/api/v1.0/documents/{doc.id!s}/content/"
    response = api_client.get(url)

    assert response.status_code == status.HTTP_404_NOT_FOUND
|
||||
|
||||
|
||||
def test_api_documents_content_retrieve_deleted_document_for_owner():
    """
    The owner of a soft-deleted document can still retrieve its content.

    The 'retrieve' ability stays True for owners regardless of deletion state.
    """
    owner = factories.UserFactory()
    doc = factories.DocumentFactory(link_reach="restricted")
    factories.UserDocumentAccessFactory(document=doc, user=owner, role="owner")

    doc.soft_delete()
    doc.refresh_from_db()

    api_client = APIClient()
    api_client.force_login(owner)
    url = f"/api/v1.0/documents/{doc.id!s}/content/"
    response = api_client.get(url)

    assert response.status_code == status.HTTP_200_OK
    streamed = b"".join(response.streaming_content)
    assert streamed == factories.YDOC_HELLO_WORLD_BASE64.encode("utf-8")
|
||||
|
|
@ -54,6 +54,7 @@ def test_api_documents_retrieve_anonymous_public_standalone():
|
|||
},
|
||||
"mask": False,
|
||||
"content_patch": document.link_role == "editor",
|
||||
"content_retrieve": True,
|
||||
"media_auth": True,
|
||||
"media_check": True,
|
||||
"move": False,
|
||||
|
|
@ -132,6 +133,7 @@ def test_api_documents_retrieve_anonymous_public_parent():
|
|||
),
|
||||
"mask": False,
|
||||
"content_patch": grand_parent.link_role == "editor",
|
||||
"content_retrieve": True,
|
||||
"media_auth": True,
|
||||
"media_check": True,
|
||||
"move": False,
|
||||
|
|
@ -243,6 +245,7 @@ def test_api_documents_retrieve_authenticated_unrelated_public_or_authenticated(
|
|||
},
|
||||
"mask": True,
|
||||
"content_patch": document.link_role == "editor",
|
||||
"content_retrieve": True,
|
||||
"media_auth": True,
|
||||
"media_check": True,
|
||||
"move": False,
|
||||
|
|
@ -329,6 +332,7 @@ def test_api_documents_retrieve_authenticated_public_or_authenticated_parent(rea
|
|||
"mask": True,
|
||||
"move": False,
|
||||
"content_patch": grand_parent.link_role == "editor",
|
||||
"content_retrieve": True,
|
||||
"media_auth": True,
|
||||
"media_check": True,
|
||||
"partial_update": grand_parent.link_role == "editor",
|
||||
|
|
@ -527,6 +531,7 @@ def test_api_documents_retrieve_authenticated_related_parent():
|
|||
),
|
||||
"mask": True,
|
||||
"content_patch": access.role not in ["reader", "commenter"],
|
||||
"content_retrieve": True,
|
||||
"media_auth": True,
|
||||
"media_check": True,
|
||||
"move": access.role in ["administrator", "owner"],
|
||||
|
|
|
|||
|
|
@ -96,6 +96,7 @@ def test_api_documents_trashbin_format():
|
|||
},
|
||||
"mask": False,
|
||||
"content_patch": False,
|
||||
"content_retrieve": True,
|
||||
"media_auth": False,
|
||||
"media_check": False,
|
||||
"move": False, # Can't move a deleted document
|
||||
|
|
|
|||
|
|
@ -173,6 +173,7 @@ def test_models_documents_get_abilities_forbidden(
|
|||
"invite_owner": False,
|
||||
"mask": False,
|
||||
"content_patch": False,
|
||||
"content_retrieve": False,
|
||||
"media_auth": False,
|
||||
"media_check": False,
|
||||
"move": False,
|
||||
|
|
@ -247,6 +248,7 @@ def test_models_documents_get_abilities_reader(
|
|||
},
|
||||
"mask": is_authenticated,
|
||||
"content_patch": False,
|
||||
"content_retrieve": True,
|
||||
"media_auth": True,
|
||||
"media_check": True,
|
||||
"move": False,
|
||||
|
|
@ -320,6 +322,7 @@ def test_models_documents_get_abilities_commenter(
|
|||
},
|
||||
"mask": is_authenticated,
|
||||
"content_patch": False,
|
||||
"content_retrieve": True,
|
||||
"media_auth": True,
|
||||
"media_check": True,
|
||||
"move": False,
|
||||
|
|
@ -390,6 +393,7 @@ def test_models_documents_get_abilities_editor(
|
|||
},
|
||||
"mask": is_authenticated,
|
||||
"content_patch": True,
|
||||
"content_retrieve": True,
|
||||
"media_auth": True,
|
||||
"media_check": True,
|
||||
"move": False,
|
||||
|
|
@ -449,6 +453,7 @@ def test_models_documents_get_abilities_owner(django_assert_num_queries):
|
|||
},
|
||||
"mask": True,
|
||||
"content_patch": True,
|
||||
"content_retrieve": True,
|
||||
"media_auth": True,
|
||||
"media_check": True,
|
||||
"move": True,
|
||||
|
|
@ -494,6 +499,7 @@ def test_models_documents_get_abilities_owner(django_assert_num_queries):
|
|||
},
|
||||
"mask": False,
|
||||
"content_patch": False,
|
||||
"content_retrieve": True,
|
||||
"media_auth": False,
|
||||
"media_check": False,
|
||||
"move": False,
|
||||
|
|
@ -543,6 +549,7 @@ def test_models_documents_get_abilities_administrator(django_assert_num_queries)
|
|||
},
|
||||
"mask": True,
|
||||
"content_patch": True,
|
||||
"content_retrieve": True,
|
||||
"media_auth": True,
|
||||
"media_check": True,
|
||||
"move": True,
|
||||
|
|
@ -602,6 +609,7 @@ def test_models_documents_get_abilities_editor_user(django_assert_num_queries):
|
|||
},
|
||||
"mask": True,
|
||||
"content_patch": True,
|
||||
"content_retrieve": True,
|
||||
"media_auth": True,
|
||||
"media_check": True,
|
||||
"move": False,
|
||||
|
|
@ -669,6 +677,7 @@ def test_models_documents_get_abilities_reader_user(
|
|||
},
|
||||
"mask": True,
|
||||
"content_patch": access_from_link,
|
||||
"content_retrieve": True,
|
||||
"media_auth": True,
|
||||
"media_check": True,
|
||||
"move": False,
|
||||
|
|
@ -737,6 +746,7 @@ def test_models_documents_get_abilities_commenter_user(
|
|||
},
|
||||
"mask": True,
|
||||
"content_patch": access_from_link,
|
||||
"content_retrieve": True,
|
||||
"media_auth": True,
|
||||
"media_check": True,
|
||||
"move": False,
|
||||
|
|
@ -801,6 +811,7 @@ def test_models_documents_get_abilities_preset_role(django_assert_num_queries):
|
|||
},
|
||||
"mask": True,
|
||||
"content_patch": False,
|
||||
"content_retrieve": True,
|
||||
"media_auth": True,
|
||||
"media_check": True,
|
||||
"move": False,
|
||||
|
|
|
|||
Loading…
Reference in a new issue