Skip to content

Commit 11cc20c

Browse files
committed
🚚(back) serve legacy deposited files from Scaleway S3
Deposited files are now served from the aws/ directory in Scaleway S3. They are served using the django-storage already in place. Because response headers such as Content-Disposition cannot be set on Scaleway Edge Services URLs, legacy deposited files need to be renamed to their filenames so that they are downloaded with the right name. This commit adds a management command to do that.
1 parent 8206e63 commit 11cc20c

File tree

5 files changed

+229
-60
lines changed

5 files changed

+229
-60
lines changed

CHANGELOG.md

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,11 @@ Versioning](https://semver.org/spec/v2.0.0.html).
88

99
## [Unreleased]
1010

11-
## [5.10.] - 2025-07-09
11+
### Changed
12+
13+
- Serve legacy deposited files from Scaleway S3 after AWS migration
14+
15+
## [5.10.0] - 2025-07-09
1216

1317
### Changed
1418

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
"""Rename deposited files in Scaleway S3 for serving them with the right name."""
2+
3+
import logging
4+
from os.path import splitext
5+
6+
from django.conf import settings
7+
from django.core.management.base import BaseCommand
8+
9+
import boto3
10+
11+
from marsha.core.defaults import AWS_S3, AWS_STORAGE_BASE_DIRECTORY, READY
12+
from marsha.core.storage.storage_class import file_storage
13+
from marsha.core.utils import time_utils
14+
from marsha.deposit.models import DepositedFile
15+
16+
17+
logger = logging.getLogger(__name__)
18+
19+
scw_credentials = {
20+
"aws_access_key_id": settings.STORAGE_S3_ACCESS_KEY,
21+
"aws_secret_access_key": settings.STORAGE_S3_SECRET_KEY,
22+
"region_name": settings.STORAGE_S3_REGION_NAME,
23+
"endpoint_url": settings.STORAGE_S3_ENDPOINT_URL,
24+
}
25+
26+
# Configure the S3 client used to copy objects on Scaleway
27+
s3_client = boto3.client("s3", **scw_credentials)
28+
29+
30+
class Command(BaseCommand):
    """Rename deposited files in Scaleway S3 to their filename.

    Legacy deposited files (``storage_location=AWS_S3``) are looked up under
    ``AWS_STORAGE_BASE_DIRECTORY`` with a ``<timestamp><extension>`` object
    name. Each one is copied to an object named after the (sanitized) deposited
    file filename so that it can be served directly with the right name.
    """

    help = "Rename deposited files in Scaleway S3 to their filename."

    def validate_filename(self, value):
        """Transform filename to make it valid.

        Path separators (``/`` and ``\\``) are replaced by ``_`` and leading
        dots are stripped.

        Parameters
        ----------
        value : str
            The original filename.

        Returns
        -------
        str
            The sanitized filename.
        """

        value = value.replace("/", "_")
        value = value.replace("\\", "_")
        value = value.lstrip(".")

        return value

    def handle(self, *args, **options):
        """Execute management command.

        For every ready deposited file flagged as stored on AWS, copy the
        timestamp-named object to an object named after the sanitized
        filename, then persist the sanitized filename on the model.
        """

        files = DepositedFile.objects.filter(
            storage_location=AWS_S3, upload_state=READY
        )

        for file in files:
            # Get the file stored on Scaleway S3 under `aws/`.
            # The source key must be derived from the *original* filename:
            # sanitizing may change what `splitext` sees as the extension.
            stamp = time_utils.to_timestamp(file.uploaded_on)
            extension = splitext(file.filename)[1]
            file_key_src = file.get_storage_key(
                filename=f"{stamp}{extension}", base_dir=AWS_STORAGE_BASE_DIRECTORY
            )

            # Compute file key destination which should be the document filename
            filename = self.validate_filename(file.filename)
            file_key_dest = file.get_storage_key(
                filename, base_dir=AWS_STORAGE_BASE_DIRECTORY
            )

            if file_storage.exists(file_key_dest):
                logger.info("Object %s already exists", file_key_dest)
            else:
                logger.info("Copying %s to %s", file_key_src, file_key_dest)
                s3_client.copy_object(
                    Bucket=settings.STORAGE_S3_BUCKET_NAME,
                    CopySource={
                        "Bucket": settings.STORAGE_S3_BUCKET_NAME,
                        "Key": file_key_src,
                    },
                    Key=file_key_dest,
                )

            # Override file filename with the validated S3-compatible filename.
            # This is done only once the destination object is known to exist,
            # so a failed copy leaves the database untouched and the command
            # can safely be run again.
            if filename != file.filename:
                file.filename = filename
                file.save()

        logger.info("Finished copying!")

src/backend/marsha/deposit/serializers.py

Lines changed: 4 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -2,23 +2,20 @@
22

33
import mimetypes
44
from os.path import splitext
5-
from urllib.parse import quote, quote_plus
65

76
from django.conf import settings
87
from django.urls import reverse
98

109
from rest_framework import serializers
1110

12-
from marsha.core.defaults import SCW_S3
11+
from marsha.core.defaults import AWS_STORAGE_BASE_DIRECTORY, SCW_S3
1312
from marsha.core.models import User
1413
from marsha.core.serializers import (
1514
BaseInitiateUploadSerializer,
1615
UploadableFileWithExtensionSerializerMixin,
17-
get_resource_cloudfront_url_params,
1816
)
1917
from marsha.core.serializers.playlist import PlaylistLiteSerializer
2018
from marsha.core.storage.storage_class import file_storage
21-
from marsha.core.utils import cloudfront_utils, time_utils
2219
from marsha.deposit.models import DepositedFile, FileDepository
2320

2421

@@ -145,26 +142,10 @@ def get_url(self, obj):
145142

146143
return file_storage.url(file_key)
147144

148-
base = (
149-
f"{settings.AWS_S3_URL_PROTOCOL}://{settings.CLOUDFRONT_DOMAIN}/"
150-
f"{obj.file_depository.pk}/depositedfile/{obj.pk}/"
151-
f"{time_utils.to_timestamp(obj.uploaded_on)}"
145+
file_key = obj.get_storage_key(
146+
obj.filename, base_dir=AWS_STORAGE_BASE_DIRECTORY
152147
)
153-
154-
response_content_disposition = quote_plus(
155-
"attachment; filename=" + quote(obj.filename)
156-
)
157-
url = (
158-
f"{base:s}{self._get_extension_string(obj)}?"
159-
f"response-content-disposition={response_content_disposition}"
160-
)
161-
162-
if settings.CLOUDFRONT_SIGNED_URLS_ACTIVE:
163-
params = get_resource_cloudfront_url_params(
164-
"depositedfile", obj.file_depository_id
165-
)
166-
url = cloudfront_utils.build_signed_url(url, params)
167-
return url
148+
return file_storage.url(file_key)
168149

169150

170151
class DepositedFileInitiateUploadSerializer(BaseInitiateUploadSerializer):

src/backend/marsha/deposit/tests/api/depositedfiles/test_list.py

Lines changed: 13 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,8 @@
2424

2525

2626
# We don't enforce arguments documentation in tests
27-
# pylint: disable=unused-argument
27+
# flake8: noqa: E501
28+
# pylint: disable=unused-argument,line-too-long
2829

2930

3031
@override_settings(DEPOSIT_ENABLED=True)
@@ -292,11 +293,8 @@ def test_api_file_depository_list_deposited_files_instructor_filtered(self):
292293
},
293294
)
294295

295-
@override_settings(
296-
CLOUDFRONT_SIGNED_URLS_ACTIVE=True,
297-
CLOUDFRONT_SIGNED_PUBLIC_KEY_ID="cloudfront-access-key-id",
298-
)
299-
def test_api_file_depository_list_deposited_files_instructor_signed_urls_aws(self):
296+
@override_settings(MEDIA_URL="https://abc.svc.edge.scw.cloud/")
297+
def test_api_file_depository_list_deposited_files_instructor_urls_aws(self):
300298
"""All deposited files should have the same signature."""
301299
file_depository = FileDepositoryFactory(
302300
id="4e126eac-9ca8-47b1-8dcd-157686b43c60"
@@ -329,17 +327,6 @@ def test_api_file_depository_list_deposited_files_instructor_signed_urls_aws(sel
329327
)
330328
self.assertEqual(response.status_code, 200)
331329

332-
expected_cloudfront_signature = (
333-
"Policy=eyJTdGF0ZW1lbnQiOlt7IlJlc291cmNlIjoiaHR0cHM6Ly9hYmMuY2xvdWRmcm9udC5uZXQvNGUxM"
334-
"jZlYWMtOWNhOC00N2IxLThkY2QtMTU3Njg2YjQzYzYwLyoiLCJDb25kaXRpb24iOnsiRGF0ZUxlc3NUaGFuI"
335-
"jp7IkFXUzpFcG9jaFRpbWUiOjE2MzgyMzc2MDB9fX1dfQ__&Signature=BGMvqnlKwJW~PwkL1Om4Pp7Pk5"
336-
"ZLlJGgS~q5c02NIL5--QBssu6C-gbhBgfGVQOY8~YwEqkJVSFfsqX54jOvzjVi-0t4mDANocv0hD5CQAy103"
337-
"79gj14UQ5-4i2lcPoDdEcpsTekrtC9W1oRzZlyKSygNnL5NJKSjLy7St3TN8AK7sHbOMYTiFEpnxvuz8CaIh"
338-
"DLf0xG~IbILgw83w9D1xlmAFu9Mxe5KXXQZa6Z60dXcXf67AS9vO1YRTK4CxtfF5EkDI31DeOm-Fm78VZzFE"
339-
"j4MtdzMRQV1ag~4SruE7RMS10nIgHLN7CxpdHpybqAK4V-OWXlMsx8vSxC1bLHcQ__"
340-
"&Key-Pair-Id=cloudfront-access-key-id"
341-
)
342-
343330
self.assertEqual(
344331
response.json(),
345332
{
@@ -357,10 +344,8 @@ def test_api_file_depository_list_deposited_files_instructor_signed_urls_aws(sel
357344
"upload_state": "pending",
358345
"uploaded_on": "2018-08-08T00:00:00Z",
359346
"url": (
360-
f"https://abc.cloudfront.net/{file_depository.id}/depositedfile/"
361-
f"{unicode_deposited_file.id}/1533686400?response-content-disposition"
362-
"=attachment%3B+filename%3Dtest%25E2%2580%2599.pdf"
363-
f"&{expected_cloudfront_signature}"
347+
f"https://abc.svc.edge.scw.cloud/aws/{file_depository.id}/depositedfile/"
348+
f"{unicode_deposited_file.id}/test%E2%80%99.pdf"
364349
),
365350
},
366351
{
@@ -373,10 +358,8 @@ def test_api_file_depository_list_deposited_files_instructor_signed_urls_aws(sel
373358
"upload_state": "pending",
374359
"uploaded_on": "2018-08-08T00:00:00Z",
375360
"url": (
376-
f"https://abc.cloudfront.net/{file_depository.id}/depositedfile/"
377-
f"{deposited_files[2].id}/1533686400?response-content-disposition"
378-
f"=attachment%3B+filename%3D{deposited_files[2].filename}"
379-
f"&{expected_cloudfront_signature}"
361+
f"https://abc.svc.edge.scw.cloud/aws/{file_depository.id}/depositedfile/"
362+
f"{deposited_files[2].id}/{deposited_files[2].filename}"
380363
),
381364
},
382365
{
@@ -389,10 +372,8 @@ def test_api_file_depository_list_deposited_files_instructor_signed_urls_aws(sel
389372
"upload_state": "pending",
390373
"uploaded_on": "2018-08-08T00:00:00Z",
391374
"url": (
392-
f"https://abc.cloudfront.net/{file_depository.id}/depositedfile/"
393-
f"{deposited_files[1].id}/1533686400?response-content-disposition"
394-
f"=attachment%3B+filename%3D{deposited_files[1].filename}"
395-
f"&{expected_cloudfront_signature}"
375+
f"https://abc.svc.edge.scw.cloud/aws/{file_depository.id}/depositedfile/"
376+
f"{deposited_files[1].id}/{deposited_files[1].filename}"
396377
),
397378
},
398379
{
@@ -405,19 +386,15 @@ def test_api_file_depository_list_deposited_files_instructor_signed_urls_aws(sel
405386
"upload_state": "pending",
406387
"uploaded_on": "2018-08-08T00:00:00Z",
407388
"url": (
408-
f"https://abc.cloudfront.net/{file_depository.id}/depositedfile/"
409-
f"{deposited_files[0].id}/1533686400?response-content-disposition"
410-
f"=attachment%3B+filename%3D{deposited_files[0].filename}"
411-
f"&{expected_cloudfront_signature}"
389+
f"https://abc.svc.edge.scw.cloud/aws/{file_depository.id}/depositedfile/"
390+
f"{deposited_files[0].id}/{deposited_files[0].filename}"
412391
),
413392
},
414393
],
415394
},
416395
)
417396

418-
@override_settings(
419-
MEDIA_URL="https://abc.svc.edge.scw.cloud/",
420-
)
397+
@override_settings(MEDIA_URL="https://abc.svc.edge.scw.cloud/")
421398
def test_api_file_depository_list_deposited_files_instructor_urls_scw(self):
422399
"""Deposited files on Scaleway should not be signed."""
423400
file_depository = FileDepositoryFactory(
Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
"""Test the ``rename_deposited_files`` management command."""
2+
3+
from datetime import datetime, timezone
4+
from os.path import splitext
5+
from unittest import mock
6+
7+
from django.core.management import call_command
8+
from django.test import TestCase
9+
10+
from botocore.stub import Stubber
11+
12+
from marsha import settings
13+
from marsha.core.defaults import AWS_S3, PENDING, READY, SCW_S3
14+
from marsha.core.utils import time_utils
15+
from marsha.deposit.factories import DepositedFileFactory
16+
from marsha.deposit.management.commands import rename_deposited_files
17+
from marsha.deposit.models import DepositedFile
18+
19+
20+
class RenameDepositedFilesTestCase(TestCase):
    """
    Test the ``rename_deposited_files`` command.
    """

    @mock.patch("marsha.core.storage.storage_class.file_storage.exists")
    def test_rename_deposited_files(self, mock_exists):
        """Command should rename S3 objects to their filename."""

        mock_exists.return_value = False

        now = datetime(2018, 8, 8, tzinfo=timezone.utc)

        with Stubber(rename_deposited_files.s3_client) as s3_client_stubber:
            # Generate some deposited files
            # (<original filename>, <expected and cleaned>)
            filenames = [
                ("normal_filename.pdf", "normal_filename.pdf"),
                ("weird\\file/name.pdf", "weird_file_name.pdf"),
                (".hidden_file", "hidden_file"),
            ]

            files = []
            for filename_src, _ in filenames:
                file = DepositedFileFactory(
                    filename=filename_src,
                    uploaded_on=now,
                    upload_state=READY,
                    storage_location=AWS_S3,
                )
                files.append(file)

            # Create mocks for copy_object with Stubber
            # Note: Stubber requires that its mocks are called in the exact order they
            # were created, so we must iterate over objects.all() in the same sequence
            for file in DepositedFile.objects.all():
                stamp = time_utils.to_timestamp(file.uploaded_on)
                extension = ""
                if "." in file.filename:
                    extension = splitext(file.filename)[1]

                file_key_src = (
                    f"aws/{file.file_depository.id}/depositedfile/"
                    f"{file.id}/{stamp}{extension}"
                )

                sanitized_filename = rename_deposited_files.Command().validate_filename(
                    file.filename
                )
                file_key_dest = (
                    f"aws/{file.file_depository.id}/depositedfile/"
                    f"{file.id}/{sanitized_filename}"
                )

                expected_params = {
                    "Bucket": settings.STORAGE_S3_BUCKET_NAME,
                    "CopySource": {
                        "Bucket": settings.STORAGE_S3_BUCKET_NAME,
                        "Key": file_key_src,
                    },
                    "Key": file_key_dest,
                }
                s3_client_stubber.add_response("copy_object", {}, expected_params)

            # Create some deposited files that should not be concerned
            DepositedFileFactory(
                upload_state=READY,
                storage_location=SCW_S3,
            )
            DepositedFileFactory(
                upload_state=PENDING,
                storage_location=AWS_S3,
            )

            call_command("rename_deposited_files")

            s3_client_stubber.assert_no_pending_responses()

            # Check that each file.filename has been updated with the clean
            # S3-compatible filename
            for file, (_, expected_filename) in zip(files, filenames):
                file.refresh_from_db()
                self.assertEqual(file.filename, expected_filename)

    @mock.patch("marsha.core.storage.storage_class.file_storage.exists")
    def test_rename_deposited_files_file_exists(self, mock_exists):
        """Command should not copy file if file already exists."""

        mock_exists.return_value = True

        now = datetime(2018, 8, 8, tzinfo=timezone.utc)

        # The file must match the command's filter (ready and stored on AWS),
        # otherwise the "already exists" branch is never reached and the test
        # passes without checking anything.
        DepositedFileFactory(
            filename="filename.pdf",
            uploaded_on=now,
            upload_state=READY,
            storage_location=AWS_S3,
        )

        # No response is stubbed: any call to copy_object would raise.
        with Stubber(rename_deposited_files.s3_client) as s3_client_stubber:
            call_command("rename_deposited_files")
            s3_client_stubber.assert_no_pending_responses()

        # The command must have checked the destination before skipping it
        mock_exists.assert_called_once()

0 commit comments

Comments
 (0)