diff --git a/swh/deposit/file_storage.py b/swh/deposit/file_storage.py new file mode 100644 --- /dev/null +++ b/swh/deposit/file_storage.py @@ -0,0 +1,84 @@ +# Copyright (C) 2020 The Software Heritage developers +# Copyright (c) Django Software Foundation and individual contributors. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without modification, +# are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. Neither the name of Django nor the names of its contributors may be used +# to endorse or promote products derived from this software without +# specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR +# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
import os.path

from django.core.exceptions import SuspiciousFileOperation
from django.core.files.storage import FileSystemStorage
from django.utils.crypto import get_random_string


class PrefixedFileSystemStorage(FileSystemStorage):
    """Subclass of FileSystemStorage that disambiguates file names by adding
    a random prefix, instead of a random suffix.

    This prevents issues with compound extensions (eg. .tar.gz), as a suffix
    changes the second-to-last extension, which messes with shutil's
    file type detection.
    """

    def get_alternative_name(self, file_root, file_ext):
        """Return an alternative base name for a file whose name is taken.

        The result is a random 7-character string and an underscore,
        followed by the unchanged ``file_root`` and ``file_ext``; the
        extension(s) of the original name are therefore left intact.

        Args:
            file_root: base name of the file, without its last extension.
            file_ext: last extension of the file, including the leading dot
                (empty string if the file has no extension).

        Returns:
            The new base name (no directory component).
        """
        return "%s_%s%s" % (get_random_string(7), file_root, file_ext)

    # This function is adapted from Django's own get_available_name (see the
    # license header of this file), with just the two computations of 'name'
    # changed to go through get_alternative_name.
    #
    # TODO: when we upgrade to Django 3, this whole function can be removed;
    # the override of get_alternative_name is then all that is needed.
    def get_available_name(self, name, max_length=None):
        """
        Return a filename that's free on the target storage system and
        available for new content to be written to.

        Args:
            name: requested file name, possibly including a directory part.
            max_length: optional maximum length of the returned name
                (directory part included).

        Returns:
            ``name`` itself if it is free (and short enough), otherwise a
            randomly prefixed variant located in the same directory.

        Raises:
            SuspiciousFileOperation: if the base name had to be truncated
                down to nothing while trying to satisfy ``max_length``.
        """
        dir_name, file_name = os.path.split(name)
        file_root, file_ext = os.path.splitext(file_name)
        # If the filename already exists, prepend a random 7 character
        # alphanumeric string and an underscore to the filename, until the
        # generated filename doesn't exist.
        # Truncate original name if required, so the new filename does not
        # exceed the max_length.
        while self.exists(name) or (max_length and len(name) > max_length):
            # file_ext includes the dot.
            name = os.path.join(
                dir_name, self.get_alternative_name(file_root, file_ext)
            )
            if max_length is None:
                continue
            # Truncate file_root if max_length exceeded.
            truncation = len(name) - max_length
            if truncation > 0:
                file_root = file_root[:-truncation]
                # Entire file_root was truncated in attempt to find an
                # available filename.
                if not file_root:
                    raise SuspiciousFileOperation(
                        'Storage can not find an available filename for "%s". '
                        "Please make sure that the corresponding file field "
                        'allows sufficient "max_length".' % name
                    )
                # Rebuild the name from the shortened root; the loop
                # condition then re-checks both existence and length.
                name = os.path.join(
                    dir_name, self.get_alternative_name(file_root, file_ext)
                )
        return name