#!/usr/bin/env python3

# Copyright (C) 2015 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information

import itertools
import os
import sys

from swh.core import config
from swh.loader.dir import producer


# Static setup
EPOCH = 0  # NOTE(review): not referenced anywhere in this script yet
UTC_OFFSET = '+0000'

# Configuration file used when no existing path is given on the command line.
DEFAULT_CONF_FILE = '../resources/producer/tar.ini'

# Maximum number of archives to process (e.g. LIMIT = 100).
# None (or any falsy value) means no limit.
LIMIT = None


# Fall back to the default configuration file when the command line gives no
# path at all, or gives a path that does not exist.
conf_file = sys.argv[1] if len(sys.argv) > 1 else DEFAULT_CONF_FILE
if not os.path.exists(conf_file):
    conf_file = DEFAULT_CONF_FILE

conf = config.read(conf_file)


def compute_origin(config, filename):
    """Compute the origin of an archive.

    Args:
        config: mapping with the 'origin_url_scheme' and 'origin_type' keys.
            Inside this function the parameter shadows the imported
            `config` module.
        filename: archive's filename

    Returns:
        Dictionary with the following keys:
        - origin_url: config['origin_url_scheme'] joined with filename
        - origin_type: config['origin_type']

    """
    return {
        'origin_url': os.path.join(config['origin_url_scheme'], filename),
        'origin_type': config['origin_type'],
    }


def compute_occurrence(filepath):
    """Placeholder, not implemented yet (returns None)."""
    pass


def compute_occurrences(filepath):
    """Placeholder, not implemented yet (returns None)."""
    pass


def time_from_file(filepath):
    """Extract time from filepath.

    Args:
        filepath: path to the file we want to extract metadata

    Returns:
        Modification time (st_mtime) reported by os.lstat for filepath.
        Since lstat is used, a symbolic link is not followed.

    """
    return os.lstat(filepath).st_mtime


def compute_release(filepath):
    """Compute a release from a given filepath.

    If the filepath does not contain a recognizable release number, the
    release can be skipped.

    Args:
        filepath: file's absolute path

    Returns:
        None if the release number cannot be extracted from the filename.
        Otherwise a synthetic release is computed with the following keys:
        - name: the release computed from the filename
        - date: the modification timestamp as returned by an lstat call
        - offset: +0000
        - author_name: ''
        - author_email: ''
        - comment: ''

    """
    filename = os.path.basename(filepath)
    release_number = producer.release_number(filename)
    if not release_number:
        return None

    return {
        'name': release_number,
        'date': time_from_file(filepath),
        'offset': UTC_OFFSET,
        'author_name': '',
        'author_email': '',
        'comment': '',
    }


def compute_revision(filepath):
    """Placeholder, not implemented yet (returns None)."""
    pass


def compute_from_filepath(filepath):
    """Placeholder, not implemented yet (returns None)."""
    pass


def list_archives_from(path):
    """List the archives found below path.

    Args:
        path: top directory to list archives from.

    Yields:
        (dirpath, filename) pairs, one per file accepted by
        producer.is_archive, in os.walk order.

    """
    for dirpath, _dirnames, filenames in os.walk(path):
        for fname in filenames:
            if producer.is_archive(fname):
                yield dirpath, fname


def compute_message_from(dirpath, filename):
    """Print the '|filename|version|' line of one archive.

    Args:
        dirpath: directory holding the archive (currently unused)
        filename: archive's filename

    """
    version = producer.release_number(filename)
    # The release number may be missing (see compute_release); joining None
    # would raise a TypeError, so an empty version is printed instead.
    print('|'.join(['', filename, version or '', '']))


def produce_archive_messages(path):
    """From path, print one message line per archive found.

    At most LIMIT archives are processed when LIMIT is set to a positive
    integer; all of them otherwise.

    Args:
        path: top directory to list archives from.

    """
    archives = list_archives_from(path)
    # islice(..., None) iterates everything, which keeps "falsy LIMIT means
    # no limit" while capping at exactly LIMIT entries otherwise.
    for dirpath, filename in itertools.islice(archives, LIMIT or None):
        compute_message_from(dirpath, filename)


produce_archive_messages(conf['mirror_root_directory'])