diff --git a/README b/README --- a/README +++ b/README @@ -47,36 +47,28 @@ Configuration ============= -You can run the loader or the updater directly by calling python3 -m swh.loader.git.{loader,updater}. +You can run the loader or the updater directly by calling: +``` +python3 -m swh.loader.git.{loader,updater} +``` + +## Location + +Both tools expect a configuration file. + +Either one of the following location: +- /etc/softwareheritage/ +- ~/.config/swh/ +- ~/.swh/ -Both tools expect a configuration file in .ini format to be present in ~/.config/swh/loader/git-{loader,updater}.ini +Note: Will call that location $SWH_CONFIG_PATH -The configuration file contains the following directives: +## Configuration sample +$SWH_CONFIG_PATH/loader/git-{loader,updater}.yml: ``` -[main] -# the storage class used. one of remote_storage, local_storage -storage_class = remote_storage - -# arguments passed to the storage class -# for remote_storage: URI of the storage server -storage_args = http://localhost:5002/ - -# for local_storage: database connection string and root of the -# storage, comma separated -# storage_args = dbname=softwareheritage-dev, /tmp/swh/storage - -# Whether to send the given types of objects -send_contents = True -send_directories = True -send_revisions = True -send_releases = True -send_snapshot = True - -# The size of the packets sent to storage for each kind of object -content_packet_size = 100000 -content_packet_size_bytes = 1073741824 -directory_packet_size = 25000 -revision_packet_size = 100000 -release_packet_size = 100000 +storage: + cls: remote + args: + url: http://localhost:5002/ ``` diff --git a/debian/control b/debian/control --- a/debian/control +++ b/debian/control @@ -12,7 +12,7 @@ python3-setuptools, python3-swh.core (>= 0.0.7~), python3-swh.loader.core (>= 0.0.32), - python3-swh.model (>= 0.0.15~), + python3-swh.model (>= 0.0.27~), python3-swh.scheduler (>= 0.0.14~), python3-swh.storage (>= 0.0.83~), python3-vcversioner @@ -23,7 +23,7 @@ Architecture: all Depends: python3-swh.core (>= 0.0.7~), python3-swh.loader.core (>= 0.0.32~), - python3-swh.model (>= 0.0.15~), + python3-swh.model (>= 0.0.27~), python3-swh.scheduler (>= 0.0.14~), python3-swh.storage (>= 0.0.83~), ${misc:Depends}, diff --git a/requirements-swh.txt b/requirements-swh.txt --- a/requirements-swh.txt +++ b/requirements-swh.txt @@ -1,5 +1,5 @@ swh.core >= 0.0.7 swh.loader.core >= 0.0.32 -swh.model >= 0.0.15 +swh.model >= 0.0.27 swh.scheduler >= 0.0.14 swh.storage >= 0.0.83 diff --git a/swh/loader/git/converters.py b/swh/loader/git/converters.py --- a/swh/loader/git/converters.py +++ b/swh/loader/git/converters.py @@ -5,10 +5,13 @@ """Convert dulwich objects to dictionaries suitable for swh.storage""" -from swh.model import hashutil, identifiers +from swh.model import identifiers +from swh.model.hashutil import ( + DEFAULT_ALGORITHMS, hash_to_hex, hash_to_bytes, MultiHash +) -HASH_ALGORITHMS = hashutil.DEFAULT_ALGORITHMS - {'sha1_git'} +HASH_ALGORITHMS = DEFAULT_ALGORITHMS - {'sha1_git'} def origin_url_to_origin(origin_url): @@ -21,20 +24,15 @@ def dulwich_blob_to_content_id(blob): """Convert a dulwich blob to a Software Heritage content id""" - if blob.type_name != b'blob': return size = blob.raw_length() - ret = { - 'sha1_git': blob.sha().digest(), - 'length': size, - } - data = blob.as_raw_string() - ret.update(hashutil.hash_data(data, HASH_ALGORITHMS)) - - return ret + hashes = MultiHash.from_data(data, HASH_ALGORITHMS, length=size).digest() + hashes['sha1_git'] = blob.sha().digest() + hashes['length'] = size + return hashes def dulwich_blob_to_content(blob, log=None, max_content_size=None, @@ -50,7 +48,7 @@ if max_content_size: if size > max_content_size: - id = hashutil.hash_to_hex(ret['sha1_git']) + id = hash_to_hex(ret['sha1_git']) if log: log.info('Skipping content %s, too large (%s > %s)' % (id, size, max_content_size), extra={ @@ -94,7 +92,7 @@ 'type': entry_mode_map.get(entry.mode, 'file'), 'perms': entry.mode, 'name': entry.path, - 'target': hashutil.hash_to_bytes(entry.sha.decode('ascii')), + 'target': hash_to_bytes(entry.sha.decode('ascii')), }) return ret diff --git a/swh/loader/git/reader.py b/swh/loader/git/reader.py --- a/swh/loader/git/reader.py +++ b/swh/loader/git/reader.py @@ -10,7 +10,7 @@ import click from swh.core import utils -from swh.model import hashutil +from swh.model.hashutil import MultiHash, hash_to_hex from .updater import BulkUpdater, SWHRepoRepresentation from . import converters @@ -158,7 +158,7 @@ """We want to store only object identifiers""" # compute the sha1 (obj.id is the sha1_git) data = obj.as_raw_string() - hashes = hashutil.hash_data(data, {'sha1'}) + hashes = MultiHash.from_data(data, {'sha1'}).digest() oid = hashes['sha1'] return (oid, b'blob', oid) @@ -236,7 +236,7 @@ if ids: for oid in ids: - print(hashutil.hash_to_hex(oid)) + print(hash_to_hex(oid)) @main.command()