Page Menu | Home | Software Heritage

D437.id1369.diff
No One | Temporary

D437.id1369.diff

diff --git a/README.md b/README.md
--- a/README.md
+++ b/README.md
@@ -11,25 +11,13 @@
cls: remote
args:
url: http://localhost:5002/
-
-send_contents: True
-send_directories: True
-send_revisions: True
-send_releases: True
-send_occurrences: True
-content_packet_size: 1000
-content_packet_size_bytes: 1073741824
-directory_packet_size: 2500
-revision_packet_size: 1000
-release_packet_size: 1000
-occurrence_packet_size: 1000
```
# Basic use
From python3's toplevel:
-## Remote (failure)
+## Remote
``` Python
project = 'hello'
@@ -47,7 +35,7 @@
t.run(origin_url=origin_url, directory=directory, visit_date='2016-05-03T15:16:32+00:00')
```
-## local directory (failure)
+## local directory
Only origin, contents, and directories are filled so far.
@@ -61,13 +49,13 @@
import logging
logging.basicConfig(level=logging.DEBUG)
-from swh.loader.mercurial.tasks import SlowLoadMercurialTsk
+from swh.loader.mercurial.tasks import LoadMercurialTsk
-t = SlowLoadMercurialTsk()
+t = LoadMercurialTsk()
t.run(origin_url=origin_url, directory=directory, visit_date='2016-05-03T15:16:32+00:00')
```
-## local archive (failure)
+## local archive
``` Python
project = '756015-ipv6-source-archive.zip'
@@ -77,8 +65,8 @@
import logging
logging.basicConfig(level=logging.DEBUG)
-from swh.loader.mercurial.tasks import SlowLoadMercurialArchiveTsk
+from swh.loader.mercurial.tasks import LoadArchiveMercurialTsk
-t = SlowLoadMercurialArchiveTsk()
+t = LoadArchiveMercurialTsk()
t.run(origin_url=origin_url, archive_path=archive_path, visit_date='2016-05-03T15:16:32+00:00')
```
diff --git a/debian/control b/debian/control
--- a/debian/control
+++ b/debian/control
@@ -13,7 +13,7 @@
python3-hglib,
patool,
python3-swh.core (>= 0.0.36~),
- python3-swh.model (>= 0.0.20~),
+ python3-swh.model (>= 0.0.27~),
python3-swh.storage (>= 0.0.95~),
python3-swh.scheduler (>= 0.0.19~),
python3-swh.loader.core (>= 0.0.33~),
@@ -25,7 +25,7 @@
Architecture: all
Depends: python3-swh.core (>= 0.0.36~),
python3-swh.loader.core (>= 0.0.33~),
- python3-swh.model (>= 0.0.20~),
+ python3-swh.model (>= 0.0.27~),
python3-swh.storage (>= 0.0.95~),
python3-swh.scheduler (>= 0.0.19~),
patool,
diff --git a/requirements-swh.txt b/requirements-swh.txt
--- a/requirements-swh.txt
+++ b/requirements-swh.txt
@@ -1,5 +1,5 @@
swh.core >= 0.0.36
-swh.model >= 0.0.20
+swh.model >= 0.0.27
swh.storage >= 0.0.95
swh.scheduler >= 0.0.19
swh.loader.core >= 0.0.33
diff --git a/swh/loader/mercurial/bundle20_loader.py b/swh/loader/mercurial/bundle20_loader.py
--- a/swh/loader/mercurial/bundle20_loader.py
+++ b/swh/loader/mercurial/bundle20_loader.py
@@ -27,7 +27,11 @@
from shutil import rmtree
from tempfile import mkdtemp
-from swh.model import hashutil, identifiers
+from swh.model import identifiers
+from swh.model.hashutil import (
+ MultiHash, hash_to_hex, hash_to_bytes,
+ DEFAULT_ALGORITHMS
+)
from swh.loader.core.loader import SWHStatelessLoader
from swh.loader.core.converters import content_for_storage
from swh.loader.core.utils import clean_dangling_folders
@@ -100,15 +104,14 @@
"""
b = {}
for _, node_hash_id, _, branch_name, *_ in repo.heads():
- b[branch_name] = hashutil.hash_to_bytes(
+ b[branch_name] = hash_to_bytes(
node_hash_id.decode())
bookmarks = repo.bookmarks()
if bookmarks and bookmarks[0]:
for bookmark_name, _, target_short in bookmarks[0]:
target = repo[target_short].node()
- b[bookmark_name] = hashutil.hash_to_bytes(
- target.decode())
+ b[bookmark_name] = hash_to_bytes(target.decode())
return b
@@ -225,12 +228,14 @@
file_name = node_info[0]
header = node_info[2]
+ length = len(blob)
if header['linknode'] in self.reduce_effort:
- content = hashutil.hash_data(blob, algorithms=[ALGO],
- with_length=True)
+ algorithms = [ALGO]
else:
- content = hashutil.hash_data(blob, with_length=True)
-
+ algorithms = DEFAULT_ALGORITHMS
+ h = MultiHash.from_data(blob, hash_names=algorithms, length=length)
+ content = h.digest()
+ content['length'] = length
blob_hash = content[ALGO]
self.file_node_to_hash[header['node']] = blob_hash
@@ -273,7 +278,6 @@
content = contents.pop(node_hashes[node], None)
if content:
content['data'] = blob
- content['length'] = len(blob)
yield content_for_storage(
content,
log=self.log,
@@ -380,7 +384,7 @@
'directory': directory_id,
'message': commit['message'],
'metadata': {
- 'node': hashutil.hash_to_hex(header['node']),
+ 'node': hash_to_hex(header['node']),
'extra_headers': [
['time_offset_seconds',
str(commit['time_offset_seconds']).encode('utf-8')],
@@ -397,7 +401,7 @@
if p2:
revision['parents'].append(p2)
- revision['id'] = hashutil.hash_to_bytes(
+ revision['id'] = hash_to_bytes(
identifiers.revision_identifier(revision)
)
self.node_2_rev[header['node']] = revision['id']
@@ -433,7 +437,7 @@
self.num_releases += 1
node, name = self._read_tag(t)
node = node.decode()
- node_bytes = hashutil.hash_to_bytes(node)
+ node_bytes = hash_to_bytes(node)
if not TAG_PATTERN.match(node):
self.log.warn('Wrong pattern (%s) found in tags. Skipping' % (
node, ))
@@ -454,7 +458,7 @@
'author': {'name': None, 'email': None, 'fullname': b''},
'date': None
}
- id_hash = hashutil.hash_to_bytes(
+ id_hash = hash_to_bytes(
identifiers.release_identifier(release))
release['id'] = id_hash
missing_releases.append(id_hash)
diff --git a/swh/loader/mercurial/bundle20_loader_verifier.py b/swh/loader/mercurial/bundle20_loader_verifier.py
--- a/swh/loader/mercurial/bundle20_loader_verifier.py
+++ b/swh/loader/mercurial/bundle20_loader_verifier.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2017 The Software Heritage developers
+# Copyright (C) 2017-2018 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
@@ -13,7 +13,7 @@
from binascii import hexlify, unhexlify
-from swh.model import hashutil
+from swh.model.hashutil import MultiHash
from .bundle20_loader import HgBundle20Loader
from .converters import PRIMARY_ALGO as ALGO
@@ -31,7 +31,8 @@
header = node_info[2]
i += 1
- bhash = hashutil.hash_data(blob, algorithms=set([ALGO]))[ALGO]
+ hashes = MultiHash.from_data(blob, hash_names=set([ALGO])).digest()
+ bhash = hashes[ALGO]
self.file_node_to_hash[header['node']] = bhash
u.update([bhash])
diff --git a/swh/loader/mercurial/slow_loader.py b/swh/loader/mercurial/slow_loader.py
--- a/swh/loader/mercurial/slow_loader.py
+++ b/swh/loader/mercurial/slow_loader.py
@@ -11,7 +11,8 @@
import hglib
import os
-from swh.model import identifiers, hashutil
+from swh.model import identifiers
+from swh.model.hashutil import MultiHash, DEFAULT_ALGORITHMS, hash_to_hex
from swh.loader.core.loader import SWHStatelessLoader
from .converters import parse_author, PRIMARY_ALGO as ALGO
@@ -51,8 +52,9 @@
content.update(existing_hashes)
hash_types = list(existing_hashes.keys())
- hashes_to_do = hashutil.DEFAULT_ALGORITHMS.difference(hash_types)
- content.update(hashutil.hash_data(data, algorithms=hashes_to_do))
+ hashes_to_do = DEFAULT_ALGORITHMS.difference(hash_types)
+ hashes = MultiHash.from_data(data, hash_names=hashes_to_do).digest()
+ content.update(hashes)
if max_size and (size > max_size):
content.update({
@@ -60,7 +62,7 @@
'reason': 'Content too large',
})
if logger:
- id_hash = hashutil.hash_to_hex(content[ALGO])
+ id_hash = hash_to_hex(content[ALGO])
logger.info(
'Skipping content %s, too large (%s > %s)'
% (id_hash, size, max_size),

File Metadata

Mime Type
text/plain
Expires
Thu, Dec 19, 10:03 AM (19 h, 16 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3216486

Event Timeline