Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7123527
D437.id1369.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
8 KB
Subscribers
None
D437.id1369.diff
View Options
diff --git a/README.md b/README.md
--- a/README.md
+++ b/README.md
@@ -11,25 +11,13 @@
cls: remote
args:
url: http://localhost:5002/
-
-send_contents: True
-send_directories: True
-send_revisions: True
-send_releases: True
-send_occurrences: True
-content_packet_size: 1000
-content_packet_size_bytes: 1073741824
-directory_packet_size: 2500
-revision_packet_size: 1000
-release_packet_size: 1000
-occurrence_packet_size: 1000
```
# Basic use
From python3's toplevel:
-## Remote (failure)
+## Remote
``` Python
project = 'hello'
@@ -47,7 +35,7 @@
t.run(origin_url=origin_url, directory=directory, visit_date='2016-05-03T15:16:32+00:00')
```
-## local directory (failure)
+## local directory
Only origin, contents, and directories are filled so far.
@@ -61,13 +49,13 @@
import logging
logging.basicConfig(level=logging.DEBUG)
-from swh.loader.mercurial.tasks import SlowLoadMercurialTsk
+from swh.loader.mercurial.tasks import LoadMercurialTsk
-t = SlowLoadMercurialTsk()
+t = LoadMercurialTsk()
t.run(origin_url=origin_url, directory=directory, visit_date='2016-05-03T15:16:32+00:00')
```
-## local archive (failure)
+## local archive
``` Python
project = '756015-ipv6-source-archive.zip'
@@ -77,8 +65,8 @@
import logging
logging.basicConfig(level=logging.DEBUG)
-from swh.loader.mercurial.tasks import SlowLoadMercurialArchiveTsk
+from swh.loader.mercurial.tasks import LoadArchiveMercurialTsk
-t = SlowLoadMercurialArchiveTsk()
+t = LoadArchiveMercurialTsk()
t.run(origin_url=origin_url, archive_path=archive_path, visit_date='2016-05-03T15:16:32+00:00')
```
diff --git a/debian/control b/debian/control
--- a/debian/control
+++ b/debian/control
@@ -13,7 +13,7 @@
python3-hglib,
patool,
python3-swh.core (>= 0.0.36~),
- python3-swh.model (>= 0.0.20~),
+ python3-swh.model (>= 0.0.27~),
python3-swh.storage (>= 0.0.95~),
python3-swh.scheduler (>= 0.0.19~),
python3-swh.loader.core (>= 0.0.33~),
@@ -25,7 +25,7 @@
Architecture: all
Depends: python3-swh.core (>= 0.0.36~),
python3-swh.loader.core (>= 0.0.33~),
- python3-swh.model (>= 0.0.20~),
+ python3-swh.model (>= 0.0.27~),
python3-swh.storage (>= 0.0.95~),
python3-swh.scheduler (>= 0.0.19~),
patool,
diff --git a/requirements-swh.txt b/requirements-swh.txt
--- a/requirements-swh.txt
+++ b/requirements-swh.txt
@@ -1,5 +1,5 @@
swh.core >= 0.0.36
-swh.model >= 0.0.20
+swh.model >= 0.0.27
swh.storage >= 0.0.95
swh.scheduler >= 0.0.19
swh.loader.core >= 0.0.33
diff --git a/swh/loader/mercurial/bundle20_loader.py b/swh/loader/mercurial/bundle20_loader.py
--- a/swh/loader/mercurial/bundle20_loader.py
+++ b/swh/loader/mercurial/bundle20_loader.py
@@ -27,7 +27,11 @@
from shutil import rmtree
from tempfile import mkdtemp
-from swh.model import hashutil, identifiers
+from swh.model import identifiers
+from swh.model.hashutil import (
+ MultiHash, hash_to_hex, hash_to_bytes,
+ DEFAULT_ALGORITHMS
+)
from swh.loader.core.loader import SWHStatelessLoader
from swh.loader.core.converters import content_for_storage
from swh.loader.core.utils import clean_dangling_folders
@@ -100,15 +104,14 @@
"""
b = {}
for _, node_hash_id, _, branch_name, *_ in repo.heads():
- b[branch_name] = hashutil.hash_to_bytes(
+ b[branch_name] = hash_to_bytes(
node_hash_id.decode())
bookmarks = repo.bookmarks()
if bookmarks and bookmarks[0]:
for bookmark_name, _, target_short in bookmarks[0]:
target = repo[target_short].node()
- b[bookmark_name] = hashutil.hash_to_bytes(
- target.decode())
+ b[bookmark_name] = hash_to_bytes(target.decode())
return b
@@ -225,12 +228,14 @@
file_name = node_info[0]
header = node_info[2]
+ length = len(blob)
if header['linknode'] in self.reduce_effort:
- content = hashutil.hash_data(blob, algorithms=[ALGO],
- with_length=True)
+ algorithms = [ALGO]
else:
- content = hashutil.hash_data(blob, with_length=True)
-
+ algorithms = DEFAULT_ALGORITHMS
+ h = MultiHash.from_data(blob, hash_names=algorithms, length=length)
+ content = h.digest()
+ content['length'] = length
blob_hash = content[ALGO]
self.file_node_to_hash[header['node']] = blob_hash
@@ -273,7 +278,6 @@
content = contents.pop(node_hashes[node], None)
if content:
content['data'] = blob
- content['length'] = len(blob)
yield content_for_storage(
content,
log=self.log,
@@ -380,7 +384,7 @@
'directory': directory_id,
'message': commit['message'],
'metadata': {
- 'node': hashutil.hash_to_hex(header['node']),
+ 'node': hash_to_hex(header['node']),
'extra_headers': [
['time_offset_seconds',
str(commit['time_offset_seconds']).encode('utf-8')],
@@ -397,7 +401,7 @@
if p2:
revision['parents'].append(p2)
- revision['id'] = hashutil.hash_to_bytes(
+ revision['id'] = hash_to_bytes(
identifiers.revision_identifier(revision)
)
self.node_2_rev[header['node']] = revision['id']
@@ -433,7 +437,7 @@
self.num_releases += 1
node, name = self._read_tag(t)
node = node.decode()
- node_bytes = hashutil.hash_to_bytes(node)
+ node_bytes = hash_to_bytes(node)
if not TAG_PATTERN.match(node):
self.log.warn('Wrong pattern (%s) found in tags. Skipping' % (
node, ))
@@ -454,7 +458,7 @@
'author': {'name': None, 'email': None, 'fullname': b''},
'date': None
}
- id_hash = hashutil.hash_to_bytes(
+ id_hash = hash_to_bytes(
identifiers.release_identifier(release))
release['id'] = id_hash
missing_releases.append(id_hash)
diff --git a/swh/loader/mercurial/bundle20_loader_verifier.py b/swh/loader/mercurial/bundle20_loader_verifier.py
--- a/swh/loader/mercurial/bundle20_loader_verifier.py
+++ b/swh/loader/mercurial/bundle20_loader_verifier.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2017 The Software Heritage developers
+# Copyright (C) 2017-2018 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
@@ -13,7 +13,7 @@
from binascii import hexlify, unhexlify
-from swh.model import hashutil
+from swh.model.hashutil import MultiHash
from .bundle20_loader import HgBundle20Loader
from .converters import PRIMARY_ALGO as ALGO
@@ -31,7 +31,8 @@
header = node_info[2]
i += 1
- bhash = hashutil.hash_data(blob, algorithms=set([ALGO]))[ALGO]
+ hashes = MultiHash.from_data(blob, hash_names=set([ALGO])).digest()
+ bhash = hashes[ALGO]
self.file_node_to_hash[header['node']] = bhash
u.update([bhash])
diff --git a/swh/loader/mercurial/slow_loader.py b/swh/loader/mercurial/slow_loader.py
--- a/swh/loader/mercurial/slow_loader.py
+++ b/swh/loader/mercurial/slow_loader.py
@@ -11,7 +11,8 @@
import hglib
import os
-from swh.model import identifiers, hashutil
+from swh.model import identifiers
+from swh.model.hashutil import MultiHash, DEFAULT_ALGORITHMS, hash_to_hex
from swh.loader.core.loader import SWHStatelessLoader
from .converters import parse_author, PRIMARY_ALGO as ALGO
@@ -51,8 +52,9 @@
content.update(existing_hashes)
hash_types = list(existing_hashes.keys())
- hashes_to_do = hashutil.DEFAULT_ALGORITHMS.difference(hash_types)
- content.update(hashutil.hash_data(data, algorithms=hashes_to_do))
+ hashes_to_do = DEFAULT_ALGORITHMS.difference(hash_types)
+ hashes = MultiHash.from_data(data, hash_names=hashes_to_do).digest()
+ content.update(hashes)
if max_size and (size > max_size):
content.update({
@@ -60,7 +62,7 @@
'reason': 'Content too large',
})
if logger:
- id_hash = hashutil.hash_to_hex(content[ALGO])
+ id_hash = hash_to_hex(content[ALGO])
logger.info(
'Skipping content %s, too large (%s > %s)'
% (id_hash, size, max_size),
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Thu, Dec 19, 10:03 AM (19 h, 16 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3216486
Attached To
D437: mercurial.loader: Migrate to swh.model.hashutil.MultiHash
Event Timeline
Log In to Comment