Page MenuHomeSoftware Heritage

D444.diff
No OneTemporary

D444.diff

diff --git a/swh/storage/algos/diff.py b/swh/storage/algos/diff.py
--- a/swh/storage/algos/diff.py
+++ b/swh/storage/algos/diff.py
@@ -14,6 +14,7 @@
import collections
+from swh.model.hashutil import hash_to_bytes
from swh.model.identifiers import directory_identifier
from .dir_iterators import (
@@ -21,7 +22,7 @@
)
# get the hash identifier for an empty directory
-_empty_dir_hash = directory_identifier({'entries': []})
+_empty_dir_hash = hash_to_bytes(directory_identifier({'entries': []}))
def _get_rev(storage, rev_id):
diff --git a/swh/storage/algos/dir_iterators.py b/swh/storage/algos/dir_iterators.py
--- a/swh/storage/algos/dir_iterators.py
+++ b/swh/storage/algos/dir_iterators.py
@@ -13,10 +13,11 @@
from enum import Enum
+from swh.model.hashutil import hash_to_bytes
from swh.model.identifiers import directory_identifier
# get the hash identifier for an empty directory
-_empty_dir_hash = directory_identifier({'entries': []})
+_empty_dir_hash = hash_to_bytes(directory_identifier({'entries': []}))
def _get_dir(storage, dir_id):
@@ -69,19 +70,14 @@
Args:
dir_id (bytes): identifier of a root directory
"""
- if dir_id:
- if dir_id == _empty_dir_hash:
- self.frames.append([])
- else:
- # get directory entries
- dir_data = _get_dir(self.storage, dir_id)
- # sort them in lexicographical order
- dir_data = sorted(dir_data, key=lambda e: e['name'])
- # reverse the ordering in order to unstack the "smallest"
- # entry each time the iterator advances
- dir_data.reverse()
- # push the directory frame to the main stack
- self.frames.append(dir_data)
+ # get directory entries
+ dir_data = _get_dir(self.storage, dir_id)
+ # sort them in lexicographical order and reverse the ordering
+ # in order to unstack the "smallest" entry each time the
+ # iterator advances
+ dir_data = sorted(dir_data, key=lambda e: e['name'], reverse=True)
+ # push the directory frame to the main stack
+ self.frames.append(dir_data)
def top(self):
"""
@@ -157,7 +153,8 @@
self.has_started = True
return current
- if descend and self.current_is_dir():
+ if descend and self.current_is_dir() \
+ and current['target'] != _empty_dir_hash:
self._push_dir_frame(current['target'])
else:
self.drop()
@@ -201,6 +198,36 @@
self.frames.pop()
self.drop()
+ def __next__(self):
+ """
+ Advance the iterator by one step and return the entry reached.
+
+ Returns:
+ the entry returned by ``step()``, with an added 'path' key
+ holding the value of ``current_path()``
+
+ Raises:
+ StopIteration: when ``step()`` returns a falsy value
+ """
+ entry = self.step()
+ if not entry:
+ raise StopIteration
+ # the entry is annotated in place with the iterator's current path
+ entry['path'] = self.current_path()
+ return entry
+
+ def __iter__(self):
+ """
+ Return a new DirectoryIterator built from the storage, root_dir_id
+ and base_path attributes of this one (``self`` is not returned).
+ """
+ # NOTE(review): presumably a fresh instance is returned so that each
+ # ``for`` loop restarts from the root directory - confirm
+ return DirectoryIterator(self.storage, self.root_dir_id,
+ self.base_path)
+
+
+def dir_iterator(storage, dir_id):
+ """
+ Return an iterator for recursively visiting a directory and
+ its sub-directories. The associated paths are visited in
+ lexicographic depth-first search order. An empty sub-directory
+ is returned as an entry but has no children to visit.
+
+ Args:
+ storage (swh.storage.Storage): an instance of a swh storage
+ dir_id (bytes): identifier of the directory to start from
+
+ Returns:
+ swh.storage.algos.dir_iterators.DirectoryIterator: an iterator
+ returning a dict at each iteration step describing a directory
+ entry. A 'path' field is added in that dict to store the
+ path of the entry from the visited directory (e.g. b'abc/ab'
+ for the entry b'ab' of the sub-directory b'abc').
+ """
+ return DirectoryIterator(storage, dir_id)
+
class Remaining(Enum):
"""
diff --git a/swh/storage/tests/algos/test_diff.py b/swh/storage/tests/algos/test_diff.py
--- a/swh/storage/tests/algos/test_diff.py
+++ b/swh/storage/tests/algos/test_diff.py
@@ -7,84 +7,15 @@
import unittest
-from nose.tools import istest, nottest
from unittest.mock import patch
+from nose.tools import istest, nottest
+
+from swh.model.hashutil import hash_to_bytes
from swh.model.identifiers import directory_identifier
from swh.storage.algos import diff
-
-class DirectoryModel(object):
- """
- Quick and dirty directory model to ease the writing
- of revision trees differential tests.
- """
- def __init__(self, name=''):
- self.data = {}
- self.data['name'] = name
- self.data['perms'] = 16384
- self.data['type'] = 'dir'
- self.data['entries'] = []
- self.data['entry_idx'] = {}
-
- def __getitem__(self, item):
- if item == 'target':
- return directory_identifier(self)
- else:
- return self.data[item]
-
- def add_file(self, path, sha1=None):
- path_parts = path.split(b'/')
- if len(path_parts) == 1:
- self['entry_idx'][path] = len(self['entries'])
- self['entries'].append({
- 'target': sha1,
- 'name': path,
- 'perms': 33188,
- 'type': 'file'
- })
- else:
- if not path_parts[0] in self['entry_idx']:
- self['entry_idx'][path_parts[0]] = len(self['entries'])
- self['entries'].append(DirectoryModel(path_parts[0]))
- if path_parts[1]:
- dir_idx = self['entry_idx'][path_parts[0]]
- self['entries'][dir_idx].add_file(b'/'.join(path_parts[1:]), sha1)
-
- def get_hash_data(self, entry_hash):
- if self['target'] == entry_hash:
- ret = []
- for e in self['entries']:
- ret.append({
- 'target': e['target'],
- 'name': e['name'],
- 'perms': e['perms'],
- 'type': e['type']
- })
- return ret
- else:
- for e in self['entries']:
- if e['type'] == 'file' and e['target'] == entry_hash:
- return e
- elif e['type'] == 'dir':
- data = e.get_hash_data(entry_hash)
- if data:
- return data
- return None
-
- def get_path_data(self, path):
- path_parts = path.split(b'/')
- entry_idx = self['entry_idx'][path_parts[0]]
- entry = self['entries'][entry_idx]
- if len(path_parts) == 1:
- return {
- 'target': entry['target'],
- 'name': entry['name'],
- 'perms': entry['perms'],
- 'type': entry['type']
- }
- else:
- return entry.get_path_data(b'/'.join(path_parts[1:]))
+from .test_dir_iterator import DirectoryModel
@patch('swh.storage.algos.diff._get_rev')
@@ -95,20 +26,25 @@
def diff_revisions(self, rev_from, rev_to, from_dir_model, to_dir_model,
expected_changes, mock_get_dir, mock_get_rev):
+ # the mocked revision lookup and the diff algorithm both receive the
+ # revision identifiers converted with hash_to_bytes
+ rev_from_bytes = hash_to_bytes(rev_from)
+ rev_to_bytes = hash_to_bytes(rev_to)
+
def _get_rev(*args, **kwargs):
- if args[1] == rev_from:
+ if args[1] == rev_from_bytes:
return {'directory': from_dir_model['target']}
else:
return {'directory': to_dir_model['target']}
def _get_dir(*args, **kwargs):
- return from_dir_model.get_hash_data(args[1]) or \
- to_dir_model.get_hash_data(args[1])
+ from_dir = from_dir_model.get_hash_data(args[1])
+ to_dir = to_dir_model.get_hash_data(args[1])
+ # an empty directory is described by an empty (falsy) list, so the
+ # second model is only used when the first lookup found nothing
+ return from_dir if from_dir is not None else to_dir
mock_get_rev.side_effect = _get_rev
mock_get_dir.side_effect = _get_dir
- changes = diff.diff_revisions(None, rev_from, rev_to, track_renaming=True)
+ changes = diff.diff_revisions(None, rev_from_bytes, rev_to_bytes,
+ track_renaming=True)
self.assertEqual(changes, expected_changes)
diff --git a/swh/storage/tests/algos/test_dir_iterator.py b/swh/storage/tests/algos/test_dir_iterator.py
new file mode 100644
--- /dev/null
+++ b/swh/storage/tests/algos/test_dir_iterator.py
@@ -0,0 +1,153 @@
+
+# Copyright (C) 2018 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import unittest
+
+from unittest.mock import patch
+
+from nose.tools import istest, nottest
+
+from swh.model.from_disk import DentryPerms
+from swh.model.hashutil import hash_to_bytes, MultiHash
+from swh.model.identifiers import directory_identifier
+from swh.storage.algos.dir_iterators import dir_iterator
+
+# flake8: noqa
+
+class DirectoryModel(object):
+ """
+ Quick and dirty directory model to ease the writing
+ of directory iterators and revision trees differential tests.
+
+ A model can be read like a directory entry dict ('name', 'perms',
+ 'type', 'entries'); its 'target' is computed from the model each
+ time it is read.
+ """
+ def __init__(self, name=''):
+ self.data = {}
+ self.data['name'] = name
+ self.data['perms'] = DentryPerms.directory
+ self.data['type'] = 'dir'
+ self.data['entries'] = []
+ # maps an entry name to its position in the 'entries' list
+ self.data['entry_idx'] = {}
+
+ def __getitem__(self, item):
+ # 'target' is not stored in self.data: it is recomputed on each access
+ if item == 'target':
+ return hash_to_bytes(directory_identifier(self))
+ else:
+ return self.data[item]
+
+ def add_file(self, path, sha1=None):
+ """
+ Add a file to the model, creating the intermediate directories
+ that are not modeled yet.
+
+ A path ending with a slash (e.g. b'abc/') only creates the
+ directory: no file entry is added to it.
+
+ Args:
+ path (bytes): slash separated path of the file to add
+ sha1: identifier of the file, converted with hash_to_bytes;
+ when not provided, the sha1 of the path bytes is used
+ """
+ path_parts = path.split(b'/')
+ sha1 = hash_to_bytes(sha1) if sha1 \
+ else MultiHash.from_data(path).digest()['sha1']
+ if len(path_parts) == 1:
+ self['entry_idx'][path] = len(self['entries'])
+ self['entries'].append({
+ 'target': sha1,
+ 'name': path,
+ 'perms': DentryPerms.content,
+ 'type': 'file'
+ })
+ else:
+ if path_parts[0] not in self['entry_idx']:
+ self['entry_idx'][path_parts[0]] = len(self['entries'])
+ self['entries'].append(DirectoryModel(path_parts[0]))
+ # an empty second component means the path ended with a slash
+ if path_parts[1]:
+ dir_idx = self['entry_idx'][path_parts[0]]
+ self['entries'][dir_idx].add_file(b'/'.join(path_parts[1:]), sha1)
+
+ def get_hash_data(self, entry_hash):
+ """
+ Look for a hash in the model, searching sub-directories too.
+
+ Args:
+ entry_hash (bytes): the identifier to look for
+
+ Returns:
+ the list of entry dicts ('target', 'name', 'perms', 'type')
+ of the directory when the hash is the one of a directory
+ (possibly an empty list), the entry dict of the file when
+ the hash is the one of a file, None when nothing matches
+ """
+ if self['target'] == entry_hash:
+ ret = []
+ for e in self['entries']:
+ ret.append({
+ 'target': e['target'],
+ 'name': e['name'],
+ 'perms': e['perms'],
+ 'type': e['type']
+ })
+ return ret
+ else:
+ for e in self['entries']:
+ if e['type'] == 'file' and e['target'] == entry_hash:
+ return e
+ elif e['type'] == 'dir':
+ data = e.get_hash_data(entry_hash)
+ # an empty directory yields an empty list: only None
+ # means that the hash was not found in that sub-tree
+ if data is not None:
+ return data
+ return None
+
+ def get_path_data(self, path):
+ """
+ Return the entry dict ('target', 'name', 'perms', 'type') found
+ at the given path.
+
+ Args:
+ path (bytes): slash separated path of an entry of the model
+
+ Raises:
+ KeyError: when a component of the path is not in the model
+ """
+ path_parts = path.split(b'/')
+ entry_idx = self['entry_idx'][path_parts[0]]
+ entry = self['entries'][entry_idx]
+ if len(path_parts) == 1:
+ return {
+ 'target': entry['target'],
+ 'name': entry['name'],
+ 'perms': entry['perms'],
+ 'type': entry['type']
+ }
+ else:
+ return entry.get_path_data(b'/'.join(path_parts[1:]))
+
+
+@patch('swh.storage.algos.dir_iterators._get_dir')
+class TestDirectoryIterator(unittest.TestCase):
+ """
+ Check the order in which dir_iterator visits the paths of a
+ directory model. The _get_dir function of the dir_iterators module
+ is patched so that directory entries are read from the model.
+ """
+
+ @nottest
+ def check_iterated_paths(self, dir_model, expected_paths_order,
+ mock_get_dir):
+ """
+ Iterate on the root directory of a model and compare the
+ visited paths with the expected ones.
+
+ Args:
+ dir_model (DirectoryModel): the directory to iterate on
+ expected_paths_order (list): the paths expected to be
+ visited, in visit order
+ mock_get_dir: the mock replacing the patched _get_dir
+ """
+
+ def _get_dir(*args, **kwargs):
+ return dir_model.get_hash_data(args[1])
+
+ mock_get_dir.side_effect = _get_dir # noqa
+ paths_order = [e['path'] for e in dir_iterator(None, dir_model['target'])]
+ self.assertEqual(paths_order, expected_paths_order)
+
+ @istest
+ def test_dir_iterator_empty_dir(self, mock_get_dir):
+ # a model without any entry: nothing is expected to be visited
+ dir_model = DirectoryModel()
+ expected_paths_order = []
+ self.check_iterated_paths(dir_model, expected_paths_order, mock_get_dir)
+
+ @istest
+ def test_dir_iterator_no_empty_dirs(self, mock_get_dir):
+ # files are added in arbitrary order, paths are expected sorted
+ dir_model = DirectoryModel()
+ dir_model.add_file(b'xyz/gtr/uhb')
+ dir_model.add_file(b'bca/ef')
+ dir_model.add_file(b'abc/ab')
+ dir_model.add_file(b'abc/bc')
+ dir_model.add_file(b'xyz/ouy/poi')
+
+ expected_paths_order = [b'abc',
+ b'abc/ab',
+ b'abc/bc',
+ b'bca',
+ b'bca/ef',
+ b'xyz',
+ b'xyz/gtr',
+ b'xyz/gtr/uhb',
+ b'xyz/ouy',
+ b'xyz/ouy/poi']
+
+ self.check_iterated_paths(dir_model, expected_paths_order, mock_get_dir)
+
+ @istest
+ def test_dir_iterator_with_empty_dirs(self, mock_get_dir):
+ # paths ending with a slash add directories without any file in them
+ dir_model = DirectoryModel()
+ dir_model.add_file(b'xyz/gtr/')
+ dir_model.add_file(b'bca/ef')
+ dir_model.add_file(b'abc/')
+ dir_model.add_file(b'xyz/ouy/poi')
+
+ # the empty directories are expected in the result, without children
+ expected_paths_order = [b'abc',
+ b'bca',
+ b'bca/ef',
+ b'xyz',
+ b'xyz/gtr',
+ b'xyz/ouy',
+ b'xyz/ouy/poi']
+
+ self.check_iterated_paths(dir_model, expected_paths_order, mock_get_dir)

File Metadata

Mime Type
text/plain
Expires
Nov 5 2024, 12:03 PM (18 w, 5 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3217073

Event Timeline