Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7066489
D444.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
13 KB
Subscribers
None
D444.diff
View Options
diff --git a/swh/storage/algos/diff.py b/swh/storage/algos/diff.py
--- a/swh/storage/algos/diff.py
+++ b/swh/storage/algos/diff.py
@@ -14,6 +14,7 @@
import collections
+from swh.model.hashutil import hash_to_bytes
from swh.model.identifiers import directory_identifier
from .dir_iterators import (
@@ -21,7 +22,7 @@
)
# get the hash identifier for an empty directory
-_empty_dir_hash = directory_identifier({'entries': []})
+_empty_dir_hash = hash_to_bytes(directory_identifier({'entries': []}))
def _get_rev(storage, rev_id):
diff --git a/swh/storage/algos/dir_iterators.py b/swh/storage/algos/dir_iterators.py
--- a/swh/storage/algos/dir_iterators.py
+++ b/swh/storage/algos/dir_iterators.py
@@ -13,10 +13,11 @@
from enum import Enum
+from swh.model.hashutil import hash_to_bytes
from swh.model.identifiers import directory_identifier
# get the hash identifier for an empty directory
-_empty_dir_hash = directory_identifier({'entries': []})
+_empty_dir_hash = hash_to_bytes(directory_identifier({'entries': []}))
def _get_dir(storage, dir_id):
@@ -69,19 +70,14 @@
Args:
dir_id (bytes): identifier of a root directory
"""
- if dir_id:
- if dir_id == _empty_dir_hash:
- self.frames.append([])
- else:
- # get directory entries
- dir_data = _get_dir(self.storage, dir_id)
- # sort them in lexicographical order
- dir_data = sorted(dir_data, key=lambda e: e['name'])
- # reverse the ordering in order to unstack the "smallest"
- # entry each time the iterator advances
- dir_data.reverse()
- # push the directory frame to the main stack
- self.frames.append(dir_data)
+ # get directory entries
+ dir_data = _get_dir(self.storage, dir_id)
+ # sort them in lexicographical order and reverse the ordering
+ # in order to unstack the "smallest" entry each time the
+ # iterator advances
+ dir_data = sorted(dir_data, key=lambda e: e['name'], reverse=True)
+ # push the directory frame to the main stack
+ self.frames.append(dir_data)
def top(self):
"""
@@ -157,7 +153,8 @@
self.has_started = True
return current
- if descend and self.current_is_dir():
+ if descend and self.current_is_dir() \
+ and current['target'] != _empty_dir_hash:
self._push_dir_frame(current['target'])
else:
self.drop()
@@ -201,6 +198,36 @@
self.frames.pop()
self.drop()
+ def __next__(self):
+ entry = self.step()
+ if not entry:
+ raise StopIteration
+ entry['path'] = self.current_path()
+ return entry
+
+ def __iter__(self):
+ return DirectoryIterator(self.storage, self.root_dir_id,
+ self.base_path)
+
+
+def dir_iterator(storage, dir_id):
+ """
+ Return an iterator for recursively visiting a directory and
+ its sub-directories. The associated paths are visited in
+ lexicographic depth-first search order.
+
+ Args:
+ storage (swh.storage.Storage): an instance of a swh storage
+ dir_id (bytes): a directory identifier
+
+ Returns:
+ swh.storage.algos.dir_iterators.DirectoryIterator: an iterator
+ returning a dict at each iteration step describing a directory
+ entry. A 'path' field is added to that dict to store the
+ path of the entry relative to the root directory.
+ """
+ return DirectoryIterator(storage, dir_id)
+
class Remaining(Enum):
"""
diff --git a/swh/storage/tests/algos/test_diff.py b/swh/storage/tests/algos/test_diff.py
--- a/swh/storage/tests/algos/test_diff.py
+++ b/swh/storage/tests/algos/test_diff.py
@@ -7,84 +7,15 @@
import unittest
-from nose.tools import istest, nottest
from unittest.mock import patch
+from nose.tools import istest, nottest
+
+from swh.model.hashutil import hash_to_bytes
from swh.model.identifiers import directory_identifier
from swh.storage.algos import diff
-
-class DirectoryModel(object):
- """
- Quick and dirty directory model to ease the writing
- of revision trees differential tests.
- """
- def __init__(self, name=''):
- self.data = {}
- self.data['name'] = name
- self.data['perms'] = 16384
- self.data['type'] = 'dir'
- self.data['entries'] = []
- self.data['entry_idx'] = {}
-
- def __getitem__(self, item):
- if item == 'target':
- return directory_identifier(self)
- else:
- return self.data[item]
-
- def add_file(self, path, sha1=None):
- path_parts = path.split(b'/')
- if len(path_parts) == 1:
- self['entry_idx'][path] = len(self['entries'])
- self['entries'].append({
- 'target': sha1,
- 'name': path,
- 'perms': 33188,
- 'type': 'file'
- })
- else:
- if not path_parts[0] in self['entry_idx']:
- self['entry_idx'][path_parts[0]] = len(self['entries'])
- self['entries'].append(DirectoryModel(path_parts[0]))
- if path_parts[1]:
- dir_idx = self['entry_idx'][path_parts[0]]
- self['entries'][dir_idx].add_file(b'/'.join(path_parts[1:]), sha1)
-
- def get_hash_data(self, entry_hash):
- if self['target'] == entry_hash:
- ret = []
- for e in self['entries']:
- ret.append({
- 'target': e['target'],
- 'name': e['name'],
- 'perms': e['perms'],
- 'type': e['type']
- })
- return ret
- else:
- for e in self['entries']:
- if e['type'] == 'file' and e['target'] == entry_hash:
- return e
- elif e['type'] == 'dir':
- data = e.get_hash_data(entry_hash)
- if data:
- return data
- return None
-
- def get_path_data(self, path):
- path_parts = path.split(b'/')
- entry_idx = self['entry_idx'][path_parts[0]]
- entry = self['entries'][entry_idx]
- if len(path_parts) == 1:
- return {
- 'target': entry['target'],
- 'name': entry['name'],
- 'perms': entry['perms'],
- 'type': entry['type']
- }
- else:
- return entry.get_path_data(b'/'.join(path_parts[1:]))
+from .test_dir_iterator import DirectoryModel
@patch('swh.storage.algos.diff._get_rev')
@@ -95,20 +26,25 @@
def diff_revisions(self, rev_from, rev_to, from_dir_model, to_dir_model,
expected_changes, mock_get_dir, mock_get_rev):
+ rev_from_bytes = hash_to_bytes(rev_from)
+ rev_to_bytes = hash_to_bytes(rev_to)
+
def _get_rev(*args, **kwargs):
- if args[1] == rev_from:
+ if args[1] == rev_from_bytes:
return {'directory': from_dir_model['target']}
else:
return {'directory': to_dir_model['target']}
def _get_dir(*args, **kwargs):
- return from_dir_model.get_hash_data(args[1]) or \
- to_dir_model.get_hash_data(args[1])
+ from_dir = from_dir_model.get_hash_data(args[1])
+ to_dir = to_dir_model.get_hash_data(args[1])
+ return from_dir if from_dir != None else to_dir
mock_get_rev.side_effect = _get_rev
mock_get_dir.side_effect = _get_dir
- changes = diff.diff_revisions(None, rev_from, rev_to, track_renaming=True)
+ changes = diff.diff_revisions(None, rev_from_bytes, rev_to_bytes,
+ track_renaming=True)
self.assertEqual(changes, expected_changes)
diff --git a/swh/storage/tests/algos/test_dir_iterator.py b/swh/storage/tests/algos/test_dir_iterator.py
new file mode 100644
--- /dev/null
+++ b/swh/storage/tests/algos/test_dir_iterator.py
@@ -0,0 +1,153 @@
+
+# Copyright (C) 2018 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import unittest
+
+from unittest.mock import patch
+
+from nose.tools import istest, nottest
+
+from swh.model.from_disk import DentryPerms
+from swh.model.hashutil import hash_to_bytes, MultiHash
+from swh.model.identifiers import directory_identifier
+from swh.storage.algos.dir_iterators import dir_iterator
+
+# flake8: noqa
+
+class DirectoryModel(object):
+ """
+ Quick and dirty directory model to ease the writing
+ of tests for directory iterators and revision tree diffs.
+ """
+ def __init__(self, name=''):
+ self.data = {}
+ self.data['name'] = name
+ self.data['perms'] = DentryPerms.directory
+ self.data['type'] = 'dir'
+ self.data['entries'] = []
+ self.data['entry_idx'] = {}
+
+ def __getitem__(self, item):
+ if item == 'target':
+ return hash_to_bytes(directory_identifier(self))
+ else:
+ return self.data[item]
+
+ def add_file(self, path, sha1=None):
+ path_parts = path.split(b'/')
+ sha1 = hash_to_bytes(sha1) if sha1 \
+ else MultiHash.from_data(path).digest()['sha1']
+ if len(path_parts) == 1:
+ self['entry_idx'][path] = len(self['entries'])
+ self['entries'].append({
+ 'target': sha1,
+ 'name': path,
+ 'perms': DentryPerms.content,
+ 'type': 'file'
+ })
+ else:
+ if not path_parts[0] in self['entry_idx']:
+ self['entry_idx'][path_parts[0]] = len(self['entries'])
+ self['entries'].append(DirectoryModel(path_parts[0]))
+ if path_parts[1]:
+ dir_idx = self['entry_idx'][path_parts[0]]
+ self['entries'][dir_idx].add_file(b'/'.join(path_parts[1:]), sha1)
+
+ def get_hash_data(self, entry_hash):
+ if self['target'] == entry_hash:
+ ret = []
+ for e in self['entries']:
+ ret.append({
+ 'target': e['target'],
+ 'name': e['name'],
+ 'perms': e['perms'],
+ 'type': e['type']
+ })
+ return ret
+ else:
+ for e in self['entries']:
+ if e['type'] == 'file' and e['target'] == entry_hash:
+ return e
+ elif e['type'] == 'dir':
+ data = e.get_hash_data(entry_hash)
+ if data:
+ return data
+ return None
+
+ def get_path_data(self, path):
+ path_parts = path.split(b'/')
+ entry_idx = self['entry_idx'][path_parts[0]]
+ entry = self['entries'][entry_idx]
+ if len(path_parts) == 1:
+ return {
+ 'target': entry['target'],
+ 'name': entry['name'],
+ 'perms': entry['perms'],
+ 'type': entry['type']
+ }
+ else:
+ return entry.get_path_data(b'/'.join(path_parts[1:]))
+
+
+@patch('swh.storage.algos.dir_iterators._get_dir')
+class TestDirectoryIterator(unittest.TestCase):
+
+ @nottest
+ def check_iterated_paths(self, dir_model, expected_paths_order,
+ mock_get_dir):
+
+ def _get_dir(*args, **kwargs):
+ return dir_model.get_hash_data(args[1])
+
+ mock_get_dir.side_effect = _get_dir # noqa
+ paths_order = [e['path'] for e in dir_iterator(None, dir_model['target'])]
+ self.assertEqual(paths_order, expected_paths_order)
+
+ @istest
+ def test_dir_iterator_empty_dir(self, mock_get_dir):
+ dir_model = DirectoryModel()
+ expected_paths_order = []
+ self.check_iterated_paths(dir_model, expected_paths_order, mock_get_dir)
+
+ @istest
+ def test_dir_iterator_no_empty_dirs(self, mock_get_dir):
+ dir_model = DirectoryModel()
+ dir_model.add_file(b'xyz/gtr/uhb')
+ dir_model.add_file(b'bca/ef')
+ dir_model.add_file(b'abc/ab')
+ dir_model.add_file(b'abc/bc')
+ dir_model.add_file(b'xyz/ouy/poi')
+
+ expected_paths_order = [b'abc',
+ b'abc/ab',
+ b'abc/bc',
+ b'bca',
+ b'bca/ef',
+ b'xyz',
+ b'xyz/gtr',
+ b'xyz/gtr/uhb',
+ b'xyz/ouy',
+ b'xyz/ouy/poi']
+
+ self.check_iterated_paths(dir_model, expected_paths_order, mock_get_dir)
+
+ @istest
+ def test_dir_iterator_with_empty_dirs(self, mock_get_dir):
+ dir_model = DirectoryModel()
+ dir_model.add_file(b'xyz/gtr/')
+ dir_model.add_file(b'bca/ef')
+ dir_model.add_file(b'abc/')
+ dir_model.add_file(b'xyz/ouy/poi')
+
+ expected_paths_order = [b'abc',
+ b'bca',
+ b'bca/ef',
+ b'xyz',
+ b'xyz/gtr',
+ b'xyz/ouy',
+ b'xyz/ouy/poi']
+
+ self.check_iterated_paths(dir_model, expected_paths_order, mock_get_dir)
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Nov 5 2024, 12:03 PM (18 w, 5 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3217073
Attached To
D444: storage.algos.dir_iterators: Fixes and improvements
Event Timeline
Log In to Comment