diff --git a/swh/storage/algos/dir_iterators.py b/swh/storage/algos/dir_iterators.py index 897d0a82..e96ee68c 100644 --- a/swh/storage/algos/dir_iterators.py +++ b/swh/storage/algos/dir_iterators.py @@ -1,345 +1,374 @@ # Copyright (C) 2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information # Utility module to iterate on directory trees. # The implementation is inspired from the work of Alberto Cortés # for the go-git project. For more details, you can refer to: # - this blog post: https://blog.sourced.tech/post/difftree/ # - the reference implementation in go: # https://github.com/src-d/go-git/tree/master/utils/merkletrie from enum import Enum from swh.model.hashutil import hash_to_bytes from swh.model.identifiers import directory_identifier # get the hash identifier for an empty directory _empty_dir_hash = hash_to_bytes(directory_identifier({'entries': []})) def _get_dir(storage, dir_id): """ Return directory data from swh storage. """ return storage.directory_ls(dir_id) if dir_id else [] class DirectoryIterator(object): """ Helper class used to iterate on a directory tree in a depth-first search way with some additional features: - sibling nodes are iterated in lexicographic order by name - it is possible to skip the visit of sub-directories nodes for efficiency reasons when comparing two trees (no need to go deeper if two directories have the same hash) """ def __init__(self, storage, dir_id, base_path=b''): """ Args: storage (swh.storage.storage.Storage): instance of swh storage (either local or remote) dir_id (bytes): identifier of a root directory base_path (bytes): optional base path used when traversing a sub-directory """ self.storage = storage self.root_dir_id = dir_id self.base_path = base_path self.restart() def restart(self): """ Restart the iteration at the beginning. 
""" # stack of frames representing currently visited directories: # the root directory is at the bottom while the current one # is at the top self.frames = [] self._push_dir_frame(self.root_dir_id) self.has_started = False def _push_dir_frame(self, dir_id): """ Visit a sub-directory by pushing a new frame to the stack. Each frame is itself a stack of directory entries. Args: dir_id (bytes): identifier of a root directory """ # get directory entries dir_data = _get_dir(self.storage, dir_id) - # sort them in lexicographical order - dir_data = sorted(dir_data, key=lambda e: e['name']) - # reverse the ordering in order to unstack the "smallest" - # entry each time the iterator advances - dir_data.reverse() + # sort them in lexicographical order and reverse the ordering + # in order to unstack the "smallest" entry each time the + # iterator advances + dir_data = sorted(dir_data, key=lambda e: e['name'], reverse=True) # push the directory frame to the main stack self.frames.append(dir_data) def top(self): """ Returns: list: The top frame of the main directories stack """ if not self.frames: return None return self.frames[-1] def current(self): """ Returns: dict: The current visited directory entry, i.e. 
the top element from the top frame """ top_frame = self.top() if not top_frame: return None return top_frame[-1] def current_hash(self): """ Returns: bytes: The hash value of the currently visited directory entry """ return self.current()['target'] def current_perms(self): """ Returns: int: The permissions value of the currently visited directory entry """ return self.current()['perms'] def current_path(self): """ Returns: bytes: The absolute path from the root directory of the currently visited directory entry """ top_frame = self.top() if not top_frame: return None path = [] for frame in self.frames: path.append(frame[-1]['name']) return self.base_path + b'/'.join(path) def current_is_dir(self): """ Returns: bool: If the currently visited directory entry is a directory """ return self.current()['type'] == 'dir' def _advance(self, descend): """ Advance in the tree iteration. Args: descend (bool): whether or not to push a new frame if the currently visited element is a sub-directory Returns: dict: The description of the newly visited directory entry """ current = self.current() if not self.has_started or not current: self.has_started = True return current if descend and self.current_is_dir() \ and current['target'] != _empty_dir_hash: self._push_dir_frame(current['target']) else: self.drop() return self.current() def next(self): """ Advance the tree iteration by dropping the current visited directory entry from the top frame. If the top frame ends up empty, the operation is recursively applied to remove all empty frames as the tree is climbed up towards its root. Returns: dict: The description of the newly visited directory entry """ return self._advance(False) def step(self): """ Advance the tree iteration like the next operation with the difference that if the current visited element is a sub-directory a new frame representing its content is pushed to the main stack.
Returns: dict: The description of the newly visited directory entry """ return self._advance(True) def drop(self): """ Drop the current visited element from the top frame. If the frame ends up empty, the operation is recursively applied. """ frame = self.top() if not frame: return frame.pop() if not frame: self.frames.pop() self.drop() + def __next__(self): + entry = self.step() + if not entry: + raise StopIteration + entry['path'] = self.current_path() + return entry + + def __iter__(self): + return DirectoryIterator(self.storage, self.root_dir_id, + self.base_path) + + +def dir_iterator(storage, dir_id): + """ + Return an iterator for recursively visiting a directory and + its sub-directories. The associated paths are visited in + lexicographic depth-first search order. + + Args: + storage (swh.storage.Storage): an instance of a swh storage + dir_id (bytes): a directory identifier + + Returns: + swh.storage.algos.dir_iterators.DirectoryIterator: an iterator + returning a dict at each iteration step describing a directory + entry. A 'path' field is added in that dict to store the + absolute path of the entry. + """ + return DirectoryIterator(storage, dir_id) + class Remaining(Enum): """ Enum to represent the current state when iterating on both directory trees at the same time. """ NoMoreFiles = 0 OnlyToFilesRemain = 1 OnlyFromFilesRemain = 2 BothHaveFiles = 3 class DoubleDirectoryIterator(object): """ Helper class to traverse two directory trees at the same time and compare their contents to detect changes between them. """ def __init__(self, storage, dir_from, dir_to): """ Args: storage: instance of swh storage dir_from (bytes): hash identifier of the from directory dir_to (bytes): hash identifier of the to directory """ self.storage = storage self.dir_from = dir_from self.dir_to = dir_to self.restart() def restart(self): """ Restart the double iteration at the beginning. 
""" # initialize custom dfs iterators for the two directories self.it_from = DirectoryIterator(self.storage, self.dir_from) self.it_to = DirectoryIterator(self.storage, self.dir_to) # grab the first element of each iterator self.it_from.next() self.it_to.next() def next_from(self): """ Apply the next operation on the from iterator. """ self.it_from.next() def next_to(self): """ Apply the next operation on the to iterator. """ self.it_to.next() def next_both(self): """ Apply the next operation on both iterators. """ self.next_from() self.next_to() def step_from(self): """ Apply the step operation on the from iterator. """ self.it_from.step() def step_to(self): """ Apply the step operation on the to iterator. """ self.it_to.step() def step_both(self): """ Apply the step operation on both iterators. """ self.step_from() self.step_to() def remaining(self): """ Returns: Remaining: the current state of the double iteration """ from_current = self.it_from.current() to_current = self.it_to.current() # no more files to iterate in both iterators if not from_current and not to_current: return Remaining.NoMoreFiles # still some files to iterate in the to iterator elif not from_current and to_current: return Remaining.OnlyToFilesRemain # still some files to iterate in the from iterator elif from_current and not to_current: return Remaining.OnlyFromFilesRemain # still files to iterate in both iterators else: return Remaining.BothHaveFiles def compare(self): """ Compare the current iterated directory entries in both iterators and return the comparison status.
Returns: dict: The status of the comparison with the following bool values: * *same_hash*: indicates if the two entries have the same hash * *same_perms*: indicates if the two entries have the same permissions * *both_are_dirs*: indicates if the two entries are directories * *both_are_files*: indicates if the two entries are regular files * *file_and_dir*: indicates if one of the entries is a directory and the other a regular file * *from_is_empty_dir*: indicates if the from entry is the empty directory * *to_is_empty_dir*: indicates if the to entry is the empty directory """ from_current_hash = self.it_from.current_hash() to_current_hash = self.it_to.current_hash() from_current_perms = self.it_from.current_perms() to_current_perms = self.it_to.current_perms() from_is_dir = self.it_from.current_is_dir() to_is_dir = self.it_to.current_is_dir() status = {} # compare hash status['same_hash'] = from_current_hash == to_current_hash # compare permissions status['same_perms'] = from_current_perms == to_current_perms # check if both elements are directories status['both_are_dirs'] = from_is_dir and to_is_dir # check if both elements are regular files status['both_are_files'] = not from_is_dir and not to_is_dir # check if one element is a directory, the other a regular file status['file_and_dir'] = (not status['both_are_dirs'] and not status['both_are_files']) # check if the from element is the empty directory status['from_is_empty_dir'] = (from_is_dir and from_current_hash == _empty_dir_hash) # check if the to element is the empty directory status['to_is_empty_dir'] = (to_is_dir and to_current_hash == _empty_dir_hash) return status diff --git a/swh/storage/tests/algos/test_diff.py b/swh/storage/tests/algos/test_diff.py index 6551e9b3..c25e0f31 100644 --- a/swh/storage/tests/algos/test_diff.py +++ b/swh/storage/tests/algos/test_diff.py @@ -1,374 +1,304 @@ # Copyright (C) 2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this
distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information # flake8: noqa import unittest -from nose.tools import istest, nottest from unittest.mock import patch +from nose.tools import istest, nottest + from swh.model.hashutil import hash_to_bytes from swh.model.identifiers import directory_identifier from swh.storage.algos import diff - -class DirectoryModel(object): - """ - Quick and dirty directory model to ease the writing - of revision trees differential tests. - """ - def __init__(self, name=''): - self.data = {} - self.data['name'] = name - self.data['perms'] = 16384 - self.data['type'] = 'dir' - self.data['entries'] = [] - self.data['entry_idx'] = {} - - def __getitem__(self, item): - if item == 'target': - return hash_to_bytes(directory_identifier(self)) - else: - return self.data[item] - - def add_file(self, path, sha1=None): - path_parts = path.split(b'/') - if len(path_parts) == 1: - self['entry_idx'][path] = len(self['entries']) - self['entries'].append({ - 'target': hash_to_bytes(sha1), - 'name': path, - 'perms': 33188, - 'type': 'file' - }) - else: - if not path_parts[0] in self['entry_idx']: - self['entry_idx'][path_parts[0]] = len(self['entries']) - self['entries'].append(DirectoryModel(path_parts[0])) - if path_parts[1]: - dir_idx = self['entry_idx'][path_parts[0]] - self['entries'][dir_idx].add_file(b'/'.join(path_parts[1:]), sha1) - - def get_hash_data(self, entry_hash): - if self['target'] == entry_hash: - ret = [] - for e in self['entries']: - ret.append({ - 'target': e['target'], - 'name': e['name'], - 'perms': e['perms'], - 'type': e['type'] - }) - return ret - else: - for e in self['entries']: - if e['type'] == 'file' and e['target'] == entry_hash: - return e - elif e['type'] == 'dir': - data = e.get_hash_data(entry_hash) - if data: - return data - return None - - def get_path_data(self, path): - path_parts = path.split(b'/') - entry_idx = 
self['entry_idx'][path_parts[0]] - entry = self['entries'][entry_idx] - if len(path_parts) == 1: - return { - 'target': entry['target'], - 'name': entry['name'], - 'perms': entry['perms'], - 'type': entry['type'] - } - else: - return entry.get_path_data(b'/'.join(path_parts[1:])) +from .test_dir_iterator import DirectoryModel @patch('swh.storage.algos.diff._get_rev') @patch('swh.storage.algos.dir_iterators._get_dir') class TestDiffRevisions(unittest.TestCase): @nottest def diff_revisions(self, rev_from, rev_to, from_dir_model, to_dir_model, expected_changes, mock_get_dir, mock_get_rev): rev_from_bytes = hash_to_bytes(rev_from) rev_to_bytes = hash_to_bytes(rev_to) def _get_rev(*args, **kwargs): if args[1] == rev_from_bytes: return {'directory': from_dir_model['target']} else: return {'directory': to_dir_model['target']} def _get_dir(*args, **kwargs): from_dir = from_dir_model.get_hash_data(args[1]) to_dir = to_dir_model.get_hash_data(args[1]) return from_dir if from_dir != None else to_dir mock_get_rev.side_effect = _get_rev mock_get_dir.side_effect = _get_dir changes = diff.diff_revisions(None, rev_from_bytes, rev_to_bytes, track_renaming=True) self.assertEqual(changes, expected_changes) @istest def test_insert_delete(self, mock_get_dir, mock_get_rev): rev_from = '898ff03e1e7925ecde3da66327d3cdc7e07625ba' rev_to = '647c3d381e67490e82cdbbe6c96e46d5e1628ce2' from_dir_model = DirectoryModel() to_dir_model = DirectoryModel() to_dir_model.add_file(b'file1', 'ea15f54ca215e7920c60f564315ebb7f911a5204') to_dir_model.add_file(b'file2', '3e5faecb3836ffcadf82cc160787e35d4e2bec6a') to_dir_model.add_file(b'file3', '2ae33b2984974d35eababe4890d37fbf4bce6b2c') expected_changes = \ [{ 'type': 'insert', 'from': None, 'from_path': None, 'to': to_dir_model.get_path_data(b'file1'), 'to_path': b'file1' }, { 'type': 'insert', 'from': None, 'from_path': None, 'to': to_dir_model.get_path_data(b'file2'), 'to_path': b'file2' }, { 'type': 'insert', 'from': None, 'from_path': None, 'to': 
to_dir_model.get_path_data(b'file3'), 'to_path': b'file3' }] self.diff_revisions(rev_from, rev_to, from_dir_model, to_dir_model, expected_changes, mock_get_dir, mock_get_rev) from_dir_model = DirectoryModel() from_dir_model.add_file(b'file1', 'ea15f54ca215e7920c60f564315ebb7f911a5204') from_dir_model.add_file(b'file2', '3e5faecb3836ffcadf82cc160787e35d4e2bec6a') from_dir_model.add_file(b'file3', '2ae33b2984974d35eababe4890d37fbf4bce6b2c') to_dir_model = DirectoryModel() expected_changes = \ [{ 'type': 'delete', 'from': from_dir_model.get_path_data(b'file1'), 'from_path': b'file1', 'to': None, 'to_path': None }, { 'type': 'delete', 'from': from_dir_model.get_path_data(b'file2'), 'from_path': b'file2', 'to': None, 'to_path': None }, { 'type': 'delete', 'from': from_dir_model.get_path_data(b'file3'), 'from_path': b'file3', 'to': None, 'to_path': None }] self.diff_revisions(rev_from, rev_to, from_dir_model, to_dir_model, expected_changes, mock_get_dir, mock_get_rev) @istest def test_onelevel_diff(self, mock_get_dir, mock_get_rev): rev_from = '898ff03e1e7925ecde3da66327d3cdc7e07625ba' rev_to = '647c3d381e67490e82cdbbe6c96e46d5e1628ce2' from_dir_model = DirectoryModel() from_dir_model.add_file(b'file1', 'ea15f54ca215e7920c60f564315ebb7f911a5204') from_dir_model.add_file(b'file2', 'f4a96b2000be83b61254d107046fa9777b17eb34') from_dir_model.add_file(b'file3', 'd3c00f9396c6d0277727cec522ff6ad1ea0bc2da') to_dir_model = DirectoryModel() to_dir_model.add_file(b'file2', '3ee0f38ee0ea23cc2c8c0b9d66b27be4596b002b') to_dir_model.add_file(b'file3', 'd3c00f9396c6d0277727cec522ff6ad1ea0bc2da') to_dir_model.add_file(b'file4', '40460b9653b1dc507e1b6eb333bd4500634bdffc') expected_changes = \ [{ 'type': 'delete', 'from': from_dir_model.get_path_data(b'file1'), 'from_path': b'file1', 'to': None, 'to_path': None}, { 'type': 'modify', 'from': from_dir_model.get_path_data(b'file2'), 'from_path': b'file2', 'to': to_dir_model.get_path_data(b'file2'), 'to_path': b'file2'}, { 'type': 'insert', 
'from': None, 'from_path': None, 'to': to_dir_model.get_path_data(b'file4'), 'to_path': b'file4' }] self.diff_revisions(rev_from, rev_to, from_dir_model, to_dir_model, expected_changes, mock_get_dir, mock_get_rev) @istest def test_twolevels_diff(self, mock_get_dir, mock_get_rev): rev_from = '898ff03e1e7925ecde3da66327d3cdc7e07625ba' rev_to = '647c3d381e67490e82cdbbe6c96e46d5e1628ce2' from_dir_model = DirectoryModel() from_dir_model.add_file(b'file1', 'ea15f54ca215e7920c60f564315ebb7f911a5204') from_dir_model.add_file(b'dir1/file1', '8335fca266811bac7ae5c8e1621476b4cf4156b6') from_dir_model.add_file(b'dir1/file2', 'a6127d909e79f1fcb28bbf220faf86e7be7831e5') from_dir_model.add_file(b'dir1/file3', '18049b8d067ce1194a7e1cce26cfa3ae4242a43d') from_dir_model.add_file(b'file2', 'd3c00f9396c6d0277727cec522ff6ad1ea0bc2da') to_dir_model = DirectoryModel() to_dir_model.add_file(b'file1', '3ee0f38ee0ea23cc2c8c0b9d66b27be4596b002b') to_dir_model.add_file(b'dir1/file2', 'de3548b32a8669801daa02143a66dae21fe852fd') to_dir_model.add_file(b'dir1/file3', '18049b8d067ce1194a7e1cce26cfa3ae4242a43d') to_dir_model.add_file(b'dir1/file4', 'f5c3f42aec5fe7b92276196c350cbadaf4c51f87') to_dir_model.add_file(b'file2', 'd3c00f9396c6d0277727cec522ff6ad1ea0bc2da') expected_changes = \ [{ 'type': 'delete', 'from': from_dir_model.get_path_data(b'dir1/file1'), 'from_path': b'dir1/file1', 'to': None, 'to_path': None }, { 'type': 'modify', 'from': from_dir_model.get_path_data(b'dir1/file2'), 'from_path': b'dir1/file2', 'to': to_dir_model.get_path_data(b'dir1/file2'), 'to_path': b'dir1/file2' }, { 'type': 'insert', 'from': None, 'from_path': None, 'to': to_dir_model.get_path_data(b'dir1/file4'), 'to_path': b'dir1/file4' }, { 'type': 'modify', 'from': from_dir_model.get_path_data(b'file1'), 'from_path': b'file1', 'to': to_dir_model.get_path_data(b'file1'), 'to_path': b'file1' }] self.diff_revisions(rev_from, rev_to, from_dir_model, to_dir_model, expected_changes, mock_get_dir, mock_get_rev) @istest def 
test_insert_delete_empty_dirs(self, mock_get_dir, mock_get_rev): rev_from = '898ff03e1e7925ecde3da66327d3cdc7e07625ba' rev_to = '647c3d381e67490e82cdbbe6c96e46d5e1628ce2' from_dir_model = DirectoryModel() from_dir_model.add_file(b'dir3/file1', 'ea15f54ca215e7920c60f564315ebb7f911a5204') to_dir_model = DirectoryModel() to_dir_model.add_file(b'dir3/file1', 'ea15f54ca215e7920c60f564315ebb7f911a5204') to_dir_model.add_file(b'dir3/dir1/') expected_changes = \ [{ 'type': 'insert', 'from': None, 'from_path': None, 'to': to_dir_model.get_path_data(b'dir3/dir1'), 'to_path': b'dir3/dir1' }] self.diff_revisions(rev_from, rev_to, from_dir_model, to_dir_model, expected_changes, mock_get_dir, mock_get_rev) from_dir_model = DirectoryModel() from_dir_model.add_file(b'dir1/dir2/') from_dir_model.add_file(b'dir1/file1', 'ea15f54ca215e7920c60f564315ebb7f911a5204') to_dir_model = DirectoryModel() to_dir_model.add_file(b'dir1/file1', 'ea15f54ca215e7920c60f564315ebb7f911a5204') expected_changes = \ [{ 'type': 'delete', 'from': from_dir_model.get_path_data(b'dir1/dir2'), 'from_path': b'dir1/dir2', 'to': None, 'to_path': None }] self.diff_revisions(rev_from, rev_to, from_dir_model, to_dir_model, expected_changes, mock_get_dir, mock_get_rev) @istest def test_track_renaming(self, mock_get_dir, mock_get_rev): rev_from = '898ff03e1e7925ecde3da66327d3cdc7e07625ba' rev_to = '647c3d381e67490e82cdbbe6c96e46d5e1628ce2' from_dir_model = DirectoryModel() from_dir_model.add_file(b'file1_oldname', 'ea15f54ca215e7920c60f564315ebb7f911a5204') from_dir_model.add_file(b'dir1/file1_oldname', 'ea15f54ca215e7920c60f564315ebb7f911a5204') from_dir_model.add_file(b'file2_oldname', 'd3c00f9396c6d0277727cec522ff6ad1ea0bc2da') to_dir_model = DirectoryModel() to_dir_model.add_file(b'dir1/file1_newname', 'ea15f54ca215e7920c60f564315ebb7f911a5204') to_dir_model.add_file(b'dir2/file1_newname', 'ea15f54ca215e7920c60f564315ebb7f911a5204') to_dir_model.add_file(b'file2_newname', 
'd3c00f9396c6d0277727cec522ff6ad1ea0bc2da') expected_changes = \ [{ 'type': 'rename', 'from': from_dir_model.get_path_data(b'dir1/file1_oldname'), 'from_path': b'dir1/file1_oldname', 'to': to_dir_model.get_path_data(b'dir1/file1_newname'), 'to_path': b'dir1/file1_newname' }, { 'type': 'rename', 'from': from_dir_model.get_path_data(b'file1_oldname'), 'from_path': b'file1_oldname', 'to': to_dir_model.get_path_data(b'dir2/file1_newname'), 'to_path': b'dir2/file1_newname' }, { 'type': 'rename', 'from': from_dir_model.get_path_data(b'file2_oldname'), 'from_path': b'file2_oldname', 'to': to_dir_model.get_path_data(b'file2_newname'), 'to_path': b'file2_newname' }] self.diff_revisions(rev_from, rev_to, from_dir_model, to_dir_model, expected_changes, mock_get_dir, mock_get_rev) diff --git a/swh/storage/tests/algos/test_dir_iterator.py b/swh/storage/tests/algos/test_dir_iterator.py new file mode 100644 index 00000000..44830c7b --- /dev/null +++ b/swh/storage/tests/algos/test_dir_iterator.py @@ -0,0 +1,153 @@ + +# Copyright (C) 2018 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +import unittest + +from unittest.mock import patch + +from nose.tools import istest, nottest + +from swh.model.from_disk import DentryPerms +from swh.model.hashutil import hash_to_bytes, MultiHash +from swh.model.identifiers import directory_identifier +from swh.storage.algos.dir_iterators import dir_iterator + +# flake8: noqa + +class DirectoryModel(object): + """ + Quick and dirty directory model to ease the writing + of directory iterators and revision trees differential tests. 
+ """ + def __init__(self, name=''): + self.data = {} + self.data['name'] = name + self.data['perms'] = DentryPerms.directory + self.data['type'] = 'dir' + self.data['entries'] = [] + self.data['entry_idx'] = {} + + def __getitem__(self, item): + if item == 'target': + return hash_to_bytes(directory_identifier(self)) + else: + return self.data[item] + + def add_file(self, path, sha1=None): + path_parts = path.split(b'/') + sha1 = hash_to_bytes(sha1) if sha1 \ + else MultiHash.from_data(path).digest()['sha1'] + if len(path_parts) == 1: + self['entry_idx'][path] = len(self['entries']) + self['entries'].append({ + 'target': sha1, + 'name': path, + 'perms': DentryPerms.content, + 'type': 'file' + }) + else: + if not path_parts[0] in self['entry_idx']: + self['entry_idx'][path_parts[0]] = len(self['entries']) + self['entries'].append(DirectoryModel(path_parts[0])) + if path_parts[1]: + dir_idx = self['entry_idx'][path_parts[0]] + self['entries'][dir_idx].add_file(b'/'.join(path_parts[1:]), sha1) + + def get_hash_data(self, entry_hash): + if self['target'] == entry_hash: + ret = [] + for e in self['entries']: + ret.append({ + 'target': e['target'], + 'name': e['name'], + 'perms': e['perms'], + 'type': e['type'] + }) + return ret + else: + for e in self['entries']: + if e['type'] == 'file' and e['target'] == entry_hash: + return e + elif e['type'] == 'dir': + data = e.get_hash_data(entry_hash) + if data: + return data + return None + + def get_path_data(self, path): + path_parts = path.split(b'/') + entry_idx = self['entry_idx'][path_parts[0]] + entry = self['entries'][entry_idx] + if len(path_parts) == 1: + return { + 'target': entry['target'], + 'name': entry['name'], + 'perms': entry['perms'], + 'type': entry['type'] + } + else: + return entry.get_path_data(b'/'.join(path_parts[1:])) + + +@patch('swh.storage.algos.dir_iterators._get_dir') +class TestDirectoryIterator(unittest.TestCase): + + @nottest + def check_iterated_paths(self, dir_model, expected_paths_order, + 
mock_get_dir): + + def _get_dir(*args, **kwargs): + return dir_model.get_hash_data(args[1]) + + mock_get_dir.side_effect = _get_dir # noqa + paths_order = [e['path'] for e in dir_iterator(None, dir_model['target'])] + self.assertEqual(paths_order, expected_paths_order) + + @istest + def test_dir_iterator_empty_dir(self, mock_get_dir): + dir_model = DirectoryModel() + expected_paths_order = [] + self.check_iterated_paths(dir_model, expected_paths_order, mock_get_dir) + + @istest + def test_dir_iterator_no_empty_dirs(self, mock_get_dir): + dir_model = DirectoryModel() + dir_model.add_file(b'xyz/gtr/uhb') + dir_model.add_file(b'bca/ef') + dir_model.add_file(b'abc/ab') + dir_model.add_file(b'abc/bc') + dir_model.add_file(b'xyz/ouy/poi') + + expected_paths_order = [b'abc', + b'abc/ab', + b'abc/bc', + b'bca', + b'bca/ef', + b'xyz', + b'xyz/gtr', + b'xyz/gtr/uhb', + b'xyz/ouy', + b'xyz/ouy/poi'] + + self.check_iterated_paths(dir_model, expected_paths_order, mock_get_dir) + + @istest + def test_dir_iterator_with_empty_dirs(self, mock_get_dir): + dir_model = DirectoryModel() + dir_model.add_file(b'xyz/gtr/') + dir_model.add_file(b'bca/ef') + dir_model.add_file(b'abc/') + dir_model.add_file(b'xyz/ouy/poi') + + expected_paths_order = [b'abc', + b'bca', + b'bca/ef', + b'xyz', + b'xyz/gtr', + b'xyz/ouy', + b'xyz/ouy/poi'] + + self.check_iterated_paths(dir_model, expected_paths_order, mock_get_dir)