Page MenuHomeSoftware Heritage

D629.id2000.diff
No OneTemporary

D629.id2000.diff

diff --git a/swh/storage/algos/revisions_walker.py b/swh/storage/algos/revisions_walker.py
new file mode 100644
--- /dev/null
+++ b/swh/storage/algos/revisions_walker.py
@@ -0,0 +1,509 @@
+# Copyright (C) 2018 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import heapq
+
+from abc import ABCMeta, abstractmethod
+from collections import deque
+
+_revs_walker_classes = {}
+
+
class _RevisionsWalkerMetaClass(ABCMeta):
    """
    Metaclass registering every walker class that declares a ``rw_type``
    class attribute into the module-level ``_revs_walker_classes``
    registry, so :func:`get_revisions_walker` can look it up by name.
    """
    def __new__(cls, clsname, bases, attrs):
        walker_cls = super().__new__(cls, clsname, bases, attrs)
        # only classes that declare their own 'rw_type' are registered;
        # inherited attributes do not re-register a subclass
        if 'rw_type' in attrs:
            _revs_walker_classes[attrs['rw_type']] = walker_cls
        return walker_cls
+
+
class RevisionsWalker(metaclass=_RevisionsWalkerMetaClass):
    """
    Abstract base class encapsulating the logic to walk across
    a revisions history starting from a given one.

    It defines an iterator returning the revisions according
    to a specific ordering implemented in derived classes.

    The iteration step performs the following operations:

    1) Check if the iteration is finished by calling method
       :meth:`is_finished` and raises :exc:`StopIteration` if it
       is the case

    2) Get the next unseen revision by calling method
       :meth:`get_next_rev_id`

    3) Process parents of that revision by calling method
       :meth:`process_parent_revs` for the next iteration
       steps

    4) Check if the revision should be returned by calling
       method :meth:`should_return` and returns it if
       it is the case

    In order to easily instantiate a specific type of revisions
    walker, it is recommended to use the factory function
    :func:`get_revisions_walker`.

    Args:
        storage (swh.storage.storage.Storage): instance of swh storage
            (either local or remote)
        rev_start (bytes): a revision identifier
        max_revs (Optional[int]): maximum number of revisions to return
        state (Optional[dict]): previous state of that revisions walker
    """

    def __init__(self, storage, rev_start, max_revs=None, state=None):
        # pending revision identifiers; the concrete container and
        # ordering discipline (heap, queue, stack) is up to subclasses
        self._revs_to_visit = []
        # identifiers of revisions already iterated, to avoid duplicates
        self._done = set()
        # local cache of revision data, keyed by revision identifier
        self._revs = {}
        # last revision returned by the iterator
        self._last_rev = None
        # number of revisions returned so far (checked against max_revs)
        self._num_revs = 0
        self._max_revs = max_revs
        # restore a previously exported state to resume a paginated walk
        # (see export_state)
        if state:
            self._revs_to_visit = state['revs_to_visit']
            self._done = state['done']
            self._last_rev = state['last_rev']
            self._num_revs = state['num_revs']
        self.storage = storage
        # seed the walk with the starting revision
        self.process_rev(rev_start)

    @abstractmethod
    def process_rev(self, rev_id):
        """
        Abstract method whose purpose is to process a newly visited
        revision during the walk.
        Derived classes must implement it according to the desired
        method to walk across the revisions history (for instance
        through a dfs on the revisions DAG).

        Args:
            rev_id (bytes): the newly visited revision identifier
        """
        pass

    @abstractmethod
    def get_next_rev_id(self):
        """
        Abstract method whose purpose is to return the next revision
        identifier during the iteration.
        Derived classes must implement it according to the desired
        method to walk across the revisions history.

        Returns:
            bytes: the identifier of the next revision to iterate
        """
        pass

    def process_parent_revs(self, rev):
        """
        Process the parents of a revision when it is iterated.
        The default implementation simply calls :meth:`process_rev`
        for each parent revision in the order they are declared.

        Args:
            rev (dict): A dict describing a revision as returned by
                :meth:`swh.storage.storage.Storage.revision_get`
        """
        for parent_id in rev['parents']:
            self.process_rev(parent_id)

    def should_return(self, rev):
        """
        Filter out a revision to return if needed.
        Default implementation returns all iterated revisions.

        Args:
            rev (dict): A dict describing a revision as returned by
                :meth:`swh.storage.storage.Storage.revision_get`

        Returns:
            bool: Whether to return the revision in the iteration
        """
        return True

    def is_finished(self):
        """
        Determine if the iteration is finished.
        This method is called at the beginning of each iteration loop.

        Returns:
            bool: Whether the iteration is finished
        """
        # either the requested number of revisions has been emitted ...
        if self._max_revs is not None and self._num_revs >= self._max_revs:
            return True
        # ... or there is nothing left to visit
        if not self._revs_to_visit:
            return True
        return False

    def _get_rev(self, rev_id):
        # Return the revision data for rev_id, fetching from storage on
        # a cache miss.
        rev = self._revs.get(rev_id, None)
        if not rev:
            # cache some revisions in advance to avoid sending too much
            # requests to storage and thus speedup the revisions walk
            for rev in self.storage.revision_log([rev_id], limit=100):
                self._revs[rev['id']] = rev
        return self._revs[rev_id]

    def export_state(self):
        """
        Export the internal state of that revision walker to a dict.
        Its purpose is to continue the iteration in a pagination context.

        Note: the returned dict shares the walker's internal mutable
        objects (list/set) rather than copies of them.

        Returns:
            dict: A dict containing the internal state of that revisions walker
        """
        return {
            'revs_to_visit': self._revs_to_visit,
            'done': self._done,
            'last_rev': self._last_rev,
            'num_revs': self._num_revs
        }

    def __next__(self):
        if self.is_finished():
            raise StopIteration
        while self._revs_to_visit:
            # pick the next revision according to the ordering
            # implemented by the derived class
            rev_id = self.get_next_rev_id()
            if rev_id in self._done:
                continue
            self._done.add(rev_id)
            rev = self._get_rev(rev_id)
            # schedule parents for the next iteration steps
            self.process_parent_revs(rev)
            if self.should_return(rev):
                self._num_revs += 1
                self._last_rev = rev
                return rev
        raise StopIteration

    def __iter__(self):
        return self
+
+
class CommitterDateRevisionsWalker(RevisionsWalker):
    """
    Revisions walker that returns revisions in reverse chronological
    order according to committer date (same behaviour as ``git log``)
    """

    rw_type = 'committer_date'

    def process_rev(self, rev_id):
        """
        Add the revision to a priority queue according to the committer date.

        Args:
            rev_id (bytes): the newly visited revision identifier
        """
        if rev_id not in self._done:
            rev = self._get_rev(rev_id)
            commit_time = rev['committer_date']['timestamp']['seconds']
            # negate the timestamp so the min-heap pops the most
            # recently committed revision first
            heapq.heappush(self._revs_to_visit, (-commit_time, rev_id))

    def get_next_rev_id(self):
        """
        Return the smallest revision from the priority queue, i.e.
        the one with highest committer date.

        Returns:
            bytes: the identifier of the revision with the highest
                committer date among those left to visit
        """
        _, rev_id = heapq.heappop(self._revs_to_visit)
        return rev_id
+
+
class BFSRevisionsWalker(RevisionsWalker):
    """
    Revisions walker that returns revisions in the same order
    as when performing a breadth-first search on the revisions
    DAG.
    """

    rw_type = 'bfs'

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # the base class (and any restored state) uses a plain list;
        # convert it to a deque for O(1) popleft
        self._revs_to_visit = deque(self._revs_to_visit)

    def process_rev(self, rev_id):
        """
        Append the revision to a queue.

        Args:
            rev_id (bytes): the newly visited revision identifier
        """
        if rev_id not in self._done:
            self._revs_to_visit.append(rev_id)

    def get_next_rev_id(self):
        """
        Return the next revision from the queue.

        Returns:
            bytes: the identifier of the next revision in FIFO order
        """
        return self._revs_to_visit.popleft()
+
+
class DFSPostRevisionsWalker(RevisionsWalker):
    """
    Revisions walker that returns revisions in the same order
    as when performing a depth-first search in post-order on the
    revisions DAG (i.e. after visiting a merge commit,
    the merged commit will be visited before the base it was
    merged on).
    """

    rw_type = 'dfs_post'

    def process_rev(self, rev_id):
        """
        Append the revision to a stack.

        Args:
            rev_id (bytes): the newly visited revision identifier
        """
        if rev_id not in self._done:
            self._revs_to_visit.append(rev_id)

    def get_next_rev_id(self):
        """
        Return the next revision from the stack.

        Returns:
            bytes: the identifier of the next revision in LIFO order
        """
        return self._revs_to_visit.pop()
+
+
class DFSRevisionsWalker(DFSPostRevisionsWalker):
    """
    Revisions walker that returns revisions in the same order
    as when performing a depth-first search in pre-order on the
    revisions DAG (i.e. after visiting a merge commit,
    the base commit it was merged on will be visited before
    the merged commit).
    """

    rw_type = 'dfs'

    def process_parent_revs(self, rev):
        """
        Process the parents of an iterated revision in the reverse
        of their declaration order, so that popping the inherited
        stack yields a pre-order traversal.

        Args:
            rev (dict): A dict describing a revision as returned by
                :meth:`swh.storage.storage.Storage.revision_get`
        """
        for parent_id in rev['parents'][::-1]:
            self.process_rev(parent_id)
+
+
class PathRevisionsWalker(CommitterDateRevisionsWalker):
    """
    Revisions walker that returns revisions where a specific
    path in the source tree has been modified, in other terms
    it allows to get the history for a specific file or directory.

    It has a behaviour similar to what ``git log`` offers by default,
    meaning the returned history is simplified in order to only
    show relevant revisions (see the `History Simplification
    <https://git-scm.com/docs/git-log#_history_simplification>`_
    section of the associated manual for more details).

    Please note that to avoid walking the entire history, the iteration
    will stop once a revision where the path has been added is found.

    .. warning:: Due to client-side implementation, performances
        are not optimal when the total numbers of revisions to walk
        is large. This should only be used when the total number of
        revisions does not exceed a couple of thousands.

    Args:
        storage (swh.storage.storage.Storage): instance of swh storage
            (either local or remote)
        rev_start (bytes): a revision identifier
        path (str): the path in the source tree to retrieve the history
        max_revs (Optional[int]): maximum number of revisions to return
        state (Optional[dict]): previous state of that revisions walker
    """

    rw_type = 'path'

    def __init__(self, storage, rev_start, path, **kwargs):
        super().__init__(storage, rev_start, **kwargs)
        # split the path into its utf-8 encoded components, as expected
        # by directory_entry_get_by_path
        paths = path.strip('/').split('/')
        self._path = list(map(lambda p: p.encode('utf-8'), paths))
        # cache: revision root directory id -> path id (None if the
        # path does not exist in that source tree)
        self._rev_dir_path = {}

    def _get_path_id(self, rev_id):
        """
        Return the path checksum identifier in the source tree of the
        provided revision. If the path corresponds to a directory, the
        value computed by :func:`swh.model.identifiers.directory_identifier`
        will be returned. If the path corresponds to a file, its sha1
        checksum will be returned.

        Args:
            rev_id (bytes): a revision identifier

        Returns:
            bytes: the path identifier, or None if the path does not
                exist in the revision's source tree
        """

        rev = self._get_rev(rev_id)

        rev_dir_id = rev['directory']

        if rev_dir_id not in self._rev_dir_path:
            try:
                dir_info = \
                    self.storage.directory_entry_get_by_path(rev_dir_id,
                                                             self._path)
                self._rev_dir_path[rev_dir_id] = dir_info['target']
            except Exception:
                # NOTE(review): broad catch — any storage error is mapped
                # to "path absent in that tree"; confirm this best-effort
                # behaviour is intended
                self._rev_dir_path[rev_dir_id] = None

        return self._rev_dir_path[rev_dir_id]

    def is_finished(self):
        """
        Check if the revisions iteration is finished.
        This checks for the specified path's existence in the last
        returned revision's parents' source trees.
        If not, the iteration is considered finished.

        Returns:
            bool: Whether the iteration is finished
        """
        if self._path and self._last_rev:
            last_rev_parents = self._last_rev['parents']
            last_rev_parents_path_ids = [self._get_path_id(p_rev)
                                         for p_rev in last_rev_parents]
            # the path exists in no parent: the last returned revision
            # is where the path was added, so stop walking
            no_path = all([path_id is None
                           for path_id in last_rev_parents_path_ids])
            if no_path:
                return True
        return super().is_finished()

    def process_parent_revs(self, rev):
        """
        Process parents when a new revision is iterated.
        It enables to get a simplified revisions history in the same
        manner as ``git log``. When a revision has multiple parents,
        the following process is applied. If the revision was a merge,
        and has the same path identifier to one parent, follow only that
        parent (even if there are several parents with the same path
        identifier, follow only one of them.) Otherwise, follow all parents.

        Args:
            rev (dict): A dict describing a revision as returned by
                :meth:`swh.storage.storage.Storage.revision_get`
        """
        rev_path_id = self._get_path_id(rev['id'])

        if rev_path_id:
            if len(rev['parents']) == 1:
                # single parent: always follow it
                self.process_rev(rev['parents'][0])
            else:
                parent_rev_path_ids = [self._get_path_id(p_rev)
                                       for p_rev in rev['parents']]
                # True when no parent shares the revision's path id
                different_trees = all([path_id != rev_path_id
                                       for path_id in parent_rev_path_ids])
                for i, p_rev in enumerate(rev['parents']):
                    if different_trees or \
                            parent_rev_path_ids[i] == rev_path_id:
                        self.process_rev(p_rev)
                        # follow only the first parent with the same
                        # path id (history simplification)
                        if not different_trees:
                            break
        else:
            # path absent from this revision's tree: fall back to
            # processing all parents
            super().process_parent_revs(rev)

    def should_return(self, rev):
        """
        Check if a revision should be returned when iterating.
        It verifies that the specified path has been modified
        by the revision but also that all parents have a path
        identifier different from the revision one in order
        to get a simplified history.

        Args:
            rev (dict): A dict describing a revision as returned by
                :meth:`swh.storage.storage.Storage.revision_get`

        Returns:
            bool: Whether to return the revision in the iteration
        """
        rev_path_id = self._get_path_id(rev['id'])

        if not rev['parents']:
            # root revision: return it only if the path exists there
            return rev_path_id is not None

        parent_rev_path_ids = [self._get_path_id(p_rev)
                               for p_rev in rev['parents']]
        different_trees = all([path_id != rev_path_id
                               for path_id in parent_rev_path_ids])

        # NOTE(review): different_trees already implies the first
        # conjunct (it covers index 0), so it is redundant but harmless
        if rev_path_id != parent_rev_path_ids[0] and different_trees:
            return True

        return False
+
+
def get_revisions_walker(rev_walker_type, *args, **kwargs):
    """
    Instantiate a revisions walker of a given type.

    The following code snippet demonstrates how to use a revisions
    walker for processing a whole revisions history::

        revs_walker = get_revisions_walker('committer_date', rev_id)
        for rev in revs_walker:
            # process revision rev

    It is also possible to walk a revisions history in a paginated
    way as illustrated below::

        def get_revs_history_page(rw_type, rev_id, page_num, page_size,
                                  rw_state):
            max_revs = (page_num + 1) * page_size
            revs_walker = get_revisions_walker(rw_type, rev_id,
                                               max_revs=max_revs,
                                               state=rw_state)
            revs = list(revs_walker)
            rw_state = revs_walker.export_state()
            return revs

        rev_start = ...
        per_page = 50
        rw_state = {}

        for page in range(0, 10):
            revs_page = get_revs_history_page('dfs', rev_start, page,
                                              per_page, rw_state)
            # process revisions page


    Args:
        rev_walker_type (str): the type of revisions walker to return,
            possible values are: *committer_date*, *dfs*, *dfs_post*,
            *bfs* and *path*
        args (list): position arguments to pass to the revisions walker
            constructor
        kwargs (dict): keyword arguments to pass to the revisions walker
            constructor

    Returns:
        RevisionsWalker: an instance of the revisions walker class
            registered for *rev_walker_type*

    Raises:
        ValueError: if no revisions walker is registered for the
            requested type
    """
    try:
        # walker classes register themselves through their metaclass
        revs_walker_class = _revs_walker_classes[rev_walker_type]
    except KeyError:
        # ValueError is more precise than a bare Exception and remains
        # backward compatible with callers catching Exception
        raise ValueError('No revisions walker found for type "%s"'
                         % rev_walker_type)
    return revs_walker_class(*args, **kwargs)
diff --git a/swh/storage/tests/algos/test_revisions_walker.py b/swh/storage/tests/algos/test_revisions_walker.py
new file mode 100644
--- /dev/null
+++ b/swh/storage/tests/algos/test_revisions_walker.py
@@ -0,0 +1,367 @@
+# Copyright (C) 2018 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import unittest
+
+from unittest.mock import patch
+
+from swh.model.hashutil import hash_to_bytes
+from swh.storage.algos.revisions_walker import get_revisions_walker
+
+# For those tests, we will walk the following revisions history
+# with different orderings:
+#
+# * commit b364f53155044e5308a0f73abb3b5f01995a5b7d
+# |\ Merge: 836d498 b94886c
+# | | Author: Adam <adam.janicki@roche.com>
+# | | AuthorDate: Fri Oct 4 12:50:49 2013 +0200
+# | | Commit: Adam <adam.janicki@roche.com>
+# | | CommitDate: Fri Oct 4 12:50:49 2013 +0200
+# | |
+# | | Merge branch 'release/1.0'
+# | |
+# | * commit b94886c500c46e32dc3d7ebae8a5409accd592e5
+# | | Author: Adam <adam.janicki@roche.com>
+# | | AuthorDate: Fri Oct 4 12:50:38 2013 +0200
+# | | Commit: Adam <adam.janicki@roche.com>
+# | | CommitDate: Fri Oct 4 12:50:38 2013 +0200
+# | |
+# | | updating poms for 1.0 release
+# | |
+# | * commit 0cb6b4611d65bee0f57821dac7f611e2f8a02433
+# | | Author: Adam <adam.janicki@roche.com>
+# | | AuthorDate: Fri Oct 4 12:50:38 2013 +0200
+# | | Commit: Adam <adam.janicki@roche.com>
+# | | CommitDate: Fri Oct 4 12:50:38 2013 +0200
+# | |
+# | | updating poms for 1.0 release
+# | |
+# | * commit 2b0240c6d682bad51532eec15b8a7ed6b75c8d31
+# | | Author: Adam Janicki <janickia>
+# | | AuthorDate: Fri Oct 4 12:50:22 2013 +0200
+# | | Commit: Adam Janicki <janickia>
+# | | CommitDate: Fri Oct 4 12:50:22 2013 +0200
+# | |
+# | | For 1.0 release. Allow untracked.
+# | |
+# | * commit b401c50863475db4440c85c10ac0b6423b61554d
+# | | Author: Adam <adam.janicki@roche.com>
+# | | AuthorDate: Fri Oct 4 12:48:12 2013 +0200
+# | | Commit: Adam <adam.janicki@roche.com>
+# | | CommitDate: Fri Oct 4 12:48:12 2013 +0200
+# | |
+# | | updating poms for 1.0 release
+# | |
+# | * commit 9c5051397e5c2e0c258bb639c3dd34406584ca10
+# |/ Author: Adam Janicki <janickia>
+# | AuthorDate: Fri Oct 4 12:47:48 2013 +0200
+# | Commit: Adam Janicki <janickia>
+# | CommitDate: Fri Oct 4 12:47:48 2013 +0200
+# |
+# | For 1.0 release.
+# |
+# * commit 836d498396fb9b5d45c896885f84d8d60a5651dc
+# | Author: Adam Janicki <janickia>
+# | AuthorDate: Fri Oct 4 12:08:16 2013 +0200
+# | Commit: Adam Janicki <janickia>
+# | CommitDate: Fri Oct 4 12:08:16 2013 +0200
+# |
+# | Add ignores
+# |
+# * commit ee96c2a2d397b79070d2b6fe3051290963748358
+# | Author: Adam <adam.janicki@roche.com>
+# | AuthorDate: Fri Oct 4 10:48:16 2013 +0100
+# | Commit: Adam <adam.janicki@roche.com>
+# | CommitDate: Fri Oct 4 10:48:16 2013 +0100
+# |
+# | Reset author
+# |
+# * commit 8f89dda8e072383cf50d42532ae8f52ad89f8fdf
+# Author: Adam <adam.janicki@roche.com>
+# AuthorDate: Fri Oct 4 02:20:19 2013 -0700
+# Commit: Adam <adam.janicki@roche.com>
+# CommitDate: Fri Oct 4 02:20:19 2013 -0700
+#
+# Initial commit
+
+# raw dump of the above history in swh format
# raw dump of the above history in swh format
# (one dict per revision, newest first, as returned by revision_log)
_revisions_list = [
    {'author': {'email': b'adam.janicki@roche.com',
                'fullname': b'Adam <adam.janicki@roche.com>',
                'id': 8040905,
                'name': b'Adam'},
     'committer': {'email': b'adam.janicki@roche.com',
                   'fullname': b'Adam <adam.janicki@roche.com>',
                   'id': 8040905,
                   'name': b'Adam'},
     'committer_date': {'negative_utc': None,
                        'offset': 120,
                        'timestamp': {'microseconds': 0,
                                      'seconds': 1380883849}},
     'date': {'negative_utc': None,
              'offset': 120,
              'timestamp': {'microseconds': 0, 'seconds': 1380883849}},
     'directory': b'\xefX\xe7\xa6\\\xda\xdf\xfdH\xdbH\xfbq\x96@{\x98?9\xfe',
     'id': b'\xb3d\xf51U\x04NS\x08\xa0\xf7:\xbb;_\x01\x99Z[}',
     'message': b"Merge branch 'release/1.0'",
     'metadata': None,
     'parents': [b'\x83mI\x83\x96\xfb\x9b]E\xc8\x96\x88_\x84\xd8\xd6\nVQ\xdc',
                 b'\xb9H\x86\xc5\x00\xc4n2\xdc=~\xba\xe8\xa5@\x9a\xcc\xd5\x92\xe5'],  # noqa
     'synthetic': False,
     'type': 'git'},
    {'author': {'email': b'adam.janicki@roche.com',
                'fullname': b'Adam <adam.janicki@roche.com>',
                'id': 8040905,
                'name': b'Adam'},
     'committer': {'email': b'adam.janicki@roche.com',
                   'fullname': b'Adam <adam.janicki@roche.com>',
                   'id': 8040905,
                   'name': b'Adam'},
     'committer_date': {'negative_utc': None,
                        'offset': 120,
                        'timestamp': {'microseconds': 0,
                                      'seconds': 1380883838}},
     'date': {'negative_utc': None,
              'offset': 120,
              'timestamp': {'microseconds': 0, 'seconds': 1380883838}},
     'directory': b'\xefX\xe7\xa6\\\xda\xdf\xfdH\xdbH\xfbq\x96@{\x98?9\xfe',
     'id': b'\xb9H\x86\xc5\x00\xc4n2\xdc=~\xba\xe8\xa5@\x9a\xcc\xd5\x92\xe5',
     'message': b'updating poms for 1.0 release',
     'metadata': None,
     'parents': [b'\x0c\xb6\xb4a\x1de\xbe\xe0\xf5x!\xda\xc7\xf6\x11\xe2\xf8\xa0$3'],  # noqa
     'synthetic': False,
     'type': 'git'},
    {'author': {'email': b'adam.janicki@roche.com',
                'fullname': b'Adam <adam.janicki@roche.com>',
                'id': 8040905,
                'name': b'Adam'},
     'committer': {'email': b'adam.janicki@roche.com',
                   'fullname': b'Adam <adam.janicki@roche.com>',
                   'id': 8040905,
                   'name': b'Adam'},
     'committer_date': {'negative_utc': None,
                        'offset': 120,
                        'timestamp': {'microseconds': 0,
                                      'seconds': 1380883838}},
     'date': {'negative_utc': None,
              'offset': 120,
              'timestamp': {'microseconds': 0, 'seconds': 1380883838}},
     'directory': b'\xefX\xe7\xa6\\\xda\xdf\xfdH\xdbH\xfbq\x96@{\x98?9\xfe',
     'id': b'\x0c\xb6\xb4a\x1de\xbe\xe0\xf5x!\xda\xc7\xf6\x11\xe2\xf8\xa0$3',
     'message': b'updating poms for 1.0 release',
     'metadata': None,
     'parents': [b'+\x02@\xc6\xd6\x82\xba\xd5\x152\xee\xc1[\x8a~\xd6\xb7\\\x8d1'],
     'synthetic': False,
     'type': 'git'},
    {'author': {'email': b'janickia',
                'fullname': b'Adam Janicki <janickia>',
                'id': 8040906,
                'name': b'Adam Janicki'},
     'committer': {'email': b'janickia',
                   'fullname': b'Adam Janicki <janickia>',
                   'id': 8040906,
                   'name': b'Adam Janicki'},
     'committer_date': {'negative_utc': None,
                        'offset': 120,
                        'timestamp': {'microseconds': 0,
                                      'seconds': 1380883822}},
     'date': {'negative_utc': None,
              'offset': 120,
              'timestamp': {'microseconds': 0, 'seconds': 1380883822}},
     'directory': b'\xefX\xe7\xa6\\\xda\xdf\xfdH\xdbH\xfbq\x96@{\x98?9\xfe',
     'id': b'+\x02@\xc6\xd6\x82\xba\xd5\x152\xee\xc1[\x8a~\xd6\xb7\\\x8d1',
     'message': b'For 1.0 release. Allow untracked.\n',
     'metadata': None,
     'parents': [b'\xb4\x01\xc5\x08cG]\xb4D\x0c\x85\xc1\n\xc0\xb6B;aUM'],
     'synthetic': False,
     'type': 'git'},
    {'author': {'email': b'adam.janicki@roche.com',
                'fullname': b'Adam <adam.janicki@roche.com>',
                'id': 8040905,
                'name': b'Adam'},
     'committer': {'email': b'adam.janicki@roche.com',
                   'fullname': b'Adam <adam.janicki@roche.com>',
                   'id': 8040905,
                   'name': b'Adam'},
     'committer_date': {'negative_utc': None,
                        'offset': 120,
                        'timestamp': {'microseconds': 0,
                                      'seconds': 1380883692}},
     'date': {'negative_utc': None,
              'offset': 120,
              'timestamp': {'microseconds': 0, 'seconds': 1380883692}},
     'directory': b'd@\xe7\x143w\xcb\xf7\xad\xae\x91\xd5\xec\xd8\x95\x82'
                  b'\x02\xa6=\x1b',
     'id': b'\xb4\x01\xc5\x08cG]\xb4D\x0c\x85\xc1\n\xc0\xb6B;aUM',
     'message': b'updating poms for 1.0 release',
     'metadata': None,
     'parents': [b'\x9cPQ9~\\.\x0c%\x8b\xb69\xc3\xdd4@e\x84\xca\x10'],
     'synthetic': False,
     'type': 'git'},
    {'author': {'email': b'janickia',
                'fullname': b'Adam Janicki <janickia>',
                'id': 8040906,
                'name': b'Adam Janicki'},
     'committer': {'email': b'janickia',
                   'fullname': b'Adam Janicki <janickia>',
                   'id': 8040906,
                   'name': b'Adam Janicki'},
     'committer_date': {'negative_utc': None,
                        'offset': 120,
                        'timestamp': {'microseconds': 0,
                                      'seconds': 1380883668}},
     'date': {'negative_utc': None,
              'offset': 120,
              'timestamp': {'microseconds': 0, 'seconds': 1380883668}},
     'directory': b'\n\x857\x94r\xbe\xcc\x04=\xe9}\xe5\xfd\xdf?nR\xe6\xa7\x9e',
     'id': b'\x9cPQ9~\\.\x0c%\x8b\xb69\xc3\xdd4@e\x84\xca\x10',
     'message': b'For 1.0 release.\n',
     'metadata': None,
     'parents': [b'\x83mI\x83\x96\xfb\x9b]E\xc8\x96\x88_\x84\xd8\xd6\nVQ\xdc'],
     'synthetic': False,
     'type': 'git'},
    {'author': {'email': b'janickia',
                'fullname': b'Adam Janicki <janickia>',
                'id': 8040906,
                'name': b'Adam Janicki'},
     'committer': {'email': b'janickia',
                   'fullname': b'Adam Janicki <janickia>',
                   'id': 8040906,
                   'name': b'Adam Janicki'},
     'committer_date': {'negative_utc': None,
                        'offset': 120,
                        'timestamp': {'microseconds': 0,
                                      'seconds': 1380881296}},
     'date': {'negative_utc': None,
              'offset': 120,
              'timestamp': {'microseconds': 0, 'seconds': 1380881296}},
     'directory': b'.\xf9\xa5\xcb\xb0\xd3\xdc\x9b{\xb8\x81\x03l\xe2P\x16c\x0b|\xe6',  # noqa
     'id': b'\x83mI\x83\x96\xfb\x9b]E\xc8\x96\x88_\x84\xd8\xd6\nVQ\xdc',
     'message': b'Add ignores\n',
     'metadata': None,
     'parents': [b'\xee\x96\xc2\xa2\xd3\x97\xb7\x90p\xd2\xb6\xfe0Q)\tct\x83X'],
     'synthetic': False,
     'type': 'git'},
    {'author': {'email': b'adam.janicki@roche.com',
                'fullname': b'Adam <adam.janicki@roche.com>',
                'id': 8040905,
                'name': b'Adam'},
     'committer': {'email': b'adam.janicki@roche.com',
                   'fullname': b'Adam <adam.janicki@roche.com>',
                   'id': 8040905,
                   'name': b'Adam'},
     'committer_date': {'negative_utc': None,
                        'offset': 60,
                        'timestamp': {'microseconds': 0,
                                      'seconds': 1380880096}},
     'date': {'negative_utc': None,
              'offset': 60,
              'timestamp': {'microseconds': 0, 'seconds': 1380880096}},
     'directory': b'\xc7r\xc4\x9f\xc0$\xd4\xab\xff\xcb]\xf6<\xcb\x8b~\xec\xc4\xd1)',  # noqa
     'id': b'\xee\x96\xc2\xa2\xd3\x97\xb7\x90p\xd2\xb6\xfe0Q)\tct\x83X',
     'message': b'Reset author\n',
     'metadata': None,
     'parents': [b'\x8f\x89\xdd\xa8\xe0r8<\xf5\rBS*\xe8\xf5*\xd8\x9f\x8f\xdf'],
     'synthetic': False,
     'type': 'git'},
    {'author': {'email': b'adam.janicki@roche.com',
                'fullname': b'Adam <adam.janicki@roche.com>',
                'id': 8040905,
                'name': b'Adam'},
     'committer': {'email': b'adam.janicki@roche.com',
                   'fullname': b'Adam <adam.janicki@roche.com>',
                   'id': 8040905,
                   'name': b'Adam'},
     'committer_date': {'negative_utc': None,
                        'offset': -420,
                        'timestamp': {'microseconds': 0,
                                      'seconds': 1380878419}},
     'date': {'negative_utc': None,
              'offset': -420,
              'timestamp': {'microseconds': 0, 'seconds': 1380878419}},
     'directory': b'WS\xbaX\xd6x{q\x8f\x020i\xc5\x95\xa01\xf7y\xb2\x80',
     'id': b'\x8f\x89\xdd\xa8\xe0r8<\xf5\rBS*\xe8\xf5*\xd8\x9f\x8f\xdf',
     'message': b'Initial commit\n',
     'metadata': None,
     'parents': [],
     'synthetic': False,
     'type': 'git'}]


# head revision of the history above (hex identifier)
_rev_start = 'b364f53155044e5308a0f73abb3b5f01995a5b7d'
+
+
class RevisionsWalkerTest(unittest.TestCase):
    # Checks that each walker type iterates the fixture history above
    # in its documented order.

    @patch('swh.storage.storage.Storage')
    def check_revisions_ordering(self, rev_walker_type, expected_result,
                                 MockStorage):
        # the @patch decorator injects the mocked Storage class as the
        # last positional argument
        storage = MockStorage()
        # every revision_log call returns the whole fixture history
        storage.revision_log.return_value = _revisions_list

        revs_walker = \
            get_revisions_walker(rev_walker_type, storage,
                                 hash_to_bytes(_rev_start))

        self.assertEqual(list(map(hash_to_bytes, expected_result)),
                         [rev['id'] for rev in revs_walker])

    def test_revisions_walker_committer_date(self):

        # revisions should be returned in reverse chronological order
        # of their committer date
        expected_result = ['b364f53155044e5308a0f73abb3b5f01995a5b7d',
                           'b94886c500c46e32dc3d7ebae8a5409accd592e5',
                           '0cb6b4611d65bee0f57821dac7f611e2f8a02433',
                           '2b0240c6d682bad51532eec15b8a7ed6b75c8d31',
                           'b401c50863475db4440c85c10ac0b6423b61554d',
                           '9c5051397e5c2e0c258bb639c3dd34406584ca10',
                           '836d498396fb9b5d45c896885f84d8d60a5651dc',
                           'ee96c2a2d397b79070d2b6fe3051290963748358',
                           '8f89dda8e072383cf50d42532ae8f52ad89f8fdf']

        self.check_revisions_ordering('committer_date', expected_result)

    def test_revisions_walker_dfs(self):

        # revisions should be returned in the same order they are
        # visited when performing a depth-first search in pre order
        # on the revisions DAG
        expected_result = ['b364f53155044e5308a0f73abb3b5f01995a5b7d',
                           '836d498396fb9b5d45c896885f84d8d60a5651dc',
                           'ee96c2a2d397b79070d2b6fe3051290963748358',
                           '8f89dda8e072383cf50d42532ae8f52ad89f8fdf',
                           'b94886c500c46e32dc3d7ebae8a5409accd592e5',
                           '0cb6b4611d65bee0f57821dac7f611e2f8a02433',
                           '2b0240c6d682bad51532eec15b8a7ed6b75c8d31',
                           'b401c50863475db4440c85c10ac0b6423b61554d',
                           '9c5051397e5c2e0c258bb639c3dd34406584ca10']

        self.check_revisions_ordering('dfs', expected_result)

    def test_revisions_walker_dfs_post(self):

        # revisions should be returned in the same order they are
        # visited when performing a depth-first search in post order
        # on the revisions DAG
        expected_result = ['b364f53155044e5308a0f73abb3b5f01995a5b7d',
                           'b94886c500c46e32dc3d7ebae8a5409accd592e5',
                           '0cb6b4611d65bee0f57821dac7f611e2f8a02433',
                           '2b0240c6d682bad51532eec15b8a7ed6b75c8d31',
                           'b401c50863475db4440c85c10ac0b6423b61554d',
                           '9c5051397e5c2e0c258bb639c3dd34406584ca10',
                           '836d498396fb9b5d45c896885f84d8d60a5651dc',
                           'ee96c2a2d397b79070d2b6fe3051290963748358',
                           '8f89dda8e072383cf50d42532ae8f52ad89f8fdf']

        self.check_revisions_ordering('dfs_post', expected_result)

    def test_revisions_walker_bfs(self):

        # revisions should be returned in the same order they are
        # visited when performing a breadth-first search on the
        # revisions DAG
        expected_result = ['b364f53155044e5308a0f73abb3b5f01995a5b7d',
                           '836d498396fb9b5d45c896885f84d8d60a5651dc',
                           'b94886c500c46e32dc3d7ebae8a5409accd592e5',
                           'ee96c2a2d397b79070d2b6fe3051290963748358',
                           '0cb6b4611d65bee0f57821dac7f611e2f8a02433',
                           '8f89dda8e072383cf50d42532ae8f52ad89f8fdf',
                           '2b0240c6d682bad51532eec15b8a7ed6b75c8d31',
                           'b401c50863475db4440c85c10ac0b6423b61554d',
                           '9c5051397e5c2e0c258bb639c3dd34406584ca10']

        self.check_revisions_ordering('bfs', expected_result)

File Metadata

Mime Type
text/plain
Expires
Jul 3 2025, 9:50 AM (5 w, 2 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3223721

Event Timeline