Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F9339658
D629.id2000.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
34 KB
Subscribers
None
D629.id2000.diff
View Options
diff --git a/swh/storage/algos/revisions_walker.py b/swh/storage/algos/revisions_walker.py
new file mode 100644
--- /dev/null
+++ b/swh/storage/algos/revisions_walker.py
@@ -0,0 +1,509 @@
+# Copyright (C) 2018 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import heapq
+
+from abc import ABCMeta, abstractmethod
+from collections import deque
+
+_revs_walker_classes = {}
+
+
+class _RevisionsWalkerMetaClass(ABCMeta):
+ def __new__(cls, clsname, bases, attrs):
+ newclass = super().__new__(cls, clsname, bases, attrs)
+ if 'rw_type' in attrs:
+ _revs_walker_classes[attrs['rw_type']] = newclass
+ return newclass
+
+
class RevisionsWalker(metaclass=_RevisionsWalkerMetaClass):
    """
    Abstract base class encapsulating the logic to walk across
    a revisions history starting from a given one.

    It behaves as an iterator over revisions; the traversal order
    is defined by derived classes.

    Each iteration step performs the following operations:

    1) Check whether the iteration is over by calling
       :meth:`is_finished` and raise :exc:`StopIteration` if so

    2) Pick the next unseen revision through :meth:`get_next_rev_id`

    3) Schedule the parents of that revision for the next iteration
       steps through :meth:`process_parent_revs`

    4) Yield the revision if :meth:`should_return` accepts it

    In order to easily instantiate a specific type of revisions
    walker, it is recommended to use the factory function
    :func:`get_revisions_walker`.

    Args:
        storage (swh.storage.storage.Storage): instance of swh storage
            (either local or remote)
        rev_start (bytes): a revision identifier
        max_revs (Optional[int]): maximum number of revisions to return
        state (Optional[dict]): previous state of that revisions walker
    """

    # keys of the internal state exported/restored for pagination
    _state_keys = ('revs_to_visit', 'done', 'last_rev', 'num_revs')

    def __init__(self, storage, rev_start, max_revs=None, state=None):
        self._revs_to_visit = []
        self._done = set()
        self._revs = {}
        self._last_rev = None
        self._num_revs = 0
        self._max_revs = max_revs
        if state:
            # resume a previous walk (pagination use case)
            for key in self._state_keys:
                setattr(self, '_' + key, state[key])
        self.storage = storage
        self.process_rev(rev_start)

    @abstractmethod
    def process_rev(self, rev_id):
        """
        Process a newly visited revision during the walk.

        Derived classes must implement it according to the desired
        method to walk across the revisions history (for instance
        through a dfs on the revisions DAG).

        Args:
            rev_id (bytes): the newly visited revision identifier
        """
        pass

    @abstractmethod
    def get_next_rev_id(self):
        """
        Return the identifier of the next revision to iterate on.

        Derived classes must implement it according to the desired
        method to walk across the revisions history.

        Returns:
            bytes: the next revision identifier
        """
        pass

    def process_parent_revs(self, rev):
        """
        Schedule the parents of an iterated revision for the next
        iteration steps.

        The default implementation simply calls :meth:`process_rev`
        for each parent revision in the order they are declared.

        Args:
            rev (dict): A dict describing a revision as returned by
                :meth:`swh.storage.storage.Storage.revision_get`
        """
        for parent_id in rev['parents']:
            self.process_rev(parent_id)

    def should_return(self, rev):
        """
        Decide whether an iterated revision must be yielded.

        The default implementation accepts every iterated revision.

        Args:
            rev (dict): A dict describing a revision as returned by
                :meth:`swh.storage.storage.Storage.revision_get`

        Returns:
            bool: Whether to return the revision in the iteration
        """
        return True

    def is_finished(self):
        """
        Determine if the iteration is finished.

        This method is called at the beginning of each iteration loop.

        Returns:
            bool: Whether the iteration is finished
        """
        max_reached = (self._max_revs is not None
                       and self._num_revs >= self._max_revs)
        return max_reached or not self._revs_to_visit

    def _get_rev(self, rev_id):
        # Return the revision dict for rev_id; on a cache miss, fetch
        # a batch of ancestors from storage to avoid sending too many
        # requests and thus speed up the revisions walk.
        if not self._revs.get(rev_id):
            for fetched_rev in self.storage.revision_log([rev_id], limit=100):
                self._revs[fetched_rev['id']] = fetched_rev
        return self._revs[rev_id]

    def export_state(self):
        """
        Export the internal state of that revision walker to a dict.

        Its purpose is to continue the iteration in a pagination
        context.

        Returns:
            dict: A dict containing the internal state of that
                revisions walker
        """
        return {key: getattr(self, '_' + key) for key in self._state_keys}

    def __next__(self):
        if self.is_finished():
            raise StopIteration
        while self._revs_to_visit:
            rev_id = self.get_next_rev_id()
            if rev_id in self._done:
                continue
            self._done.add(rev_id)
            rev = self._get_rev(rev_id)
            self.process_parent_revs(rev)
            if not self.should_return(rev):
                continue
            self._num_revs += 1
            self._last_rev = rev
            return rev
        raise StopIteration

    def __iter__(self):
        return self
+
+
class CommitterDateRevisionsWalker(RevisionsWalker):
    """
    Revisions walker returning revisions in reverse chronological
    order of their committer date (same behaviour as ``git log``).
    """

    rw_type = 'committer_date'

    def process_rev(self, rev_id):
        """
        Schedule an unseen revision into a priority queue keyed on
        its committer date.

        Args:
            rev_id (bytes): the newly visited revision identifier
        """
        if rev_id in self._done:
            return
        rev = self._get_rev(rev_id)
        commit_time = rev['committer_date']['timestamp']['seconds']
        # heapq implements a min-heap: negate the timestamp so that
        # the most recent revision pops first
        heapq.heappush(self._revs_to_visit, (-commit_time, rev_id))

    def get_next_rev_id(self):
        """
        Pop the smallest entry from the priority queue, i.e. the
        revision with the highest committer date.

        Returns:
            bytes: the next revision identifier
        """
        return heapq.heappop(self._revs_to_visit)[1]
+
+
class BFSRevisionsWalker(RevisionsWalker):
    """
    Revisions walker returning revisions in the same order as when
    performing a breadth-first search on the revisions DAG.
    """

    rw_type = 'bfs'

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # the base constructor seeds a plain list; switch to a deque
        # afterwards for O(1) pops from the left end
        self._revs_to_visit = deque(self._revs_to_visit)

    def process_rev(self, rev_id):
        """
        Enqueue an unseen revision.

        Args:
            rev_id (bytes): the newly visited revision identifier
        """
        if rev_id in self._done:
            return
        self._revs_to_visit.append(rev_id)

    def get_next_rev_id(self):
        """
        Dequeue the next revision (FIFO order).

        Returns:
            bytes: the next revision identifier
        """
        return self._revs_to_visit.popleft()
+
+
class DFSPostRevisionsWalker(RevisionsWalker):
    """
    Revisions walker returning revisions in the same order as when
    performing a depth-first search in post-order on the revisions
    DAG (i.e. after visiting a merge commit, the merged commit is
    visited before the base it was merged on).
    """

    rw_type = 'dfs_post'

    def process_rev(self, rev_id):
        """
        Push an unseen revision onto the traversal stack.

        Args:
            rev_id (bytes): the newly visited revision identifier
        """
        if rev_id in self._done:
            return
        self._revs_to_visit.append(rev_id)

    def get_next_rev_id(self):
        """
        Pop the next revision from the stack (LIFO order).

        Returns:
            bytes: the next revision identifier
        """
        return self._revs_to_visit.pop()
+
+
class DFSRevisionsWalker(DFSPostRevisionsWalker):
    """
    Revisions walker returning revisions in the same order as when
    performing a depth-first search in pre-order on the revisions
    DAG (i.e. after visiting a merge commit, the base commit it was
    merged on is visited before the merged commit).
    """

    rw_type = 'dfs'

    def process_parent_revs(self, rev):
        """
        Schedule the parents of an iterated revision in the reverse
        of their declaration order, so that popping from the stack
        inherited from :class:`DFSPostRevisionsWalker` yields a
        pre-order traversal.

        Args:
            rev (dict): A dict describing a revision as returned by
                :meth:`swh.storage.storage.Storage.revision_get`
        """
        for parent_id in rev['parents'][::-1]:
            self.process_rev(parent_id)
+
+
class PathRevisionsWalker(CommitterDateRevisionsWalker):
    """
    Revisions walker that returns revisions where a specific
    path in the source tree has been modified, in other terms
    it allows to get the history for a specific file or directory.

    It has a behaviour similar to what ``git log`` offers by default,
    meaning the returned history is simplified in order to only
    show relevant revisions (see the `History Simplification
    <https://git-scm.com/docs/git-log#_history_simplification>`_
    section of the associated manual for more details).

    Please note that to avoid walking the entire history, the iteration
    will stop once a revision where the path has been added is found.

    .. warning:: Due to client-side implementation, performances
        are not optimal when the total numbers of revisions to walk
        is large. This should only be used when the total number of
        revisions does not exceed a couple of thousands.

    Args:
        storage (swh.storage.storage.Storage): instance of swh storage
            (either local or remote)
        rev_start (bytes): a revision identifier
        path (str): the path in the source tree to retrieve the history
        max_revs (Optional[int]): maximum number of revisions to return
        state (Optional[dict]): previous state of that revisions walker
    """

    rw_type = 'path'

    def __init__(self, storage, rev_start, path, **kwargs):
        super().__init__(storage, rev_start, **kwargs)
        # split the queried path into its successive entry names,
        # encoded to bytes as expected by the storage API
        paths = path.strip('/').split('/')
        self._path = list(map(lambda p: p.encode('utf-8'), paths))
        # cache mapping a revision's root directory id to the path
        # identifier found below it (None when the path is absent)
        self._rev_dir_path = {}

    def _get_path_id(self, rev_id):
        """
        Return the path checksum identifier in the source tree of the
        provided revision. If the path corresponds to a directory, the
        value computed by :func:`swh.model.identifiers.directory_identifier`
        will be returned. If the path corresponds to a file, its sha1
        checksum will be returned.

        Args:
            rev_id (bytes): a revision identifier

        Returns:
            bytes: the path identifier, or None when the path does not
                exist in the revision's source tree
        """

        rev = self._get_rev(rev_id)

        rev_dir_id = rev['directory']

        if rev_dir_id not in self._rev_dir_path:
            try:
                dir_info = \
                    self.storage.directory_entry_get_by_path(rev_dir_id,
                                                             self._path)
                self._rev_dir_path[rev_dir_id] = dir_info['target']
            # NOTE(review): broad catch — any storage error (not only a
            # missing path) is treated as "path absent in that tree";
            # consider narrowing the exception type
            except Exception:
                self._rev_dir_path[rev_dir_id] = None

        return self._rev_dir_path[rev_dir_id]

    def is_finished(self):
        """
        Check if the revisions iteration is finished.
        This checks for the specified path's existence in the last
        returned revision's parents' source trees.
        If not, the iteration is considered finished.

        Returns:
            bool: Whether the iteration is finished
        """
        if self._path and self._last_rev:
            last_rev_parents = self._last_rev['parents']
            last_rev_parents_path_ids = [self._get_path_id(p_rev)
                                         for p_rev in last_rev_parents]
            # if the path exists in no parent tree, it was added by the
            # last returned revision: stop walking the history
            no_path = all([path_id is None
                           for path_id in last_rev_parents_path_ids])
            if no_path:
                return True
        return super().is_finished()

    def process_parent_revs(self, rev):
        """
        Process parents when a new revision is iterated.
        It enables to get a simplified revisions history in the same
        manner as ``git log``. When a revision has multiple parents,
        the following process is applied. If the revision was a merge,
        and has the same path identifier to one parent, follow only that
        parent (even if there are several parents with the same path
        identifier, follow only one of them.) Otherwise, follow all parents.

        Args:
            rev (dict): A dict describing a revision as returned by
                :meth:`swh.storage.storage.Storage.revision_get`
        """
        rev_path_id = self._get_path_id(rev['id'])

        if rev_path_id:
            if len(rev['parents']) == 1:
                # single parent: always follow it
                self.process_rev(rev['parents'][0])
            else:
                parent_rev_path_ids = [self._get_path_id(p_rev)
                                       for p_rev in rev['parents']]
                different_trees = all([path_id != rev_path_id
                                       for path_id in parent_rev_path_ids])
                for i, p_rev in enumerate(rev['parents']):
                    # follow every parent when the path differs in all of
                    # them; otherwise follow only the first parent sharing
                    # the revision's path identifier
                    if different_trees or \
                            parent_rev_path_ids[i] == rev_path_id:
                        self.process_rev(p_rev)
                        if not different_trees:
                            break
        else:
            # path absent from the revision's tree: fall back to the
            # default scheduling of all parents
            super().process_parent_revs(rev)

    def should_return(self, rev):
        """
        Check if a revision should be returned when iterating.
        It verifies that the specified path has been modified
        by the revision but also that all parents have a path
        identifier different from the revision one in order
        to get a simplified history.

        Args:
            rev (dict): A dict describing a revision as returned by
                :meth:`swh.storage.storage.Storage.revision_get`

        Returns:
            bool: Whether to return the revision in the iteration
        """
        rev_path_id = self._get_path_id(rev['id'])

        # root revision: return it only if the path exists in its tree
        if not rev['parents']:
            return rev_path_id is not None

        parent_rev_path_ids = [self._get_path_id(p_rev)
                               for p_rev in rev['parents']]
        different_trees = all([path_id != rev_path_id
                               for path_id in parent_rev_path_ids])

        # the revision modified the path (differs from its first parent
        # and from every other parent): it is part of the simplified
        # history
        if rev_path_id != parent_rev_path_ids[0] and different_trees:
            return True

        return False
+
+
def get_revisions_walker(rev_walker_type, *args, **kwargs):
    """
    Instantiate a revisions walker of a given type.

    The following code snippet demonstrates how to use a revisions
    walker for processing a whole revisions history::

        revs_walker = get_revisions_walker('committer_date', rev_id)
        for rev in revs_walker:
            # process revision rev

    It is also possible to walk a revisions history in a paginated
    way as illustrated below::

        def get_revs_history_page(rw_type, rev_id, page_num, page_size,
                                  rw_state):
            max_revs = (page_num + 1) * page_size
            revs_walker = get_revisions_walker(rw_type, rev_id,
                                               max_revs=max_revs,
                                               state=rw_state)
            revs = list(revs_walker)
            rw_state = revs_walker.export_state()
            return revs

        rev_start = ...
        per_page = 50
        rw_state = {}

        for page in range(0, 10):
            revs_page = get_revs_history_page('dfs', rev_start, page,
                                              per_page, rw_state)
            # process revisions page


    Args:
        rev_walker_type (str): the type of revisions walker to return,
            possible values are: *committer_date*, *dfs*, *dfs_post*,
            *bfs* and *path*
        args (list): position arguments to pass to the revisions walker
            constructor
        kwargs (dict): keyword arguments to pass to the revisions walker
            constructor

    Returns:
        RevisionsWalker: an instance of the revisions walker class
            registered for *rev_walker_type*

    Raises:
        ValueError: if no revisions walker is registered for
            *rev_walker_type*
    """
    try:
        revs_walker_class = _revs_walker_classes[rev_walker_type]
    except KeyError:
        # ValueError is more precise than a bare Exception while
        # remaining backward-compatible with callers catching Exception
        raise ValueError('No revisions walker found for type "%s"'
                         % rev_walker_type) from None
    return revs_walker_class(*args, **kwargs)
diff --git a/swh/storage/tests/algos/test_revisions_walker.py b/swh/storage/tests/algos/test_revisions_walker.py
new file mode 100644
--- /dev/null
+++ b/swh/storage/tests/algos/test_revisions_walker.py
@@ -0,0 +1,367 @@
+# Copyright (C) 2018 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import unittest
+
+from unittest.mock import patch
+
+from swh.model.hashutil import hash_to_bytes
+from swh.storage.algos.revisions_walker import get_revisions_walker
+
+# For those tests, we will walk the following revisions history
+# with different orderings:
+#
+# * commit b364f53155044e5308a0f73abb3b5f01995a5b7d
+# |\ Merge: 836d498 b94886c
+# | | Author: Adam <adam.janicki@roche.com>
+# | | AuthorDate: Fri Oct 4 12:50:49 2013 +0200
+# | | Commit: Adam <adam.janicki@roche.com>
+# | | CommitDate: Fri Oct 4 12:50:49 2013 +0200
+# | |
+# | | Merge branch 'release/1.0'
+# | |
+# | * commit b94886c500c46e32dc3d7ebae8a5409accd592e5
+# | | Author: Adam <adam.janicki@roche.com>
+# | | AuthorDate: Fri Oct 4 12:50:38 2013 +0200
+# | | Commit: Adam <adam.janicki@roche.com>
+# | | CommitDate: Fri Oct 4 12:50:38 2013 +0200
+# | |
+# | | updating poms for 1.0 release
+# | |
+# | * commit 0cb6b4611d65bee0f57821dac7f611e2f8a02433
+# | | Author: Adam <adam.janicki@roche.com>
+# | | AuthorDate: Fri Oct 4 12:50:38 2013 +0200
+# | | Commit: Adam <adam.janicki@roche.com>
+# | | CommitDate: Fri Oct 4 12:50:38 2013 +0200
+# | |
+# | | updating poms for 1.0 release
+# | |
+# | * commit 2b0240c6d682bad51532eec15b8a7ed6b75c8d31
+# | | Author: Adam Janicki <janickia>
+# | | AuthorDate: Fri Oct 4 12:50:22 2013 +0200
+# | | Commit: Adam Janicki <janickia>
+# | | CommitDate: Fri Oct 4 12:50:22 2013 +0200
+# | |
+# | | For 1.0 release. Allow untracked.
+# | |
+# | * commit b401c50863475db4440c85c10ac0b6423b61554d
+# | | Author: Adam <adam.janicki@roche.com>
+# | | AuthorDate: Fri Oct 4 12:48:12 2013 +0200
+# | | Commit: Adam <adam.janicki@roche.com>
+# | | CommitDate: Fri Oct 4 12:48:12 2013 +0200
+# | |
+# | | updating poms for 1.0 release
+# | |
+# | * commit 9c5051397e5c2e0c258bb639c3dd34406584ca10
+# |/ Author: Adam Janicki <janickia>
+# | AuthorDate: Fri Oct 4 12:47:48 2013 +0200
+# | Commit: Adam Janicki <janickia>
+# | CommitDate: Fri Oct 4 12:47:48 2013 +0200
+# |
+# | For 1.0 release.
+# |
+# * commit 836d498396fb9b5d45c896885f84d8d60a5651dc
+# | Author: Adam Janicki <janickia>
+# | AuthorDate: Fri Oct 4 12:08:16 2013 +0200
+# | Commit: Adam Janicki <janickia>
+# | CommitDate: Fri Oct 4 12:08:16 2013 +0200
+# |
+# | Add ignores
+# |
+# * commit ee96c2a2d397b79070d2b6fe3051290963748358
+# | Author: Adam <adam.janicki@roche.com>
+# | AuthorDate: Fri Oct 4 10:48:16 2013 +0100
+# | Commit: Adam <adam.janicki@roche.com>
+# | CommitDate: Fri Oct 4 10:48:16 2013 +0100
+# |
+# | Reset author
+# |
+# * commit 8f89dda8e072383cf50d42532ae8f52ad89f8fdf
+# Author: Adam <adam.janicki@roche.com>
+# AuthorDate: Fri Oct 4 02:20:19 2013 -0700
+# Commit: Adam <adam.janicki@roche.com>
+# CommitDate: Fri Oct 4 02:20:19 2013 -0700
+#
+# Initial commit
+
# raw dump of the above history in swh format: a list of revision
# dicts as returned by swh.storage, ordered from the head merge
# commit (b364f531...) down to the initial commit (8f89dda8...)
_revisions_list = \
[{'author': {'email': b'adam.janicki@roche.com', # noqa
 'fullname': b'Adam <adam.janicki@roche.com>',
 'id': 8040905,
 'name': b'Adam'},
 'committer': {'email': b'adam.janicki@roche.com',
 'fullname': b'Adam <adam.janicki@roche.com>',
 'id': 8040905,
 'name': b'Adam'},
 'committer_date': {'negative_utc': None,
 'offset': 120,
 'timestamp': {'microseconds': 0, 'seconds': 1380883849}},
 'date': {'negative_utc': None,
 'offset': 120,
 'timestamp': {'microseconds': 0, 'seconds': 1380883849}},
 'directory': b'\xefX\xe7\xa6\\\xda\xdf\xfdH\xdbH\xfbq\x96@{\x98?9\xfe',
 'id': b'\xb3d\xf51U\x04NS\x08\xa0\xf7:\xbb;_\x01\x99Z[}',
 'message': b"Merge branch 'release/1.0'",
 'metadata': None,
 'parents': [b'\x83mI\x83\x96\xfb\x9b]E\xc8\x96\x88_\x84\xd8\xd6\nVQ\xdc',
 b'\xb9H\x86\xc5\x00\xc4n2\xdc=~\xba\xe8\xa5@\x9a\xcc\xd5\x92\xe5'], # noqa
 'synthetic': False,
 'type': 'git'},
 {'author': {'email': b'adam.janicki@roche.com',
 'fullname': b'Adam <adam.janicki@roche.com>',
 'id': 8040905,
 'name': b'Adam'},
 'committer': {'email': b'adam.janicki@roche.com',
 'fullname': b'Adam <adam.janicki@roche.com>',
 'id': 8040905,
 'name': b'Adam'},
 'committer_date': {'negative_utc': None,
 'offset': 120,
 'timestamp': {'microseconds': 0, 'seconds': 1380883838}},
 'date': {'negative_utc': None,
 'offset': 120,
 'timestamp': {'microseconds': 0, 'seconds': 1380883838}},
 'directory': b'\xefX\xe7\xa6\\\xda\xdf\xfdH\xdbH\xfbq\x96@{\x98?9\xfe',
 'id': b'\xb9H\x86\xc5\x00\xc4n2\xdc=~\xba\xe8\xa5@\x9a\xcc\xd5\x92\xe5',
 'message': b'updating poms for 1.0 release',
 'metadata': None,
 'parents': [b'\x0c\xb6\xb4a\x1de\xbe\xe0\xf5x!\xda\xc7\xf6\x11\xe2\xf8\xa0$3'], # noqa
 'synthetic': False,
 'type': 'git'},
 {'author': {'email': b'adam.janicki@roche.com',
 'fullname': b'Adam <adam.janicki@roche.com>',
 'id': 8040905,
 'name': b'Adam'},
 'committer': {'email': b'adam.janicki@roche.com',
 'fullname': b'Adam <adam.janicki@roche.com>',
 'id': 8040905,
 'name': b'Adam'},
 'committer_date': {'negative_utc': None,
 'offset': 120,
 'timestamp': {'microseconds': 0, 'seconds': 1380883838}},
 'date': {'negative_utc': None,
 'offset': 120,
 'timestamp': {'microseconds': 0, 'seconds': 1380883838}},
 'directory': b'\xefX\xe7\xa6\\\xda\xdf\xfdH\xdbH\xfbq\x96@{\x98?9\xfe',
 'id': b'\x0c\xb6\xb4a\x1de\xbe\xe0\xf5x!\xda\xc7\xf6\x11\xe2\xf8\xa0$3',
 'message': b'updating poms for 1.0 release',
 'metadata': None,
 'parents': [b'+\x02@\xc6\xd6\x82\xba\xd5\x152\xee\xc1[\x8a~\xd6\xb7\\\x8d1'],
 'synthetic': False,
 'type': 'git'},
 {'author': {'email': b'janickia',
 'fullname': b'Adam Janicki <janickia>',
 'id': 8040906,
 'name': b'Adam Janicki'},
 'committer': {'email': b'janickia',
 'fullname': b'Adam Janicki <janickia>',
 'id': 8040906,
 'name': b'Adam Janicki'},
 'committer_date': {'negative_utc': None,
 'offset': 120,
 'timestamp': {'microseconds': 0, 'seconds': 1380883822}},
 'date': {'negative_utc': None,
 'offset': 120,
 'timestamp': {'microseconds': 0, 'seconds': 1380883822}},
 'directory': b'\xefX\xe7\xa6\\\xda\xdf\xfdH\xdbH\xfbq\x96@{\x98?9\xfe',
 'id': b'+\x02@\xc6\xd6\x82\xba\xd5\x152\xee\xc1[\x8a~\xd6\xb7\\\x8d1',
 'message': b'For 1.0 release. Allow untracked.\n',
 'metadata': None,
 'parents': [b'\xb4\x01\xc5\x08cG]\xb4D\x0c\x85\xc1\n\xc0\xb6B;aUM'],
 'synthetic': False,
 'type': 'git'},
 {'author': {'email': b'adam.janicki@roche.com',
 'fullname': b'Adam <adam.janicki@roche.com>',
 'id': 8040905,
 'name': b'Adam'},
 'committer': {'email': b'adam.janicki@roche.com',
 'fullname': b'Adam <adam.janicki@roche.com>',
 'id': 8040905,
 'name': b'Adam'},
 'committer_date': {'negative_utc': None,
 'offset': 120,
 'timestamp': {'microseconds': 0, 'seconds': 1380883692}},
 'date': {'negative_utc': None,
 'offset': 120,
 'timestamp': {'microseconds': 0, 'seconds': 1380883692}},
 'directory': b'd@\xe7\x143w\xcb\xf7\xad\xae\x91\xd5\xec\xd8\x95\x82'
 b'\x02\xa6=\x1b',
 'id': b'\xb4\x01\xc5\x08cG]\xb4D\x0c\x85\xc1\n\xc0\xb6B;aUM',
 'message': b'updating poms for 1.0 release',
 'metadata': None,
 'parents': [b'\x9cPQ9~\\.\x0c%\x8b\xb69\xc3\xdd4@e\x84\xca\x10'],
 'synthetic': False,
 'type': 'git'},
 {'author': {'email': b'janickia',
 'fullname': b'Adam Janicki <janickia>',
 'id': 8040906,
 'name': b'Adam Janicki'},
 'committer': {'email': b'janickia',
 'fullname': b'Adam Janicki <janickia>',
 'id': 8040906,
 'name': b'Adam Janicki'},
 'committer_date': {'negative_utc': None,
 'offset': 120,
 'timestamp': {'microseconds': 0, 'seconds': 1380883668}},
 'date': {'negative_utc': None,
 'offset': 120,
 'timestamp': {'microseconds': 0, 'seconds': 1380883668}},
 'directory': b'\n\x857\x94r\xbe\xcc\x04=\xe9}\xe5\xfd\xdf?nR\xe6\xa7\x9e',
 'id': b'\x9cPQ9~\\.\x0c%\x8b\xb69\xc3\xdd4@e\x84\xca\x10',
 'message': b'For 1.0 release.\n',
 'metadata': None,
 'parents': [b'\x83mI\x83\x96\xfb\x9b]E\xc8\x96\x88_\x84\xd8\xd6\nVQ\xdc'],
 'synthetic': False,
 'type': 'git'},
 {'author': {'email': b'janickia',
 'fullname': b'Adam Janicki <janickia>',
 'id': 8040906,
 'name': b'Adam Janicki'},
 'committer': {'email': b'janickia',
 'fullname': b'Adam Janicki <janickia>',
 'id': 8040906,
 'name': b'Adam Janicki'},
 'committer_date': {'negative_utc': None,
 'offset': 120,
 'timestamp': {'microseconds': 0, 'seconds': 1380881296}},
 'date': {'negative_utc': None,
 'offset': 120,
 'timestamp': {'microseconds': 0, 'seconds': 1380881296}},
 'directory': b'.\xf9\xa5\xcb\xb0\xd3\xdc\x9b{\xb8\x81\x03l\xe2P\x16c\x0b|\xe6', # noqa
 'id': b'\x83mI\x83\x96\xfb\x9b]E\xc8\x96\x88_\x84\xd8\xd6\nVQ\xdc',
 'message': b'Add ignores\n',
 'metadata': None,
 'parents': [b'\xee\x96\xc2\xa2\xd3\x97\xb7\x90p\xd2\xb6\xfe0Q)\tct\x83X'],
 'synthetic': False,
 'type': 'git'},
 {'author': {'email': b'adam.janicki@roche.com',
 'fullname': b'Adam <adam.janicki@roche.com>',
 'id': 8040905,
 'name': b'Adam'},
 'committer': {'email': b'adam.janicki@roche.com',
 'fullname': b'Adam <adam.janicki@roche.com>',
 'id': 8040905,
 'name': b'Adam'},
 'committer_date': {'negative_utc': None,
 'offset': 60,
 'timestamp': {'microseconds': 0, 'seconds': 1380880096}},
 'date': {'negative_utc': None,
 'offset': 60,
 'timestamp': {'microseconds': 0, 'seconds': 1380880096}},
 'directory': b'\xc7r\xc4\x9f\xc0$\xd4\xab\xff\xcb]\xf6<\xcb\x8b~\xec\xc4\xd1)', # noqa
 'id': b'\xee\x96\xc2\xa2\xd3\x97\xb7\x90p\xd2\xb6\xfe0Q)\tct\x83X',
 'message': b'Reset author\n',
 'metadata': None,
 'parents': [b'\x8f\x89\xdd\xa8\xe0r8<\xf5\rBS*\xe8\xf5*\xd8\x9f\x8f\xdf'],
 'synthetic': False,
 'type': 'git'},
 {'author': {'email': b'adam.janicki@roche.com',
 'fullname': b'Adam <adam.janicki@roche.com>',
 'id': 8040905,
 'name': b'Adam'},
 'committer': {'email': b'adam.janicki@roche.com',
 'fullname': b'Adam <adam.janicki@roche.com>',
 'id': 8040905,
 'name': b'Adam'},
 'committer_date': {'negative_utc': None,
 'offset': -420,
 'timestamp': {'microseconds': 0, 'seconds': 1380878419}},
 'date': {'negative_utc': None,
 'offset': -420,
 'timestamp': {'microseconds': 0, 'seconds': 1380878419}},
 'directory': b'WS\xbaX\xd6x{q\x8f\x020i\xc5\x95\xa01\xf7y\xb2\x80',
 'id': b'\x8f\x89\xdd\xa8\xe0r8<\xf5\rBS*\xe8\xf5*\xd8\x9f\x8f\xdf',
 'message': b'Initial commit\n',
 'metadata': None,
 'parents': [],
 'synthetic': False,
 'type': 'git'}]


# hex identifier of the head revision (the merge commit) from which
# every walk in the tests below starts
_rev_start = 'b364f53155044e5308a0f73abb3b5f01995a5b7d'
+
+
class RevisionsWalkerTest(unittest.TestCase):

    @patch('swh.storage.storage.Storage')
    def check_revisions_ordering(self, rev_walker_type, expected_result,
                                 mock_storage_class):
        # mocked storage serving the whole revisions log in a single
        # batch, whatever revision identifier gets requested
        storage = mock_storage_class()
        storage.revision_log.return_value = _revisions_list

        revs_walker = \
            get_revisions_walker(rev_walker_type, storage,
                                 hash_to_bytes(_rev_start))

        expected_ids = [hash_to_bytes(rev_hash)
                        for rev_hash in expected_result]
        walked_ids = [rev['id'] for rev in revs_walker]
        self.assertEqual(expected_ids, walked_ids)

    def test_revisions_walker_committer_date(self):

        # revisions are expected in reverse chronological order of
        # their committer date
        expected_result = ['b364f53155044e5308a0f73abb3b5f01995a5b7d',
                           'b94886c500c46e32dc3d7ebae8a5409accd592e5',
                           '0cb6b4611d65bee0f57821dac7f611e2f8a02433',
                           '2b0240c6d682bad51532eec15b8a7ed6b75c8d31',
                           'b401c50863475db4440c85c10ac0b6423b61554d',
                           '9c5051397e5c2e0c258bb639c3dd34406584ca10',
                           '836d498396fb9b5d45c896885f84d8d60a5651dc',
                           'ee96c2a2d397b79070d2b6fe3051290963748358',
                           '8f89dda8e072383cf50d42532ae8f52ad89f8fdf']

        self.check_revisions_ordering('committer_date', expected_result)

    def test_revisions_walker_dfs(self):

        # revisions are expected in the order of a pre-order
        # depth-first search on the revisions DAG
        expected_result = ['b364f53155044e5308a0f73abb3b5f01995a5b7d',
                           '836d498396fb9b5d45c896885f84d8d60a5651dc',
                           'ee96c2a2d397b79070d2b6fe3051290963748358',
                           '8f89dda8e072383cf50d42532ae8f52ad89f8fdf',
                           'b94886c500c46e32dc3d7ebae8a5409accd592e5',
                           '0cb6b4611d65bee0f57821dac7f611e2f8a02433',
                           '2b0240c6d682bad51532eec15b8a7ed6b75c8d31',
                           'b401c50863475db4440c85c10ac0b6423b61554d',
                           '9c5051397e5c2e0c258bb639c3dd34406584ca10']

        self.check_revisions_ordering('dfs', expected_result)

    def test_revisions_walker_dfs_post(self):

        # revisions are expected in the order of a post-order
        # depth-first search on the revisions DAG
        expected_result = ['b364f53155044e5308a0f73abb3b5f01995a5b7d',
                           'b94886c500c46e32dc3d7ebae8a5409accd592e5',
                           '0cb6b4611d65bee0f57821dac7f611e2f8a02433',
                           '2b0240c6d682bad51532eec15b8a7ed6b75c8d31',
                           'b401c50863475db4440c85c10ac0b6423b61554d',
                           '9c5051397e5c2e0c258bb639c3dd34406584ca10',
                           '836d498396fb9b5d45c896885f84d8d60a5651dc',
                           'ee96c2a2d397b79070d2b6fe3051290963748358',
                           '8f89dda8e072383cf50d42532ae8f52ad89f8fdf']

        self.check_revisions_ordering('dfs_post', expected_result)

    def test_revisions_walker_bfs(self):

        # revisions are expected in the order of a breadth-first
        # search on the revisions DAG
        expected_result = ['b364f53155044e5308a0f73abb3b5f01995a5b7d',
                           '836d498396fb9b5d45c896885f84d8d60a5651dc',
                           'b94886c500c46e32dc3d7ebae8a5409accd592e5',
                           'ee96c2a2d397b79070d2b6fe3051290963748358',
                           '0cb6b4611d65bee0f57821dac7f611e2f8a02433',
                           '8f89dda8e072383cf50d42532ae8f52ad89f8fdf',
                           '2b0240c6d682bad51532eec15b8a7ed6b75c8d31',
                           'b401c50863475db4440c85c10ac0b6423b61554d',
                           '9c5051397e5c2e0c258bb639c3dd34406584ca10']

        self.check_revisions_ordering('bfs', expected_result)
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Jul 3 2025, 9:50 AM (5 w, 2 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3223721
Attached To
D629: algos: Add iterators to walk across revisions history
Event Timeline
Log In to Comment