Page MenuHomeSoftware Heritage

No OneTemporary

diff --git a/swh/storage/algos/diff.py b/swh/storage/algos/diff.py
index 1d75ffe27..6eafbb856 100644
--- a/swh/storage/algos/diff.py
+++ b/swh/storage/algos/diff.py
@@ -1,402 +1,403 @@
# Copyright (C) 2018 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
# Utility module to efficiently compute the list of changed files
# between two directory trees.
# The implementation is inspired from the work of Alberto Cortés
# for the go-git project. For more details, you can refer to:
# - this blog post: https://blog.sourced.tech/post/difftree/
# - the reference implementation in go:
# https://github.com/src-d/go-git/tree/master/utils/merkletrie
import collections
+from swh.model.hashutil import hash_to_bytes
from swh.model.identifiers import directory_identifier
from .dir_iterators import (
DirectoryIterator, DoubleDirectoryIterator, Remaining
)
# get the hash identifier for an empty directory
-_empty_dir_hash = directory_identifier({'entries': []})
+_empty_dir_hash = hash_to_bytes(directory_identifier({'entries': []}))
def _get_rev(storage, rev_id):
"""
Return revision data from swh storage.
"""
return list(storage.revision_get([rev_id]))[0]
class _RevisionChangesList(object):
"""
Helper class to track the changes between two
revision directories.
"""
def __init__(self, storage, track_renaming):
"""
Args:
storage: instance of swh storage
track_renaming (bool): whether to track or not files renaming
"""
self.storage = storage
self.track_renaming = track_renaming
self.result = []
# dicts used to track file renaming based on hash value
# we use a list instead of a single entry to handle the corner
# case when a repository contains multiple instance of
# the same file in different directories and a commit
# renames all of them
self.inserted_hash_idx = collections.defaultdict(list)
self.deleted_hash_idx = collections.defaultdict(list)
def add_insert(self, it_to):
"""
Add a file insertion in the to directory.
Args:
it_to (swh.storage.algos.dir_iterators.DirectoryIterator):
iterator on the to directory
"""
to_hash = it_to.current_hash()
# if the current file hash has been previously marked as deleted,
# the file has been renamed
if self.track_renaming and self.deleted_hash_idx[to_hash]:
# pop the delete change index in the same order it was inserted
change = self.result[self.deleted_hash_idx[to_hash].pop(0)]
# change the delete change as a rename one
change['type'] = 'rename'
change['to'] = it_to.current()
change['to_path'] = it_to.current_path()
else:
# add the insert change in the list
self.result.append({'type': 'insert',
'from': None,
'from_path': None,
'to': it_to.current(),
'to_path': it_to.current_path()})
# if rename tracking is activated, add the change index in
# the inserted_hash_idx dict
if self.track_renaming:
self.inserted_hash_idx[to_hash].append(len(self.result) - 1)
def add_delete(self, it_from):
"""
Add a file deletion in the from directory.
Args:
it_from (swh.storage.algos.dir_iterators.DirectoryIterator):
iterator on the from directory
"""
from_hash = it_from.current_hash()
# if the current file has been previously marked as inserted,
# the file has been renamed
if self.track_renaming and self.inserted_hash_idx[from_hash]:
# pop the insert change index in the same order it was inserted
change = self.result[self.inserted_hash_idx[from_hash].pop(0)]
# change the insert change as a rename one
change['type'] = 'rename'
change['from'] = it_from.current()
change['from_path'] = it_from.current_path()
else:
# add the delete change in the list
self.result.append({'type': 'delete',
'from': it_from.current(),
'from_path': it_from.current_path(),
'to': None,
'to_path': None})
# if rename tracking is activated, add the change index in
# the deleted_hash_idx dict
if self.track_renaming:
self.deleted_hash_idx[from_hash].append(len(self.result) - 1)
def add_modify(self, it_from, it_to):
"""
Add a file modification in the to directory.
Args:
it_from (swh.storage.algos.dir_iterators.DirectoryIterator):
iterator on the from directory
it_to (swh.storage.algos.dir_iterators.DirectoryIterator):
iterator on the to directory
"""
self.result.append({'type': 'modify',
'from': it_from.current(),
'from_path': it_from.current_path(),
'to': it_to.current(),
'to_path': it_to.current_path()})
def add_recursive(self, it, insert):
"""
Recursively add changes from a directory.
Args:
it (swh.storage.algos.dir_iterators.DirectoryIterator):
iterator on a directory
insert (bool): the type of changes to add (insertion
or deletion)
"""
# current iterated element is a regular file,
# simply add adequate change in the list
if not it.current_is_dir():
if insert:
self.add_insert(it)
else:
self.add_delete(it)
return
# current iterated element is a directory,
dir_id = it.current_hash()
# handle empty dir insertion/deletion as the swh model allow
# to have such object compared to git
if dir_id == _empty_dir_hash:
if insert:
self.add_insert(it)
else:
self.add_delete(it)
# iterate on files reachable from it and add
# adequate changes in the list
else:
sub_it = DirectoryIterator(self.storage, dir_id,
it.current_path() + b'/')
sub_it_current = sub_it.step()
while sub_it_current:
if not sub_it.current_is_dir():
if insert:
self.add_insert(sub_it)
else:
self.add_delete(sub_it)
sub_it_current = sub_it.step()
def add_recursive_insert(self, it_to):
"""
Recursively add files insertion from a to directory.
Args:
it_to (swh.storage.algos.dir_iterators.DirectoryIterator):
iterator on a to directory
"""
self.add_recursive(it_to, True)
def add_recursive_delete(self, it_from):
"""
Recursively add files deletion from a from directory.
Args:
it_from (swh.storage.algos.dir_iterators.DirectoryIterator):
iterator on a from directory
"""
self.add_recursive(it_from, False)
def _diff_elts_same_name(changes, it):
""""
Compare two directory entries with the same name and add adequate
changes if any.
Args:
changes (_RevisionChangesList): the list of changes between
two revisions
it (swh.storage.algos.dir_iterators.DoubleDirectoryIterator):
the iterator traversing two revision directories at the same time
"""
# compare the two current directory elements of the iterator
status = it.compare()
# elements have same hash and same permissions:
# no changes to add and call next on the two iterators
if status['same_hash'] and status['same_perms']:
it.next_both()
# elements are regular files and have been modified:
# insert the modification change in the list and
# call next on the two iterators
elif status['both_are_files']:
changes.add_modify(it.it_from, it.it_to)
it.next_both()
# one element is a regular file, the other a directory:
# recursively add delete/insert changes and call next
# on the two iterators
elif status['file_and_dir']:
changes.add_recursive_delete(it.it_from)
changes.add_recursive_insert(it.it_to)
it.next_both()
# both elements are directories:
elif status['both_are_dirs']:
# from directory is empty:
# recursively add insert changes in the to directory
# and call next on the two iterators
if status['from_is_empty_dir']:
changes.add_recursive_insert(it.it_to)
it.next_both()
# to directory is empty:
# recursively add delete changes in the from directory
# and call next on the two iterators
elif status['to_is_empty_dir']:
changes.add_recursive_delete(it.it_from)
it.next_both()
# both directories are not empty:
# call step on the two iterators to descend further in
# the directory trees.
elif not status['from_is_empty_dir'] and not status['to_is_empty_dir']:
it.step_both()
def _compare_paths(path1, path2):
"""
Compare paths in lexicographic depth-first order.
For instance, it returns:
- "a" < "b"
- "b/c/d" < "b"
- "c/foo.txt" < "c.txt"
"""
path1_parts = path1.split(b'/')
path2_parts = path2.split(b'/')
i = 0
while True:
if len(path1_parts) == len(path2_parts) and i == len(path1_parts):
return 0
elif len(path2_parts) == i:
return 1
elif len(path1_parts) == i:
return -1
else:
if path2_parts[i] > path1_parts[i]:
return -1
elif path2_parts[i] < path1_parts[i]:
return 1
i = i + 1
def _diff_elts(changes, it):
"""
Compare two directory entries.
Args:
changes (_RevisionChangesList): the list of changes between
two revisions
it (swh.storage.algos.dir_iterators.DoubleDirectoryIterator):
the iterator traversing two revision directories at the same time
"""
# compare current to and from path in depth-first lexicographic order
c = _compare_paths(it.it_from.current_path(), it.it_to.current_path())
# current from path is lower than the current to path:
# the from path has been deleted
if c < 0:
changes.add_recursive_delete(it.it_from)
it.next_from()
# current from path is greater than the current to path:
# the to path has been inserted
elif c > 0:
changes.add_recursive_insert(it.it_to)
it.next_to()
# paths are the same and need more processing
else:
_diff_elts_same_name(changes, it)
def diff_directories(storage, from_dir, to_dir, track_renaming=False):
"""
Compute the differential between two directories, i.e. the list of
file changes (insertion / deletion / modification / renaming)
between them.
Args:
storage (swh.storage.storage.Storage): instance of a swh
storage (either local or remote, for optimal performance
the use of a local storage is recommended)
from_dir (bytes): the swh identifier of the directory to compare from
to_dir (bytes): the swh identifier of the directory to compare to
track_renaming (bool): whether or not to track files renaming
Returns:
list: A list of dict representing the changes between the two
revisions. Each dict contains the following entries:
- *type*: a string describing the type of change
('insert' / 'delete' / 'modify' / 'rename')
- *from*: a dict containing the directory entry metadata in the
from revision (None in case of an insertion)
- *from_path*: bytes string corresponding to the absolute path
of the from revision entry (None in case of an insertion)
- *to*: a dict containing the directory entry metadata in the
to revision (None in case of a deletion)
- *to_path*: bytes string corresponding to the absolute path
of the to revision entry (None in case of a deletion)
The returned list is sorted in lexicographic depth-first order
according to the value of the *to_path* field.
"""
changes = _RevisionChangesList(storage, track_renaming)
it = DoubleDirectoryIterator(storage, from_dir, to_dir)
while True:
r = it.remaining()
if r == Remaining.NoMoreFiles:
break
elif r == Remaining.OnlyFromFilesRemain:
changes.add_recursive_delete(it.it_from)
it.next_from()
elif r == Remaining.OnlyToFilesRemain:
changes.add_recursive_insert(it.it_to)
it.next_to()
else:
_diff_elts(changes, it)
return changes.result
def diff_revisions(storage, from_rev, to_rev, track_renaming=False):
"""
Compute the differential between two revisions,
i.e. the list of file changes between the two associated directories.
Args:
storage (swh.storage.storage.Storage): instance of a swh
storage (either local or remote, for optimal performance
the use of a local storage is recommended)
from_rev (bytes): the identifier of the revision to compare from
to_rev (bytes): the identifier of the revision to compare to
track_renaming (bool): whether or not to track files renaming
Returns:
list: A list of dict describing the introduced file changes
(see :func:`swh.storage.algos.diff.diff_directories`).
"""
from_dir = None
if from_rev:
from_dir = _get_rev(storage, from_rev)['directory']
to_dir = _get_rev(storage, to_rev)['directory']
return diff_directories(storage, from_dir, to_dir, track_renaming)
def diff_revision(storage, revision, track_renaming=False):
"""
Computes the differential between a revision and its first parent.
If the revision has no parents, the directory to compare from
is considered as empty.
In other words, it computes the file changes introduced in a
specific revision.
Args:
storage (swh.storage.storage.Storage): instance of a swh
storage (either local or remote, for optimal performance
the use of a local storage is recommended)
revision (bytes): the identifier of the revision from which to
compute the introduced changes.
track_renaming (bool): whether or not to track files renaming
Returns:
list: A list of dict describing the introduced file changes
(see :func:`swh.storage.algos.diff.diff_directories`).
"""
rev_data = _get_rev(storage, revision)
parent = None
if rev_data['parents']:
parent = rev_data['parents'][0]
return diff_revisions(storage, parent, revision, track_renaming)
diff --git a/swh/storage/algos/dir_iterators.py b/swh/storage/algos/dir_iterators.py
index 37d73326c..897d0a82c 100644
--- a/swh/storage/algos/dir_iterators.py
+++ b/swh/storage/algos/dir_iterators.py
@@ -1,347 +1,345 @@
# Copyright (C) 2018 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
# Utility module to iterate on directory trees.
# The implementation is inspired from the work of Alberto Cortés
# for the go-git project. For more details, you can refer to:
# - this blog post: https://blog.sourced.tech/post/difftree/
# - the reference implementation in go:
# https://github.com/src-d/go-git/tree/master/utils/merkletrie
from enum import Enum
+from swh.model.hashutil import hash_to_bytes
from swh.model.identifiers import directory_identifier
# get the hash identifier for an empty directory
-_empty_dir_hash = directory_identifier({'entries': []})
+_empty_dir_hash = hash_to_bytes(directory_identifier({'entries': []}))
def _get_dir(storage, dir_id):
"""
Return directory data from swh storage.
"""
return storage.directory_ls(dir_id) if dir_id else []
class DirectoryIterator(object):
"""
Helper class used to iterate on a directory tree in a depth-first search
way with some additional features:
- sibling nodes are iterated in lexicographic order by name
- it is possible to skip the visit of sub-directories nodes
for efficiency reasons when comparing two trees (no need to
go deeper if two directories have the same hash)
"""
def __init__(self, storage, dir_id, base_path=b''):
"""
Args:
storage (swh.storage.storage.Storage): instance of swh storage
(either local or remote)
dir_id (bytes): identifier of a root directory
base_path (bytes): optional base path used when traversing
a sub-directory
"""
self.storage = storage
self.root_dir_id = dir_id
self.base_path = base_path
self.restart()
def restart(self):
"""
Restart the iteration at the beginning.
"""
# stack of frames representing currently visited directories:
# the root directory is at the bottom while the current one
# is at the top
self.frames = []
self._push_dir_frame(self.root_dir_id)
self.has_started = False
def _push_dir_frame(self, dir_id):
"""
Visit a sub-directory by pushing a new frame to the stack.
Each frame is itself a stack of directory entries.
Args:
dir_id (bytes): identifier of a root directory
"""
- if dir_id:
- if dir_id == _empty_dir_hash:
- self.frames.append([])
- else:
- # get directory entries
- dir_data = _get_dir(self.storage, dir_id)
- # sort them in lexicographical order
- dir_data = sorted(dir_data, key=lambda e: e['name'])
- # reverse the ordering in order to unstack the "smallest"
- # entry each time the iterator advances
- dir_data.reverse()
- # push the directory frame to the main stack
- self.frames.append(dir_data)
+ # get directory entries
+ dir_data = _get_dir(self.storage, dir_id)
+ # sort them in lexicographical order
+ dir_data = sorted(dir_data, key=lambda e: e['name'])
+ # reverse the ordering in order to unstack the "smallest"
+ # entry each time the iterator advances
+ dir_data.reverse()
+ # push the directory frame to the main stack
+ self.frames.append(dir_data)
def top(self):
"""
Returns:
list: The top frame of the main directories stack
"""
if not self.frames:
return None
return self.frames[-1]
def current(self):
"""
Returns:
dict: The current visited directory entry, i.e. the
top element from the top frame
"""
top_frame = self.top()
if not top_frame:
return None
return top_frame[-1]
def current_hash(self):
"""
Returns:
bytes: The hash value of the currently visited directory
entry
"""
return self.current()['target']
def current_perms(self):
"""
Returns:
int: The permissions value of the currently visited directory
entry
"""
return self.current()['perms']
def current_path(self):
"""
Returns:
str: The absolute path from the root directory of
the currently visited directory entry
"""
top_frame = self.top()
if not top_frame:
return None
path = []
for frame in self.frames:
path.append(frame[-1]['name'])
return self.base_path + b'/'.join(path)
def current_is_dir(self):
"""
Returns:
bool: If the currently visited directory entry is
a directory
"""
return self.current()['type'] == 'dir'
def _advance(self, descend):
"""
Advance in the tree iteration.
Args:
descend (bool): whether or not to push a new frame
if the currently visited element is a sub-directory
Returns:
dict: The description of the newly visited directory entry
"""
current = self.current()
if not self.has_started or not current:
self.has_started = True
return current
- if descend and self.current_is_dir():
+ if descend and self.current_is_dir() \
+ and current['target'] != _empty_dir_hash:
self._push_dir_frame(current['target'])
else:
self.drop()
return self.current()
def next(self):
"""
Advance the tree iteration by dropping the current visited
directory entry from the top frame. If the top frame ends up empty,
the operation is recursively applied to remove all empty frames
as the tree is climbed up towards its root.
Returns:
dict: The description of the newly visited directory entry
"""
return self._advance(False)
def step(self):
"""
Advance the tree iteration like the next operation with the
difference that if the current visited element is a sub-directory
a new frame representing its content is pushed to the main stack.
Returns:
dict: The description of the newly visited directory entry
"""
return self._advance(True)
def drop(self):
"""
Drop the current visited element from the top frame.
If the frame ends up empty, the operation is recursively
applied.
"""
frame = self.top()
if not frame:
return
frame.pop()
if not frame:
self.frames.pop()
self.drop()
class Remaining(Enum):
"""
Enum to represent the current state when iterating
on both directory trees at the same time.
"""
NoMoreFiles = 0
OnlyToFilesRemain = 1
OnlyFromFilesRemain = 2
BothHaveFiles = 3
class DoubleDirectoryIterator(object):
"""
Helper class to traverse two directory trees at the same
time and compare their contents to detect changes between them.
"""
def __init__(self, storage, dir_from, dir_to):
"""
Args:
storage: instance of swh storage
dir_from (bytes): hash identifier of the from directory
dir_to (bytes): hash identifier of the to directory
"""
self.storage = storage
self.dir_from = dir_from
self.dir_to = dir_to
self.restart()
def restart(self):
"""
Restart the double iteration at the beginning.
"""
# initialize custom dfs iterators for the two directories
self.it_from = DirectoryIterator(self.storage, self.dir_from)
self.it_to = DirectoryIterator(self.storage, self.dir_to)
# grab the first element of each iterator
self.it_from.next()
self.it_to.next()
def next_from(self):
"""
Apply the next operation on the from iterator.
"""
self.it_from.next()
def next_to(self):
"""
Apply the next operation on the to iterator.
"""
self.it_to.next()
def next_both(self):
"""
Apply the next operation on both iterators.
"""
self.next_from()
self.next_to()
def step_from(self):
"""
Apply the step operation on the from iterator.
"""
self.it_from.step()
def step_to(self):
"""
Apply the step operation on the from iterator.
"""
self.it_to.step()
def step_both(self):
"""
Apply the step operation on the both iterators.
"""
self.step_from()
self.step_to()
def remaining(self):
"""
Returns:
Remaining: the current state of the double iteration
"""
from_current = self.it_from.current()
to_current = self.it_to.current()
# no more files to iterate in both iterators
if not from_current and not to_current:
return Remaining.NoMoreFiles
# still some files to iterate in the to iterator
elif not from_current and to_current:
return Remaining.OnlyToFilesRemain
# still some files to iterate in the from iterator
elif from_current and not to_current:
return Remaining.OnlyFromFilesRemain
# still files to iterate in the both iterators
else:
return Remaining.BothHaveFiles
def compare(self):
"""
Compare the current iterated directory entries in both iterators
and return the comparison status.
Returns:
dict: The status of the comparison with the following bool values:
* *same_hash*: indicates if the two entries have the same hash
* *same_perms*: indicates if the two entries have the same
permissions
* *both_are_dirs*: indicates if the two entries are directories
* *both_are_files*: indicates if the two entries are regular
files
* *file_and_dir*: indicates if one of the entry is a directory
and the other a regular file
* *from_is_empty_dir*: indicates if the from entry is the
empty directory
* *from_is_empty_dir*: indicates if the to entry is the
empty directory
"""
from_current_hash = self.it_from.current_hash()
to_current_hash = self.it_to.current_hash()
from_current_perms = self.it_from.current_perms()
to_current_perms = self.it_to.current_perms()
from_is_dir = self.it_from.current_is_dir()
to_is_dir = self.it_to.current_is_dir()
status = {}
# compare hash
status['same_hash'] = from_current_hash == to_current_hash
# compare permissions
status['same_perms'] = from_current_perms == to_current_perms
# check if both elements are directories
status['both_are_dirs'] = from_is_dir and to_is_dir
# check if both elements are regular files
status['both_are_files'] = not from_is_dir and not to_is_dir
# check if one element is a directory, the other a regular file
status['file_and_dir'] = (not status['both_are_dirs'] and
not status['both_are_files'])
# check if the from element is the empty directory
status['from_is_empty_dir'] = (from_is_dir and
from_current_hash == _empty_dir_hash)
# check if the to element is the empty directory
status['to_is_empty_dir'] = (to_is_dir and
to_current_hash == _empty_dir_hash)
return status
diff --git a/swh/storage/tests/algos/test_diff.py b/swh/storage/tests/algos/test_diff.py
index 0a2f6d660..6551e9b39 100644
--- a/swh/storage/tests/algos/test_diff.py
+++ b/swh/storage/tests/algos/test_diff.py
@@ -1,368 +1,374 @@
# Copyright (C) 2018 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
# flake8: noqa
import unittest
from nose.tools import istest, nottest
from unittest.mock import patch
+from swh.model.hashutil import hash_to_bytes
from swh.model.identifiers import directory_identifier
from swh.storage.algos import diff
class DirectoryModel(object):
"""
Quick and dirty directory model to ease the writing
of revision trees differential tests.
"""
def __init__(self, name=''):
self.data = {}
self.data['name'] = name
self.data['perms'] = 16384
self.data['type'] = 'dir'
self.data['entries'] = []
self.data['entry_idx'] = {}
def __getitem__(self, item):
if item == 'target':
- return directory_identifier(self)
+ return hash_to_bytes(directory_identifier(self))
else:
return self.data[item]
def add_file(self, path, sha1=None):
path_parts = path.split(b'/')
if len(path_parts) == 1:
self['entry_idx'][path] = len(self['entries'])
self['entries'].append({
- 'target': sha1,
+ 'target': hash_to_bytes(sha1),
'name': path,
'perms': 33188,
'type': 'file'
})
else:
if not path_parts[0] in self['entry_idx']:
self['entry_idx'][path_parts[0]] = len(self['entries'])
self['entries'].append(DirectoryModel(path_parts[0]))
if path_parts[1]:
dir_idx = self['entry_idx'][path_parts[0]]
self['entries'][dir_idx].add_file(b'/'.join(path_parts[1:]), sha1)
def get_hash_data(self, entry_hash):
if self['target'] == entry_hash:
ret = []
for e in self['entries']:
ret.append({
'target': e['target'],
'name': e['name'],
'perms': e['perms'],
'type': e['type']
})
return ret
else:
for e in self['entries']:
if e['type'] == 'file' and e['target'] == entry_hash:
return e
elif e['type'] == 'dir':
data = e.get_hash_data(entry_hash)
if data:
return data
return None
def get_path_data(self, path):
path_parts = path.split(b'/')
entry_idx = self['entry_idx'][path_parts[0]]
entry = self['entries'][entry_idx]
if len(path_parts) == 1:
return {
'target': entry['target'],
'name': entry['name'],
'perms': entry['perms'],
'type': entry['type']
}
else:
return entry.get_path_data(b'/'.join(path_parts[1:]))
@patch('swh.storage.algos.diff._get_rev')
@patch('swh.storage.algos.dir_iterators._get_dir')
class TestDiffRevisions(unittest.TestCase):
@nottest
def diff_revisions(self, rev_from, rev_to, from_dir_model, to_dir_model,
expected_changes, mock_get_dir, mock_get_rev):
+ rev_from_bytes = hash_to_bytes(rev_from)
+ rev_to_bytes = hash_to_bytes(rev_to)
+
def _get_rev(*args, **kwargs):
- if args[1] == rev_from:
+ if args[1] == rev_from_bytes:
return {'directory': from_dir_model['target']}
else:
return {'directory': to_dir_model['target']}
def _get_dir(*args, **kwargs):
- return from_dir_model.get_hash_data(args[1]) or \
- to_dir_model.get_hash_data(args[1])
+ from_dir = from_dir_model.get_hash_data(args[1])
+ to_dir = to_dir_model.get_hash_data(args[1])
+ return from_dir if from_dir != None else to_dir
mock_get_rev.side_effect = _get_rev
mock_get_dir.side_effect = _get_dir
- changes = diff.diff_revisions(None, rev_from, rev_to, track_renaming=True)
+ changes = diff.diff_revisions(None, rev_from_bytes, rev_to_bytes,
+ track_renaming=True)
self.assertEqual(changes, expected_changes)
@istest
def test_insert_delete(self, mock_get_dir, mock_get_rev):
rev_from = '898ff03e1e7925ecde3da66327d3cdc7e07625ba'
rev_to = '647c3d381e67490e82cdbbe6c96e46d5e1628ce2'
from_dir_model = DirectoryModel()
to_dir_model = DirectoryModel()
to_dir_model.add_file(b'file1', 'ea15f54ca215e7920c60f564315ebb7f911a5204')
to_dir_model.add_file(b'file2', '3e5faecb3836ffcadf82cc160787e35d4e2bec6a')
to_dir_model.add_file(b'file3', '2ae33b2984974d35eababe4890d37fbf4bce6b2c')
expected_changes = \
[{
'type': 'insert',
'from': None,
'from_path': None,
'to': to_dir_model.get_path_data(b'file1'),
'to_path': b'file1'
},
{
'type': 'insert',
'from': None,
'from_path': None,
'to': to_dir_model.get_path_data(b'file2'),
'to_path': b'file2'
},
{
'type': 'insert',
'from': None,
'from_path': None,
'to': to_dir_model.get_path_data(b'file3'),
'to_path': b'file3'
}]
self.diff_revisions(rev_from, rev_to, from_dir_model,
to_dir_model, expected_changes,
mock_get_dir, mock_get_rev)
from_dir_model = DirectoryModel()
from_dir_model.add_file(b'file1', 'ea15f54ca215e7920c60f564315ebb7f911a5204')
from_dir_model.add_file(b'file2', '3e5faecb3836ffcadf82cc160787e35d4e2bec6a')
from_dir_model.add_file(b'file3', '2ae33b2984974d35eababe4890d37fbf4bce6b2c')
to_dir_model = DirectoryModel()
expected_changes = \
[{
'type': 'delete',
'from': from_dir_model.get_path_data(b'file1'),
'from_path': b'file1',
'to': None,
'to_path': None
},
{
'type': 'delete',
'from': from_dir_model.get_path_data(b'file2'),
'from_path': b'file2',
'to': None,
'to_path': None
},
{
'type': 'delete',
'from': from_dir_model.get_path_data(b'file3'),
'from_path': b'file3',
'to': None,
'to_path': None
}]
self.diff_revisions(rev_from, rev_to, from_dir_model,
to_dir_model, expected_changes,
mock_get_dir, mock_get_rev)
@istest
def test_onelevel_diff(self, mock_get_dir, mock_get_rev):
rev_from = '898ff03e1e7925ecde3da66327d3cdc7e07625ba'
rev_to = '647c3d381e67490e82cdbbe6c96e46d5e1628ce2'
from_dir_model = DirectoryModel()
from_dir_model.add_file(b'file1', 'ea15f54ca215e7920c60f564315ebb7f911a5204')
from_dir_model.add_file(b'file2', 'f4a96b2000be83b61254d107046fa9777b17eb34')
from_dir_model.add_file(b'file3', 'd3c00f9396c6d0277727cec522ff6ad1ea0bc2da')
to_dir_model = DirectoryModel()
to_dir_model.add_file(b'file2', '3ee0f38ee0ea23cc2c8c0b9d66b27be4596b002b')
to_dir_model.add_file(b'file3', 'd3c00f9396c6d0277727cec522ff6ad1ea0bc2da')
to_dir_model.add_file(b'file4', '40460b9653b1dc507e1b6eb333bd4500634bdffc')
expected_changes = \
[{
'type': 'delete',
'from': from_dir_model.get_path_data(b'file1'),
'from_path': b'file1',
'to': None,
'to_path': None},
{
'type': 'modify',
'from': from_dir_model.get_path_data(b'file2'),
'from_path': b'file2',
'to': to_dir_model.get_path_data(b'file2'),
'to_path': b'file2'},
{
'type': 'insert',
'from': None,
'from_path': None,
'to': to_dir_model.get_path_data(b'file4'),
'to_path': b'file4'
}]
self.diff_revisions(rev_from, rev_to, from_dir_model,
to_dir_model, expected_changes,
mock_get_dir, mock_get_rev)
@istest
def test_twolevels_diff(self, mock_get_dir, mock_get_rev):
rev_from = '898ff03e1e7925ecde3da66327d3cdc7e07625ba'
rev_to = '647c3d381e67490e82cdbbe6c96e46d5e1628ce2'
from_dir_model = DirectoryModel()
from_dir_model.add_file(b'file1', 'ea15f54ca215e7920c60f564315ebb7f911a5204')
from_dir_model.add_file(b'dir1/file1', '8335fca266811bac7ae5c8e1621476b4cf4156b6')
from_dir_model.add_file(b'dir1/file2', 'a6127d909e79f1fcb28bbf220faf86e7be7831e5')
from_dir_model.add_file(b'dir1/file3', '18049b8d067ce1194a7e1cce26cfa3ae4242a43d')
from_dir_model.add_file(b'file2', 'd3c00f9396c6d0277727cec522ff6ad1ea0bc2da')
to_dir_model = DirectoryModel()
to_dir_model.add_file(b'file1', '3ee0f38ee0ea23cc2c8c0b9d66b27be4596b002b')
to_dir_model.add_file(b'dir1/file2', 'de3548b32a8669801daa02143a66dae21fe852fd')
to_dir_model.add_file(b'dir1/file3', '18049b8d067ce1194a7e1cce26cfa3ae4242a43d')
to_dir_model.add_file(b'dir1/file4', 'f5c3f42aec5fe7b92276196c350cbadaf4c51f87')
to_dir_model.add_file(b'file2', 'd3c00f9396c6d0277727cec522ff6ad1ea0bc2da')
expected_changes = \
[{
'type': 'delete',
'from': from_dir_model.get_path_data(b'dir1/file1'),
'from_path': b'dir1/file1',
'to': None,
'to_path': None
},
{
'type': 'modify',
'from': from_dir_model.get_path_data(b'dir1/file2'),
'from_path': b'dir1/file2',
'to': to_dir_model.get_path_data(b'dir1/file2'),
'to_path': b'dir1/file2'
},
{
'type': 'insert',
'from': None,
'from_path': None,
'to': to_dir_model.get_path_data(b'dir1/file4'),
'to_path': b'dir1/file4'
},
{
'type': 'modify',
'from': from_dir_model.get_path_data(b'file1'),
'from_path': b'file1',
'to': to_dir_model.get_path_data(b'file1'),
'to_path': b'file1'
}]
self.diff_revisions(rev_from, rev_to, from_dir_model,
to_dir_model, expected_changes,
mock_get_dir, mock_get_rev)
@istest
def test_insert_delete_empty_dirs(self, mock_get_dir, mock_get_rev):
rev_from = '898ff03e1e7925ecde3da66327d3cdc7e07625ba'
rev_to = '647c3d381e67490e82cdbbe6c96e46d5e1628ce2'
from_dir_model = DirectoryModel()
from_dir_model.add_file(b'dir3/file1', 'ea15f54ca215e7920c60f564315ebb7f911a5204')
to_dir_model = DirectoryModel()
to_dir_model.add_file(b'dir3/file1', 'ea15f54ca215e7920c60f564315ebb7f911a5204')
to_dir_model.add_file(b'dir3/dir1/')
expected_changes = \
[{
'type': 'insert',
'from': None,
'from_path': None,
'to': to_dir_model.get_path_data(b'dir3/dir1'),
'to_path': b'dir3/dir1'
}]
self.diff_revisions(rev_from, rev_to, from_dir_model,
to_dir_model, expected_changes,
mock_get_dir, mock_get_rev)
from_dir_model = DirectoryModel()
from_dir_model.add_file(b'dir1/dir2/')
from_dir_model.add_file(b'dir1/file1', 'ea15f54ca215e7920c60f564315ebb7f911a5204')
to_dir_model = DirectoryModel()
to_dir_model.add_file(b'dir1/file1', 'ea15f54ca215e7920c60f564315ebb7f911a5204')
expected_changes = \
[{
'type': 'delete',
'from': from_dir_model.get_path_data(b'dir1/dir2'),
'from_path': b'dir1/dir2',
'to': None,
'to_path': None
}]
self.diff_revisions(rev_from, rev_to, from_dir_model,
to_dir_model, expected_changes,
mock_get_dir, mock_get_rev)
@istest
def test_track_renaming(self, mock_get_dir, mock_get_rev):
rev_from = '898ff03e1e7925ecde3da66327d3cdc7e07625ba'
rev_to = '647c3d381e67490e82cdbbe6c96e46d5e1628ce2'
from_dir_model = DirectoryModel()
from_dir_model.add_file(b'file1_oldname', 'ea15f54ca215e7920c60f564315ebb7f911a5204')
from_dir_model.add_file(b'dir1/file1_oldname', 'ea15f54ca215e7920c60f564315ebb7f911a5204')
from_dir_model.add_file(b'file2_oldname', 'd3c00f9396c6d0277727cec522ff6ad1ea0bc2da')
to_dir_model = DirectoryModel()
to_dir_model.add_file(b'dir1/file1_newname', 'ea15f54ca215e7920c60f564315ebb7f911a5204')
to_dir_model.add_file(b'dir2/file1_newname', 'ea15f54ca215e7920c60f564315ebb7f911a5204')
to_dir_model.add_file(b'file2_newname', 'd3c00f9396c6d0277727cec522ff6ad1ea0bc2da')
expected_changes = \
[{
'type': 'rename',
'from': from_dir_model.get_path_data(b'dir1/file1_oldname'),
'from_path': b'dir1/file1_oldname',
'to': to_dir_model.get_path_data(b'dir1/file1_newname'),
'to_path': b'dir1/file1_newname'
},
{
'type': 'rename',
'from': from_dir_model.get_path_data(b'file1_oldname'),
'from_path': b'file1_oldname',
'to': to_dir_model.get_path_data(b'dir2/file1_newname'),
'to_path': b'dir2/file1_newname'
},
{
'type': 'rename',
'from': from_dir_model.get_path_data(b'file2_oldname'),
'from_path': b'file2_oldname',
'to': to_dir_model.get_path_data(b'file2_newname'),
'to_path': b'file2_newname'
}]
self.diff_revisions(rev_from, rev_to, from_dir_model,
to_dir_model, expected_changes,
mock_get_dir, mock_get_rev)

File Metadata

Mime Type
text/x-diff
Expires
Fri, Jul 4, 11:31 AM (3 w, 2 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3452541

Event Timeline