Changeset View
Changeset View
Standalone View
Standalone View
swh/storage/in_memory.py
Show First 20 Lines • Show All 66 Lines • ▼ Show 20 Lines | def reset(self): | ||||
self._objects = defaultdict(list) | self._objects = defaultdict(list) | ||||
# ideally we would want a skip list for both fast inserts and searches | # ideally we would want a skip list for both fast inserts and searches | ||||
self._sorted_sha1s = [] | self._sorted_sha1s = [] | ||||
self.objstorage = get_objstorage('memory', {}) | self.objstorage = get_objstorage('memory', {}) | ||||
def check_config(self, *, check_write):
    """Check that the storage is configured and ready to go.

    Args:
        check_write (bool): accepted for interface compatibility; this
            implementation does not read it.

    Returns:
        bool: always ``True``.
    """
    return True
def _content_add(self, contents, with_data): | def _content_add(self, contents, with_data): | ||||
content_with_data = [] | content_with_data = [] | ||||
content_without_data = [] | content_without_data = [] | ||||
for content in contents: | for content in contents: | ||||
if content.status is None: | if content.status is None: | ||||
content.status = 'visible' | content.status = 'visible' | ||||
▲ Show 20 Lines • Show All 70 Lines • ▼ Show 20 Lines | def _content_add_absent(self, contents): | ||||
self._skipped_content_indexes[algo][content.get_hash(algo)] \ | self._skipped_content_indexes[algo][content.get_hash(algo)] \ | ||||
.add(key) | .add(key) | ||||
self._skipped_contents[key] = content | self._skipped_contents[key] = content | ||||
count += 1 | count += 1 | ||||
return count | return count | ||||
def _content_to_model(self, contents):
    """Convert content dicts into model objects.

    Takes an iterable of content dicts, optionally carrying an extra
    'origin' key, and yields one object built by ``Content.from_dict`` per
    input dict. The 'origin' key is dropped before the conversion.

    Args:
        contents (iterable): iterable of content dicts.

    Yields:
        the result of ``Content.from_dict`` for each input dict.
    """
    for content in contents:
        # Work on a copy so the caller's dict keeps its 'origin' key.
        content = content.copy()
        content.pop('origin', None)
        yield Content.from_dict(content)
def content_add(self, content):
    """Add content blobs to the storage

    Args:
        content (iterable): iterable of dictionaries representing
            individual pieces of content to add. Each dictionary has the
            following keys:

            - data (bytes): the actual content
            - length (int): content length (default: -1)
            - one key for each checksum algorithm in
              :data:`swh.model.hashutil.DEFAULT_ALGORITHMS`, mapped to the
              corresponding checksum
            - status (str): one of visible, hidden, absent
            - reason (str): if status = absent, the reason why
            - origin (int): if status = absent, the origin we saw the
              content in

    Raises:
        HashCollision in case of collision

    Returns:
        Summary dict with the following key and associated values:

            content:add: New contents added
            content_bytes:add: Sum of the contents' length data
            skipped_content:add: New skipped contents (no data) added
    """
    # A single timestamp is shared by every content of the batch.
    now = datetime.datetime.now(tz=datetime.timezone.utc)
    content = [attr.evolve(c, ctime=now)
               for c in self._content_to_model(content)]
    return self._content_add(content, with_data=True)
def content_update(self, content, keys=None):
    """Update content blobs to the storage. Does nothing for unknown
    contents or skipped ones.

    Args:
        content (iterable): iterable of dictionaries representing
            individual pieces of content to update. Each dictionary has the
            following keys:

            - data (bytes): the actual content
            - length (int): content length (default: -1)
            - one key for each checksum algorithm in
              :data:`swh.model.hashutil.ALGORITHMS`, mapped to the
              corresponding checksum
            - status (str): one of visible, hidden, absent

        keys (list): List of keys (str) whose values needs an update, e.g.,
            new hash column. This implementation does not read it: every
            key present in each dictionary (except 'sha1', which is used
            for the lookup) is applied.

    Raises:
        NotImplementedError: if a journal writer is configured.
    """
    if self.journal_writer:
        raise NotImplementedError(
            'content_update is not yet supported with a journal_writer.')

    for cont_update in content:
        # Copy so that popping 'sha1' does not alter the caller's dict.
        cont_update = cont_update.copy()
        sha1 = cont_update.pop('sha1')

        # .get() keeps an unknown sha1 from being inserted into the index
        # (which would make it look present to membership checks), and
        # list() snapshots the key set because the loop body removes from
        # and adds to that very set.
        old_keys = list(self._content_indexes['sha1'].get(sha1, ()))
        for old_key in old_keys:
            old_cont = self._contents.pop(old_key)

            # Unregister the old object from every hash index.
            for algorithm in DEFAULT_ALGORITHMS:
                hash_ = old_cont.get_hash(algorithm)
                self._content_indexes[algorithm][hash_].remove(old_key)

            new_cont = attr.evolve(old_cont, **cont_update)
            new_key = self._content_key(new_cont)

            self._contents[new_key] = new_cont

            # Register the updated object under its (possibly new) hashes.
            for algorithm in DEFAULT_ALGORITHMS:
                hash_ = new_cont.get_hash(algorithm)
                self._content_indexes[algorithm][hash_].add(new_key)
def content_add_metadata(self, content):
    """Add content metadata to the storage (like `content_add`, but
    without inserting to the objstorage).

    Args:
        content (iterable): iterable of dictionaries representing
            individual pieces of content to add. Each dictionary has the
            following keys:

            - length (int): content length (default: -1)
            - one key for each checksum algorithm in
              :data:`swh.model.hashutil.DEFAULT_ALGORITHMS`, mapped to the
              corresponding checksum
            - status (str): one of visible, hidden, absent
            - reason (str): if status = absent, the reason why
            - origin (int): if status = absent, the origin we saw the
              content in
            - ctime (datetime): time of insertion in the archive

    Raises:
        HashCollision in case of collision

    Returns:
        Summary dict with the following key and associated values:

            content:add: New contents added
            skipped_content:add: New skipped contents (no data) added
    """
    # Unlike content_add, ctime is taken from the input as-is.
    content = list(self._content_to_model(content))
    return self._content_add(content, with_data=False)
def content_get(self, content):
    """Retrieve in bulk contents and their data.

    This function may yield more blobs than provided sha1 identifiers,
    in case they collide.

    Args:
        content: iterable of sha1

    Yields:
        Dict[str, bytes]: Generates streams of contents as dict with their
            raw data:

            - sha1 (bytes): content id
            - data (bytes): content's raw data

        ``None`` is yielded in place of the dict when the object storage
        does not have the requested object.

    Raises:
        ValueError in case too many contents are requested.
        cf. BULK_BLOCK_CONTENT_LEN_MAX. As this is a generator, the error
        surfaces when iteration starts.

    """
    # FIXME: Make this method support slicing the `data`.
    # Materialize first so the size check also works for iterables that
    # do not implement len() (e.g. generators).
    content = list(content)
    if len(content) > BULK_BLOCK_CONTENT_LEN_MAX:
        raise ValueError(
            "Sending at most %s contents." % BULK_BLOCK_CONTENT_LEN_MAX)
    for obj_id in content:
        try:
            data = self.objstorage.get(obj_id)
        except ObjNotFoundError:
            yield None
            continue

        yield {'sha1': obj_id, 'data': data}
def content_get_range(self, start, end, limit=1000):
    """Retrieve contents within range [start, end] bound by limit.

    Note that this function may return more than one blob per hash. The
    limit is enforced with multiplicity (ie. two blobs with the same hash
    will count twice toward the limit).

    Args:
        **start** (bytes): Starting identifier range (expected smaller
                       than end)
        **end** (bytes): Ending identifier range (expected larger
                         than start)
        **limit** (int): Limit result (default to 1000)

    Returns:
        a dict with keys:
        - contents [dict]: list of contents in between the range.
        - next (bytes): There remains content in the range
          starting from this next sha1 (``None`` otherwise)

    Raises:
        ValueError: if ``limit`` is None.

    """
    if limit is None:
        raise ValueError('Development error: limit should not be None')

    # Jump straight to the first sha1 >= start in the sorted list.
    from_index = bisect.bisect_left(self._sorted_sha1s, start)
    sha1s = itertools.islice(self._sorted_sha1s, from_index, None)
    # One (sha1, key) pair per stored object, so colliding sha1s each
    # count toward the limit.
    sha1s = ((sha1, content_key)
             for sha1 in sha1s
             for content_key in self._content_indexes['sha1'][sha1])

    matched = []
    next_content = None
    for sha1, key in sha1s:
        if sha1 > end:
            break
        if len(matched) >= limit:
            # Page is full: remember where the next page has to start.
            next_content = sha1
            break
        matched.append(self._contents[key].to_dict())

    return {
        'contents': matched,
        'next': next_content,
    }
def content_get_partition(
        self, partition_id: int, nb_partitions: int, limit: int = 1000,
        page_token: str = None):
    """Splits contents into nb_partitions, and returns one of these based on
    partition_id (which must be in [0, nb_partitions-1])

    There is no guarantee on how the partitioning is done, or the
    result order.

    Args:
        partition_id (int): index of the partition to fetch
        nb_partitions (int): total number of partitions to split into
        limit (int): Limit result (default to 1000)
        page_token (Optional[str]): opaque token used for pagination.

    Returns:
        a dict with keys:
          - contents (List[dict]): list of contents in the partition.
          - **next_page_token** (Optional[str]): opaque token to be used as
            `page_token` for retrieving the next page. If None, there are
            no more pages to gather.

    Raises:
        ValueError: if ``limit`` is None.
    """
    if limit is None:
        raise ValueError('Development error: limit should not be None')
    (start, end) = get_partition_bounds_bytes(
        partition_id, nb_partitions, SHA1_SIZE)
    if page_token:
        # Resume from the sha1 encoded in the token of the previous page.
        start = hash_to_bytes(page_token)
    if end is None:
        # No upper bound was computed: use the largest possible sha1.
        end = b'\xff'*SHA1_SIZE
    result = self.content_get_range(start, end, limit)
    result2 = {
        'contents': result['contents'],
        'next_page_token': None,
    }
    if result['next']:
        result2['next_page_token'] = hash_to_hex(result['next'])
    return result2
def content_get_metadata( | def content_get_metadata( | ||||
self, contents: List[bytes]) -> Dict[bytes, List[Dict]]: | self, contents: List[bytes]) -> Dict[bytes, List[Dict]]: | ||||
"""Retrieve content metadata in bulk | |||||
Args: | |||||
content: iterable of content identifiers (sha1) | |||||
Returns: | |||||
a dict with keys the content's sha1 and the associated value | |||||
either the existing content's metadata or None if the content does | |||||
not exist. | |||||
""" | |||||
result: Dict = {sha1: [] for sha1 in contents} | result: Dict = {sha1: [] for sha1 in contents} | ||||
for sha1 in contents: | for sha1 in contents: | ||||
if sha1 in self._content_indexes['sha1']: | if sha1 in self._content_indexes['sha1']: | ||||
objs = self._content_indexes['sha1'][sha1] | objs = self._content_indexes['sha1'][sha1] | ||||
# only 1 element as content_add_metadata would have raised a | # only 1 element as content_add_metadata would have raised a | ||||
# hash collision otherwise | # hash collision otherwise | ||||
for key in objs: | for key in objs: | ||||
d = self._contents[key].to_dict() | d = self._contents[key].to_dict() | ||||
Show All 14 Lines | def content_find(self, content): | ||||
if not found: | if not found: | ||||
return [] | return [] | ||||
keys = list(set.intersection(*found)) | keys = list(set.intersection(*found)) | ||||
return [self._contents[key].to_dict() for key in keys] | return [self._contents[key].to_dict() for key in keys] | ||||
def content_missing(self, content, key_hash='sha1'):
    """List content missing from storage

    Args:
        content ([dict]): iterable of dictionaries whose keys are
                          either 'length' or an item of
                          :data:`swh.model.hashutil.ALGORITHMS`;
                          mapped to the corresponding checksum
                          (or length).

        key_hash (str): name of the column to use as hash id
                        result (default: 'sha1')

    Yields:
        bytes: missing content ids (as per the key_hash column)

    """
    for cont in content:
        for (algo, hash_) in cont.items():
            if algo not in DEFAULT_ALGORITHMS:
                # e.g. 'length': not a checksum, nothing to look up
                continue
            if hash_ not in self._content_indexes.get(algo, []):
                # One unknown hash is enough to report the content.
                yield cont[key_hash]
                break
        else:
            # Every provided hash is indexed: fall back to the status of
            # the objects matching all of them.
            # NOTE(review): the statuses documented elsewhere in this file
            # are visible/hidden/absent -- confirm that 'missing' is the
            # intended value here.
            for result in self.content_find(cont):
                if result['status'] == 'missing':
                    yield cont[key_hash]
def content_missing_per_sha1(self, contents):
    """List content missing from storage based only on sha1.

    Args:
        contents: Iterable of sha1 to check for absence.

    Yields:
        the sha1s that are not keys of the sha1 index, in input order

    Raises:
        TODO: an exception when we get a hash collision.

    """
    for content in contents:
        if content not in self._content_indexes['sha1']:
            yield content
def content_missing_per_sha1_git(self, contents):
    """List content missing from storage based only on sha1_git.

    Args:
        contents: An iterable of content id (sha1_git)

    Yields:
        the sha1_gits that are not keys of the sha1_git index, in input
        order

    """
    for content in contents:
        if content not in self._content_indexes['sha1_git']:
            yield content
def skipped_content_missing(self, contents):
    """List all skipped_content missing from storage

    Args:
        contents: Iterable of content entries to check for a skipped
            content entry; each one is passed as-is to
            ``_content_key_algorithm``.

    Yields:
        the input entries for which at least one (key, algorithm) pair,
        blake2s256 excepted, is absent from the skipped-content indexes

    """
    for content in contents:
        for (key, algorithm) in self._content_key_algorithm(content):
            if algorithm == 'blake2s256':
                # blake2s256 is not used to decide whether the entry is
                # known.
                continue
            if key not in self._skipped_content_indexes[algorithm]:
                # Every other algorithm's index must contain the key,
                # otherwise the entry is reported as missing (once).
                yield content
                break
def content_get_random(self):
    """Finds a random content id.

    Returns:
        a sha1_git picked among the keys of the sha1_git index, or None
        when that index is empty (same convention as
        `directory_get_random`)
    """
    candidates = list(self._content_indexes['sha1_git'])
    if not candidates:
        # random.choice raises IndexError on an empty sequence.
        return None
    return random.choice(candidates)
def directory_add(self, directories):
    """Add directories to the storage

    Args:
        directories (iterable): iterable of dictionaries representing the
            individual directories to add. Each dict has the following
            keys:

            - id (sha1_git): the id of the directory to add
            - entries (list): list of dicts for each entry in the
                  directory.  Each dict has the following keys:

                  - name (bytes)
                  - type (one of 'file', 'dir', 'rev'): type of the
                    directory entry (file, directory, revision)
                  - target (sha1_git): id of the object pointed at by the
                    directory entry
                  - perms (int): entry permissions

    Returns:
        Summary dict of keys with associated count as values:

            directory:add: Number of directories actually added

    """
    # Materialized because the input is traversed twice below.
    directories = list(directories)
    if self.journal_writer:
        # Only directories not stored yet are sent to the journal.
        self.journal_writer.write_additions(
            'directory',
            (dir_ for dir_ in directories
             if dir_['id'] not in self._directories))

    directories = [Directory.from_dict(d) for d in directories]

    count = 0
    for directory in directories:
        if directory.id not in self._directories:
            count += 1
            self._directories[directory.id] = directory
            self._objects[directory.id].append(
                ('directory', directory.id))

    return {'directory:add': count}
def directory_missing(self, directories):
    """List directories missing from storage

    Args:
        directories (iterable): an iterable of directory ids

    Yields:
        missing directory ids

    """
    # `dir_id` rather than `id`, to avoid shadowing the builtin.
    for dir_id in directories:
        if dir_id not in self._directories:
            yield dir_id
def _join_dentry_to_content(self, dentry): | def _join_dentry_to_content(self, dentry): | ||||
keys = ( | keys = ( | ||||
'status', | 'status', | ||||
'sha1', | 'sha1', | ||||
Show All 19 Lines | def _directory_ls(self, directory_id, recursive, prefix=b''): | ||||
ret['name'] = prefix + ret['name'] | ret['name'] = prefix + ret['name'] | ||||
ret['dir_id'] = directory_id | ret['dir_id'] = directory_id | ||||
yield ret | yield ret | ||||
if recursive and ret['type'] == 'dir': | if recursive and ret['type'] == 'dir': | ||||
yield from self._directory_ls( | yield from self._directory_ls( | ||||
ret['target'], True, prefix + ret['name'] + b'/') | ret['target'], True, prefix + ret['name'] + b'/') | ||||
def directory_ls(self, directory, recursive=False):
    """Get entries for one directory.

    Args:
        - directory: the directory to list entries from.
        - recursive: if flag on, this list recursively from this directory.

    Yields:
        The entries produced by ``_directory_ls`` for that directory.

    If `recursive=True`, names in the path of a dir/file not at the
    root are concatenated with a slash (`/`).
    """
    yield from self._directory_ls(directory, recursive)
def directory_entry_get_by_path(self, directory, paths):
    """Get the directory entry (either file or dir) from directory with path.

    Args:
        - directory: sha1 of the top level directory
        - paths: path to lookup from the top level directory. From left
          (top) to right (bottom).

    Returns:
        The corresponding directory entry if found, None otherwise.

    """
    # The lookup starts at the top level, hence the empty name prefix.
    return self._directory_entry_get_by_path(directory, paths, b'')
def directory_get_random(self):
    """Finds a random directory id.

    Returns:
        a sha1_git if any, None when no directory is stored

    """
    if not self._directories:
        return None
    return random.choice(list(self._directories))
def _directory_entry_get_by_path(self, directory, paths, prefix): | def _directory_entry_get_by_path(self, directory, paths, prefix): | ||||
if not paths: | if not paths: | ||||
return | return | ||||
Show All 16 Lines | def _directory_entry_get_by_path(self, directory, paths, prefix): | ||||
if not first_item or first_item['type'] != 'dir': | if not first_item or first_item['type'] != 'dir': | ||||
return | return | ||||
return self._directory_entry_get_by_path( | return self._directory_entry_get_by_path( | ||||
first_item['target'], paths[1:], prefix + paths[0] + b'/') | first_item['target'], paths[1:], prefix + paths[0] + b'/') | ||||
def revision_add(self, revisions):
    """Add revisions to the storage

    Args:
        revisions (Iterable[dict]): iterable of dictionaries representing
            the individual revisions to add. Each dict has the following
            keys:

            - **id** (:class:`sha1_git`): id of the revision to add
            - **date** (:class:`dict`): date the revision was written
            - **committer_date** (:class:`dict`): date the revision got
              added to the origin
            - **type** (one of 'git', 'tar'): type of the
              revision added
            - **directory** (:class:`sha1_git`): the directory the
              revision points at
            - **message** (:class:`bytes`): the message associated with
              the revision
            - **author** (:class:`Dict[str, bytes]`): dictionary with
              keys: name, fullname, email
            - **committer** (:class:`Dict[str, bytes]`): dictionary with
              keys: name, fullname, email
            - **metadata** (:class:`jsonb`): extra information as
              dictionary
            - **synthetic** (:class:`bool`): revision's nature (tarball,
              directory creates synthetic revision`)
            - **parents** (:class:`list[sha1_git]`): the parents of
              this revision

    date dictionaries have the form defined in :mod:`swh.model`.

    Returns:
        Summary dict of keys with associated count as values

            revision:add: New objects actually stored

    """
    # Materialized because the input is traversed twice below.
    revisions = list(revisions)
    if self.journal_writer:
        # Only revisions not stored yet are sent to the journal.
        self.journal_writer.write_additions(
            'revision',
            (rev for rev in revisions
             if rev['id'] not in self._revisions))

    revisions = [Revision.from_dict(rev) for rev in revisions]

    count = 0
    for revision in revisions:
        if revision.id not in self._revisions:
            # Store the revision with the person objects returned by
            # _person_add for its committer and author.
            revision = attr.evolve(
                revision,
                committer=self._person_add(revision.committer),
                author=self._person_add(revision.author))
            self._revisions[revision.id] = revision
            self._objects[revision.id].append(
                ('revision', revision.id))
            count += 1

    return {'revision:add': count}
def revision_missing(self, revisions):
    """List revisions missing from storage

    Args:
        revisions (iterable): revision ids

    Yields:
        missing revision ids

    """
    # `rev_id` rather than `id`, to avoid shadowing the builtin.
    for rev_id in revisions:
        if rev_id not in self._revisions:
            yield rev_id
def revision_get(self, revisions):
    """Given a list of revision ids, return the matching revisions.

    Args:
        revisions: iterable of revision ids

    Yields:
        the ``to_dict()`` form of each stored revision, in input order
        (or ``None`` if a revision does not exist)

    """
    for rev_id in revisions:
        if rev_id in self._revisions:
            yield self._revisions[rev_id].to_dict()
        else:
            yield None
def _get_parent_revs(self, rev_id, seen, limit):
    """Yield the revision ``rev_id`` and its known ancestors as dicts.

    The traversal is depth-first, following each revision's parents in
    order, and is done with an explicit stack so that long histories do
    not hit Python's recursion limit.

    Args:
        rev_id: id of the revision to start from
        seen (set): ids already yielded; updated in place, which lets a
            caller share it across several starting revisions
        limit: when truthy, nothing more is yielded once ``seen`` holds
            that many ids

    Yields:
        the ``to_dict()`` form of each revision visited for the first
        time; ids absent from the storage are skipped
    """
    pending = [rev_id]
    while pending:
        if limit and len(seen) >= limit:
            return
        current = pending.pop()
        if current in seen or current not in self._revisions:
            continue
        seen.add(current)
        yield self._revisions[current].to_dict()
        # Pushed in reverse so that the first parent is popped (and its
        # whole ancestry explored) before the second one.
        pending.extend(reversed(list(self._revisions[current].parents)))
def revision_log(self, revisions, limit=None):
    """Fetch revision entry from the given root revisions.

    Args:
        revisions: array of root revision to lookup
        limit: limitation on the output result. Default to None.

    Yields:
        revision dicts reachable from the given roots; a revision
        reachable from several roots is yielded only once

    """
    # Shared across roots so that common ancestors are not repeated.
    seen = set()
    for rev_id in revisions:
        yield from self._get_parent_revs(rev_id, seen, limit)
def revision_shortlog(self, revisions, limit=None):
    """Fetch the shortlog for the given revisions

    Args:
        revisions: list of root revisions to lookup
        limit: depth limitation for the output

    Yields:
        (id, parents) tuples, one per entry of `revision_log`.

    """
    yield from ((rev['id'], rev['parents'])
                for rev in self.revision_log(revisions, limit))
def revision_get_random(self):
    """Finds a random revision id.

    Returns:
        a sha1_git, or None when no revision is stored (same convention
        as `directory_get_random`)
    """
    if not self._revisions:
        # random.choice raises IndexError on an empty sequence.
        return None
    return random.choice(list(self._revisions))
def release_add(self, releases):
    """Add releases to the storage

    Args:
        releases (Iterable[dict]): iterable of dictionaries representing
            the individual releases to add. Each dict has the following
            keys:

            - **id** (:class:`sha1_git`): id of the release to add
            - **revision** (:class:`sha1_git`): id of the revision the
              release points to
            - **date** (:class:`dict`): the date the release was made
            - **name** (:class:`bytes`): the name of the release
            - **comment** (:class:`bytes`): the comment associated with
              the release
            - **author** (:class:`Dict[str, bytes]`): dictionary with
              keys: name, fullname, email

    the date dictionary has the form defined in :mod:`swh.model`.

    Returns:
        Summary dict of keys with associated count as values

            release:add: New objects actually stored

    """
    # Materialized because the input is traversed twice below.
    releases = list(releases)
    if self.journal_writer:
        # Only releases not stored yet are sent to the journal.
        self.journal_writer.write_additions(
            'release',
            (rel for rel in releases
             if rel['id'] not in self._releases))

    releases = [Release.from_dict(rel) for rel in releases]

    count = 0
    for rel in releases:
        if rel.id not in self._releases:
            if rel.author:
                # NOTE(review): unlike revision_add, the value returned by
                # _person_add is discarded and the release is stored with
                # its original author -- confirm this is intended.
                self._person_add(rel.author)
            self._objects[rel.id].append(
                ('release', rel.id))
            self._releases[rel.id] = rel
            count += 1

    return {'release:add': count}
def release_missing(self, releases):
    """List releases missing from storage

    Args:
        releases: an iterable of release ids

    Yields:
        missing release ids, in input order

    """
    yield from (rel for rel in releases if rel not in self._releases)
def release_get(self, releases):
    """Given a list of sha1, return the releases's information

    Args:
        releases: list of sha1s

    Yields:
        dicts with the same keys as those given to `release_add`
        (or ``None`` if a release does not exist)

    """
    for rel_id in releases:
        if rel_id in self._releases:
            yield self._releases[rel_id].to_dict()
        else:
            yield None
def release_get_random(self):
    """Finds a random release id.

    Returns:
        a sha1_git, or None when no release is stored (same convention
        as `directory_get_random`)
    """
    if not self._releases:
        # random.choice raises IndexError on an empty sequence.
        return None
    return random.choice(list(self._releases))
def snapshot_add(self, snapshots):
    """Add snapshots to the storage, skipping ids already stored.

    Args:
        snapshots: an iterable of dicts, each converted with
            ``Snapshot.from_dict``. The resulting object must expose
            ``id`` and ``branches`` (a mapping keyed by branch name).

    Returns:
        dict: ``{'snapshot:add': n}`` where ``n`` is the number of
        snapshots actually stored by this call
    """
    added = 0
    for snapshot_dict in snapshots:
        snapshot = Snapshot.from_dict(snapshot_dict)
        # Also filters duplicates within the same batch, since earlier
        # items of the batch are already stored at this point.
        if snapshot.id in self._snapshots:
            continue

        if self.journal_writer:
            self.journal_writer.write_addition('snapshot', snapshot)

        # The sorted names are stored next to the snapshot so that
        # branch listing can bisect into them.
        branch_names = sorted(snapshot.branches)
        self._snapshots[snapshot.id] = (snapshot, branch_names)
        self._objects[snapshot.id].append(('snapshot', snapshot.id))
        added += 1

    return {'snapshot:add': added}
def snapshot_missing(self, snapshots):
    """Yield the snapshot ids that are not present in this storage.

    Args:
        snapshots (iterable): an iterable of snapshot ids

    Yields:
        every id of ``snapshots`` that is not a key of the stored
        snapshots, in input order
    """
    for snapshot_id in snapshots:
        if snapshot_id in self._snapshots:
            continue
        yield snapshot_id
def snapshot_get(self, snapshot_id):
    """Get the content, possibly partial, of the snapshot with this id.

    This simply calls :meth:`snapshot_get_branches` with its default
    arguments, so at most its default ``branches_count`` (1000) branches
    are returned. Use :meth:`snapshot_get_branches` directly to browse
    the whole set of branches.

    Args:
        snapshot_id (bytes): identifier of the snapshot

    Returns:
        whatever :meth:`snapshot_get_branches` returns for this id
    """
    result = self.snapshot_get_branches(snapshot_id)
    return result
def snapshot_get_by_origin_visit(self, origin, visit):
    """Get the snapshot, possibly partial, of a given origin visit.

    The snapshot is fetched through :meth:`snapshot_get`.

    Args:
        origin: the origin, as accepted by ``_get_origin_url``
        visit (int): the visit's identifier (1-based)

    Returns:
        ``None`` if the origin is unknown, if the visit id is out of
        range, or if the visit has no snapshot; otherwise the result of
        :meth:`snapshot_get` for the visit's snapshot.
    """
    origin_url = self._get_origin_url(origin)
    if not origin_url or origin_url not in self._origins:
        return None

    visits = self._origin_visits[origin_url]
    # Visit ids are 1-based positions in the list. Without the lower
    # bound, 0 or a negative id would silently index from the end of
    # the list and return another visit's snapshot.
    if not 1 <= visit <= len(visits):
        return None

    snapshot_id = visits[visit - 1].snapshot
    if not snapshot_id:
        return None
    return self.snapshot_get(snapshot_id)
def snapshot_get_latest(self, origin, allowed_statuses=None):
    """Get the snapshot, possibly partial, of the origin's latest visit.

    The visit is selected with :meth:`origin_visit_get_latest`, asking
    for a visit that has a snapshot, and the snapshot is then fetched
    with :meth:`snapshot_get`.

    Args:
        origin: the origin, as accepted by ``_get_origin_url``
        allowed_statuses (list of str): if given, only visits with one
            of these statuses are considered

    Returns:
        ``None`` if the origin is unknown or no visit qualifies;
        otherwise the result of :meth:`snapshot_get`.

    Raises:
        ValueError: if the selected visit points to a snapshot that
            :meth:`snapshot_get` cannot find.
    """
    origin_url = self._get_origin_url(origin)
    if not origin_url:
        return None

    latest_visit = self.origin_visit_get_latest(
        origin_url,
        allowed_statuses=allowed_statuses,
        require_snapshot=True)
    if not latest_visit or not latest_visit['snapshot']:
        return None

    snapshot = self.snapshot_get(latest_visit['snapshot'])
    if not snapshot:
        raise ValueError(
            'last origin visit references an unknown snapshot')
    return snapshot
def snapshot_count_branches(self, snapshot_id):
    """Count the branches of a snapshot, grouped by target type.

    Args:
        snapshot_id (bytes): identifier of the snapshot; it must be
            stored, otherwise the lookup raises ``KeyError``

    Returns:
        collections.Counter: maps each ``target_type.value`` found among
        the branches to its number of occurrences; falsy branches
        (e.g. ``None``) are counted under the ``None`` key
    """
    snapshot, _branch_names = self._snapshots[snapshot_id]
    counts = collections.Counter()
    for branch in snapshot.branches.values():
        target_type = branch.target_type.value if branch else None
        counts[target_type] += 1
    return counts
def snapshot_get_branches(self, snapshot_id, branches_from=b'', | def snapshot_get_branches(self, snapshot_id, branches_from=b'', | ||||
branches_count=1000, target_types=None): | branches_count=1000, target_types=None): | ||||
"""Get the content, possibly partial, of a snapshot with the given id | |||||
The branches of the snapshot are iterated in the lexicographical | |||||
order of their names. | |||||
Args: | |||||
snapshot_id (bytes): identifier of the snapshot | |||||
branches_from (bytes): optional parameter used to skip branches | |||||
whose name is lesser than it before returning them | |||||
branches_count (int): optional parameter used to restrain | |||||
the amount of returned branches | |||||
target_types (list): optional parameter used to filter the | |||||
target types of branch to return (possible values that can be | |||||
contained in that list are `'content', 'directory', | |||||
'revision', 'release', 'snapshot', 'alias'`) | |||||
Returns: | |||||
dict: None if the snapshot does not exist; | |||||
a dict with three keys otherwise: | |||||
* **id**: identifier of the snapshot | |||||
* **branches**: a dict of branches contained in the snapshot | |||||
whose keys are the branches' names. | |||||
* **next_branch**: the name of the first branch not returned | |||||
or :const:`None` if the snapshot has less than | |||||
`branches_count` branches after `branches_from` included. | |||||
""" | |||||
res = self._snapshots.get(snapshot_id) | res = self._snapshots.get(snapshot_id) | ||||
if res is None: | if res is None: | ||||
return None | return None | ||||
(snapshot, sorted_branch_names) = res | (snapshot, sorted_branch_names) = res | ||||
from_index = bisect.bisect_left( | from_index = bisect.bisect_left( | ||||
sorted_branch_names, branches_from) | sorted_branch_names, branches_from) | ||||
if target_types: | if target_types: | ||||
next_branch = None | next_branch = None | ||||
Show All 22 Lines | def snapshot_get_branches(self, snapshot_id, branches_from=b'', | ||||
return { | return { | ||||
'id': snapshot_id, | 'id': snapshot_id, | ||||
'branches': branches, | 'branches': branches, | ||||
'next_branch': next_branch, | 'next_branch': next_branch, | ||||
} | } | ||||
def snapshot_get_random(self):
    """Pick one stored snapshot id at random.

    Returns:
        one of the keys of the stored snapshots, chosen with
        ``random.choice``
    """
    known_ids = list(self._snapshots)
    return random.choice(known_ids)
def object_find_by_sha1_git(self, ids):
    """Return the objects recorded under each of the given ids.

    Args:
        ids: an iterable of sha1_gits

    Returns:
        dict: maps every input id to a list (empty if nothing is
        recorded for it) of dicts with keys:

        - sha1_git: the input id
        - type: the type of object found
    """
    found = {}
    for sha1_git in ids:
        # .get() rather than indexing, so that unknown ids do not get
        # an entry created in the objects mapping.
        entries = self._objects.get(sha1_git, [])
        found[sha1_git] = [
            {'sha1_git': sha1_git, 'type': entry[0]}
            for entry in entries
        ]
    return found
def _convert_origin(self, t):
    """Return ``t.to_dict()``, or ``None`` when ``t`` is ``None``."""
    return None if t is None else t.to_dict()
def origin_get(self, origins): | def origin_get(self, origins): | ||||
"""Return origins, either all identified by their ids or all | |||||
identified by urls. | |||||
Args: | |||||
origin: a list of dictionaries representing the individual | |||||
origins to find. | |||||
These dicts have either the key url (and optionally type): | |||||
- url (bytes): the url the origin points to | |||||
or the id: | |||||
- id (int): the origin's identifier | |||||
Returns: | |||||
dict: the origin dictionary with the keys: | |||||
- id: origin's id | |||||
- url: origin's url | |||||
Raises: | |||||
ValueError: if the keys does not match (url and type) nor id. | |||||
""" | |||||
if isinstance(origins, dict): | if isinstance(origins, dict): | ||||
# Old API | # Old API | ||||
return_single = True | return_single = True | ||||
origins = [origins] | origins = [origins] | ||||
else: | else: | ||||
return_single = False | return_single = False | ||||
# Sanity check to be error-compatible with the pgsql backend | # Sanity check to be error-compatible with the pgsql backend | ||||
Show All 20 Lines | def origin_get(self, origins): | ||||
if return_single: | if return_single: | ||||
assert len(results) == 1 | assert len(results) == 1 | ||||
return results[0] | return results[0] | ||||
else: | else: | ||||
return results | return results | ||||
def origin_get_by_sha1(self, sha1s):
    """Return origins identified by the sha1 of their URL.

    Args:
        sha1s (list[bytes]): a list of sha1s

    Returns:
        list: one entry per input sha1, in order: the origin converted
        with ``_convert_origin``, or ``None`` when no origin matches
    """
    results = []
    for sha1 in sha1s:
        origin = self._origins_by_sha1.get(sha1)
        results.append(self._convert_origin(origin))
    return results
def origin_get_range(self, origin_from=1, origin_count=100):
    """Yield up to ``origin_count`` origins with id >= ``origin_from``.

    Ids are the 1-based positions of origin URLs in the insertion-ordered
    ``_origins_by_id`` list, so results come out sorted by id.

    Args:
        origin_from (int): the minimum id of origins to retrieve; values
            below 1 are treated as 1
        origin_count (int): the maximum number of origins to retrieve

    Yields:
        dicts made of the converted origin plus an ``id`` key
    """
    origin_from = max(origin_from, 1)
    total = len(self._origins_by_id)
    if origin_from > total:
        return

    # Last id to return, clamped to the number of known origins.
    last_id = min(origin_from + origin_count - 1, total)
    for position in range(origin_from - 1, last_id):
        url = self._origins_by_id[position]
        origin = self._convert_origin(self._origins[url])
        yield {'id': position + 1, **origin}
def origin_list(self, page_token: Optional[str] = None, limit: int = 100
                ) -> dict:
    """Return one page of origins, ordered by URL.

    Args:
        page_token: token returned by a previous call; the page starts
            at the first URL that sorts at or after it. A falsy token
            starts from the beginning.
        limit: the maximum number of results to return

    Returns:
        dict: dict with the following keys:

        - **origins** (List[dict]): ``{'url': ...}`` dicts for this page
        - **next_page_token** (str, optional): token to pass as
          ``page_token`` to get the next page; absent when there are
          no more pages
    """
    sorted_urls = sorted(self._origins)
    start = bisect.bisect_left(sorted_urls, page_token) if page_token else 0
    end = start + limit

    page = {'origins': [{'url': url} for url in sorted_urls[start:end]]}
    if end < len(sorted_urls):
        # The token is simply the first URL of the next page.
        page['next_page_token'] = sorted_urls[end]
    return page
def origin_search(self, url_pattern, offset=0, limit=50,
                  regexp=False, with_visit=False):
    """Search for origins whose URL contains a string or matches a regexp.

    NOTE(review): as implemented here, matching uses a plain substring
    test or ``re.compile(url_pattern).search`` without flags, i.e. it is
    case-sensitive.

    Args:
        url_pattern (str): the string pattern to search for in origin urls
        offset (int): number of found origins to skip before returning
            results
        limit (int): the maximum number of found origins to return
        regexp (bool): if True, consider the provided pattern as a regular
            expression and return origins whose urls match it
        with_visit (bool): if True, keep only origins having at least
            one visit whose snapshot is stored in this storage

    Returns:
        list: the matching origins, converted with ``_convert_origin``,
        restricted to ``[offset:offset+limit]``
    """
    origins = map(self._convert_origin, self._origins.values())
    if regexp:
        pat = re.compile(url_pattern)
        origins = [orig for orig in origins if pat.search(orig['url'])]
    else:
        origins = [orig for orig in origins if url_pattern in orig['url']]

    if with_visit:
        # Test each visit's snapshot directly against the snapshot
        # mapping instead of rebuilding a set of all snapshot ids for
        # every candidate origin.
        known_snapshots = self._snapshots
        origins = [
            orig for orig in origins
            if any(ov.snapshot and ov.snapshot in known_snapshots
                   for ov in self._origin_visits[orig['url']])
        ]

    return origins[offset:offset+limit]
def origin_count(self, url_pattern, regexp=False, with_visit=False):
    """Count the origins that :meth:`origin_search` would find.

    The search is run with ``limit`` set to the total number of stored
    origins, so that no match is cut off.

    Args:
        url_pattern (str): the string pattern to search for in origin urls
        regexp (bool): if True, consider the provided pattern as a regular
            expression
        with_visit (bool): forwarded to :meth:`origin_search`

    Returns:
        int: The number of origins matching the search criterion.
    """
    matching = self.origin_search(
        url_pattern, regexp=regexp, with_visit=with_visit,
        limit=len(self._origins))
    return len(matching)
def origin_add(self, origins):
    """Add several origins to the storage.

    Each origin is added through :meth:`origin_add_one`, working on a
    deep copy so that the caller's dicts are never touched.

    Args:
        origins: iterable of dictionaries representing the individual
            origins, as accepted by :meth:`origin_add_one`

    Returns:
        list: the deep-copied origin dicts, in input order
    """
    copied = copy.deepcopy(list(origins))
    for origin in copied:
        self.origin_add_one(origin)
    return copied
def origin_add_one(self, origin):
    """Add a single origin to the storage, unless its URL is known.

    Args:
        origin: dictionary representing the origin to add, converted
            with ``Origin.from_dict``; the resulting object must expose
            ``url``

    Returns:
        the URL of the origin, whether it was just added or already
        existed
    """
    origin = Origin.from_dict(origin)
    url = origin.url
    if url in self._origins:
        return url

    if self.journal_writer:
        self.journal_writer.write_addition('origin', origin)

    # generate an origin_id because it is needed by origin_get_range.
    # TODO: remove this when we remove origin_get_range
    origin_id = len(self._origins) + 1
    self._origins_by_id.append(url)
    assert len(self._origins_by_id) == origin_id

    self._origins[url] = origin
    self._origins_by_sha1[origin_url_to_sha1(url)] = origin
    self._origin_visits[url] = []
    self._objects[url].append(('origin', url))

    return url
def origin_visit_add(self, origin, date, type): | def origin_visit_add(self, origin, date, type): | ||||
"""Add an origin_visit for the origin at date with status 'ongoing'. | |||||
Args: | |||||
origin (str): visited origin's identifier or URL | |||||
date (Union[str,datetime]): timestamp of such visit | |||||
type (str): the type of loader used for the visit (hg, git, ...) | |||||
Returns: | |||||
dict: dictionary with keys origin and visit where: | |||||
- origin: origin's identifier | |||||
- visit: the visit's identifier for the new visit occurrence | |||||
""" | |||||
origin_url = origin | origin_url = origin | ||||
if origin_url is None: | if origin_url is None: | ||||
raise ValueError('Unknown origin.') | raise ValueError('Unknown origin.') | ||||
if isinstance(date, str): | if isinstance(date, str): | ||||
# FIXME: Converge on iso8601 at some point | # FIXME: Converge on iso8601 at some point | ||||
date = dateutil.parser.parse(date) | date = dateutil.parser.parse(date) | ||||
elif not isinstance(date, datetime.datetime): | elif not isinstance(date, datetime.datetime): | ||||
Show All 25 Lines | def origin_visit_add(self, origin, date, type): | ||||
if self.journal_writer: | if self.journal_writer: | ||||
self.journal_writer.write_addition('origin_visit', visit) | self.journal_writer.write_addition('origin_visit', visit) | ||||
return visit_ret | return visit_ret | ||||
def origin_visit_update(self, origin, visit_id, status=None, | def origin_visit_update(self, origin, visit_id, status=None, | ||||
metadata=None, snapshot=None): | metadata=None, snapshot=None): | ||||
"""Update an origin_visit's status. | |||||
Args: | |||||
origin (str): visited origin's URL | |||||
visit_id (int): visit's identifier | |||||
status: visit's new status | |||||
metadata: data associated to the visit | |||||
snapshot (sha1_git): identifier of the snapshot to add to | |||||
the visit | |||||
Returns: | |||||
None | |||||
""" | |||||
if not isinstance(origin, str): | if not isinstance(origin, str): | ||||
raise TypeError('origin must be a string, not %r' % (origin,)) | raise TypeError('origin must be a string, not %r' % (origin,)) | ||||
origin_url = self._get_origin_url(origin) | origin_url = self._get_origin_url(origin) | ||||
if origin_url is None: | if origin_url is None: | ||||
raise ValueError('Unknown origin.') | raise ValueError('Unknown origin.') | ||||
try: | try: | ||||
visit = self._origin_visits[origin_url][visit_id-1] | visit = self._origin_visits[origin_url][visit_id-1] | ||||
Show All 12 Lines | def origin_visit_update(self, origin, visit_id, status=None, | ||||
visit = attr.evolve(visit, **updates) | visit = attr.evolve(visit, **updates) | ||||
if self.journal_writer: | if self.journal_writer: | ||||
self.journal_writer.write_update('origin_visit', visit) | self.journal_writer.write_update('origin_visit', visit) | ||||
self._origin_visits[origin_url][visit_id-1] = visit | self._origin_visits[origin_url][visit_id-1] = visit | ||||
def origin_visit_upsert(self, visits): | def origin_visit_upsert(self, visits): | ||||
"""Add a origin_visits with a specific id and with all its data. | |||||
If there is already an origin_visit with the same | |||||
`(origin_url, visit_id)`, updates it instead of inserting a new one. | |||||
Args: | |||||
visits: iterable of dicts with keys: | |||||
- **origin**: origin url | |||||
- **visit**: origin visit id | |||||
- **type**: type of loader used for the visit | |||||
- **date**: timestamp of such visit | |||||
- **status**: Visit's new status | |||||
- **metadata**: Data associated to the visit | |||||
- **snapshot**: identifier of the snapshot to add to | |||||
the visit | |||||
""" | |||||
for visit in visits: | for visit in visits: | ||||
if not isinstance(visit['origin'], str): | if not isinstance(visit['origin'], str): | ||||
raise TypeError("visit['origin'] must be a string, not %r" | raise TypeError("visit['origin'] must be a string, not %r" | ||||
% (visit['origin'],)) | % (visit['origin'],)) | ||||
visits = [OriginVisit.from_dict(d) for d in visits] | visits = [OriginVisit.from_dict(d) for d in visits] | ||||
if self.journal_writer: | if self.journal_writer: | ||||
for visit in visits: | for visit in visits: | ||||
Show All 17 Lines | def _convert_visit(self, visit): | ||||
if visit is None: | if visit is None: | ||||
return | return | ||||
visit = visit.to_dict() | visit = visit.to_dict() | ||||
return visit | return visit | ||||
def origin_visit_get(self, origin, last_visit=None, limit=None):
    """Yield the visits of an origin, in visit order.

    Args:
        origin: the origin, as accepted by ``_get_origin_url``
        last_visit (int): if given, skip the first ``last_visit``
            entries of the visit list, i.e. list the visits that come
            after that visit id
        limit (int): maximum number of results to return,
            default to None

    Yields:
        the selected visits, converted with ``_convert_visit``; nothing
        when the origin has no visit list
    """
    origin_url = self._get_origin_url(origin)
    if origin_url not in self._origin_visits:
        return

    selected = self._origin_visits[origin_url]
    if last_visit is not None:
        selected = selected[last_visit:]
    if limit is not None:
        selected = selected[:limit]

    for entry in selected:
        if not entry:
            continue
        # Re-read the visit from the store through its 1-based id.
        current = self._origin_visits[origin_url][entry.visit - 1]
        yield self._convert_visit(current)
def origin_visit_find_by_date(self, origin, visit_date):
    """Retrieve the origin visit whose date is closest to a timestamp.

    In case of a tie, the visit with the largest id is selected.

    Args:
        origin: the origin, as accepted by ``_get_origin_url``
        visit_date (datetime): target timestamp

    Returns:
        The closest visit, converted with ``_convert_visit``, or
        ``None`` if the origin is unknown or has no visit.
    """
    origin_url = self._get_origin_url(origin)
    if origin_url not in self._origin_visits:
        return None

    # default=None: an origin starts with an empty visit list, on which
    # a bare min() would raise ValueError instead of reporting "no
    # visit". Negating the id makes the largest id win ties.
    closest = min(
        self._origin_visits[origin_url],
        key=lambda v: (abs(v.date - visit_date), -v.visit),
        default=None)
    return self._convert_visit(closest)
def origin_visit_get_by(self, origin, visit):
    """Retrieve the information of one visit of an origin.

    Args:
        origin: the origin, as accepted by ``_get_origin_url``
        visit (int): the visit's identifier (1-based)

    Returns:
        The visit converted with ``_convert_visit``, or ``None`` if the
        origin has no visit list or the visit id is out of range.
    """
    origin_url = self._get_origin_url(origin)
    if origin_url not in self._origin_visits:
        return None

    visits = self._origin_visits[origin_url]
    # Visit ids are 1-based positions in the list. Without the lower
    # bound, 0 or a negative id would silently index from the end of
    # the list and return an unrelated visit.
    if not 1 <= visit <= len(visits):
        return None
    return self._convert_visit(visits[visit - 1])
def origin_visit_get_latest(
        self, origin, allowed_statuses=None, require_snapshot=False):
    """Get the latest visit of an origin, optionally filtered.

    "Latest" means the greatest ``(date, visit)`` pair among the visits
    that pass the filters.

    Args:
        origin (str): the origin's URL
        allowed_statuses (list of str): if not None, only visits whose
            status is in this list are considered
        require_snapshot (bool): If True, only visits with a snapshot
            are considered

    Returns:
        The selected visit converted with ``_convert_visit``; ``None``
        if the origin is unknown. When no visit passes the filters,
        ``_convert_visit`` is called with ``None``.
    """
    known_origin = self._origins.get(origin)
    if not known_origin:
        return None

    candidates = self._origin_visits[known_origin.url]
    if allowed_statuses is not None:
        candidates = [candidate for candidate in candidates
                      if candidate.status in allowed_statuses]
    if require_snapshot:
        candidates = [candidate for candidate in candidates
                      if candidate.snapshot]

    latest = max(
        candidates, key=lambda v: (v.date, v.visit), default=None)
    return self._convert_visit(latest)
def _select_random_origin_visit_by_type(self, type: str) -> str: | def _select_random_origin_visit_by_type(self, type: str) -> str: | ||||
"""Select randomly an origin visit """ | |||||
while True: | while True: | ||||
url = random.choice(list(self._origin_visits.keys())) | url = random.choice(list(self._origin_visits.keys())) | ||||
random_origin_visits = self._origin_visits[url] | random_origin_visits = self._origin_visits[url] | ||||
if random_origin_visits[0].type == type: | if random_origin_visits[0].type == type: | ||||
return url | return url | ||||
def origin_visit_get_random(self, type: str) -> Optional[Dict[str, Any]]:
    """Randomly select one successful origin visit with <type>
    made in the last 3 months.

    Args:
        type: visit type used to pick the origin.

    Returns:
        dict representing an origin visit, in the same format as
        `origin_visit_get`; None if there is no origin to pick from or if
        the picked origin has no 'full' visit in the last 12 weeks.

    """
    try:
        url = self._select_random_origin_visit_by_type(type)
    except IndexError:
        # Nothing to draw from (e.g. empty storage): honour the Optional
        # return type instead of leaking the selection error.
        return None
    back_in_the_day = now() - timedelta(weeks=12)  # 3 months back
    # This should be enough for tests.
    # Scan from the end of the list, i.e. last-added visits first.
    for visit in reversed(self._origin_visits[url]):
        if visit.date > back_in_the_day and visit.status == 'full':
            # Copy only the visit actually returned so the caller cannot
            # alias stored state.
            return copy.deepcopy(visit).to_dict()
    return None
def stat_counters(self):
    """Compute statistics about the number of objects of each kind.

    Returns:
        dict: a dictionary mapping textual labels (e.g., content) to
        integer values (e.g., the number of objects of that kind). The
        well-known labels are always present, defaulting to 0.

    """
    tables = (
        'content',
        'directory',
        'origin',
        'origin_visit',
        'person',
        'release',
        'revision',
        'skipped_content',
        'snapshot',
    )
    # Every value of self._objects is a list of (object type, object id)
    # pairs; only the type matters for counting.
    per_type = collections.Counter(
        kind
        for kind, _ in itertools.chain.from_iterable(self._objects.values()))
    counters = dict.fromkeys(tables, 0)
    counters.update(per_type)
    return counters
def refresh_stat_counters(self):
    """Recompute the statistics for `stat_counters`.

    This is a no-op here: `stat_counters` computes its result on every
    call.

    """
    return None
def origin_metadata_add(self, origin_url, ts, provider, tool, metadata):
    """Add an origin_metadata for the origin at ts with provenance and
    metadata.

    Args:
        origin_url (str): the origin url for which the metadata is added
        ts (datetime or str): timestamp of the found metadata; a string is
            parsed with `dateutil.parser.parse`
        provider: id of the provider of metadata (ex:'hal')
        tool: id of the tool used to extract metadata
        metadata (jsonb): the metadata retrieved at the time and location

    Raises:
        TypeError: if `origin_url` is not a string.

    """
    if not isinstance(origin_url, str):
        # Name the actual parameter in the message, as
        # origin_metadata_get_by does.
        raise TypeError('origin_url must be str, not %r' % (origin_url,))

    if isinstance(ts, str):
        ts = dateutil.parser.parse(ts)

    self._origin_metadata[origin_url].append({
        'origin_url': origin_url,
        'discovery_date': ts,
        'tool_id': tool,
        'metadata': metadata,
        'provider_id': provider,
    })
    return None
def origin_metadata_get_by(self, origin_url, provider_type=None):
    """Retrieve list of all origin_metadata entries for the origin_url

    Args:
        origin_url (str): the origin's url
        provider_type (str): (optional) type of provider; when given, only
            entries whose provider has this type are returned

    Returns:
        list of dicts: the origin_metadata dictionary with the keys:

        - origin_url (str): origin's URL
        - discovery_date (datetime): timestamp of discovery
        - tool_id: metadata's extracting tool
        - metadata (jsonb)
        - provider_id: metadata's provider
        - provider_name (str)
        - provider_type (str)
        - provider_url (str)

    Raises:
        TypeError: if `origin_url` is not a string.

    """
    if not isinstance(origin_url, str):
        raise TypeError('origin_url must be str, not %r' % (origin_url,))

    metadata = []
    # .get() so that looking up an unknown origin does not insert an empty
    # entry when the underlying mapping is a defaultdict.
    for stored in self._origin_metadata.get(origin_url, ()):
        provider = self.metadata_provider_get(stored['provider_id'])
        if (provider_type is not None
                and provider['provider_type'] != provider_type):
            continue
        # Work on a copy: the provider_* keys must not leak into storage.
        item = copy.deepcopy(stored)
        for attr_name in ('name', 'type', 'url'):
            item['provider_' + attr_name] = \
                provider['provider_' + attr_name]
        metadata.append(item)
    return metadata
def tool_add(self, tools):
    """Register tools in the storage.

    Args:
        tools (iterable of :class:`dict`): Tool information to add to
          storage. Each tool is a :class:`dict` with the following keys:

          - name (:class:`str`): name of the tool
          - version (:class:`str`): version of the tool
          - configuration (:class:`dict`): configuration of the tool,
            must be json-encodable

    Returns:
        list of :class:`dict`: one entry per input tool, as stored
        (including the internal ``id``). A tool whose key is already
        known is not overwritten; the stored record is returned instead.

    """
    added = []
    for tool in tools:
        tool_id = self._tool_key(tool)
        assert 'id' not in tool
        candidate = copy.deepcopy(tool)
        candidate['id'] = tool_id  # TODO: remove this
        # Keep the record already stored under this key, if there is one.
        stored = self._tools.setdefault(tool_id, candidate)
        added.append(copy.deepcopy(stored))
    return added
def tool_get(self, tool):
    """Retrieve tool information.

    Args:
        tool (dict): Tool information we want to retrieve from storage.
          The dicts have the same keys as those used in :func:`tool_add`.

    Returns:
        dict: The full tool information if it exists (``id`` included),
        None otherwise. The result is a copy of the stored record, as
        with :func:`tool_add`.

    """
    # Copy so callers mutating the result cannot alter the stored record;
    # deepcopy(None) is None, so the "not found" case is unchanged.
    return copy.deepcopy(self._tools.get(self._tool_key(tool)))
def metadata_provider_add(self, provider_name, provider_type, provider_url,
                          metadata):
    """Register a metadata provider.

    Args:
        provider_name (str): Its name
        provider_type (str): Its type
        provider_url (str): Its URL
        metadata: JSON-encodable object

    Returns:
        an identifier of the provider

    """
    record = dict(
        provider_name=provider_name,
        provider_type=provider_type,
        provider_url=provider_url,
        metadata=metadata,
    )
    provider_id = self._metadata_provider_key(record)
    record.update(id=provider_id)
    # A provider registered earlier under the same key is replaced.
    self._metadata_providers[provider_id] = record
    return provider_id
def metadata_provider_get(self, provider_id):
    """Look up a metadata provider by identifier.

    Args:
        provider_id: Its identifier, as given by `metadata_provider_add`.

    Returns:
        dict: the provider record stored by `metadata_provider_add`;
        or None if it does not exist.

    """
    known_providers = self._metadata_providers
    return known_providers.get(provider_id, None)
def metadata_provider_get_by(self, provider):
    """Look up a metadata provider from its description.

    Args:
        provider (dict): description of the provider; it is turned into
            an identifier by `_metadata_provider_key`.

    Returns:
        dict: the provider record stored by `metadata_provider_add`;
        or None if it does not exist.

    """
    return self._metadata_providers.get(
        self._metadata_provider_key(provider))
def _get_origin_url(self, origin):
    """Return `origin` unchanged after checking that it is a string.

    Raises:
        TypeError: if `origin` is not a string.

    """
    if not isinstance(origin, str):
        raise TypeError('origin must be a string.')
    return origin
def _person_add(self, person): | def _person_add(self, person): | ||||
"""Add a person in storage. | |||||
Note: Private method, do not use outside of this class. | |||||
Args: | |||||
person: dictionary with keys fullname, name and email. | |||||
""" | |||||
key = ('person', person.fullname) | key = ('person', person.fullname) | ||||
if key not in self._objects: | if key not in self._objects: | ||||
person_id = len(self._persons) + 1 | person_id = len(self._persons) + 1 | ||||
self._persons.append(person) | self._persons.append(person) | ||||
self._objects[key].append(('person', person_id)) | self._objects[key].append(('person', person_id)) | ||||
else: | else: | ||||
person_id = self._objects[key][0][1] | person_id = self._objects[key][0][1] | ||||
person = self._persons[person_id-1] | person = self._persons[person_id-1] | ||||
Show All 16 Lines | class Storage: | ||||
@staticmethod
def _tool_key(tool):
    """Build the storage key of a tool from its name, version and
    configuration (configuration items are sorted so that their order in
    the dict does not matter)."""
    config_items = tuple(sorted(tool['configuration'].items()))
    key_parts = (tool['name'], tool['version'], config_items)
    return '%r %r %r' % key_parts
@staticmethod
def _metadata_provider_key(provider):
    """Build the storage key of a metadata provider from its name and
    URL."""
    key_parts = (provider['provider_name'], provider['provider_url'])
    return '%r %r' % key_parts
def diff_directories(self, from_dir, to_dir, track_renaming=False):
    """Not supported by this storage: always raises NotImplementedError."""
    unsupported = 'InMemoryStorage.diff_directories'
    raise NotImplementedError(unsupported)
def diff_revisions(self, from_rev, to_rev, track_renaming=False):
    """Not supported by this storage: always raises NotImplementedError."""
    unsupported = 'InMemoryStorage.diff_revisions'
    raise NotImplementedError(unsupported)
def diff_revision(self, revision, track_renaming=False):
    """Not supported by this storage: always raises NotImplementedError."""
    unsupported = 'InMemoryStorage.diff_revision'
    raise NotImplementedError(unsupported)