Page MenuHomeSoftware Heritage

service.py
No OneTemporary

service.py

# Copyright (C) 2015-2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information
import os
from collections import defaultdict
from swh.model import hashutil
from swh.storage.algos import revisions_walker
from swh.web.common import converters
from swh.web.common import query
from swh.web.common.exc import NotFoundExc
from swh.web.common.origin_visits import get_origin_visit
from swh.web import config
storage = config.storage()
vault = config.vault()
idx_storage = config.indexer_storage()
MAX_LIMIT = 50 # Top limit the users can ask for
def _first_element(l):
"""Returns the first element in the provided list or None
if it is empty or None"""
return next(iter(l or []), None)
def lookup_multiple_hashes(hashes):
"""Lookup the passed hashes in a single DB connection, using batch
processing.
Args:
An array of {filename: X, sha1: Y}, string X, hex sha1 string Y.
Returns:
The same array with elements updated with elem['found'] = true if
the hash is present in storage, elem['found'] = false if not.
"""
hashlist = [hashutil.hash_to_bytes(elem['sha1']) for elem in hashes]
content_missing = storage.content_missing_per_sha1(hashlist)
missing = [hashutil.hash_to_hex(x) for x in content_missing]
for x in hashes:
x.update({'found': True})
for h in hashes:
if h['sha1'] in missing:
h['found'] = False
return hashes
def lookup_expression(expression, last_sha1, per_page):
"""Lookup expression in raw content.
Args:
expression (str): An expression to lookup through raw indexed
content
last_sha1 (str): Last sha1 seen
per_page (int): Number of results per page
Yields:
ctags whose content match the expression
"""
limit = min(per_page, MAX_LIMIT)
ctags = idx_storage.content_ctags_search(expression,
last_sha1=last_sha1,
limit=limit)
for ctag in ctags:
ctag = converters.from_swh(ctag, hashess={'id'})
ctag['sha1'] = ctag['id']
ctag.pop('id')
yield ctag
def lookup_hash(q):
"""Checks if the storage contains a given content checksum
Args: query string of the form <hash_algo:hash>
Returns: Dict with key found containing the hash info if the
hash is present, None if not.
"""
algo, hash = query.parse_hash(q)
found = _first_element(storage.content_find({algo: hash}))
return {'found': converters.from_content(found),
'algo': algo}
def search_hash(q):
"""Checks if the storage contains a given content checksum
Args: query string of the form <hash_algo:hash>
Returns: Dict with key found to True or False, according to
whether the checksum is present or not
"""
algo, hash = query.parse_hash(q)
found = _first_element(storage.content_find({algo: hash}))
return {'found': found is not None}
def _lookup_content_sha1(q):
"""Given a possible input, query for the content's sha1.
Args:
q: query string of the form <hash_algo:hash>
Returns:
binary sha1 if found or None
"""
algo, hash = query.parse_hash(q)
if algo != 'sha1':
hashes = _first_element(storage.content_find({algo: hash}))
if not hashes:
return None
return hashes['sha1']
return hash
def lookup_content_ctags(q):
"""Return ctags information from a specified content.
Args:
q: query string of the form <hash_algo:hash>
Yields:
ctags information (dict) list if the content is found.
"""
sha1 = _lookup_content_sha1(q)
if not sha1:
return None
ctags = list(idx_storage.content_ctags_get([sha1]))
if not ctags:
return None
for ctag in ctags:
yield converters.from_swh(ctag, hashess={'id'})
def lookup_content_filetype(q):
"""Return filetype information from a specified content.
Args:
q: query string of the form <hash_algo:hash>
Yields:
filetype information (dict) list if the content is found.
"""
sha1 = _lookup_content_sha1(q)
if not sha1:
return None
filetype = _first_element(list(idx_storage.content_mimetype_get([sha1])))
if not filetype:
return None
return converters.from_filetype(filetype)
def lookup_content_language(q):
"""Return language information from a specified content.
Args:
q: query string of the form <hash_algo:hash>
Yields:
language information (dict) list if the content is found.
"""
sha1 = _lookup_content_sha1(q)
if not sha1:
return None
lang = _first_element(list(idx_storage.content_language_get([sha1])))
if not lang:
return None
return converters.from_swh(lang, hashess={'id'})
def lookup_content_license(q):
"""Return license information from a specified content.
Args:
q: query string of the form <hash_algo:hash>
Yields:
license information (dict) list if the content is found.
"""
sha1 = _lookup_content_sha1(q)
if not sha1:
return None
lic = _first_element(idx_storage.content_fossology_license_get([sha1]))
if not lic:
return None
return converters.from_swh({'id': sha1, 'facts': lic[sha1]},
hashess={'id'})
def lookup_origin(origin):
"""Return information about the origin matching dict origin.
Args:
origin: origin's dict with keys either 'id' or
('type' AND 'url')
Returns:
origin information as dict.
"""
origin_info = storage.origin_get(origin)
if not origin_info:
if 'id' in origin and origin['id']:
msg = 'Origin with id %s not found!' % origin['id']
else:
msg = 'Origin with type %s and url %s not found!' % \
(origin['type'], origin['url'])
raise NotFoundExc(msg)
return converters.from_origin(origin_info)
def lookup_origins(origin_from=1, origin_count=100):
"""Get list of archived software origins in a paginated way.
Origins are sorted by id before returning them
Args:
origin_from (int): The minimum id of the origins to return
origin_count (int): The maximum number of origins to return
Yields:
origins information as dicts
"""
origins = storage.origin_get_range(origin_from, origin_count)
return map(converters.from_origin, origins)
def search_origin(url_pattern, offset=0, limit=50, regexp=False,
with_visit=False):
"""Search for origins whose urls contain a provided string pattern
or match a provided regular expression.
Args:
url_pattern: the string pattern to search for in origin urls
offset: number of found origins to skip before returning results
limit: the maximum number of found origins to return
Returns:
list of origin information as dict.
"""
origins = storage.origin_search(url_pattern, offset, limit, regexp,
with_visit)
return map(converters.from_origin, origins)
def search_origin_metadata(fulltext, limit=50):
"""Search for origins whose metadata match a provided string pattern.
Args:
fulltext: the string pattern to search for in origin metadata
offset: number of found origins to skip before returning results
limit: the maximum number of found origins to return
Returns:
list of origin metadata as dict.
"""
matches = idx_storage.origin_intrinsic_metadata_search_fulltext(
conjunction=[fulltext], limit=limit)
results = []
for match in matches:
match['from_revision'] = hashutil.hash_to_hex(match['from_revision'])
result = converters.from_origin(
storage.origin_get({'id': match.pop('id')}))
result['metadata'] = match
results.append(result)
return results
def lookup_origin_intrinsic_metadata(origin_dict):
"""Return intrinsic metadata for origin whose origin_id matches given
origin_id.
Args:
origin_dict: origin's dict with keys ('type' AND 'url')
Returns:
origin metadata.
"""
origin_info = storage.origin_get(origin_dict)
if not origin_info:
msg = 'Origin with type %s and url %s not found!' % \
(origin_dict['type'], origin_dict['url'])
raise NotFoundExc(msg)
origin_ids = [origin_info['id']]
match = idx_storage.origin_intrinsic_metadata_get(origin_ids)[0]
return match['metadata']
def lookup_person(person_id):
"""Return information about the person with id person_id.
Args:
person_id as string
Returns:
person information as dict.
Raises:
NotFoundExc if there is no person with the provided id.
"""
person = _first_element(storage.person_get([int(person_id)]))
if not person:
raise NotFoundExc('Person with id %s not found' % person_id)
return converters.from_person(person)
def _to_sha1_bin(sha1_hex):
_, sha1_git_bin = query.parse_hash_with_algorithms_or_throws(
sha1_hex,
['sha1'], # HACK: sha1_git really
'Only sha1_git is supported.')
return sha1_git_bin
def _check_directory_exists(sha1_git, sha1_git_bin):
if len(list(storage.directory_missing([sha1_git_bin]))):
raise NotFoundExc('Directory with sha1_git %s not found' % sha1_git)
def lookup_directory(sha1_git):
"""Return information about the directory with id sha1_git.
Args:
sha1_git as string
Returns:
directory information as dict.
"""
empty_dir_sha1 = '4b825dc642cb6eb9a060e54bf8d69288fbee4904'
if sha1_git == empty_dir_sha1:
return []
sha1_git_bin = _to_sha1_bin(sha1_git)
_check_directory_exists(sha1_git, sha1_git_bin)
directory_entries = storage.directory_ls(sha1_git_bin)
return map(converters.from_directory_entry, directory_entries)
def lookup_directory_with_path(sha1_git, path_string):
"""Return directory information for entry with path path_string w.r.t.
root directory pointed by directory_sha1_git
Args:
- directory_sha1_git: sha1_git corresponding to the directory
to which we append paths to (hopefully) find the entry
- the relative path to the entry starting from the directory pointed by
directory_sha1_git
Raises:
NotFoundExc if the directory entry is not found
"""
sha1_git_bin = _to_sha1_bin(sha1_git)
_check_directory_exists(sha1_git, sha1_git_bin)
paths = path_string.strip(os.path.sep).split(os.path.sep)
queried_dir = storage.directory_entry_get_by_path(
sha1_git_bin, list(map(lambda p: p.encode('utf-8'), paths)))
if not queried_dir:
raise NotFoundExc(('Directory entry with path %s from %s not found') %
(path_string, sha1_git))
return converters.from_directory_entry(queried_dir)
def lookup_release(release_sha1_git):
"""Return information about the release with sha1 release_sha1_git.
Args:
release_sha1_git: The release's sha1 as hexadecimal
Returns:
Release information as dict.
Raises:
ValueError if the identifier provided is not of sha1 nature.
"""
sha1_git_bin = _to_sha1_bin(release_sha1_git)
release = _first_element(storage.release_get([sha1_git_bin]))
if not release:
raise NotFoundExc('Release with sha1_git %s not found.'
% release_sha1_git)
return converters.from_release(release)
def lookup_release_multiple(sha1_git_list):
"""Return information about the revisions identified with
their sha1_git identifiers.
Args:
sha1_git_list: A list of revision sha1_git identifiers
Returns:
Release information as dict.
Raises:
ValueError if the identifier provided is not of sha1 nature.
"""
sha1_bin_list = (_to_sha1_bin(sha1_git) for sha1_git in sha1_git_list)
releases = storage.release_get(sha1_bin_list) or []
return (converters.from_release(r) for r in releases)
def lookup_revision(rev_sha1_git):
"""Return information about the revision with sha1 revision_sha1_git.
Args:
revision_sha1_git: The revision's sha1 as hexadecimal
Returns:
Revision information as dict.
Raises:
ValueError if the identifier provided is not of sha1 nature.
NotFoundExc if there is no revision with the provided sha1_git.
"""
sha1_git_bin = _to_sha1_bin(rev_sha1_git)
revision = _first_element(storage.revision_get([sha1_git_bin]))
if not revision:
raise NotFoundExc('Revision with sha1_git %s not found.'
% rev_sha1_git)
return converters.from_revision(revision)
def lookup_revision_multiple(sha1_git_list):
"""Return information about the revisions identified with
their sha1_git identifiers.
Args:
sha1_git_list: A list of revision sha1_git identifiers
Returns:
Generator of revisions information as dict.
Raises:
ValueError if the identifier provided is not of sha1 nature.
"""
sha1_bin_list = (_to_sha1_bin(sha1_git) for sha1_git in sha1_git_list)
revisions = storage.revision_get(sha1_bin_list) or []
return (converters.from_revision(r) for r in revisions)
def lookup_revision_message(rev_sha1_git):
"""Return the raw message of the revision with sha1 revision_sha1_git.
Args:
revision_sha1_git: The revision's sha1 as hexadecimal
Returns:
Decoded revision message as dict {'message': <the_message>}
Raises:
ValueError if the identifier provided is not of sha1 nature.
NotFoundExc if the revision is not found, or if it has no message
"""
sha1_git_bin = _to_sha1_bin(rev_sha1_git)
revision = _first_element(storage.revision_get([sha1_git_bin]))
if not revision:
raise NotFoundExc('Revision with sha1_git %s not found.'
% rev_sha1_git)
if 'message' not in revision:
raise NotFoundExc('No message for revision with sha1_git %s.'
% rev_sha1_git)
res = {'message': revision['message']}
return res
def _lookup_revision_id_by(origin_id, branch_name, timestamp):
def _get_snapshot_branch(snapshot, branch_name):
snapshot = lookup_snapshot(visit['snapshot'],
branches_from=branch_name,
branches_count=10)
branch = None
if branch_name in snapshot['branches']:
branch = snapshot['branches'][branch_name]
return branch
visit = get_origin_visit({'id': origin_id}, visit_ts=timestamp)
branch = _get_snapshot_branch(visit['snapshot'], branch_name)
rev_id = None
if branch and branch['target_type'] == 'revision':
rev_id = branch['target']
elif branch and branch['target_type'] == 'alias':
branch = _get_snapshot_branch(visit['snapshot'], branch['target'])
if branch and branch['target_type'] == 'revision':
rev_id = branch['target']
if not rev_id:
raise NotFoundExc('Revision for origin %s and branch %s not found.'
% (origin_id, branch_name))
return rev_id
def lookup_revision_by(origin_id,
branch_name='HEAD',
timestamp=None):
"""Lookup revision by origin id, snapshot branch name and visit timestamp.
If branch_name is not provided, lookup using 'HEAD' as default.
If timestamp is not provided, use the most recent.
Args:
origin_id (int): origin of the revision
branch_name (str): snapshot branch name
timestamp (str/int): origin visit time frame
Returns:
dict: The revision matching the criterions
Raises:
NotFoundExc if no revision corresponds to the criterion
"""
rev_id = _lookup_revision_id_by(origin_id, branch_name, timestamp)
return lookup_revision(rev_id)
def lookup_revision_log(rev_sha1_git, limit):
"""Lookup revision log by revision id.
Args:
rev_sha1_git (str): The revision's sha1 as hexadecimal
limit (int): the maximum number of revisions returned
Returns:
list: Revision log as list of revision dicts
Raises:
ValueError: if the identifier provided is not of sha1 nature.
NotFoundExc: if there is no revision with the provided sha1_git.
"""
lookup_revision(rev_sha1_git)
sha1_git_bin = _to_sha1_bin(rev_sha1_git)
revision_entries = storage.revision_log([sha1_git_bin], limit)
return map(converters.from_revision, revision_entries)
def lookup_revision_log_by(origin_id, branch_name, timestamp, limit):
"""Lookup revision by origin id, snapshot branch name and visit timestamp.
Args:
origin_id (int): origin of the revision
branch_name (str): snapshot branch
timestamp (str/int): origin visit time frame
limit (int): the maximum number of revisions returned
Returns:
list: Revision log as list of revision dicts
Raises:
NotFoundExc: if no revision corresponds to the criterion
"""
rev_id = _lookup_revision_id_by(origin_id, branch_name, timestamp)
return lookup_revision_log(rev_id, limit)
def lookup_revision_with_context_by(origin_id, branch_name, timestamp,
sha1_git, limit=100):
"""Return information about revision sha1_git, limited to the
sub-graph of all transitive parents of sha1_git_root.
sha1_git_root being resolved through the lookup of a revision by origin_id,
branch_name and ts.
In other words, sha1_git is an ancestor of sha1_git_root.
Args:
- origin_id: origin of the revision.
- branch_name: revision's branch.
- timestamp: revision's time frame.
- sha1_git: one of sha1_git_root's ancestors.
- limit: limit the lookup to 100 revisions back.
Returns:
Pair of (root_revision, revision).
Information on sha1_git if it is an ancestor of sha1_git_root
including children leading to sha1_git_root
Raises:
- BadInputExc in case of unknown algo_hash or bad hash.
- NotFoundExc if either revision is not found or if sha1_git is not an
ancestor of sha1_git_root.
"""
rev_root_id = _lookup_revision_id_by(origin_id, branch_name, timestamp)
rev_root_id_bin = hashutil.hash_to_bytes(rev_root_id)
rev_root = _first_element(storage.revision_get([rev_root_id_bin]))
return (converters.from_revision(rev_root),
lookup_revision_with_context(rev_root, sha1_git, limit))
def lookup_revision_with_context(sha1_git_root, sha1_git, limit=100):
"""Return information about revision sha1_git, limited to the
sub-graph of all transitive parents of sha1_git_root.
In other words, sha1_git is an ancestor of sha1_git_root.
Args:
sha1_git_root: latest revision. The type is either a sha1 (as an hex
string) or a non converted dict.
sha1_git: one of sha1_git_root's ancestors
limit: limit the lookup to 100 revisions back
Returns:
Information on sha1_git if it is an ancestor of sha1_git_root
including children leading to sha1_git_root
Raises:
BadInputExc in case of unknown algo_hash or bad hash
NotFoundExc if either revision is not found or if sha1_git is not an
ancestor of sha1_git_root
"""
sha1_git_bin = _to_sha1_bin(sha1_git)
revision = _first_element(storage.revision_get([sha1_git_bin]))
if not revision:
raise NotFoundExc('Revision %s not found' % sha1_git)
if isinstance(sha1_git_root, str):
sha1_git_root_bin = _to_sha1_bin(sha1_git_root)
revision_root = _first_element(storage.revision_get([sha1_git_root_bin])) # noqa
if not revision_root:
raise NotFoundExc('Revision root %s not found' % sha1_git_root)
else:
sha1_git_root_bin = sha1_git_root['id']
revision_log = storage.revision_log([sha1_git_root_bin], limit)
parents = {}
children = defaultdict(list)
for rev in revision_log:
rev_id = rev['id']
parents[rev_id] = []
for parent_id in rev['parents']:
parents[rev_id].append(parent_id)
children[parent_id].append(rev_id)
if revision['id'] not in parents:
raise NotFoundExc('Revision %s is not an ancestor of %s' %
(sha1_git, sha1_git_root))
revision['children'] = children[revision['id']]
return converters.from_revision(revision)
def lookup_directory_with_revision(sha1_git, dir_path=None, with_data=False):
"""Return information on directory pointed by revision with sha1_git.
If dir_path is not provided, display top level directory.
Otherwise, display the directory pointed by dir_path (if it exists).
Args:
sha1_git: revision's hash.
dir_path: optional directory pointed to by that revision.
with_data: boolean that indicates to retrieve the raw data if the path
resolves to a content. Default to False (for the api)
Returns:
Information on the directory pointed to by that revision.
Raises:
BadInputExc in case of unknown algo_hash or bad hash.
NotFoundExc either if the revision is not found or the path referenced
does not exist.
NotImplementedError in case of dir_path exists but do not reference a
type 'dir' or 'file'.
"""
sha1_git_bin = _to_sha1_bin(sha1_git)
revision = _first_element(storage.revision_get([sha1_git_bin]))
if not revision:
raise NotFoundExc('Revision %s not found' % sha1_git)
dir_sha1_git_bin = revision['directory']
if dir_path:
paths = dir_path.strip(os.path.sep).split(os.path.sep)
entity = storage.directory_entry_get_by_path(
dir_sha1_git_bin, list(map(lambda p: p.encode('utf-8'), paths)))
if not entity:
raise NotFoundExc(
"Directory or File '%s' pointed to by revision %s not found"
% (dir_path, sha1_git))
else:
entity = {'type': 'dir', 'target': dir_sha1_git_bin}
if entity['type'] == 'dir':
directory_entries = storage.directory_ls(entity['target']) or []
return {'type': 'dir',
'path': '.' if not dir_path else dir_path,
'revision': sha1_git,
'content': list(map(converters.from_directory_entry,
directory_entries))}
elif entity['type'] == 'file': # content
content = _first_element(
storage.content_find({'sha1_git': entity['target']}))
if not content:
raise NotFoundExc('Content not found for revision %s'
% sha1_git)
if with_data:
c = _first_element(storage.content_get([content['sha1']]))
content['data'] = c['data']
return {'type': 'file',
'path': '.' if not dir_path else dir_path,
'revision': sha1_git,
'content': converters.from_content(content)}
elif entity['type'] == 'rev': # revision
revision = next(storage.revision_get([entity['target']]))
return {'type': 'rev',
'path': '.' if not dir_path else dir_path,
'revision': sha1_git,
'content': converters.from_revision(revision)}
else:
raise NotImplementedError('Entity of type %s not implemented.'
% entity['type'])
def lookup_content(q):
"""Lookup the content designed by q.
Args:
q: The release's sha1 as hexadecimal
Raises:
NotFoundExc if the requested content is not found
"""
algo, hash = query.parse_hash(q)
c = _first_element(storage.content_find({algo: hash}))
if not c:
raise NotFoundExc('Content with %s checksum equals to %s not found!' %
(algo, hashutil.hash_to_hex(hash)))
return converters.from_content(c)
def lookup_content_raw(q):
"""Lookup the content defined by q.
Args:
q: query string of the form <hash_algo:hash>
Returns:
dict with 'sha1' and 'data' keys.
data representing its raw data decoded.
Raises:
NotFoundExc if the requested content is not found or
if the content bytes are not available in the storage
"""
c = lookup_content(q)
content_sha1_bytes = hashutil.hash_to_bytes(c['checksums']['sha1'])
content = _first_element(storage.content_get([content_sha1_bytes]))
if not content:
algo, hash = query.parse_hash(q)
raise NotFoundExc('Bytes of content with %s checksum equals to %s '
'are not available!' %
(algo, hashutil.hash_to_hex(hash)))
return converters.from_content(content)
def stat_counters():
"""Return the stat counters for Software Heritage
Returns:
A dict mapping textual labels to integer values.
"""
return storage.stat_counters()
def _lookup_origin_visits(origin_id, last_visit=None, limit=10):
"""Yields the origin origin_ids' visits.
Args:
origin_id (int): origin to list visits for
last_visit (int): last visit to lookup from
limit (int): Number of elements max to display
Yields:
Dictionaries of origin_visit for that origin
"""
limit = min(limit, MAX_LIMIT)
yield from storage.origin_visit_get(
origin_id, last_visit=last_visit, limit=limit)
def lookup_origin_visits(origin_id, last_visit=None, per_page=10):
"""Yields the origin origin_ids' visits.
Args:
origin_id: origin to list visits for
Yields:
Dictionaries of origin_visit for that origin
"""
lookup_origin({'id': origin_id})
visits = _lookup_origin_visits(origin_id, last_visit=last_visit,
limit=per_page)
for visit in visits:
yield converters.from_origin_visit(visit)
def lookup_origin_visit(origin_id, visit_id):
"""Return information about visit visit_id with origin origin_id.
Args:
origin_id: origin concerned by the visit
visit_id: the visit identifier to lookup
Yields:
The dict origin_visit concerned
"""
visit = storage.origin_visit_get_by(origin_id, visit_id)
if not visit:
raise NotFoundExc('Origin with id %s or its visit '
'with id %s not found!' % (origin_id, visit_id))
return converters.from_origin_visit(visit)
def lookup_snapshot_size(snapshot_id):
"""Count the number of branches in the snapshot with the given id
Args:
snapshot_id (str): sha1 identifier of the snapshot
Returns:
dict: A dict whose keys are the target types of branches and
values their corresponding amount
"""
snapshot_id_bin = _to_sha1_bin(snapshot_id)
snapshot_size = storage.snapshot_count_branches(snapshot_id_bin)
if 'revision' not in snapshot_size:
snapshot_size['revision'] = 0
if 'release' not in snapshot_size:
snapshot_size['release'] = 0
return snapshot_size
def lookup_snapshot(snapshot_id, branches_from='', branches_count=1000,
target_types=None):
"""Return information about a snapshot, aka the list of named
branches found during a specific visit of an origin.
Args:
snapshot_id (str): sha1 identifier of the snapshot
branches_from (str): optional parameter used to skip branches
whose name is lesser than it before returning them
branches_count (int): optional parameter used to restrain
the amount of returned branches
target_types (list): optional parameter used to filter the
target types of branch to return (possible values that can be
contained in that list are `'content', 'directory',
'revision', 'release', 'snapshot', 'alias'`)
Returns:
A dict filled with the snapshot content.
"""
snapshot_id_bin = _to_sha1_bin(snapshot_id)
snapshot = storage.snapshot_get_branches(snapshot_id_bin,
branches_from.encode(),
branches_count, target_types)
if not snapshot:
raise NotFoundExc('Snapshot with id %s not found!' % snapshot_id)
return converters.from_snapshot(snapshot)
def lookup_latest_origin_snapshot(origin_id, allowed_statuses=None):
"""Return information about the latest snapshot of an origin.
.. warning:: At most 1000 branches contained in the snapshot
will be returned for performance reasons.
Args:
origin_id: integer identifier of the origin
allowed_statuses: list of visit statuses considered
to find the latest snapshot for the visit. For instance,
``allowed_statuses=['full']`` will only consider visits that
have successfully run to completion.
Returns:
A dict filled with the snapshot content.
"""
snapshot = storage.snapshot_get_latest(origin_id, allowed_statuses)
return converters.from_snapshot(snapshot)
def lookup_revision_through(revision, limit=100):
"""Retrieve a revision from the criterion stored in revision dictionary.
Args:
revision: Dictionary of criterion to lookup the revision with.
Here are the supported combination of possible values:
- origin_id, branch_name, ts, sha1_git
- origin_id, branch_name, ts
- sha1_git_root, sha1_git
- sha1_git
Returns:
None if the revision is not found or the actual revision.
"""
if 'origin_id' in revision and \
'branch_name' in revision and \
'ts' in revision and \
'sha1_git' in revision:
return lookup_revision_with_context_by(revision['origin_id'],
revision['branch_name'],
revision['ts'],
revision['sha1_git'],
limit)
if 'origin_id' in revision and \
'branch_name' in revision and \
'ts' in revision:
return lookup_revision_by(revision['origin_id'],
revision['branch_name'],
revision['ts'])
if 'sha1_git_root' in revision and \
'sha1_git' in revision:
return lookup_revision_with_context(revision['sha1_git_root'],
revision['sha1_git'],
limit)
if 'sha1_git' in revision:
return lookup_revision(revision['sha1_git'])
# this should not happen
raise NotImplementedError('Should not happen!')
def lookup_directory_through_revision(revision, path=None,
limit=100, with_data=False):
"""Retrieve the directory information from the revision.
Args:
revision: dictionary of criterion representing a revision to lookup
path: directory's path to lookup.
limit: optional query parameter to limit the revisions log (default to
100). For now, note that this limit could impede the transitivity
conclusion about sha1_git not being an ancestor of.
with_data: indicate to retrieve the content's raw data if path resolves
to a content.
Returns:
The directory pointing to by the revision criterions at path.
"""
rev = lookup_revision_through(revision, limit)
if not rev:
raise NotFoundExc('Revision with criterion %s not found!' % revision)
return (rev['id'],
lookup_directory_with_revision(rev['id'], path, with_data))
def vault_cook(obj_type, obj_id, email=None):
"""Cook a vault bundle.
"""
return vault.cook(obj_type, obj_id, email=email)
def vault_fetch(obj_type, obj_id):
"""Fetch a vault bundle.
"""
return vault.fetch(obj_type, obj_id)
def vault_progress(obj_type, obj_id):
"""Get the current progress of a vault bundle.
"""
return vault.progress(obj_type, obj_id)
def diff_revision(rev_id):
"""Get the list of file changes (insertion / deletion / modification /
renaming) for a particular revision.
"""
rev_sha1_git_bin = _to_sha1_bin(rev_id)
changes = storage.diff_revision(rev_sha1_git_bin, track_renaming=True)
for change in changes:
change['from'] = converters.from_directory_entry(change['from'])
change['to'] = converters.from_directory_entry(change['to'])
if change['from_path']:
change['from_path'] = change['from_path'].decode('utf-8')
if change['to_path']:
change['to_path'] = change['to_path'].decode('utf-8')
return changes
class _RevisionsWalkerProxy(object):
"""
Proxy class wrapping a revisions walker iterator from
swh-storage and performing needed conversions.
"""
def __init__(self, rev_walker_type, rev_start, *args, **kwargs):
rev_start_bin = hashutil.hash_to_bytes(rev_start)
self.revisions_walker = \
revisions_walker.get_revisions_walker(rev_walker_type,
storage,
rev_start_bin,
*args, **kwargs)
def export_state(self):
return self.revisions_walker.export_state()
def __next__(self):
return converters.from_revision(next(self.revisions_walker))
def __iter__(self):
return self
def get_revisions_walker(rev_walker_type, rev_start, *args, **kwargs):
"""
Utility function to instantiate a revisions walker of a given type,
see :mod:`swh.storage.algos.revisions_walker`.
Args:
rev_walker_type (str): the type of revisions walker to return,
possible values are: ``committer_date``, ``dfs``, ``dfs_post``,
``bfs`` and ``path``
rev_start (str): hexadecimal representation of a revision identifier
args (list): position arguments to pass to the revisions walker
constructor
kwargs (dict): keyword arguments to pass to the revisions walker
constructor
"""
# first check if the provided revision is valid
lookup_revision(rev_start)
return _RevisionsWalkerProxy(rev_walker_type, rev_start, *args, **kwargs)

File Metadata

Mime Type
text/x-python
Expires
Fri, Jul 4, 2:07 PM (12 h, 6 m ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3392769

Event Timeline