# Copyright (C) 2015-2019  The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information

import os

from collections import defaultdict

from swh.model import hashutil

from swh.storage.algos import revisions_walker

from swh.web.common import converters
from swh.web.common import query
from swh.web.common.exc import NotFoundExc
from swh.web.common.origin_visits import get_origin_visit
from swh.web import config

storage = config.storage()
vault = config.vault()
idx_storage = config.indexer_storage()


MAX_LIMIT = 50  # Top limit the users can ask for


def _first_element(l):
    """Returns the first element in the provided list or None
    if it is empty or None"""
    return next(iter(l or []), None)


def lookup_multiple_hashes(hashes):
    """Lookup the passed hashes in a single DB connection, using batch
    processing.

    Args:
        An array of {filename: X, sha1: Y}, string X, hex sha1 string Y.
    Returns:
        The same array with elements updated with elem['found'] = true if
        the hash is present in storage, elem['found'] = false if not.

    """
    hashlist = [hashutil.hash_to_bytes(elem['sha1']) for elem in hashes]
    content_missing = storage.content_missing_per_sha1(hashlist)
    missing = [hashutil.hash_to_hex(x) for x in content_missing]
    for x in hashes:
        x.update({'found': True})
    for h in hashes:
        if h['sha1'] in missing:
            h['found'] = False
    return hashes


def lookup_expression(expression, last_sha1, per_page):
    """Lookup expression in raw content.

    Args:
        expression (str): An expression to lookup through raw indexed
        content
        last_sha1 (str): Last sha1 seen
        per_page (int): Number of results per page

    Yields:
        ctags whose content match the expression

    """

    limit = min(per_page, MAX_LIMIT)
    ctags = idx_storage.content_ctags_search(expression,
                                             last_sha1=last_sha1,
                                             limit=limit)
    for ctag in ctags:
        ctag = converters.from_swh(ctag, hashess={'id'})
        ctag['sha1'] = ctag['id']
        ctag.pop('id')
        yield ctag


def lookup_hash(q):
    """Checks if the storage contains a given content checksum

    Args: query string of the form <hash_algo:hash>

    Returns: Dict with key found containing the hash info if the
    hash is present, None if not.

    """
    algo, hash = query.parse_hash(q)
    found = _first_element(storage.content_find({algo: hash}))
    return {'found': converters.from_content(found),
            'algo': algo}


def search_hash(q):
    """Checks if the storage contains a given content checksum

    Args: query string of the form <hash_algo:hash>

    Returns: Dict with key found to True or False, according to
        whether the checksum is present or not

    """
    algo, hash = query.parse_hash(q)
    found = _first_element(storage.content_find({algo: hash}))
    return {'found': found is not None}


def _lookup_content_sha1(q):
    """Given a possible input, query for the content's sha1.

    Args:
        q: query string of the form <hash_algo:hash>

    Returns:
        binary sha1 if found or None

    """
    algo, hash = query.parse_hash(q)
    if algo != 'sha1':
        hashes = _first_element(storage.content_find({algo: hash}))
        if not hashes:
            return None
        return hashes['sha1']
    return hash


def lookup_content_ctags(q):
    """Return ctags information from a specified content.

    Args:
        q: query string of the form <hash_algo:hash>

    Yields:
        ctags information (dict) list if the content is found.

    """
    sha1 = _lookup_content_sha1(q)

    if not sha1:
        return None

    ctags = list(idx_storage.content_ctags_get([sha1]))
    if not ctags:
        return None

    for ctag in ctags:
        yield converters.from_swh(ctag, hashess={'id'})


def lookup_content_filetype(q):
    """Return filetype information from a specified content.

    Args:
        q: query string of the form <hash_algo:hash>

    Yields:
        filetype information (dict) list if the content is found.

    """
    sha1 = _lookup_content_sha1(q)
    if not sha1:
        return None
    filetype = _first_element(list(idx_storage.content_mimetype_get([sha1])))
    if not filetype:
        return None
    return converters.from_filetype(filetype)


def lookup_content_language(q):
    """Return language information from a specified content.

    Args:
        q: query string of the form <hash_algo:hash>

    Yields:
        language information (dict) list if the content is found.

    """
    sha1 = _lookup_content_sha1(q)
    if not sha1:
        return None
    lang = _first_element(list(idx_storage.content_language_get([sha1])))
    if not lang:
        return None
    return converters.from_swh(lang, hashess={'id'})


def lookup_content_license(q):
    """Return license information from a specified content.

    Args:
        q: query string of the form <hash_algo:hash>

    Yields:
        license information (dict) list if the content is found.

    """
    sha1 = _lookup_content_sha1(q)
    if not sha1:
        return None
    lic = _first_element(idx_storage.content_fossology_license_get([sha1]))

    if not lic:
        return None
    return converters.from_swh({'id': sha1, 'facts': lic[sha1]},
                               hashess={'id'})


def lookup_origin(origin):
    """Return information about the origin matching dict origin.

    Args:
        origin: origin's dict with keys either 'id' or
        ('type' AND 'url')

    Returns:
        origin information as dict.

    """
    origin_info = storage.origin_get(origin)
    if not origin_info:
        if 'id' in origin and origin['id']:
            msg = 'Origin with id %s not found!' % origin['id']
        else:
            msg = 'Origin with type %s and url %s not found!' % \
                (origin['type'], origin['url'])
        raise NotFoundExc(msg)
    return converters.from_origin(origin_info)


def lookup_origins(origin_from=1, origin_count=100):
    """Get list of archived software origins in a paginated way.

    Origins are sorted by id before returning them

    Args:
        origin_from (int): The minimum id of the origins to return
        origin_count (int): The maximum number of origins to return

    Yields:
        origins information as dicts
    """
    origins = storage.origin_get_range(origin_from, origin_count)
    return map(converters.from_origin, origins)


def search_origin(url_pattern, offset=0, limit=50, regexp=False,
                  with_visit=False):
    """Search for origins whose urls contain a provided string pattern
    or match a provided regular expression.

    Args:
        url_pattern: the string pattern to search for in origin urls
        offset: number of found origins to skip before returning results
        limit: the maximum number of found origins to return

    Returns:
        list of origin information as dict.

    """
    origins = storage.origin_search(url_pattern, offset, limit, regexp,
                                    with_visit)
    return map(converters.from_origin, origins)


def search_origin_metadata(fulltext, limit=50):
    """Search for origins whose metadata match a provided string pattern.

    Args:
        fulltext: the string pattern to search for in origin metadata
        offset: number of found origins to skip before returning results
        limit: the maximum number of found origins to return

    Returns:
        list of origin metadata as dict.

    """
    matches = idx_storage.origin_intrinsic_metadata_search_fulltext(
        conjunction=[fulltext], limit=limit)
    results = []
    for match in matches:
        match['from_revision'] = hashutil.hash_to_hex(match['from_revision'])
        result = converters.from_origin(
            storage.origin_get({'id': match.pop('id')}))
        result['metadata'] = match
        results.append(result)
    return results


def lookup_origin_intrinsic_metadata(origin_dict):
    """Return intrinsic metadata for origin whose origin_id matches given
    origin_id.

    Args:
        origin_dict: origin's dict with keys ('type' AND 'url')

    Returns:
        origin metadata.

    """
    origin_info = storage.origin_get(origin_dict)
    if not origin_info:
        msg = 'Origin with type %s and url %s not found!' % \
            (origin_dict['type'], origin_dict['url'])
        raise NotFoundExc(msg)

    origin_ids = [origin_info['id']]
    match = _first_element(
        idx_storage.origin_intrinsic_metadata_get(origin_ids))
    result = {}
    if match:
        result = match['metadata']
    return result


def lookup_person(person_id):
    """Return information about the person with id person_id.

    Args:
        person_id as string

    Returns:
        person information as dict.

    Raises:
        NotFoundExc if there is no person with the provided id.

    """
    person = _first_element(storage.person_get([int(person_id)]))
    if not person:
        raise NotFoundExc('Person with id %s not found' % person_id)
    return converters.from_person(person)


def _to_sha1_bin(sha1_hex):
    _, sha1_git_bin = query.parse_hash_with_algorithms_or_throws(
        sha1_hex,
        ['sha1'],  # HACK: sha1_git really
        'Only sha1_git is supported.')
    return sha1_git_bin


def _check_directory_exists(sha1_git, sha1_git_bin):
    if len(list(storage.directory_missing([sha1_git_bin]))):
        raise NotFoundExc('Directory with sha1_git %s not found' % sha1_git)


def lookup_directory(sha1_git):
    """Return information about the directory with id sha1_git.

    Args:
        sha1_git as string

    Returns:
        directory information as dict.

    """
    empty_dir_sha1 = '4b825dc642cb6eb9a060e54bf8d69288fbee4904'

    if sha1_git == empty_dir_sha1:
        return []

    sha1_git_bin = _to_sha1_bin(sha1_git)

    _check_directory_exists(sha1_git, sha1_git_bin)

    directory_entries = storage.directory_ls(sha1_git_bin)
    return map(converters.from_directory_entry, directory_entries)


def lookup_directory_with_path(sha1_git, path_string):
    """Return directory information for entry with path path_string w.r.t.
    root directory pointed by directory_sha1_git

    Args:
        - directory_sha1_git: sha1_git corresponding to the directory
        to which we append paths to (hopefully) find the entry
        - the relative path to the entry starting from the directory pointed by
        directory_sha1_git

    Raises:
        NotFoundExc if the directory entry is not found
    """
    sha1_git_bin = _to_sha1_bin(sha1_git)

    _check_directory_exists(sha1_git, sha1_git_bin)

    paths = path_string.strip(os.path.sep).split(os.path.sep)
    queried_dir = storage.directory_entry_get_by_path(
        sha1_git_bin, list(map(lambda p: p.encode('utf-8'), paths)))

    if not queried_dir:
        raise NotFoundExc(('Directory entry with path %s from %s not found') %
                          (path_string, sha1_git))

    return converters.from_directory_entry(queried_dir)


def lookup_release(release_sha1_git):
    """Return information about the release with sha1 release_sha1_git.

    Args:
        release_sha1_git: The release's sha1 as hexadecimal

    Returns:
        Release information as dict.

    Raises:
        ValueError if the identifier provided is not of sha1 nature.

    """
    sha1_git_bin = _to_sha1_bin(release_sha1_git)
    release = _first_element(storage.release_get([sha1_git_bin]))
    if not release:
        raise NotFoundExc('Release with sha1_git %s not found.'
                          % release_sha1_git)
    return converters.from_release(release)


def lookup_release_multiple(sha1_git_list):
    """Return information about the revisions identified with
    their sha1_git identifiers.

    Args:
        sha1_git_list: A list of revision sha1_git identifiers

    Returns:
        Release information as dict.

    Raises:
        ValueError if the identifier provided is not of sha1 nature.

    """
    sha1_bin_list = (_to_sha1_bin(sha1_git) for sha1_git in sha1_git_list)
    releases = storage.release_get(sha1_bin_list) or []
    return (converters.from_release(r) for r in releases)


def lookup_revision(rev_sha1_git):
    """Return information about the revision with sha1 revision_sha1_git.

    Args:
        revision_sha1_git: The revision's sha1 as hexadecimal

    Returns:
        Revision information as dict.

    Raises:
        ValueError if the identifier provided is not of sha1 nature.
        NotFoundExc if there is no revision with the provided sha1_git.

    """
    sha1_git_bin = _to_sha1_bin(rev_sha1_git)
    revision = _first_element(storage.revision_get([sha1_git_bin]))
    if not revision:
        raise NotFoundExc('Revision with sha1_git %s not found.'
                          % rev_sha1_git)
    return converters.from_revision(revision)


def lookup_revision_multiple(sha1_git_list):
    """Return information about the revisions identified with
    their sha1_git identifiers.

    Args:
        sha1_git_list: A list of revision sha1_git identifiers

    Returns:
        Generator of revisions information as dict.

    Raises:
        ValueError if the identifier provided is not of sha1 nature.

    """
    sha1_bin_list = (_to_sha1_bin(sha1_git) for sha1_git in sha1_git_list)
    revisions = storage.revision_get(sha1_bin_list) or []
    return (converters.from_revision(r) for r in revisions)


def lookup_revision_message(rev_sha1_git):
    """Return the raw message of the revision with sha1 revision_sha1_git.

    Args:
        revision_sha1_git: The revision's sha1 as hexadecimal

    Returns:
        Decoded revision message as dict {'message': <the_message>}

    Raises:
        ValueError if the identifier provided is not of sha1 nature.
        NotFoundExc if the revision is not found, or if it has no message

    """
    sha1_git_bin = _to_sha1_bin(rev_sha1_git)

    revision = _first_element(storage.revision_get([sha1_git_bin]))
    if not revision:
        raise NotFoundExc('Revision with sha1_git %s not found.'
                          % rev_sha1_git)
    if 'message' not in revision:
        raise NotFoundExc('No message for revision with sha1_git %s.'
                          % rev_sha1_git)
    res = {'message': revision['message']}
    return res


def _lookup_revision_id_by(origin_id, branch_name, timestamp):
    def _get_snapshot_branch(snapshot, branch_name):
        snapshot = lookup_snapshot(visit['snapshot'],
                                   branches_from=branch_name,
                                   branches_count=10)
        branch = None
        if branch_name in snapshot['branches']:
            branch = snapshot['branches'][branch_name]
        return branch

    visit = get_origin_visit({'id': origin_id}, visit_ts=timestamp)
    branch = _get_snapshot_branch(visit['snapshot'], branch_name)
    rev_id = None
    if branch and branch['target_type'] == 'revision':
        rev_id = branch['target']
    elif branch and branch['target_type'] == 'alias':
        branch = _get_snapshot_branch(visit['snapshot'], branch['target'])
        if branch and branch['target_type'] == 'revision':
            rev_id = branch['target']

    if not rev_id:
        raise NotFoundExc('Revision for origin %s and branch %s not found.'
                          % (origin_id, branch_name))

    return rev_id


def lookup_revision_by(origin_id,
                       branch_name='HEAD',
                       timestamp=None):
    """Lookup revision by origin id, snapshot branch name and visit timestamp.

    If branch_name is not provided, lookup using 'HEAD' as default.
    If timestamp is not provided, use the most recent.

    Args:
        origin_id (int): origin of the revision
        branch_name (str): snapshot branch name
        timestamp (str/int): origin visit time frame

    Returns:
        dict: The revision matching the criterions

    Raises:
        NotFoundExc if no revision corresponds to the criterion

    """
    rev_id = _lookup_revision_id_by(origin_id, branch_name, timestamp)
    return lookup_revision(rev_id)


def lookup_revision_log(rev_sha1_git, limit):
    """Lookup revision log by revision id.

    Args:
        rev_sha1_git (str): The revision's sha1 as hexadecimal
        limit (int): the maximum number of revisions returned

    Returns:
        list: Revision log as list of revision dicts

    Raises:
        ValueError: if the identifier provided is not of sha1 nature.
        NotFoundExc: if there is no revision with the provided sha1_git.

    """
    lookup_revision(rev_sha1_git)
    sha1_git_bin = _to_sha1_bin(rev_sha1_git)
    revision_entries = storage.revision_log([sha1_git_bin], limit)
    return map(converters.from_revision, revision_entries)


def lookup_revision_log_by(origin_id, branch_name, timestamp, limit):
    """Lookup revision by origin id, snapshot branch name and visit timestamp.

    Args:
        origin_id (int): origin of the revision
        branch_name (str): snapshot branch
        timestamp (str/int): origin visit time frame
        limit (int): the maximum number of revisions returned

    Returns:
        list: Revision log as list of revision dicts

    Raises:
        NotFoundExc: if no revision corresponds to the criterion

    """
    rev_id = _lookup_revision_id_by(origin_id, branch_name, timestamp)
    return lookup_revision_log(rev_id, limit)


def lookup_revision_with_context_by(origin_id, branch_name, timestamp,
                                    sha1_git, limit=100):
    """Return information about revision sha1_git, limited to the
    sub-graph of all transitive parents of sha1_git_root.
    sha1_git_root being resolved through the lookup of a revision by origin_id,
    branch_name and ts.

    In other words, sha1_git is an ancestor of sha1_git_root.

    Args:
        - origin_id: origin of the revision.
        - branch_name: revision's branch.
        - timestamp: revision's time frame.
        - sha1_git: one of sha1_git_root's ancestors.
        - limit: limit the lookup to 100 revisions back.

    Returns:
        Pair of (root_revision, revision).
        Information on sha1_git if it is an ancestor of sha1_git_root
        including children leading to sha1_git_root

    Raises:
        - BadInputExc in case of unknown algo_hash or bad hash.
        - NotFoundExc if either revision is not found or if sha1_git is not an
        ancestor of sha1_git_root.

    """
    rev_root_id = _lookup_revision_id_by(origin_id, branch_name, timestamp)

    rev_root_id_bin = hashutil.hash_to_bytes(rev_root_id)

    rev_root = _first_element(storage.revision_get([rev_root_id_bin]))

    return (converters.from_revision(rev_root),
            lookup_revision_with_context(rev_root, sha1_git, limit))


def lookup_revision_with_context(sha1_git_root, sha1_git, limit=100):
    """Return information about revision sha1_git, limited to the
    sub-graph of all transitive parents of sha1_git_root.

    In other words, sha1_git is an ancestor of sha1_git_root.

    Args:
        sha1_git_root: latest revision. The type is either a sha1 (as an hex
        string) or a non converted dict.
        sha1_git: one of sha1_git_root's ancestors
        limit: limit the lookup to 100 revisions back

    Returns:
        Information on sha1_git if it is an ancestor of sha1_git_root
        including children leading to sha1_git_root

    Raises:
        BadInputExc in case of unknown algo_hash or bad hash
        NotFoundExc if either revision is not found or if sha1_git is not an
        ancestor of sha1_git_root

    """
    sha1_git_bin = _to_sha1_bin(sha1_git)

    revision = _first_element(storage.revision_get([sha1_git_bin]))
    if not revision:
        raise NotFoundExc('Revision %s not found' % sha1_git)

    if isinstance(sha1_git_root, str):
        sha1_git_root_bin = _to_sha1_bin(sha1_git_root)

        revision_root = _first_element(storage.revision_get([sha1_git_root_bin])) # noqa
        if not revision_root:
            raise NotFoundExc('Revision root %s not found' % sha1_git_root)
    else:
        sha1_git_root_bin = sha1_git_root['id']

    revision_log = storage.revision_log([sha1_git_root_bin], limit)

    parents = {}
    children = defaultdict(list)

    for rev in revision_log:
        rev_id = rev['id']
        parents[rev_id] = []
        for parent_id in rev['parents']:
            parents[rev_id].append(parent_id)
            children[parent_id].append(rev_id)

    if revision['id'] not in parents:
        raise NotFoundExc('Revision %s is not an ancestor of %s' %
                          (sha1_git, sha1_git_root))

    revision['children'] = children[revision['id']]

    return converters.from_revision(revision)


def lookup_directory_with_revision(sha1_git, dir_path=None, with_data=False):
    """Return information on directory pointed by revision with sha1_git.
    If dir_path is not provided, display top level directory.
    Otherwise, display the directory pointed by dir_path (if it exists).

    Args:
        sha1_git: revision's hash.
        dir_path: optional directory pointed to by that revision.
        with_data: boolean that indicates to retrieve the raw data if the path
        resolves to a content. Default to False (for the api)

    Returns:
        Information on the directory pointed to by that revision.

    Raises:
        BadInputExc in case of unknown algo_hash or bad hash.
        NotFoundExc either if the revision is not found or the path referenced
        does not exist.
        NotImplementedError in case of dir_path exists but do not reference a
        type 'dir' or 'file'.

    """
    sha1_git_bin = _to_sha1_bin(sha1_git)
    revision = _first_element(storage.revision_get([sha1_git_bin]))
    if not revision:
        raise NotFoundExc('Revision %s not found' % sha1_git)
    dir_sha1_git_bin = revision['directory']
    if dir_path:
        paths = dir_path.strip(os.path.sep).split(os.path.sep)
        entity = storage.directory_entry_get_by_path(
            dir_sha1_git_bin, list(map(lambda p: p.encode('utf-8'), paths)))
        if not entity:
            raise NotFoundExc(
                "Directory or File '%s' pointed to by revision %s not found"
                % (dir_path, sha1_git))
    else:
        entity = {'type': 'dir', 'target': dir_sha1_git_bin}
    if entity['type'] == 'dir':
        directory_entries = storage.directory_ls(entity['target']) or []
        return {'type': 'dir',
                'path': '.' if not dir_path else dir_path,
                'revision': sha1_git,
                'content': list(map(converters.from_directory_entry,
                                    directory_entries))}
    elif entity['type'] == 'file':  # content
        content = _first_element(
            storage.content_find({'sha1_git': entity['target']}))
        if not content:
            raise NotFoundExc('Content not found for revision %s'
                              % sha1_git)
        if with_data:
            c = _first_element(storage.content_get([content['sha1']]))
            content['data'] = c['data']
        return {'type': 'file',
                'path': '.' if not dir_path else dir_path,
                'revision': sha1_git,
                'content': converters.from_content(content)}
    elif entity['type'] == 'rev':  # revision
        revision = next(storage.revision_get([entity['target']]))
        return {'type': 'rev',
                'path': '.' if not dir_path else dir_path,
                'revision': sha1_git,
                'content': converters.from_revision(revision)}
    else:
        raise NotImplementedError('Entity of type %s not implemented.'
                                  % entity['type'])


def lookup_content(q):
    """Lookup the content designed by q.

    Args:
        q: The release's sha1 as hexadecimal

    Raises:
        NotFoundExc if the requested content is not found

    """
    algo, hash = query.parse_hash(q)
    c = _first_element(storage.content_find({algo: hash}))
    if not c:
        raise NotFoundExc('Content with %s checksum equals to %s not found!' %
                          (algo, hashutil.hash_to_hex(hash)))
    return converters.from_content(c)


def lookup_content_raw(q):
    """Lookup the content defined by q.

    Args:
        q: query string of the form <hash_algo:hash>

    Returns:
        dict with 'sha1' and 'data' keys.
        data representing its raw data decoded.

    Raises:
        NotFoundExc if the requested content is not found or
        if the content bytes are not available in the storage

    """
    c = lookup_content(q)
    content_sha1_bytes = hashutil.hash_to_bytes(c['checksums']['sha1'])
    content = _first_element(storage.content_get([content_sha1_bytes]))
    if not content:
        algo, hash = query.parse_hash(q)
        raise NotFoundExc('Bytes of content with %s checksum equals to %s '
                          'are not available!' %
                          (algo, hashutil.hash_to_hex(hash)))
    return converters.from_content(content)


def stat_counters():
    """Return the stat counters for Software Heritage

    Returns:
        A dict mapping textual labels to integer values.
    """
    return storage.stat_counters()


def _lookup_origin_visits(origin_id, last_visit=None, limit=10):
    """Yields the origin origin_ids' visits.

    Args:
        origin_id (int): origin to list visits for
        last_visit (int): last visit to lookup from
        limit (int): Number of elements max to display

    Yields:
       Dictionaries of origin_visit for that origin

    """
    limit = min(limit, MAX_LIMIT)
    yield from storage.origin_visit_get(
        origin_id, last_visit=last_visit, limit=limit)


def lookup_origin_visits(origin_id, last_visit=None, per_page=10):
    """Yields the origin origin_ids' visits.

    Args:
        origin_id: origin to list visits for

    Yields:
       Dictionaries of origin_visit for that origin

    """
    lookup_origin({'id': origin_id})
    visits = _lookup_origin_visits(origin_id, last_visit=last_visit,
                                   limit=per_page)
    for visit in visits:
        yield converters.from_origin_visit(visit)


def lookup_origin_visit(origin_id, visit_id):
    """Return information about visit visit_id with origin origin_id.

    Args:
        origin_id: origin concerned by the visit
        visit_id: the visit identifier to lookup

    Yields:
       The dict origin_visit concerned

    """
    visit = storage.origin_visit_get_by(origin_id, visit_id)
    if not visit:
        raise NotFoundExc('Origin with id %s or its visit '
                          'with id %s not found!' % (origin_id, visit_id))
    return converters.from_origin_visit(visit)


def lookup_snapshot_size(snapshot_id):
    """Count the number of branches in the snapshot with the given id

    Args:
        snapshot_id (str): sha1 identifier of the snapshot

    Returns:
        dict: A dict whose keys are the target types of branches and
        values their corresponding amount
    """
    snapshot_id_bin = _to_sha1_bin(snapshot_id)
    snapshot_size = storage.snapshot_count_branches(snapshot_id_bin)
    if 'revision' not in snapshot_size:
        snapshot_size['revision'] = 0
    if 'release' not in snapshot_size:
        snapshot_size['release'] = 0
    return snapshot_size


def lookup_snapshot(snapshot_id, branches_from='', branches_count=1000,
                    target_types=None):
    """Return information about a snapshot, aka the list of named
    branches found during a specific visit of an origin.

    Args:
        snapshot_id (str): sha1 identifier of the snapshot
        branches_from (str): optional parameter used to skip branches
            whose name is lesser than it before returning them
        branches_count (int): optional parameter used to restrain
            the amount of returned branches
        target_types (list): optional parameter used to filter the
            target types of branch to return (possible values that can be
            contained in that list are `'content', 'directory',
            'revision', 'release', 'snapshot', 'alias'`)

    Returns:
        A dict filled with the snapshot content.
    """
    snapshot_id_bin = _to_sha1_bin(snapshot_id)
    snapshot = storage.snapshot_get_branches(snapshot_id_bin,
                                             branches_from.encode(),
                                             branches_count, target_types)
    if not snapshot:
        raise NotFoundExc('Snapshot with id %s not found!' % snapshot_id)
    return converters.from_snapshot(snapshot)


def lookup_latest_origin_snapshot(origin_id, allowed_statuses=None):
    """Return information about the latest snapshot of an origin.

    .. warning:: At most 1000 branches contained in the snapshot
        will be returned for performance reasons.

    Args:
        origin_id: integer identifier of the origin
        allowed_statuses: list of visit statuses considered
            to find the latest snapshot for the visit. For instance,
            ``allowed_statuses=['full']`` will only consider visits that
            have successfully run to completion.

    Returns:
        A dict filled with the snapshot content.
    """
    snapshot = storage.snapshot_get_latest(origin_id, allowed_statuses)
    return converters.from_snapshot(snapshot)


def lookup_revision_through(revision, limit=100):
    """Retrieve a revision from the criterion stored in revision dictionary.

    Args:
        revision: Dictionary of criterion to lookup the revision with.
        Here are the supported combination of possible values:
        - origin_id, branch_name, ts, sha1_git
        - origin_id, branch_name, ts
        - sha1_git_root, sha1_git
        - sha1_git

    Returns:
        None if the revision is not found or the actual revision.

    """
    if 'origin_id' in revision and \
       'branch_name' in revision and \
       'ts' in revision and \
       'sha1_git' in revision:
        return lookup_revision_with_context_by(revision['origin_id'],
                                               revision['branch_name'],
                                               revision['ts'],
                                               revision['sha1_git'],
                                               limit)
    if 'origin_id' in revision and \
       'branch_name' in revision and \
       'ts' in revision:
        return lookup_revision_by(revision['origin_id'],
                                  revision['branch_name'],
                                  revision['ts'])
    if 'sha1_git_root' in revision and \
       'sha1_git' in revision:
        return lookup_revision_with_context(revision['sha1_git_root'],
                                            revision['sha1_git'],
                                            limit)
    if 'sha1_git' in revision:
        return lookup_revision(revision['sha1_git'])

    # this should not happen
    raise NotImplementedError('Should not happen!')


def lookup_directory_through_revision(revision, path=None,
                                      limit=100, with_data=False):
    """Retrieve the directory information from the revision.

    Args:
        revision: dictionary of criterion representing a revision to lookup
        path: directory's path to lookup.
        limit: optional query parameter to limit the revisions log (default to
            100). For now, note that this limit could impede the transitivity
            conclusion about sha1_git not being an ancestor of.
        with_data: indicate to retrieve the content's raw data if path resolves
            to a content.

    Returns:
        The directory pointing to by the revision criterions at path.

    """
    rev = lookup_revision_through(revision, limit)

    if not rev:
        raise NotFoundExc('Revision with criterion %s not found!' % revision)
    return (rev['id'],
            lookup_directory_with_revision(rev['id'], path, with_data))


def vault_cook(obj_type, obj_id, email=None):
    """Cook a vault bundle.
    """
    return vault.cook(obj_type, obj_id, email=email)


def vault_fetch(obj_type, obj_id):
    """Fetch a vault bundle.
    """
    return vault.fetch(obj_type, obj_id)


def vault_progress(obj_type, obj_id):
    """Get the current progress of a vault bundle.
    """
    return vault.progress(obj_type, obj_id)


def diff_revision(rev_id):
    """Get the list of file changes (insertion / deletion / modification /
    renaming) for a particular revision.
    """
    rev_sha1_git_bin = _to_sha1_bin(rev_id)

    changes = storage.diff_revision(rev_sha1_git_bin, track_renaming=True)

    for change in changes:
        change['from'] = converters.from_directory_entry(change['from'])
        change['to'] = converters.from_directory_entry(change['to'])
        if change['from_path']:
            change['from_path'] = change['from_path'].decode('utf-8')
        if change['to_path']:
            change['to_path'] = change['to_path'].decode('utf-8')

    return changes


class _RevisionsWalkerProxy(object):
    """
    Proxy class wrapping a revisions walker iterator from
    swh-storage and performing needed conversions.
    """
    def __init__(self, rev_walker_type, rev_start, *args, **kwargs):
        rev_start_bin = hashutil.hash_to_bytes(rev_start)
        self.revisions_walker = \
            revisions_walker.get_revisions_walker(rev_walker_type,
                                                  storage,
                                                  rev_start_bin,
                                                  *args, **kwargs)

    def export_state(self):
        return self.revisions_walker.export_state()

    def __next__(self):
        return converters.from_revision(next(self.revisions_walker))

    def __iter__(self):
        return self


def get_revisions_walker(rev_walker_type, rev_start, *args, **kwargs):
    """
    Utility function to instantiate a revisions walker of a given type,
    see :mod:`swh.storage.algos.revisions_walker`.

    Args:
        rev_walker_type (str): the type of revisions walker to return,
            possible values are: ``committer_date``, ``dfs``, ``dfs_post``,
            ``bfs`` and ``path``
        rev_start (str): hexadecimal representation of a revision identifier
        args (list): position arguments to pass to the revisions walker
            constructor
        kwargs (dict): keyword arguments to pass to the revisions walker
            constructor

    """
    # first check if the provided revision is valid
    lookup_revision(rev_start)
    return _RevisionsWalkerProxy(rev_walker_type, rev_start, *args, **kwargs)
