Changeset View
Changeset View
Standalone View
Standalone View
docker/tests/test_vault.py
# Copyright (C) 2019-2020 The Software Heritage developers | # Copyright (C) 2019-2021 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import io | |||||
import hashlib | import hashlib | ||||
import tarfile | import io | ||||
from os.path import join | from os.path import join | ||||
import tarfile | |||||
from urllib.parse import quote_plus | from urllib.parse import quote_plus | ||||
from .conftest import apiget, getdirectory, pollapi | from .conftest import apiget, getdirectory, pollapi | ||||
def test_vault_directory(scheduler_host, git_origin): | def test_vault_directory(scheduler_host, git_origin): | ||||
# retrieve the root directory of the master branch of the ingested git | # retrieve the root directory of the master branch of the ingested git | ||||
# repository (by the git_origin fixture) | # repository (by the git_origin fixture) | ||||
visit = apiget(f'origin/{quote_plus(git_origin)}/visit/latest') | visit = apiget(f"origin/{quote_plus(git_origin)}/visit/latest") | ||||
snapshot = apiget(f'snapshot/{visit["snapshot"]}') | snapshot = apiget(f'snapshot/{visit["snapshot"]}') | ||||
rev_id = snapshot["branches"]["refs/heads/master"]["target"] | rev_id = snapshot["branches"]["refs/heads/master"]["target"] | ||||
revision = apiget(f'revision/{rev_id}') | revision = apiget(f"revision/{rev_id}") | ||||
dir_id = revision['directory'] | dir_id = revision["directory"] | ||||
# now cook it | # now cook it | ||||
cook = apiget(f'vault/directory/{dir_id}/', 'POST') | cook = apiget(f"vault/directory/{dir_id}/", "POST") | ||||
assert cook['obj_type'] == 'directory' | assert cook["obj_type"] == "directory" | ||||
assert cook['obj_id'] == dir_id | assert cook["obj_id"] == dir_id | ||||
assert cook['fetch_url'].endswith(f'vault/directory/{dir_id}/raw/') | assert cook["fetch_url"].endswith(f"vault/directory/{dir_id}/raw/") | ||||
# while it's cooking, get the directory tree from the archive | # while it's cooking, get the directory tree from the archive | ||||
directory = getdirectory(dir_id) | directory = getdirectory(dir_id) | ||||
# retrieve the cooked tar file | # retrieve the cooked tar file | ||||
resp = pollapi(f'vault/directory/{dir_id}/raw') | resp = pollapi(f"vault/directory/{dir_id}/raw") | ||||
tarf = tarfile.open(fileobj=io.BytesIO(resp.content)) | tarf = tarfile.open(fileobj=io.BytesIO(resp.content)) | ||||
# and check the tarfile seems ok wrt. 'directory' | # and check the tarfile seems ok wrt. 'directory' | ||||
assert tarf.getnames()[0] == dir_id | assert tarf.getnames()[0] == dir_id | ||||
tarfiles = {t.name: t for t in tarf.getmembers()} | tarfiles = {t.name: t for t in tarf.getmembers()} | ||||
for fname, fdesc in directory: | for fname, fdesc in directory: | ||||
tfinfo = tarfiles.get(join(dir_id, fname)) | tfinfo = tarfiles.get(join(dir_id, fname)) | ||||
assert tfinfo, f"Missing path {fname} in retrieved tarfile" | assert tfinfo, f"Missing path {fname} in retrieved tarfile" | ||||
if fdesc['type'] == 'file': | if fdesc["type"] == "file": | ||||
assert fdesc['length'] == tfinfo.size, \ | assert fdesc["length"] == tfinfo.size, f"File {fname}: length mismatch" | ||||
f"File {fname}: length mismatch" | |||||
fdata = tarf.extractfile(tfinfo).read() | fdata = tarf.extractfile(tfinfo).read() | ||||
for algo in fdesc['checksums']: | for algo in fdesc["checksums"]: | ||||
if algo not in hashlib.algorithms_available: | if algo not in hashlib.algorithms_available: | ||||
continue | continue | ||||
hash = hashlib.new(algo, fdata).hexdigest() | hash = hashlib.new(algo, fdata).hexdigest() | ||||
assert hash == fdesc['checksums'][algo], \ | assert ( | ||||
f"File {fname}: {algo} mismatch" | hash == fdesc["checksums"][algo] | ||||
), f"File {fname}: {algo} mismatch" | |||||
# XXX what to check for dir? symlink? (other?) | # XXX what to check for dir? symlink? (other?) | ||||
# check that if we ask a second time this directory, it returns the same | # check that if we ask a second time this directory, it returns the same | ||||
# and does not cook it again | # and does not cook it again | ||||
recook = apiget(f'vault/directory/{dir_id}/', 'POST') | recook = apiget(f"vault/directory/{dir_id}/", "POST") | ||||
assert recook['obj_type'] == 'directory' | assert recook["obj_type"] == "directory" | ||||
assert recook['obj_id'] == dir_id | assert recook["obj_id"] == dir_id | ||||
assert recook['id'] == cook['id'] | assert recook["id"] == cook["id"] | ||||
assert recook['status'] == 'done' # no need to wait for this to be true | assert recook["status"] == "done" # no need to wait for this to be true |