Changeset View
Changeset View
Standalone View
Standalone View
swh/web/tests/data.py
# Copyright (C) 2018-2020 The Software Heritage developers | # Copyright (C) 2018-2020 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU Affero General Public License version 3, or any later version | # License: GNU Affero General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import os | import os | ||||
import random | import random | ||||
from copy import deepcopy | from copy import deepcopy | ||||
from swh.indexer.fossology_license import FossologyLicenseIndexer | from swh.indexer.fossology_license import FossologyLicenseIndexer | ||||
from swh.indexer.mimetype import MimetypeIndexer | from swh.indexer.mimetype import MimetypeIndexer | ||||
from swh.indexer.ctags import CtagsIndexer | from swh.indexer.ctags import CtagsIndexer | ||||
from swh.indexer.storage import get_indexer_storage | from swh.indexer.storage import get_indexer_storage | ||||
from swh.model.model import Content | |||||
from swh.model.hashutil import hash_to_hex, hash_to_bytes, DEFAULT_ALGORITHMS | from swh.model.hashutil import hash_to_hex, hash_to_bytes, DEFAULT_ALGORITHMS | ||||
from swh.model.model import Directory, Origin | from swh.model.model import Directory, Origin | ||||
from swh.loader.git.from_disk import GitLoaderFromArchive | from swh.loader.git.from_disk import GitLoaderFromArchive | ||||
from swh.search import get_search | from swh.search import get_search | ||||
from swh.storage.algos.dir_iterators import dir_iterator | from swh.storage.algos.dir_iterators import dir_iterator | ||||
from swh.web import config | from swh.web import config | ||||
from swh.web.browse.utils import ( | from swh.web.browse.utils import ( | ||||
get_mimetype_and_encoding_for_content, | get_mimetype_and_encoding_for_content, | ||||
▲ Show 20 Lines • Show All 109 Lines • ▼ Show 20 Lines | _TEST_ORIGINS = [ | ||||
"archives": ["repo_with_submodules.tgz"], | "archives": ["repo_with_submodules.tgz"], | ||||
"visit_date": ["Jan 1 2019, 01:00 UTC"], | "visit_date": ["Jan 1 2019, 01:00 UTC"], | ||||
}, | }, | ||||
] | ] | ||||
_contents = {} | _contents = {} | ||||
def _add_extra_contents(storage, contents):
    """Insert hand-crafted content objects into the test archive.

    Currently adds a single plain-text PBM (portable bitmap) image so that
    tests can exercise the ``image/x-portable-bitmap`` mimetype code paths.

    Args:
        storage: the archive storage instance the content is written to
        contents: a set of content sha1 identifiers, updated in place
    """
    # Tiny "PBM" banner image in the plain (P1) netpbm format; the literal
    # must stay byte-identical since checksums are derived from it.
    raw_pbm = b"""P1
# PBM example
24 7
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 1 1 1 1 0 0 1 1 1 1 0 0 1 1 1 1 0 0 1 1 1 1 0
0 1 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 1 0 0 1 0
0 1 1 1 0 0 0 1 1 1 0 0 0 1 1 1 0 0 0 1 1 1 1 0
0 1 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0
0 1 0 0 0 0 0 1 1 1 1 0 0 1 1 1 1 0 0 1 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0"""
    # Wrap the raw bytes in a Content model object, persist it, and make
    # its sha1 visible to the rest of the test data initialization.
    image_content = Content.from_data(raw_pbm)
    storage.content_add([image_content])
    contents.add(image_content.sha1)
# Tests data initialization | # Tests data initialization | ||||
def _init_tests_data(): | def _init_tests_data(): | ||||
# To hold reference to the memory storage | # To hold reference to the memory storage | ||||
storage = None | storage = None | ||||
# Create search instance | # Create search instance | ||||
search = get_search("memory", {}) | search = get_search("memory", {}) | ||||
search.initialize() | search.initialize() | ||||
▲ Show 20 Lines • Show All 56 Lines • ▼ Show 20 Lines | for origin in _TEST_ORIGINS: | ||||
for rev_log in storage.revision_shortlog(set(revisions)): | for rev_log in storage.revision_shortlog(set(revisions)): | ||||
rev_id = rev_log[0] | rev_id = rev_log[0] | ||||
revisions.add(rev_id) | revisions.add(rev_id) | ||||
for rev in storage.revision_get(revisions): | for rev in storage.revision_get(revisions): | ||||
dir_id = rev["directory"] | dir_id = rev["directory"] | ||||
directories.add(hash_to_hex(dir_id)) | directories.add(hash_to_hex(dir_id)) | ||||
for entry in dir_iterator(storage, dir_id): | for entry in dir_iterator(storage, dir_id): | ||||
if entry["type"] == "file": | |||||
contents.add(entry["sha1"]) | |||||
content_path[entry["sha1"]] = "/".join( | content_path[entry["sha1"]] = "/".join( | ||||
[hash_to_hex(dir_id), entry["path"].decode("utf-8")] | [hash_to_hex(dir_id), entry["path"].decode("utf-8")] | ||||
) | ) | ||||
if entry["type"] == "file": | |||||
contents.add(entry["sha1"]) | |||||
elif entry["type"] == "dir": | elif entry["type"] == "dir": | ||||
directories.add(hash_to_hex(entry["target"])) | directories.add(hash_to_hex(entry["target"])) | ||||
_add_extra_contents(storage, contents) | |||||
# Get all checksums for each content | # Get all checksums for each content | ||||
result = storage.content_get_metadata(contents) | result = storage.content_get_metadata(contents) | ||||
contents = [] | contents = [] | ||||
for sha1, contents_metadata in result.items(): | for sha1, contents_metadata in result.items(): | ||||
for content_metadata in contents_metadata: | sha1 = contents_metadata[0]["sha1"] | ||||
contents.append( | content_metadata = { | ||||
{ | algo: hash_to_hex(contents_metadata[0][algo]) for algo in DEFAULT_ALGORITHMS | ||||
algo: hash_to_hex(content_metadata[algo]) | |||||
for algo in DEFAULT_ALGORITHMS | |||||
} | } | ||||
) | |||||
path = "" | |||||
if sha1 in content_path: | |||||
path = content_path[sha1] | path = content_path[sha1] | ||||
cnt = next(storage.content_get([sha1])) | cnt = next(storage.content_get([sha1])) | ||||
mimetype, encoding = get_mimetype_and_encoding_for_content(cnt["data"]) | mimetype, encoding = get_mimetype_and_encoding_for_content(cnt["data"]) | ||||
_, _, cnt["data"] = _re_encode_content(mimetype, encoding, cnt["data"]) | _, _, cnt["data"] = _re_encode_content(mimetype, encoding, cnt["data"]) | ||||
content_display_data = prepare_content_for_display( | content_display_data = prepare_content_for_display(cnt["data"], mimetype, path) | ||||
cnt["data"], mimetype, path | |||||
content_metadata.update( | |||||
{ | |||||
"path": path, | |||||
"mimetype": mimetype, | |||||
"encoding": encoding, | |||||
"hljs_language": content_display_data["language"], | |||||
"data": content_display_data["content_data"], | |||||
} | |||||
) | ) | ||||
contents[-1]["path"] = path | _contents[hash_to_hex(sha1)] = content_metadata | ||||
contents[-1]["mimetype"] = mimetype | contents.append(content_metadata) | ||||
contents[-1]["encoding"] = encoding | |||||
contents[-1]["hljs_language"] = content_display_data["language"] | |||||
contents[-1]["data"] = content_display_data["content_data"] | |||||
_contents[contents[-1]["sha1"]] = contents[-1] | |||||
# Create indexer storage instance that will be shared by indexers | # Create indexer storage instance that will be shared by indexers | ||||
idx_storage = get_indexer_storage("memory", {}) | idx_storage = get_indexer_storage("memory", {}) | ||||
# Add the empty directory to the test archive | # Add the empty directory to the test archive | ||||
storage.directory_add([Directory(entries=[])]) | storage.directory_add([Directory(entries=[])]) | ||||
# Return tests data | # Return tests data | ||||
▲ Show 20 Lines • Show All 77 Lines • Show Last 20 Lines |