Changeset View
Changeset View
Standalone View
Standalone View
swh/web/tests/data.py
# Copyright (C) 2018-2020 The Software Heritage developers | # Copyright (C) 2018-2020 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU Affero General Public License version 3, or any later version | # License: GNU Affero General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import os | import os | ||||
import random | import random | ||||
from copy import deepcopy | from copy import deepcopy | ||||
from swh.indexer.fossology_license import FossologyLicenseIndexer | from swh.indexer.fossology_license import FossologyLicenseIndexer | ||||
from swh.indexer.mimetype import MimetypeIndexer | from swh.indexer.mimetype import MimetypeIndexer | ||||
from swh.indexer.ctags import CtagsIndexer | from swh.indexer.ctags import CtagsIndexer | ||||
from swh.indexer.storage import get_indexer_storage | from swh.indexer.storage import get_indexer_storage | ||||
from swh.model.model import Content | |||||
from swh.model.hashutil import hash_to_hex, hash_to_bytes, DEFAULT_ALGORITHMS | from swh.model.hashutil import hash_to_hex, hash_to_bytes, DEFAULT_ALGORITHMS | ||||
from swh.model.model import Directory, Origin | from swh.model.model import Directory, Origin | ||||
from swh.loader.git.from_disk import GitLoaderFromArchive | from swh.loader.git.from_disk import GitLoaderFromArchive | ||||
from swh.search import get_search | from swh.search import get_search | ||||
from swh.storage.algos.dir_iterators import dir_iterator | from swh.storage.algos.dir_iterators import dir_iterator | ||||
from swh.web import config | from swh.web import config | ||||
from swh.web.browse.utils import ( | from swh.web.browse.utils import ( | ||||
get_mimetype_and_encoding_for_content, | get_mimetype_and_encoding_for_content, | ||||
▲ Show 20 Lines • Show All 109 Lines • ▼ Show 20 Lines | _TEST_ORIGINS = [ | ||||
"archives": ["repo_with_submodules.tgz"], | "archives": ["repo_with_submodules.tgz"], | ||||
"visit_date": ["Jan 1 2019, 01:00 UTC"], | "visit_date": ["Jan 1 2019, 01:00 UTC"], | ||||
}, | }, | ||||
] | ] | ||||
_contents = {} | _contents = {} | ||||
def _add_extra_contents(storage, contents):
    """Insert hand-crafted content objects into the test archive.

    Currently adds a single plain-text PBM (portable bitmap) image so that
    tests can exercise the ``image/x-portable-bitmap`` mimetype code paths.

    Args:
        storage: the archive storage instance the content is written to
        contents: a set of content sha1 identifiers, updated in place
    """
    # Tiny "PBM" banner image in the plain (P1) netpbm format; the literal
    # must stay byte-identical since checksums are derived from it.
    raw_pbm = b"""P1
# PBM example
24 7
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 1 1 1 1 0 0 1 1 1 1 0 0 1 1 1 1 0 0 1 1 1 1 0
0 1 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 1 0 0 1 0
0 1 1 1 0 0 0 1 1 1 0 0 0 1 1 1 0 0 0 1 1 1 1 0
0 1 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0
0 1 0 0 0 0 0 1 1 1 1 0 0 1 1 1 1 0 0 1 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0"""
    # Wrap the raw bytes in a Content model object, persist it, and make
    # its sha1 visible to the rest of the test data initialization.
    image_content = Content.from_data(raw_pbm)
    storage.content_add([image_content])
    contents.add(image_content.sha1)
# Tests data initialization | # Tests data initialization | ||||
def _init_tests_data(): | def _init_tests_data(): | ||||
# To hold reference to the memory storage | # To hold reference to the memory storage | ||||
storage = None | storage = None | ||||
# Create search instance | # Create search instance | ||||
search = get_search("memory", {}) | search = get_search("memory", {}) | ||||
search.initialize() | search.initialize() | ||||
▲ Show 20 Lines • Show All 56 Lines • ▼ Show 20 Lines | for origin in _TEST_ORIGINS: | ||||
for rev_log in storage.revision_shortlog(set(revisions)): | for rev_log in storage.revision_shortlog(set(revisions)): | ||||
rev_id = rev_log[0] | rev_id = rev_log[0] | ||||
revisions.add(rev_id) | revisions.add(rev_id) | ||||
for rev in storage.revision_get(revisions): | for rev in storage.revision_get(revisions): | ||||
dir_id = rev["directory"] | dir_id = rev["directory"] | ||||
directories.add(hash_to_hex(dir_id)) | directories.add(hash_to_hex(dir_id)) | ||||
for entry in dir_iterator(storage, dir_id): | for entry in dir_iterator(storage, dir_id): | ||||
if entry["type"] == "file": | |||||
contents.add(entry["sha1"]) | |||||
content_path[entry["sha1"]] = "/".join( | content_path[entry["sha1"]] = "/".join( | ||||
[hash_to_hex(dir_id), entry["path"].decode("utf-8")] | [hash_to_hex(dir_id), entry["path"].decode("utf-8")] | ||||
) | ) | ||||
if entry["type"] == "file": | |||||
contents.add(entry["sha1"]) | |||||
elif entry["type"] == "dir": | elif entry["type"] == "dir": | ||||
directories.add(hash_to_hex(entry["target"])) | directories.add(hash_to_hex(entry["target"])) | ||||
_add_extra_contents(storage, contents) | |||||
# Get all checksums for each content | # Get all checksums for each content | ||||
result = storage.content_get_metadata(contents) | result = storage.content_get_metadata(contents) | ||||
contents = [] | contents = [] | ||||
for sha1, contents_metadata in result.items(): | for sha1, contents_metadata in result.items(): | ||||
for content_metadata in contents_metadata: | sha1 = contents_metadata[0]["sha1"] | ||||
contents.append( | content_metadata = { | ||||
{ | algo: hash_to_hex(contents_metadata[0][algo]) for algo in DEFAULT_ALGORITHMS | ||||
algo: hash_to_hex(content_metadata[algo]) | |||||
for algo in DEFAULT_ALGORITHMS | |||||
} | } | ||||
) | |||||
path = "" | |||||
if sha1 in content_path: | |||||
path = content_path[sha1] | path = content_path[sha1] | ||||
cnt = next(storage.content_get([sha1])) | cnt = next(storage.content_get([sha1])) | ||||
mimetype, encoding = get_mimetype_and_encoding_for_content(cnt["data"]) | mimetype, encoding = get_mimetype_and_encoding_for_content(cnt["data"]) | ||||
_, _, cnt["data"] = _re_encode_content(mimetype, encoding, cnt["data"]) | _, _, cnt["data"] = _re_encode_content(mimetype, encoding, cnt["data"]) | ||||
content_display_data = prepare_content_for_display( | content_display_data = prepare_content_for_display(cnt["data"], mimetype, path) | ||||
cnt["data"], mimetype, path | |||||
content_metadata.update( | |||||
{ | |||||
"path": path, | |||||
"mimetype": mimetype, | |||||
"encoding": encoding, | |||||
"hljs_language": content_display_data["language"], | |||||
"data": content_display_data["content_data"], | |||||
} | |||||
) | ) | ||||
contents[-1]["path"] = path | _contents[hash_to_hex(sha1)] = content_metadata | ||||
contents[-1]["mimetype"] = mimetype | contents.append(content_metadata) | ||||
contents[-1]["encoding"] = encoding | |||||
contents[-1]["hljs_language"] = content_display_data["language"] | |||||
contents[-1]["data"] = content_display_data["content_data"] | |||||
_contents[contents[-1]["sha1"]] = contents[-1] | |||||
# Create indexer storage instance that will be shared by indexers | # Create indexer storage instance that will be shared by indexers | ||||
idx_storage = get_indexer_storage("memory", {}) | idx_storage = get_indexer_storage("memory", {}) | ||||
# Add the empty directory to the test archive | # Add the empty directory to the test archive | ||||
storage.directory_add([Directory(entries=[])]) | storage.directory_add([Directory(entries=[])]) | ||||
# Return tests data | # Return tests data | ||||
▲ Show 20 Lines • Show All 77 Lines • Show Last 20 Lines |