D8158.diff
No OneTemporary
Actions

Size

13 KB

Subscribers

None

D8158.diff
View Options

	diff --git a/swh/indexer/storage/__init__.py b/swh/indexer/storage/__init__.py
	--- a/swh/indexer/storage/__init__.py
	+++ b/swh/indexer/storage/__init__.py
	@@ -25,7 +25,6 @@
	from .interface import PagedResult, Sha1
	from .metrics import process_metrics, send_metric, timed
	from .model import (
	- ContentLanguageRow,
	ContentLicenseRow,
	ContentMetadataRow,
	ContentMimetypeRow,
	@@ -102,16 +101,17 @@
	data (List[dict]): List of dictionaries to be inserted

	>>> check_id_duplicates([
	- ... ContentLanguageRow(id=b'foo', indexer_configuration_id=42, lang="python"),
	- ... ContentLanguageRow(id=b'foo', indexer_configuration_id=32, lang="python"),
	+ ... ContentLicenseRow(id=b'foo', indexer_configuration_id=42, license="GPL"),
	+ ... ContentLicenseRow(id=b'foo', indexer_configuration_id=32, license="GPL"),
	... ])
	>>> check_id_duplicates([
	- ... ContentLanguageRow(id=b'foo', indexer_configuration_id=42, lang="python"),
	- ... ContentLanguageRow(id=b'foo', indexer_configuration_id=42, lang="python"),
	+ ... ContentLicenseRow(id=b'foo', indexer_configuration_id=42, license="AGPL"),
	+ ... ContentLicenseRow(id=b'foo', indexer_configuration_id=42, license="AGPL"),
	... ])
	Traceback (most recent call last):
	- ...
	- swh.indexer.storage.exc.DuplicateId: [{'id': b'foo', 'indexer_configuration_id': 42}]
	+ ...
	+ swh.indexer.storage.exc.DuplicateId: [{'id': b'foo', 'indexer_configuration_id': 42, 'license': 'AGPL'}]
	+
	""" # noqa
	counter = Counter(tuple(sorted(item.unique_key().items())) for item in data)
	duplicates = [id_ for (id_, count) in counter.items() if count >= 2]
	@@ -194,7 +194,7 @@
	bound by limit.

	Args:
	- indexer_type: Type of data content to index (mimetype, language, etc...)
	+ indexer_type: Type of data content to index (mimetype, etc...)
	indexer_configuration_id: The tool used to index data
	partition_id: index of the partition to fetch
	nb_partitions: total number of partitions to split into
	@@ -303,56 +303,6 @@
	for c in db.content_mimetype_get_from_list(ids, cur)
	]

	- @timed
	- @db_transaction()
	- def content_language_missing(
	- self, languages: Iterable[Dict], db=None, cur=None
	- ) -> List[Tuple[Sha1, int]]:
	- return [obj[0] for obj in db.content_language_missing_from_list(languages, cur)]
	-
	- @timed
	- @db_transaction()
	- def content_language_get(
	- self, ids: Iterable[Sha1], db=None, cur=None
	- ) -> List[ContentLanguageRow]:
	- return [
	- ContentLanguageRow.from_dict(
	- converters.db_to_language(dict(zip(db.content_language_cols, c)))
	- )
	- for c in db.content_language_get_from_list(ids, cur)
	- ]
	-
	- @timed
	- @process_metrics
	- @db_transaction()
	- def content_language_add(
	- self,
	- languages: List[ContentLanguageRow],
	- db=None,
	- cur=None,
	- ) -> Dict[str, int]:
	- check_id_duplicates(languages)
	- languages.sort(key=lambda m: m.id)
	- self.journal_writer.write_additions("content_language", languages)
	- db.mktemp_content_language(cur)
	- # empty language is mapped to 'unknown'
	- db.copy_to(
	- (
	- {
	- "id": lang.id,
	- "lang": lang.lang or "unknown",
	- "indexer_configuration_id": lang.indexer_configuration_id,
	- }
	- for lang in languages
	- ),
	- "tmp_content_language",
	- ["id", "lang", "indexer_configuration_id"],
	- cur,
	- )
	-
	- count = db.content_language_add_from_temp(cur)
	- return {"content_language:add": count}
	-
	@timed
	@db_transaction()
	def content_fossology_license_get(
	diff --git a/swh/indexer/storage/converters.py b/swh/indexer/storage/converters.py
	--- a/swh/indexer/storage/converters.py
	+++ b/swh/indexer/storage/converters.py
	@@ -19,20 +19,6 @@
	}


	-def db_to_language(language):
	- """Convert a language entry into a ready language output."""
	- return {
	- "id": language["id"],
	- "lang": language["lang"],
	- "tool": {
	- "id": language["tool_id"],
	- "name": language["tool_name"],
	- "version": language["tool_version"],
	- "configuration": language["tool_configuration"],
	- },
	- }
	-
	-
	def db_to_metadata(metadata):
	"""Convert a metadata entry into a ready metadata output."""
	metadata["tool"] = {
	diff --git a/swh/indexer/storage/db.py b/swh/indexer/storage/db.py
	--- a/swh/indexer/storage/db.py
	+++ b/swh/indexer/storage/db.py
	@@ -22,8 +22,7 @@
	"""Read from table the data with hash_keys that are missing.

	Args:
	- table: Table name (e.g content_mimetype, content_language,
	- etc...)
	+ table: Table name (e.g content_mimetype, fossology_license, etc...)
	data: Dict of data to read from
	hash_keys: List of keys to read in the data dict.

	@@ -85,8 +84,7 @@

	Expected:
	Tables content_{something} being aliased as 'c' (something
	- in {language, mimetype, ...}), table indexer_configuration
	- being aliased as 'i'.
	+ in {mimetype, ...}), table indexer_configuration being aliased as 'i'.

	"""
	if key == "id":
	@@ -181,37 +179,6 @@
	"content_mimetype", ids, self.content_mimetype_cols, cur=cur
	)

	- content_language_hash_keys = ["id", "indexer_configuration_id"]
	-
	- def content_language_missing_from_list(self, languages, cur=None):
	- """List missing languages."""
	- yield from self._missing_from_list(
	- "content_language", languages, self.content_language_hash_keys, cur=cur
	- )
	-
	- content_language_cols = [
	- "id",
	- "lang",
	- "tool_id",
	- "tool_name",
	- "tool_version",
	- "tool_configuration",
	- ]
	-
	- @stored_procedure("swh_mktemp_content_language")
	- def mktemp_content_language(self, cur=None):
	- pass
	-
	- def content_language_add_from_temp(self, cur=None):
	- cur = self._cursor(cur)
	- cur.execute("select * from swh_content_language_add()")
	- return cur.fetchone()[0]
	-
	- def content_language_get_from_list(self, ids, cur=None):
	- yield from self._get_from_list(
	- "content_language", ids, self.content_language_cols, cur=cur
	- )
	-
	content_fossology_license_cols = [
	"id",
	"tool_id",
	diff --git a/swh/indexer/storage/in_memory.py b/swh/indexer/storage/in_memory.py
	--- a/swh/indexer/storage/in_memory.py
	+++ b/swh/indexer/storage/in_memory.py
	@@ -33,7 +33,6 @@
	from .interface import PagedResult, Sha1
	from .model import (
	BaseRow,
	- ContentLanguageRow,
	ContentLicenseRow,
	ContentMetadataRow,
	ContentMimetypeRow,
	@@ -154,7 +153,7 @@
	bound by limit.

	Args:
	- indexer_type: Type of data content to index (mimetype, language, etc...)
	+ indexer_type: Type of data content to index (mimetype, etc...)
	indexer_configuration_id: The tool used to index data
	partition_id: index of the partition to fetch
	nb_partitions: total number of partitions to split into
	@@ -246,7 +245,6 @@
	self.journal_writer = JournalWriter(tool_getter, journal_writer)
	args = (self._tools, self.journal_writer)
	self._mimetypes = SubStorage(ContentMimetypeRow, *args)
	- self._languages = SubStorage(ContentLanguageRow, *args)
	self._licenses = SubStorage(ContentLicenseRow, *args)
	self._content_metadata = SubStorage(ContentMetadataRow, *args)
	self._directory_intrinsic_metadata = SubStorage(
	@@ -284,20 +282,6 @@
	def content_mimetype_get(self, ids: Iterable[Sha1]) -> List[ContentMimetypeRow]:
	return self._mimetypes.get(ids)

	- def content_language_missing(
	- self, languages: Iterable[Dict]
	- ) -> List[Tuple[Sha1, int]]:
	- return self._languages.missing(languages)
	-
	- def content_language_get(self, ids: Iterable[Sha1]) -> List[ContentLanguageRow]:
	- return self._languages.get(ids)
	-
	- def content_language_add(
	- self, languages: List[ContentLanguageRow]
	- ) -> Dict[str, int]:
	- added = self._languages.add(languages)
	- return {"content_language:add": added}
	-
	def content_fossology_license_get(
	self, ids: Iterable[Sha1]
	) -> List[ContentLicenseRow]:
	diff --git a/swh/indexer/storage/interface.py b/swh/indexer/storage/interface.py
	--- a/swh/indexer/storage/interface.py
	+++ b/swh/indexer/storage/interface.py
	@@ -10,7 +10,6 @@
	from swh.core.api import remote_api_endpoint
	from swh.core.api.classes import PagedResult as CorePagedResult
	from swh.indexer.storage.model import (
	- ContentLanguageRow,
	ContentLicenseRow,
	ContentMetadataRow,
	ContentMimetypeRow,
	@@ -113,53 +112,6 @@
	"""
	...

	- @remote_api_endpoint("content_language/missing")
	- def content_language_missing(
	- self, languages: Iterable[Dict]
	- ) -> List[Tuple[Sha1, int]]:
	- """List languages missing from storage.
	-
	- Args:
	- languages (iterable): dictionaries with keys:
	-
	- - id (bytes): sha1 identifier
	- - indexer_configuration_id (int): tool used to compute
	- the results
	-
	- Returns:
	- list of tuple (id, indexer_configuration_id) missing
	-
	- """
	- ...
	-
	- @remote_api_endpoint("content_language")
	- def content_language_get(self, ids: Iterable[Sha1]) -> List[ContentLanguageRow]:
	- """Retrieve full content language per ids.
	-
	- Args:
	- ids (iterable): sha1 identifier
	-
	- Returns:
	- language row objects
	-
	- """
	- ...
	-
	- @remote_api_endpoint("content_language/add")
	- def content_language_add(
	- self, languages: List[ContentLanguageRow]
	- ) -> Dict[str, int]:
	- """Add languages not present in storage.
	-
	- Args:
	- languages: language row objects
	-
	- Returns:
	- Dict summary of number of rows added
	-
	- """
	- ...
	-
	@remote_api_endpoint("content/fossology_license")
	def content_fossology_license_get(
	self, ids: Iterable[Sha1]
	diff --git a/swh/indexer/storage/model.py b/swh/indexer/storage/model.py
	--- a/swh/indexer/storage/model.py
	+++ b/swh/indexer/storage/model.py
	@@ -75,14 +75,6 @@
	encoding = attr.ib(type=str)


	-@attr.s
	-class ContentLanguageRow(BaseRow):
	- object_type: Final = "content_language"
	-
	- id = attr.ib(type=Sha1Git)
	- lang = attr.ib(type=str)
	-
	-
	@attr.s
	class ContentLicenseRow(BaseRow):
	object_type: Final = "content_fossology_license"
	diff --git a/swh/indexer/tests/storage/test_converters.py b/swh/indexer/tests/storage/test_converters.py
	--- a/swh/indexer/tests/storage/test_converters.py
	+++ b/swh/indexer/tests/storage/test_converters.py
	@@ -34,32 +34,6 @@
	assert actual_mimetype == expected_mimetype


	-def test_db_to_language() -> None:
	- input_language = {
	- "id": b"some-id",
	- "tool_id": 20,
	- "tool_name": "some-toolname",
	- "tool_version": "some-toolversion",
	- "tool_configuration": {},
	- "lang": b"css",
	- }
	-
	- expected_language = {
	- "id": b"some-id",
	- "lang": b"css",
	- "tool": {
	- "id": 20,
	- "name": "some-toolname",
	- "version": "some-toolversion",
	- "configuration": {},
	- },
	- }
	-
	- actual_language = converters.db_to_language(input_language)
	-
	- assert actual_language == expected_language
	-
	-
	def test_db_to_fossology_license() -> None:
	input_license = {
	"id": b"some-id",
	diff --git a/swh/indexer/tests/storage/test_storage.py b/swh/indexer/tests/storage/test_storage.py
	--- a/swh/indexer/tests/storage/test_storage.py
	+++ b/swh/indexer/tests/storage/test_storage.py
	@@ -14,7 +14,6 @@
	from swh.indexer.storage.interface import IndexerStorageInterface, PagedResult
	from swh.indexer.storage.model import (
	BaseRow,
	- ContentLanguageRow,
	ContentLicenseRow,
	ContentMetadataRow,
	ContentMimetypeRow,
	@@ -503,22 +502,6 @@
	assert actual_id in expected_ids


	-class TestIndexerStorageContentLanguage(StorageETypeTester):
	- """Test Indexer Storage content_language related methods"""
	-
	- endpoint_type = "content_language"
	- tool_name = "pygments"
	- example_data = [
	- {
	- "lang": "haskell",
	- },
	- {
	- "lang": "common-lisp",
	- },
	- ]
	- row_class = ContentLanguageRow
	-
	-
	class TestIndexerStorageContentMetadata(StorageETypeTester):
	"""Test Indexer Storage content_metadata related methods"""

	diff --git a/swh/indexer/tests/test_fossology_license.py b/swh/indexer/tests/test_fossology_license.py
	--- a/swh/indexer/tests/test_fossology_license.py
	+++ b/swh/indexer/tests/test_fossology_license.py
	@@ -67,7 +67,7 @@


	class TestFossologyLicenseIndexer(CommonContentIndexerTest, unittest.TestCase):
	- """Language indexer test scenarios:
	+ """Fossology license indexer test scenarios:

	- Known sha1s in the input list have their data indexed
	- Unknown sha1 in the input list are not indexed

File Metadata

Mime Type: text/plain
Expires: Mar 17 2025, 7:11 PM (15 w, 14 h ago)
Storage Engine: blob
Storage Format: Raw Data
Storage Handle: 3225316

D8158.diffNo OneTemporaryActions

D8158.diffView Options

File Metadata

Event Timeline

D8158.diff
No OneTemporary
Actions

D8158.diff
View Options