Page Menu | Home | Software Heritage

D8158.diff
No One | Temporary

D8158.diff

diff --git a/swh/indexer/storage/__init__.py b/swh/indexer/storage/__init__.py
--- a/swh/indexer/storage/__init__.py
+++ b/swh/indexer/storage/__init__.py
@@ -25,7 +25,6 @@
from .interface import PagedResult, Sha1
from .metrics import process_metrics, send_metric, timed
from .model import (
- ContentLanguageRow,
ContentLicenseRow,
ContentMetadataRow,
ContentMimetypeRow,
@@ -102,16 +101,17 @@
data (List[dict]): List of dictionaries to be inserted
>>> check_id_duplicates([
- ... ContentLanguageRow(id=b'foo', indexer_configuration_id=42, lang="python"),
- ... ContentLanguageRow(id=b'foo', indexer_configuration_id=32, lang="python"),
+ ... ContentLicenseRow(id=b'foo', indexer_configuration_id=42, license="GPL"),
+ ... ContentLicenseRow(id=b'foo', indexer_configuration_id=32, license="GPL"),
... ])
>>> check_id_duplicates([
- ... ContentLanguageRow(id=b'foo', indexer_configuration_id=42, lang="python"),
- ... ContentLanguageRow(id=b'foo', indexer_configuration_id=42, lang="python"),
+ ... ContentLicenseRow(id=b'foo', indexer_configuration_id=42, license="AGPL"),
+ ... ContentLicenseRow(id=b'foo', indexer_configuration_id=42, license="AGPL"),
... ])
Traceback (most recent call last):
- ...
- swh.indexer.storage.exc.DuplicateId: [{'id': b'foo', 'indexer_configuration_id': 42}]
+ ...
+ swh.indexer.storage.exc.DuplicateId: [{'id': b'foo', 'indexer_configuration_id': 42, 'license': 'AGPL'}]
+
""" # noqa
counter = Counter(tuple(sorted(item.unique_key().items())) for item in data)
duplicates = [id_ for (id_, count) in counter.items() if count >= 2]
@@ -194,7 +194,7 @@
bound by limit.
Args:
- **indexer_type**: Type of data content to index (mimetype, language, etc...)
+ **indexer_type**: Type of data content to index (mimetype, etc...)
**indexer_configuration_id**: The tool used to index data
**partition_id**: index of the partition to fetch
**nb_partitions**: total number of partitions to split into
@@ -303,56 +303,6 @@
for c in db.content_mimetype_get_from_list(ids, cur)
]
- @timed
- @db_transaction()
- def content_language_missing(
- self, languages: Iterable[Dict], db=None, cur=None
- ) -> List[Tuple[Sha1, int]]:
- return [obj[0] for obj in db.content_language_missing_from_list(languages, cur)]
-
- @timed
- @db_transaction()
- def content_language_get(
- self, ids: Iterable[Sha1], db=None, cur=None
- ) -> List[ContentLanguageRow]:
- return [
- ContentLanguageRow.from_dict(
- converters.db_to_language(dict(zip(db.content_language_cols, c)))
- )
- for c in db.content_language_get_from_list(ids, cur)
- ]
-
- @timed
- @process_metrics
- @db_transaction()
- def content_language_add(
- self,
- languages: List[ContentLanguageRow],
- db=None,
- cur=None,
- ) -> Dict[str, int]:
- check_id_duplicates(languages)
- languages.sort(key=lambda m: m.id)
- self.journal_writer.write_additions("content_language", languages)
- db.mktemp_content_language(cur)
- # empty language is mapped to 'unknown'
- db.copy_to(
- (
- {
- "id": lang.id,
- "lang": lang.lang or "unknown",
- "indexer_configuration_id": lang.indexer_configuration_id,
- }
- for lang in languages
- ),
- "tmp_content_language",
- ["id", "lang", "indexer_configuration_id"],
- cur,
- )
-
- count = db.content_language_add_from_temp(cur)
- return {"content_language:add": count}
-
@timed
@db_transaction()
def content_fossology_license_get(
diff --git a/swh/indexer/storage/converters.py b/swh/indexer/storage/converters.py
--- a/swh/indexer/storage/converters.py
+++ b/swh/indexer/storage/converters.py
@@ -19,20 +19,6 @@
}
-def db_to_language(language):
- """Convert a language entry into a ready language output."""
- return {
- "id": language["id"],
- "lang": language["lang"],
- "tool": {
- "id": language["tool_id"],
- "name": language["tool_name"],
- "version": language["tool_version"],
- "configuration": language["tool_configuration"],
- },
- }
-
-
def db_to_metadata(metadata):
"""Convert a metadata entry into a ready metadata output."""
metadata["tool"] = {
diff --git a/swh/indexer/storage/db.py b/swh/indexer/storage/db.py
--- a/swh/indexer/storage/db.py
+++ b/swh/indexer/storage/db.py
@@ -22,8 +22,7 @@
"""Read from table the data with hash_keys that are missing.
Args:
- table: Table name (e.g content_mimetype, content_language,
- etc...)
+ table: Table name (e.g content_mimetype, fossology_license, etc...)
data: Dict of data to read from
hash_keys: List of keys to read in the data dict.
@@ -85,8 +84,7 @@
Expected:
Tables content_{something} being aliased as 'c' (something
- in {language, mimetype, ...}), table indexer_configuration
- being aliased as 'i'.
+ in {mimetype, ...}), table indexer_configuration being aliased as 'i'.
"""
if key == "id":
@@ -181,37 +179,6 @@
"content_mimetype", ids, self.content_mimetype_cols, cur=cur
)
- content_language_hash_keys = ["id", "indexer_configuration_id"]
-
- def content_language_missing_from_list(self, languages, cur=None):
- """List missing languages."""
- yield from self._missing_from_list(
- "content_language", languages, self.content_language_hash_keys, cur=cur
- )
-
- content_language_cols = [
- "id",
- "lang",
- "tool_id",
- "tool_name",
- "tool_version",
- "tool_configuration",
- ]
-
- @stored_procedure("swh_mktemp_content_language")
- def mktemp_content_language(self, cur=None):
- pass
-
- def content_language_add_from_temp(self, cur=None):
- cur = self._cursor(cur)
- cur.execute("select * from swh_content_language_add()")
- return cur.fetchone()[0]
-
- def content_language_get_from_list(self, ids, cur=None):
- yield from self._get_from_list(
- "content_language", ids, self.content_language_cols, cur=cur
- )
-
content_fossology_license_cols = [
"id",
"tool_id",
diff --git a/swh/indexer/storage/in_memory.py b/swh/indexer/storage/in_memory.py
--- a/swh/indexer/storage/in_memory.py
+++ b/swh/indexer/storage/in_memory.py
@@ -33,7 +33,6 @@
from .interface import PagedResult, Sha1
from .model import (
BaseRow,
- ContentLanguageRow,
ContentLicenseRow,
ContentMetadataRow,
ContentMimetypeRow,
@@ -154,7 +153,7 @@
bound by limit.
Args:
- **indexer_type**: Type of data content to index (mimetype, language, etc...)
+ **indexer_type**: Type of data content to index (mimetype, etc...)
**indexer_configuration_id**: The tool used to index data
**partition_id**: index of the partition to fetch
**nb_partitions**: total number of partitions to split into
@@ -246,7 +245,6 @@
self.journal_writer = JournalWriter(tool_getter, journal_writer)
args = (self._tools, self.journal_writer)
self._mimetypes = SubStorage(ContentMimetypeRow, *args)
- self._languages = SubStorage(ContentLanguageRow, *args)
self._licenses = SubStorage(ContentLicenseRow, *args)
self._content_metadata = SubStorage(ContentMetadataRow, *args)
self._directory_intrinsic_metadata = SubStorage(
@@ -284,20 +282,6 @@
def content_mimetype_get(self, ids: Iterable[Sha1]) -> List[ContentMimetypeRow]:
return self._mimetypes.get(ids)
- def content_language_missing(
- self, languages: Iterable[Dict]
- ) -> List[Tuple[Sha1, int]]:
- return self._languages.missing(languages)
-
- def content_language_get(self, ids: Iterable[Sha1]) -> List[ContentLanguageRow]:
- return self._languages.get(ids)
-
- def content_language_add(
- self, languages: List[ContentLanguageRow]
- ) -> Dict[str, int]:
- added = self._languages.add(languages)
- return {"content_language:add": added}
-
def content_fossology_license_get(
self, ids: Iterable[Sha1]
) -> List[ContentLicenseRow]:
diff --git a/swh/indexer/storage/interface.py b/swh/indexer/storage/interface.py
--- a/swh/indexer/storage/interface.py
+++ b/swh/indexer/storage/interface.py
@@ -10,7 +10,6 @@
from swh.core.api import remote_api_endpoint
from swh.core.api.classes import PagedResult as CorePagedResult
from swh.indexer.storage.model import (
- ContentLanguageRow,
ContentLicenseRow,
ContentMetadataRow,
ContentMimetypeRow,
@@ -113,53 +112,6 @@
"""
...
- @remote_api_endpoint("content_language/missing")
- def content_language_missing(
- self, languages: Iterable[Dict]
- ) -> List[Tuple[Sha1, int]]:
- """List languages missing from storage.
-
- Args:
- languages (iterable): dictionaries with keys:
-
- - **id** (bytes): sha1 identifier
- - **indexer_configuration_id** (int): tool used to compute
- the results
-
- Returns:
- list of tuple (id, indexer_configuration_id) missing
-
- """
- ...
-
- @remote_api_endpoint("content_language")
- def content_language_get(self, ids: Iterable[Sha1]) -> List[ContentLanguageRow]:
- """Retrieve full content language per ids.
-
- Args:
- ids (iterable): sha1 identifier
-
- Returns:
- language row objects
-
- """
- ...
-
- @remote_api_endpoint("content_language/add")
- def content_language_add(
- self, languages: List[ContentLanguageRow]
- ) -> Dict[str, int]:
- """Add languages not present in storage.
-
- Args:
- languages: language row objects
-
- Returns:
- Dict summary of number of rows added
-
- """
- ...
-
@remote_api_endpoint("content/fossology_license")
def content_fossology_license_get(
self, ids: Iterable[Sha1]
diff --git a/swh/indexer/storage/model.py b/swh/indexer/storage/model.py
--- a/swh/indexer/storage/model.py
+++ b/swh/indexer/storage/model.py
@@ -75,14 +75,6 @@
encoding = attr.ib(type=str)
-@attr.s
-class ContentLanguageRow(BaseRow):
- object_type: Final = "content_language"
-
- id = attr.ib(type=Sha1Git)
- lang = attr.ib(type=str)
-
-
@attr.s
class ContentLicenseRow(BaseRow):
object_type: Final = "content_fossology_license"
diff --git a/swh/indexer/tests/storage/test_converters.py b/swh/indexer/tests/storage/test_converters.py
--- a/swh/indexer/tests/storage/test_converters.py
+++ b/swh/indexer/tests/storage/test_converters.py
@@ -34,32 +34,6 @@
assert actual_mimetype == expected_mimetype
-def test_db_to_language() -> None:
- input_language = {
- "id": b"some-id",
- "tool_id": 20,
- "tool_name": "some-toolname",
- "tool_version": "some-toolversion",
- "tool_configuration": {},
- "lang": b"css",
- }
-
- expected_language = {
- "id": b"some-id",
- "lang": b"css",
- "tool": {
- "id": 20,
- "name": "some-toolname",
- "version": "some-toolversion",
- "configuration": {},
- },
- }
-
- actual_language = converters.db_to_language(input_language)
-
- assert actual_language == expected_language
-
-
def test_db_to_fossology_license() -> None:
input_license = {
"id": b"some-id",
diff --git a/swh/indexer/tests/storage/test_storage.py b/swh/indexer/tests/storage/test_storage.py
--- a/swh/indexer/tests/storage/test_storage.py
+++ b/swh/indexer/tests/storage/test_storage.py
@@ -14,7 +14,6 @@
from swh.indexer.storage.interface import IndexerStorageInterface, PagedResult
from swh.indexer.storage.model import (
BaseRow,
- ContentLanguageRow,
ContentLicenseRow,
ContentMetadataRow,
ContentMimetypeRow,
@@ -503,22 +502,6 @@
assert actual_id in expected_ids
-class TestIndexerStorageContentLanguage(StorageETypeTester):
- """Test Indexer Storage content_language related methods"""
-
- endpoint_type = "content_language"
- tool_name = "pygments"
- example_data = [
- {
- "lang": "haskell",
- },
- {
- "lang": "common-lisp",
- },
- ]
- row_class = ContentLanguageRow
-
-
class TestIndexerStorageContentMetadata(StorageETypeTester):
"""Test Indexer Storage content_metadata related methods"""
diff --git a/swh/indexer/tests/test_fossology_license.py b/swh/indexer/tests/test_fossology_license.py
--- a/swh/indexer/tests/test_fossology_license.py
+++ b/swh/indexer/tests/test_fossology_license.py
@@ -67,7 +67,7 @@
class TestFossologyLicenseIndexer(CommonContentIndexerTest, unittest.TestCase):
- """Language indexer test scenarios:
+ """Fossology license indexer test scenarios:
- Known sha1s in the input list have their data indexed
- Unknown sha1 in the input list are not indexed

File Metadata

Mime Type
text/plain
Expires
Mar 17 2025, 7:11 PM (7 w, 3 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3225316

Event Timeline