Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7343078
D8158.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
13 KB
Subscribers
None
D8158.diff
View Options
diff --git a/swh/indexer/storage/__init__.py b/swh/indexer/storage/__init__.py
--- a/swh/indexer/storage/__init__.py
+++ b/swh/indexer/storage/__init__.py
@@ -25,7 +25,6 @@
from .interface import PagedResult, Sha1
from .metrics import process_metrics, send_metric, timed
from .model import (
- ContentLanguageRow,
ContentLicenseRow,
ContentMetadataRow,
ContentMimetypeRow,
@@ -102,16 +101,17 @@
data (List[dict]): List of dictionaries to be inserted
>>> check_id_duplicates([
- ... ContentLanguageRow(id=b'foo', indexer_configuration_id=42, lang="python"),
- ... ContentLanguageRow(id=b'foo', indexer_configuration_id=32, lang="python"),
+ ... ContentLicenseRow(id=b'foo', indexer_configuration_id=42, license="GPL"),
+ ... ContentLicenseRow(id=b'foo', indexer_configuration_id=32, license="GPL"),
... ])
>>> check_id_duplicates([
- ... ContentLanguageRow(id=b'foo', indexer_configuration_id=42, lang="python"),
- ... ContentLanguageRow(id=b'foo', indexer_configuration_id=42, lang="python"),
+ ... ContentLicenseRow(id=b'foo', indexer_configuration_id=42, license="AGPL"),
+ ... ContentLicenseRow(id=b'foo', indexer_configuration_id=42, license="AGPL"),
... ])
Traceback (most recent call last):
- ...
- swh.indexer.storage.exc.DuplicateId: [{'id': b'foo', 'indexer_configuration_id': 42}]
+ ...
+ swh.indexer.storage.exc.DuplicateId: [{'id': b'foo', 'indexer_configuration_id': 42, 'license': 'AGPL'}]
+
""" # noqa
counter = Counter(tuple(sorted(item.unique_key().items())) for item in data)
duplicates = [id_ for (id_, count) in counter.items() if count >= 2]
@@ -194,7 +194,7 @@
bound by limit.
Args:
- **indexer_type**: Type of data content to index (mimetype, language, etc...)
+ **indexer_type**: Type of data content to index (mimetype, etc...)
**indexer_configuration_id**: The tool used to index data
**partition_id**: index of the partition to fetch
**nb_partitions**: total number of partitions to split into
@@ -303,56 +303,6 @@
for c in db.content_mimetype_get_from_list(ids, cur)
]
- @timed
- @db_transaction()
- def content_language_missing(
- self, languages: Iterable[Dict], db=None, cur=None
- ) -> List[Tuple[Sha1, int]]:
- return [obj[0] for obj in db.content_language_missing_from_list(languages, cur)]
-
- @timed
- @db_transaction()
- def content_language_get(
- self, ids: Iterable[Sha1], db=None, cur=None
- ) -> List[ContentLanguageRow]:
- return [
- ContentLanguageRow.from_dict(
- converters.db_to_language(dict(zip(db.content_language_cols, c)))
- )
- for c in db.content_language_get_from_list(ids, cur)
- ]
-
- @timed
- @process_metrics
- @db_transaction()
- def content_language_add(
- self,
- languages: List[ContentLanguageRow],
- db=None,
- cur=None,
- ) -> Dict[str, int]:
- check_id_duplicates(languages)
- languages.sort(key=lambda m: m.id)
- self.journal_writer.write_additions("content_language", languages)
- db.mktemp_content_language(cur)
- # empty language is mapped to 'unknown'
- db.copy_to(
- (
- {
- "id": lang.id,
- "lang": lang.lang or "unknown",
- "indexer_configuration_id": lang.indexer_configuration_id,
- }
- for lang in languages
- ),
- "tmp_content_language",
- ["id", "lang", "indexer_configuration_id"],
- cur,
- )
-
- count = db.content_language_add_from_temp(cur)
- return {"content_language:add": count}
-
@timed
@db_transaction()
def content_fossology_license_get(
diff --git a/swh/indexer/storage/converters.py b/swh/indexer/storage/converters.py
--- a/swh/indexer/storage/converters.py
+++ b/swh/indexer/storage/converters.py
@@ -19,20 +19,6 @@
}
-def db_to_language(language):
- """Convert a language entry into a ready language output."""
- return {
- "id": language["id"],
- "lang": language["lang"],
- "tool": {
- "id": language["tool_id"],
- "name": language["tool_name"],
- "version": language["tool_version"],
- "configuration": language["tool_configuration"],
- },
- }
-
-
def db_to_metadata(metadata):
"""Convert a metadata entry into a ready metadata output."""
metadata["tool"] = {
diff --git a/swh/indexer/storage/db.py b/swh/indexer/storage/db.py
--- a/swh/indexer/storage/db.py
+++ b/swh/indexer/storage/db.py
@@ -22,8 +22,7 @@
"""Read from table the data with hash_keys that are missing.
Args:
- table: Table name (e.g content_mimetype, content_language,
- etc...)
+ table: Table name (e.g content_mimetype, fossology_license, etc...)
data: Dict of data to read from
hash_keys: List of keys to read in the data dict.
@@ -85,8 +84,7 @@
Expected:
Tables content_{something} being aliased as 'c' (something
- in {language, mimetype, ...}), table indexer_configuration
- being aliased as 'i'.
+ in {mimetype, ...}), table indexer_configuration being aliased as 'i'.
"""
if key == "id":
@@ -181,37 +179,6 @@
"content_mimetype", ids, self.content_mimetype_cols, cur=cur
)
- content_language_hash_keys = ["id", "indexer_configuration_id"]
-
- def content_language_missing_from_list(self, languages, cur=None):
- """List missing languages."""
- yield from self._missing_from_list(
- "content_language", languages, self.content_language_hash_keys, cur=cur
- )
-
- content_language_cols = [
- "id",
- "lang",
- "tool_id",
- "tool_name",
- "tool_version",
- "tool_configuration",
- ]
-
- @stored_procedure("swh_mktemp_content_language")
- def mktemp_content_language(self, cur=None):
- pass
-
- def content_language_add_from_temp(self, cur=None):
- cur = self._cursor(cur)
- cur.execute("select * from swh_content_language_add()")
- return cur.fetchone()[0]
-
- def content_language_get_from_list(self, ids, cur=None):
- yield from self._get_from_list(
- "content_language", ids, self.content_language_cols, cur=cur
- )
-
content_fossology_license_cols = [
"id",
"tool_id",
diff --git a/swh/indexer/storage/in_memory.py b/swh/indexer/storage/in_memory.py
--- a/swh/indexer/storage/in_memory.py
+++ b/swh/indexer/storage/in_memory.py
@@ -33,7 +33,6 @@
from .interface import PagedResult, Sha1
from .model import (
BaseRow,
- ContentLanguageRow,
ContentLicenseRow,
ContentMetadataRow,
ContentMimetypeRow,
@@ -154,7 +153,7 @@
bound by limit.
Args:
- **indexer_type**: Type of data content to index (mimetype, language, etc...)
+ **indexer_type**: Type of data content to index (mimetype, etc...)
**indexer_configuration_id**: The tool used to index data
**partition_id**: index of the partition to fetch
**nb_partitions**: total number of partitions to split into
@@ -246,7 +245,6 @@
self.journal_writer = JournalWriter(tool_getter, journal_writer)
args = (self._tools, self.journal_writer)
self._mimetypes = SubStorage(ContentMimetypeRow, *args)
- self._languages = SubStorage(ContentLanguageRow, *args)
self._licenses = SubStorage(ContentLicenseRow, *args)
self._content_metadata = SubStorage(ContentMetadataRow, *args)
self._directory_intrinsic_metadata = SubStorage(
@@ -284,20 +282,6 @@
def content_mimetype_get(self, ids: Iterable[Sha1]) -> List[ContentMimetypeRow]:
return self._mimetypes.get(ids)
- def content_language_missing(
- self, languages: Iterable[Dict]
- ) -> List[Tuple[Sha1, int]]:
- return self._languages.missing(languages)
-
- def content_language_get(self, ids: Iterable[Sha1]) -> List[ContentLanguageRow]:
- return self._languages.get(ids)
-
- def content_language_add(
- self, languages: List[ContentLanguageRow]
- ) -> Dict[str, int]:
- added = self._languages.add(languages)
- return {"content_language:add": added}
-
def content_fossology_license_get(
self, ids: Iterable[Sha1]
) -> List[ContentLicenseRow]:
diff --git a/swh/indexer/storage/interface.py b/swh/indexer/storage/interface.py
--- a/swh/indexer/storage/interface.py
+++ b/swh/indexer/storage/interface.py
@@ -10,7 +10,6 @@
from swh.core.api import remote_api_endpoint
from swh.core.api.classes import PagedResult as CorePagedResult
from swh.indexer.storage.model import (
- ContentLanguageRow,
ContentLicenseRow,
ContentMetadataRow,
ContentMimetypeRow,
@@ -113,53 +112,6 @@
"""
...
- @remote_api_endpoint("content_language/missing")
- def content_language_missing(
- self, languages: Iterable[Dict]
- ) -> List[Tuple[Sha1, int]]:
- """List languages missing from storage.
-
- Args:
- languages (iterable): dictionaries with keys:
-
- - **id** (bytes): sha1 identifier
- - **indexer_configuration_id** (int): tool used to compute
- the results
-
- Returns:
- list of tuple (id, indexer_configuration_id) missing
-
- """
- ...
-
- @remote_api_endpoint("content_language")
- def content_language_get(self, ids: Iterable[Sha1]) -> List[ContentLanguageRow]:
- """Retrieve full content language per ids.
-
- Args:
- ids (iterable): sha1 identifier
-
- Returns:
- language row objects
-
- """
- ...
-
- @remote_api_endpoint("content_language/add")
- def content_language_add(
- self, languages: List[ContentLanguageRow]
- ) -> Dict[str, int]:
- """Add languages not present in storage.
-
- Args:
- languages: language row objects
-
- Returns:
- Dict summary of number of rows added
-
- """
- ...
-
@remote_api_endpoint("content/fossology_license")
def content_fossology_license_get(
self, ids: Iterable[Sha1]
diff --git a/swh/indexer/storage/model.py b/swh/indexer/storage/model.py
--- a/swh/indexer/storage/model.py
+++ b/swh/indexer/storage/model.py
@@ -75,14 +75,6 @@
encoding = attr.ib(type=str)
-@attr.s
-class ContentLanguageRow(BaseRow):
- object_type: Final = "content_language"
-
- id = attr.ib(type=Sha1Git)
- lang = attr.ib(type=str)
-
-
@attr.s
class ContentLicenseRow(BaseRow):
object_type: Final = "content_fossology_license"
diff --git a/swh/indexer/tests/storage/test_converters.py b/swh/indexer/tests/storage/test_converters.py
--- a/swh/indexer/tests/storage/test_converters.py
+++ b/swh/indexer/tests/storage/test_converters.py
@@ -34,32 +34,6 @@
assert actual_mimetype == expected_mimetype
-def test_db_to_language() -> None:
- input_language = {
- "id": b"some-id",
- "tool_id": 20,
- "tool_name": "some-toolname",
- "tool_version": "some-toolversion",
- "tool_configuration": {},
- "lang": b"css",
- }
-
- expected_language = {
- "id": b"some-id",
- "lang": b"css",
- "tool": {
- "id": 20,
- "name": "some-toolname",
- "version": "some-toolversion",
- "configuration": {},
- },
- }
-
- actual_language = converters.db_to_language(input_language)
-
- assert actual_language == expected_language
-
-
def test_db_to_fossology_license() -> None:
input_license = {
"id": b"some-id",
diff --git a/swh/indexer/tests/storage/test_storage.py b/swh/indexer/tests/storage/test_storage.py
--- a/swh/indexer/tests/storage/test_storage.py
+++ b/swh/indexer/tests/storage/test_storage.py
@@ -14,7 +14,6 @@
from swh.indexer.storage.interface import IndexerStorageInterface, PagedResult
from swh.indexer.storage.model import (
BaseRow,
- ContentLanguageRow,
ContentLicenseRow,
ContentMetadataRow,
ContentMimetypeRow,
@@ -503,22 +502,6 @@
assert actual_id in expected_ids
-class TestIndexerStorageContentLanguage(StorageETypeTester):
- """Test Indexer Storage content_language related methods"""
-
- endpoint_type = "content_language"
- tool_name = "pygments"
- example_data = [
- {
- "lang": "haskell",
- },
- {
- "lang": "common-lisp",
- },
- ]
- row_class = ContentLanguageRow
-
-
class TestIndexerStorageContentMetadata(StorageETypeTester):
"""Test Indexer Storage content_metadata related methods"""
diff --git a/swh/indexer/tests/test_fossology_license.py b/swh/indexer/tests/test_fossology_license.py
--- a/swh/indexer/tests/test_fossology_license.py
+++ b/swh/indexer/tests/test_fossology_license.py
@@ -67,7 +67,7 @@
class TestFossologyLicenseIndexer(CommonContentIndexerTest, unittest.TestCase):
- """Language indexer test scenarios:
+ """Fossology license indexer test scenarios:
- Known sha1s in the input list have their data indexed
- Unknown sha1 in the input list are not indexed
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Mar 17 2025, 7:11 PM (7 w, 3 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3225316
Attached To
D8158: Drop decommissioned language indexer storage endpoints
Event Timeline
Log In to Comment