Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7066150
D8072.id.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
14 KB
Subscribers
None
D8072.id.diff
View Options
diff --git a/docs/metadata-workflow.rst b/docs/metadata-workflow.rst
--- a/docs/metadata-workflow.rst
+++ b/docs/metadata-workflow.rst
@@ -134,10 +134,10 @@
.. code-block:: python
- from .base import DictMapping, SingleFileMapping
+ from .base import DictMapping, SingleFileIntrinsicMapping
from swh.indexer.codemeta import CROSSWALK_TABLE
- class MyMapping(DictMapping, SingleFileMapping):
+ class MyMapping(DictMapping, SingleFileIntrinsicMapping):
"""Dedicated class for ..."""
name = 'my-mapping'
filename = b'the-filename'
@@ -145,6 +145,8 @@
.. _CodeMeta crosswalks: https://github.com/codemeta/codemeta/tree/master/crosswalks
+Also reference the new mapping class from :const:`swh.indexer.metadata_dictionary.INTRINSIC_MAPPINGS`.
+
Then, add a ``string_fields`` attribute, that is the list of all keys whose
values are simple text values. For instance, to
`translate Python PKG-INFO`_, it's:
diff --git a/swh/indexer/metadata.py b/swh/indexer/metadata.py
--- a/swh/indexer/metadata.py
+++ b/swh/indexer/metadata.py
@@ -31,7 +31,7 @@
OriginIndexer,
)
from swh.indexer.metadata_detector import detect_metadata
-from swh.indexer.metadata_dictionary import MAPPINGS
+from swh.indexer.metadata_dictionary import EXTRINSIC_MAPPINGS, INTRINSIC_MAPPINGS
from swh.indexer.metadata_dictionary.base import DirectoryLsEntry
from swh.indexer.origin_head import get_head_swhid
from swh.indexer.storage import INDEXER_CFG_KEY, Sha1
@@ -116,7 +116,7 @@
metadata_items = []
mappings = []
- for (mapping_name, mapping) in MAPPINGS.items():
+ for (mapping_name, mapping) in EXTRINSIC_MAPPINGS.items():
if data.format in mapping.extrinsic_metadata_formats():
metadata_item = mapping().translate(data.metadata)
if metadata_item is not None:
@@ -210,7 +210,7 @@
try:
mapping_name = self.tool["tool_configuration"]["context"]
log_suffix += ", content_id=%s" % hashutil.hash_to_hex(id)
- metadata = MAPPINGS[mapping_name](log_suffix).translate(data)
+ metadata = INTRINSIC_MAPPINGS[mapping_name](log_suffix).translate(data)
except Exception:
self.log.exception(
"Problem during metadata translation "
@@ -364,7 +364,9 @@
config = {k: self.config[k] for k in [INDEXER_CFG_KEY, "objstorage", "storage"]}
config["tools"] = [tool]
all_detected_files = detect_metadata(files)
- used_mappings = [MAPPINGS[context].name for context in all_detected_files]
+ used_mappings = [
+ INTRINSIC_MAPPINGS[context].name for context in all_detected_files
+ ]
for (mapping_name, detected_files) in all_detected_files.items():
cfg = deepcopy(config)
cfg["tools"][0]["configuration"]["context"] = mapping_name
diff --git a/swh/indexer/metadata_detector.py b/swh/indexer/metadata_detector.py
--- a/swh/indexer/metadata_detector.py
+++ b/swh/indexer/metadata_detector.py
@@ -5,7 +5,7 @@
from typing import Dict, List
-from swh.indexer.metadata_dictionary import MAPPINGS
+from swh.indexer.metadata_dictionary import INTRINSIC_MAPPINGS
from swh.indexer.metadata_dictionary.base import DirectoryLsEntry
from swh.indexer.storage.interface import Sha1
@@ -21,7 +21,7 @@
dict: {mapping_filenames[name]:f['sha1']} (may be empty)
"""
results = {}
- for (mapping_name, mapping) in MAPPINGS.items():
+ for (mapping_name, mapping) in INTRINSIC_MAPPINGS.items():
matches = mapping.detect_metadata_files(files)
if matches:
results[mapping_name] = matches
diff --git a/swh/indexer/metadata_dictionary/__init__.py b/swh/indexer/metadata_dictionary/__init__.py
--- a/swh/indexer/metadata_dictionary/__init__.py
+++ b/swh/indexer/metadata_dictionary/__init__.py
@@ -4,22 +4,30 @@
# See top-level LICENSE file for more information
import collections
+from typing import Dict, Type
import click
from . import cff, codemeta, composer, github, maven, npm, python, ruby
+from .base import BaseExtrinsicMapping, BaseIntrinsicMapping, BaseMapping
-MAPPINGS = {
+INTRINSIC_MAPPINGS: Dict[str, Type[BaseIntrinsicMapping]] = {
"CffMapping": cff.CffMapping,
"CodemetaMapping": codemeta.CodemetaMapping,
"GemspecMapping": ruby.GemspecMapping,
- "GitHubMapping": github.GitHubMapping,
"MavenMapping": maven.MavenMapping,
"NpmMapping": npm.NpmMapping,
"PythonPkginfoMapping": python.PythonPkginfoMapping,
"ComposerMapping": composer.ComposerMapping,
}
+EXTRINSIC_MAPPINGS: Dict[str, Type[BaseExtrinsicMapping]] = {
+ "GitHubMapping": github.GitHubMapping,
+}
+
+
+MAPPINGS: Dict[str, Type[BaseMapping]] = {**INTRINSIC_MAPPINGS, **EXTRINSIC_MAPPINGS}
+
def list_terms():
"""Returns a dictionary with all supported CodeMeta terms as keys,
diff --git a/swh/indexer/metadata_dictionary/base.py b/swh/indexer/metadata_dictionary/base.py
--- a/swh/indexer/metadata_dictionary/base.py
+++ b/swh/indexer/metadata_dictionary/base.py
@@ -41,13 +41,8 @@
class BaseMapping:
- """Base class for mappings to inherit from
-
- To implement a new mapping:
-
- - inherit this class
- - override translate function
- """
+ """Base class for :class:`BaseExtrinsicMapping` and :class:`BaseIntrinsicMapping`,
+ not to be inherited directly."""
def __init__(self, log_suffix=""):
self.log_suffix = log_suffix
@@ -61,12 +56,23 @@
indexer storage."""
raise NotImplementedError(f"{self.__class__.__name__}.name")
- @classmethod
- def detect_metadata_files(cls, file_entries: List[DirectoryLsEntry]) -> List[Sha1]:
- """
- Returns the sha1 hashes of files which can be translated by this mapping
- """
- raise NotImplementedError(f"{cls.__name__}.detect_metadata_files")
+ def translate(self, file_content: bytes) -> Optional[Dict]:
+ """Translates metadata, from the content of a file or of a RawExtrinsicMetadata
+ object."""
+ raise NotImplementedError(f"{self.__class__.__name__}.translate")
+
+ def normalize_translation(self, metadata: Dict[str, Any]) -> Dict[str, Any]:
+ return compact(metadata)
+
+
+class BaseExtrinsicMapping(BaseMapping):
+ """Base class for extrinsic-metadata mappings to inherit from
+
+ To implement a new mapping:
+
+ - inherit this class
+ - override translate function
+ """
@classmethod
def extrinsic_metadata_formats(cls) -> Tuple[str, ...]:
@@ -76,15 +82,25 @@
"""
raise NotImplementedError(f"{cls.__name__}.extrinsic_metadata_formats")
- def translate(self, file_content: bytes) -> Optional[Dict]:
- """Translates intrinsic metadata, from the content of a file."""
- raise NotImplementedError(f"{self.__class__.__name__}.translate")
- def normalize_translation(self, metadata: Dict[str, Any]) -> Dict[str, Any]:
- return compact(metadata)
+class BaseIntrinsicMapping(BaseMapping):
+ """Base class for intrinsic-metadata mappings to inherit from
+
+ To implement a new mapping:
+
+ - inherit this class
+ - override translate function
+ """
+
+ @classmethod
+ def detect_metadata_files(cls, file_entries: List[DirectoryLsEntry]) -> List[Sha1]:
+ """
+ Returns the sha1 hashes of files which can be translated by this mapping
+ """
+ raise NotImplementedError(f"{cls.__name__}.detect_metadata_files")
-class SingleFileMapping(BaseMapping):
+class SingleFileIntrinsicMapping(BaseIntrinsicMapping):
"""Base class for all intrinsic metadata mappings that use a single file as input."""
@property
@@ -99,11 +115,6 @@
return [entry["sha1"]]
return []
- @classmethod
- def extrinsic_metadata_formats(cls) -> Tuple[str, ...]:
- # this class is only used by intrinsic metadata mappings
- return ()
-
class DictMapping(BaseMapping):
"""Base class for mappings that take as input a file that is mostly
diff --git a/swh/indexer/metadata_dictionary/cff.py b/swh/indexer/metadata_dictionary/cff.py
--- a/swh/indexer/metadata_dictionary/cff.py
+++ b/swh/indexer/metadata_dictionary/cff.py
@@ -4,7 +4,7 @@
from swh.indexer.codemeta import CROSSWALK_TABLE, SCHEMA_URI
-from .base import DictMapping, SingleFileMapping
+from .base import DictMapping, SingleFileIntrinsicMapping
class SafeLoader(yaml.SafeLoader):
@@ -14,7 +14,7 @@
}
-class CffMapping(DictMapping, SingleFileMapping):
+class CffMapping(DictMapping, SingleFileIntrinsicMapping):
"""Dedicated class for Citation (CITATION.cff) mapping and translation"""
name = "cff"
diff --git a/swh/indexer/metadata_dictionary/codemeta.py b/swh/indexer/metadata_dictionary/codemeta.py
--- a/swh/indexer/metadata_dictionary/codemeta.py
+++ b/swh/indexer/metadata_dictionary/codemeta.py
@@ -8,10 +8,10 @@
from swh.indexer.codemeta import CODEMETA_TERMS, expand
-from .base import SingleFileMapping
+from .base import SingleFileIntrinsicMapping
-class CodemetaMapping(SingleFileMapping):
+class CodemetaMapping(SingleFileIntrinsicMapping):
"""
dedicated class for CodeMeta (codemeta.json) mapping and translation
"""
diff --git a/swh/indexer/metadata_dictionary/composer.py b/swh/indexer/metadata_dictionary/composer.py
--- a/swh/indexer/metadata_dictionary/composer.py
+++ b/swh/indexer/metadata_dictionary/composer.py
@@ -7,7 +7,7 @@
from swh.indexer.codemeta import _DATA_DIR, SCHEMA_URI, _read_crosstable
-from .base import JsonMapping, SingleFileMapping
+from .base import JsonMapping, SingleFileIntrinsicMapping
COMPOSER_TABLE_PATH = os.path.join(_DATA_DIR, "composer.csv")
@@ -15,7 +15,7 @@
(CODEMETA_TERMS, COMPOSER_TABLE) = _read_crosstable(fd)
-class ComposerMapping(JsonMapping, SingleFileMapping):
+class ComposerMapping(JsonMapping, SingleFileIntrinsicMapping):
"""Dedicated class for Packagist(composer.json) mapping and translation"""
name = "composer"
diff --git a/swh/indexer/metadata_dictionary/github.py b/swh/indexer/metadata_dictionary/github.py
--- a/swh/indexer/metadata_dictionary/github.py
+++ b/swh/indexer/metadata_dictionary/github.py
@@ -2,20 +2,20 @@
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
+
import json
-from typing import Any, Dict, List, Tuple
+from typing import Any, Dict, Tuple
from swh.indexer.codemeta import ACTIVITYSTREAMS_URI, CROSSWALK_TABLE, FORGEFED_URI
-from swh.indexer.storage.interface import Sha1
-from .base import DirectoryLsEntry, JsonMapping, produce_terms
+from .base import BaseExtrinsicMapping, JsonMapping, produce_terms
def _prettyprint(d):
print(json.dumps(d, indent=4))
-class GitHubMapping(JsonMapping):
+class GitHubMapping(BaseExtrinsicMapping, JsonMapping):
name = "github"
mapping = CROSSWALK_TABLE["GitHub"]
string_fields = [
@@ -28,10 +28,6 @@
"issues_url",
]
- @classmethod
- def detect_metadata_files(cls, file_entries: List[DirectoryLsEntry]) -> List[Sha1]:
- return []
-
@classmethod
def extrinsic_metadata_formats(cls) -> Tuple[str, ...]:
return ("application/vnd.github.v3+json",)
diff --git a/swh/indexer/metadata_dictionary/maven.py b/swh/indexer/metadata_dictionary/maven.py
--- a/swh/indexer/metadata_dictionary/maven.py
+++ b/swh/indexer/metadata_dictionary/maven.py
@@ -11,10 +11,10 @@
from swh.indexer.codemeta import CROSSWALK_TABLE, SCHEMA_URI
-from .base import DictMapping, SingleFileMapping
+from .base import DictMapping, SingleFileIntrinsicMapping
-class MavenMapping(DictMapping, SingleFileMapping):
+class MavenMapping(DictMapping, SingleFileIntrinsicMapping):
"""
dedicated class for Maven (pom.xml) mapping and translation
"""
diff --git a/swh/indexer/metadata_dictionary/npm.py b/swh/indexer/metadata_dictionary/npm.py
--- a/swh/indexer/metadata_dictionary/npm.py
+++ b/swh/indexer/metadata_dictionary/npm.py
@@ -7,10 +7,10 @@
from swh.indexer.codemeta import CROSSWALK_TABLE, SCHEMA_URI
-from .base import JsonMapping, SingleFileMapping
+from .base import JsonMapping, SingleFileIntrinsicMapping
-class NpmMapping(JsonMapping, SingleFileMapping):
+class NpmMapping(JsonMapping, SingleFileIntrinsicMapping):
"""
dedicated class for NPM (package.json) mapping and translation
"""
diff --git a/swh/indexer/metadata_dictionary/python.py b/swh/indexer/metadata_dictionary/python.py
--- a/swh/indexer/metadata_dictionary/python.py
+++ b/swh/indexer/metadata_dictionary/python.py
@@ -9,7 +9,7 @@
from swh.indexer.codemeta import CROSSWALK_TABLE, SCHEMA_URI
-from .base import DictMapping, SingleFileMapping
+from .base import DictMapping, SingleFileIntrinsicMapping
_normalize_pkginfo_key = str.lower
@@ -22,7 +22,7 @@
return self.header_factory(name, value)
-class PythonPkginfoMapping(DictMapping, SingleFileMapping):
+class PythonPkginfoMapping(DictMapping, SingleFileIntrinsicMapping):
"""Dedicated class for Python's PKG-INFO mapping and translation.
https://www.python.org/dev/peps/pep-0314/"""
diff --git a/swh/indexer/metadata_dictionary/ruby.py b/swh/indexer/metadata_dictionary/ruby.py
--- a/swh/indexer/metadata_dictionary/ruby.py
+++ b/swh/indexer/metadata_dictionary/ruby.py
@@ -6,13 +6,13 @@
import ast
import itertools
import re
-from typing import List, Tuple
+from typing import List
from swh.indexer.codemeta import CROSSWALK_TABLE, SCHEMA_URI
from swh.indexer.metadata_dictionary.base import DirectoryLsEntry
from swh.indexer.storage.interface import Sha1
-from .base import DictMapping
+from .base import BaseIntrinsicMapping, DictMapping
def name_to_person(name):
@@ -22,7 +22,7 @@
}
-class GemspecMapping(DictMapping):
+class GemspecMapping(BaseIntrinsicMapping, DictMapping):
name = "gemspec"
mapping = CROSSWALK_TABLE["Ruby Gem"]
string_fields = ["name", "version", "description", "summary", "email"]
@@ -37,11 +37,6 @@
return [entry["sha1"]]
return []
- @classmethod
- def extrinsic_metadata_formats(cls) -> Tuple[str, ...]:
- # this class is only used by intrinsic metadata mappings
- return ()
-
def translate(self, raw_content):
try:
raw_content = raw_content.decode()
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Nov 4 2024, 7:09 PM (9 w, 1 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3220163
Attached To
D8072: Use separate base classes for intrinsic and extrinsic mappings
Event Timeline
Log In to Comment