Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F9312078
D8549.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
5 KB
Subscribers
None
D8549.diff
View Options
diff --git a/docs/metadata-workflow.rst b/docs/metadata-workflow.rst
--- a/docs/metadata-workflow.rst
+++ b/docs/metadata-workflow.rst
@@ -69,7 +69,11 @@
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Intrinsic metadata is extracted from files provided with a project's source
-code, and translated using `CodeMeta`_'s `crosswalk table`_.
+code, and translated using `CodeMeta`_'s `crosswalk table`_, which is vendored
+in :file:`swh/indexer/data/codemeta/codemeta.csv`.
+Ecosystems not yet included in CodeMeta's crosswalk have their own
+:file:`swh/indexer/data/*.csv` file, with one row for each CodeMeta property,
+even when the ecosystem does not support it.
All input formats supported so far are straightforward dictionaries (eg. JSON)
or can be accessed as such (eg. XML); and the first part of the translation is
diff --git a/swh/indexer/codemeta.py b/swh/indexer/codemeta.py
--- a/swh/indexer/codemeta.py
+++ b/swh/indexer/codemeta.py
@@ -9,7 +9,7 @@
import json
import os.path
import re
-from typing import Any, List
+from typing import Any, Dict, List, Set, TextIO, Tuple
from pyld import jsonld
import rdflib
@@ -66,7 +66,15 @@
return uri
-def _read_crosstable(fd):
+def read_crosstable(fd: TextIO) -> Tuple[Set[str], Dict[str, Dict[str, rdflib.URIRef]]]:
+ """
+ Given a file-like object to a `CodeMeta crosswalk table`_ (either the main
+ cross-table with all columns, or an auxiliary table with just the CodeMeta
+ column and one ecosystem-specific column), returns a set of all CodeMeta
+ terms, and a dictionary ``{ecosystem: {ecosystem_term: codemeta_term}}``.
+
+ .. _CodeMeta crosswalk table: https://codemeta.github.io/crosswalk/
+ """
reader = csv.reader(fd)
try:
header = next(reader)
@@ -75,7 +83,9 @@
data_sources = set(header) - {"Parent Type", "Property", "Type", "Description"}
- codemeta_translation = {data_source: {} for data_source in data_sources}
+ codemeta_translation: Dict[str, Dict[str, rdflib.URIRef]] = {
+ data_source: {} for data_source in data_sources
+ }
terms = set()
for line in reader: # For each canonical name
@@ -101,7 +111,7 @@
with open(CROSSWALK_TABLE_PATH) as fd:
- (CODEMETA_TERMS, CROSSWALK_TABLE) = _read_crosstable(fd)
+ (CODEMETA_TERMS, CROSSWALK_TABLE) = read_crosstable(fd)
def _document_loader(url, options=None):
diff --git a/swh/indexer/data/Gitea.csv b/swh/indexer/data/Gitea.csv
--- a/swh/indexer/data/Gitea.csv
+++ b/swh/indexer/data/Gitea.csv
@@ -66,11 +66,3 @@
issueTracker,
referencePublication,
readme,
-,
-,
-,
-,
-,
-,
-,
-,
diff --git a/swh/indexer/metadata_dictionary/composer.py b/swh/indexer/metadata_dictionary/composer.py
--- a/swh/indexer/metadata_dictionary/composer.py
+++ b/swh/indexer/metadata_dictionary/composer.py
@@ -8,7 +8,7 @@
from rdflib import BNode, Graph, Literal, URIRef
-from swh.indexer.codemeta import _DATA_DIR, _read_crosstable
+from swh.indexer.codemeta import _DATA_DIR, read_crosstable
from swh.indexer.namespaces import RDF, SCHEMA
from .base import JsonMapping, SingleFileIntrinsicMapping
@@ -20,7 +20,7 @@
COMPOSER_TABLE_PATH = os.path.join(_DATA_DIR, "composer.csv")
with open(COMPOSER_TABLE_PATH) as fd:
- (CODEMETA_TERMS, COMPOSER_TABLE) = _read_crosstable(fd)
+ (CODEMETA_TERMS, COMPOSER_TABLE) = read_crosstable(fd)
class ComposerMapping(JsonMapping, SingleFileIntrinsicMapping):
diff --git a/swh/indexer/metadata_dictionary/dart.py b/swh/indexer/metadata_dictionary/dart.py
--- a/swh/indexer/metadata_dictionary/dart.py
+++ b/swh/indexer/metadata_dictionary/dart.py
@@ -8,7 +8,7 @@
from rdflib import RDF, BNode, Graph, Literal, URIRef
-from swh.indexer.codemeta import _DATA_DIR, _read_crosstable
+from swh.indexer.codemeta import _DATA_DIR, read_crosstable
from swh.indexer.namespaces import SCHEMA
from .base import YamlMapping
@@ -19,7 +19,7 @@
PUB_TABLE_PATH = os.path.join(_DATA_DIR, "pubspec.csv")
with open(PUB_TABLE_PATH) as fd:
- (CODEMETA_TERMS, PUB_TABLE) = _read_crosstable(fd)
+ (CODEMETA_TERMS, PUB_TABLE) = read_crosstable(fd)
def name_to_person(name):
diff --git a/swh/indexer/metadata_dictionary/gitea.py b/swh/indexer/metadata_dictionary/gitea.py
--- a/swh/indexer/metadata_dictionary/gitea.py
+++ b/swh/indexer/metadata_dictionary/gitea.py
@@ -8,7 +8,7 @@
from rdflib import RDF, BNode, Graph, Literal, URIRef
-from swh.indexer.codemeta import _DATA_DIR, _read_crosstable
+from swh.indexer.codemeta import _DATA_DIR, read_crosstable
from swh.indexer.namespaces import ACTIVITYSTREAMS, FORGEFED, SCHEMA
from .base import BaseExtrinsicMapping, JsonMapping, produce_terms
@@ -20,7 +20,7 @@
GITEA_TABLE_PATH = os.path.join(_DATA_DIR, "Gitea.csv")
with open(GITEA_TABLE_PATH) as fd:
- (CODEMETA_TERMS, GITEA_TABLE) = _read_crosstable(fd)
+ (CODEMETA_TERMS, GITEA_TABLE) = read_crosstable(fd)
class GiteaMapping(BaseExtrinsicMapping, JsonMapping):
diff --git a/swh/indexer/metadata_dictionary/nuget.py b/swh/indexer/metadata_dictionary/nuget.py
--- a/swh/indexer/metadata_dictionary/nuget.py
+++ b/swh/indexer/metadata_dictionary/nuget.py
@@ -9,7 +9,7 @@
from rdflib import RDF, BNode, Graph, Literal, URIRef
-from swh.indexer.codemeta import _DATA_DIR, _read_crosstable
+from swh.indexer.codemeta import _DATA_DIR, read_crosstable
from swh.indexer.namespaces import SCHEMA
from swh.indexer.storage.interface import Sha1
@@ -19,7 +19,7 @@
NUGET_TABLE_PATH = os.path.join(_DATA_DIR, "nuget.csv")
with open(NUGET_TABLE_PATH) as fd:
- (CODEMETA_TERMS, NUGET_TABLE) = _read_crosstable(fd)
+ (CODEMETA_TERMS, NUGET_TABLE) = read_crosstable(fd)
SPDX = URIRef("https://spdx.org/licenses/")
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Wed, Jul 2, 10:42 AM (2 w, 1 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3218045
Attached To
D8549: Make read_crosstable public and document it.
Event Timeline
Log In to Comment