D8549.diff
No OneTemporary
Actions

Size

5 KB

Subscribers

None

D8549.diff
View Options

	diff --git a/docs/metadata-workflow.rst b/docs/metadata-workflow.rst
	--- a/docs/metadata-workflow.rst
	+++ b/docs/metadata-workflow.rst
	@@ -69,7 +69,11 @@
	^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

	Intrinsic metadata is extracted from files provided with a project's source
	-code, and translated using `CodeMeta`_'s `crosswalk table`_.
	+code, and translated using `CodeMeta`_'s `crosswalk table`_, which is vendored
	+in :file:`swh/indexer/data/codemeta/codemeta.csv`.
	+Ecosystems not yet included in CodeMeta's crosswalk have their own
	+:file:`swh/indexer/data/*.csv` file, with one row for each CodeMeta property,
	+even when not supported by the ecosystem.

	All input formats supported so far are straightforward dictionaries (eg. JSON)
	or can be accessed as such (eg. XML); and the first part of the translation is
	diff --git a/swh/indexer/codemeta.py b/swh/indexer/codemeta.py
	--- a/swh/indexer/codemeta.py
	+++ b/swh/indexer/codemeta.py
	@@ -9,7 +9,7 @@
	import json
	import os.path
	import re
	-from typing import Any, List
	+from typing import Any, Dict, List, Set, TextIO, Tuple

	from pyld import jsonld
	import rdflib
	@@ -66,7 +66,15 @@
	return uri


	-def _read_crosstable(fd):
	+def read_crosstable(fd: TextIO) -> Tuple[Set[str], Dict[str, Dict[str, rdflib.URIRef]]]:
	+ """
	+ Given a file-like object to a `CodeMeta crosswalk table`_ (either the main
	+ cross-table with all columns, or an auxiliary table with just the CodeMeta
	+ column and one ecosystem-specific column), returns a set of all CodeMeta
	+ terms, and a dictionary ``{ecosystem: {ecosystem_term: codemeta_term}}``.
	+
	+ .. _CodeMeta crosswalk table: https://codemeta.github.io/crosswalk/
	+ """
	reader = csv.reader(fd)
	try:
	header = next(reader)
	@@ -75,7 +83,9 @@

	data_sources = set(header) - {"Parent Type", "Property", "Type", "Description"}

	- codemeta_translation = {data_source: {} for data_source in data_sources}
	+ codemeta_translation: Dict[str, Dict[str, rdflib.URIRef]] = {
	+ data_source: {} for data_source in data_sources
	+ }
	terms = set()

	for line in reader: # For each canonical name
	@@ -101,7 +111,7 @@


	with open(CROSSWALK_TABLE_PATH) as fd:
	- (CODEMETA_TERMS, CROSSWALK_TABLE) = _read_crosstable(fd)
	+ (CODEMETA_TERMS, CROSSWALK_TABLE) = read_crosstable(fd)


	def _document_loader(url, options=None):
	diff --git a/swh/indexer/data/Gitea.csv b/swh/indexer/data/Gitea.csv
	--- a/swh/indexer/data/Gitea.csv
	+++ b/swh/indexer/data/Gitea.csv
	@@ -66,11 +66,3 @@
	issueTracker,
	referencePublication,
	readme,
	-,
	-,
	-,
	-,
	-,
	-,
	-,
	-,
	diff --git a/swh/indexer/metadata_dictionary/composer.py b/swh/indexer/metadata_dictionary/composer.py
	--- a/swh/indexer/metadata_dictionary/composer.py
	+++ b/swh/indexer/metadata_dictionary/composer.py
	@@ -8,7 +8,7 @@

	from rdflib import BNode, Graph, Literal, URIRef

	-from swh.indexer.codemeta import _DATA_DIR, _read_crosstable
	+from swh.indexer.codemeta import _DATA_DIR, read_crosstable
	from swh.indexer.namespaces import RDF, SCHEMA

	from .base import JsonMapping, SingleFileIntrinsicMapping
	@@ -20,7 +20,7 @@
	COMPOSER_TABLE_PATH = os.path.join(_DATA_DIR, "composer.csv")

	with open(COMPOSER_TABLE_PATH) as fd:
	- (CODEMETA_TERMS, COMPOSER_TABLE) = _read_crosstable(fd)
	+ (CODEMETA_TERMS, COMPOSER_TABLE) = read_crosstable(fd)


	class ComposerMapping(JsonMapping, SingleFileIntrinsicMapping):
	diff --git a/swh/indexer/metadata_dictionary/dart.py b/swh/indexer/metadata_dictionary/dart.py
	--- a/swh/indexer/metadata_dictionary/dart.py
	+++ b/swh/indexer/metadata_dictionary/dart.py
	@@ -8,7 +8,7 @@

	from rdflib import RDF, BNode, Graph, Literal, URIRef

	-from swh.indexer.codemeta import _DATA_DIR, _read_crosstable
	+from swh.indexer.codemeta import _DATA_DIR, read_crosstable
	from swh.indexer.namespaces import SCHEMA

	from .base import YamlMapping
	@@ -19,7 +19,7 @@
	PUB_TABLE_PATH = os.path.join(_DATA_DIR, "pubspec.csv")

	with open(PUB_TABLE_PATH) as fd:
	- (CODEMETA_TERMS, PUB_TABLE) = _read_crosstable(fd)
	+ (CODEMETA_TERMS, PUB_TABLE) = read_crosstable(fd)


	def name_to_person(name):
	diff --git a/swh/indexer/metadata_dictionary/gitea.py b/swh/indexer/metadata_dictionary/gitea.py
	--- a/swh/indexer/metadata_dictionary/gitea.py
	+++ b/swh/indexer/metadata_dictionary/gitea.py
	@@ -8,7 +8,7 @@

	from rdflib import RDF, BNode, Graph, Literal, URIRef

	-from swh.indexer.codemeta import _DATA_DIR, _read_crosstable
	+from swh.indexer.codemeta import _DATA_DIR, read_crosstable
	from swh.indexer.namespaces import ACTIVITYSTREAMS, FORGEFED, SCHEMA

	from .base import BaseExtrinsicMapping, JsonMapping, produce_terms
	@@ -20,7 +20,7 @@
	GITEA_TABLE_PATH = os.path.join(_DATA_DIR, "Gitea.csv")

	with open(GITEA_TABLE_PATH) as fd:
	- (CODEMETA_TERMS, GITEA_TABLE) = _read_crosstable(fd)
	+ (CODEMETA_TERMS, GITEA_TABLE) = read_crosstable(fd)


	class GiteaMapping(BaseExtrinsicMapping, JsonMapping):
	diff --git a/swh/indexer/metadata_dictionary/nuget.py b/swh/indexer/metadata_dictionary/nuget.py
	--- a/swh/indexer/metadata_dictionary/nuget.py
	+++ b/swh/indexer/metadata_dictionary/nuget.py
	@@ -9,7 +9,7 @@

	from rdflib import RDF, BNode, Graph, Literal, URIRef

	-from swh.indexer.codemeta import _DATA_DIR, _read_crosstable
	+from swh.indexer.codemeta import _DATA_DIR, read_crosstable
	from swh.indexer.namespaces import SCHEMA
	from swh.indexer.storage.interface import Sha1

	@@ -19,7 +19,7 @@
	NUGET_TABLE_PATH = os.path.join(_DATA_DIR, "nuget.csv")

	with open(NUGET_TABLE_PATH) as fd:
	- (CODEMETA_TERMS, NUGET_TABLE) = _read_crosstable(fd)
	+ (CODEMETA_TERMS, NUGET_TABLE) = read_crosstable(fd)

	SPDX = URIRef("https://spdx.org/licenses/")

File Metadata

Mime Type: text/plain
Expires: Wed, Jul 2, 10:42 AM (2 w, 1 d ago)
Storage Engine: blob
Storage Format: Raw Data
Storage Handle: 3218045

D8549.diffNo OneTemporaryActions

D8549.diffView Options

File Metadata

Event Timeline

D8549.diff
No OneTemporary
Actions

D8549.diff
View Options