Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7123228
D8263.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
18 KB
Subscribers
None
D8263.diff
View Options
diff --git a/swh/indexer/codemeta.py b/swh/indexer/codemeta.py
--- a/swh/indexer/codemeta.py
+++ b/swh/indexer/codemeta.py
@@ -14,6 +14,7 @@
from pyld import jsonld
import swh.indexer
+from swh.indexer.namespaces import ACTIVITYSTREAMS, CODEMETA, FORGEFED, SCHEMA
_DATA_DIR = os.path.join(os.path.dirname(swh.indexer.__file__), "data")
@@ -34,18 +35,14 @@
CODEMETA_ALTERNATE_CONTEXT_URLS = {
("https://raw.githubusercontent.com/codemeta/codemeta/master/codemeta.jsonld")
}
-CODEMETA_URI = "https://codemeta.github.io/terms/"
-SCHEMA_URI = "http://schema.org/"
-FORGEFED_URI = "https://forgefed.org/ns#"
-ACTIVITYSTREAMS_URI = "https://www.w3.org/ns/activitystreams#"
PROPERTY_BLACKLIST = {
# CodeMeta properties that we cannot properly represent.
- SCHEMA_URI + "softwareRequirements",
- CODEMETA_URI + "softwareSuggestions",
+ SCHEMA.softwareRequirements,
+ CODEMETA.softwareSuggestions,
# Duplicate of 'author'
- SCHEMA_URI + "creator",
+ SCHEMA.creator,
}
_codemeta_field_separator = re.compile(r"\s*[,/]\s*")
@@ -64,7 +61,7 @@
uri = jsonld.JsonLdProcessor.get_context_value(
_PROCESSED_CODEMETA_CONTEXT, local_name, "@id"
)
- assert uri.startswith(("@", CODEMETA_URI, SCHEMA_URI)), (local_name, uri)
+ assert uri.startswith(("@", CODEMETA._uri, SCHEMA._uri)), (local_name, uri)
return uri
@@ -115,10 +112,10 @@
"documentUrl": url,
"document": CODEMETA_CONTEXT,
}
- elif url == CODEMETA_URI:
+ elif url == CODEMETA._uri:
raise Exception(
"{} is CodeMeta's URI, use {} as context url".format(
- CODEMETA_URI, CODEMETA_CONTEXT_URL
+ CODEMETA._uri, CODEMETA_CONTEXT_URL
)
)
else:
@@ -135,7 +132,7 @@
"""
contexts: List[Any] = [CODEMETA_CONTEXT_URL]
if forgefed:
- contexts.append({"as": ACTIVITYSTREAMS_URI, "forge": FORGEFED_URI})
+ contexts.append({"as": ACTIVITYSTREAMS._uri, "forge": FORGEFED._uri})
return jsonld.compact(doc, contexts, options={"documentLoader": _document_loader})
@@ -195,8 +192,8 @@
if "@id" not in merged_document:
merged_document["@id"] = value
elif value != merged_document["@id"]:
- if value not in merged_document[SCHEMA_URI + "sameAs"]:
- merged_document[SCHEMA_URI + "sameAs"].append(value)
+ if value not in merged_document[SCHEMA.sameAs]:
+ merged_document[SCHEMA.sameAs].append(value)
else:
for value in values:
if isinstance(value, dict) and set(value) == {"@list"}:
diff --git a/swh/indexer/metadata_dictionary/base.py b/swh/indexer/metadata_dictionary/base.py
--- a/swh/indexer/metadata_dictionary/base.py
+++ b/swh/indexer/metadata_dictionary/base.py
@@ -10,7 +10,8 @@
from typing_extensions import TypedDict
import yaml
-from swh.indexer.codemeta import SCHEMA_URI, compact, merge_values
+from swh.indexer.codemeta import compact, merge_values
+from swh.indexer.namespaces import SCHEMA
from swh.indexer.storage.interface import Sha1
@@ -26,16 +27,14 @@
)
-def produce_terms(
- namespace: str, terms: List[str]
-) -> Callable[[TTranslateCallable], TTranslateCallable]:
+def produce_terms(*uris: str) -> Callable[[TTranslateCallable], TTranslateCallable]:
"""Returns a decorator that marks the decorated function as adding
the given terms to the ``translated_metadata`` dict"""
def decorator(f: TTranslateCallable) -> TTranslateCallable:
if not hasattr(f, "produced_terms"):
f.produced_terms = [] # type: ignore
- f.produced_terms.extend(namespace + term for term in terms) # type: ignore
+ f.produced_terms.extend(uris) # type: ignore
return f
return decorator
@@ -175,7 +174,7 @@
the indexer
"""
- translated_metadata = {"@type": SCHEMA_URI + "SoftwareSourceCode"}
+ translated_metadata = {"@type": SCHEMA.SoftwareSourceCode}
for k, v in content_dict.items():
# First, check if there is a specific translation
# method for this key
diff --git a/swh/indexer/metadata_dictionary/cff.py b/swh/indexer/metadata_dictionary/cff.py
--- a/swh/indexer/metadata_dictionary/cff.py
+++ b/swh/indexer/metadata_dictionary/cff.py
@@ -1,6 +1,7 @@
from typing import Dict, List, Optional, Union
-from swh.indexer.codemeta import CROSSWALK_TABLE, SCHEMA_URI
+from swh.indexer.codemeta import CROSSWALK_TABLE
+from swh.indexer.namespaces import SCHEMA
from .base import YamlMapping
@@ -17,19 +18,19 @@
result = []
for author in d:
author_data: Dict[str, Optional[Union[str, Dict]]] = {
- "@type": SCHEMA_URI + "Person"
+ "@type": SCHEMA.Person
}
if "orcid" in author and isinstance(author["orcid"], str):
author_data["@id"] = author["orcid"]
if "affiliation" in author and isinstance(author["affiliation"], str):
- author_data[SCHEMA_URI + "affiliation"] = {
- "@type": SCHEMA_URI + "Organization",
- SCHEMA_URI + "name": author["affiliation"],
+ author_data[SCHEMA.affiliation] = {
+ "@type": SCHEMA.Organization,
+ SCHEMA.name: author["affiliation"],
}
if "family-names" in author and isinstance(author["family-names"], str):
- author_data[SCHEMA_URI + "familyName"] = author["family-names"]
+ author_data[SCHEMA.familyName] = author["family-names"]
if "given-names" in author and isinstance(author["given-names"], str):
- author_data[SCHEMA_URI + "givenName"] = author["given-names"]
+ author_data[SCHEMA.givenName] = author["given-names"]
result.append(author_data)
@@ -50,4 +51,4 @@
def normalize_date_released(self, s: str) -> Dict[str, str]:
if isinstance(s, str):
- return {"@value": s, "@type": SCHEMA_URI + "Date"}
+ return {"@value": s, "@type": SCHEMA.Date}
diff --git a/swh/indexer/metadata_dictionary/composer.py b/swh/indexer/metadata_dictionary/composer.py
--- a/swh/indexer/metadata_dictionary/composer.py
+++ b/swh/indexer/metadata_dictionary/composer.py
@@ -5,7 +5,8 @@
import os.path
-from swh.indexer.codemeta import _DATA_DIR, SCHEMA_URI, _read_crosstable
+from swh.indexer.codemeta import _DATA_DIR, _read_crosstable
+from swh.indexer.namespaces import SCHEMA
from .base import JsonMapping, SingleFileIntrinsicMapping
@@ -43,13 +44,13 @@
def normalize_authors(self, author_list):
authors = []
for author in author_list:
- author_obj = {"@type": SCHEMA_URI + "Person"}
+ author_obj = {"@type": SCHEMA.Person}
if isinstance(author, dict):
if isinstance(author.get("name", None), str):
- author_obj[SCHEMA_URI + "name"] = author.get("name", None)
+ author_obj[SCHEMA.name] = author.get("name", None)
if isinstance(author.get("email", None), str):
- author_obj[SCHEMA_URI + "email"] = author.get("email", None)
+ author_obj[SCHEMA.email] = author.get("email", None)
authors.append(author_obj)
diff --git a/swh/indexer/metadata_dictionary/dart.py b/swh/indexer/metadata_dictionary/dart.py
--- a/swh/indexer/metadata_dictionary/dart.py
+++ b/swh/indexer/metadata_dictionary/dart.py
@@ -6,7 +6,8 @@
import os.path
import re
-from swh.indexer.codemeta import _DATA_DIR, SCHEMA_URI, _read_crosstable
+from swh.indexer.codemeta import _DATA_DIR, _read_crosstable
+from swh.indexer.namespaces import SCHEMA
from .base import YamlMapping
@@ -18,8 +19,8 @@
def name_to_person(name):
return {
- "@type": SCHEMA_URI + "Person",
- SCHEMA_URI + "name": name,
+ "@type": SCHEMA.Person,
+ SCHEMA.name: name,
}
@@ -50,17 +51,17 @@
def normalize_author(self, s):
name_email_regex = "(?P<name>.*?)( <(?P<email>.*)>)"
- author = {"@type": SCHEMA_URI + "Person"}
+ author = {"@type": SCHEMA.Person}
if isinstance(s, str):
match = re.search(name_email_regex, s)
if match:
name = match.group("name")
email = match.group("email")
- author[SCHEMA_URI + "email"] = email
+ author[SCHEMA.email] = email
else:
name = s
- author[SCHEMA_URI + "name"] = name
+ author[SCHEMA.name] = name
return {"@list": [author]}
diff --git a/swh/indexer/metadata_dictionary/github.py b/swh/indexer/metadata_dictionary/github.py
--- a/swh/indexer/metadata_dictionary/github.py
+++ b/swh/indexer/metadata_dictionary/github.py
@@ -6,7 +6,8 @@
import json
from typing import Any, Dict, Tuple
-from swh.indexer.codemeta import ACTIVITYSTREAMS_URI, CROSSWALK_TABLE, FORGEFED_URI
+from swh.indexer.codemeta import CROSSWALK_TABLE
+from swh.indexer.namespaces import ACTIVITYSTREAMS, FORGEFED
from .base import BaseExtrinsicMapping, JsonMapping, produce_terms
@@ -34,11 +35,10 @@
def _translate_dict(self, content_dict: Dict[str, Any], **kwargs) -> Dict[str, Any]:
d = super()._translate_dict(content_dict, **kwargs)
- d["type"] = FORGEFED_URI + "Repository"
+ d["type"] = FORGEFED.Repository
return d
- @produce_terms(FORGEFED_URI, ["forks"])
- @produce_terms(ACTIVITYSTREAMS_URI, ["totalItems"])
+ @produce_terms(FORGEFED.forks, ACTIVITYSTREAMS.totalItems)
def translate_forks_count(
self, translated_metadata: Dict[str, Any], v: Any
) -> None:
@@ -57,15 +57,14 @@
}
"""
if isinstance(v, int):
- translated_metadata.setdefault(FORGEFED_URI + "forks", []).append(
+ translated_metadata.setdefault(FORGEFED.forks, []).append(
{
- "@type": ACTIVITYSTREAMS_URI + "OrderedCollection",
- ACTIVITYSTREAMS_URI + "totalItems": v,
+ "@type": ACTIVITYSTREAMS.OrderedCollection,
+ ACTIVITYSTREAMS.totalItems: v,
}
)
- @produce_terms(ACTIVITYSTREAMS_URI, ["likes"])
- @produce_terms(ACTIVITYSTREAMS_URI, ["totalItems"])
+ @produce_terms(ACTIVITYSTREAMS.likes, ACTIVITYSTREAMS.totalItems)
def translate_stargazers_count(
self, translated_metadata: Dict[str, Any], v: Any
) -> None:
@@ -84,15 +83,14 @@
}
"""
if isinstance(v, int):
- translated_metadata.setdefault(ACTIVITYSTREAMS_URI + "likes", []).append(
+ translated_metadata.setdefault(ACTIVITYSTREAMS.likes, []).append(
{
- "@type": ACTIVITYSTREAMS_URI + "Collection",
- ACTIVITYSTREAMS_URI + "totalItems": v,
+ "@type": ACTIVITYSTREAMS.Collection,
+ ACTIVITYSTREAMS.totalItems: v,
}
)
- @produce_terms(ACTIVITYSTREAMS_URI, ["followers"])
- @produce_terms(ACTIVITYSTREAMS_URI, ["totalItems"])
+ @produce_terms(ACTIVITYSTREAMS.followers, ACTIVITYSTREAMS.totalItems)
def translate_watchers_count(
self, translated_metadata: Dict[str, Any], v: Any
) -> None:
@@ -111,12 +109,10 @@
}
"""
if isinstance(v, int):
- translated_metadata.setdefault(
- ACTIVITYSTREAMS_URI + "followers", []
- ).append(
+ translated_metadata.setdefault(ACTIVITYSTREAMS.followers, []).append(
{
- "@type": ACTIVITYSTREAMS_URI + "Collection",
- ACTIVITYSTREAMS_URI + "totalItems": v,
+ "@type": ACTIVITYSTREAMS.Collection,
+ ACTIVITYSTREAMS.totalItems: v,
}
)
diff --git a/swh/indexer/metadata_dictionary/maven.py b/swh/indexer/metadata_dictionary/maven.py
--- a/swh/indexer/metadata_dictionary/maven.py
+++ b/swh/indexer/metadata_dictionary/maven.py
@@ -9,7 +9,8 @@
import xmltodict
-from swh.indexer.codemeta import CROSSWALK_TABLE, SCHEMA_URI
+from swh.indexer.codemeta import CROSSWALK_TABLE
+from swh.indexer.namespaces import SCHEMA
from .base import DictMapping, SingleFileIntrinsicMapping
@@ -41,8 +42,8 @@
self.log.warning("Skipping ill-formed XML content: %s", content)
return None
metadata = self._translate_dict(d, normalize=False)
- metadata[SCHEMA_URI + "codeRepository"] = self.parse_repositories(d)
- metadata[SCHEMA_URI + "license"] = self.parse_licenses(d)
+ metadata[SCHEMA.codeRepository] = self.parse_repositories(d)
+ metadata[SCHEMA.license] = self.parse_licenses(d)
return self.normalize_translation(metadata)
_default_repository = {"url": "https://repo.maven.apache.org/maven2/"}
diff --git a/swh/indexer/metadata_dictionary/npm.py b/swh/indexer/metadata_dictionary/npm.py
--- a/swh/indexer/metadata_dictionary/npm.py
+++ b/swh/indexer/metadata_dictionary/npm.py
@@ -6,7 +6,8 @@
import re
import urllib.parse
-from swh.indexer.codemeta import CROSSWALK_TABLE, SCHEMA_URI
+from swh.indexer.codemeta import CROSSWALK_TABLE
+from swh.indexer.namespaces import SCHEMA
from .base import JsonMapping, SingleFileIntrinsicMapping
@@ -120,7 +121,7 @@
'http://schema.org/email': 'john.doe@example.org',
'http://schema.org/name': 'John Doe'}]}
""" # noqa
- author = {"@type": SCHEMA_URI + "Person"}
+ author = {"@type": SCHEMA.Person}
if isinstance(d, dict):
name = d.get("name", None)
email = d.get("email", None)
@@ -136,15 +137,15 @@
return None
if name and isinstance(name, str):
- author[SCHEMA_URI + "name"] = name
+ author[SCHEMA.name] = name
if email and isinstance(email, str):
- author[SCHEMA_URI + "email"] = email
+ author[SCHEMA.email] = email
if url and isinstance(url, str):
# Workaround for https://github.com/digitalbazaar/pyld/issues/91 : drop
# URLs that are blatantly invalid early, so PyLD does not crash.
parsed_url = urllib.parse.urlparse(url)
if parsed_url.netloc:
- author[SCHEMA_URI + "url"] = {"@id": url}
+ author[SCHEMA.url] = {"@id": url}
return {"@list": [author]}
diff --git a/swh/indexer/metadata_dictionary/nuget.py b/swh/indexer/metadata_dictionary/nuget.py
--- a/swh/indexer/metadata_dictionary/nuget.py
+++ b/swh/indexer/metadata_dictionary/nuget.py
@@ -9,7 +9,8 @@
import xmltodict
-from swh.indexer.codemeta import _DATA_DIR, SCHEMA_URI, _read_crosstable
+from swh.indexer.codemeta import _DATA_DIR, _read_crosstable
+from swh.indexer.namespaces import SCHEMA
from swh.indexer.storage.interface import Sha1
from .base import DictMapping, DirectoryLsEntry, SingleFileIntrinsicMapping
@@ -93,8 +94,7 @@
if isinstance(s, str):
author_names = [a.strip() for a in s.split(",")]
authors = [
- {"@type": SCHEMA_URI + "Person", SCHEMA_URI + "name": name}
- for name in author_names
+ {"@type": SCHEMA.Person, SCHEMA.name: name} for name in author_names
]
return {"@list": authors}
diff --git a/swh/indexer/metadata_dictionary/python.py b/swh/indexer/metadata_dictionary/python.py
--- a/swh/indexer/metadata_dictionary/python.py
+++ b/swh/indexer/metadata_dictionary/python.py
@@ -7,7 +7,8 @@
import email.policy
import itertools
-from swh.indexer.codemeta import CROSSWALK_TABLE, SCHEMA_URI
+from swh.indexer.codemeta import CROSSWALK_TABLE
+from swh.indexer.namespaces import SCHEMA
from .base import DictMapping, SingleFileIntrinsicMapping
@@ -52,15 +53,13 @@
if value != "UNKNOWN":
d.setdefault(key, []).append(value)
metadata = self._translate_dict(d, normalize=False)
- if SCHEMA_URI + "author" in metadata or SCHEMA_URI + "email" in metadata:
- metadata[SCHEMA_URI + "author"] = {
+ if SCHEMA.author in metadata or SCHEMA.email in metadata:
+ metadata[SCHEMA.author] = {
"@list": [
{
- "@type": SCHEMA_URI + "Person",
- SCHEMA_URI
- + "name": metadata.pop(SCHEMA_URI + "author", [None])[0],
- SCHEMA_URI
- + "email": metadata.pop(SCHEMA_URI + "email", [None])[0],
+ "@type": SCHEMA.Person,
+ SCHEMA.name: metadata.pop(SCHEMA.author, [None])[0],
+ SCHEMA.email: metadata.pop(SCHEMA.email, [None])[0],
}
]
}
diff --git a/swh/indexer/metadata_dictionary/ruby.py b/swh/indexer/metadata_dictionary/ruby.py
--- a/swh/indexer/metadata_dictionary/ruby.py
+++ b/swh/indexer/metadata_dictionary/ruby.py
@@ -8,8 +8,9 @@
import re
from typing import List
-from swh.indexer.codemeta import CROSSWALK_TABLE, SCHEMA_URI
+from swh.indexer.codemeta import CROSSWALK_TABLE
from swh.indexer.metadata_dictionary.base import DirectoryLsEntry
+from swh.indexer.namespaces import SCHEMA
from swh.indexer.storage.interface import Sha1
from .base import BaseIntrinsicMapping, DictMapping
@@ -17,8 +18,8 @@
def name_to_person(name):
return {
- "@type": SCHEMA_URI + "Person",
- SCHEMA_URI + "name": name,
+ "@type": SCHEMA.Person,
+ SCHEMA.name: name,
}
diff --git a/swh/indexer/namespaces.py b/swh/indexer/namespaces.py
new file mode 100644
--- /dev/null
+++ b/swh/indexer/namespaces.py
@@ -0,0 +1,28 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+
+class _Namespace:
+ """Handy class to get terms within a namespace by accessing them as attributes.
+
+ This is similar to `rdflib's namespaces
+ <https://rdflib.readthedocs.io/en/stable/namespaces_and_bindings.html>`__
+ """
+
+ def __init__(self, uri: str):
+ if not uri.endswith(("#", "/")):
+# Sanity check: a namespace URI must end with '#' or '/', so that
+# appending a term gives a valid URI; anything else is likely a mistake.
+ raise ValueError(f"Invalid trailing character for namespace URI: {uri}")
+ self._uri = uri
+
+ def __getattr__(self, term: str) -> str:
+ return self._uri + term
+
+
+SCHEMA = _Namespace("http://schema.org/")
+CODEMETA = _Namespace("https://codemeta.github.io/terms/")
+FORGEFED = _Namespace("https://forgefed.org/ns#")
+ACTIVITYSTREAMS = _Namespace("https://www.w3.org/ns/activitystreams#")
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Wed, Dec 18, 4:01 AM (1 d, 22 h ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3219242
Attached To
D8263: metadata_dictionary: Simplify code using rdflib-style namespace classes
Event Timeline
Log In to Comment