Changeset View
Changeset View
Standalone View
Standalone View
swh/indexer/metadata_dictionary/utils.py
# Copyright (C) 2022 The Software Heritage developers | # Copyright (C) 2022 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import json | import json | ||||
from typing import Callable, Iterable, Optional, Sequence, TypeVar | from typing import Any, Callable, Iterable, Optional, Sequence, TypeVar | ||||
import urllib.parse | |||||
from pyld import jsonld | from pyld import jsonld | ||||
from rdflib import RDF, Graph, URIRef | from rdflib import RDF, Graph, URIRef | ||||
import rdflib.term | import rdflib.term | ||||
from swh.indexer.codemeta import _document_loader | from swh.indexer.codemeta import _document_loader | ||||
▲ Show 20 Lines • Show All 48 Lines • ▼ Show 20 Lines | def add_map( | ||||
subject: rdflib.term.Node, | subject: rdflib.term.Node, | ||||
predicate: rdflib.term.Identifier, | predicate: rdflib.term.Identifier, | ||||
f: Callable[[Graph, TValue], Optional[rdflib.term.Node]], | f: Callable[[Graph, TValue], Optional[rdflib.term.Node]], | ||||
values: Iterable[TValue], | values: Iterable[TValue], | ||||
) -> None: | ) -> None: | ||||
"""Helper for :func:`add_list` that takes a mapper function ``f``.""" | """Helper for :func:`add_list` that takes a mapper function ``f``.""" | ||||
nodes = [f(graph, value) for value in values] | nodes = [f(graph, value) for value in values] | ||||
add_list(graph, subject, predicate, [node for node in nodes if node]) | add_list(graph, subject, predicate, [node for node in nodes if node]) | ||||
def add_url_if_valid( | |||||
graph: Graph, | |||||
subject: rdflib.term.Node, | |||||
predicate: rdflib.term.Identifier, | |||||
anlambert: Typing could be more precise here.
```lang=python
url: Optional[str]
``` | |||||
Done Inline Actions — vlorentz: Not really, `url` comes from arbitrary JSON, YAML, or XML files. | |||||
url: Any, | |||||
) -> None: | |||||
"""Adds ``(subject, predicate, url)`` to the graph if ``url`` is well-formed. | |||||
This is meant as a workaround for https://github.com/digitalbazaar/pyld/issues/91 | |||||
to drop URLs that are blatantly invalid early, so PyLD does not crash. | |||||
>>> from pprint import pprint | |||||
>>> graph = Graph() | |||||
>>> subject = rdflib.term.URIRef("http://example.org/test-software") | |||||
>>> predicate = rdflib.term.URIRef("http://schema.org/license") | |||||
>>> add_url_if_valid( | |||||
... graph, subject, predicate, "https//www.apache.org/licenses/LICENSE-2.0.txt" | |||||
... ) | |||||
>>> add_url_if_valid( | |||||
... graph, subject, predicate, "http:s//www.apache.org/licenses/LICENSE-2.0.txt" | |||||
... ) | |||||
>>> add_url_if_valid( | |||||
... graph, subject, predicate, "https://www.apache.org/licenses/LICENSE-2.0.txt" | |||||
... ) | |||||
>>> add_url_if_valid( | |||||
... graph, subject, predicate, 42 | |||||
... ) | |||||
>>> pprint(set(graph.triples((subject, predicate, None)))) | |||||
{(rdflib.term.URIRef('http://example.org/test-software'), | |||||
rdflib.term.URIRef('http://schema.org/license'), | |||||
rdflib.term.URIRef('https://www.apache.org/licenses/LICENSE-2.0.txt'))} | |||||
""" | |||||
if not isinstance(url, str): | |||||
return | |||||
if " " in url or not urllib.parse.urlparse(url).netloc: | |||||
Not Done Inline Actions — anlambert: Could be merged into a single if block.
```lang=python
if url is None or " " in url or not urllib.parse.urlparse(url).netloc:
    return
``` | |||||
return | |||||
graph.add((subject, predicate, rdflib.term.URIRef(url))) |
Typing could be more precise here.