diff --git a/swh/indexer/metadata_dictionary/base.py b/swh/indexer/metadata_dictionary/base.py --- a/swh/indexer/metadata_dictionary/base.py +++ b/swh/indexer/metadata_dictionary/base.py @@ -132,10 +132,14 @@ """Base class for mappings that take as input a file that is mostly a key-value store (eg. a shallow JSON dict).""" - string_fields = [] # type: List[str] + string_fields: List[str] = [] """List of fields that are simple strings, and don't need any normalization.""" + uri_fields: List[str] = [] + """List of fields that are simple URIs, and don't need any + normalization.""" + @property def mapping(self): """A translation dict to map dict keys into a canonical name.""" @@ -151,7 +155,7 @@ simple_terms = { str(term) for (key, term) in cls.mapping.items() - if key in cls.string_fields + if key in cls.string_fields + cls.uri_fields or hasattr(cls, "normalize_" + cls._normalize_method_name(key)) } @@ -224,6 +228,11 @@ elif k in self.string_fields and isinstance(v, list): for item in v: graph.add((root, codemeta_key, rdflib.Literal(item))) + elif k in self.uri_fields and isinstance(v, str): + graph.add((root, codemeta_key, rdflib.URIRef(v))) + elif k in self.uri_fields and isinstance(v, list): + for item in v: + graph.add((root, codemeta_key, rdflib.URIRef(item))) else: continue diff --git a/swh/indexer/metadata_dictionary/cff.py b/swh/indexer/metadata_dictionary/cff.py --- a/swh/indexer/metadata_dictionary/cff.py +++ b/swh/indexer/metadata_dictionary/cff.py @@ -25,6 +25,7 @@ filename = b"CITATION.cff" mapping = CROSSWALK_TABLE["Citation File Format Core (CFF-Core) 1.0.2"] string_fields = ["keywords", "license", "abstract", "version", "doi"] + uri_fields = ["repository-code"] def _translate_author(self, graph: Graph, author: dict) -> rdflib.term.Node: node: rdflib.term.Node @@ -57,10 +58,6 @@ if isinstance(s, str): return SPDX + s - def normalize_repository_code(self, s: str) -> URIRef: - if isinstance(s, str): - return URIRef(s) - def normalize_date_released(self, 
s: str) -> Literal: if isinstance(s, str): return Literal(s, datatype=SCHEMA.Date) diff --git a/swh/indexer/metadata_dictionary/composer.py b/swh/indexer/metadata_dictionary/composer.py --- a/swh/indexer/metadata_dictionary/composer.py +++ b/swh/indexer/metadata_dictionary/composer.py @@ -34,15 +34,11 @@ "description", "version", "keywords", - "homepage", "license", "author", "authors", ] - - def normalize_homepage(self, s): - if isinstance(s, str): - return URIRef(s) + uri_fields = ["homepage"] def normalize_license(self, s): if isinstance(s, str): diff --git a/swh/indexer/metadata_dictionary/dart.py b/swh/indexer/metadata_dictionary/dart.py --- a/swh/indexer/metadata_dictionary/dart.py +++ b/swh/indexer/metadata_dictionary/dart.py @@ -39,21 +39,17 @@ "keywords", "description", "name", - "homepage", "issue_tracker", "platforms", "license" # license will only be used with the SPDX Identifier ] + uri_fields = ["homepage"] def normalize_license(self, s): if isinstance(s, str): return SPDX + s - def normalize_homepage(self, s): - if isinstance(s, str): - return URIRef(s) - def _translate_author(self, graph, s): name_email_re = re.compile("(?P<name>.*?)( <(?P<email>.*)>)") if isinstance(s, str): diff --git a/swh/indexer/metadata_dictionary/npm.py b/swh/indexer/metadata_dictionary/npm.py --- a/swh/indexer/metadata_dictionary/npm.py +++ b/swh/indexer/metadata_dictionary/npm.py @@ -25,7 +25,8 @@ name = "npm" mapping = CROSSWALK_TABLE["NodeJS"] filename = b"package.json" - string_fields = ["name", "version", "homepage", "description", "email"] + string_fields = ["name", "version", "description", "email"] + uri_fields = ["homepage"] _schema_shortcuts = { "github": "git+https://github.com/%s.git", @@ -271,15 +272,6 @@ if isinstance(s, str): return SPDX + s - def normalize_homepage(self, s): - """https://docs.npmjs.com/files/package.json#homepage - - >>> NpmMapping().normalize_homepage('https://example.org/~john.doe') - rdflib.term.URIRef('https://example.org/~john.doe') - """ - if 
isinstance(s, str): - return URIRef(s) - def normalize_keywords(self, lst): """https://docs.npmjs.com/files/package.json#homepage diff --git a/swh/indexer/metadata_dictionary/nuget.py b/swh/indexer/metadata_dictionary/nuget.py --- a/swh/indexer/metadata_dictionary/nuget.py +++ b/swh/indexer/metadata_dictionary/nuget.py @@ -36,15 +36,14 @@ string_fields = [ "description", "version", - "projectUrl", "name", "tags", "license", - "licenseUrl", "summary", "copyright", "language", ] + uri_fields = ["projectUrl", "licenseUrl"] @classmethod def detect_metadata_files(cls, file_entries: List[DirectoryLsEntry]) -> List[Sha1]: @@ -56,10 +55,6 @@ def _translate_dict(self, d: Dict[str, Any]) -> Dict[str, Any]: return super()._translate_dict(d.get("package", {}).get("metadata", {})) - def normalize_projectUrl(self, s): - if isinstance(s, str): - return URIRef(s) - def translate_repository(self, graph, root, v): if isinstance(v, dict) and isinstance(v["@url"], str): codemeta_key = URIRef(self.mapping["repository.url"]) @@ -80,10 +75,6 @@ else: return None - def normalize_licenseUrl(self, s): - if isinstance(s, str): - return URIRef(s) - def translate_authors(self, graph: Graph, root, s): if isinstance(s, str): authors = [] diff --git a/swh/indexer/metadata_dictionary/ruby.py b/swh/indexer/metadata_dictionary/ruby.py --- a/swh/indexer/metadata_dictionary/ruby.py +++ b/swh/indexer/metadata_dictionary/ruby.py @@ -34,6 +34,7 @@ name = "gemspec" mapping = CROSSWALK_TABLE["Ruby Gem"] string_fields = ["name", "version", "description", "summary", "email"] + uri_fields = ["homepage"] _re_spec_new = re.compile(r".*Gem::Specification.new +(do|\{) +\|.*\|.*") _re_spec_entry = re.compile(r"\s*\w+\.(?P<key>\w+)\s*=\s*(?P<expr>.*)") @@ -112,10 +113,6 @@ if isinstance(tree, ast.Expression): return evaluator(tree.body) - def normalize_homepage(self, s): - if isinstance(s, str): - return URIRef(s) - def normalize_license(self, s): if isinstance(s, str): return SPDX + s