diff --git a/swh/indexer/metadata_dictionary/maven.py b/swh/indexer/metadata_dictionary/maven.py
index a374a5e..8b3e48d 100644
--- a/swh/indexer/metadata_dictionary/maven.py
+++ b/swh/indexer/metadata_dictionary/maven.py
@@ -1,159 +1,162 @@
 # Copyright (C) 2018-2022  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 import os
 from typing import Any, Dict
 
 from rdflib import Graph, Literal, URIRef
 
 from swh.indexer.codemeta import CROSSWALK_TABLE
 from swh.indexer.namespaces import SCHEMA
 
 from .base import SingleFileIntrinsicMapping, XmlMapping
 from .utils import prettyprint_graph  # noqa
 
 
 class MavenMapping(XmlMapping, SingleFileIntrinsicMapping):
     """
     dedicated class for Maven (pom.xml) mapping and translation
     """
 
     name = "maven"
     filename = b"pom.xml"
     mapping = CROSSWALK_TABLE["Java (Maven)"]
     string_fields = ["name", "version", "description", "email"]
 
     _default_repository = {"url": "https://repo.maven.apache.org/maven2/"}
 
     def _translate_dict(self, d: Dict[str, Any]) -> Dict[str, Any]:
         return super()._translate_dict(d.get("project") or {})
 
     def extra_translation(self, graph: Graph, root, d):
         self.parse_repositories(graph, root, d)
 
     def parse_repositories(self, graph: Graph, root, d):
         """https://maven.apache.org/pom.html#Repositories
 
         >>> import rdflib
         >>> import xmltodict
         >>> from pprint import pprint
         >>> d = xmltodict.parse('''
         ... <repositories>
         ...   <repository>
         ...     <id>codehausSnapshots</id>
         ...     <name>Codehaus Snapshots</name>
         ...     <url>http://snapshots.maven.codehaus.org/maven2</url>
         ...     <layout>default</layout>
         ...   </repository>
         ... </repositories>
         ... ''')
         >>> MavenMapping().parse_repositories(rdflib.Graph(), rdflib.BNode(), d)
         """
         repositories = d.get("repositories")
         if not repositories:
             self.parse_repository(graph, root, d, self._default_repository)
         elif isinstance(repositories, dict):
             repositories = repositories.get("repository") or []
             if not isinstance(repositories, list):
                 repositories = [repositories]
             for repo in repositories:
                 self.parse_repository(graph, root, d, repo)
 
     def parse_repository(self, graph: Graph, root, d, repo):
         if not isinstance(repo, dict):
             return
         if repo.get("layout", "default") != "default":
             return  # TODO ?
         url = repo.get("url")
         group_id = d.get("groupId")
         artifact_id = d.get("artifactId")
         if (
             isinstance(url, str)
             and isinstance(group_id, str)
             and isinstance(artifact_id, str)
         ):
             repo = os.path.join(url, *group_id.split("."), artifact_id)
+            if "${" in repo:
+                # Often used as templating in pom.xml files collected from VCSs
+                return
             graph.add((root, SCHEMA.codeRepository, URIRef(repo)))
 
     def normalize_groupId(self, id_):
         """https://maven.apache.org/pom.html#Maven_Coordinates
 
         >>> MavenMapping().normalize_groupId('org.example')
         rdflib.term.Literal('org.example')
         """
         if isinstance(id_, str):
             return Literal(id_)
 
     def translate_licenses(self, graph, root, licenses):
         """https://maven.apache.org/pom.html#Licenses
 
         >>> import xmltodict
         >>> import json
         >>> d = xmltodict.parse('''
         ... <licenses>
         ...   <license>
         ...     <name>Apache License, Version 2.0</name>
         ...     <url>https://www.apache.org/licenses/LICENSE-2.0.txt</url>
         ...   </license>
         ... </licenses>
         ... ''')
         >>> print(json.dumps(d, indent=4))
         {
             "licenses": {
                 "license": {
                     "name": "Apache License, Version 2.0",
                     "url": "https://www.apache.org/licenses/LICENSE-2.0.txt"
                 }
             }
         }
         >>> graph = Graph()
         >>> root = URIRef("http://example.org/test-software")
         >>> MavenMapping().translate_licenses(graph, root, d["licenses"])
         >>> prettyprint_graph(graph, root)
         {
             "@id": ...,
             "http://schema.org/license": {
                 "@id": "https://www.apache.org/licenses/LICENSE-2.0.txt"
             }
         }
 
         or, if there are more than one license:
 
         >>> import xmltodict
         >>> from pprint import pprint
         >>> d = xmltodict.parse('''
         ... <licenses>
         ...   <license>
         ...     <name>Apache License, Version 2.0</name>
         ...     <url>https://www.apache.org/licenses/LICENSE-2.0.txt</url>
         ...   </license>
         ...   <license>
         ...     <name>MIT License</name>
         ...     <url>https://opensource.org/licenses/MIT</url>
         ...   </license>
         ... </licenses>
         ... ''')
         >>> graph = Graph()
         >>> root = URIRef("http://example.org/test-software")
         >>> MavenMapping().translate_licenses(graph, root, d["licenses"])
         >>> pprint(set(graph.triples((root, URIRef("http://schema.org/license"), None))))
         {(rdflib.term.URIRef('http://example.org/test-software'),
           rdflib.term.URIRef('http://schema.org/license'),
           rdflib.term.URIRef('https://opensource.org/licenses/MIT')),
          (rdflib.term.URIRef('http://example.org/test-software'),
           rdflib.term.URIRef('http://schema.org/license'),
           rdflib.term.URIRef('https://www.apache.org/licenses/LICENSE-2.0.txt'))}
         """
 
         if not isinstance(licenses, dict):
             return
         licenses = licenses.get("license")
         if isinstance(licenses, dict):
             licenses = [licenses]
         elif not isinstance(licenses, list):
             return
         for license in licenses:
             if isinstance(license, dict) and isinstance(license.get("url"), str):
                 graph.add((root, SCHEMA.license, URIRef(license["url"])))
diff --git a/swh/indexer/metadata_dictionary/npm.py b/swh/indexer/metadata_dictionary/npm.py
index 1540ef6..5486539 100644
--- a/swh/indexer/metadata_dictionary/npm.py
+++ b/swh/indexer/metadata_dictionary/npm.py
@@ -1,282 +1,288 @@
 # Copyright (C) 2018-2022  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 import re
 import urllib.parse
 
 from rdflib import RDF, BNode, Graph, Literal, URIRef
 
 from swh.indexer.codemeta import CROSSWALK_TABLE
 from swh.indexer.namespaces import SCHEMA
 
 from .base import JsonMapping, SingleFileIntrinsicMapping
 from .utils import add_list, prettyprint_graph  # noqa
 
 SPDX = URIRef("https://spdx.org/licenses/")
 
 
 class NpmMapping(JsonMapping, SingleFileIntrinsicMapping):
     """
     dedicated class for NPM (package.json) mapping and translation
     """
 
     name = "npm"
     mapping = CROSSWALK_TABLE["NodeJS"]
     filename = b"package.json"
     string_fields = ["name", "version", "description", "email"]
     uri_fields = ["homepage"]
 
     _schema_shortcuts = {
         "github": "git+https://github.com/%s.git",
         "gist": "git+https://gist.github.com/%s.git",
         "gitlab": "git+https://gitlab.com/%s.git",
         # Bitbucket supports both hg and git, and the shortcut does not
         # tell which one to use.
         # 'bitbucket': 'https://bitbucket.org/',
     }
 
     def normalize_repository(self, d):
         """https://docs.npmjs.com/files/package.json#repository
 
         >>> NpmMapping().normalize_repository({
         ...     'type': 'git',
         ...     'url': 'https://example.org/foo.git'
         ... })
         rdflib.term.URIRef('git+https://example.org/foo.git')
         >>> NpmMapping().normalize_repository(
         ...     'gitlab:foo/bar')
         rdflib.term.URIRef('git+https://gitlab.com/foo/bar.git')
         >>> NpmMapping().normalize_repository(
         ...     'foo/bar')
         rdflib.term.URIRef('git+https://github.com/foo/bar.git')
         """
         if (
             isinstance(d, dict)
             and isinstance(d.get("type"), str)
             and isinstance(d.get("url"), str)
         ):
             url = "{type}+{url}".format(**d)
         elif isinstance(d, str):
             if "://" in d:
                 url = d
             elif ":" in d:
                 (schema, rest) = d.split(":", 1)
                 if schema in self._schema_shortcuts:
                     url = self._schema_shortcuts[schema] % rest
                 else:
                     return None
             else:
                 url = self._schema_shortcuts["github"] % d
 
         else:
             return None
 
         return URIRef(url)
 
     def normalize_bugs(self, d):
         """https://docs.npmjs.com/files/package.json#bugs
 
         >>> NpmMapping().normalize_bugs({
         ...     'url': 'https://example.org/bugs/',
         ...     'email': 'bugs@example.org'
         ... })
         rdflib.term.URIRef('https://example.org/bugs/')
         >>> NpmMapping().normalize_bugs(
         ...     'https://example.org/bugs/')
         rdflib.term.URIRef('https://example.org/bugs/')
         """
         if isinstance(d, dict) and isinstance(d.get("url"), str):
             return URIRef(d["url"])
         elif isinstance(d, str):
             return URIRef(d)
         else:
             return None
 
     _parse_author = re.compile(
         r"^ *" r"(?P<name>.*?)" r"( +<(?P<email>.*)>)?" r"( +\((?P<url>.*)\))?" r" *$"
     )
 
     def translate_author(self, graph: Graph, root, d):
         r"""https://docs.npmjs.com/files/package.json#people-fields-author-contributors'
 
         >>> from pprint import pprint
         >>> root = URIRef("http://example.org/test-software")
         >>> graph = Graph()
         >>> NpmMapping().translate_author(graph, root, {
         ...     'name': 'John Doe',
         ...     'email': 'john.doe@example.org',
         ...     'url': 'https://example.org/~john.doe',
         ... })
         >>> prettyprint_graph(graph, root)
         {
             "@id": ...,
             "http://schema.org/author": {
                 "@list": [
                     {
                         "@type": "http://schema.org/Person",
                         "http://schema.org/email": "john.doe@example.org",
                         "http://schema.org/name": "John Doe",
                         "http://schema.org/url": {
                             "@id": "https://example.org/~john.doe"
                         }
                     }
                 ]
             }
         }
         >>> graph = Graph()
         >>> NpmMapping().translate_author(graph, root,
         ...     'John Doe <john.doe@example.org> (https://example.org/~john.doe)'
         ... )
         >>> prettyprint_graph(graph, root)
         {
             "@id": ...,
             "http://schema.org/author": {
                 "@list": [
                     {
                         "@type": "http://schema.org/Person",
                         "http://schema.org/email": "john.doe@example.org",
                         "http://schema.org/name": "John Doe",
                         "http://schema.org/url": {
                             "@id": "https://example.org/~john.doe"
                         }
                     }
                 ]
             }
         }
         >>> graph = Graph()
         >>> NpmMapping().translate_author(graph, root, {
         ...     'name': 'John Doe',
         ...     'email': 'john.doe@example.org',
         ...     'url': 'https:\\\\example.invalid/~john.doe',
         ... })
         >>> prettyprint_graph(graph, root)
         {
             "@id": ...,
             "http://schema.org/author": {
                 "@list": [
                     {
                         "@type": "http://schema.org/Person",
                         "http://schema.org/email": "john.doe@example.org",
                         "http://schema.org/name": "John Doe"
                     }
                 ]
             }
         }
         """  # noqa
         author = BNode()
         graph.add((author, RDF.type, SCHEMA.Person))
         if isinstance(d, dict):
             name = d.get("name", None)
             email = d.get("email", None)
             url = d.get("url", None)
         elif isinstance(d, str):
             match = self._parse_author.match(d)
             if not match:
                 return None
             name = match.group("name")
             email = match.group("email")
             url = match.group("url")
         else:
             return None
 
         if name and isinstance(name, str):
             graph.add((author, SCHEMA.name, Literal(name)))
         if email and isinstance(email, str):
             graph.add((author, SCHEMA.email, Literal(email)))
         if url and isinstance(url, str):
             # Workaround for https://github.com/digitalbazaar/pyld/issues/91 : drop
             # URLs that are blatantly invalid early, so PyLD does not crash.
             parsed_url = urllib.parse.urlparse(url)
             if parsed_url.netloc:
                 graph.add((author, SCHEMA.url, URIRef(url)))
 
         add_list(graph, root, SCHEMA.author, [author])
 
     def normalize_description(self, description):
         r"""Try to re-decode ``description`` as UTF-16, as this is a somewhat common
         mistake that causes issues in the database because of null bytes in JSON.
 
         >>> NpmMapping().normalize_description("foo bar")
         rdflib.term.Literal('foo bar')
         >>> NpmMapping().normalize_description(
         ...     "\ufffd\ufffd#\x00 \x00f\x00o\x00o\x00 \x00b\x00a\x00r\x00\r\x00 \x00"
         ... )
         rdflib.term.Literal('foo bar')
         >>> NpmMapping().normalize_description(
         ...     "\ufffd\ufffd\x00#\x00 \x00f\x00o\x00o\x00 \x00b\x00a\x00r\x00\r\x00 "
         ... )
         rdflib.term.Literal('foo bar')
         >>> NpmMapping().normalize_description(
         ...     # invalid UTF-16 and meaningless UTF-8:
         ...     "\ufffd\ufffd\x00#\x00\x00\x00 \x00\x00\x00\x00f\x00\x00\x00\x00"
         ... ) is None
         True
         >>> NpmMapping().normalize_description(
         ...     # ditto (ut looks like little-endian at first)
         ...     "\ufffd\ufffd#\x00\x00\x00 \x00\x00\x00\x00f\x00\x00\x00\x00\x00"
         ... ) is None
         True
         >>> NpmMapping().normalize_description(None) is None
         True
         """
         if not isinstance(description, str):
             return None
         # XXX: if this function ever need to support more cases, consider
         # switching to https://pypi.org/project/ftfy/ instead of adding more hacks
         if description.startswith("\ufffd\ufffd") and "\x00" in description:
             # 2 unicode replacement characters followed by '# ' encoded as UTF-16
             # is a common mistake, which indicates a README.md was saved as UTF-16,
             # and some NPM tool opened it as UTF-8 and used the first line as
             # description.
 
             description_bytes = description.encode()
 
             # Strip the the two unicode replacement characters
             assert description_bytes.startswith(b"\xef\xbf\xbd\xef\xbf\xbd")
             description_bytes = description_bytes[6:]
 
             # If the following attempts fail to recover the description, discard it
             # entirely because the current indexer storage backend (postgresql) cannot
             # store zero bytes in JSON columns.
             description = None
 
             if not description_bytes.startswith(b"\x00"):
                 # try UTF-16 little-endian (the most common) first
                 try:
                     description = description_bytes.decode("utf-16le")
                 except UnicodeDecodeError:
                     pass
             if description is None:
                 # if it fails, try UTF-16 big-endian
                 try:
                     description = description_bytes.decode("utf-16be")
                 except UnicodeDecodeError:
                     pass
 
             if description:
                 if description.startswith("# "):
                     description = description[2:]
                 return Literal(description.rstrip())
             else:
                 return None
         return Literal(description)
 
     def normalize_license(self, s):
         """https://docs.npmjs.com/files/package.json#license
 
         >>> NpmMapping().normalize_license('MIT')
         rdflib.term.URIRef('https://spdx.org/licenses/MIT')
         """
         if isinstance(s, str):
+            if s.startswith("SEE LICENSE IN "):
+                # Very common pattern, because it is an example in the specification.
+                # It is followed by the filename; and the indexer architecture currently
+                # does not allow accessing that from metadata mappings.
+                # (Plus, a hypothetical license mapping would eventually pick it up)
+                return
             return SPDX + s
 
     def normalize_keywords(self, lst):
         """https://docs.npmjs.com/files/package.json#homepage
 
         >>> NpmMapping().normalize_keywords(['foo', 'bar'])
         [rdflib.term.Literal('foo'), rdflib.term.Literal('bar')]
         """
         if isinstance(lst, list):
             return [Literal(x) for x in lst if isinstance(x, str)]
diff --git a/swh/indexer/tests/metadata_dictionary/test_maven.py b/swh/indexer/tests/metadata_dictionary/test_maven.py
index 0267e95..afde286 100644
--- a/swh/indexer/tests/metadata_dictionary/test_maven.py
+++ b/swh/indexer/tests/metadata_dictionary/test_maven.py
@@ -1,365 +1,406 @@
 # Copyright (C) 2017-2022  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 import logging
 
 from hypothesis import HealthCheck, given, settings
 
 from swh.indexer.metadata_dictionary import MAPPINGS
 
 from ..utils import xml_document_strategy
 
 
 def test_compute_metadata_maven():
     raw_content = b"""
     <project>
       <name>Maven Default Project</name>
       <modelVersion>4.0.0</modelVersion>
       <groupId>com.mycompany.app</groupId>
       <artifactId>my-app</artifactId>
       <version>1.2.3</version>
       <repositories>
         <repository>
           <id>central</id>
           <name>Maven Repository Switchboard</name>
           <layout>default</layout>
           <url>http://repo1.maven.org/maven2</url>
           <snapshots>
             <enabled>false</enabled>
           </snapshots>
         </repository>
       </repositories>
       <licenses>
         <license>
           <name>Apache License, Version 2.0</name>
           <url>https://www.apache.org/licenses/LICENSE-2.0.txt</url>
           <distribution>repo</distribution>
           <comments>A business-friendly OSS license</comments>
         </license>
       </licenses>
     </project>"""
     result = MAPPINGS["MavenMapping"]().translate(raw_content)
     assert result == {
         "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
         "type": "SoftwareSourceCode",
         "name": "Maven Default Project",
         "schema:identifier": "com.mycompany.app",
         "version": "1.2.3",
         "license": "https://www.apache.org/licenses/LICENSE-2.0.txt",
         "codeRepository": ("http://repo1.maven.org/maven2/com/mycompany/app/my-app"),
     }
 
 
 def test_compute_metadata_maven_empty():
     raw_content = b"""
     <project>
     </project>"""
     result = MAPPINGS["MavenMapping"]().translate(raw_content)
     assert result == {
         "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
         "type": "SoftwareSourceCode",
     }
 
 
 def test_compute_metadata_maven_almost_empty():
     raw_content = b"""
     <project>
       <foo/>
     </project>"""
     result = MAPPINGS["MavenMapping"]().translate(raw_content)
     assert result == {
         "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
         "type": "SoftwareSourceCode",
     }
 
 
 def test_compute_metadata_maven_invalid_xml(caplog):
     expected_warning = (
         "swh.indexer.metadata_dictionary.maven.MavenMapping",
         logging.WARNING,
         "Error parsing XML from foo",
     )
     caplog.at_level(logging.WARNING, logger="swh.indexer.metadata_dictionary")
 
     raw_content = b"""
     <project>"""
     caplog.clear()
     result = MAPPINGS["MavenMapping"]("foo").translate(raw_content)
     assert caplog.record_tuples == [expected_warning], result
     assert result is None
 
     raw_content = b"""
     """
     caplog.clear()
     result = MAPPINGS["MavenMapping"]("foo").translate(raw_content)
     assert caplog.record_tuples == [expected_warning], result
     assert result is None
 
 
 def test_compute_metadata_maven_unknown_encoding(caplog):
     expected_warning = (
         "swh.indexer.metadata_dictionary.maven.MavenMapping",
         logging.WARNING,
         "Error detecting XML encoding from foo",
     )
     caplog.at_level(logging.WARNING, logger="swh.indexer.metadata_dictionary")
 
     raw_content = b"""<?xml version="1.0" encoding="foo"?>
     <project>
     </project>"""
     caplog.clear()
     result = MAPPINGS["MavenMapping"]("foo").translate(raw_content)
     assert caplog.record_tuples == [expected_warning], result
     assert result is None
 
     raw_content = b"""<?xml version="1.0" encoding="UTF-7"?>
     <project>
     </project>"""
     caplog.clear()
     result = MAPPINGS["MavenMapping"]("foo").translate(raw_content)
     assert caplog.record_tuples == [expected_warning], result
     assert result is None
 
 
 def test_compute_metadata_maven_invalid_encoding(caplog):
     expected_warning = [
         # libexpat1 <= 2.2.10-2+deb11u1
         [
             (
                 "swh.indexer.metadata_dictionary.maven.MavenMapping",
                 logging.WARNING,
                 "Error unidecoding XML from foo",
             )
         ],
         # libexpat1 >= 2.2.10-2+deb11u2
         [
             (
                 "swh.indexer.metadata_dictionary.maven.MavenMapping",
                 logging.WARNING,
                 "Error parsing XML from foo",
             )
         ],
     ]
     caplog.at_level(logging.WARNING, logger="swh.indexer.metadata_dictionary")
 
     raw_content = b"""<?xml version="1.0" encoding="UTF-8"?>
     <foo\xe5ct>
     </foo>"""
     caplog.clear()
     result = MAPPINGS["MavenMapping"]("foo").translate(raw_content)
     assert caplog.record_tuples in expected_warning, result
     assert result is None
 
 
 def test_compute_metadata_maven_minimal():
     raw_content = b"""
     <project>
       <name>Maven Default Project</name>
       <modelVersion>4.0.0</modelVersion>
       <groupId>com.mycompany.app</groupId>
       <artifactId>my-app</artifactId>
       <version>1.2.3</version>
     </project>"""
     result = MAPPINGS["MavenMapping"]().translate(raw_content)
     assert result == {
         "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
         "type": "SoftwareSourceCode",
         "name": "Maven Default Project",
         "schema:identifier": "com.mycompany.app",
         "version": "1.2.3",
         "codeRepository": (
             "https://repo.maven.apache.org/maven2/com/mycompany/app/my-app"
         ),
     }
 
 
 def test_compute_metadata_maven_empty_nodes():
     raw_content = b"""
     <project>
       <name>Maven Default Project</name>
       <modelVersion>4.0.0</modelVersion>
       <groupId>com.mycompany.app</groupId>
       <artifactId>my-app</artifactId>
       <version>1.2.3</version>
       <repositories>
       </repositories>
     </project>"""
     result = MAPPINGS["MavenMapping"]().translate(raw_content)
     assert result == {
         "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
         "type": "SoftwareSourceCode",
         "name": "Maven Default Project",
         "schema:identifier": "com.mycompany.app",
         "version": "1.2.3",
         "codeRepository": (
             "https://repo.maven.apache.org/maven2/com/mycompany/app/my-app"
         ),
     }
 
     raw_content = b"""
     <project>
       <name>Maven Default Project</name>
       <modelVersion>4.0.0</modelVersion>
       <groupId>com.mycompany.app</groupId>
       <artifactId>my-app</artifactId>
       <version></version>
     </project>"""
     result = MAPPINGS["MavenMapping"]().translate(raw_content)
     assert result == {
         "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
         "type": "SoftwareSourceCode",
         "name": "Maven Default Project",
         "schema:identifier": "com.mycompany.app",
         "codeRepository": (
             "https://repo.maven.apache.org/maven2/com/mycompany/app/my-app"
         ),
     }
 
     raw_content = b"""
     <project>
       <name></name>
       <modelVersion>4.0.0</modelVersion>
       <groupId>com.mycompany.app</groupId>
       <artifactId>my-app</artifactId>
       <version>1.2.3</version>
     </project>"""
     result = MAPPINGS["MavenMapping"]().translate(raw_content)
     assert result == {
         "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
         "type": "SoftwareSourceCode",
         "schema:identifier": "com.mycompany.app",
         "version": "1.2.3",
         "codeRepository": (
             "https://repo.maven.apache.org/maven2/com/mycompany/app/my-app"
         ),
     }
 
     raw_content = b"""
     <project>
       <name>Maven Default Project</name>
       <modelVersion>4.0.0</modelVersion>
       <groupId>com.mycompany.app</groupId>
       <artifactId>my-app</artifactId>
       <version>1.2.3</version>
       <licenses>
       </licenses>
     </project>"""
     result = MAPPINGS["MavenMapping"]().translate(raw_content)
     assert result == {
         "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
         "type": "SoftwareSourceCode",
         "name": "Maven Default Project",
         "schema:identifier": "com.mycompany.app",
         "version": "1.2.3",
         "codeRepository": (
             "https://repo.maven.apache.org/maven2/com/mycompany/app/my-app"
         ),
     }
 
     raw_content = b"""
     <project>
       <groupId></groupId>
       <version>1.2.3</version>
     </project>"""
     result = MAPPINGS["MavenMapping"]().translate(raw_content)
     assert result == {
         "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
         "type": "SoftwareSourceCode",
         "version": "1.2.3",
     }
 
 
 def test_compute_metadata_maven_invalid_licenses():
     raw_content = b"""
     <project>
       <name>Maven Default Project</name>
       <modelVersion>4.0.0</modelVersion>
       <groupId>com.mycompany.app</groupId>
       <artifactId>my-app</artifactId>
       <version>1.2.3</version>
       <licenses>
         foo
       </licenses>
     </project>"""
     result = MAPPINGS["MavenMapping"]().translate(raw_content)
     assert result == {
         "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
         "type": "SoftwareSourceCode",
         "name": "Maven Default Project",
         "schema:identifier": "com.mycompany.app",
         "version": "1.2.3",
         "codeRepository": (
             "https://repo.maven.apache.org/maven2/com/mycompany/app/my-app"
         ),
     }
 
 
 def test_compute_metadata_maven_multiple():
     """Tests when there are multiple code repos and licenses."""
     raw_content = b"""
     <project>
       <name>Maven Default Project</name>
       <modelVersion>4.0.0</modelVersion>
       <groupId>com.mycompany.app</groupId>
       <artifactId>my-app</artifactId>
       <version>1.2.3</version>
       <repositories>
         <repository>
           <id>central</id>
           <name>Maven Repository Switchboard</name>
           <layout>default</layout>
           <url>http://repo1.maven.org/maven2</url>
           <snapshots>
             <enabled>false</enabled>
           </snapshots>
         </repository>
         <repository>
           <id>example</id>
           <name>Example Maven Repo</name>
           <layout>default</layout>
           <url>http://example.org/maven2</url>
         </repository>
       </repositories>
       <licenses>
         <license>
           <name>Apache License, Version 2.0</name>
           <url>https://www.apache.org/licenses/LICENSE-2.0.txt</url>
           <distribution>repo</distribution>
           <comments>A business-friendly OSS license</comments>
         </license>
         <license>
           <name>MIT license</name>
           <url>https://opensource.org/licenses/MIT</url>
         </license>
       </licenses>
     </project>"""
     result = MAPPINGS["MavenMapping"]().translate(raw_content)
     assert set(result.pop("license")) == {
         "https://www.apache.org/licenses/LICENSE-2.0.txt",
         "https://opensource.org/licenses/MIT",
     }, result
     assert set(result.pop("codeRepository")) == {
         "http://repo1.maven.org/maven2/com/mycompany/app/my-app",
         "http://example.org/maven2/com/mycompany/app/my-app",
     }, result
     assert result == {
         "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
         "type": "SoftwareSourceCode",
         "name": "Maven Default Project",
         "schema:identifier": "com.mycompany.app",
         "version": "1.2.3",
     }
 
 
+def test_compute_metadata_maven_invalid_repository():
+    raw_content = b"""
+    <project>
+      <name>Maven Default Project</name>
+      <modelVersion>4.0.0</modelVersion>
+      <groupId>com.mycompany.app</groupId>
+      <artifactId>my-app</artifactId>
+      <version>1.2.3</version>
+      <repositories>
+        <repository>
+          <id>tcc-transaction-internal-releases</id>
+          <name>internal repository for released artifacts</name>
+          <url>${repo.internal.releases.url}</url>
+          <snapshots>
+              <enabled>false</enabled>
+          </snapshots>
+          <releases>
+              <enabled>true</enabled>
+          </releases>
+        </repository>
+      </repositories>
+      <licenses>
+        <license>
+          <name>Apache License, Version 2.0</name>
+          <url>https://www.apache.org/licenses/LICENSE-2.0.txt</url>
+          <distribution>repo</distribution>
+          <comments>A business-friendly OSS license</comments>
+        </license>
+      </licenses>
+    </project>"""
+    result = MAPPINGS["MavenMapping"]().translate(raw_content)
+    assert result == {
+        "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
+        "type": "SoftwareSourceCode",
+        "name": "Maven Default Project",
+        "schema:identifier": "com.mycompany.app",
+        "version": "1.2.3",
+        "license": "https://www.apache.org/licenses/LICENSE-2.0.txt",
+    }
+
+
 @settings(suppress_health_check=[HealthCheck.too_slow])
 @given(
     xml_document_strategy(
         keys=list(MAPPINGS["MavenMapping"].mapping),  # type: ignore
         root="project",
         xmlns="http://maven.apache.org/POM/4.0.0",
     )
 )
 def test_maven_adversarial(doc):
     MAPPINGS["MavenMapping"]().translate(doc)
diff --git a/swh/indexer/tests/metadata_dictionary/test_npm.py b/swh/indexer/tests/metadata_dictionary/test_npm.py
index 64f4ed2..cdaf6b7 100644
--- a/swh/indexer/tests/metadata_dictionary/test_npm.py
+++ b/swh/indexer/tests/metadata_dictionary/test_npm.py
@@ -1,402 +1,420 @@
 # Copyright (C) 2017-2022  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 import json
 
 from hypothesis import HealthCheck, given, settings
 import pytest
 
 from swh.indexer.metadata_detector import detect_metadata
 from swh.indexer.metadata_dictionary import MAPPINGS
 from swh.indexer.storage.model import ContentMetadataRow
 
 from ..test_metadata import TRANSLATOR_TOOL, ContentMetadataTestIndexer
 from ..utils import (
     BASE_TEST_CONFIG,
     MAPPING_DESCRIPTION_CONTENT_SHA1,
     json_document_strategy,
 )
 
 
 def test_compute_metadata_none():
     """
     Translating empty content must yield no metadata at all,
     i.e. ``translate`` returns ``None``.
     """
     content = b""
 
     # None means that no metadata was found or that an error occurred
     result = MAPPINGS["NpmMapping"]().translate(content)
 
     # Identity check rather than ``==``: only the None singleton itself is
     # accepted, not a value that merely compares equal to it.
     assert result is None
 
 
 def test_compute_metadata_npm():
     """
     Translate a small package.json (name, version, description,
     repository and author) and compare it with the expected document.
     """
     package_json = b"""
         {
             "name": "test_metadata",
             "version": "0.0.2",
             "description": "Simple package.json test for indexer",
               "repository": {
                 "type": "git",
                 "url": "https://github.com/moranegg/metadata_test"
             },
             "author": {
                 "email": "moranegg@example.com",
                 "name": "Morane G"
             }
         }
     """
     expected_author = {
         "type": "Person",
         "name": "Morane G",
         "email": "moranegg@example.com",
     }
     expected = {
         "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
         "type": "SoftwareSourceCode",
         "name": "test_metadata",
         "version": "0.0.2",
         "description": "Simple package.json test for indexer",
         "codeRepository": "git+https://github.com/moranegg/metadata_test",
         "author": [expected_author],
     }
 
     translated = MAPPINGS["NpmMapping"]().translate(package_json)
     assert expected == translated
 
 
 def test_compute_metadata_invalid_description_npm():
     """
     Translate a package.json whose description is the integer 1234;
     the expected document keeps name and version but has no description.
     """
     package_json = b"""
         {
             "name": "test_metadata",
             "version": "0.0.2",
             "description": 1234
     }
     """
     expected = {
         "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
         "type": "SoftwareSourceCode",
         "name": "test_metadata",
         "version": "0.0.2",
     }
 
     translated = MAPPINGS["NpmMapping"]().translate(package_json)
     assert expected == translated
 
 
 def test_index_content_metadata_npm(storage, obj_storage):
     """
     Index two package.json contents and one Python file with the content
     metadata indexer, then compare the rows read back from its storage.
 
     - three content ids are submitted, but only the two package.json
       contents are expected to come back with a metadata row
     - the tool ``id`` and any ``keywords`` entry are stripped from the
       fetched rows before the comparison
     """
     content_ids = [
         MAPPING_DESCRIPTION_CONTENT_SHA1[description]
         for description in (
             "json:test-metadata-package.json",
             "json:npm-package.json",
             "python:code",
         )
     ]
     test_package_id, npm_package_id = content_ids[:2]
 
     # per the original note: this indexer only computes metadata for
     # package.json (npm context) through a hard mapping
     config = BASE_TEST_CONFIG.copy()
     config["tools"] = [TRANSLATOR_TOOL]
     indexer = ContentMetadataTestIndexer(config=config)
     indexer.run(content_ids, log_suffix="unknown content")
     fetched_rows = list(indexer.idx_storage.content_metadata_get(content_ids))
 
     test_package_row = ContentMetadataRow(
         id=test_package_id,
         tool=TRANSLATOR_TOOL,
         metadata={
             "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
             "type": "SoftwareSourceCode",
             "codeRepository": "git+https://github.com/moranegg/metadata_test",
             "description": "Simple package.json test for indexer",
             "name": "test_metadata",
             "version": "0.0.1",
         },
     )
     npm_package_row = ContentMetadataRow(
         id=npm_package_id,
         tool=TRANSLATOR_TOOL,
         metadata={
             "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
             "type": "SoftwareSourceCode",
             "issueTracker": "https://github.com/npm/npm/issues",
             "author": [
                 {
                     "type": "Person",
                     "name": "Isaac Z. Schlueter",
                     "email": "i@izs.me",
                     "url": "http://blog.izs.me",
                 }
             ],
             "codeRepository": "git+https://github.com/npm/npm",
             "description": "a package manager for JavaScript",
             "license": "https://spdx.org/licenses/Artistic-2.0",
             "version": "5.0.3",
             "name": "npm",
             "url": "https://docs.npmjs.com/",
         },
     )
     expected_rows = [test_package_row, npm_package_row]
 
     # drop the fields that are not part of the expectation
     for row in fetched_rows:
         del row.tool["id"]
         row.metadata.pop("keywords", None)
 
     # NOTE (from the original author): this comparison has been seen to
     # return False sometimes because of nested lists
     assert expected_rows == fetched_rows
 
 
 def test_npm_null_list_item_normalization():
     """Translate a package.json whose ``keywords`` and ``homepage`` lists
     each hold one string and one ``null``; the expected document keeps only
     the string of each list.
     """
     raw_package = b"""{
         "name": "foo",
         "keywords": [
             "foo",
             null
         ],
         "homepage": [
             "http://example.org/",
             null
         ]
     }"""
     expected = {
         "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
         "name": "foo",
         "type": "SoftwareSourceCode",
         "url": "http://example.org/",
         "keywords": "foo",
     }
     translated = MAPPINGS["NpmMapping"]().translate(raw_package)
     assert translated == expected
 
 
 def test_npm_bugs_normalization():
     """Translate three shapes of the ``bugs`` field of a package.json.
 
     A dictionary with a ``url`` and a plain string are both expected to give
     an ``issueTracker``; a dictionary holding only an ``email`` is expected
     to give none.
     """
     # valid dictionary
     bugs_with_url = b"""{
         "name": "foo",
         "bugs": {
             "url": "https://github.com/owner/project/issues",
             "email": "foo@example.com"
         }
     }"""
     # "invalid" dictionary
     bugs_email_only = b"""{
         "name": "foo",
         "bugs": {
             "email": "foo@example.com"
         }
     }"""
     # string
     bugs_as_string = b"""{
         "name": "foo",
         "bugs": "https://github.com/owner/project/issues"
     }"""
 
     without_tracker = {
         "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
         "name": "foo",
         "type": "SoftwareSourceCode",
     }
     with_tracker = {
         **without_tracker,
         "issueTracker": "https://github.com/owner/project/issues",
     }
 
     cases = (
         (bugs_with_url, with_tracker),
         (bugs_email_only, without_tracker),
         (bugs_as_string, with_tracker),
     )
     for raw_package, expected in cases:
         # a fresh mapping instance is used for every document
         assert MAPPINGS["NpmMapping"]().translate(raw_package) == expected
 
 
 def test_npm_repository_normalization():
     """Translate five shapes of the ``repository`` field of a package.json.
 
     The explicit git dictionary, the ``github:`` shortcut and the bare
     ``owner/name`` shortcut are all expected to give the same GitHub
     ``codeRepository``; the ``gitlab:`` shortcut a GitLab one; a dictionary
     without ``url`` none at all.
     """
     # normal
     explicit_git = b"""{
         "name": "foo",
         "repository": {
             "type" : "git",
             "url" : "https://github.com/npm/cli.git"
         }
     }"""
     # missing url
     missing_url = b"""{
         "name": "foo",
         "repository": {
             "type" : "git"
         }
     }"""
     # github shortcut
     github_shortcut = b"""{
         "name": "foo",
         "repository": "github:npm/cli"
     }"""
     # github shortshortcut
     github_bare = b"""{
         "name": "foo",
         "repository": "npm/cli"
     }"""
     # gitlab shortcut
     gitlab_shortcut = b"""{
         "name": "foo",
         "repository": "gitlab:user/repo"
     }"""
 
     no_repository = {
         "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
         "name": "foo",
         "type": "SoftwareSourceCode",
     }
     on_github = {
         **no_repository,
         "codeRepository": "git+https://github.com/npm/cli.git",
     }
     on_gitlab = {
         **no_repository,
         "codeRepository": "git+https://gitlab.com/user/repo.git",
     }
 
     cases = (
         (explicit_git, on_github),
         (missing_url, no_repository),
         (github_shortcut, on_github),
         (github_bare, on_github),
         (gitlab_shortcut, on_gitlab),
     )
     for raw_package, expected in cases:
         # a fresh mapping instance is used for every document
         assert MAPPINGS["NpmMapping"]().translate(raw_package) == expected
 
 
 def test_npm_invalid_uris():
     """Translate package.json documents whose ``homepage`` and/or author
     ``url`` are empty or lack the ``//`` after the scheme.
 
     In every case the expected document omits the offending URI and keeps
     the rest (version, author name, and any well-formed URI).
     """
     empty_homepage = rb"""{
   "version": "1.0.0",
   "homepage": "",
   "author": {
     "name": "foo",
     "url": "http://example.org"
   }
 }"""
     empty_author_url = rb"""{
   "version": "1.0.0",
   "homepage": "http://example.org",
   "author": {
     "name": "foo",
     "url": ""
   }
 }"""
     both_empty = rb"""{
   "version": "1.0.0",
   "homepage": "",
   "author": {
     "name": "foo",
     "url": ""
   }
 }"""
     both_malformed = rb"""{
   "version": "1.0.0",
   "homepage": "http:example.org",
   "author": {
     "name": "foo",
     "url": "http:example.com"
   }
 }"""
 
     base = {
         "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
         "type": "SoftwareSourceCode",
         "version": "1.0.0",
     }
     bare_author = {"name": "foo", "type": "Person"}
 
     cases = (
         (
             empty_homepage,
             {**base, "author": [{**bare_author, "url": "http://example.org"}]},
         ),
         (
             empty_author_url,
             {**base, "author": [bare_author], "url": "http://example.org"},
         ),
         (both_empty, {**base, "author": [bare_author]}),
         (both_malformed, {**base, "author": [bare_author]}),
     )
     for raw_package, expected in cases:
         # a fresh mapping instance is used for every document
         assert MAPPINGS["NpmMapping"]().translate(raw_package) == expected
 
 
+def test_npm_invalid_licenses():
+    package_json = rb"""{
+  "version": "1.0.0",
+  "license": "SEE LICENSE IN LICENSE.md",
+  "author": {
+    "name": "foo",
+    "url": "http://example.org"
+  }
+}"""
+    result = MAPPINGS["NpmMapping"]().translate(package_json)
+    assert result == {
+        "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
+        "type": "SoftwareSourceCode",
+        "author": [{"name": "foo", "type": "Person", "url": "http://example.org"}],
+        "version": "1.0.0",
+    }
+
+
 @settings(suppress_health_check=[HealthCheck.too_slow])
 @given(json_document_strategy(keys=list(MAPPINGS["NpmMapping"].mapping)))  # type: ignore
 def test_npm_adversarial(doc):
     """Feed documents produced by ``json_document_strategy`` to the npm mapping.
 
     Nothing is asserted about the output: the test only requires that
     ``translate`` completes without raising for each generated ``doc``.
     """
     serialized = json.dumps(doc)
     payload = serialized.encode()
     MAPPINGS["NpmMapping"]().translate(payload)
 
 
 @pytest.mark.parametrize(
     "filename", [b"package.json", b"Package.json", b"PACKAGE.json", b"PACKAGE.JSON"]
 )
 def test_detect_metadata_package_json(filename):
     """Check that ``detect_metadata`` reports the package.json entry for
     each case variant of the file name.
 
     The listing holds an ``index.js`` entry and one entry named
     ``filename``; only the latter's ``sha1`` is expected, under the
     ``NpmMapping`` key.
     """
 
     def file_entry(name, git_id, length, sha1):
         # visible file entry of directory ``dir_a``; ``target`` repeats the
         # git id, as in both entries of the listing
         return dict(
             sha1_git=git_id,
             name=name,
             target=git_id,
             length=length,
             status="visible",
             type="file",
             perms=33188,
             dir_id=b"dir_a",
             sha1=sha1,
         )
 
     listing = [
         file_entry(b"index.js", b"abc", 897, b"bcd"),
         file_entry(filename, b"aab", 712, b"cde"),
     ]
     detected = detect_metadata(listing)
 
     assert {"NpmMapping": [b"cde"]} == detected