diff --git a/swh/model/identifiers.py b/swh/model/identifiers.py
--- a/swh/model/identifiers.py
+++ b/swh/model/identifiers.py
@@ -795,6 +795,10 @@
     return str(swhid)
 
 
+# Qualifier names accepted in the contextual part of a SWHID (after ";").
+CONTEXT_QUALIFIERS = {"origin", "anchor", "visit", "path", "lines"}
+
+
 def parse_swhid(swhid: str) -> SWHID:
     """Parse :ref:`persistent-identifiers`.
 
@@ -857,8 +861,18 @@
     _metadata = {}
     for part in swhid_parts:
         try:
-            key, val = part.split("=")
-            _metadata[key] = val
+            qualifier, val = part.split("=")
+            if qualifier not in CONTEXT_QUALIFIERS:
+                error_msg = (
+                    f"Wrong format: qualifier should be one "
+                    f"of {', '.join(sorted(CONTEXT_QUALIFIERS))}"
+                )
+                raise ValidationError(error_msg)
+            _metadata[qualifier] = val
+        except ValidationError:
+            # Let the specific "unknown qualifier" error through instead of
+            # having the broad handler below replace it with a generic one.
+            raise
         except Exception:
             msg = """Wrong format: contextual data should be of form:
  := ";" [ ] ;
diff --git a/swh/model/tests/test_identifiers.py b/swh/model/tests/test_identifiers.py
--- a/swh/model/tests/test_identifiers.py
+++ b/swh/model/tests/test_identifiers.py
@@ -1059,6 +1059,12 @@
         "swh:1:dir:0b6959356d30f1a4e9b7f6bca59b9a336464c03d;invalid;malformed",
         "swh:1:snp:gh6959356d30f1a4e9b7f6bca59b9a336464c03d",
         "swh:1:snp:foo",
+        # swhid ok but context qualifiers off
+        "swh:1:dir:0b6959356d30f1a4e9b7f6bca59b9a336464c03d;ori=something;anchor=1;visit=1;path=/",  # noqa
+        "swh:1:dir:0b6959356d30f1a4e9b7f6bca59b9a336464c03d;origin=something;anc=1;visit=1;path=/",  # noqa
+        "swh:1:dir:0b6959356d30f1a4e9b7f6bca59b9a336464c03d;origin=something;anchor=1;vis=1;path=/",  # noqa
+        "swh:1:dir:0b6959356d30f1a4e9b7f6bca59b9a336464c03d;origin=something;anchor=1;visit=1;pa=/",  # noqa
+        "swh:1:dir:0b6959356d30f1a4e9b7f6bca59b9a336464c03d;line=10;origin=something;anchor=1;visit=1;path=/",  # noqa
     ],
 )
 def test_parse_swhid_parsing_error(invalid_swhid):