swhid = 'swh:1:dir:0b6959356d30f1a4e9b7f6bca59b9a336464c03d;invalid;malformed'
def parse_swhid(swhid: str) -> SWHID:
"""Parse :ref:`persistent-identifiers`.
Args:
swhid (str): A persistent identifier
Raises:
swh.model.exceptions.ValidationError: in case of:
* missing mandatory values (4)
* invalid namespace supplied
* invalid version supplied
* invalid type supplied
* missing hash
* invalid hash identifier supplied
Returns:
a named tuple holding the parsing result
"""
# <swhid>;<contextual-information>
swhid_parts = swhid.split(SWHID_CTXT_SEP)
swhid_data = swhid_parts.pop(0).split(":")
if len(swhid_data) != 4:
raise ValidationError(
"""Wrong format: The supported format should be:
<identifier_core> ::= "swh" ":" <scheme_version> ":" <object_type> ":" <object_id> ;
<scheme_version> ::= "1" ;
<object_type> ::=
"snp" (* snapshot *)
| "rel" (* release *)
| "rev" (* revision *)
| "dir" (* directory *)
| "cnt" (* content *)
;
<object_id> ::= 40 * <hex_digit> ; (* intrinsic object id, as hex-encoded SHA1 *)
<dec_digit> ::= "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" ;
<hex_digit> ::= <dec_digit> | "a" | "b" | "c" | "d" | "e" | "f" ;
"""
)
# Checking for parsing errors
_ns, _version, _type, _id = swhid_data
for otype, data in _object_type_map.items():
if _type == data["short_name"]:
_type = otype
break
if not _id:
raise ValidationError(
"""Wrong format: <object_id> should be provided:
<object_id> ::= 40 * <hex_digit> ; (* intrinsic object id, as hex-encoded SHA1 *)
<dec_digit> ::= "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" ;
<hex_digit> ::= <dec_digit> | "a" | "b" | "c" | "d" | "e" | "f" ;
"""
)
_metadata = {}
for part in swhid_parts:
try:
> key, val = part.split("=")
E ValueError: not enough values to unpack (expected 2, got 1)
.tox/py3/lib/python3.7/site-packages/swh/model/identifiers.py:860: ValueError
During handling of the above exception, another exception occurred:
swhid = 'swh:1:dir:0b6959356d30f1a4e9b7f6bca59b9a336464c03d;invalid;malformed'
expected_error = 'Contextual data is badly formatted, form key=val expected'
@pytest.mark.parametrize(
"swhid,expected_error",
[
("swh:1:cnt", "Wrong format: There should be 4 mandatory values"),
("swh:1:", "Wrong format: There should be 4 mandatory values"),
("swh:", "Wrong format: There should be 4 mandatory values"),
("swh:1:cnt:", "Wrong format: Identifier should be present"),
(
"foo:1:cnt:abc8bc9d7a6bcf6db04f476d29314f157507d505",
"Wrong format: only supported namespace is 'swh'",
),
(
"swh:2:dir:def8bc9d7a6bcf6db04f476d29314f157507d505",
"Wrong format: only supported version is 1",
),
(
"swh:1:foo:fed8bc9d7a6bcf6db04f476d29314f157507d505",
"Wrong input: Supported types are",
),
(
"swh:1:dir:0b6959356d30f1a4e9b7f6bca59b9a336464c03d;invalid;malformed",
"Contextual data is badly formatted, form key=val expected",
),
("swh:1:snp:gh6959356d30f1a4e9b7f6bca59b9a336464c03d", "Unexpected characters"),
("swh:1:snp:foo", "Unexpected characters"),
],
)
def test_parse_swhid_parsing_error(swhid, expected_error):
with pytest.raises(ValidationError, match=expected_error):
> identifiers.parse_swhid(swhid)
.tox/py3/lib/python3.7/site-packages/swh/model/tests/test_identifiers.py:1078:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
swhid = 'swh:1:dir:0b6959356d30f1a4e9b7f6bca59b9a336464c03d;invalid;malformed'
def parse_swhid(swhid: str) -> SWHID:
"""Parse :ref:`persistent-identifiers`.
Args:
swhid (str): A persistent identifier
Raises:
swh.model.exceptions.ValidationError: in case of:
* missing mandatory values (4)
* invalid namespace supplied
* invalid version supplied
* invalid type supplied
* missing hash
* invalid hash identifier supplied
Returns:
a named tuple holding the parsing result
"""
# <swhid>;<contextual-information>
swhid_parts = swhid.split(SWHID_CTXT_SEP)
swhid_data = swhid_parts.pop(0).split(":")
if len(swhid_data) != 4:
raise ValidationError(
"""Wrong format: The supported format should be:
<identifier_core> ::= "swh" ":" <scheme_version> ":" <object_type> ":" <object_id> ;
<scheme_version> ::= "1" ;
<object_type> ::=
"snp" (* snapshot *)
| "rel" (* release *)
| "rev" (* revision *)
| "dir" (* directory *)
| "cnt" (* content *)
;
<object_id> ::= 40 * <hex_digit> ; (* intrinsic object id, as hex-encoded SHA1 *)
<dec_digit> ::= "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" ;
<hex_digit> ::= <dec_digit> | "a" | "b" | "c" | "d" | "e" | "f" ;
"""
)
# Checking for parsing errors
_ns, _version, _type, _id = swhid_data
for otype, data in _object_type_map.items():
if _type == data["short_name"]:
_type = otype
break
if not _id:
raise ValidationError(
"""Wrong format: <object_id> should be provided:
<object_id> ::= 40 * <hex_digit> ; (* intrinsic object id, as hex-encoded SHA1 *)
<dec_digit> ::= "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" ;
<hex_digit> ::= <dec_digit> | "a" | "b" | "c" | "d" | "e" | "f" ;
"""
)
_metadata = {}
for part in swhid_parts:
try:
key, val = part.split("=")
_metadata[key] = val
except Exception:
msg = """Wrong format: contextual data should be of form:
<context_qualifier> ::=
<origin_ctxt>
| <visit_ctxt>
| <anchor_ctxt>
| <path_ctxt>
;
<origin_ctxt> ::= "origin" "=" <url_escaped> ;
<visit_ctxt> ::= "visit" "=" <identifier_core> ;
<anchor_ctxt> ::= "anchor" "=" <identifier_core> ;
<path_ctxt> ::= "path" "=" <path_absolute_escaped> ;
"""
> raise ValidationError(msg)
E swh.model.exceptions.ValidationError: ['Wrong format: contextual data should be of form:\n <context_qualifier> ::=\n <origin_ctxt>\n | <visit_ctxt>\n | <anchor_ctxt>\n | <path_ctxt>\n ;\n <origin_ctxt> ::= "origin" "=" <url_escaped> ;\n <visit_ctxt> ::= "visit" "=" <identifier_core> ;\n <anchor_ctxt> ::= "anchor" "=" <identifier_core> ;\n <path_ctxt> ::= "path" "=" <path_absolute_escaped> ;\n']
.tox/py3/lib/python3.7/site-packages/swh/model/identifiers.py:875: ValidationError
During handling of the above exception, another exception occurred:
swhid = 'swh:1:dir:0b6959356d30f1a4e9b7f6bca59b9a336464c03d;invalid;malformed'
expected_error = 'Contextual data is badly formatted, form key=val expected'
@pytest.mark.parametrize(
"swhid,expected_error",
[
("swh:1:cnt", "Wrong format: There should be 4 mandatory values"),
("swh:1:", "Wrong format: There should be 4 mandatory values"),
("swh:", "Wrong format: There should be 4 mandatory values"),
("swh:1:cnt:", "Wrong format: Identifier should be present"),
(
"foo:1:cnt:abc8bc9d7a6bcf6db04f476d29314f157507d505",
"Wrong format: only supported namespace is 'swh'",
),
(
"swh:2:dir:def8bc9d7a6bcf6db04f476d29314f157507d505",
"Wrong format: only supported version is 1",
),
(
"swh:1:foo:fed8bc9d7a6bcf6db04f476d29314f157507d505",
"Wrong input: Supported types are",
),
(
"swh:1:dir:0b6959356d30f1a4e9b7f6bca59b9a336464c03d;invalid;malformed",
"Contextual data is badly formatted, form key=val expected",
),
("swh:1:snp:gh6959356d30f1a4e9b7f6bca59b9a336464c03d", "Unexpected characters"),
("swh:1:snp:foo", "Unexpected characters"),
],
)
def test_parse_swhid_parsing_error(swhid, expected_error):
with pytest.raises(ValidationError, match=expected_error):
> identifiers.parse_swhid(swhid)
E AssertionError: Regex pattern 'Contextual data is badly formatted, form key=val expected' does not match '[\'Wrong format: contextual data should be of form:\\n <context_qualifier> ::=\\n <origin_ctxt>\\n | <visit_ctxt>\\n | <anchor_ctxt>\\n | <path_ctxt>\\n ;\\n <origin_ctxt> ::= "origin" "=" <url_escaped> ;\\n <visit_ctxt> ::= "visit" "=" <identifier_core> ;\\n <anchor_ctxt> ::= "anchor" "=" <identifier_core> ;\\n <path_ctxt> ::= "path" "=" <path_absolute_escaped> ;\\n\']'.
.tox/py3/lib/python3.7/site-packages/swh/model/tests/test_identifiers.py:1078: AssertionError
TEST RESULT
TEST RESULT
- Run At: Nov 10 2020, 6:22 PM