diff --git a/swh/model/identifiers.py b/swh/model/identifiers.py
--- a/swh/model/identifiers.py
+++ b/swh/model/identifiers.py
@@ -638,18 +638,39 @@
 
 
 PERSISTENT_IDENTIFIER_KEYS = [
-    'namespace', 'scheme_version', 'object_type', 'object_id']
+    'namespace', 'scheme_version', 'object_type', 'object_id', 'metadata']
+
+PERSISTENT_IDENTIFIER_PARTS_SEP = ';'
 
 
 def parse_persistent_identifier(persistent_id):
-    """Parse swh's persistent identifier scheme.
+    """Parse swh's :ref:`persistent-identifiers` scheme.
+
+    The core identifier and its optional ``key=value`` metadata parts are
+    separated by ``;``. Parts without a ``=`` are silently ignored.
 
     Args:
         persistent_id (str): A persistent identifier
 
     Returns:
-        dict with keys namespace, scheme_version, object_type, object_id
+        dict: dict with keys :
+
+            * namespace, holding str value
+            * scheme_version, holding str value
+            * object_type, holding str value
+            * object_id, holding str value
+            * metadata, holding dict value
 
     """
-    data = persistent_id.split(':')
+    persistent_id_parts = persistent_id.split(PERSISTENT_IDENTIFIER_PARTS_SEP)
+    data = persistent_id_parts.pop(0).split(':')
+    persistent_id_metadata = {}
+    for part in persistent_id_parts:
+        # Split on the first '=' only, so a value that itself contains
+        # '=' (e.g. an origin URL with a query string) is kept whole.
+        # Parts without any '=' are malformed and skipped on purpose.
+        key, sep, val = part.partition('=')
+        if sep:
+            persistent_id_metadata[key] = val
+    data.append(persistent_id_metadata)
     return dict(zip(PERSISTENT_IDENTIFIER_KEYS, data))
diff --git a/swh/model/tests/test_identifiers.py b/swh/model/tests/test_identifiers.py
--- a/swh/model/tests/test_identifiers.py
+++ b/swh/model/tests/test_identifiers.py
@@ -818,6 +818,37 @@
                 'scheme_version': _version,
                 'object_type': _type,
                 'object_id': _hash,
+                'metadata': {}
+            }
+            actual_result = identifiers.parse_persistent_identifier(pid)
+            self.assertEquals(actual_result, expected_result)
+
+        for pid, _type, _version, _hash, _metadata in [
+            ('swh:1:cnt:9c95815d9e9d91b8dae8e05d8bbc696fe19f796b;lines=1-18;origin=https://github.com/python/cpython', # noqa
+             'cnt', '1', '9c95815d9e9d91b8dae8e05d8bbc696fe19f796b',
+             {
+                 'lines': '1-18',
+                 'origin': 'https://github.com/python/cpython'
+             }),
+            ('swh:1:dir:0b6959356d30f1a4e9b7f6bca59b9a336464c03d;origin=deb://Debian/packages/linuxdoc-tools', # noqa
+             'dir', '1', '0b6959356d30f1a4e9b7f6bca59b9a336464c03d',
+             {
+                 'origin': 'deb://Debian/packages/linuxdoc-tools'
+             }),
+            ('swh:1:cnt:9c95815d9e9d91b8dae8e05d8bbc696fe19f796b;origin=https://example.org/repo?ref=main', # noqa
+             'cnt', '1', '9c95815d9e9d91b8dae8e05d8bbc696fe19f796b',
+             {
+                 'origin': 'https://example.org/repo?ref=main'
+             }),
+            ('swh:1:dir:0b6959356d30f1a4e9b7f6bca59b9a336464c03d;invalid;malformed', # noqa
+             'dir', '1', '0b6959356d30f1a4e9b7f6bca59b9a336464c03d', {})
+        ]:
+            expected_result = {
+                'namespace': 'swh',
+                'scheme_version': _version,
+                'object_type': _type,
+                'object_id': _hash,
+                'metadata': _metadata
             }
             actual_result = identifiers.parse_persistent_identifier(pid)
             self.assertEquals(actual_result, expected_result)