diff --git a/requirements-swh.txt b/requirements-swh.txt
--- a/requirements-swh.txt
+++ b/requirements-swh.txt
@@ -1,2 +1,2 @@
 swh.core[http] >= 0.3
-swh.model
+swh.model >= 0.13.0
diff --git a/swh/graph/backend.py b/swh/graph/backend.py
--- a/swh/graph/backend.py
+++ b/swh/graph/backend.py
@@ -16,7 +16,7 @@
 
 from swh.graph.config import check_config
 from swh.graph.swhid import NodeToSwhidMap, SwhidToNodeMap
-from swh.model.identifiers import SWHID_TYPES
+from swh.model.identifiers import EXTENDED_SWHID_TYPES
 
 BUF_SIZE = 64 * 1024
 BIN_FMT = ">q"  # 64 bit integer, big endian
@@ -75,7 +75,7 @@
             yield node_id
 
     async def walk(self, direction, edges_fmt, algo, src, dst):
-        if dst in SWHID_TYPES:
+        if dst in EXTENDED_SWHID_TYPES:
             it = self.stream_proxy.walk_type(direction, edges_fmt, algo, src, dst)
         else:
             it = self.stream_proxy.walk(direction, edges_fmt, algo, src, dst)
@@ -83,7 +83,7 @@
             yield node_id
 
     async def random_walk(self, direction, edges_fmt, retries, src, dst):
-        if dst in SWHID_TYPES:
+        if dst in EXTENDED_SWHID_TYPES:
             it = self.stream_proxy.random_walk_type(
                 direction, edges_fmt, retries, src, dst
             )
diff --git a/swh/graph/cli.py b/swh/graph/cli.py
--- a/swh/graph/cli.py
+++ b/swh/graph/cli.py
@@ -272,7 +272,7 @@
     from swh.graph.backend import NODE2SWHID_EXT, SWHID2NODE_EXT
     from swh.graph.swhid import NodeToSwhidMap, SwhidToNodeMap
     import swh.model.exceptions
-    from swh.model.identifiers import parse_swhid
+    from swh.model.identifiers import ExtendedSWHID
 
     success = True  # no identifiers failed to be looked up
     swhid2node = SwhidToNodeMap(f"{graph}.{SWHID2NODE_EXT}")
@@ -286,7 +286,7 @@
             is_swhid = False
         except ValueError:
             try:
-                parse_swhid(identifier)
+                ExtendedSWHID.from_string(identifier)
                 is_swhid = True
             except swh.model.exceptions.ValidationError:
                 success = False
diff --git a/swh/graph/server/app.py b/swh/graph/server/app.py
--- a/swh/graph/server/app.py
+++ b/swh/graph/server/app.py
@@ -17,7 +17,7 @@
 
 from swh.core.api.asynchronous import RPCServerApp
 from swh.model.exceptions import ValidationError
-from swh.model.identifiers import SWHID_TYPES
+from swh.model.identifiers import EXTENDED_SWHID_TYPES
 
 try:
     from contextlib import asynccontextmanager
@@ -86,7 +86,7 @@
         s = self.request.query.get("edges", "*")
         if any(
             [
-                node_type != "*" and node_type not in SWHID_TYPES
+                node_type != "*" and node_type not in EXTENDED_SWHID_TYPES
                 for edge in s.split(":")
                 for node_type in edge.split(",", maxsplit=1)
             ]
@@ -192,7 +192,7 @@
         src = self.request.match_info["src"]
         dst = self.request.match_info["dst"]
         self.src_node = self.node_of_swhid(src)
-        if dst not in SWHID_TYPES:
+        if dst not in EXTENDED_SWHID_TYPES:
             self.dst_thing = self.node_of_swhid(dst)
         else:
             self.dst_thing = dst
diff --git a/swh/graph/swhid.py b/swh/graph/swhid.py
--- a/swh/graph/swhid.py
+++ b/swh/graph/swhid.py
@@ -1,8 +1,10 @@
-# Copyright (C) 2019-2020 The Software Heritage developers
+# Copyright (C) 2019-2021 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
+from __future__ import annotations
+
 from collections.abc import MutableMapping
 from enum import Enum
 import mmap
@@ -11,7 +13,7 @@
 import struct
 from typing import BinaryIO, Iterator, Tuple
 
-from swh.model.identifiers import SWHID, parse_swhid
+from swh.model.identifiers import ExtendedObjectType, ExtendedSWHID
 
 SWHID_BIN_FMT = "BB20s"  # 2 unsigned chars + 20 bytes
 INT_BIN_FMT = ">q"  # big endian, 8-byte integer
@@ -20,7 +22,8 @@
 
 
 class SwhidType(Enum):
-    """types of existing SWHIDs, used to serialize SWHID type as a (char) integer
+    """types of existing SWHIDs, used to serialize ExtendedSWHID type as a (char)
+    integer
 
     Note that the order does matter also for driving the binary search in
     SWHID-indexed maps. Integer values also matter, for compatibility with the
@@ -35,6 +38,15 @@
     revision = 4
     snapshot = 5
 
+    @classmethod
+    def from_extended_object_type(cls, object_type: ExtendedObjectType) -> SwhidType:
+        """Return the member whose name is the lowercased name of ``object_type``"""
+        return cls[object_type.name.lower()]
+
+    def to_extended_object_type(self) -> ExtendedObjectType:
+        """Return the ExtendedObjectType member with the same (uppercased) name"""
+        return ExtendedObjectType[self.name.upper()]
+
 
 def str_to_bytes(swhid_str: str) -> bytes:
     """Convert a SWHID to a byte sequence
@@ -54,12 +66,12 @@
         bytes: byte sequence representation of swhid
 
     """
-    swhid = parse_swhid(swhid_str)
+    swhid = ExtendedSWHID.from_string(swhid_str)
     return struct.pack(
         SWHID_BIN_FMT,
         swhid.scheme_version,
-        SwhidType[swhid.object_type].value,
-        bytes.fromhex(swhid.object_id),
+        SwhidType.from_extended_object_type(swhid.object_type).value,
+        swhid.object_id,
     )
 
 
@@ -76,7 +88,9 @@
 
     """
     (version, type, bin_digest) = struct.unpack(SWHID_BIN_FMT, bytes)
-    swhid = SWHID(object_type=SwhidType(type).name, object_id=bin_digest)
+    swhid = ExtendedSWHID(
+        object_type=SwhidType(type).to_extended_object_type(), object_id=bin_digest
+    )
     return str(swhid)
 
 
diff --git a/swh/graph/tests/test_swhid.py b/swh/graph/tests/test_swhid.py
--- a/swh/graph/tests/test_swhid.py
+++ b/swh/graph/tests/test_swhid.py
@@ -139,14 +139,14 @@
         os.unlink(fname2)  # tmpdir will be cleaned even if we don't reach this
 
     def test_iter_type(self):
-        for t in SWHID_TYPES:
+        for t in SWHID_TYPES + ["ori"]:
             first_20 = list(islice(self.map.iter_type(t), 20))
             k = first_20[0][1]
             expected = [("swh:1:{}:{:040x}".format(t, i), i) for i in range(k, k + 20)]
             assert first_20 == expected
 
     def test_iter_prefix(self):
-        for t in SWHID_TYPES:
+        for t in SWHID_TYPES + ["ori"]:
             prefix = self.map.iter_prefix("swh:1:{}:00".format(t))
             first_20 = list(islice(prefix, 20))
             k = first_20[0][1]