Changeset View
Changeset View
Standalone View
Standalone View
swh/graph/pid.py
# Copyright (C) 2019 The Software Heritage developers | # Copyright (C) 2019 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import mmap | import mmap | ||||
import os | import os | ||||
import struct | import struct | ||||
from collections.abc import MutableMapping | from collections.abc import MutableMapping | ||||
from enum import Enum | from enum import Enum | ||||
from mmap import MAP_SHARED, PROT_READ, PROT_WRITE | from mmap import MAP_SHARED, PROT_READ, PROT_WRITE | ||||
from typing import BinaryIO, Iterator, Tuple | from typing import BinaryIO, Iterator, Tuple | ||||
from swh.model.identifiers import PersistentId, parse_persistent_identifier | from swh.model.identifiers import SWHID, parse_swhid | ||||
# Binary layouts, expressed as ``struct`` format strings.
PID_BIN_FMT = "BB20s"  # 2 unsigned chars + 20 bytes
INT_BIN_FMT = ">q"  # big endian, 8-byte integer

# Record sizes are computed from the format strings above rather than
# hard-coded, so a change to a format cannot leave its size out of date.
PID_BIN_SIZE = struct.calcsize(PID_BIN_FMT)  # 22 bytes
INT_BIN_SIZE = struct.calcsize(INT_BIN_FMT)  # 8 bytes
Show All 27 Lines | def str_to_bytes(pid_str: str) -> bytes: | ||||
Args: | Args: | ||||
pid: persistent identifier | pid: persistent identifier | ||||
Returns: | Returns: | ||||
bytes: byte sequence representation of pid | bytes: byte sequence representation of pid | ||||
""" | """ | ||||
pid = parse_persistent_identifier(pid_str) | pid = parse_swhid(pid_str) | ||||
return struct.pack( | return struct.pack( | ||||
PID_BIN_FMT, | PID_BIN_FMT, | ||||
pid.scheme_version, | pid.scheme_version, | ||||
PidType[pid.object_type].value, | PidType[pid.object_type].value, | ||||
bytes.fromhex(pid.object_id), | bytes.fromhex(pid.object_id), | ||||
) | ) | ||||
def bytes_to_str(bytes: bytes) -> str:
    """Decode a binary PID back into its textual form.

    Inverse function of :func:`str_to_bytes`; see that function for a
    description of the binary PID format.

    Args:
        bytes: byte sequence representation of pid

    Returns:
        pid: persistent identifier

    """
    # The leading version byte is unpacked to match the record layout, but
    # it is not forwarded to the SWHID constructor.
    _version, type_code, digest = struct.unpack(PID_BIN_FMT, bytes)
    type_name = PidType(type_code).name
    return str(SWHID(object_type=type_name, object_id=digest))
class _OnDiskMap: | class _OnDiskMap: | ||||
"""mmap-ed on-disk sequence of fixed size records | """mmap-ed on-disk sequence of fixed size records | ||||
""" | """ | ||||
▲ Show 20 Lines • Show All 317 Lines • Show Last 20 Lines |