Changeset View
Changeset View
Standalone View
Standalone View
swh/graph/tests/test_pid.py
# Copyright (C) 2019 The Software Heritage developers | # Copyright (C) 2019 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import os | import os | ||||
import shutil | import shutil | ||||
import tempfile | import tempfile | ||||
import unittest | import unittest | ||||
from itertools import islice | |||||
from swh.graph.pid import str_to_bytes, bytes_to_str | from swh.graph.pid import str_to_bytes, bytes_to_str | ||||
from swh.graph.pid import PidToIntMap, IntToPidMap | from swh.graph.pid import PidToIntMap, IntToPidMap | ||||
class TestPidSerialization(unittest.TestCase): | class TestPidSerialization(unittest.TestCase): | ||||
pairs = [ | pairs = [ | ||||
('swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2', | ('swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2', | ||||
Show All 25 Lines | def test_bytes_to_str(self): | ||||
self.assertEqual(bytes_to_str(pid_bytes), pid_str) | self.assertEqual(bytes_to_str(pid_bytes), pid_str) | ||||
def test_round_trip(self):
    """Check that converting a PID there and back is the identity.

    Both directions are exercised for every (string, bytes) pair in
    ``self.pairs``: str -> bytes -> str and bytes -> str -> bytes.
    """
    for text_form, binary_form in self.pairs:
        text_again = bytes_to_str(str_to_bytes(text_form))
        self.assertEqual(text_form, text_again)
        binary_again = str_to_bytes(bytes_to_str(binary_form))
        self.assertEqual(binary_form, binary_again)
def gen_records(types=['ori', 'snp', 'rev', 'rel', 'dir', 'cnt'], | def gen_records(types=['cnt', 'dir', 'rel', 'rev', 'ori', 'snp'], | ||||
vlorentz: Why this change? | |||||
zack: Readability. The order here doesn't matter, as there is a `sorted` in the function implementation anyway, but a default argument in non-alphabetical order was a pain to read.
vlorentz: It's still not alphabetical (`ori` comes after `rev`). And the original order made sense because it's a topological order.
vlorentz: Actually no: in the original order, `rev` and `rel` were in the wrong order with respect to the usual order.
Done Inline Actionsfixed now zack: fixed now | |||||
length=10000): | length=10000): | ||||
"""generate sequential PID/int records, suitable for filling int<->pid maps for | """generate sequential PID/int records, suitable for filling int<->pid maps for | ||||
testing swh-graph on-disk binary databases | testing swh-graph on-disk binary databases | ||||
Args: | Args: | ||||
types (list): list of PID types to be generated, specified as the | types (list): list of PID types to be generated, specified as the | ||||
corresponding 3-letter component in PIDs | corresponding 3-letter component in PIDs | ||||
length (int): number of PIDs to generate *per type* | length (int): number of PIDs to generate *per type* | ||||
▲ Show 20 Lines • Show All 58 Lines • ▼ Show 20 Lines | def test_missing(self): | ||||
self.map['swh:1:cnt:0101010100000000000000000000000000000000'], | self.map['swh:1:cnt:0101010100000000000000000000000000000000'], | ||||
def test_type_error(self):
    """Indexing the map with an int or a float key must raise TypeError."""
    for bad_key in (42, 1.2):
        with self.assertRaises(TypeError):
            self.map[bad_key]
def test_update(self):
    """Overwrite entries of a writable copy of the map and read them back.

    The map file at ``self.fname`` is copied first, so the updates are
    applied to the copy and not to the original file.
    """
    fname2 = self.fname + '.update'
    shutil.copy(self.fname, fname2)  # fresh map copy
    map2 = PidToIntMap(fname2, mode='rb+')
    # the loop variable is named `value` so the builtin `int` is not shadowed
    for (pid, value) in islice(map2, 11):  # update the first N items
        new_value = value + 42
        map2[pid] = new_value
        self.assertEqual(map2[pid], new_value)  # check updated value

    # NOTE(review): map2 is never explicitly closed before its file is
    # removed -- confirm whether PidToIntMap needs that.
    os.unlink(fname2)  # tmpdir will be cleaned even if we don't reach this
class TestIntToPidMap(unittest.TestCase): | class TestIntToPidMap(unittest.TestCase): | ||||
@classmethod | @classmethod | ||||
def setUpClass(cls): | def setUpClass(cls): | ||||
"""create reasonably sized (~1 MB) int->PID map to test on-disk DB | """create reasonably sized (~1 MB) int->PID map to test on-disk DB | ||||
""" | """ | ||||
Show All 17 Lines | def test_lookup(self): | ||||
for (pid, pos) in MAP_PAIRS: | for (pid, pos) in MAP_PAIRS: | ||||
self.assertEqual(self.map[pos], pid) | self.assertEqual(self.map[pos], pid) | ||||
def test_out_of_bounds(self):
    """Positions far outside the map, on either side, must raise IndexError."""
    for position in (1000000, -1000000):
        with self.assertRaises(IndexError):
            self.map[position]
def test_update(self):
    """Overwrite entries of a writable copy of the map and read them back.

    The map file at ``self.fname`` is copied first, so the updates are
    applied to the copy and not to the original file.
    """
    fname2 = self.fname + '.update'
    shutil.copy(self.fname, fname2)  # fresh map copy
    map2 = IntToPidMap(fname2, mode='rb+')
    # the loop variable is named `pos` so the builtin `int` is not shadowed
    for (pos, pid) in islice(map2, 11):  # update the first N items
        # NOTE(review): this only changes the PID when its hash starts with
        # '0'; presumably true for the first generated records -- confirm.
        new_pid = pid.replace(':0', ':f')  # mangle first hex digit
        map2[pos] = new_pid
        self.assertEqual(map2[pos], new_pid)  # check updated value

    # NOTE(review): map2 is never explicitly closed before its file is
    # removed -- confirm whether IntToPidMap needs that.
    os.unlink(fname2)  # tmpdir will be cleaned even if we don't reach this
Why this change?