Changeset View
Standalone View
swh/provenance/tests/conftest.py
# Copyright (C) 2021 The Software Heritage developers | # Copyright (C) 2021 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import glob | import glob | ||||
from os import path | from os import path | ||||
import re | |||||
from typing import Iterable, Iterator, List | |||||
import pytest | import pytest | ||||
from typing_extensions import TypedDict | |||||
from swh.core.api.serializers import msgpack_loads | from swh.core.api.serializers import msgpack_loads | ||||
from swh.core.db import BaseDb | from swh.core.db import BaseDb | ||||
from swh.core.db.pytest_plugin import postgresql_fact | from swh.core.db.pytest_plugin import postgresql_fact | ||||
from swh.core.utils import numfile_sortkey as sortkey | from swh.core.utils import numfile_sortkey as sortkey | ||||
from swh.model.model import Content, Directory, DirectoryEntry, Revision | from swh.model.model import Content, Directory, DirectoryEntry, Revision | ||||
from swh.model.tests.swh_model_data import TEST_OBJECTS | from swh.model.tests.swh_model_data import TEST_OBJECTS | ||||
import swh.provenance | import swh.provenance | ||||
▲ Show 20 Lines • Show All 61 Lines • ▼ Show 20 Lines | def archive_pg(swh_storage_with_objects): | ||||
# transaction. | # transaction. | ||||
# TODO: refactor the ArchivePostgreSQL to properly deal with | # TODO: refactor the ArchivePostgreSQL to properly deal with | ||||
# transactions and get rid of this fixture | # transactions and get rid of this fixture | ||||
archive = ArchivePostgreSQL(conn=swh_storage_with_objects.get_db().conn) | archive = ArchivePostgreSQL(conn=swh_storage_with_objects.get_db().conn) | ||||
yield archive | yield archive | ||||
archive.conn.rollback() | archive.conn.rollback() | ||||
def get_datafile(fname):
    """Return the path of ``fname`` inside the ``data`` directory that sits
    next to this module.
    """
    here = path.dirname(__file__)
    return path.join(here, "data", fname)
@pytest.fixture | @pytest.fixture | ||||
def CMDBTS_data(): | def CMDBTS_data(): | ||||
# imported git tree is https://github.com/grouss/CMDBTS rev 4c5551b496 | # imported git tree is https://github.com/grouss/CMDBTS rev 4c5551b496 | ||||
vlorentz: why do you override the one from `swh/core/pytest_plugin.py`? | |||||
Done Inline Actionsbecause I forgot it was there :-) douardda: because I forgot it was there :-) | |||||
# ([xxx] is the timestamp): | # ([xxx] is the timestamp): | ||||
# o - [1609757158] first commit 35ccb8dd1b53d2d8a5c1375eb513ef2beaa79ae5 | # o - [1609757158] first commit 35ccb8dd1b53d2d8a5c1375eb513ef2beaa79ae5 | ||||
# | `- README.md * 43f3c871310a8e524004e91f033e7fb3b0bc8475 | # | `- README.md * 43f3c871310a8e524004e91f033e7fb3b0bc8475 | ||||
# o - [1610644094] Reset Empty repository 840b91df68e9549c156942ddd5002111efa15604 | # o - [1610644094] Reset Empty repository 840b91df68e9549c156942ddd5002111efa15604 | ||||
# | | # | | ||||
# o - [1610644094] R0000 9e36e095b79e36a3da104ce272989b39cd68aefd | # o - [1610644094] R0000 9e36e095b79e36a3da104ce272989b39cd68aefd | ||||
# | `- Red/Blue/Green/a * 6dc7e44ead5c0e300fe94448c3e046dfe33ad4d1 | # | `- Red/Blue/Green/a * 6dc7e44ead5c0e300fe94448c3e046dfe33ad4d1 | ||||
# o - [1610644097] R0001 bfbfcc72ae7fc35d6941386c36280512e6b38440 | # o - [1610644097] R0001 bfbfcc72ae7fc35d6941386c36280512e6b38440 | ||||
▲ Show 20 Lines • Show All 42 Lines • ▼ Show 20 Lines | def CMDBTS_data(): | ||||
# |- Paris/Berlin/Purple/f 94ba40161084e8b80943accd9d24e1f9dd47189b | # |- Paris/Berlin/Purple/f 94ba40161084e8b80943accd9d24e1f9dd47189b | ||||
# |- Paris/Berlin/Purple/g 94ba40161084e8b80943accd9d24e1f9dd47189b | # |- Paris/Berlin/Purple/g 94ba40161084e8b80943accd9d24e1f9dd47189b | ||||
# |- Paris/Munich/Purple/f 94ba40161084e8b80943accd9d24e1f9dd47189b | # |- Paris/Munich/Purple/f 94ba40161084e8b80943accd9d24e1f9dd47189b | ||||
# |- Paris/Munich/Purple/g 94ba40161084e8b80943accd9d24e1f9dd47189b | # |- Paris/Munich/Purple/g 94ba40161084e8b80943accd9d24e1f9dd47189b | ||||
# |- Paris/Purple/f 94ba40161084e8b80943accd9d24e1f9dd47189b | # |- Paris/Purple/f 94ba40161084e8b80943accd9d24e1f9dd47189b | ||||
# |- Paris/Purple/g 94ba40161084e8b80943accd9d24e1f9dd47189b | # |- Paris/Purple/g 94ba40161084e8b80943accd9d24e1f9dd47189b | ||||
# `- Paris/k * cb79b39935c9392fa5193d9f84a6c35dc9c22c75 | # `- Paris/k * cb79b39935c9392fa5193d9f84a6c35dc9c22c75 | ||||
data = {"revision": [], "directory": [], "content": []} | data = {"revision": [], "directory": [], "content": []} | ||||
with open( | with open(get_datafile("CMDBTS.msgpack"), "rb") as fobj: | ||||
path.join(path.dirname(__file__), "data", "CMDBTS.msgpack"), "rb" | |||||
) as fobj: | |||||
for etype, value in msgpack_loads(fobj.read()): | for etype, value in msgpack_loads(fobj.read()): | ||||
data[etype].append(value) | data[etype].append(value) | ||||
return data | return data | ||||
def filter_dict(d, keys):
    """Return a new dict holding only the entries of ``d`` whose key is in
    ``keys``; the iteration order of ``d`` is kept.
    """
    kept = {}
    for key, value in d.items():
        if key in keys:
            kept[key] = value
    return kept
Show All 17 Lines | swh_storage.directory_add( | ||||
) | ) | ||||
for dir in CMDBTS_data["directory"] | for dir in CMDBTS_data["directory"] | ||||
] | ] | ||||
) | ) | ||||
swh_storage.revision_add( | swh_storage.revision_add( | ||||
Revision.from_dict(revision) for revision in CMDBTS_data["revision"] | Revision.from_dict(revision) for revision in CMDBTS_data["revision"] | ||||
) | ) | ||||
return swh_storage, CMDBTS_data | return swh_storage, CMDBTS_data | ||||
class SynthRelation(TypedDict):
    """A single relation row attached to a synthetic revision."""

    # location (path) associated with the relation
    path: str
    # sha1 of the source of the relation
    src: bytes
    # sha1 of the destination of the relation
    dst: bytes
    # timestamp of the target of the relation
    # (related to the timestamp of the revision)
    rel_ts: float
class SynthRevision(TypedDict):
    """A synthetic revision together with the relations it introduces.

    The relation names (R---C, R-D, D-C) follow the notation used in the
    synthetic files.
    """

    # sha1 of the revision
    sha1: bytes
    # timestamp of the revision
    date: float
    # commit message of the revision
    msg: str
    # new R---C (Revision---Content) relations added by this revision
    R_C: List[SynthRelation]
    # new R-D (Revision-Directory) relations added by this revision
    R_D: List[SynthRelation]
    # new D-C (Directory-Content) relations added by this revision
    D_C: List[SynthRelation]
def synthetic_result(filename: str) -> Iterator[SynthRevision]:
    """Yield the synthetic revisions described in a file of the data/ directory.

    ``filename`` is resolved with ``get_datafile``; the opened file is handed
    to ``_parse_synthetic_file`` and every revision it produces is yielded
    unchanged.

    Each generated SynthRevision (typed dict) has the following elements:

      "sha1": (bytes) sha1 of the revision,
      "date": (float) timestamp of the revision,
      "msg": (str) commit message of the revision,
      "R_C": (list) new R---C (Revision---Content) relations added by this
             revision
      "R_D": (list) new R-D (Revision-Directory) relations added by this
             revision
      "D_C": (list) new D-C (Directory-Content) relations added by this
             revision

    The R---C / R-D / D-C names follow the notation used in the synthetic
    files.

    Each relation above is a SynthRelation typed dict with:

      "path": (str) location
      "src": (bytes) sha1 of the source of the relation
      "dst": (bytes) sha1 of the destination of the relation
      "rel_ts": (float) timestamp of the target of the relation
                (related to the timestamp of the revision)
    """
    datafile = get_datafile(filename)
    with open(datafile, "r") as stream:
        for revision in _parse_synthetic_file(stream):
            yield revision
def _parse_synthetic_file(fobj: Iterable[str]) -> Iterator[SynthRevision]:
    """Turn the lines of a 'synthetic' file into SynthRevision dicts.

    Every line is matched against a pipe-separated row pattern; lines that do
    not match are ignored. A row carrying a revision name (``R`` followed by
    4 digits) starts a new revision: the rows collected so far are converted
    with ``_mk_synth_rev`` and yielded, then collection restarts from that row.
    """
    columns = [
        "(?P<revname>R[0-9]{4})?",
        "(?P<reltype>[^| ]*)",
        "(?P<path>[^|]*?)",
        "(?P<type>[RDC]) (?P<sha1>[0-9a-z]{40})",
        # NOTE(review): the "." in this pattern is unescaped, so it matches
        # any character and not only a decimal point.
        "(?P<ts>-?[0-9]+(.[0-9]+)?)",
    ]
    row_re = re.compile("^ *" + r" *[|] *".join(columns) + r" *$")

    # rows of the revision currently being read (first one is the R row)
    pending: List[dict] = []
    for line in fobj:
        matched = row_re.match(line)
        if not matched:
            continue
        row = matched.groupdict()
        if row["revname"] and pending:
            # a new revision begins: flush the rows of the previous one
            yield _mk_synth_rev(pending)
            pending = []
        pending.append(row)
    # flush the last revision of the file, if any
    if pending:
        yield _mk_synth_rev(pending)
def _mk_synth_rev(synth_rev) -> SynthRevision:
    """Build a SynthRevision from the parsed rows of one synthetic revision.

    Args:
        synth_rev: non-empty list of row dicts with (at least) the keys
            "revname", "reltype", "path", "type", "sha1" and "ts". The first
            row must describe the revision itself (type "R"); the following
            rows describe the relations introduced by that revision.

    Returns:
        a SynthRevision whose "R_C", "R_D" and "D_C" lists hold one
        SynthRelation per "R---C", "R-D" and "D-C" row respectively.

    Rows whose "reltype" is none of the three known relation types are
    reported with a warning log message and skipped (the synthetic files are
    believed to be hand-written, so such rows are worth surfacing).
    """
    import logging

    head = synth_rev[0]
    assert head["type"] == "R"
    rev = SynthRevision(
        sha1=bytes.fromhex(head["sha1"]),
        date=float(head["ts"]),
        msg=head["revname"],
        R_C=[],
        R_D=[],
        D_C=[],
    )
    # reltype -> (list receiving the relation, expected type of its target)
    buckets = {
        "R---C": (rev["R_C"], "C"),
        "R-D": (rev["R_D"], "D"),
        "D-C": (rev["D_C"], "C"),
    }
    for row in synth_rev[1:]:
        reltype = row["reltype"]
        if reltype not in buckets:
            logging.getLogger(__name__).warning("unknown relation: %s", row)
            continue
        bucket, expected_type = buckets[reltype]
        assert row["type"] == expected_type
        if reltype == "D-C":
            # a D-C row hangs off the directory of the most recent R-D row
            src = rev["R_D"][-1]["dst"]
        else:
            src = rev["sha1"]
        bucket.append(
            SynthRelation(
                path=row["path"],
                src=src,
                dst=bytes.fromhex(row["sha1"]),
                rel_ts=float(row["ts"]),
            )
        )
    return rev
why do you override the one from swh/core/pytest_plugin.py?