diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index ace1fe2..41a4ad6 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,48 +1,48 @@
 repos:
 - repo: https://github.com/pre-commit/pre-commit-hooks
   rev: v2.4.0
   hooks:
   - id: trailing-whitespace
   - id: check-json
   - id: check-yaml
 - repo: https://gitlab.com/pycqa/flake8
   rev: 3.8.3
   hooks:
   - id: flake8
 - repo: https://github.com/codespell-project/codespell
   rev: v1.16.0
   hooks:
   - id: codespell
 - repo: local
   hooks:
   - id: mypy
     name: mypy
     entry: mypy
     args: [swh]
     pass_filenames: false
     language: system
     types: [python]

 # unfortunately, we are far from being able to enable this...
 # - repo: https://github.com/PyCQA/pydocstyle.git
 #   rev: 4.0.0
 #   hooks:
 #   - id: pydocstyle
 #     name: pydocstyle
 #     description: pydocstyle is a static analysis tool for checking compliance with Python docstring conventions.
 #     entry: pydocstyle --convention=google
 #     language: python
 #     types: [python]

 - repo: https://github.com/PyCQA/isort
   rev: 5.5.2
   hooks:
   - id: isort

 - repo: https://github.com/python/black
-  rev: 19.10b0
+  rev: 20.8b1
   hooks:
   - id: black
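The black bump from 19.10b0 to 20.8b1 is what drives nearly every Python hunk below: 20.8b1 introduced the "magic trailing comma", so a call that already ends with a trailing comma is no longer kept on one line but exploded to one argument per line. A minimal sketch of the behavior (illustrative code, not taken from this repo):

```python
# Accepted as-is by black 19.10b0 (call fits, trailing comma tolerated):
entry = RevisionEntry(
    hash_to_bytes(id), date=date, root=hash_to_bytes(root),
)

# black 20.8b1 honors the pre-existing trailing comma and explodes the
# call, one argument per line -- exactly the churn in this diff:
entry = RevisionEntry(
    hash_to_bytes(id),
    date=date,
    root=hash_to_bytes(root),
)
```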
diff --git a/swh/provenance/postgresql/provenancedb_with_path.py b/swh/provenance/postgresql/provenancedb_with_path.py
index ce4f468..8fedbee 100644
--- a/swh/provenance/postgresql/provenancedb_with_path.py
+++ b/swh/provenance/postgresql/provenancedb_with_path.py
@@ -1,193 +1,196 @@
 from datetime import datetime
 import os
 from typing import Generator, Optional, Tuple

 import psycopg2
 import psycopg2.extras

 from ..model import DirectoryEntry, FileEntry
 from ..revision import RevisionEntry
 from .provenancedb_base import ProvenanceDBBase


 def normalize(path: bytes) -> bytes:
     return path[2:] if path.startswith(bytes("." + os.path.sep, "utf-8")) else path


 class ProvenanceWithPathDB(ProvenanceDBBase):
     def content_add_to_directory(
         self, directory: DirectoryEntry, blob: FileEntry, prefix: bytes
     ):
         self.insert_cache["content_in_dir"].add(
             (blob.id, directory.id, normalize(os.path.join(prefix, blob.name)))
         )

     def content_add_to_revision(
         self, revision: RevisionEntry, blob: FileEntry, prefix: bytes
     ):
         self.insert_cache["content_early_in_rev"].add(
             (blob.id, revision.id, normalize(os.path.join(prefix, blob.name)))
         )

     def content_find_first(
         self, blobid: bytes
     ) -> Optional[Tuple[bytes, bytes, datetime, bytes]]:
         self.cursor.execute(
             """SELECT content_location.sha1 AS blob,
                       revision.sha1 AS rev,
                       revision.date AS date,
                       content_location.path AS path
                FROM (SELECT content_hex.sha1,
                             content_hex.rev,
                             location.path
                      FROM (SELECT content.sha1,
                                   content_early_in_rev.rev,
                                   content_early_in_rev.loc
                            FROM content_early_in_rev
                            JOIN content
                              ON content.id=content_early_in_rev.blob
                            WHERE content.sha1=%s
                           ) AS content_hex
                      JOIN location
                        ON location.id=content_hex.loc
                     ) AS content_location
                JOIN revision
                  ON revision.id=content_location.rev
                ORDER BY date, rev, path ASC LIMIT 1""",
             (blobid,),
         )
         return self.cursor.fetchone()

     def content_find_all(
         self, blobid: bytes, limit: Optional[int] = None
     ) -> Generator[Tuple[bytes, bytes, datetime, bytes], None, None]:
         early_cut = f"LIMIT {limit}" if limit is not None else ""
         self.cursor.execute(
             f"""(SELECT content_location.sha1 AS blob,
                         revision.sha1 AS rev,
                         revision.date AS date,
                         content_location.path AS path
                  FROM (SELECT content_hex.sha1,
                               content_hex.rev,
                               location.path
                        FROM (SELECT content.sha1,
                                     content_early_in_rev.rev,
                                     content_early_in_rev.loc
                              FROM content_early_in_rev
                              JOIN content
                                ON content.id=content_early_in_rev.blob
                              WHERE content.sha1=%s
                             ) AS content_hex
                        JOIN location
                          ON location.id=content_hex.loc
                       ) AS content_location
                  JOIN revision
                    ON revision.id=content_location.rev
                 )
                 UNION
                 (SELECT content_prefix.sha1 AS blob,
                         revision.sha1 AS rev,
                         revision.date AS date,
                         content_prefix.path AS path
                  FROM (SELECT content_in_rev.sha1,
                               content_in_rev.rev,
                               CASE location.path
                                 WHEN '' THEN content_in_rev.suffix
                                 WHEN '.' THEN content_in_rev.suffix
                                 ELSE (location.path || '/' ||
                                           content_in_rev.suffix)::unix_path
                               END AS path
                        FROM (SELECT content_suffix.sha1,
                                     directory_in_rev.rev,
                                     directory_in_rev.loc,
                                     content_suffix.path AS suffix
                              FROM (SELECT content_hex.sha1,
                                           content_hex.dir,
                                           location.path
                                    FROM (SELECT content.sha1,
                                                 content_in_dir.dir,
                                                 content_in_dir.loc
                                          FROM content_in_dir
                                          JOIN content
                                            ON content_in_dir.blob=content.id
                                          WHERE content.sha1=%s
                                         ) AS content_hex
                                    JOIN location
                                      ON location.id=content_hex.loc
                                   ) AS content_suffix
                              JOIN directory_in_rev
                                ON directory_in_rev.dir=content_suffix.dir
                             ) AS content_in_rev
                        JOIN location
                          ON location.id=content_in_rev.loc
                       ) AS content_prefix
                  JOIN revision
                    ON revision.id=content_prefix.rev
                 )
                 ORDER BY date, rev, path {early_cut}""",
             (blobid, blobid),
         )
         # TODO: use PostgreSQL EXPLAIN to look for query optimizations.
         yield from self.cursor.fetchall()

     def directory_add_to_revision(
         self, revision: RevisionEntry, directory: DirectoryEntry, path: bytes
     ):
         self.insert_cache["directory_in_rev"].add(
             (directory.id, revision.id, normalize(path))
         )

     def insert_location(self, src0_table, src1_table, dst_table):
         """Insert location entries in `dst_table` from the insert_cache

         Also insert missing location entries in the 'location' table.
         """
         # TODO: find a better way of doing this; might be doable in a couple
         # of SQL queries (one to insert missing entries in the 'location'
         # table, one to insert entries in the dst_table)

         # Resolve src0 ids
         src0_sha1s = tuple(set(sha1 for (sha1, _, _) in self.insert_cache[dst_table]))
         fmt = ",".join(["%s"] * len(src0_sha1s))
         self.cursor.execute(
-            f"""SELECT sha1, id FROM {src0_table} WHERE sha1 IN ({fmt})""", src0_sha1s,
+            f"""SELECT sha1, id FROM {src0_table} WHERE sha1 IN ({fmt})""",
+            src0_sha1s,
         )
         src0_values = dict(self.cursor.fetchall())

         # Resolve src1 ids
         src1_sha1s = tuple(set(sha1 for (_, sha1, _) in self.insert_cache[dst_table]))
         fmt = ",".join(["%s"] * len(src1_sha1s))
         self.cursor.execute(
-            f"""SELECT sha1, id FROM {src1_table} WHERE sha1 IN ({fmt})""", src1_sha1s,
+            f"""SELECT sha1, id FROM {src1_table} WHERE sha1 IN ({fmt})""",
+            src1_sha1s,
         )
         src1_values = dict(self.cursor.fetchall())

         # insert missing locations
         locations = tuple(set((loc,) for (_, _, loc) in self.insert_cache[dst_table]))
         psycopg2.extras.execute_values(
             self.cursor,
             """
             INSERT INTO location(path) VALUES %s
             ON CONFLICT (path) DO NOTHING
             """,
             locations,
         )
         # fetch location ids
         fmt = ",".join(["%s"] * len(locations))
         self.cursor.execute(
-            f"SELECT path, id FROM location WHERE path IN ({fmt})", locations,
+            f"SELECT path, id FROM location WHERE path IN ({fmt})",
+            locations,
         )
         loc_ids = dict(self.cursor.fetchall())

         # Insert values in dst_table
         rows = [
             (src0_values[sha1_src], src1_values[sha1_dst], loc_ids[loc])
             for (sha1_src, sha1_dst, loc) in self.insert_cache[dst_table]
         ]
         psycopg2.extras.execute_values(
             self.cursor,
             f"""INSERT INTO {dst_table} VALUES %s
                 ON CONFLICT DO NOTHING""",
             rows,
         )
         self.insert_cache[dst_table].clear()
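The TODO in `insert_location()` hints that the four round-trips (two id lookups, the location upsert, the location re-read) could collapse into two statements by letting PostgreSQL resolve the ids itself. A rough, untested sketch under the same schema assumptions (sha1-keyed entity tables, path-keyed `location`); `insert_location_batched` is a hypothetical name, not project API:

```python
import psycopg2.extras


def insert_location_batched(cursor, src0_table, src1_table, dst_table, triples):
    """Hypothetical two-statement variant of insert_location().

    `triples` is the set of (src_sha1, dst_sha1, path) tuples from the
    insert cache.
    """
    rows = list(triples)
    # 1) add any missing paths to 'location'
    psycopg2.extras.execute_values(
        cursor,
        """INSERT INTO location(path)
           SELECT DISTINCT v.path FROM (VALUES %s) AS v(src, dst, path)
           ON CONFLICT (path) DO NOTHING""",
        rows,
    )
    # 2) resolve all three ids in SQL and fill the relation table directly
    psycopg2.extras.execute_values(
        cursor,
        f"""INSERT INTO {dst_table}
            SELECT s.id, d.id, l.id
            FROM (VALUES %s) AS v(src, dst, path)
            JOIN {src0_table} s ON s.sha1 = v.src
            JOIN {src1_table} d ON d.sha1 = v.dst
            JOIN location l ON l.path = v.path
            ON CONFLICT DO NOTHING""",
        rows,
    )
```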
diff --git a/swh/provenance/revision.py b/swh/provenance/revision.py
index 5adf3b9..378e30a 100644
--- a/swh/provenance/revision.py
+++ b/swh/provenance/revision.py
@@ -1,152 +1,154 @@
 from datetime import datetime, timezone
 from itertools import islice
 import threading
 from typing import Iterable, Iterator, Optional, Tuple

 import iso8601

 from swh.model.hashutil import hash_to_bytes
 from swh.provenance.archive import ArchiveInterface
 from swh.provenance.model import RevisionEntry

 ########################################################################################
 ########################################################################################


 class CSVRevisionIterator:
     """Iterator over revisions typically present in the given CSV file.

     The input is an iterator that produces 3 elements per row:

       (id, date, root)

     where:
     - id: is the id (sha1_git) of the revision
     - date: is the author date
     - root: sha1 of the directory
     """

     def __init__(
         self,
         revisions: Iterable[Tuple[bytes, datetime, bytes]],
         archive: ArchiveInterface,
         limit: Optional[int] = None,
     ):
         self.revisions: Iterator[Tuple[bytes, datetime, bytes]]
         if limit is not None:
             self.revisions = islice(revisions, limit)
         else:
             self.revisions = iter(revisions)
         self.mutex = threading.Lock()
         self.archive = archive

     def __iter__(self):
         return self

     def __next__(self):
         with self.mutex:
             id, date, root = next(self.revisions)
             date = iso8601.parse_date(date)
             if date.tzinfo is None:
                 date = date.replace(tzinfo=timezone.utc)
             return RevisionEntry(
-                hash_to_bytes(id), date=date, root=hash_to_bytes(root),
+                hash_to_bytes(id),
+                date=date,
+                root=hash_to_bytes(root),
             )


 # class ArchiveRevisionIterator(RevisionIterator):
 #     """Iterator over revisions present in the given database."""
 #
 #     def __init__(self, conn, limit=None, chunksize=100):
 #         self.cur = conn.cursor()
 #         self.chunksize = chunksize
 #         self.records = []
 #         if limit is None:
 #             self.cur.execute('''SELECT id, date, committer_date, directory
 #                              FROM revision''')
 #         else:
 #             self.cur.execute('''SELECT id, date, committer_date, directory
 #                              FROM revision
 #                              LIMIT %s''', (limit,))
 #         for row in self.cur.fetchmany(self.chunksize):
 #             record = self.make_record(row)
 #             if record is not None:
 #                 self.records.append(record)
 #         self.mutex = threading.Lock()
 #
 #     def __del__(self):
 #         self.cur.close()
 #
 #     def next(self):
 #         self.mutex.acquire()
 #         if not self.records:
 #             self.records.clear()
 #             for row in self.cur.fetchmany(self.chunksize):
 #                 record = self.make_record(row)
 #                 if record is not None:
 #                     self.records.append(record)
 #
 #         if self.records:
 #             revision, *self.records = self.records
 #             self.mutex.release()
 #             return revision
 #         else:
 #             self.mutex.release()
 #             return None
 #
 #     def make_record(self, row):
 #         # Only revision with author or committer date are considered
 #         if row[1] is not None:
 #             # If the revision has author date, it takes precedence
 #             return RevisionEntry(row[0], row[1], row[3])
 #         elif row[2] is not None:
 #             # If not, we use the committer date
 #             return RevisionEntry(row[0], row[2], row[3])


 ########################################################################################
 ########################################################################################


 # class RevisionWorker(threading.Thread):
 #     def __init__(
 #         self,
 #         id: int,
 #         conninfo: dict,
 #         archive: ArchiveInterface,
 #         revisions: RevisionIterator
 #     ):
 #         from .provenance import get_provenance
 #
 #         super().__init__()
 #         self.archive = archive
 #         self.id = id
 #         self.provenance = get_provenance(conninfo)
 #         self.revisions = revisions
 #
 #     def run(self):
 #         from .provenance import revision_add
 #
 #         while True:
 #             revision = self.revisions.next()
 #             if revision is None: break
 #
 #             processed = False
 #             while not processed:
 #                 logging.info(
 #                     f'Thread {(
 #                         self.id
 #                     )} - Processing revision {(
 #                         hash_to_hex(revision.id)
 #                     )} (timestamp: {revision.date})'
 #                 )
 #                 processed = revision_add(self.provenance, self.archive, revision)
 #                 if not processed:
 #                     logging.warning(
 #                         f'Thread {(
 #                             self.id
 #                         )} - Failed to process revision {(
 #                             hash_to_hex(revision.id)
 #                         )} (timestamp: {revision.date})'
 #                     )
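For context: despite the `Tuple[bytes, datetime, bytes]` annotation, `__next__` feeds each field through `hash_to_bytes()` and `iso8601.parse_date()`, so hex/ISO strings straight out of a CSV are what the iterator actually consumes. A hypothetical usage sketch (the `revisions.csv` file, its 3-column layout, and the `archive` object are assumptions, not project fixtures):

```python
import csv

# archive: an ArchiveInterface instance supplied by the caller
with open("revisions.csv", newline="") as f:
    # each row: (sha1_git hex, ISO-8601 author date, root directory sha1 hex)
    rows = ((id, date, root) for id, date, root in csv.reader(f))
    for revision in CSVRevisionIterator(rows, archive, limit=10):
        print(revision.id.hex(), revision.date)
```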
diff --git a/swh/provenance/tests/test_cli.py b/swh/provenance/tests/test_cli.py
index e0f332b..93046b5 100644
--- a/swh/provenance/tests/test_cli.py
+++ b/swh/provenance/tests/test_cli.py
@@ -1,111 +1,114 @@
 # Copyright (C) 2021  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information

 from click.testing import CliRunner
 import psycopg2
 import pytest
 import yaml

 from swh.core.cli import swh as swhmain
 import swh.core.cli.db  # noqa ; ensure cli is loaded
 import swh.provenance.cli  # noqa ; ensure cli is loaded


 def test_cli_swh_db_help():
     # swhmain.add_command(provenance_cli)
     result = CliRunner().invoke(swhmain, ["provenance", "-h"])
     assert result.exit_code == 0
     assert "Commands:" in result.output
     commands = result.output.split("Commands:")[1]
     for command in (
         "create",
         "find-all",
         "find-first",
         "iter-origins",
         "iter-revisions",
     ):
         assert f" {command} " in commands


 def test_cli_create_deprecated(provenance_db, tmp_path):
     conffile = tmp_path / "config.yml"
     conf = {
-        "provenance": {"cls": "local", "with_path": True,},
+        "provenance": {
+            "cls": "local",
+            "with_path": True,
+        },
     }
     yaml.dump(conf, conffile.open("w"))
     result = CliRunner().invoke(
         swhmain, ["provenance", "--config-file", str(conffile), "create", "--drop"]
     )
     assert result.exit_code == 0, result.output
     assert "DeprecationWarning" in result.output


 TABLES = {
     "dbflavor",
     "dbversion",
     "content",
     "content_early_in_rev",
     "content_in_dir",
     "directory",
     "directory_in_rev",
     "origin",
     "revision",
     "revision_before_rev",
     "revision_in_org",
 }


 @pytest.mark.parametrize(
     "flavor, dbtables", (("with-path", TABLES | {"location"}), ("without-path", TABLES))
 )
 def test_cli_db_create_and_init_db_with_flavor(
     monkeypatch, postgresql, flavor, dbtables
 ):
     """Test that 'swh db init provenance' works with both the with-path and
     without-path flavors"""
     dbname = f"{flavor}-db"

     # DB creation using 'swh db create'
     db_params = postgresql.get_dsn_parameters()
     monkeypatch.setenv("PGHOST", db_params["host"])
     monkeypatch.setenv("PGUSER", db_params["user"])
     monkeypatch.setenv("PGPORT", db_params["port"])
     result = CliRunner().invoke(swhmain, ["db", "create", "-d", dbname, "provenance"])
     assert result.exit_code == 0, result.output

     # DB init using 'swh db init'
     result = CliRunner().invoke(
         swhmain, ["db", "init", "-d", dbname, "--flavor", flavor, "provenance"]
     )
     assert result.exit_code == 0, result.output
     assert f"(flavor {flavor})" in result.output

     db_params["dbname"] = dbname
     cnx = psycopg2.connect(**db_params)
     # check the DB looks OK (check for db_flavor and expected tables)
     with cnx.cursor() as cur:
         cur.execute("select swh_get_dbflavor()")
         assert cur.fetchone() == (flavor,)

         cur.execute(
             "select table_name from information_schema.tables "
             "where table_schema = 'public' "
             f"and table_catalog = '{dbname}'"
         )
         tables = set(x for (x,) in cur.fetchall())
         assert tables == dbtables


 def test_cli_init_db_default_flavor(provenance_db):
     "Test that 'swh db init provenance' defaults to a with-path flavored DB"
     dbname = provenance_db.dsn
     result = CliRunner().invoke(swhmain, ["db", "init", "-d", dbname, "provenance"])
     assert result.exit_code == 0, result.output

     with provenance_db.cursor() as cur:
         cur.execute("select swh_get_dbflavor()")
         assert cur.fetchone() == ("with-path",)
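For reference, the deprecated configuration that `test_cli_create_deprecated` writes to `config.yml` serializes as follows (a sketch of PyYAML's default output for that dict; the on-disk shape is an inference, not quoted from the repo):

```python
import yaml

conf = {"provenance": {"cls": "local", "with_path": True}}
print(yaml.dump(conf))
# provenance:
#   cls: local
#   with_path: true
```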
"provenance"]) assert result.exit_code == 0, result.output with provenance_db.cursor() as cur: cur.execute("select swh_get_dbflavor()") assert cur.fetchone() == ("with-path",) diff --git a/swh/provenance/tests/test_provenance_db.py b/swh/provenance/tests/test_provenance_db.py index ea2c133..8abc38a 100644 --- a/swh/provenance/tests/test_provenance_db.py +++ b/swh/provenance/tests/test_provenance_db.py @@ -1,288 +1,292 @@ # Copyright (C) 2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import datetime import pytest from swh.model.tests.swh_model_data import TEST_OBJECTS from swh.provenance.model import RevisionEntry from swh.provenance.origin import OriginEntry from swh.provenance.provenance import origin_add, revision_add from swh.provenance.storage.archive import ArchiveStorage from swh.provenance.tests.conftest import synthetic_result def ts2dt(ts: dict) -> datetime.datetime: timestamp = datetime.datetime.fromtimestamp( ts["timestamp"]["seconds"], datetime.timezone(datetime.timedelta(minutes=ts["offset"])), ) return timestamp.replace(microsecond=ts["timestamp"]["microseconds"]) def test_provenance_origin_add(provenance, swh_storage_with_objects): """Test the ProvenanceDB.origin_add() method""" for origin in TEST_OBJECTS["origin"]: entry = OriginEntry(url=origin.url, revisions=[]) origin_add(ArchiveStorage(swh_storage_with_objects), provenance, entry) # TODO: check some facts here def test_provenance_add_revision(provenance, storage_and_CMDBTS, archive): storage, data = storage_and_CMDBTS for i in range(2): # do it twice, there should be no change in results for revision in data["revision"]: entry = RevisionEntry( id=revision["id"], date=ts2dt(revision["date"]), root=revision["directory"], ) revision_add(provenance, archive, entry) # there should be as many entries in 'revision' as revisions from the # test dataset provenance.cursor.execute("SELECT count(*) FROM revision") assert provenance.cursor.fetchone()[0] == len(data["revision"]) # there should be no 'location' for the empty path provenance.cursor.execute("SELECT count(*) FROM location WHERE path=''") assert provenance.cursor.fetchone()[0] == 0 # there should be 32 'location' for non-empty path provenance.cursor.execute("SELECT count(*) FROM location WHERE path!=''") assert provenance.cursor.fetchone()[0] == 32 # there should be as many entries in 'revision' as revisions from the # test dataset provenance.cursor.execute("SELECT count(*) FROM revision") assert provenance.cursor.fetchone()[0] == len(data["revision"]) # 7 directories provenance.cursor.execute("SELECT count(*) FROM directory") assert provenance.cursor.fetchone()[0] == 7 # 12 D-R entries provenance.cursor.execute("SELECT count(*) FROM directory_in_rev") assert provenance.cursor.fetchone()[0] == 12 provenance.cursor.execute("SELECT count(*) FROM content") assert provenance.cursor.fetchone()[0] == len(data["content"]) provenance.cursor.execute("SELECT count(*) FROM content_in_dir") assert provenance.cursor.fetchone()[0] == 16 provenance.cursor.execute("SELECT count(*) FROM content_early_in_rev") assert provenance.cursor.fetchone()[0] == 13 def test_provenance_content_find_first(provenance, storage_and_CMDBTS, archive): storage, data = storage_and_CMDBTS for revision in data["revision"]: entry = RevisionEntry( - id=revision["id"], date=ts2dt(revision["date"]), root=revision["directory"], + 
id=revision["id"], + date=ts2dt(revision["date"]), + root=revision["directory"], ) revision_add(provenance, archive, entry) first_expected_content = [ { "content": "43f3c871310a8e524004e91f033e7fb3b0bc8475", "rev": "35ccb8dd1b53d2d8a5c1375eb513ef2beaa79ae5", "date": 1609757158, "path": "README.md", }, { "content": "6dc7e44ead5c0e300fe94448c3e046dfe33ad4d1", "rev": "9e36e095b79e36a3da104ce272989b39cd68aefd", "date": 1610644094, "path": "Red/Blue/Green/a", }, { "content": "9f6e04be05297905f1275d3f4e0bb0583458b2e8", "rev": "bfbfcc72ae7fc35d6941386c36280512e6b38440", "date": 1610644097, "path": "Red/Blue/Green/b", }, { "content": "a28fa70e725ebda781e772795ca080cd737b823c", "rev": "0a31c9d509783abfd08f9fdfcd3acae20f17dfd0", "date": 1610644099, "path": "Red/Blue/c", }, { "content": "c0229d305adf3edf49f031269a70e3e87665fe88", "rev": "1d1fcf1816a8a2a77f9b1f342ba11d0fe9fd7f17", "date": 1610644105, "path": "Purple/d", }, { "content": "94ba40161084e8b80943accd9d24e1f9dd47189b", "rev": "55d4dc9471de6144f935daf3c38878155ca274d5", "date": 1610644113, "path": ("Dark/Brown/Purple/f", "Dark/Brown/Purple/g", "Dark/h"), # XXX }, { "content": "5e8f9ceaee9dafae2e3210e254fdf170295f8b5b", "rev": "a8939755d0be76cfea136e9e5ebce9bc51c49fef", "date": 1610644116, "path": "Dark/h", }, { "content": "bbd54b961764094b13f10cef733e3725d0a834c3", "rev": "ca1774a07b6e02c1caa7ae678924efa9259ee7c6", "date": 1610644118, "path": "Paris/i", }, { "content": "7ce4fe9a22f589fa1656a752ea371b0ebc2106b1", "rev": "611fe71d75b6ea151b06e3845c09777acc783d82", "date": 1610644120, "path": "Paris/j", }, { "content": "cb79b39935c9392fa5193d9f84a6c35dc9c22c75", "rev": "4c5551b4969eb2160824494d40b8e1f6187fc01e", "date": 1610644122, "path": "Paris/k", }, ] for expected in first_expected_content: contentid = bytes.fromhex(expected["content"]) (blob, rev, date, path) = provenance.content_find_first(contentid) if isinstance(expected["path"], tuple): assert bytes(path).decode() in expected["path"] else: assert bytes(path).decode() == expected["path"] assert bytes(blob) == contentid assert bytes(rev).hex() == expected["rev"] assert int(date.timestamp()) == expected["date"] def sha1s(cur, table): """return the 'sha1' column from the DB 'table' (as hex) 'cur' is a cursor to the provenance index DB. """ cur.execute(f"SELECT sha1 FROM {table}") return set(sha1.hex() for (sha1,) in cur.fetchall()) def locations(cur): """return the 'path' column from the DB location table 'cur' is a cursor to the provenance index DB. """ cur.execute("SELECT encode(location.path::bytea, 'escape') FROM location") return set(x for (x,) in cur.fetchall()) def relations(cur, src, dst): """return the triplets ('sha1', 'sha1', 'path') from the DB for the relation between 'src' table and 'dst' table (i.e. for C-R, C-D and D-R relations). 'cur' is a cursor to the provenance index DB. 
""" relation = { ("content", "revision"): "content_early_in_rev", ("content", "directory"): "content_in_dir", ("directory", "revision"): "directory_in_rev", }[(src, dst)] srccol = {"content": "blob", "directory": "dir"}[src] dstcol = {"directory": "dir", "revision": "rev"}[dst] cur.execute( f"SELECT encode(src.sha1::bytea, 'hex')," f" encode(dst.sha1::bytea, 'hex')," f" encode(location.path::bytea, 'escape') " f"FROM {relation} as rel, " f" {src} as src, {dst} as dst, location " f"WHERE rel.{srccol}=src.id AND rel.{dstcol}=dst.id AND rel.loc=location.id" ) return set(cur.fetchall()) @pytest.mark.parametrize( "syntheticfile, args", ( ("synthetic_lower_1.txt", {"lower": True, "mindepth": 1}), ("synthetic_upper_1.txt", {"lower": False, "mindepth": 1}), ("synthetic_lower_2.txt", {"lower": True, "mindepth": 2}), ("synthetic_upper_2.txt", {"lower": False, "mindepth": 2}), ), ) def test_provenance_heuristics( provenance, storage_and_CMDBTS, archive, syntheticfile, args ): storage, data = storage_and_CMDBTS revisions = {rev["id"]: rev for rev in data["revision"]} rows = { "content": set(), "content_in_dir": set(), "content_early_in_rev": set(), "directory": set(), "directory_in_rev": set(), "location": set(), "revision": set(), } for synth_rev in synthetic_result(syntheticfile): revision = revisions[synth_rev["sha1"]] entry = RevisionEntry( - id=revision["id"], date=ts2dt(revision["date"]), root=revision["directory"], + id=revision["id"], + date=ts2dt(revision["date"]), + root=revision["directory"], ) revision_add(provenance, archive, entry, **args) # each "entry" in the synth file is one new revision rows["revision"].add(synth_rev["sha1"].hex()) assert rows["revision"] == sha1s(provenance.cursor, "revision"), synth_rev[ "msg" ] # this revision might have added new content objects rows["content"] |= set(x["dst"].hex() for x in synth_rev["R_C"]) rows["content"] |= set(x["dst"].hex() for x in synth_rev["D_C"]) assert rows["content"] == sha1s(provenance.cursor, "content"), synth_rev["msg"] # check for R-C (direct) entries rows["content_early_in_rev"] |= set( (x["dst"].hex(), x["src"].hex(), x["path"]) for x in synth_rev["R_C"] ) assert rows["content_early_in_rev"] == relations( provenance.cursor, "content", "revision" ), synth_rev["msg"] # check directories rows["directory"] |= set(x["dst"].hex() for x in synth_rev["R_D"]) assert rows["directory"] == sha1s(provenance.cursor, "directory"), synth_rev[ "msg" ] # check for R-D entries rows["directory_in_rev"] |= set( (x["dst"].hex(), x["src"].hex(), x["path"]) for x in synth_rev["R_D"] ) assert rows["directory_in_rev"] == relations( provenance.cursor, "directory", "revision" ), synth_rev["msg"] # check for D-C entries rows["content_in_dir"] |= set( (x["dst"].hex(), x["src"].hex(), x["path"]) for x in synth_rev["D_C"] ) assert rows["content_in_dir"] == relations( provenance.cursor, "content", "directory" ), synth_rev["msg"] # check for location entries rows["location"] |= set(x["path"] for x in synth_rev["R_C"]) rows["location"] |= set(x["path"] for x in synth_rev["D_C"]) rows["location"] |= set(x["path"] for x in synth_rev["R_D"]) assert rows["location"] == locations(provenance.cursor), synth_rev["msg"] diff --git a/tox.ini b/tox.ini index bd11bd4..acd42f2 100644 --- a/tox.ini +++ b/tox.ini @@ -1,35 +1,35 @@ [tox] envlist=black,flake8,mypy,py3 [testenv] extras = testing deps = pytest-cov commands = pytest --doctest-modules \ {envsitepackagesdir}/swh/provenance \ --cov={envsitepackagesdir}/swh/provenance \ --cov-branch {posargs} [testenv:black] 
diff --git a/tox.ini b/tox.ini
index bd11bd4..acd42f2 100644
--- a/tox.ini
+++ b/tox.ini
@@ -1,35 +1,35 @@
 [tox]
 envlist=black,flake8,mypy,py3

 [testenv]
 extras =
   testing
 deps =
   pytest-cov
 commands =
   pytest --doctest-modules \
          {envsitepackagesdir}/swh/provenance \
          --cov={envsitepackagesdir}/swh/provenance \
          --cov-branch {posargs}

 [testenv:black]
 skip_install = true
 deps =
-  black==19.10b0
+  black==20.8b1
 commands =
   {envpython} -m black --check swh

 [testenv:flake8]
 skip_install = true
 deps =
   flake8
 commands =
   {envpython} -m flake8

 [testenv:mypy]
 extras =
   testing
 deps =
   mypy
 commands =
   mypy swh