diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,48 @@ +repos: +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v2.4.0 + hooks: + - id: trailing-whitespace + - id: check-json + - id: check-yaml + +- repo: https://gitlab.com/pycqa/flake8 + rev: 3.8.3 + hooks: + - id: flake8 + +- repo: https://github.com/codespell-project/codespell + rev: v1.16.0 + hooks: + - id: codespell + +- repo: local + hooks: + - id: mypy + name: mypy + entry: mypy + args: [swh] + pass_filenames: false + language: system + types: [python] + +# unfortunately, we are far from being able to enable this... +# - repo: https://github.com/PyCQA/pydocstyle.git +# rev: 4.0.0 +# hooks: +# - id: pydocstyle +# name: pydocstyle +# description: pydocstyle is a static analysis tool for checking compliance with Python docstring conventions. +# entry: pydocstyle --convention=google +# language: python +# types: [python] + +- repo: https://github.com/PyCQA/isort + rev: 5.5.2 + hooks: + - id: isort + +- repo: https://github.com/python/black + rev: 19.10b0 + hooks: + - id: black diff --git a/pyproject.toml b/pyproject.toml --- a/pyproject.toml +++ b/pyproject.toml @@ -1,2 +1,12 @@ [tool.black] target-version = ['py37'] + +[tool.isort] +multi_line_output = 3 +include_trailing_comma = true +force_grid_wrap = 0 +use_parentheses = true +ensure_newline_before_comments = true +line_length = 88 +force_sort_within_sections = true + diff --git a/setup.py b/setup.py --- a/setup.py +++ b/setup.py @@ -4,10 +4,10 @@ # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -from setuptools import setup, find_packages - -from os import path from io import open +from os import path + +from setuptools import find_packages, setup here = path.abspath(path.dirname(__file__)) diff --git a/swh/provenance/__init__.py b/swh/provenance/__init__.py --- a/swh/provenance/__init__.py +++ b/swh/provenance/__init__.py @@ -1,10 +1,10 @@ from .archive import ArchiveInterface -from .provenance import ProvenanceInterface -from .storage.archive import ArchiveStorage from .postgresql.archive import ArchivePostgreSQL from .postgresql.db_utils import connect from .postgresql.provenance import ProvenancePostgreSQL from .postgresql_nopath.provenance import ProvenancePostgreSQLNoPath +from .provenance import ProvenanceInterface +from .storage.archive import ArchiveStorage def get_archive(cls: str, **kwargs) -> ArchiveInterface: diff --git a/swh/provenance/cli.py b/swh/provenance/cli.py --- a/swh/provenance/cli.py +++ b/swh/provenance/cli.py @@ -91,8 +91,8 @@ ctx.obj["config"] = conf if profile: - import cProfile import atexit + import cProfile print("Profiling...") pr = cProfile.Profile() @@ -136,8 +136,8 @@ # TODO: add file size filtering """Process a provided list of revisions.""" from . import get_archive, get_provenance - from .revision import FileRevisionIterator from .provenance import revision_add + from .revision import FileRevisionIterator archive = get_archive(**ctx.obj["config"]["archive"]) provenance = get_provenance(**ctx.obj["config"]["provenance"]) diff --git a/swh/provenance/origin.py b/swh/provenance/origin.py --- a/swh/provenance/origin.py +++ b/swh/provenance/origin.py @@ -1,9 +1,9 @@ -from .archive import ArchiveInterface -from .revision import RevisionEntry - from typing import Optional -from swh.model.model import Origin, ObjectType, TargetType +from swh.model.model import ObjectType, Origin, TargetType + +from .archive import ArchiveInterface +from .revision import RevisionEntry class OriginEntry: diff --git a/swh/provenance/postgresql/archive.py b/swh/provenance/postgresql/archive.py --- a/swh/provenance/postgresql/archive.py +++ b/swh/provenance/postgresql/archive.py @@ -1,25 +1,19 @@ -import psycopg2 +from typing import Any, Dict, List -# import threading +from methodtools import lru_cache +import psycopg2 from ..archive import ArchiveInterface -# from functools import lru_cache -from methodtools import lru_cache -from typing import Any, Dict, List - class ArchivePostgreSQL(ArchiveInterface): def __init__(self, conn: psycopg2.extensions.connection): self.conn = conn - # self.mutex = threading.Lock() def directory_ls(self, id: bytes) -> List[Dict[str, Any]]: # TODO: only call directory_ls_internal if the id is not being queried by # someone else. Otherwise wait until results get properly cached. - # self.mutex.acquire() entries = self.directory_ls_internal(id) - # self.mutex.release() return entries @lru_cache(maxsize=1000000) diff --git a/swh/provenance/postgresql/db_utils.py b/swh/provenance/postgresql/db_utils.py --- a/swh/provenance/postgresql/db_utils.py +++ b/swh/provenance/postgresql/db_utils.py @@ -1,7 +1,7 @@ +from configparser import ConfigParser import io -import psycopg2 -from configparser import ConfigParser +import psycopg2 def config(filename: str, section: str): diff --git a/swh/provenance/postgresql/provenance.py b/swh/provenance/postgresql/provenance.py --- a/swh/provenance/postgresql/provenance.py +++ b/swh/provenance/postgresql/provenance.py @@ -2,17 +2,17 @@ import logging import operator import os +from datetime import datetime +from typing import Any, Dict, Generator, List, Optional, Tuple + import psycopg2 import psycopg2.extras from ..model import DirectoryEntry, FileEntry from ..origin import OriginEntry -from .db_utils import connect, execute_sql from ..provenance import ProvenanceInterface from ..revision import RevisionEntry - -from datetime import datetime -from typing import Any, Dict, Generator, List, Optional, Tuple +from .db_utils import connect, execute_sql def normalize(path: bytes) -> bytes: diff --git a/swh/provenance/postgresql/provenance.sql b/swh/provenance/postgresql/provenance.sql --- a/swh/provenance/postgresql/provenance.sql +++ b/swh/provenance/postgresql/provenance.sql @@ -41,7 +41,7 @@ create table content_in_dir ( blob bigint not null, -- internal identifier of the content blob - dir bigint not null, -- internal identifier of the directory contaning the blob + dir bigint not null, -- internal identifier of the directory containing the blob loc bigint not null, -- location of the content relative to its parent directory in the isochrone frontier primary key (blob, dir, loc) -- foreign key (blob) references content (id), diff --git a/swh/provenance/postgresql_nopath/provenance.py b/swh/provenance/postgresql_nopath/provenance.py --- a/swh/provenance/postgresql_nopath/provenance.py +++ b/swh/provenance/postgresql_nopath/provenance.py @@ -2,6 +2,9 @@ import logging import operator import os +from datetime import datetime +from typing import Any, Dict, Generator, List, Optional, Tuple + import psycopg2 import psycopg2.extras @@ -11,9 +14,6 @@ from ..provenance import ProvenanceInterface from ..revision import RevisionEntry -from datetime import datetime -from typing import Any, Dict, Generator, List, Optional, Tuple - def create_database(conn: psycopg2.extensions.connection, conninfo: dict, name: str): conn.set_isolation_level(psycopg2.extensions.ISOLATION_LEVEL_AUTOCOMMIT) diff --git a/swh/provenance/postgresql_nopath/provenance.sql b/swh/provenance/postgresql_nopath/provenance.sql --- a/swh/provenance/postgresql_nopath/provenance.sql +++ b/swh/provenance/postgresql_nopath/provenance.sql @@ -38,7 +38,7 @@ create table content_in_dir ( blob bigint not null, -- internal identifier of the content blob - dir bigint not null, -- internal identifier of the directory contaning the blob + dir bigint not null, -- internal identifier of the directory containing the blob primary key (blob, dir) -- foreign key (blob) references content (id), -- foreign key (dir) references directory (id) diff --git a/swh/provenance/provenance.py b/swh/provenance/provenance.py --- a/swh/provenance/provenance.py +++ b/swh/provenance/provenance.py @@ -1,13 +1,12 @@ import os +from datetime import datetime +from typing import Dict, Generator, List, Optional, Tuple from .archive import ArchiveInterface from .model import DirectoryEntry, FileEntry, TreeEntry from .origin import OriginEntry from .revision import RevisionEntry -from datetime import datetime -from typing import Dict, Generator, List, Optional, Tuple - # TODO: consider moving to path utils file together with normalize. def is_child(path: bytes, prefix: bytes) -> bool: @@ -309,9 +308,7 @@ ) provenance.directory_add_to_revision(revision, current.entry, path) directory_process_content( - provenance, - directory=current.entry, - relative=current.entry, + provenance, directory=current.entry, relative=current.entry, ) else: # No point moving the frontier here. Either there are no files or they diff --git a/swh/provenance/revision.py b/swh/provenance/revision.py --- a/swh/provenance/revision.py +++ b/swh/provenance/revision.py @@ -1,12 +1,11 @@ -import threading - -from .archive import ArchiveInterface - from datetime import datetime +import threading from typing import Optional from swh.model.hashutil import hash_to_bytes +from .archive import ArchiveInterface + class RevisionEntry: def __init__( diff --git a/swh/provenance/storage/archive.py b/swh/provenance/storage/archive.py --- a/swh/provenance/storage/archive.py +++ b/swh/provenance/storage/archive.py @@ -1,10 +1,12 @@ -from ..archive import ArchiveInterface +from typing import List # from functools import lru_cache from methodtools import lru_cache -from typing import List + from swh.storage import get_storage +from ..archive import ArchiveInterface + class ArchiveStorage(ArchiveInterface): def __init__(self, cls: str, **kwargs):