Page Menu | Home | Software Heritage

D5080.id18155.diff
No One | Temporary

D5080.id18155.diff

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,48 @@
+repos:
+- repo: https://github.com/pre-commit/pre-commit-hooks
+ rev: v2.4.0
+ hooks:
+ - id: trailing-whitespace
+ - id: check-json
+ - id: check-yaml
+
+- repo: https://gitlab.com/pycqa/flake8
+ rev: 3.8.3
+ hooks:
+ - id: flake8
+
+- repo: https://github.com/codespell-project/codespell
+ rev: v1.16.0
+ hooks:
+ - id: codespell
+
+- repo: local
+ hooks:
+ - id: mypy
+ name: mypy
+ entry: mypy
+ args: [swh]
+ pass_filenames: false
+ language: system
+ types: [python]
+
+# unfortunately, we are far from being able to enable this...
+# - repo: https://github.com/PyCQA/pydocstyle.git
+# rev: 4.0.0
+# hooks:
+# - id: pydocstyle
+# name: pydocstyle
+# description: pydocstyle is a static analysis tool for checking compliance with Python docstring conventions.
+# entry: pydocstyle --convention=google
+# language: python
+# types: [python]
+
+- repo: https://github.com/PyCQA/isort
+ rev: 5.5.2
+ hooks:
+ - id: isort
+
+- repo: https://github.com/python/black
+ rev: 19.10b0
+ hooks:
+ - id: black
diff --git a/pyproject.toml b/pyproject.toml
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,2 +1,12 @@
[tool.black]
target-version = ['py37']
+
+[tool.isort]
+multi_line_output = 3
+include_trailing_comma = true
+force_grid_wrap = 0
+use_parentheses = true
+ensure_newline_before_comments = true
+line_length = 88
+force_sort_within_sections = true
+
diff --git a/setup.py b/setup.py
--- a/setup.py
+++ b/setup.py
@@ -4,10 +4,10 @@
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
-from setuptools import setup, find_packages
-
-from os import path
from io import open
+from os import path
+
+from setuptools import find_packages, setup
here = path.abspath(path.dirname(__file__))
diff --git a/swh/provenance/__init__.py b/swh/provenance/__init__.py
--- a/swh/provenance/__init__.py
+++ b/swh/provenance/__init__.py
@@ -1,10 +1,10 @@
from .archive import ArchiveInterface
-from .provenance import ProvenanceInterface
-from .storage.archive import ArchiveStorage
from .postgresql.archive import ArchivePostgreSQL
from .postgresql.db_utils import connect
from .postgresql.provenance import ProvenancePostgreSQL
from .postgresql_nopath.provenance import ProvenancePostgreSQLNoPath
+from .provenance import ProvenanceInterface
+from .storage.archive import ArchiveStorage
def get_archive(cls: str, **kwargs) -> ArchiveInterface:
diff --git a/swh/provenance/cli.py b/swh/provenance/cli.py
--- a/swh/provenance/cli.py
+++ b/swh/provenance/cli.py
@@ -91,8 +91,8 @@
ctx.obj["config"] = conf
if profile:
- import cProfile
import atexit
+ import cProfile
print("Profiling...")
pr = cProfile.Profile()
@@ -136,8 +136,8 @@
# TODO: add file size filtering
"""Process a provided list of revisions."""
from . import get_archive, get_provenance
- from .revision import FileRevisionIterator
from .provenance import revision_add
+ from .revision import FileRevisionIterator
archive = get_archive(**ctx.obj["config"]["archive"])
provenance = get_provenance(**ctx.obj["config"]["provenance"])
diff --git a/swh/provenance/origin.py b/swh/provenance/origin.py
--- a/swh/provenance/origin.py
+++ b/swh/provenance/origin.py
@@ -1,9 +1,9 @@
-from .archive import ArchiveInterface
-from .revision import RevisionEntry
-
from typing import Optional
-from swh.model.model import Origin, ObjectType, TargetType
+from swh.model.model import ObjectType, Origin, TargetType
+
+from .archive import ArchiveInterface
+from .revision import RevisionEntry
class OriginEntry:
diff --git a/swh/provenance/postgresql/archive.py b/swh/provenance/postgresql/archive.py
--- a/swh/provenance/postgresql/archive.py
+++ b/swh/provenance/postgresql/archive.py
@@ -1,25 +1,19 @@
-import psycopg2
+from typing import Any, Dict, List
-# import threading
+from methodtools import lru_cache
+import psycopg2
from ..archive import ArchiveInterface
-# from functools import lru_cache
-from methodtools import lru_cache
-from typing import Any, Dict, List
-
class ArchivePostgreSQL(ArchiveInterface):
def __init__(self, conn: psycopg2.extensions.connection):
self.conn = conn
- # self.mutex = threading.Lock()
def directory_ls(self, id: bytes) -> List[Dict[str, Any]]:
# TODO: only call directory_ls_internal if the id is not being queried by
# someone else. Otherwise wait until results get properly cached.
- # self.mutex.acquire()
entries = self.directory_ls_internal(id)
- # self.mutex.release()
return entries
@lru_cache(maxsize=1000000)
diff --git a/swh/provenance/postgresql/db_utils.py b/swh/provenance/postgresql/db_utils.py
--- a/swh/provenance/postgresql/db_utils.py
+++ b/swh/provenance/postgresql/db_utils.py
@@ -1,7 +1,7 @@
+from configparser import ConfigParser
import io
-import psycopg2
-from configparser import ConfigParser
+import psycopg2
def config(filename: str, section: str):
diff --git a/swh/provenance/postgresql/provenance.py b/swh/provenance/postgresql/provenance.py
--- a/swh/provenance/postgresql/provenance.py
+++ b/swh/provenance/postgresql/provenance.py
@@ -2,17 +2,17 @@
import logging
import operator
import os
+from datetime import datetime
+from typing import Any, Dict, Generator, List, Optional, Tuple
+
import psycopg2
import psycopg2.extras
from ..model import DirectoryEntry, FileEntry
from ..origin import OriginEntry
-from .db_utils import connect, execute_sql
from ..provenance import ProvenanceInterface
from ..revision import RevisionEntry
-
-from datetime import datetime
-from typing import Any, Dict, Generator, List, Optional, Tuple
+from .db_utils import connect, execute_sql
def normalize(path: bytes) -> bytes:
@@ -462,14 +462,14 @@
self.select_cache["revision"][revision.id] = date
return date
- def revision_get_prefered_origin(self, revision: RevisionEntry) -> int:
+ def revision_get_preferred_origin(self, revision: RevisionEntry) -> int:
# TODO: adapt this method to consider cached values
self.cursor.execute(
"""SELECT COALESCE(org,0) FROM revision WHERE sha1=%s""", (revision.id,)
)
row = self.cursor.fetchone()
# None means revision is not in database;
- # 0 means revision has no prefered origin
+ # 0 means revision has no preferred origin
return row[0] if row is not None and row[0] != 0 else None
def revision_in_history(self, revision: RevisionEntry) -> bool:
@@ -484,7 +484,7 @@
)
return self.cursor.fetchone() is not None
- def revision_set_prefered_origin(
+ def revision_set_preferred_origin(
self, origin: OriginEntry, revision: RevisionEntry
):
# TODO: adapt this method to consider cached values
diff --git a/swh/provenance/postgresql/provenance.sql b/swh/provenance/postgresql/provenance.sql
--- a/swh/provenance/postgresql/provenance.sql
+++ b/swh/provenance/postgresql/provenance.sql
@@ -41,7 +41,7 @@
create table content_in_dir
(
blob bigint not null, -- internal identifier of the content blob
- dir bigint not null, -- internal identifier of the directory contaning the blob
+ dir bigint not null, -- internal identifier of the directory containing the blob
loc bigint not null, -- location of the content relative to its parent directory in the isochrone frontier
primary key (blob, dir, loc)
-- foreign key (blob) references content (id),
@@ -112,7 +112,7 @@
id bigserial primary key, -- internal identifier of the revision
sha1 sha1_git unique not null, -- intrinsic identifier of the revision
date timestamptz not null, -- timestamp of the revision
- org bigint -- id of the prefered origin
+ org bigint -- id of the preferred origin
-- foreign key (org) references origin (id)
);
diff --git a/swh/provenance/postgresql_nopath/provenance.py b/swh/provenance/postgresql_nopath/provenance.py
--- a/swh/provenance/postgresql_nopath/provenance.py
+++ b/swh/provenance/postgresql_nopath/provenance.py
@@ -2,6 +2,9 @@
import logging
import operator
import os
+from datetime import datetime
+from typing import Any, Dict, Generator, List, Optional, Tuple
+
import psycopg2
import psycopg2.extras
@@ -11,9 +14,6 @@
from ..provenance import ProvenanceInterface
from ..revision import RevisionEntry
-from datetime import datetime
-from typing import Any, Dict, Generator, List, Optional, Tuple
-
def create_database(conn: psycopg2.extensions.connection, conninfo: dict, name: str):
conn.set_isolation_level(psycopg2.extensions.ISOLATION_LEVEL_AUTOCOMMIT)
@@ -399,14 +399,14 @@
self.select_cache["revision"][revision.id] = date
return date
- def revision_get_prefered_origin(self, revision: RevisionEntry) -> int:
+ def revision_get_preferred_origin(self, revision: RevisionEntry) -> int:
# TODO: adapt this method to consider cached values
self.cursor.execute(
"""SELECT COALESCE(org,0) FROM revision WHERE sha1=%s""", (revision.id,)
)
row = self.cursor.fetchone()
# None means revision is not in database;
- # 0 means revision has no prefered origin
+ # 0 means revision has no preferred origin
return row[0] if row is not None and row[0] != 0 else None
def revision_in_history(self, revision: RevisionEntry) -> bool:
@@ -421,7 +421,7 @@
)
return self.cursor.fetchone() is not None
- def revision_set_prefered_origin(
+ def revision_set_preferred_origin(
self, origin: OriginEntry, revision: RevisionEntry
):
# TODO: adapt this method to consider cached values
diff --git a/swh/provenance/postgresql_nopath/provenance.sql b/swh/provenance/postgresql_nopath/provenance.sql
--- a/swh/provenance/postgresql_nopath/provenance.sql
+++ b/swh/provenance/postgresql_nopath/provenance.sql
@@ -38,7 +38,7 @@
create table content_in_dir
(
blob bigint not null, -- internal identifier of the content blob
- dir bigint not null, -- internal identifier of the directory contaning the blob
+ dir bigint not null, -- internal identifier of the directory containing the blob
primary key (blob, dir)
-- foreign key (blob) references content (id),
-- foreign key (dir) references directory (id)
@@ -92,7 +92,7 @@
id bigserial primary key, -- internal identifier of the revision
sha1 sha1_git unique not null, -- intrinsic identifier of the revision
date timestamptz not null, -- timestamp of the revision
- org bigint -- id of the prefered origin
+ org bigint -- id of the preferred origin
-- foreign key (org) references origin (id)
);
diff --git a/swh/provenance/provenance.py b/swh/provenance/provenance.py
--- a/swh/provenance/provenance.py
+++ b/swh/provenance/provenance.py
@@ -1,13 +1,12 @@
import os
+from datetime import datetime
+from typing import Dict, Generator, List, Optional, Tuple
from .archive import ArchiveInterface
from .model import DirectoryEntry, FileEntry, TreeEntry
from .origin import OriginEntry
from .revision import RevisionEntry
-from datetime import datetime
-from typing import Dict, Generator, List, Optional, Tuple
-
# TODO: consider moving to path utils file together with normalize.
def is_child(path: bytes, prefix: bytes) -> bool:
@@ -90,13 +89,13 @@
def revision_get_early_date(self, revision: RevisionEntry) -> Optional[datetime]:
raise NotImplementedError
- def revision_get_prefered_origin(self, revision: RevisionEntry) -> int:
+ def revision_get_preferred_origin(self, revision: RevisionEntry) -> int:
raise NotImplementedError
def revision_in_history(self, revision: RevisionEntry) -> bool:
raise NotImplementedError
- def revision_set_prefered_origin(
+ def revision_set_preferred_origin(
self, origin: OriginEntry, revision: RevisionEntry
):
raise NotImplementedError
@@ -138,11 +137,11 @@
while stack:
relative, current = stack.pop()
- # Check if current revision has no prefered origin and update if necessary.
- prefered = provenance.revision_get_prefered_origin(current)
+ # Check if current revision has no preferred origin and update if necessary.
+ preferred = provenance.revision_get_preferred_origin(current)
- if prefered is None:
- provenance.revision_set_prefered_origin(origin, current)
+ if preferred is None:
+ provenance.revision_set_preferred_origin(origin, current)
########################################################################
if relative is None:
@@ -309,9 +308,7 @@
)
provenance.directory_add_to_revision(revision, current.entry, path)
directory_process_content(
- provenance,
- directory=current.entry,
- relative=current.entry,
+ provenance, directory=current.entry, relative=current.entry,
)
else:
# No point moving the frontier here. Either there are no files or they
diff --git a/swh/provenance/revision.py b/swh/provenance/revision.py
--- a/swh/provenance/revision.py
+++ b/swh/provenance/revision.py
@@ -1,12 +1,11 @@
-import threading
-
-from .archive import ArchiveInterface
-
from datetime import datetime
+import threading
from typing import Optional
from swh.model.hashutil import hash_to_bytes
+from .archive import ArchiveInterface
+
class RevisionEntry:
def __init__(
@@ -127,12 +126,12 @@
# return None
#
# def make_record(self, row):
-# # Only revision with author or commiter date are considered
+# # Only revision with author or committer date are considered
# if row[1] is not None:
# # If the revision has author date, it takes precedence
# return RevisionEntry(row[0], row[1], row[3])
# elif row[2] is not None:
-# # If not, we use the commiter date
+# # If not, we use the committer date
# return RevisionEntry(row[0], row[2], row[3])
diff --git a/swh/provenance/storage/archive.py b/swh/provenance/storage/archive.py
--- a/swh/provenance/storage/archive.py
+++ b/swh/provenance/storage/archive.py
@@ -1,10 +1,12 @@
-from ..archive import ArchiveInterface
+from typing import List
# from functools import lru_cache
from methodtools import lru_cache
-from typing import List
+
from swh.storage import get_storage
+from ..archive import ArchiveInterface
+
class ArchiveStorage(ArchiveInterface):
def __init__(self, cls: str, **kwargs):

File Metadata

Mime Type
text/plain
Expires
Dec 21 2024, 7:09 PM (11 w, 4 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3231879

Event Timeline