Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7124764
D5080.id18133.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
14 KB
Subscribers
None
D5080.id18133.diff
View Options
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,48 @@
+repos:
+- repo: https://github.com/pre-commit/pre-commit-hooks
+ rev: v2.4.0
+ hooks:
+ - id: trailing-whitespace
+ - id: check-json
+ - id: check-yaml
+
+- repo: https://gitlab.com/pycqa/flake8
+ rev: 3.8.3
+ hooks:
+ - id: flake8
+
+- repo: https://github.com/codespell-project/codespell
+ rev: v1.16.0
+ hooks:
+ - id: codespell
+
+- repo: local
+ hooks:
+ - id: mypy
+ name: mypy
+ entry: mypy
+ args: [swh]
+ pass_filenames: false
+ language: system
+ types: [python]
+
+# Unfortunately, we are still far from being able to enable pydocstyle.
+# - repo: https://github.com/PyCQA/pydocstyle.git
+# rev: 4.0.0
+# hooks:
+# - id: pydocstyle
+# name: pydocstyle
+# description: pydocstyle is a static analysis tool for checking compliance with Python docstring conventions.
+# entry: pydocstyle --convention=google
+# language: python
+# types: [python]
+
+- repo: https://github.com/PyCQA/isort
+ rev: 5.5.2
+ hooks:
+ - id: isort
+
+- repo: https://github.com/python/black
+ rev: 19.10b0
+ hooks:
+ - id: black
diff --git a/setup.py b/setup.py
--- a/setup.py
+++ b/setup.py
@@ -4,10 +4,10 @@
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
-from setuptools import setup, find_packages
-
-from os import path
from io import open
+from os import path
+
+from setuptools import find_packages, setup
here = path.abspath(path.dirname(__file__))
diff --git a/swh/provenance/__init__.py b/swh/provenance/__init__.py
--- a/swh/provenance/__init__.py
+++ b/swh/provenance/__init__.py
@@ -1,10 +1,10 @@
from .archive import ArchiveInterface
-from .provenance import ProvenanceInterface
-from .storage.archive import ArchiveStorage
from .postgresql.archive import ArchivePostgreSQL
from .postgresql.db_utils import connect
from .postgresql.provenance import ProvenancePostgreSQL
from .postgresql_nopath.provenance import ProvenancePostgreSQLNoPath
+from .provenance import ProvenanceInterface
+from .storage.archive import ArchiveStorage
def get_archive(cls: str, **kwargs) -> ArchiveInterface:
diff --git a/swh/provenance/cli.py b/swh/provenance/cli.py
--- a/swh/provenance/cli.py
+++ b/swh/provenance/cli.py
@@ -91,8 +91,8 @@
ctx.obj["config"] = conf
if profile:
- import cProfile
import atexit
+ import cProfile
print("Profiling...")
pr = cProfile.Profile()
@@ -136,8 +136,8 @@
# TODO: add file size filtering
"""Process a provided list of revisions."""
from . import get_archive, get_provenance
- from .revision import FileRevisionIterator
from .provenance import revision_add
+ from .revision import FileRevisionIterator
archive = get_archive(**ctx.obj["config"]["archive"])
provenance = get_provenance(**ctx.obj["config"]["provenance"])
diff --git a/swh/provenance/origin.py b/swh/provenance/origin.py
--- a/swh/provenance/origin.py
+++ b/swh/provenance/origin.py
@@ -1,9 +1,9 @@
-from .archive import ArchiveInterface
-from .revision import RevisionEntry
-
from typing import Optional
-from swh.model.model import Origin, ObjectType, TargetType
+from swh.model.model import ObjectType, Origin, TargetType
+
+from .archive import ArchiveInterface
+from .revision import RevisionEntry
class OriginEntry:
diff --git a/swh/provenance/postgresql/archive.py b/swh/provenance/postgresql/archive.py
--- a/swh/provenance/postgresql/archive.py
+++ b/swh/provenance/postgresql/archive.py
@@ -1,25 +1,19 @@
-import psycopg2
+from typing import Any, Dict, List
-# import threading
+import psycopg2
+from methodtools import lru_cache
from ..archive import ArchiveInterface
-# from functools import lru_cache
-from methodtools import lru_cache
-from typing import Any, Dict, List
-
class ArchivePostgreSQL(ArchiveInterface):
def __init__(self, conn: psycopg2.extensions.connection):
self.conn = conn
- # self.mutex = threading.Lock()
def directory_ls(self, id: bytes) -> List[Dict[str, Any]]:
# TODO: only call directory_ls_internal if the id is not being queried by
# someone else. Otherwise wait until results get properly cached.
- # self.mutex.acquire()
entries = self.directory_ls_internal(id)
- # self.mutex.release()
return entries
@lru_cache(maxsize=1000000)
diff --git a/swh/provenance/postgresql/db_utils.py b/swh/provenance/postgresql/db_utils.py
--- a/swh/provenance/postgresql/db_utils.py
+++ b/swh/provenance/postgresql/db_utils.py
@@ -1,8 +1,8 @@
import io
-import psycopg2
-
from configparser import ConfigParser
+import psycopg2
+
def config(filename: str, section: str):
# create a parser
diff --git a/swh/provenance/postgresql/provenance.py b/swh/provenance/postgresql/provenance.py
--- a/swh/provenance/postgresql/provenance.py
+++ b/swh/provenance/postgresql/provenance.py
@@ -2,17 +2,17 @@
import logging
import operator
import os
+from datetime import datetime
+from typing import Any, Dict, Generator, List, Optional, Tuple
+
import psycopg2
import psycopg2.extras
from ..model import DirectoryEntry, FileEntry
from ..origin import OriginEntry
-from .db_utils import connect, execute_sql
from ..provenance import ProvenanceInterface
from ..revision import RevisionEntry
-
-from datetime import datetime
-from typing import Any, Dict, Generator, List, Optional, Tuple
+from .db_utils import connect, execute_sql
def normalize(path: bytes) -> bytes:
@@ -462,14 +462,14 @@
self.select_cache["revision"][revision.id] = date
return date
- def revision_get_prefered_origin(self, revision: RevisionEntry) -> int:
+ def revision_get_preferred_origin(self, revision: RevisionEntry) -> int:
# TODO: adapt this method to consider cached values
self.cursor.execute(
"""SELECT COALESCE(org,0) FROM revision WHERE sha1=%s""", (revision.id,)
)
row = self.cursor.fetchone()
# None means revision is not in database;
- # 0 means revision has no prefered origin
+ # 0 means revision has no preferred origin
return row[0] if row is not None and row[0] != 0 else None
def revision_in_history(self, revision: RevisionEntry) -> bool:
@@ -484,7 +484,7 @@
)
return self.cursor.fetchone() is not None
- def revision_set_prefered_origin(
+ def revision_set_preferred_origin(
self, origin: OriginEntry, revision: RevisionEntry
):
# TODO: adapt this method to consider cached values
diff --git a/swh/provenance/postgresql/provenance.sql b/swh/provenance/postgresql/provenance.sql
--- a/swh/provenance/postgresql/provenance.sql
+++ b/swh/provenance/postgresql/provenance.sql
@@ -41,7 +41,7 @@
create table content_in_dir
(
blob bigint not null, -- internal identifier of the content blob
- dir bigint not null, -- internal identifier of the directory contaning the blob
+ dir bigint not null, -- internal identifier of the directory containing the blob
loc bigint not null, -- location of the content relative to its parent directory in the isochrone frontier
primary key (blob, dir, loc)
-- foreign key (blob) references content (id),
@@ -112,7 +112,7 @@
id bigserial primary key, -- internal identifier of the revision
sha1 sha1_git unique not null, -- intrinsic identifier of the revision
date timestamptz not null, -- timestamp of the revision
- org bigint -- id of the prefered origin
+ org bigint -- id of the preferred origin
-- foreign key (org) references origin (id)
);
diff --git a/swh/provenance/postgresql_nopath/provenance.py b/swh/provenance/postgresql_nopath/provenance.py
--- a/swh/provenance/postgresql_nopath/provenance.py
+++ b/swh/provenance/postgresql_nopath/provenance.py
@@ -2,6 +2,9 @@
import logging
import operator
import os
+from datetime import datetime
+from typing import Any, Dict, Generator, List, Optional, Tuple
+
import psycopg2
import psycopg2.extras
@@ -11,9 +14,6 @@
from ..provenance import ProvenanceInterface
from ..revision import RevisionEntry
-from datetime import datetime
-from typing import Any, Dict, Generator, List, Optional, Tuple
-
def create_database(conn: psycopg2.extensions.connection, conninfo: dict, name: str):
conn.set_isolation_level(psycopg2.extensions.ISOLATION_LEVEL_AUTOCOMMIT)
@@ -399,14 +399,14 @@
self.select_cache["revision"][revision.id] = date
return date
- def revision_get_prefered_origin(self, revision: RevisionEntry) -> int:
+ def revision_get_preferred_origin(self, revision: RevisionEntry) -> int:
# TODO: adapt this method to consider cached values
self.cursor.execute(
"""SELECT COALESCE(org,0) FROM revision WHERE sha1=%s""", (revision.id,)
)
row = self.cursor.fetchone()
# None means revision is not in database;
- # 0 means revision has no prefered origin
+ # 0 means revision has no preferred origin
return row[0] if row is not None and row[0] != 0 else None
def revision_in_history(self, revision: RevisionEntry) -> bool:
@@ -421,7 +421,7 @@
)
return self.cursor.fetchone() is not None
- def revision_set_prefered_origin(
+ def revision_set_preferred_origin(
self, origin: OriginEntry, revision: RevisionEntry
):
# TODO: adapt this method to consider cached values
diff --git a/swh/provenance/postgresql_nopath/provenance.sql b/swh/provenance/postgresql_nopath/provenance.sql
--- a/swh/provenance/postgresql_nopath/provenance.sql
+++ b/swh/provenance/postgresql_nopath/provenance.sql
@@ -38,7 +38,7 @@
create table content_in_dir
(
blob bigint not null, -- internal identifier of the content blob
- dir bigint not null, -- internal identifier of the directory contaning the blob
+ dir bigint not null, -- internal identifier of the directory containing the blob
primary key (blob, dir)
-- foreign key (blob) references content (id),
-- foreign key (dir) references directory (id)
@@ -92,7 +92,7 @@
id bigserial primary key, -- internal identifier of the revision
sha1 sha1_git unique not null, -- intrinsic identifier of the revision
date timestamptz not null, -- timestamp of the revision
- org bigint -- id of the prefered origin
+ org bigint -- id of the preferred origin
-- foreign key (org) references origin (id)
);
diff --git a/swh/provenance/provenance.py b/swh/provenance/provenance.py
--- a/swh/provenance/provenance.py
+++ b/swh/provenance/provenance.py
@@ -1,13 +1,12 @@
import os
+from datetime import datetime
+from typing import Dict, Generator, List, Optional, Tuple
from .archive import ArchiveInterface
from .model import DirectoryEntry, FileEntry, TreeEntry
from .origin import OriginEntry
from .revision import RevisionEntry
-from datetime import datetime
-from typing import Dict, Generator, List, Optional, Tuple
-
# TODO: consider moving to path utils file together with normalize.
def is_child(path: bytes, prefix: bytes) -> bool:
@@ -90,13 +89,13 @@
def revision_get_early_date(self, revision: RevisionEntry) -> Optional[datetime]:
raise NotImplementedError
- def revision_get_prefered_origin(self, revision: RevisionEntry) -> int:
+ def revision_get_preferred_origin(self, revision: RevisionEntry) -> int:
raise NotImplementedError
def revision_in_history(self, revision: RevisionEntry) -> bool:
raise NotImplementedError
- def revision_set_prefered_origin(
+ def revision_set_preferred_origin(
self, origin: OriginEntry, revision: RevisionEntry
):
raise NotImplementedError
@@ -138,11 +137,11 @@
while stack:
relative, current = stack.pop()
- # Check if current revision has no prefered origin and update if necessary.
- prefered = provenance.revision_get_prefered_origin(current)
+ # If the current revision has no preferred origin yet, set it to this origin.
+ preferred = provenance.revision_get_preferred_origin(current)
- if prefered is None:
- provenance.revision_set_prefered_origin(origin, current)
+ if preferred is None:
+ provenance.revision_set_preferred_origin(origin, current)
########################################################################
if relative is None:
@@ -309,9 +308,7 @@
)
provenance.directory_add_to_revision(revision, current.entry, path)
directory_process_content(
- provenance,
- directory=current.entry,
- relative=current.entry,
+ provenance, directory=current.entry, relative=current.entry,
)
else:
# No point moving the frontier here. Either there are no files or they
diff --git a/swh/provenance/revision.py b/swh/provenance/revision.py
--- a/swh/provenance/revision.py
+++ b/swh/provenance/revision.py
@@ -1,12 +1,11 @@
import threading
-
-from .archive import ArchiveInterface
-
from datetime import datetime
from typing import Optional
from swh.model.hashutil import hash_to_bytes
+from .archive import ArchiveInterface
+
class RevisionEntry:
def __init__(
@@ -127,12 +126,12 @@
# return None
#
# def make_record(self, row):
-# # Only revision with author or commiter date are considered
+# # Only revisions with an author or committer date are considered
# if row[1] is not None:
# # If the revision has author date, it takes precedence
# return RevisionEntry(row[0], row[1], row[3])
# elif row[2] is not None:
-# # If not, we use the commiter date
+# # If not, we use the committer date
# return RevisionEntry(row[0], row[2], row[3])
diff --git a/swh/provenance/storage/archive.py b/swh/provenance/storage/archive.py
--- a/swh/provenance/storage/archive.py
+++ b/swh/provenance/storage/archive.py
@@ -1,10 +1,12 @@
-from ..archive import ArchiveInterface
+from typing import List
# from functools import lru_cache
from methodtools import lru_cache
-from typing import List
+
from swh.storage import get_storage
+from ..archive import ArchiveInterface
+
class ArchiveStorage(ArchiveInterface):
def __init__(self, cls: str, **kwargs):
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Dec 21 2024, 6:45 PM (11 w, 4 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3226801
Attached To
D5080: Add a pre-commit config file
Event Timeline
Log In to Comment