Changeset View
Changeset View
Standalone View
Standalone View
swh/provenance/provenance.py
# Copyright (C) 2021-2022 The Software Heritage developers | # Copyright (C) 2021-2022 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
from datetime import datetime | from datetime import datetime | ||||
vlorentz: I'm not a huge fan of `from ... import` with stdlib modules in general, but here in particular… | |||||
Done | Inline Actions | douardda: Yeah, I guess you are right, let me change this (thx). | |||||
import hashlib | |||||
import logging | import logging | ||||
import os | import os | ||||
from types import TracebackType | from types import TracebackType | ||||
from typing import Dict, Generator, Iterable, Optional, Set, Tuple, Type | from typing import Dict, Generator, Iterable, Optional, Set, Tuple, Type | ||||
from typing_extensions import Literal, TypedDict | from typing_extensions import Literal, TypedDict | ||||
from swh.core.statsd import statsd | from swh.core.statsd import statsd | ||||
▲ Show 20 Lines • Show All 232 Lines • ▼ Show 20 Lines | def flush_revision_content_layer(self) -> None: | ||||
metric=BACKEND_OPERATIONS_METRIC, | metric=BACKEND_OPERATIONS_METRIC, | ||||
tags={"method": "flush_revision_content_retry_directory_date"}, | tags={"method": "flush_revision_content_retry_directory_date"}, | ||||
) | ) | ||||
LOGGER.warning( | LOGGER.warning( | ||||
"Unable to write directory dates to the storage. Retrying..." | "Unable to write directory dates to the storage. Retrying..." | ||||
) | ) | ||||
revs = { | revs = { | ||||
sha1 | sha1: RevisionData(date=None, origin=None) | ||||
for sha1, date in self.cache["revision"]["data"].items() | for sha1, date in self.cache["revision"]["data"].items() | ||||
if sha1 in self.cache["revision"]["added"] and date is not None | if sha1 in self.cache["revision"]["added"] and date is not None | ||||
} | } | ||||
if revs: | if revs: | ||||
while not self.storage.revision_add(revs): | while not self.storage.revision_add(revs): | ||||
statsd.increment( | statsd.increment( | ||||
metric=BACKEND_OPERATIONS_METRIC, | metric=BACKEND_OPERATIONS_METRIC, | ||||
tags={"method": "flush_revision_content_retry_revision_none"}, | tags={"method": "flush_revision_content_retry_revision_none"}, | ||||
) | ) | ||||
LOGGER.warning( | LOGGER.warning( | ||||
"Unable to write revision entities to the storage. Retrying..." | "Unable to write revision entities to the storage. Retrying..." | ||||
) | ) | ||||
paths = { | paths = { | ||||
path | hashlib.sha1(path).digest(): path | ||||
for _, _, path in self.cache["content_in_revision"] | for _, _, path in self.cache["content_in_revision"] | ||||
| self.cache["content_in_directory"] | | self.cache["content_in_directory"] | ||||
| self.cache["directory_in_revision"] | | self.cache["directory_in_revision"] | ||||
} | } | ||||
if paths: | if paths: | ||||
while not self.storage.location_add(paths): | while not self.storage.location_add(paths): | ||||
statsd.increment( | statsd.increment( | ||||
metric=BACKEND_OPERATIONS_METRIC, | metric=BACKEND_OPERATIONS_METRIC, | ||||
▲ Show 20 Lines • Show All 239 Lines • Show Last 20 Lines |
I'm not a huge fan of `from ... import` with stdlib modules in general, but here in particular there is the extra issue of `sha1` being a common variable name.
(And then you can undo the variable name change below.)