diff --git a/swh/provenance/graph.py b/swh/provenance/graph.py
--- a/swh/provenance/graph.py
+++ b/swh/provenance/graph.py
@@ -188,7 +188,7 @@
     root = IsochroneNode(directory, dbdate=root_date)
     stack = [root]
     logging.debug(
-        f"Recursively creating isochrone graph for revision {revision.id.hex()}..."
+        "Recursively creating isochrone graph for revision %s...", revision.id.hex()
     )
     fdates: Dict[Sha1Git, datetime] = {}  # map {file_id: date}
     while stack:
@@ -199,10 +199,12 @@
             # the revision is being processed out of order.
             if current.dbdate is not None and current.dbdate > revision.date:
                 logging.debug(
-                    f"Invalidating frontier on {current.entry.id.hex()}"
-                    f" (date {current.dbdate})"
-                    f" when processing revision {revision.id.hex()}"
-                    f" (date {revision.date})"
+                    "Invalidating frontier on %s (date %s) "
+                    "when processing revision %s (date %s)",
+                    current.entry.id.hex(),
+                    current.dbdate,
+                    revision.id.hex(),
+                    revision.date,
                 )
                 current.invalidate()

@@ -221,11 +223,11 @@
             fdates.update(provenance.content_get_early_dates(current.entry.files))

     logging.debug(
-        f"Isochrone graph for revision {revision.id.hex()} successfully created!"
+        "Isochrone graph for revision %s successfully created!", revision.id.hex()
     )

     # Precalculate max known date for each node in the graph (only directory nodes are
     # pushed to the stack).
-    logging.debug(f"Computing maxdates for revision {revision.id.hex()}...")
+    logging.debug("Computing maxdates for revision %s...", revision.id.hex())
     stack = [root]
     while stack:
@@ -276,5 +278,5 @@
                 # node should be treated as unknown
                 current.maxdate = revision.date
                 current.known = False
-    logging.debug(f"Maxdates for revision {revision.id.hex()} successfully computed!")
+    logging.debug("Maxdates for revision %s successfully computed!", revision.id.hex())
     return root
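Note on the conversion pattern above (it applies to every hunk in this series): passing values as logging arguments instead of interpolating them with f-strings defers the string formatting until a handler actually emits the record, and keeps the message template constant, which helps log aggregation and deduplication. A minimal stdlib-only sketch of the difference (not part of this patch):

    import logging

    logging.basicConfig(level=logging.INFO)
    log = logging.getLogger(__name__)

    big = list(range(1_000_000))

    # f-string: the million-element repr is built at the call site even though
    # the DEBUG record is dropped by the INFO-level logger.
    log.debug(f"payload: {big}")

    # %-style: debug() bails out on the level check before creating the record,
    # so the repr is never built here.
    log.debug("payload: %s", big)

Argument expressions such as revision.id.hex() are still evaluated at call time in both forms; only the final formatting is deferred.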
diff --git a/swh/provenance/origin.py b/swh/provenance/origin.py
--- a/swh/provenance/origin.py
+++ b/swh/provenance/origin.py
@@ -60,9 +60,10 @@
     provenance.flush()
     stop = time.time()
     logging.debug(
-        "Origins "
-        ";".join([origin.id.hex() + ":" + origin.snapshot.hex() for origin in origins])
-        + f" were processed in {stop - start} secs (commit took {stop - done} secs)!"
+        "Origins %s were processed in %s secs (commit took %s secs)!",
+        ";".join(origin.id.hex() + ":" + origin.snapshot.hex() for origin in origins),
+        stop - start,
+        stop - done,
     )


diff --git a/swh/provenance/provenance.py b/swh/provenance/provenance.py
--- a/swh/provenance/provenance.py
+++ b/swh/provenance/provenance.py
@@ -79,41 +79,47 @@

         # For this layer, relations need to be inserted first so that, in case of
         # failure, reprocessing the input does not generated an inconsistent database.
-        while not self.storage.relation_add(
-            RelationType.CNT_EARLY_IN_REV,
-            (
-                RelationData(src=src, dst=dst, path=path)
-                for src, dst, path in self.cache["content_in_revision"]
-            ),
-        ):
-            logging.warning(
-                f"Unable to write {RelationType.CNT_EARLY_IN_REV} rows to the storage. "
-                f"Data: {self.cache['content_in_revision']}. Retrying..."
-            )
-
-        while not self.storage.relation_add(
-            RelationType.CNT_IN_DIR,
-            (
-                RelationData(src=src, dst=dst, path=path)
-                for src, dst, path in self.cache["content_in_directory"]
-            ),
-        ):
-            logging.warning(
-                f"Unable to write {RelationType.CNT_IN_DIR} rows to the storage. "
-                f"Data: {self.cache['content_in_directory']}. Retrying..."
-            )
-
-        while not self.storage.relation_add(
-            RelationType.DIR_IN_REV,
-            (
-                RelationData(src=src, dst=dst, path=path)
-                for src, dst, path in self.cache["directory_in_revision"]
-            ),
-        ):
-            logging.warning(
-                f"Unable to write {RelationType.DIR_IN_REV} rows to the storage. "
-                f"Data: {self.cache['directory_in_revision']}. Retrying..."
-            )
+        if self.cache["content_in_revision"]:
+            while not self.storage.relation_add(
+                RelationType.CNT_EARLY_IN_REV,
+                (
+                    RelationData(src=src, dst=dst, path=path)
+                    for src, dst, path in self.cache["content_in_revision"]
+                ),
+            ):
+                logging.warning(
+                    "Unable to write %s rows to the storage. Data: %s. Retrying...",
+                    RelationType.CNT_EARLY_IN_REV,
+                    self.cache["content_in_revision"],
+                )
+
+        if self.cache["content_in_directory"]:
+            while not self.storage.relation_add(
+                RelationType.CNT_IN_DIR,
+                (
+                    RelationData(src=src, dst=dst, path=path)
+                    for src, dst, path in self.cache["content_in_directory"]
+                ),
+            ):
+                logging.warning(
+                    "Unable to write %s rows to the storage. Data: %s. Retrying...",
+                    RelationType.CNT_IN_DIR,
+                    self.cache["content_in_directory"],
+                )
+
+        if self.cache["directory_in_revision"]:
+            while not self.storage.relation_add(
+                RelationType.DIR_IN_REV,
+                (
+                    RelationData(src=src, dst=dst, path=path)
+                    for src, dst, path in self.cache["directory_in_revision"]
+                ),
+            ):
+                logging.warning(
+                    "Unable to write %s rows to the storage. Data: %s. Retrying...",
+                    RelationType.DIR_IN_REV,
+                    self.cache["directory_in_revision"],
+                )

         # After relations, dates for the entities can be safely set, acknowledging that
         # these entities won't need to be reprocessed in case of failure.
@@ -122,33 +128,39 @@
             for sha1, date in self.cache["content"]["data"].items()
             if sha1 in self.cache["content"]["added"] and date is not None
         }
-        while not self.storage.content_set_date(dates):
-            logging.warning(
-                f"Unable to write content dates to the storage. "
-                f"Data: {dates}. Retrying..."
-            )
+        if dates:
+            while not self.storage.content_set_date(dates):
+                logging.warning(
+                    "Unable to write content dates to the storage. "
+                    "Data: %s. Retrying...",
+                    dates,
+                )

         dates = {
             sha1: date
             for sha1, date in self.cache["directory"]["data"].items()
             if sha1 in self.cache["directory"]["added"] and date is not None
         }
-        while not self.storage.directory_set_date(dates):
-            logging.warning(
-                f"Unable to write directory dates to the storage. "
-                f"Data: {dates}. Retrying..."
-            )
+        if dates:
+            while not self.storage.directory_set_date(dates):
+                logging.warning(
+                    "Unable to write directory dates to the storage. "
+                    "Data: %s. Retrying...",
+                    dates,
+                )

         dates = {
             sha1: date
             for sha1, date in self.cache["revision"]["data"].items()
             if sha1 in self.cache["revision"]["added"] and date is not None
         }
-        while not self.storage.revision_set_date(dates):
-            logging.warning(
-                f"Unable to write revision dates to the storage. "
-                f"Data: {dates}. Retrying..."
-            )
+        if dates:
+            while not self.storage.revision_set_date(dates):
+                logging.warning(
+                    "Unable to write revision dates to the storage. "
+                    "Data: %s. Retrying...",
+                    dates,
+                )

         # Origin-revision layer insertions #############################################

@@ -159,11 +171,13 @@
             for sha1, url in self.cache["origin"]["data"].items()
             if sha1 in self.cache["origin"]["added"]
         }
-        while not self.storage.origin_set_url(urls):
-            logging.warning(
-                f"Unable to write origins urls to the storage. "
-                f"Data: {urls}. Retrying..."
-            )
+        if urls:
+            while not self.storage.origin_set_url(urls):
+                logging.warning(
+                    "Unable to write origins urls to the storage. "
+                    "Data: %s. Retrying...",
+                    urls,
+                )

         # Second, flat models for revisions' histories (ie. revision-before-revision).
         data: Iterable[RelationData] = sum(
@@ -176,11 +190,13 @@
             ],
             [],
         )
-        while not self.storage.relation_add(RelationType.REV_BEFORE_REV, data):
-            logging.warning(
-                f"Unable to write {RelationType.REV_BEFORE_REV} rows to the storage. "
-                f"Data: {data}. Retrying..."
-            )
+        if data:
+            while not self.storage.relation_add(RelationType.REV_BEFORE_REV, data):
+                logging.warning(
+                    "Unable to write %s rows to the storage. Data: %s. Retrying...",
+                    RelationType.REV_BEFORE_REV,
+                    data,
+                )

         # Heads (ie. revision-in-origin entries) should be inserted once flat models for
         # their histories were already added. This is to guarantee consistent results if
@@ -190,11 +206,15 @@
             RelationData(src=rev, dst=org, path=None)
             for rev, org in self.cache["revision_in_origin"]
         )
-        while not self.storage.relation_add(RelationType.REV_IN_ORG, data):
-            logging.warning(
-                f"Unable to write {RelationType.REV_IN_ORG} rows to the storage. "
-                f"Data: {data}. Retrying..."
-            )
+        # Materialize: a generator is always truthy and is exhausted after one attempt.
+        data = list(data)
+        if data:
+            while not self.storage.relation_add(RelationType.REV_IN_ORG, data):
+                logging.warning(
+                    "Unable to write %s rows to the storage. Data: %s. Retrying...",
+                    RelationType.REV_IN_ORG,
+                    data,
+                )

         # Finally, preferred origins for the visited revisions are set (this step can be
         # reordered if required).
@@ -202,11 +222,13 @@
             sha1: self.cache["revision_origin"]["data"][sha1]
             for sha1 in self.cache["revision_origin"]["added"]
         }
-        while not self.storage.revision_set_origin(origins):
-            logging.warning(
-                f"Unable to write preferred origins to the storage. "
-                f"Data: {origins}. Retrying..."
-            )
+        if origins:
+            while not self.storage.revision_set_origin(origins):
+                logging.warning(
+                    "Unable to write preferred origins to the storage. "
+                    "Data: %s. Retrying...",
+                    origins,
+                )

         # clear local cache ############################################################
         self.clear_caches()
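All the guards added above share one shape: skip the storage round-trip when the cache bucket is empty, otherwise retry until the storage call reports success. A hypothetical condensed form of the pattern, assuming the write callable returns True on success and False on transient failure (flush_with_retry and its signature are illustrative, not swh.provenance API):

    import logging
    from typing import Callable, Collection, TypeVar

    T = TypeVar("T")

    def flush_with_retry(
        write: Callable[[Collection[T]], bool], rows: Collection[T]
    ) -> None:
        if not rows:  # nothing to flush: avoid a pointless storage round-trip
            return
        while not write(rows):  # rows must be re-iterable across attempts
            logging.warning("Unable to write %d rows. Retrying...", len(rows))

This is also why the REV_IN_ORG payload is materialized with list(data) above: a bare generator is always truthy, so the emptiness guard would never skip, and a failed first attempt would leave it exhausted for the retry.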
" + "Data: %s. Retrying...", + urls, + ) # Second, flat models for revisions' histories (ie. revision-before-revision). data: Iterable[RelationData] = sum( @@ -176,11 +190,13 @@ ], [], ) - while not self.storage.relation_add(RelationType.REV_BEFORE_REV, data): - logging.warning( - f"Unable to write {RelationType.REV_BEFORE_REV} rows to the storage. " - f"Data: {data}. Retrying..." - ) + if data: + while not self.storage.relation_add(RelationType.REV_BEFORE_REV, data): + logging.warning( + "Unable to write %s rows to the storage. Data: %s. Retrying...", + RelationType.REV_BEFORE_REV, + data, + ) # Heads (ie. revision-in-origin entries) should be inserted once flat models for # their histories were already added. This is to guarantee consistent results if @@ -190,11 +206,13 @@ RelationData(src=rev, dst=org, path=None) for rev, org in self.cache["revision_in_origin"] ) - while not self.storage.relation_add(RelationType.REV_IN_ORG, data): - logging.warning( - f"Unable to write {RelationType.REV_IN_ORG} rows to the storage. " - f"Data: {data}. Retrying..." - ) + if data: + while not self.storage.relation_add(RelationType.REV_IN_ORG, data): + logging.warning( + "Unable to write %s rows to the storage. Data: %s. Retrying...", + RelationType.REV_IN_ORG, + data, + ) # Finally, preferred origins for the visited revisions are set (this step can be # reordered if required). @@ -202,11 +220,13 @@ sha1: self.cache["revision_origin"]["data"][sha1] for sha1 in self.cache["revision_origin"]["added"] } - while not self.storage.revision_set_origin(origins): - logging.warning( - f"Unable to write preferred origins to the storage. " - f"Data: {origins}. Retrying..." - ) + if origins: + while not self.storage.revision_set_origin(origins): + logging.warning( + "Unable to write preferred origins to the storage. " + "Data: %s. Retrying...", + origins, + ) # clear local cache ############################################################ self.clear_caches() diff --git a/swh/provenance/revision.py b/swh/provenance/revision.py --- a/swh/provenance/revision.py +++ b/swh/provenance/revision.py @@ -67,8 +67,10 @@ date = provenance.revision_get_date(revision) if date is None or revision.date < date: logging.debug( - f"Processing revisions {revision.id.hex()}" - f" (known date {date} / revision date {revision.date})..." + "Processing revisions %s (known date %s / revision date %s)...", + revision.id.hex(), + date, + revision.date, ) graph = build_isochrone_graph( archive, @@ -91,8 +93,10 @@ provenance.flush() stop = time.time() logging.debug( - f"Revisions {';'.join([revision.id.hex() for revision in revisions])} " - f" were processed in {stop - start} secs (commit took {stop - done} secs)!" + "Revisions %s were processed in %s secs (commit took %s secs)!", + ";".join(revision.id.hex() for revision in revisions), + stop - start, + stop - done, )