diff --git a/swh/loader/core/loader.py b/swh/loader/core/loader.py --- a/swh/loader/core/loader.py +++ b/swh/loader/core/loader.py @@ -56,7 +56,7 @@ """ visit_date: Optional[datetime.datetime] - origin: Optional[Origin] + origin: Origin origin_metadata: Dict[str, Any] loaded_snapshot_id: Optional[Sha1Git] @@ -83,7 +83,6 @@ # possibly overridden in self.prepare method self.visit_date = None - self.origin = None if not hasattr(self, "visit_type"): self.visit_type: Optional[str] = None diff --git a/swh/loader/package/debian/loader.py b/swh/loader/package/debian/loader.py --- a/swh/loader/package/debian/loader.py +++ b/swh/loader/package/debian/loader.py @@ -185,7 +185,9 @@ def get_package_info(self, version: str) -> Iterator[Tuple[str, DebianPackageInfo]]: meta = self.packages[version] - p_info = DebianPackageInfo.from_metadata(meta, url=self.url, version=version) + p_info = DebianPackageInfo.from_metadata( + meta, url=self.origin.url, version=version + ) yield release_name(version), p_info def download_package( diff --git a/swh/loader/package/deposit/loader.py b/swh/loader/package/deposit/loader.py --- a/swh/loader/package/deposit/loader.py +++ b/swh/loader/package/deposit/loader.py @@ -169,7 +169,7 @@ ) -> Iterator[Tuple[str, DepositPackageInfo]]: p_info = DepositPackageInfo.from_metadata( self.metadata(), - url=self.url, + url=self.origin.url, filename=self.default_filename, version=version, ) @@ -285,7 +285,7 @@ release_id=hash_to_hex(rel_id), directory_id=hash_to_hex(release.target), snapshot_id=r["snapshot_id"], - origin_url=self.url, + origin_url=self.origin.url, ) except Exception: logger.exception("Problem when trying to update the deposit's status") diff --git a/swh/loader/package/deposit/tests/test_deposit.py b/swh/loader/package/deposit/tests/test_deposit.py --- a/swh/loader/package/deposit/tests/test_deposit.py +++ b/swh/loader/package/deposit/tests/test_deposit.py @@ -48,7 +48,7 @@ swh_storage, url, deposit_id, deposit_client, default_filename="archive.zip" ) # Something that does not exist - assert loader.url == url + assert loader.origin.url == url assert loader.client is not None assert loader.client.base_url == swh_loader_config["deposit"]["url"] diff --git a/swh/loader/package/loader.py b/swh/loader/package/loader.py --- a/swh/loader/package/loader.py +++ b/swh/loader/package/loader.py @@ -170,7 +170,7 @@ """ super().__init__(storage=storage, max_content_size=max_content_size) - self.url = url + self.origin = Origin(url=url) self.visit_date = datetime.datetime.now(tz=datetime.timezone.utc) def get_versions(self) -> Sequence[str]: @@ -222,7 +222,7 @@ def last_snapshot(self) -> Optional[Snapshot]: """Retrieve the last snapshot out of the last visit.""" - return snapshot_get_latest(self.storage, self.url) + return snapshot_get_latest(self.storage, self.origin.url) def new_packageinfo_to_extid(self, p_info: TPackageInfo) -> Optional[PartialExtID]: return p_info.extid() @@ -459,7 +459,7 @@ snapshot_id = snapshot.id assert visit.visit visit_status = OriginVisitStatus( - origin=self.url, + origin=self.origin.url, visit=visit.visit, type=self.visit_type, date=now(), @@ -534,14 +534,14 @@ failed_branches: List[str] = [] # Prepare origin and origin_visit - origin = Origin(url=self.url) + origin = Origin(url=self.origin.url) try: self.storage.origin_add([origin]) visit = list( self.storage.origin_visit_add( [ OriginVisit( - origin=self.url, + origin=self.origin.url, date=self.visit_date, type=self.visit_type, ) @@ -549,7 +549,9 @@ ) )[0] except Exception as e: - logger.exception("Failed to initialize origin_visit for %s", self.url) + logger.exception( + "Failed to initialize origin_visit for %s", self.origin.url + ) sentry_sdk.capture_exception(e) return {"status": "failed"} @@ -559,7 +561,7 @@ last_snapshot = self.last_snapshot() logger.debug("last snapshot: %s", last_snapshot) except Exception as e: - logger.exception("Failed to get previous state for %s", self.url) + logger.exception("Failed to get previous state for %s", self.origin.url) sentry_sdk.capture_exception(e) return self.finalize_visit( snapshot=snapshot, @@ -660,7 +662,7 @@ self.storage.clear_buffers() load_exceptions.append(e) sentry_sdk.capture_exception(e) - error = f"Failed to load branch {branch_name} for {self.url}" + error = f"Failed to load branch {branch_name} for {self.origin.url}" logger.exception(error) failed_branches.append(branch_name) errors.append(f"{error}: {e}") @@ -736,7 +738,7 @@ ) self.storage.flush() except Exception as e: - error = f"Failed to build snapshot for origin {self.url}" + error = f"Failed to build snapshot for origin {self.origin.url}" logger.exception(error) errors.append(f"{error}: {e}") sentry_sdk.capture_exception(e) @@ -748,7 +750,9 @@ metadata_objects = self.build_extrinsic_snapshot_metadata(snapshot.id) self._load_metadata_objects(metadata_objects) except Exception as e: - error = f"Failed to load extrinsic snapshot metadata for {self.url}" + error = ( + f"Failed to load extrinsic snapshot metadata for {self.origin.url}" + ) logger.exception(error) errors.append(f"{error}: {e}") sentry_sdk.capture_exception(e) @@ -759,7 +763,7 @@ metadata_objects = self.build_extrinsic_origin_metadata() self._load_metadata_objects(metadata_objects) except Exception as e: - error = f"Failed to load extrinsic origin metadata for {self.url}" + error = f"Failed to load extrinsic origin metadata for {self.origin.url}" logger.exception(error) errors.append(f"{error}: {e}") sentry_sdk.capture_exception(e) @@ -843,7 +847,7 @@ fetcher=self.get_metadata_fetcher(), format="original-artifacts-json", metadata=json.dumps(metadata).encode(), - origin=self.url, + origin=self.origin.url, release=release.swhid(), ) self._load_metadata_objects([original_artifact_metadata]) @@ -960,7 +964,7 @@ for item in metadata_items: metadata_objects.append( RawExtrinsicMetadata( - target=Origin(self.url).swhid(), + target=self.origin.swhid(), discovery_date=item.discovery_date or self.visit_date, authority=authority, fetcher=fetcher, @@ -1002,7 +1006,7 @@ fetcher=fetcher, format=item.format, metadata=item.metadata, - origin=self.url, + origin=self.origin.url, ) ) @@ -1035,7 +1039,7 @@ fetcher=fetcher, format=item.format, metadata=item.metadata, - origin=self.url, + origin=self.origin.url, release=CoreSWHID( object_type=ObjectType.RELEASE, object_id=release_id ), @@ -1088,7 +1092,7 @@ try: self.storage.extid_add(list(extids)) except Exception as e: - logger.exception("Failed to load new ExtIDs for %s", self.url) + logger.exception("Failed to load new ExtIDs for %s", self.origin.url) sentry_sdk.capture_exception(e) # No big deal, it just means the next visit will load the same versions # again. diff --git a/swh/loader/package/maven/loader.py b/swh/loader/package/maven/loader.py --- a/swh/loader/package/maven/loader.py +++ b/swh/loader/package/maven/loader.py @@ -185,7 +185,7 @@ def get_package_info(self, version: str) -> Iterator[Tuple[str, MavenPackageInfo]]: a_metadata = self.version_artifact[version] yield release_name(a_metadata["version"]), MavenPackageInfo.from_metadata( - self.url, a_metadata + self.origin.url, a_metadata ) def build_release( diff --git a/swh/loader/package/nixguix/loader.py b/swh/loader/package/nixguix/loader.py --- a/swh/loader/package/nixguix/loader.py +++ b/swh/loader/package/nixguix/loader.py @@ -85,7 +85,7 @@ # base class. @cached_method def raw_sources(self): - return retrieve_sources(self.url) + return retrieve_sources(self.origin.url) @cached_method def supported_sources(self): @@ -109,7 +109,7 @@ def get_metadata_authority(self): return MetadataAuthority( type=MetadataAuthorityType.FORGE, - url=self.url, + url=self.origin.url, metadata={}, ) diff --git a/swh/loader/package/opam/loader.py b/swh/loader/package/opam/loader.py --- a/swh/loader/package/opam/loader.py +++ b/swh/loader/package/opam/loader.py @@ -134,7 +134,7 @@ if not os.path.exists(package_dir): raise ValueError( f"can't get versions for package {self.opam_package} " - f"(at url {self.url})." + f"(at url {self.origin.url})." ) versions = [ @@ -143,7 +143,7 @@ if not versions: raise ValueError( f"can't get versions for package {self.opam_package} " - f"(at url {self.url})" + f"(at url {self.origin.url})" ) versions.sort() return versions @@ -214,7 +214,7 @@ if url is None: raise ValueError( f"can't get field url.src: for version {version} of package {self.opam_package}" - f" (at url {self.url}) from `opam show`" + f" (at url {self.origin.url}) from `opam show`" ) authors_field = self.get_enclosed_single_line_field("authors:", version) diff --git a/swh/loader/package/pypi/loader.py b/swh/loader/package/pypi/loader.py --- a/swh/loader/package/pypi/loader.py +++ b/swh/loader/package/pypi/loader.py @@ -84,7 +84,7 @@ max_content_size: Optional[int] = None, ): super().__init__(storage=storage, url=url, max_content_size=max_content_size) - self.provider_url = pypi_api_url(self.url) + self.provider_url = pypi_api_url(self.origin.url) @cached_method def _raw_info(self) -> bytes: @@ -102,7 +102,7 @@ return self.info()["info"]["version"] def get_metadata_authority(self): - p_url = urlparse(self.url) + p_url = urlparse(self.origin.url) return MetadataAuthority( type=MetadataAuthorityType.FORGE, url=f"{p_url.scheme}://{p_url.netloc}/",