Page Menu | Home | Software Heritage

D7599.diff
No One | Temporary

D7599.diff

diff --git a/swh/loader/core/loader.py b/swh/loader/core/loader.py
--- a/swh/loader/core/loader.py
+++ b/swh/loader/core/loader.py
@@ -56,7 +56,7 @@
"""
visit_date: Optional[datetime.datetime]
- origin: Optional[Origin]
+ origin: Origin
origin_metadata: Dict[str, Any]
loaded_snapshot_id: Optional[Sha1Git]
@@ -83,7 +83,6 @@
# possibly overridden in self.prepare method
self.visit_date = None
- self.origin = None
if not hasattr(self, "visit_type"):
self.visit_type: Optional[str] = None
diff --git a/swh/loader/package/debian/loader.py b/swh/loader/package/debian/loader.py
--- a/swh/loader/package/debian/loader.py
+++ b/swh/loader/package/debian/loader.py
@@ -185,7 +185,9 @@
def get_package_info(self, version: str) -> Iterator[Tuple[str, DebianPackageInfo]]:
meta = self.packages[version]
- p_info = DebianPackageInfo.from_metadata(meta, url=self.url, version=version)
+ p_info = DebianPackageInfo.from_metadata(
+ meta, url=self.origin.url, version=version
+ )
yield release_name(version), p_info
def download_package(
diff --git a/swh/loader/package/deposit/loader.py b/swh/loader/package/deposit/loader.py
--- a/swh/loader/package/deposit/loader.py
+++ b/swh/loader/package/deposit/loader.py
@@ -169,7 +169,7 @@
) -> Iterator[Tuple[str, DepositPackageInfo]]:
p_info = DepositPackageInfo.from_metadata(
self.metadata(),
- url=self.url,
+ url=self.origin.url,
filename=self.default_filename,
version=version,
)
@@ -285,7 +285,7 @@
release_id=hash_to_hex(rel_id),
directory_id=hash_to_hex(release.target),
snapshot_id=r["snapshot_id"],
- origin_url=self.url,
+ origin_url=self.origin.url,
)
except Exception:
logger.exception("Problem when trying to update the deposit's status")
diff --git a/swh/loader/package/deposit/tests/test_deposit.py b/swh/loader/package/deposit/tests/test_deposit.py
--- a/swh/loader/package/deposit/tests/test_deposit.py
+++ b/swh/loader/package/deposit/tests/test_deposit.py
@@ -48,7 +48,7 @@
swh_storage, url, deposit_id, deposit_client, default_filename="archive.zip"
) # Something that does not exist
- assert loader.url == url
+ assert loader.origin.url == url
assert loader.client is not None
assert loader.client.base_url == swh_loader_config["deposit"]["url"]
diff --git a/swh/loader/package/loader.py b/swh/loader/package/loader.py
--- a/swh/loader/package/loader.py
+++ b/swh/loader/package/loader.py
@@ -170,7 +170,7 @@
"""
super().__init__(storage=storage, max_content_size=max_content_size)
- self.url = url
+ self.origin = Origin(url=url)
self.visit_date = datetime.datetime.now(tz=datetime.timezone.utc)
def get_versions(self) -> Sequence[str]:
@@ -222,7 +222,7 @@
def last_snapshot(self) -> Optional[Snapshot]:
"""Retrieve the last snapshot out of the last visit."""
- return snapshot_get_latest(self.storage, self.url)
+ return snapshot_get_latest(self.storage, self.origin.url)
def new_packageinfo_to_extid(self, p_info: TPackageInfo) -> Optional[PartialExtID]:
return p_info.extid()
@@ -459,7 +459,7 @@
snapshot_id = snapshot.id
assert visit.visit
visit_status = OriginVisitStatus(
- origin=self.url,
+ origin=self.origin.url,
visit=visit.visit,
type=self.visit_type,
date=now(),
@@ -534,14 +534,14 @@
failed_branches: List[str] = []
# Prepare origin and origin_visit
- origin = Origin(url=self.url)
+ origin = Origin(url=self.origin.url)
try:
self.storage.origin_add([origin])
visit = list(
self.storage.origin_visit_add(
[
OriginVisit(
- origin=self.url,
+ origin=self.origin.url,
date=self.visit_date,
type=self.visit_type,
)
@@ -549,7 +549,9 @@
)
)[0]
except Exception as e:
- logger.exception("Failed to initialize origin_visit for %s", self.url)
+ logger.exception(
+ "Failed to initialize origin_visit for %s", self.origin.url
+ )
sentry_sdk.capture_exception(e)
return {"status": "failed"}
@@ -559,7 +561,7 @@
last_snapshot = self.last_snapshot()
logger.debug("last snapshot: %s", last_snapshot)
except Exception as e:
- logger.exception("Failed to get previous state for %s", self.url)
+ logger.exception("Failed to get previous state for %s", self.origin.url)
sentry_sdk.capture_exception(e)
return self.finalize_visit(
snapshot=snapshot,
@@ -660,7 +662,7 @@
self.storage.clear_buffers()
load_exceptions.append(e)
sentry_sdk.capture_exception(e)
- error = f"Failed to load branch {branch_name} for {self.url}"
+ error = f"Failed to load branch {branch_name} for {self.origin.url}"
logger.exception(error)
failed_branches.append(branch_name)
errors.append(f"{error}: {e}")
@@ -736,7 +738,7 @@
)
self.storage.flush()
except Exception as e:
- error = f"Failed to build snapshot for origin {self.url}"
+ error = f"Failed to build snapshot for origin {self.origin.url}"
logger.exception(error)
errors.append(f"{error}: {e}")
sentry_sdk.capture_exception(e)
@@ -748,7 +750,9 @@
metadata_objects = self.build_extrinsic_snapshot_metadata(snapshot.id)
self._load_metadata_objects(metadata_objects)
except Exception as e:
- error = f"Failed to load extrinsic snapshot metadata for {self.url}"
+ error = (
+ f"Failed to load extrinsic snapshot metadata for {self.origin.url}"
+ )
logger.exception(error)
errors.append(f"{error}: {e}")
sentry_sdk.capture_exception(e)
@@ -759,7 +763,7 @@
metadata_objects = self.build_extrinsic_origin_metadata()
self._load_metadata_objects(metadata_objects)
except Exception as e:
- error = f"Failed to load extrinsic origin metadata for {self.url}"
+ error = f"Failed to load extrinsic origin metadata for {self.origin.url}"
logger.exception(error)
errors.append(f"{error}: {e}")
sentry_sdk.capture_exception(e)
@@ -843,7 +847,7 @@
fetcher=self.get_metadata_fetcher(),
format="original-artifacts-json",
metadata=json.dumps(metadata).encode(),
- origin=self.url,
+ origin=self.origin.url,
release=release.swhid(),
)
self._load_metadata_objects([original_artifact_metadata])
@@ -960,7 +964,7 @@
for item in metadata_items:
metadata_objects.append(
RawExtrinsicMetadata(
- target=Origin(self.url).swhid(),
+ target=self.origin.swhid(),
discovery_date=item.discovery_date or self.visit_date,
authority=authority,
fetcher=fetcher,
@@ -1002,7 +1006,7 @@
fetcher=fetcher,
format=item.format,
metadata=item.metadata,
- origin=self.url,
+ origin=self.origin.url,
)
)
@@ -1035,7 +1039,7 @@
fetcher=fetcher,
format=item.format,
metadata=item.metadata,
- origin=self.url,
+ origin=self.origin.url,
release=CoreSWHID(
object_type=ObjectType.RELEASE, object_id=release_id
),
@@ -1088,7 +1092,7 @@
try:
self.storage.extid_add(list(extids))
except Exception as e:
- logger.exception("Failed to load new ExtIDs for %s", self.url)
+ logger.exception("Failed to load new ExtIDs for %s", self.origin.url)
sentry_sdk.capture_exception(e)
# No big deal, it just means the next visit will load the same versions
# again.
diff --git a/swh/loader/package/maven/loader.py b/swh/loader/package/maven/loader.py
--- a/swh/loader/package/maven/loader.py
+++ b/swh/loader/package/maven/loader.py
@@ -185,7 +185,7 @@
def get_package_info(self, version: str) -> Iterator[Tuple[str, MavenPackageInfo]]:
a_metadata = self.version_artifact[version]
yield release_name(a_metadata["version"]), MavenPackageInfo.from_metadata(
- self.url, a_metadata
+ self.origin.url, a_metadata
)
def build_release(
diff --git a/swh/loader/package/nixguix/loader.py b/swh/loader/package/nixguix/loader.py
--- a/swh/loader/package/nixguix/loader.py
+++ b/swh/loader/package/nixguix/loader.py
@@ -85,7 +85,7 @@
# base class.
@cached_method
def raw_sources(self):
- return retrieve_sources(self.url)
+ return retrieve_sources(self.origin.url)
@cached_method
def supported_sources(self):
@@ -109,7 +109,7 @@
def get_metadata_authority(self):
return MetadataAuthority(
type=MetadataAuthorityType.FORGE,
- url=self.url,
+ url=self.origin.url,
metadata={},
)
diff --git a/swh/loader/package/opam/loader.py b/swh/loader/package/opam/loader.py
--- a/swh/loader/package/opam/loader.py
+++ b/swh/loader/package/opam/loader.py
@@ -134,7 +134,7 @@
if not os.path.exists(package_dir):
raise ValueError(
f"can't get versions for package {self.opam_package} "
- f"(at url {self.url})."
+ f"(at url {self.origin.url})."
)
versions = [
@@ -143,7 +143,7 @@
if not versions:
raise ValueError(
f"can't get versions for package {self.opam_package} "
- f"(at url {self.url})"
+ f"(at url {self.origin.url})"
)
versions.sort()
return versions
@@ -214,7 +214,7 @@
if url is None:
raise ValueError(
f"can't get field url.src: for version {version} of package {self.opam_package}"
- f" (at url {self.url}) from `opam show`"
+ f" (at url {self.origin.url}) from `opam show`"
)
authors_field = self.get_enclosed_single_line_field("authors:", version)
diff --git a/swh/loader/package/pypi/loader.py b/swh/loader/package/pypi/loader.py
--- a/swh/loader/package/pypi/loader.py
+++ b/swh/loader/package/pypi/loader.py
@@ -84,7 +84,7 @@
max_content_size: Optional[int] = None,
):
super().__init__(storage=storage, url=url, max_content_size=max_content_size)
- self.provider_url = pypi_api_url(self.url)
+ self.provider_url = pypi_api_url(self.origin.url)
@cached_method
def _raw_info(self) -> bytes:
@@ -102,7 +102,7 @@
return self.info()["info"]["version"]
def get_metadata_authority(self):
- p_url = urlparse(self.url)
+ p_url = urlparse(self.origin.url)
return MetadataAuthority(
type=MetadataAuthorityType.FORGE,
url=f"{p_url.scheme}://{p_url.netloc}/",

File Metadata

Mime Type
text/plain
Expires
Wed, Jul 2, 10:49 AM (1 w, 6 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3226431

Event Timeline