diff --git a/swh/loader/core/loader.py b/swh/loader/core/loader.py --- a/swh/loader/core/loader.py +++ b/swh/loader/core/loader.py @@ -23,6 +23,7 @@ from swh.loader.exception import NotFound from swh.loader.package.utils import download, get_url_body from swh.model import from_disk +from swh.model.hashutil import hash_to_bytes from swh.model.model import ( BaseContent, Content, @@ -715,6 +716,7 @@ def fetch_data(self) -> bool: """Retrieve the content file as a Content Object""" data: Optional[bytes] = None + expected_checksum = hash_to_bytes(self.expected_checksum.decode()) for url in self.mirror_urls: url_ = urlparse(url) self.log.debug( @@ -726,21 +728,22 @@ ) try: data = get_url_body(url) - self.content = Content.from_data(data) - - # Ensure content received matched the integrity field received - actual_checksum = self.content.get_hash(self.checksum_algo) - if actual_checksum == self.expected_checksum: - # match, we have found our content to ingest, exit loop - break # otherwise continue except NotFound: + self.log.debug("Not found %s, continue on next mirror url if any", url) continue - if not self.content: - raise NotFound(f"Unknown origin {self.origin.url}.") + content = Content.from_data(data) + # Ensure content received matched the integrity field received + if content.get_hash(self.checksum_algo) == expected_checksum: + # match, we have found our content to ingest + self.content = content + # we are done, no more data to fetch + return False - return False # no more data to fetch + # If we reach this point, we did not find any proper content, consider the + # origin not found + raise NotFound(f"Unknown origin {self.origin.url}.") def process_data(self) -> bool: """Build the snapshot out of the Content retrieved."""