Adapt swh-web to the list-based storage.origin_get API (swh.storage >= 0.11.3).

Callers now pass a list of origin urls to storage.origin_get and treat the
result as a list with one entry per requested url: entries are checked against
None (unknown origin) and converted with .to_dict() before being handed to the
converters. The test helper archive_data.origin_get mirrors that list shape and
maps unknown origins to None instead of failing on None.to_dict().

diff --git a/requirements-swh.txt b/requirements-swh.txt
--- a/requirements-swh.txt
+++ b/requirements-swh.txt
@@ -3,5 +3,5 @@
 swh.model >= 0.5.0
 swh.scheduler >= 0.1.1
 swh.search >= 0.0.4
-swh.storage >= 0.8.0
+swh.storage >= 0.11.3
 swh.vault >= 0.0.33
diff --git a/swh/web/common/service.py b/swh/web/common/service.py
--- a/swh/web/common/service.py
+++ b/swh/web/common/service.py
@@ -224,25 +224,21 @@
         origin information as dict.
 
     """
-    origins = [origin]
+    origin_urls = [origin["url"]]
     if origin["url"]:
         # handle case when user provided an origin url with a trailing
         # slash while the url in storage does not have it (e.g. GitHub)
         if origin["url"].endswith("/"):
-            origins.append({"url": origin["url"][:-1]})
+            origin_urls.append(origin["url"][:-1])
         # handle case when user provided an origin url without a trailing
         # slash while the url in storage have it (e.g. Debian source package)
         else:
-            origins.append({"url": f"{origin['url']}/"})
-    # Check all possible origin urls
-    for orig in origins:
-        origin_info = storage.origin_get(orig)
-        if origin_info:
-            break
-    if not origin_info:
+            origin_urls.append(f"{origin['url']}/")
+    origins = [o for o in storage.origin_get(origin_urls) if o is not None]
+    if not origins:
         msg = "Origin with url %s not found!" % origin["url"]
         raise NotFoundExc(msg)
-    return converters.from_origin(origin_info)
+    return converters.from_origin(origins[0].to_dict())
 
 
 def lookup_origins(
@@ -311,54 +307,53 @@
     return (origins, page_token)
 
 
-def search_origin_metadata(fulltext, limit=50):
+def search_origin_metadata(fulltext: str, limit: int = 50) -> Iterable[Dict[str, Any]]:
     """Search for origins whose metadata match a provided string pattern.
 
     Args:
         fulltext: the string pattern to search for in origin metadata
-        offset: number of found origins to skip before returning results
         limit: the maximum number of found origins to return
 
     Returns:
-        list of origin metadata as dict.
+        list of origin metadata as dict. Unknown origins are filtered out.
 
     """
     matches = idx_storage.origin_intrinsic_metadata_search_fulltext(
         conjunction=[fulltext], limit=limit
     )
-    results = []
-
-    for match in matches:
+    results: List[Dict[str, Any]] = []
+    origins = storage.origin_get([match["id"] for match in matches])
+    for origin, match in zip(origins, matches):
+        if not origin:
+            continue
         match["from_revision"] = hashutil.hash_to_hex(match["from_revision"])
-
-        origin = storage.origin_get({"url": match["id"]})
         del match["id"]
-
-        result = converters.from_origin(origin)
-        if result:
-            result["metadata"] = match
-            results.append(result)
-
+        result = converters.from_origin(origin.to_dict())
+        result["metadata"] = match
+        results.append(result)
     return results
 
 
-def lookup_origin_intrinsic_metadata(origin_dict):
+def lookup_origin_intrinsic_metadata(origin_dict: Dict[str, Any]) -> Dict[str, Any]:
     """Return intrinsic metadata for origin whose origin matches given
     origin.
 
     Args:
         origin_dict: origin's dict with keys ('type' AND 'url')
 
+    Raises:
+        NotFoundExc when the origin is not found
+
     Returns:
         origin metadata.
 
     """
-    origin_info = storage.origin_get(origin_dict)
+    url = origin_dict["url"]
+    origins = [url]
+    origin_info = storage.origin_get(origins)[0]
     if not origin_info:
-        msg = "Origin with url %s not found!" % origin_dict["url"]
-        raise NotFoundExc(msg)
+        raise NotFoundExc(f"Origin with url {url} not found!")
 
-    origins = [origin_info["url"]]
     match = _first_element(idx_storage.origin_intrinsic_metadata_get(origins))
     result = {}
     if match:
diff --git a/swh/web/tests/api/views/test_origin.py b/swh/web/tests/api/views/test_origin.py
--- a/swh/web/tests/api/views/test_origin.py
+++ b/swh/web/tests/api/views/test_origin.py
@@ -408,11 +408,11 @@
 
 @given(origin())
 def test_api_origin_by_url(api_client, archive_data, origin):
-    url = reverse("api-1-origin", url_args={"origin_url": origin["url"]})
+    origin_url = origin["url"]
+    url = reverse("api-1-origin", url_args={"origin_url": origin_url})
     rv = api_client.get(url)
 
-    expected_origin = archive_data.origin_get(origin)
-
+    expected_origin = archive_data.origin_get([origin_url])[0]
     expected_origin = enrich_origin(expected_origin, rv.wsgi_request)
 
     assert rv.status_code == 200, rv.data
diff --git a/swh/web/tests/common/test_service.py b/swh/web/tests/common/test_service.py
--- a/swh/web/tests/common/test_service.py
+++ b/swh/web/tests/common/test_service.py
@@ -238,7 +238,7 @@
     archive_data.origin_add([new_origin])
 
     actual_origin = service.lookup_origin({"url": new_origin.url})
-    expected_origin = archive_data.origin_get({"url": new_origin.url})
+    expected_origin = archive_data.origin_get([new_origin.url])[0]
     assert actual_origin == expected_origin
 
 
diff --git a/swh/web/tests/conftest.py b/swh/web/tests/conftest.py
--- a/swh/web/tests/conftest.py
+++ b/swh/web/tests/conftest.py
@@ -222,9 +222,10 @@
         snp = snapshot_get_latest(self.storage, origin_url)
         return converters.from_snapshot(snp.to_dict())
 
-    def origin_get(self, origin_info):
-        origin = self.storage.origin_get(origin_info)
-        return converters.from_origin(origin)
+    def origin_get(self, origin_urls):
+        origins = self.storage.origin_get(origin_urls)
+        # storage returns None for unknown urls; keep them aligned with the input
+        return [converters.from_origin(o.to_dict()) if o else None for o in origins]
 
     def origin_visit_get(self, origin_url):
         visits = list(self.storage.origin_visit_get(origin_url))
diff --git a/swh/web/tests/data.py b/swh/web/tests/data.py
--- a/swh/web/tests/data.py
+++ b/swh/web/tests/data.py
@@ -188,7 +188,8 @@
                 loader.storage = storage
             loader.load()
 
-        origin.update(storage.origin_get(origin))  # add an 'id' key if enabled
+        ori = storage.origin_get([origin["url"]])[0]
+        origin.update(ori.to_dict())  # add an 'id' key if enabled
         search.origin_update([{"url": origin["url"], "has_visits": True}])
 
     for i in range(250):
diff --git a/swh/web/tests/strategies.py b/swh/web/tests/strategies.py
--- a/swh/web/tests/strategies.py
+++ b/swh/web/tests/strategies.py
@@ -291,8 +291,7 @@
     into the test archive.
     """
     return new_origin_strategy().filter(
-        lambda origin: get_tests_data()["storage"].origin_get([origin.to_dict()])[0]
-        is None
+        lambda origin: get_tests_data()["storage"].origin_get([origin.url])[0] is None
     )
 
 