Page Menu | Home | Software Heritage

D3618.diff
No One | Temporary

D3618.diff

diff --git a/requirements-swh.txt b/requirements-swh.txt
--- a/requirements-swh.txt
+++ b/requirements-swh.txt
@@ -3,5 +3,5 @@
swh.model >= 0.5.0
swh.scheduler >= 0.1.1
swh.search >= 0.0.4
-swh.storage >= 0.8.0
+swh.storage >= 0.11.3
swh.vault >= 0.0.33
diff --git a/swh/web/api/views/origin.py b/swh/web/api/views/origin.py
--- a/swh/web/api/views/origin.py
+++ b/swh/web/api/views/origin.py
@@ -462,14 +462,10 @@
:swh_web_api:`origin/https://github.com/python/cpython/intrinsic-metadata`
"""
- ori_dict = {"url": origin_url}
-
- error_msg = "Origin with url %s not found" % ori_dict["url"]
-
return api_lookup(
service.lookup_origin_intrinsic_metadata,
- ori_dict,
- notfound_msg=error_msg,
+ origin_url,
+ notfound_msg=f"Origin with url {origin_url} not found",
enrich_fn=enrich_origin,
request=request,
)
diff --git a/swh/web/common/service.py b/swh/web/common/service.py
--- a/swh/web/common/service.py
+++ b/swh/web/common/service.py
@@ -21,7 +21,7 @@
from swh.web.common import query
from swh.web.common.exc import BadInputExc, NotFoundExc
from swh.web.common.origin_visits import get_origin_visit
-from swh.web.common.typing import OriginInfo, OriginVisitInfo
+from swh.web.common.typing import OriginInfo, OriginVisitInfo, OriginMetadataInfo
search = config.search()
@@ -224,25 +224,21 @@
origin information as dict.
"""
- origins = [origin]
+ origin_urls = [origin["url"]]
if origin["url"]:
# handle case when user provided an origin url with a trailing
# slash while the url in storage does not have it (e.g. GitHub)
if origin["url"].endswith("/"):
- origins.append({"url": origin["url"][:-1]})
+ origin_urls.append(origin["url"][:-1])
# handle case when user provided an origin url without a trailing
# slash while the url in storage have it (e.g. Debian source package)
else:
- origins.append({"url": f"{origin['url']}/"})
- # Check all possible origin urls
- for orig in origins:
- origin_info = storage.origin_get(orig)
- if origin_info:
- break
- if not origin_info:
+ origin_urls.append(f"{origin['url']}/")
+ origins = [o for o in storage.origin_get(origin_urls) if o is not None]
+ if not origins:
msg = "Origin with url %s not found!" % origin["url"]
raise NotFoundExc(msg)
- return converters.from_origin(origin_info)
+ return converters.from_origin(origins[0].to_dict())
def lookup_origins(
@@ -311,54 +307,52 @@
return (origins, page_token)
-def search_origin_metadata(fulltext, limit=50):
+def search_origin_metadata(
+ fulltext: str, limit: int = 50
+) -> Iterable[OriginMetadataInfo]:
"""Search for origins whose metadata match a provided string pattern.
Args:
fulltext: the string pattern to search for in origin metadata
- offset: number of found origins to skip before returning results
limit: the maximum number of found origins to return
Returns:
- list of origin metadata as dict.
+ Iterable of origin metadata information for existing origins
"""
matches = idx_storage.origin_intrinsic_metadata_search_fulltext(
conjunction=[fulltext], limit=limit
)
results = []
-
- for match in matches:
+ origins = storage.origin_get([match["id"] for match in matches])
+ for origin, match in zip(origins, matches):
+ if not origin:
+ continue
match["from_revision"] = hashutil.hash_to_hex(match["from_revision"])
-
- origin = storage.origin_get({"url": match["id"]})
del match["id"]
-
- result = converters.from_origin(origin)
- if result:
- result["metadata"] = match
- results.append(result)
-
+ results.append(OriginMetadataInfo(url=origin.url, metadata=match))
return results
-def lookup_origin_intrinsic_metadata(origin_dict):
+def lookup_origin_intrinsic_metadata(origin_url: str) -> Dict[str, Any]:
"""Return intrinsic metadata for origin whose origin matches given
origin.
Args:
- origin_dict: origin's dict with keys ('type' AND 'url')
+ origin_url: origin url
+
+ Raises:
+ NotFoundExc when the origin is not found
Returns:
origin metadata.
"""
- origin_info = storage.origin_get(origin_dict)
+ origins = [origin_url]
+ origin_info = storage.origin_get(origins)[0]
if not origin_info:
- msg = "Origin with url %s not found!" % origin_dict["url"]
- raise NotFoundExc(msg)
+ raise NotFoundExc(f"Origin with url {origin_url} not found!")
- origins = [origin_info["url"]]
match = _first_element(idx_storage.origin_intrinsic_metadata_get(origins))
result = {}
if match:
diff --git a/swh/web/common/typing.py b/swh/web/common/typing.py
--- a/swh/web/common/typing.py
+++ b/swh/web/common/typing.py
@@ -16,6 +16,13 @@
"""URL of the origin"""
+class OriginMetadataInfo(TypedDict):
+ url: str
+ """URL of the origin"""
+ metadata: Dict[str, Any]
+ """Origin metadata associated to the origin"""
+
+
class OriginVisitInfo(TypedDict):
date: str
"""date of the visit in iso format"""
diff --git a/swh/web/tests/api/views/test_origin.py b/swh/web/tests/api/views/test_origin.py
--- a/swh/web/tests/api/views/test_origin.py
+++ b/swh/web/tests/api/views/test_origin.py
@@ -408,11 +408,11 @@
@given(origin())
def test_api_origin_by_url(api_client, archive_data, origin):
- url = reverse("api-1-origin", url_args={"origin_url": origin["url"]})
+ origin_url = origin["url"]
+ url = reverse("api-1-origin", url_args={"origin_url": origin_url})
rv = api_client.get(url)
- expected_origin = archive_data.origin_get(origin)
-
+ expected_origin = archive_data.origin_get([origin_url])[0]
expected_origin = enrich_origin(expected_origin, rv.wsgi_request)
assert rv.status_code == 200, rv.data
diff --git a/swh/web/tests/common/test_service.py b/swh/web/tests/common/test_service.py
--- a/swh/web/tests/common/test_service.py
+++ b/swh/web/tests/common/test_service.py
@@ -238,7 +238,7 @@
archive_data.origin_add([new_origin])
actual_origin = service.lookup_origin({"url": new_origin.url})
- expected_origin = archive_data.origin_get({"url": new_origin.url})
+ expected_origin = archive_data.origin_get([new_origin.url])[0]
assert actual_origin == expected_origin
diff --git a/swh/web/tests/conftest.py b/swh/web/tests/conftest.py
--- a/swh/web/tests/conftest.py
+++ b/swh/web/tests/conftest.py
@@ -222,9 +222,9 @@
snp = snapshot_get_latest(self.storage, origin_url)
return converters.from_snapshot(snp.to_dict())
- def origin_get(self, origin_info):
- origin = self.storage.origin_get(origin_info)
- return converters.from_origin(origin)
+ def origin_get(self, origin_urls):
+ origins = self.storage.origin_get(origin_urls)
+ return [converters.from_origin(o.to_dict()) for o in origins]
def origin_visit_get(self, origin_url):
visits = list(self.storage.origin_visit_get(origin_url))
diff --git a/swh/web/tests/data.py b/swh/web/tests/data.py
--- a/swh/web/tests/data.py
+++ b/swh/web/tests/data.py
@@ -188,7 +188,8 @@
loader.storage = storage
loader.load()
- origin.update(storage.origin_get(origin)) # add an 'id' key if enabled
+ ori = storage.origin_get([origin["url"]])[0]
+ origin.update(ori.to_dict()) # add an 'id' key if enabled
search.origin_update([{"url": origin["url"], "has_visits": True}])
for i in range(250):
diff --git a/swh/web/tests/strategies.py b/swh/web/tests/strategies.py
--- a/swh/web/tests/strategies.py
+++ b/swh/web/tests/strategies.py
@@ -291,8 +291,7 @@
into the test archive.
"""
return new_origin_strategy().filter(
- lambda origin: get_tests_data()["storage"].origin_get([origin.to_dict()])[0]
- is None
+ lambda origin: get_tests_data()["storage"].origin_get([origin.url])[0] is None
)

File Metadata

Mime Type
text/plain
Expires
Thu, Jul 3, 3:19 PM (5 d, 19 h ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3217156

Event Timeline