diff --git a/requirements-swh.txt b/requirements-swh.txt --- a/requirements-swh.txt +++ b/requirements-swh.txt @@ -1,4 +1,4 @@ # Add here internal Software Heritage dependencies, one per line. swh.core swh.model>=0.7.0 -swh.web.client>=0.2.1 +swh.web.client>=0.2.3 diff --git a/requirements.txt b/requirements.txt --- a/requirements.txt +++ b/requirements.txt @@ -3,3 +3,4 @@ psutil pyfuse3 python-daemon +requests diff --git a/swh/fuse/fs/mountpoint.py b/swh/fuse/fs/mountpoint.py --- a/swh/fuse/fs/mountpoint.py +++ b/swh/fuse/fs/mountpoint.py @@ -133,5 +133,5 @@ url_encoded = name await self.fuse.get_visits(url_encoded) return self.create_child(url_encoded) - except ValidationError: + except ValueError: return None diff --git a/swh/fuse/fuse.py b/swh/fuse/fuse.py --- a/swh/fuse/fuse.py +++ b/swh/fuse/fuse.py @@ -152,6 +152,13 @@ loop = asyncio.get_event_loop() # Web API only takes non-encoded URL url = urllib.parse.unquote_plus(url_encoded) + + origin_exists = await loop.run_in_executor( + None, self.web_api.origin_exists, url + ) + if not origin_exists: + raise ValueError("origin does not exist") + visits_it = await loop.run_in_executor( None, functools.partial(self.web_api.visits, url, typify=typify) ) @@ -159,8 +166,8 @@ await self.cache.metadata.set_visits(url_encoded, visits) # Retrieve it from cache so it is correctly typed return await self.cache.metadata.get_visits(url_encoded) - except requests.HTTPError as err: - logging.error("Cannot fetch visits for object %s: %s", url_encoded, err) + except (ValueError, requests.HTTPError) as err: + logging.error("Cannot fetch visits for origin '%s': %s", url_encoded, err) raise async def get_attrs(self, entry: FuseEntry) -> pyfuse3.EntryAttributes: diff --git a/swh/fuse/tests/conftest.py b/swh/fuse/tests/conftest.py --- a/swh/fuse/tests/conftest.py +++ b/swh/fuse/tests/conftest.py @@ -24,7 +24,13 @@ # Convert Python dict JSON into a string (only for non-raw API call) if not api_call.endswith("raw/") and not api_call.startswith("graph/"): data = json.dumps(data) - requests_mock.get(f"{API_URL}/{api_call}", text=data) + + http_method = requests_mock.get + if api_call.startswith("origin/") and api_call.endswith("get/"): + http_method = requests_mock.head + + http_method(f"{API_URL}/{api_call}", text=data) + return requests_mock diff --git a/swh/fuse/tests/data/api_data.py b/swh/fuse/tests/data/api_data.py --- a/swh/fuse/tests/data/api_data.py +++ b/swh/fuse/tests/data/api_data.py @@ -2929,4 +2929,5 @@ "snapshot_url": None, }, ], + "origin/https://github.com/rust-lang/rust/get/": "", } diff --git a/swh/fuse/tests/data/gen-api-data.py b/swh/fuse/tests/data/gen-api-data.py --- a/swh/fuse/tests/data/gen-api-data.py +++ b/swh/fuse/tests/data/gen-api-data.py @@ -139,10 +139,13 @@ def generate_origin_archive_web_api(url: str): - url = f"origin/{url}/visits/" - data = requests.get(f"{API_URL_real}/{url}").text + url_visits = f"origin/{url}/visits/" + data = requests.get(f"{API_URL_real}/{url_visits}").text data = json.loads(data) - MOCK_ARCHIVE[url] = data + MOCK_ARCHIVE[url_visits] = data + # Necessary since swh-fuse will check the origin URL using the get/ endpoint + url_get = f"origin/{url}/get/" + MOCK_ARCHIVE[url_get] = "" for entry in ALL_ENTRIES: