Changeset View
Changeset View
Standalone View
Standalone View
swh/web/common/archive.py
# Copyright (C) 2015-2020 The Software Heritage developers | # Copyright (C) 2015-2020 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU Affero General Public License version 3, or any later version | # License: GNU Affero General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
from collections import defaultdict | from collections import defaultdict | ||||
import itertools | import itertools | ||||
import os | import os | ||||
import re | import re | ||||
from typing import Any, Dict, Iterable, Iterator, List, Optional, Set, Tuple, Union | from typing import Any, Dict, Iterable, Iterator, List, Optional, Set, Tuple, Union | ||||
from urllib.parse import urlparse | |||||
from swh.model import hashutil | from swh.model import hashutil | ||||
from swh.model.identifiers import CONTENT, DIRECTORY, RELEASE, REVISION, SNAPSHOT | from swh.model.identifiers import CONTENT, DIRECTORY, RELEASE, REVISION, SNAPSHOT | ||||
from swh.model.model import OriginVisit, Revision | from swh.model.model import OriginVisit, Revision | ||||
from swh.storage.algos import diff, revisions_walker | from swh.storage.algos import diff, revisions_walker | ||||
from swh.storage.algos.origin import origin_get_latest_visit_status | from swh.storage.algos.origin import origin_get_latest_visit_status | ||||
from swh.storage.algos.snapshot import snapshot_get_latest, snapshot_resolve_alias | from swh.storage.algos.snapshot import snapshot_get_latest, snapshot_resolve_alias | ||||
from swh.vault.exc import NotFoundExc as VaultNotFoundExc | from swh.vault.exc import NotFoundExc as VaultNotFoundExc | ||||
▲ Show 20 Lines • Show All 224 Lines • ▼ Show 20 Lines | if origin["url"]: | ||||
# handle case when user provided an origin url with a trailing | # handle case when user provided an origin url with a trailing | ||||
# slash while the url in storage does not have it (e.g. GitHub) | # slash while the url in storage does not have it (e.g. GitHub) | ||||
if origin["url"].endswith("/"): | if origin["url"].endswith("/"): | ||||
origin_urls.append(origin["url"][:-1]) | origin_urls.append(origin["url"][:-1]) | ||||
# handle case when user provided an origin url without a trailing | # handle case when user provided an origin url without a trailing | ||||
# slash while the url in storage has it (e.g. Debian source package) | # slash while the url in storage has it (e.g. Debian source package) | ||||
else: | else: | ||||
origin_urls.append(f"{origin['url']}/") | origin_urls.append(f"{origin['url']}/") | ||||
try: | |||||
# handle case where the "://" character sequence was mangled into ":/" | |||||
parsed_url = urlparse(origin["url"]) | |||||
if ( | |||||
parsed_url.scheme | |||||
and not parsed_url.netloc | |||||
and f"{parsed_url.scheme}:/" in origin["url"] | |||||
and f"{parsed_url.scheme}://" not in origin["url"] | |||||
): | |||||
origin_urls.append( | |||||
origin["url"].replace( | |||||
f"{parsed_url.scheme}:/", f"{parsed_url.scheme}://" | |||||
) | |||||
) | |||||
except Exception: | |||||
ardumont: any particular reason for this snippet of code to raise?
And if it raises, do we really want… | |||||
Done Inline Actions [urlparse](https://docs.python.org/3/library/urllib.parse.html) can raise exceptions, and any origin URL can be passed in a SWHID, so ... We can silence the exception: if the origin URL does not exist in the archive, an exception will be raised at the end of that function. anlambert: [urlparse](https://docs.python.org/3/library/urllib.parse.html) can raise exceptions and any… | |||||
pass | |||||
origins = [o for o in storage.origin_get(origin_urls) if o is not None] | origins = [o for o in storage.origin_get(origin_urls) if o is not None] | ||||
if not origins: | if not origins: | ||||
msg = "Origin with url %s not found!" % origin["url"] | msg = "Origin with url %s not found!" % origin["url"] | ||||
raise NotFoundExc(msg) | raise NotFoundExc(msg) | ||||
return converters.from_origin(origins[0].to_dict()) | return converters.from_origin(origins[0].to_dict()) | ||||
def lookup_origins( | def lookup_origins( | ||||
▲ Show 20 Lines • Show All 1,109 Lines • Show Last 20 Lines |
Any particular reason for this snippet of code to raise?
And if it raises, do we really want to continue silently? Won't that pose problems further down the line?