Changeset View
Changeset View
Standalone View
Standalone View
swh/web/client/client.py
Show All 36 Lines | |||||
from swh.model.identifiers import SNAPSHOT, REVISION, RELEASE, DIRECTORY, CONTENT | from swh.model.identifiers import SNAPSHOT, REVISION, RELEASE, DIRECTORY, CONTENT | ||||
from swh.model.identifiers import PersistentId as PID | from swh.model.identifiers import PersistentId as PID | ||||
from swh.model.identifiers import parse_persistent_identifier as parse_pid | from swh.model.identifiers import parse_persistent_identifier as parse_pid | ||||
from .auth import AuthenticationError, OpenIDConnectSession, SWH_OIDC_SERVER_URL | from .auth import AuthenticationError, OpenIDConnectSession, SWH_OIDC_SERVER_URL | ||||
PIDish = Union[PID, str] | PIDish = Union[PID, str] | ||||
olasd: We use `origin_visit` in most places, so I guess this should use that name as well. | |||||
ORIGIN_VISIT = "origin_visit" | |||||
def _get_pid(pidish: PIDish) -> PID:
    """Return ``pidish`` as a PID, parsing it first when it is a string"""
    # Anything that is not a str is handed back untouched.
    return parse_pid(pidish) if isinstance(pidish, str) else pidish
▲ Show 20 Lines • Show All 45 Lines • ▼ Show 20 Lines | elif obj_type == DIRECTORY: | ||||
for entry in data: | for entry in data: | ||||
dir_pid = dir_pid or to_pid(obj_type, entry["dir_id"]) | dir_pid = dir_pid or to_pid(obj_type, entry["dir_id"]) | ||||
entry["dir_id"] = dir_pid | entry["dir_id"] = dir_pid | ||||
entry["target"] = to_pid( | entry["target"] = to_pid( | ||||
obj_type_of_entry_type(entry["type"]), entry["target"] | obj_type_of_entry_type(entry["type"]), entry["target"] | ||||
) | ) | ||||
elif obj_type == CONTENT: | elif obj_type == CONTENT: | ||||
pass # nothing to do for contents | pass # nothing to do for contents | ||||
elif obj_type == ORIGIN_VISIT: | |||||
data['date'] = to_date(data['date']) | |||||
if data['snapshot'] is not None: | |||||
data['snapshot'] = to_pid(SNAPSHOT, data['snapshot']) | |||||
else: | else: | ||||
raise ValueError(f"invalid object type: {obj_type}") | raise ValueError(f"invalid object type: {obj_type}") | ||||
return data | return data | ||||
class WebAPIClient: | class WebAPIClient: | ||||
"""Client for the Software Heritage archive Web API, see | """Client for the Software Heritage archive Web API, see | ||||
▲ Show 20 Lines • Show All 210 Lines • ▼ Show 20 Lines | ) -> Generator[Dict[str, Any], None, None]: | ||||
while not done: | while not done: | ||||
r = self._call(query, http_method="get", **req_args) | r = self._call(query, http_method="get", **req_args) | ||||
yield typify(r.json()["branches"], SNAPSHOT) | yield typify(r.json()["branches"], SNAPSHOT) | ||||
if "next" in r.links and "url" in r.links["next"]: | if "next" in r.links and "url" in r.links["next"]: | ||||
query = r.links["next"]["url"] | query = r.links["next"]["url"] | ||||
else: | else: | ||||
done = True | done = True | ||||
def visits(
    self,
    origin: str,
    per_page: Optional[int] = None,
    last_visit: Optional[int] = None,
    **req_args,
) -> Generator[Dict[str, Any], None, None]:
    """List visits of an origin

    Args:
        origin: the URL of a software origin
        per_page: the number of visits to list
        last_visit: visit to start listing from
        req_args: extra keyword arguments for requests.get()

    Returns:
        an iterator over visits of the origin

    Raises:
        requests.HTTPError: if HTTP request fails

    """
    # Only send the pagination parameters the caller actually provided.
    params = []
    if last_visit is not None:
        params.append(("last_visit", last_visit))
    if per_page is not None:
        params.append(("per_page", per_page))

    query = f"origin/{origin}/visits/"

    while True:
        r = self._call(query, http_method="get", params=params, **req_args)

        # Stream visits one by one instead of materializing the whole page.
        for visit in r.json():
            yield typify(visit, ORIGIN_VISIT)

        if "next" in r.links and "url" in r.links["next"]:
            # Follow the "next" link as the new query; explicit parameters
            # are dropped from here on.
            # NOTE(review): presumably the link already embeds the
            # pagination state -- confirm against the Web API.
            params = []
            query = r.links["next"]["url"]
        else:
            return
def content_exists(self, pid: PIDish, **req_args) -> bool: | def content_exists(self, pid: PIDish, **req_args) -> bool: | ||||
"""Check if a content object exists in the archive | """Check if a content object exists in the archive | ||||
Args: | Args: | ||||
pid: object identifier | pid: object identifier | ||||
req_args: extra keyword arguments for requests.head() | req_args: extra keyword arguments for requests.head() | ||||
Raises: | Raises: | ||||
▲ Show 20 Lines • Show All 122 Lines • Show Last 20 Lines |
We use origin_visit in most places, so I guess this should use that name as well.