Changeset View
Changeset View
Standalone View
Standalone View
swh/web/client/client.py
Show All 40 Lines | |||||
from swh.model.identifiers import parse_persistent_identifier as parse_pid | from swh.model.identifiers import parse_persistent_identifier as parse_pid | ||||
from .auth import ( | from .auth import ( | ||||
AuthenticationError, OpenIDConnectSession, SWH_OIDC_SERVER_URL | AuthenticationError, OpenIDConnectSession, SWH_OIDC_SERVER_URL | ||||
) | ) | ||||
PIDish = Union[PID, str] | PIDish = Union[PID, str] | ||||
VISIT = "visit" | |||||
olasd: We use `origin_visit` in most places, so I guess this should use that name as well. | |||||
def _get_pid(pidish: PIDish) -> PID:
    """Return ``pidish`` as a PID, parsing it first when it is a string"""
    # Non-string values are handed back untouched.
    return parse_pid(pidish) if isinstance(pidish, str) else pidish
▲ Show 20 Lines • Show All 44 Lines • ▼ Show 20 Lines | elif obj_type == DIRECTORY: | ||||
dir_pid = None | dir_pid = None | ||||
for entry in data: | for entry in data: | ||||
dir_pid = dir_pid or to_pid(obj_type, entry['dir_id']) | dir_pid = dir_pid or to_pid(obj_type, entry['dir_id']) | ||||
entry['dir_id'] = dir_pid | entry['dir_id'] = dir_pid | ||||
entry['target'] = to_pid(obj_type_of_entry_type(entry['type']), | entry['target'] = to_pid(obj_type_of_entry_type(entry['type']), | ||||
entry['target']) | entry['target']) | ||||
elif obj_type == CONTENT: | elif obj_type == CONTENT: | ||||
pass # nothing to do for contents | pass # nothing to do for contents | ||||
elif obj_type == VISIT: | |||||
data['date'] = to_date(data['date']) | |||||
if data['snapshot'] is not None: | |||||
data['snapshot'] = to_pid(SNAPSHOT, data['snapshot']) | |||||
else: | else: | ||||
raise ValueError(f'invalid object type: {obj_type}') | raise ValueError(f'invalid object type: {obj_type}') | ||||
return data | return data | ||||
class WebAPIClient: | class WebAPIClient: | ||||
"""Client for the Software Heritage archive Web API, see | """Client for the Software Heritage archive Web API, see | ||||
▲ Show 20 Lines • Show All 204 Lines • ▼ Show 20 Lines | def snapshot(self, pid: PIDish, | ||||
while not done: | while not done: | ||||
r = self._call(query, http_method='get', **req_args) | r = self._call(query, http_method='get', **req_args) | ||||
yield typify(r.json()['branches'], SNAPSHOT) | yield typify(r.json()['branches'], SNAPSHOT) | ||||
if 'next' in r.links and 'url' in r.links['next']: | if 'next' in r.links and 'url' in r.links['next']: | ||||
query = r.links['next']['url'] | query = r.links['next']['url'] | ||||
else: | else: | ||||
done = True | done = True | ||||
def visits(self,
           origin: str,
           per_page: Optional[int] = None,
           last_visit: Optional[int] = None,
           **req_args) -> Generator[Dict[str, Any], None, None]:
    """List visits of an origin

    Args:
        origin: the URL of a software origin
        per_page: the number of visits to list
        last_visit: visit to start listing from
        req_args: extra keyword arguments for requests.get()

    Returns:
        an iterator over visits of the origin

    Raises:
        requests.HTTPError: if HTTP request fails

    """
    # Only the filters the caller actually supplied are sent to the server.
    params = []
    if last_visit is not None:
        params.append(("last_visit", last_visit))
    if per_page is not None:
        params.append(("per_page", per_page))

    # The endpoint path ends with a trailing slash (requested in review).
    query = f'origin/{origin}/visits/'

    while True:
        r = self._call(query, http_method='get', params=params, **req_args)
        # Stream visits one by one instead of building a throwaway list.
        for visit in r.json():
            yield typify(visit, VISIT)
        if 'next' not in r.links or 'url' not in r.links['next']:
            break
        # Explicit parameters accompany the first request only; later
        # requests follow the server-provided "next" URL as-is.
        params = []
        query = r.links['next']['url']
def content_exists(self, pid: PIDish, **req_args) -> bool: | def content_exists(self, pid: PIDish, **req_args) -> bool: | ||||
"""Check if a content object exists in the archive | """Check if a content object exists in the archive | ||||
Args: | Args: | ||||
pid: object identifier | pid: object identifier | ||||
req_args: extra keyword arguments for requests.head() | req_args: extra keyword arguments for requests.head() | ||||
Raises: | Raises: | ||||
▲ Show 20 Lines • Show All 105 Lines • Show Last 20 Lines |
olasd: We use `origin_visit` in most places, so I guess this should use that name as well.