Changeset View
Changeset View
Standalone View
Standalone View
swh/web/client/client.py
| Show All 36 Lines | |||||
| from swh.model.identifiers import SNAPSHOT, REVISION, RELEASE, DIRECTORY, CONTENT | from swh.model.identifiers import SNAPSHOT, REVISION, RELEASE, DIRECTORY, CONTENT | ||||
| from swh.model.identifiers import PersistentId as PID | from swh.model.identifiers import PersistentId as PID | ||||
| from swh.model.identifiers import parse_persistent_identifier as parse_pid | from swh.model.identifiers import parse_persistent_identifier as parse_pid | ||||
| from .auth import AuthenticationError, OpenIDConnectSession, SWH_OIDC_SERVER_URL | from .auth import AuthenticationError, OpenIDConnectSession, SWH_OIDC_SERVER_URL | ||||
| PIDish = Union[PID, str] | PIDish = Union[PID, str] | ||||
olasd: We use `origin_visit` in most places, so I guess this should use that name as well. | |||||
| ORIGIN_VISIT = "origin_visit" | |||||
| def _get_pid(pidish: PIDish) -> PID: | def _get_pid(pidish: PIDish) -> PID: | ||||
| """Parse string to PID if needed""" | """Parse string to PID if needed""" | ||||
| if isinstance(pidish, str): | if isinstance(pidish, str): | ||||
| return parse_pid(pidish) | return parse_pid(pidish) | ||||
| else: | else: | ||||
| return pidish | return pidish | ||||
| ▲ Show 20 Lines • Show All 45 Lines • ▼ Show 20 Lines | elif obj_type == DIRECTORY: | ||||
| for entry in data: | for entry in data: | ||||
| dir_pid = dir_pid or to_pid(obj_type, entry["dir_id"]) | dir_pid = dir_pid or to_pid(obj_type, entry["dir_id"]) | ||||
| entry["dir_id"] = dir_pid | entry["dir_id"] = dir_pid | ||||
| entry["target"] = to_pid( | entry["target"] = to_pid( | ||||
| obj_type_of_entry_type(entry["type"]), entry["target"] | obj_type_of_entry_type(entry["type"]), entry["target"] | ||||
| ) | ) | ||||
| elif obj_type == CONTENT: | elif obj_type == CONTENT: | ||||
| pass # nothing to do for contents | pass # nothing to do for contents | ||||
| elif obj_type == ORIGIN_VISIT: | |||||
| data['date'] = to_date(data['date']) | |||||
| if data['snapshot'] is not None: | |||||
| data['snapshot'] = to_pid(SNAPSHOT, data['snapshot']) | |||||
| else: | else: | ||||
| raise ValueError(f"invalid object type: {obj_type}") | raise ValueError(f"invalid object type: {obj_type}") | ||||
| return data | return data | ||||
| class WebAPIClient: | class WebAPIClient: | ||||
| """Client for the Software Heritage archive Web API, see | """Client for the Software Heritage archive Web API, see | ||||
| ▲ Show 20 Lines • Show All 210 Lines • ▼ Show 20 Lines | ) -> Generator[Dict[str, Any], None, None]: | ||||
| while not done: | while not done: | ||||
| r = self._call(query, http_method="get", **req_args) | r = self._call(query, http_method="get", **req_args) | ||||
| yield typify(r.json()["branches"], SNAPSHOT) | yield typify(r.json()["branches"], SNAPSHOT) | ||||
| if "next" in r.links and "url" in r.links["next"]: | if "next" in r.links and "url" in r.links["next"]: | ||||
| query = r.links["next"]["url"] | query = r.links["next"]["url"] | ||||
| else: | else: | ||||
| done = True | done = True | ||||
| def visits(self, | |||||
| origin: str, | |||||
| per_page: Optional[int] = None, | |||||
| last_visit: Optional[int] = None, | |||||
| **req_args) -> Generator[Dict[str, Any], None, None]: | |||||
| """List visits of an origin | |||||
| Args: | |||||
Inline comment (Not Done) — zack: maybe "origin: the URL of a software origin" | |||||
| origin: the URL of a software origin | |||||
Inline comment (Not Done) — zack: just "number of visits to list", for consistency with other docstrings | |||||
| per_page: the number of visits to list | |||||
| last_visit: visit to start listing from | |||||
| req_args: extra keyword arguments for requests.get() | |||||
| Returns: | |||||
| an iterator over visits of the origin | |||||
| Raises: | |||||
Inline comment (Not Done) — ardumont: indentation ;)
| |||||
| requests.HTTPError: if HTTP request fails | |||||
| """ | |||||
| done = False | |||||
| r = None | |||||
| params = [] | |||||
| if last_visit is not None: | |||||
| params.append(("last_visit", last_visit)) | |||||
| if per_page is not None: | |||||
| params.append(("per_page", per_page)) | |||||
Inline comment (Done) — olasd: Should have a trailing slash. | |||||
| query = f'origin/{origin}/visits/' | |||||
| while not done: | |||||
| r = self._call(query, http_method='get', params=params, **req_args) | |||||
| yield from [typify(v, ORIGIN_VISIT) for v in r.json()] | |||||
| if 'next' in r.links and 'url' in r.links['next']: | |||||
| params = [] | |||||
| query = r.links['next']['url'] | |||||
| else: | |||||
| done = True | |||||
| def content_exists(self, pid: PIDish, **req_args) -> bool: | def content_exists(self, pid: PIDish, **req_args) -> bool: | ||||
| """Check if a content object exists in the archive | """Check if a content object exists in the archive | ||||
| Args: | Args: | ||||
| pid: object identifier | pid: object identifier | ||||
| req_args: extra keyword arguments for requests.head() | req_args: extra keyword arguments for requests.head() | ||||
| Raises: | Raises: | ||||
| ▲ Show 20 Lines • Show All 122 Lines • Show Last 20 Lines | |||||
We use origin_visit in most places, so I guess this should use that name as well.