diff --git a/swh/storage/cassandra/storage.py b/swh/storage/cassandra/storage.py --- a/swh/storage/cassandra/storage.py +++ b/swh/storage/cassandra/storage.py @@ -1051,6 +1051,138 @@ "content", id, authority, after, page_token, limit, ) + def directory_metadata_add( + self, + id: str, + context: Dict[str, Union[str, bytes, int]], + discovery_date: datetime.datetime, + authority: Dict[str, Any], + fetcher: Dict[str, Any], + format: str, + metadata: bytes, + ) -> None: + self._object_metadata_add( + "directory", + id, + discovery_date, + authority, + fetcher, + format, + metadata, + context, + ) + + def directory_metadata_get( + self, + id: str, + authority: Dict[str, str], + after: Optional[datetime.datetime] = None, + page_token: Optional[bytes] = None, + limit: int = 1000, + ) -> Dict[str, Any]: + return self._object_metadata_get( + "directory", id, authority, after, page_token, limit, + ) + + def revision_metadata_add( + self, + id: str, + context: Dict[str, Union[str, bytes, int]], + discovery_date: datetime.datetime, + authority: Dict[str, Any], + fetcher: Dict[str, Any], + format: str, + metadata: bytes, + ) -> None: + self._object_metadata_add( + "revision", + id, + discovery_date, + authority, + fetcher, + format, + metadata, + context, + ) + + def revision_metadata_get( + self, + id: str, + authority: Dict[str, str], + after: Optional[datetime.datetime] = None, + page_token: Optional[bytes] = None, + limit: int = 1000, + ) -> Dict[str, Any]: + return self._object_metadata_get( + "revision", id, authority, after, page_token, limit, + ) + + def release_metadata_add( + self, + id: str, + context: Dict[str, Union[str, bytes, int]], + discovery_date: datetime.datetime, + authority: Dict[str, Any], + fetcher: Dict[str, Any], + format: str, + metadata: bytes, + ) -> None: + self._object_metadata_add( + "release", + id, + discovery_date, + authority, + fetcher, + format, + metadata, + context, + ) + + def release_metadata_get( + self, + id: str, + authority: 
Dict[str, str], + after: Optional[datetime.datetime] = None, + page_token: Optional[bytes] = None, + limit: int = 1000, + ) -> Dict[str, Any]: + return self._object_metadata_get( + "release", id, authority, after, page_token, limit, + ) + + def snapshot_metadata_add( + self, + id: str, + context: Dict[str, Union[str, bytes, int]], + discovery_date: datetime.datetime, + authority: Dict[str, Any], + fetcher: Dict[str, Any], + format: str, + metadata: bytes, + ) -> None: + self._object_metadata_add( + "snapshot", + id, + discovery_date, + authority, + fetcher, + format, + metadata, + context, + ) + + def snapshot_metadata_get( + self, + id: str, + authority: Dict[str, str], + after: Optional[datetime.datetime] = None, + page_token: Optional[bytes] = None, + limit: int = 1000, + ) -> Dict[str, Any]: + return self._object_metadata_get( + "snapshot", id, authority, after, page_token, limit, + ) + def origin_metadata_add( self, origin_url: str, diff --git a/swh/storage/in_memory.py b/swh/storage/in_memory.py --- a/swh/storage/in_memory.py +++ b/swh/storage/in_memory.py @@ -1049,6 +1049,138 @@ "content", id, authority, after, page_token, limit ) + def directory_metadata_add( + self, + id: str, + context: Dict[str, Union[str, bytes, int]], + discovery_date: datetime.datetime, + authority: Dict[str, Any], + fetcher: Dict[str, Any], + format: str, + metadata: bytes, + ) -> None: + self._object_metadata_add( + "directory", + id, + discovery_date, + authority, + fetcher, + format, + metadata, + context, + ) + + def directory_metadata_get( + self, + id: str, + authority: Dict[str, str], + after: Optional[datetime.datetime] = None, + page_token: Optional[bytes] = None, + limit: int = 1000, + ) -> Dict[str, Any]: + return self._object_metadata_get( + "directory", id, authority, after, page_token, limit + ) + + def revision_metadata_add( + self, + id: str, + context: Dict[str, Union[str, bytes, int]], + discovery_date: datetime.datetime, + authority: Dict[str, Any], + fetcher: 
Dict[str, Any], + format: str, + metadata: bytes, + ) -> None: + self._object_metadata_add( + "revision", + id, + discovery_date, + authority, + fetcher, + format, + metadata, + context, + ) + + def revision_metadata_get( + self, + id: str, + authority: Dict[str, str], + after: Optional[datetime.datetime] = None, + page_token: Optional[bytes] = None, + limit: int = 1000, + ) -> Dict[str, Any]: + return self._object_metadata_get( + "revision", id, authority, after, page_token, limit + ) + + def release_metadata_add( + self, + id: str, + context: Dict[str, Union[str, bytes, int]], + discovery_date: datetime.datetime, + authority: Dict[str, Any], + fetcher: Dict[str, Any], + format: str, + metadata: bytes, + ) -> None: + self._object_metadata_add( + "release", + id, + discovery_date, + authority, + fetcher, + format, + metadata, + context, + ) + + def release_metadata_get( + self, + id: str, + authority: Dict[str, str], + after: Optional[datetime.datetime] = None, + page_token: Optional[bytes] = None, + limit: int = 1000, + ) -> Dict[str, Any]: + return self._object_metadata_get( + "release", id, authority, after, page_token, limit + ) + + def snapshot_metadata_add( + self, + id: str, + context: Dict[str, Union[str, bytes, int]], + discovery_date: datetime.datetime, + authority: Dict[str, Any], + fetcher: Dict[str, Any], + format: str, + metadata: bytes, + ) -> None: + self._object_metadata_add( + "snapshot", + id, + discovery_date, + authority, + fetcher, + format, + metadata, + context, + ) + + def snapshot_metadata_get( + self, + id: str, + authority: Dict[str, str], + after: Optional[datetime.datetime] = None, + page_token: Optional[bytes] = None, + limit: int = 1000, + ) -> Dict[str, Any]: + return self._object_metadata_get( + "snapshot", id, authority, after, page_token, limit + ) + def origin_metadata_add( self, origin_url: str, diff --git a/swh/storage/interface.py b/swh/storage/interface.py --- a/swh/storage/interface.py +++ b/swh/storage/interface.py @@ 
-1176,6 +1176,282 @@ """ ... + @remote_api_endpoint("directory/metadata/add") + def directory_metadata_add( + self, + id: str, + context: Dict[str, Union[str, bytes, int]], + discovery_date: datetime.datetime, + authority: Dict[str, Any], + fetcher: Dict[str, Any], + format: str, + metadata: bytes, + ) -> None: + """Add a directory_metadata for the directory at discovery_date, + obtained using the `fetcher` from the `authority`. + + The authority and fetcher must be known to the storage before + using this endpoint. + + If there is already directory metadata for the same directory, authority, + fetcher, and at the same date; the new one will be either dropped or + will replace the existing one + (it is unspecified which one of these two behaviors happens). + + Args: + discovery_date: when the metadata was fetched. + authority: a dict containing keys `type` and `url`. + fetcher: a dict containing keys `name` and `version`. + format: text field indicating the format of the directory of the + metadata: blob of raw metadata + """ + ... + + @remote_api_endpoint("directory/metadata/get") + def directory_metadata_get( + self, + id: str, + authority: Dict[str, str], + after: Optional[datetime.datetime] = None, + page_token: Optional[bytes] = None, + limit: int = 1000, + ) -> Dict[str, Any]: + """Retrieve list of all directory_metadata entries for the id + + Args: + id: the directory's SWHID + authority: a dict containing keys `type` and `url`. + after: minimum discovery_date for a result to be returned + page_token: opaque token, used to get the next page of results + limit: maximum number of results to be returned + + Returns: + dict with keys `next_page_token` and `results`. + `next_page_token` is an opaque token that is used to get the + next page of results, or `None` if there are no more results. + `results` is a list of dicts in the format: + + .. 
code-block: python + + { + 'authority': {'type': ..., 'url': ...}, + 'fetcher': {'name': ..., 'version': ...}, + 'discovery_date': ..., + 'format': '...', + 'metadata': b'...', + 'context': { ... }, + } + + """ + ... + + @remote_api_endpoint("revision/metadata/add") + def revision_metadata_add( + self, + id: str, + context: Dict[str, Union[str, bytes, int]], + discovery_date: datetime.datetime, + authority: Dict[str, Any], + fetcher: Dict[str, Any], + format: str, + metadata: bytes, + ) -> None: + """Add a revision_metadata for the revision at discovery_date, + obtained using the `fetcher` from the `authority`. + + The authority and fetcher must be known to the storage before + using this endpoint. + + If there is already revision metadata for the same revision, authority, + fetcher, and at the same date; the new one will be either dropped or + will replace the existing one + (it is unspecified which one of these two behaviors happens). + + Args: + discovery_date: when the metadata was fetched. + authority: a dict containing keys `type` and `url`. + fetcher: a dict containing keys `name` and `version`. + format: text field indicating the format of the revision of the + metadata: blob of raw metadata + """ + ... + + @remote_api_endpoint("revision/metadata/get") + def revision_metadata_get( + self, + id: str, + authority: Dict[str, str], + after: Optional[datetime.datetime] = None, + page_token: Optional[bytes] = None, + limit: int = 1000, + ) -> Dict[str, Any]: + """Retrieve list of all revision_metadata entries for the id + + Args: + id: the revision's SWHID + authority: a dict containing keys `type` and `url`. + after: minimum discovery_date for a result to be returned + page_token: opaque token, used to get the next page of results + limit: maximum number of results to be returned + + Returns: + dict with keys `next_page_token` and `results`. 
+ `next_page_token` is an opaque token that is used to get the + next page of results, or `None` if there are no more results. + `results` is a list of dicts in the format: + + .. code-block: python + + { + 'authority': {'type': ..., 'url': ...}, + 'fetcher': {'name': ..., 'version': ...}, + 'discovery_date': ..., + 'format': '...', + 'metadata': b'...', + 'context': { ... }, + } + + """ + ... + + @remote_api_endpoint("release/metadata/add") + def release_metadata_add( + self, + id: str, + context: Dict[str, Union[str, bytes, int]], + discovery_date: datetime.datetime, + authority: Dict[str, Any], + fetcher: Dict[str, Any], + format: str, + metadata: bytes, + ) -> None: + """Add a release_metadata for the release at discovery_date, + obtained using the `fetcher` from the `authority`. + + The authority and fetcher must be known to the storage before + using this endpoint. + + If there is already release metadata for the same release, authority, + fetcher, and at the same date; the new one will be either dropped or + will replace the existing one + (it is unspecified which one of these two behaviors happens). + + Args: + discovery_date: when the metadata was fetched. + authority: a dict containing keys `type` and `url`. + fetcher: a dict containing keys `name` and `version`. + format: text field indicating the format of the release of the + metadata: blob of raw metadata + """ + ... + + @remote_api_endpoint("release/metadata/get") + def release_metadata_get( + self, + id: str, + authority: Dict[str, str], + after: Optional[datetime.datetime] = None, + page_token: Optional[bytes] = None, + limit: int = 1000, + ) -> Dict[str, Any]: + """Retrieve list of all release_metadata entries for the id + + Args: + id: the release's SWHID + authority: a dict containing keys `type` and `url`. 
+ after: minimum discovery_date for a result to be returned + page_token: opaque token, used to get the next page of results + limit: maximum number of results to be returned + + Returns: + dict with keys `next_page_token` and `results`. + `next_page_token` is an opaque token that is used to get the + next page of results, or `None` if there are no more results. + `results` is a list of dicts in the format: + + .. code-block: python + + { + 'authority': {'type': ..., 'url': ...}, + 'fetcher': {'name': ..., 'version': ...}, + 'discovery_date': ..., + 'format': '...', + 'metadata': b'...', + 'context': { ... }, + } + + """ + ... + + @remote_api_endpoint("snapshot/metadata/add") + def snapshot_metadata_add( + self, + id: str, + context: Dict[str, Union[str, bytes, int]], + discovery_date: datetime.datetime, + authority: Dict[str, Any], + fetcher: Dict[str, Any], + format: str, + metadata: bytes, + ) -> None: + """Add a snapshot_metadata for the snapshot at discovery_date, + obtained using the `fetcher` from the `authority`. + + The authority and fetcher must be known to the storage before + using this endpoint. + + If there is already snapshot metadata for the same snapshot, authority, + fetcher, and at the same date; the new one will be either dropped or + will replace the existing one + (it is unspecified which one of these two behaviors happens). + + Args: + discovery_date: when the metadata was fetched. + authority: a dict containing keys `type` and `url`. + fetcher: a dict containing keys `name` and `version`. + format: text field indicating the format of the snapshot of the + metadata: blob of raw metadata + """ + ... 
+ + @remote_api_endpoint("snapshot/metadata/get") + def snapshot_metadata_get( + self, + id: str, + authority: Dict[str, str], + after: Optional[datetime.datetime] = None, + page_token: Optional[bytes] = None, + limit: int = 1000, + ) -> Dict[str, Any]: + """Retrieve list of all snapshot_metadata entries for the id + + Args: + id: the snapshot's SWHID + authority: a dict containing keys `type` and `url`. + after: minimum discovery_date for a result to be returned + page_token: opaque token, used to get the next page of results + limit: maximum number of results to be returned + + Returns: + dict with keys `next_page_token` and `results`. + `next_page_token` is an opaque token that is used to get the + next page of results, or `None` if there are no more results. + `results` is a list of dicts in the format: + + .. code-block:: python + + { + 'authority': {'type': ..., 'url': ...}, + 'fetcher': {'name': ..., 'version': ...}, + 'discovery_date': ..., + 'format': '...', + 'metadata': b'...', + 'context': { ... }, + } + + """ + ...
+ @remote_api_endpoint("origin/metadata/add") def origin_metadata_add( self, diff --git a/swh/storage/storage.py b/swh/storage/storage.py --- a/swh/storage/storage.py +++ b/swh/storage/storage.py @@ -1154,7 +1154,6 @@ cur.execute("select * from swh_update_counter(%s)", (key,)) @timed - @db_transaction() def content_metadata_add( self, id: str, @@ -1164,8 +1163,6 @@ fetcher: Dict[str, Any], format: str, metadata: bytes, - db=None, - cur=None, ) -> None: self._object_metadata_add( "content", @@ -1176,12 +1173,9 @@ fetcher, format, metadata, - db, - cur, ) @timed - @db_transaction() def content_metadata_get( self, id: str, @@ -1189,15 +1183,154 @@ after: Optional[datetime.datetime] = None, page_token: Optional[bytes] = None, limit: int = 1000, + ) -> Dict[str, Any]: + return self._object_metadata_get( + "content", id, authority, after, page_token, limit, + ) + + @timed + def directory_metadata_add( + self, + id: str, + context: Dict[str, Union[str, bytes, int]], + discovery_date: datetime.datetime, + authority: Dict[str, Any], + fetcher: Dict[str, Any], + format: str, + metadata: bytes, + ) -> None: + self._object_metadata_add( + "directory", + id, + context, + discovery_date, + authority, + fetcher, + format, + metadata, + ) + + @timed + def directory_metadata_get( + self, + id: str, + authority: Dict[str, str], + after: Optional[datetime.datetime] = None, + page_token: Optional[bytes] = None, + limit: int = 1000, db=None, cur=None, ) -> Dict[str, Any]: return self._object_metadata_get( - "content", id, authority, after, page_token, limit, db, cur + "directory", id, authority, after, page_token, limit, + ) + + @timed + def revision_metadata_add( + self, + id: str, + context: Dict[str, Union[str, bytes, int]], + discovery_date: datetime.datetime, + authority: Dict[str, Any], + fetcher: Dict[str, Any], + format: str, + metadata: bytes, + ) -> None: + self._object_metadata_add( + "revision", + id, + context, + discovery_date, + authority, + fetcher, + format, + 
metadata, + ) + + @timed + def revision_metadata_get( + self, + id: str, + authority: Dict[str, str], + after: Optional[datetime.datetime] = None, + page_token: Optional[bytes] = None, + limit: int = 1000, + ) -> Dict[str, Any]: + return self._object_metadata_get( + "revision", id, authority, after, page_token, limit, + ) + + @timed + def release_metadata_add( + self, + id: str, + context: Dict[str, Union[str, bytes, int]], + discovery_date: datetime.datetime, + authority: Dict[str, Any], + fetcher: Dict[str, Any], + format: str, + metadata: bytes, + ) -> None: + self._object_metadata_add( + "release", + id, + context, + discovery_date, + authority, + fetcher, + format, + metadata, + ) + + @timed + def release_metadata_get( + self, + id: str, + authority: Dict[str, str], + after: Optional[datetime.datetime] = None, + page_token: Optional[bytes] = None, + limit: int = 1000, + ) -> Dict[str, Any]: + return self._object_metadata_get( + "release", id, authority, after, page_token, limit, + ) + + @timed + def snapshot_metadata_add( + self, + id: str, + context: Dict[str, Union[str, bytes, int]], + discovery_date: datetime.datetime, + authority: Dict[str, Any], + fetcher: Dict[str, Any], + format: str, + metadata: bytes, + ) -> None: + self._object_metadata_add( + "snapshot", + id, + context, + discovery_date, + authority, + fetcher, + format, + metadata, + ) + + @timed + def snapshot_metadata_get( + self, + id: str, + authority: Dict[str, str], + after: Optional[datetime.datetime] = None, + page_token: Optional[bytes] = None, + limit: int = 1000, + ) -> Dict[str, Any]: + return self._object_metadata_get( + "snapshot", id, authority, after, page_token, limit, ) @timed - @db_transaction() def origin_metadata_add( self, origin_url: str, @@ -1206,8 +1339,6 @@ fetcher: Dict[str, Any], format: str, metadata: bytes, - db=None, - cur=None, ) -> None: context: Dict[str, Union[str, bytes, int]] = {} # origins have no context @@ -1220,12 +1351,9 @@ fetcher, format, metadata, - db, 
- cur, ) @timed - @db_transaction(statement_timeout=500) def origin_metadata_get( self, origin_url: str, @@ -1233,11 +1361,9 @@ after: Optional[datetime.datetime] = None, page_token: Optional[bytes] = None, limit: int = 1000, - db=None, - cur=None, ) -> Dict[str, Any]: result = self._object_metadata_get( - "origin", origin_url, authority, after, page_token, limit, db, cur + "origin", origin_url, authority, after, page_token, limit, ) for res in result["results"]: @@ -1246,6 +1372,7 @@ return result + @db_transaction() def _object_metadata_add( self, object_type: str, @@ -1286,6 +1413,7 @@ method_name=f"{object_type}_metadata_add", ) + @db_transaction(statement_timeout=500) def _object_metadata_get( self, object_type: str, diff --git a/swh/storage/tests/storage_data.py b/swh/storage/tests/storage_data.py --- a/swh/storage/tests/storage_data.py +++ b/swh/storage/tests/storage_data.py @@ -532,6 +532,230 @@ "metadata": b"foo: bar", } +directory_metadata = { + "id": f"swh:1:dir:{dir['id']}", + "context": {"origin": origin["url"]}, + "discovery_date": datetime.datetime( + 2015, 1, 1, 21, 0, 0, tzinfo=datetime.timezone.utc + ), + "authority": { + "type": metadata_authority["type"], + "url": metadata_authority["url"], + }, + "fetcher": { + "name": metadata_fetcher["name"], + "version": metadata_fetcher["version"], + }, + "format": "json", + "metadata": b'{"foo": "bar"}', +} +directory_metadata2 = { + "id": f"swh:1:dir:{dir['id']}", + "context": {"origin": origin2["url"]}, + "discovery_date": datetime.datetime( + 2017, 1, 1, 22, 0, 0, tzinfo=datetime.timezone.utc + ), + "authority": { + "type": metadata_authority["type"], + "url": metadata_authority["url"], + }, + "fetcher": { + "name": metadata_fetcher["name"], + "version": metadata_fetcher["version"], + }, + "format": "yaml", + "metadata": b"foo: bar", +} +directory_metadata3 = { + "id": f"swh:1:dir:{dir['id']}", + "context": { + "origin": origin["url"], + "visit": 42, + "snapshot": 
f"swh:1:snp:{hash_to_hex(snapshot['id'])}", + "release": f"swh:1:rel:{hash_to_hex(release['id'])}", + "revision": f"swh:1:rev:{hash_to_hex(revision['id'])}", + "path": b"/foo/bar", + }, + "discovery_date": datetime.datetime( + 2017, 1, 1, 22, 0, 0, tzinfo=datetime.timezone.utc + ), + "authority": { + "type": metadata_authority2["type"], + "url": metadata_authority2["url"], + }, + "fetcher": { + "name": metadata_fetcher2["name"], + "version": metadata_fetcher2["version"], + }, + "format": "yaml", + "metadata": b"foo: bar", +} + +revision_metadata = { + "id": f"swh:1:rev:{revision['id']}", + "context": {"origin": origin["url"]}, + "discovery_date": datetime.datetime( + 2015, 1, 1, 21, 0, 0, tzinfo=datetime.timezone.utc + ), + "authority": { + "type": metadata_authority["type"], + "url": metadata_authority["url"], + }, + "fetcher": { + "name": metadata_fetcher["name"], + "version": metadata_fetcher["version"], + }, + "format": "json", + "metadata": b'{"foo": "bar"}', +} +revision_metadata2 = { + "id": f"swh:1:rev:{revision['id']}", + "context": {"origin": origin2["url"]}, + "discovery_date": datetime.datetime( + 2017, 1, 1, 22, 0, 0, tzinfo=datetime.timezone.utc + ), + "authority": { + "type": metadata_authority["type"], + "url": metadata_authority["url"], + }, + "fetcher": { + "name": metadata_fetcher["name"], + "version": metadata_fetcher["version"], + }, + "format": "yaml", + "metadata": b"foo: bar", +} +revision_metadata3 = { + "id": f"swh:1:rev:{revision['id']}", + "context": { + "origin": origin["url"], + "visit": 42, + "snapshot": f"swh:1:snp:{hash_to_hex(snapshot['id'])}", + "release": f"swh:1:rel:{hash_to_hex(release['id'])}", + }, + "discovery_date": datetime.datetime( + 2017, 1, 1, 22, 0, 0, tzinfo=datetime.timezone.utc + ), + "authority": { + "type": metadata_authority2["type"], + "url": metadata_authority2["url"], + }, + "fetcher": { + "name": metadata_fetcher2["name"], + "version": metadata_fetcher2["version"], + }, + "format": "yaml", + "metadata": 
b"foo: bar", +} + +release_metadata = { + "id": f"swh:1:rel:{release['id']}", + "context": {"origin": origin["url"]}, + "discovery_date": datetime.datetime( + 2015, 1, 1, 21, 0, 0, tzinfo=datetime.timezone.utc + ), + "authority": { + "type": metadata_authority["type"], + "url": metadata_authority["url"], + }, + "fetcher": { + "name": metadata_fetcher["name"], + "version": metadata_fetcher["version"], + }, + "format": "json", + "metadata": b'{"foo": "bar"}', +} +release_metadata2 = { + "id": f"swh:1:rel:{release['id']}", + "context": {"origin": origin2["url"]}, + "discovery_date": datetime.datetime( + 2017, 1, 1, 22, 0, 0, tzinfo=datetime.timezone.utc + ), + "authority": { + "type": metadata_authority["type"], + "url": metadata_authority["url"], + }, + "fetcher": { + "name": metadata_fetcher["name"], + "version": metadata_fetcher["version"], + }, + "format": "yaml", + "metadata": b"foo: bar", +} +release_metadata3 = { + "id": f"swh:1:rel:{release['id']}", + "context": { + "origin": origin["url"], + "visit": 42, + "snapshot": f"swh:1:snp:{hash_to_hex(snapshot['id'])}", + }, + "discovery_date": datetime.datetime( + 2017, 1, 1, 22, 0, 0, tzinfo=datetime.timezone.utc + ), + "authority": { + "type": metadata_authority2["type"], + "url": metadata_authority2["url"], + }, + "fetcher": { + "name": metadata_fetcher2["name"], + "version": metadata_fetcher2["version"], + }, + "format": "yaml", + "metadata": b"foo: bar", +} + +snapshot_metadata = { + "id": f"swh:1:snp:{snapshot['id']}", + "context": {"origin": origin["url"]}, + "discovery_date": datetime.datetime( + 2015, 1, 1, 21, 0, 0, tzinfo=datetime.timezone.utc + ), + "authority": { + "type": metadata_authority["type"], + "url": metadata_authority["url"], + }, + "fetcher": { + "name": metadata_fetcher["name"], + "version": metadata_fetcher["version"], + }, + "format": "json", + "metadata": b'{"foo": "bar"}', +} +snapshot_metadata2 = { + "id": f"swh:1:snp:{snapshot['id']}", + "context": {"origin": origin2["url"]}, + 
"discovery_date": datetime.datetime( + 2017, 1, 1, 22, 0, 0, tzinfo=datetime.timezone.utc + ), + "authority": { + "type": metadata_authority["type"], + "url": metadata_authority["url"], + }, + "fetcher": { + "name": metadata_fetcher["name"], + "version": metadata_fetcher["version"], + }, + "format": "yaml", + "metadata": b"foo: bar", +} +snapshot_metadata3 = { + "id": f"swh:1:snp:{snapshot['id']}", + "context": {"origin": origin["url"], "visit": 42,}, + "discovery_date": datetime.datetime( + 2017, 1, 1, 22, 0, 0, tzinfo=datetime.timezone.utc + ), + "authority": { + "type": metadata_authority2["type"], + "url": metadata_authority2["url"], + }, + "fetcher": { + "name": metadata_fetcher2["name"], + "version": metadata_fetcher2["version"], + }, + "format": "yaml", + "metadata": b"foo: bar", +} + origin_metadata = { "origin_url": origin["url"], "discovery_date": datetime.datetime( diff --git a/swh/storage/tests/test_storage.py b/swh/storage/tests/test_storage.py --- a/swh/storage/tests/test_storage.py +++ b/swh/storage/tests/test_storage.py @@ -3438,6 +3438,762 @@ assert result["next_page_token"] is None assert [content_metadata2] == result["results"] + def test_directory_metadata_add(self, swh_storage): + directory = data.dir + fetcher = data.metadata_fetcher + authority = data.metadata_authority + directory_swhid = f"swh:1:dir:{directory['id']}" + + swh_storage.metadata_fetcher_add(**fetcher) + swh_storage.metadata_authority_add(**authority) + + swh_storage.directory_metadata_add(**data.directory_metadata) + swh_storage.directory_metadata_add(**data.directory_metadata2) + + result = swh_storage.directory_metadata_get(directory_swhid, authority) + assert result["next_page_token"] is None + assert [data.directory_metadata, data.directory_metadata2] == list( + sorted(result["results"], key=lambda x: x["discovery_date"],) + ) + + def test_directory_metadata_add_duplicate(self, swh_storage): + """Duplicates should be silently updated.""" + directory = data.dir + fetcher = 
data.metadata_fetcher + authority = data.metadata_authority + directory_swhid = f"swh:1:dir:{directory['id']}" + + new_directory_metadata2 = { + **data.directory_metadata2, + "format": "new-format", + "metadata": b"new-metadata", + } + + swh_storage.metadata_fetcher_add(**fetcher) + swh_storage.metadata_authority_add(**authority) + + swh_storage.directory_metadata_add(**data.directory_metadata) + swh_storage.directory_metadata_add(**data.directory_metadata2) + swh_storage.directory_metadata_add(**new_directory_metadata2) + + result = swh_storage.directory_metadata_get(directory_swhid, authority) + assert result["next_page_token"] is None + + expected_results1 = (data.directory_metadata, new_directory_metadata2) + expected_results2 = (data.directory_metadata, data.directory_metadata2) + + assert tuple(sorted(result["results"], key=lambda x: x["discovery_date"],)) in ( + expected_results1, # cassandra + expected_results2, # postgresql + ) + + def test_directory_metadata_add_dict(self, swh_storage): + fetcher = data.metadata_fetcher + authority = data.metadata_authority + + swh_storage.metadata_fetcher_add(**fetcher) + swh_storage.metadata_authority_add(**authority) + + kwargs = data.directory_metadata.copy() + kwargs["metadata"] = {"foo": "bar"} + + with pytest.raises(StorageArgumentException): + swh_storage.directory_metadata_add(**kwargs) + + def test_directory_metadata_get(self, swh_storage): + authority = data.metadata_authority + fetcher = data.metadata_fetcher + authority2 = data.metadata_authority2 + fetcher2 = data.metadata_fetcher2 + directory1_swhid = f"swh:1:dir:{data.dir['id']}" + directory2_swhid = f"swh:1:dir:{data.dir2['id']}" + + directory1_metadata1 = data.directory_metadata + directory1_metadata2 = data.directory_metadata2 + directory1_metadata3 = data.directory_metadata3 + directory2_metadata = {**data.directory_metadata2, "id": directory2_swhid} + + swh_storage.metadata_authority_add(**authority) + swh_storage.metadata_fetcher_add(**fetcher) + 
swh_storage.metadata_authority_add(**authority2) + swh_storage.metadata_fetcher_add(**fetcher2) + + swh_storage.directory_metadata_add(**directory1_metadata1) + swh_storage.directory_metadata_add(**directory1_metadata2) + swh_storage.directory_metadata_add(**directory1_metadata3) + swh_storage.directory_metadata_add(**directory2_metadata) + + result = swh_storage.directory_metadata_get(directory1_swhid, authority) + assert result["next_page_token"] is None + assert [directory1_metadata1, directory1_metadata2] == list( + sorted(result["results"], key=lambda x: x["discovery_date"],) + ) + + result = swh_storage.directory_metadata_get(directory1_swhid, authority2) + assert result["next_page_token"] is None + assert [directory1_metadata3] == list( + sorted(result["results"], key=lambda x: x["discovery_date"],) + ) + + result = swh_storage.directory_metadata_get(directory2_swhid, authority) + assert result["next_page_token"] is None + assert [directory2_metadata] == list(result["results"],) + + def test_directory_metadata_get_after(self, swh_storage): + directory = data.dir + fetcher = data.metadata_fetcher + authority = data.metadata_authority + directory_swhid = f"swh:1:dir:{directory['id']}" + + swh_storage.metadata_fetcher_add(**fetcher) + swh_storage.metadata_authority_add(**authority) + + swh_storage.directory_metadata_add(**data.directory_metadata) + swh_storage.directory_metadata_add(**data.directory_metadata2) + + result = swh_storage.directory_metadata_get( + directory_swhid, + authority, + after=data.directory_metadata["discovery_date"] - timedelta(seconds=1), + ) + assert result["next_page_token"] is None + assert [data.directory_metadata, data.directory_metadata2] == list( + sorted(result["results"], key=lambda x: x["discovery_date"],) + ) + + result = swh_storage.directory_metadata_get( + directory_swhid, authority, after=data.directory_metadata["discovery_date"] + ) + assert result["next_page_token"] is None + assert [data.directory_metadata2] == 
result["results"] + + result = swh_storage.directory_metadata_get( + directory_swhid, authority, after=data.directory_metadata2["discovery_date"] + ) + assert result["next_page_token"] is None + assert [] == result["results"] + + def test_directory_metadata_get_paginate(self, swh_storage): + directory = data.dir + fetcher = data.metadata_fetcher + authority = data.metadata_authority + directory_swhid = f"swh:1:dir:{directory['id']}" + + swh_storage.metadata_fetcher_add(**fetcher) + swh_storage.metadata_authority_add(**authority) + + swh_storage.directory_metadata_add(**data.directory_metadata) + swh_storage.directory_metadata_add(**data.directory_metadata2) + + swh_storage.directory_metadata_get(directory_swhid, authority) + + result = swh_storage.directory_metadata_get(directory_swhid, authority, limit=1) + assert result["next_page_token"] is not None + assert [data.directory_metadata] == result["results"] + + result = swh_storage.directory_metadata_get( + directory_swhid, authority, limit=1, page_token=result["next_page_token"] + ) + assert result["next_page_token"] is None + assert [data.directory_metadata2] == result["results"] + + def test_directory_metadata_get_paginate_same_date(self, swh_storage): + directory = data.dir + fetcher1 = data.metadata_fetcher + fetcher2 = data.metadata_fetcher2 + authority = data.metadata_authority + directory_swhid = f"swh:1:dir:{directory['id']}" + + swh_storage.metadata_fetcher_add(**fetcher1) + swh_storage.metadata_fetcher_add(**fetcher2) + swh_storage.metadata_authority_add(**authority) + + directory_metadata2 = { + **data.directory_metadata2, + "discovery_date": data.directory_metadata2["discovery_date"], + "fetcher": {"name": fetcher2["name"], "version": fetcher2["version"],}, + } + + swh_storage.directory_metadata_add(**data.directory_metadata) + swh_storage.directory_metadata_add(**directory_metadata2) + + result = swh_storage.directory_metadata_get(directory_swhid, authority, limit=1) + assert result["next_page_token"] 
is not None + assert [data.directory_metadata] == result["results"] + + result = swh_storage.directory_metadata_get( + directory_swhid, authority, limit=1, page_token=result["next_page_token"] + ) + assert result["next_page_token"] is None + assert [directory_metadata2] == result["results"] + + def test_revision_metadata_add(self, swh_storage): + revision = data.revision + fetcher = data.metadata_fetcher + authority = data.metadata_authority + revision_swhid = f"swh:1:rev:{revision['id']}" + + swh_storage.metadata_fetcher_add(**fetcher) + swh_storage.metadata_authority_add(**authority) + + swh_storage.revision_metadata_add(**data.revision_metadata) + swh_storage.revision_metadata_add(**data.revision_metadata2) + + result = swh_storage.revision_metadata_get(revision_swhid, authority) + assert result["next_page_token"] is None + assert [data.revision_metadata, data.revision_metadata2] == list( + sorted(result["results"], key=lambda x: x["discovery_date"],) + ) + + def test_revision_metadata_add_duplicate(self, swh_storage): + """Duplicates should be silently updated.""" + revision = data.revision + fetcher = data.metadata_fetcher + authority = data.metadata_authority + revision_swhid = f"swh:1:rev:{revision['id']}" + + new_revision_metadata2 = { + **data.revision_metadata2, + "format": "new-format", + "metadata": b"new-metadata", + } + + swh_storage.metadata_fetcher_add(**fetcher) + swh_storage.metadata_authority_add(**authority) + + swh_storage.revision_metadata_add(**data.revision_metadata) + swh_storage.revision_metadata_add(**data.revision_metadata2) + swh_storage.revision_metadata_add(**new_revision_metadata2) + + result = swh_storage.revision_metadata_get(revision_swhid, authority) + assert result["next_page_token"] is None + + expected_results1 = (data.revision_metadata, new_revision_metadata2) + expected_results2 = (data.revision_metadata, data.revision_metadata2) + + assert tuple(sorted(result["results"], key=lambda x: x["discovery_date"],)) in ( + 
expected_results1, # cassandra + expected_results2, # postgresql + ) + + def test_revision_metadata_add_dict(self, swh_storage): + fetcher = data.metadata_fetcher + authority = data.metadata_authority + + swh_storage.metadata_fetcher_add(**fetcher) + swh_storage.metadata_authority_add(**authority) + + kwargs = data.revision_metadata.copy() + kwargs["metadata"] = {"foo": "bar"} + + with pytest.raises(StorageArgumentException): + swh_storage.revision_metadata_add(**kwargs) + + def test_revision_metadata_get(self, swh_storage): + authority = data.metadata_authority + fetcher = data.metadata_fetcher + authority2 = data.metadata_authority2 + fetcher2 = data.metadata_fetcher2 + revision1_swhid = f"swh:1:rev:{data.revision['id']}" + revision2_swhid = f"swh:1:rev:{data.revision2['id']}" + + revision1_metadata1 = data.revision_metadata + revision1_metadata2 = data.revision_metadata2 + revision1_metadata3 = data.revision_metadata3 + revision2_metadata = {**data.revision_metadata2, "id": revision2_swhid} + + swh_storage.metadata_authority_add(**authority) + swh_storage.metadata_fetcher_add(**fetcher) + swh_storage.metadata_authority_add(**authority2) + swh_storage.metadata_fetcher_add(**fetcher2) + + swh_storage.revision_metadata_add(**revision1_metadata1) + swh_storage.revision_metadata_add(**revision1_metadata2) + swh_storage.revision_metadata_add(**revision1_metadata3) + swh_storage.revision_metadata_add(**revision2_metadata) + + result = swh_storage.revision_metadata_get(revision1_swhid, authority) + assert result["next_page_token"] is None + assert [revision1_metadata1, revision1_metadata2] == list( + sorted(result["results"], key=lambda x: x["discovery_date"],) + ) + + result = swh_storage.revision_metadata_get(revision1_swhid, authority2) + assert result["next_page_token"] is None + assert [revision1_metadata3] == list( + sorted(result["results"], key=lambda x: x["discovery_date"],) + ) + + result = swh_storage.revision_metadata_get(revision2_swhid, authority) + assert 
result["next_page_token"] is None + assert [revision2_metadata] == list(result["results"],) + + def test_revision_metadata_get_after(self, swh_storage): + revision = data.revision + fetcher = data.metadata_fetcher + authority = data.metadata_authority + revision_swhid = f"swh:1:rev:{revision['id']}" + + swh_storage.metadata_fetcher_add(**fetcher) + swh_storage.metadata_authority_add(**authority) + + swh_storage.revision_metadata_add(**data.revision_metadata) + swh_storage.revision_metadata_add(**data.revision_metadata2) + + result = swh_storage.revision_metadata_get( + revision_swhid, + authority, + after=data.revision_metadata["discovery_date"] - timedelta(seconds=1), + ) + assert result["next_page_token"] is None + assert [data.revision_metadata, data.revision_metadata2] == list( + sorted(result["results"], key=lambda x: x["discovery_date"],) + ) + + result = swh_storage.revision_metadata_get( + revision_swhid, authority, after=data.revision_metadata["discovery_date"] + ) + assert result["next_page_token"] is None + assert [data.revision_metadata2] == result["results"] + + result = swh_storage.revision_metadata_get( + revision_swhid, authority, after=data.revision_metadata2["discovery_date"] + ) + assert result["next_page_token"] is None + assert [] == result["results"] + + def test_revision_metadata_get_paginate(self, swh_storage): + revision = data.revision + fetcher = data.metadata_fetcher + authority = data.metadata_authority + revision_swhid = f"swh:1:rev:{revision['id']}" + + swh_storage.metadata_fetcher_add(**fetcher) + swh_storage.metadata_authority_add(**authority) + + swh_storage.revision_metadata_add(**data.revision_metadata) + swh_storage.revision_metadata_add(**data.revision_metadata2) + + swh_storage.revision_metadata_get(revision_swhid, authority) + + result = swh_storage.revision_metadata_get(revision_swhid, authority, limit=1) + assert result["next_page_token"] is not None + assert [data.revision_metadata] == result["results"] + + result = 
swh_storage.revision_metadata_get( + revision_swhid, authority, limit=1, page_token=result["next_page_token"] + ) + assert result["next_page_token"] is None + assert [data.revision_metadata2] == result["results"] + + def test_revision_metadata_get_paginate_same_date(self, swh_storage): + revision = data.revision + fetcher1 = data.metadata_fetcher + fetcher2 = data.metadata_fetcher2 + authority = data.metadata_authority + revision_swhid = f"swh:1:rev:{revision['id']}" + + swh_storage.metadata_fetcher_add(**fetcher1) + swh_storage.metadata_fetcher_add(**fetcher2) + swh_storage.metadata_authority_add(**authority) + + revision_metadata2 = { + **data.revision_metadata2, + "discovery_date": data.revision_metadata["discovery_date"], + "fetcher": {"name": fetcher2["name"], "version": fetcher2["version"],}, + } + + swh_storage.revision_metadata_add(**data.revision_metadata) + swh_storage.revision_metadata_add(**revision_metadata2) + + result = swh_storage.revision_metadata_get(revision_swhid, authority, limit=1) + assert result["next_page_token"] is not None + assert [data.revision_metadata] == result["results"] + + result = swh_storage.revision_metadata_get( + revision_swhid, authority, limit=1, page_token=result["next_page_token"] + ) + assert result["next_page_token"] is None + assert [revision_metadata2] == result["results"] + + def test_release_metadata_add(self, swh_storage): + release = data.release + fetcher = data.metadata_fetcher + authority = data.metadata_authority + release_swhid = f"swh:1:rel:{release['id']}" + + swh_storage.metadata_fetcher_add(**fetcher) + swh_storage.metadata_authority_add(**authority) + + swh_storage.release_metadata_add(**data.release_metadata) + swh_storage.release_metadata_add(**data.release_metadata2) + + result = swh_storage.release_metadata_get(release_swhid, authority) + assert result["next_page_token"] is None + assert [data.release_metadata, data.release_metadata2] == list( + sorted(result["results"], key=lambda x: 
x["discovery_date"],) + ) + + def test_release_metadata_add_duplicate(self, swh_storage): + """Duplicates should be silently updated.""" + release = data.release + fetcher = data.metadata_fetcher + authority = data.metadata_authority + release_swhid = f"swh:1:rel:{release['id']}" + + new_release_metadata2 = { + **data.release_metadata2, + "format": "new-format", + "metadata": b"new-metadata", + } + + swh_storage.metadata_fetcher_add(**fetcher) + swh_storage.metadata_authority_add(**authority) + + swh_storage.release_metadata_add(**data.release_metadata) + swh_storage.release_metadata_add(**data.release_metadata2) + swh_storage.release_metadata_add(**new_release_metadata2) + + result = swh_storage.release_metadata_get(release_swhid, authority) + assert result["next_page_token"] is None + + expected_results1 = (data.release_metadata, new_release_metadata2) + expected_results2 = (data.release_metadata, data.release_metadata2) + + assert tuple(sorted(result["results"], key=lambda x: x["discovery_date"],)) in ( + expected_results1, # cassandra + expected_results2, # postgresql + ) + + def test_release_metadata_add_dict(self, swh_storage): + fetcher = data.metadata_fetcher + authority = data.metadata_authority + + swh_storage.metadata_fetcher_add(**fetcher) + swh_storage.metadata_authority_add(**authority) + + kwargs = data.release_metadata.copy() + kwargs["metadata"] = {"foo": "bar"} + + with pytest.raises(StorageArgumentException): + swh_storage.release_metadata_add(**kwargs) + + def test_release_metadata_get(self, swh_storage): + authority = data.metadata_authority + fetcher = data.metadata_fetcher + authority2 = data.metadata_authority2 + fetcher2 = data.metadata_fetcher2 + release1_swhid = f"swh:1:rel:{data.release['id']}" + release2_swhid = f"swh:1:rel:{data.release2['id']}" + + release1_metadata1 = data.release_metadata + release1_metadata2 = data.release_metadata2 + release1_metadata3 = data.release_metadata3 + release2_metadata = {**data.release_metadata2, "id": 
release2_swhid} + + swh_storage.metadata_authority_add(**authority) + swh_storage.metadata_fetcher_add(**fetcher) + swh_storage.metadata_authority_add(**authority2) + swh_storage.metadata_fetcher_add(**fetcher2) + + swh_storage.release_metadata_add(**release1_metadata1) + swh_storage.release_metadata_add(**release1_metadata2) + swh_storage.release_metadata_add(**release1_metadata3) + swh_storage.release_metadata_add(**release2_metadata) + + result = swh_storage.release_metadata_get(release1_swhid, authority) + assert result["next_page_token"] is None + assert [release1_metadata1, release1_metadata2] == list( + sorted(result["results"], key=lambda x: x["discovery_date"],) + ) + + result = swh_storage.release_metadata_get(release1_swhid, authority2) + assert result["next_page_token"] is None + assert [release1_metadata3] == list( + sorted(result["results"], key=lambda x: x["discovery_date"],) + ) + + result = swh_storage.release_metadata_get(release2_swhid, authority) + assert result["next_page_token"] is None + assert [release2_metadata] == list(result["results"],) + + def test_release_metadata_get_after(self, swh_storage): + release = data.release + fetcher = data.metadata_fetcher + authority = data.metadata_authority + release_swhid = f"swh:1:rel:{release['id']}" + + swh_storage.metadata_fetcher_add(**fetcher) + swh_storage.metadata_authority_add(**authority) + + swh_storage.release_metadata_add(**data.release_metadata) + swh_storage.release_metadata_add(**data.release_metadata2) + + result = swh_storage.release_metadata_get( + release_swhid, + authority, + after=data.release_metadata["discovery_date"] - timedelta(seconds=1), + ) + assert result["next_page_token"] is None + assert [data.release_metadata, data.release_metadata2] == list( + sorted(result["results"], key=lambda x: x["discovery_date"],) + ) + + result = swh_storage.release_metadata_get( + release_swhid, authority, after=data.release_metadata["discovery_date"] + ) + assert result["next_page_token"] is 
None + assert [data.release_metadata2] == result["results"] + + result = swh_storage.release_metadata_get( + release_swhid, authority, after=data.release_metadata2["discovery_date"] + ) + assert result["next_page_token"] is None + assert [] == result["results"] + + def test_release_metadata_get_paginate(self, swh_storage): + release = data.release + fetcher = data.metadata_fetcher + authority = data.metadata_authority + release_swhid = f"swh:1:rel:{release['id']}" + + swh_storage.metadata_fetcher_add(**fetcher) + swh_storage.metadata_authority_add(**authority) + + swh_storage.release_metadata_add(**data.release_metadata) + swh_storage.release_metadata_add(**data.release_metadata2) + + swh_storage.release_metadata_get(release_swhid, authority) + + result = swh_storage.release_metadata_get(release_swhid, authority, limit=1) + assert result["next_page_token"] is not None + assert [data.release_metadata] == result["results"] + + result = swh_storage.release_metadata_get( + release_swhid, authority, limit=1, page_token=result["next_page_token"] + ) + assert result["next_page_token"] is None + assert [data.release_metadata2] == result["results"] + + def test_release_metadata_get_paginate_same_date(self, swh_storage): + release = data.release + fetcher1 = data.metadata_fetcher + fetcher2 = data.metadata_fetcher2 + authority = data.metadata_authority + release_swhid = f"swh:1:rel:{release['id']}" + + swh_storage.metadata_fetcher_add(**fetcher1) + swh_storage.metadata_fetcher_add(**fetcher2) + swh_storage.metadata_authority_add(**authority) + + release_metadata2 = { + **data.release_metadata2, + "discovery_date": data.release_metadata["discovery_date"], + "fetcher": {"name": fetcher2["name"], "version": fetcher2["version"],}, + } + + swh_storage.release_metadata_add(**data.release_metadata) + swh_storage.release_metadata_add(**release_metadata2) + + result = swh_storage.release_metadata_get(release_swhid, authority, limit=1) + assert result["next_page_token"] is not None + 
assert [data.release_metadata] == result["results"] + + result = swh_storage.release_metadata_get( + release_swhid, authority, limit=1, page_token=result["next_page_token"] + ) + assert result["next_page_token"] is None + assert [release_metadata2] == result["results"] + + def test_snapshot_metadata_add(self, swh_storage): + snapshot = data.snapshot + fetcher = data.metadata_fetcher + authority = data.metadata_authority + snapshot_swhid = f"swh:1:snp:{snapshot['id']}" + + swh_storage.metadata_fetcher_add(**fetcher) + swh_storage.metadata_authority_add(**authority) + + swh_storage.snapshot_metadata_add(**data.snapshot_metadata) + swh_storage.snapshot_metadata_add(**data.snapshot_metadata2) + + result = swh_storage.snapshot_metadata_get(snapshot_swhid, authority) + assert result["next_page_token"] is None + assert [data.snapshot_metadata, data.snapshot_metadata2] == list( + sorted(result["results"], key=lambda x: x["discovery_date"],) + ) + + def test_snapshot_metadata_add_duplicate(self, swh_storage): + """Duplicates should be silently updated.""" + snapshot = data.snapshot + fetcher = data.metadata_fetcher + authority = data.metadata_authority + snapshot_swhid = f"swh:1:snp:{snapshot['id']}" + + new_snapshot_metadata2 = { + **data.snapshot_metadata2, + "format": "new-format", + "metadata": b"new-metadata", + } + + swh_storage.metadata_fetcher_add(**fetcher) + swh_storage.metadata_authority_add(**authority) + + swh_storage.snapshot_metadata_add(**data.snapshot_metadata) + swh_storage.snapshot_metadata_add(**data.snapshot_metadata2) + swh_storage.snapshot_metadata_add(**new_snapshot_metadata2) + + result = swh_storage.snapshot_metadata_get(snapshot_swhid, authority) + assert result["next_page_token"] is None + + expected_results1 = (data.snapshot_metadata, new_snapshot_metadata2) + expected_results2 = (data.snapshot_metadata, data.snapshot_metadata2) + + assert tuple(sorted(result["results"], key=lambda x: x["discovery_date"],)) in ( + expected_results1, # cassandra 
+ expected_results2, # postgresql + ) + + def test_snapshot_metadata_add_dict(self, swh_storage): + fetcher = data.metadata_fetcher + authority = data.metadata_authority + + swh_storage.metadata_fetcher_add(**fetcher) + swh_storage.metadata_authority_add(**authority) + + kwargs = data.snapshot_metadata.copy() + kwargs["metadata"] = {"foo": "bar"} + + with pytest.raises(StorageArgumentException): + swh_storage.snapshot_metadata_add(**kwargs) + + def test_snapshot_metadata_get(self, swh_storage): + authority = data.metadata_authority + fetcher = data.metadata_fetcher + authority2 = data.metadata_authority2 + fetcher2 = data.metadata_fetcher2 + snapshot1_swhid = f"swh:1:snp:{data.snapshot['id']}" + snapshot2_swhid = f"swh:1:snp:{data.complete_snapshot['id']}" + + snapshot1_metadata1 = data.snapshot_metadata + snapshot1_metadata2 = data.snapshot_metadata2 + snapshot1_metadata3 = data.snapshot_metadata3 + snapshot2_metadata = {**data.snapshot_metadata2, "id": snapshot2_swhid} + + swh_storage.metadata_authority_add(**authority) + swh_storage.metadata_fetcher_add(**fetcher) + swh_storage.metadata_authority_add(**authority2) + swh_storage.metadata_fetcher_add(**fetcher2) + + swh_storage.snapshot_metadata_add(**snapshot1_metadata1) + swh_storage.snapshot_metadata_add(**snapshot1_metadata2) + swh_storage.snapshot_metadata_add(**snapshot1_metadata3) + swh_storage.snapshot_metadata_add(**snapshot2_metadata) + + result = swh_storage.snapshot_metadata_get(snapshot1_swhid, authority) + assert result["next_page_token"] is None + assert [snapshot1_metadata1, snapshot1_metadata2] == list( + sorted(result["results"], key=lambda x: x["discovery_date"],) + ) + + result = swh_storage.snapshot_metadata_get(snapshot1_swhid, authority2) + assert result["next_page_token"] is None + assert [snapshot1_metadata3] == list( + sorted(result["results"], key=lambda x: x["discovery_date"],) + ) + + result = swh_storage.snapshot_metadata_get(snapshot2_swhid, authority) + assert 
result["next_page_token"] is None + assert [snapshot2_metadata] == list(result["results"],) + + def test_snapshot_metadata_get_after(self, swh_storage): + snapshot = data.snapshot + fetcher = data.metadata_fetcher + authority = data.metadata_authority + snapshot_swhid = f"swh:1:snp:{snapshot['id']}" + + swh_storage.metadata_fetcher_add(**fetcher) + swh_storage.metadata_authority_add(**authority) + + swh_storage.snapshot_metadata_add(**data.snapshot_metadata) + swh_storage.snapshot_metadata_add(**data.snapshot_metadata2) + + result = swh_storage.snapshot_metadata_get( + snapshot_swhid, + authority, + after=data.snapshot_metadata["discovery_date"] - timedelta(seconds=1), + ) + assert result["next_page_token"] is None + assert [data.snapshot_metadata, data.snapshot_metadata2] == list( + sorted(result["results"], key=lambda x: x["discovery_date"],) + ) + + result = swh_storage.snapshot_metadata_get( + snapshot_swhid, authority, after=data.snapshot_metadata["discovery_date"] + ) + assert result["next_page_token"] is None + assert [data.snapshot_metadata2] == result["results"] + + result = swh_storage.snapshot_metadata_get( + snapshot_swhid, authority, after=data.snapshot_metadata2["discovery_date"] + ) + assert result["next_page_token"] is None + assert [] == result["results"] + + def test_snapshot_metadata_get_paginate(self, swh_storage): + snapshot = data.snapshot + fetcher = data.metadata_fetcher + authority = data.metadata_authority + snapshot_swhid = f"swh:1:snp:{snapshot['id']}" + + swh_storage.metadata_fetcher_add(**fetcher) + swh_storage.metadata_authority_add(**authority) + + swh_storage.snapshot_metadata_add(**data.snapshot_metadata) + swh_storage.snapshot_metadata_add(**data.snapshot_metadata2) + + swh_storage.snapshot_metadata_get(snapshot_swhid, authority) + + result = swh_storage.snapshot_metadata_get(snapshot_swhid, authority, limit=1) + assert result["next_page_token"] is not None + assert [data.snapshot_metadata] == result["results"] + + result = 
swh_storage.snapshot_metadata_get( + snapshot_swhid, authority, limit=1, page_token=result["next_page_token"] + ) + assert result["next_page_token"] is None + assert [data.snapshot_metadata2] == result["results"] + + def test_snapshot_metadata_get_paginate_same_date(self, swh_storage): + snapshot = data.snapshot + fetcher1 = data.metadata_fetcher + fetcher2 = data.metadata_fetcher2 + authority = data.metadata_authority + snapshot_swhid = f"swh:1:snp:{snapshot['id']}" + + swh_storage.metadata_fetcher_add(**fetcher1) + swh_storage.metadata_fetcher_add(**fetcher2) + swh_storage.metadata_authority_add(**authority) + + snapshot_metadata2 = { + **data.snapshot_metadata2, + "discovery_date": data.snapshot_metadata["discovery_date"], + "fetcher": {"name": fetcher2["name"], "version": fetcher2["version"],}, + } + + swh_storage.snapshot_metadata_add(**data.snapshot_metadata) + swh_storage.snapshot_metadata_add(**snapshot_metadata2) + + result = swh_storage.snapshot_metadata_get(snapshot_swhid, authority, limit=1) + assert result["next_page_token"] is not None + assert [data.snapshot_metadata] == result["results"] + + result = swh_storage.snapshot_metadata_get( + snapshot_swhid, authority, limit=1, page_token=result["next_page_token"] + ) + assert result["next_page_token"] is None + assert [snapshot_metadata2] == result["results"] + def test_origin_metadata_add(self, swh_storage): origin = data.origin fetcher = data.metadata_fetcher