D4986.id17785.diff

diff --git a/swh/storage/interface.py b/swh/storage/interface.py
--- a/swh/storage/interface.py
+++ b/swh/storage/interface.py
@@ -11,14 +11,12 @@
from swh.core.api import remote_api_endpoint
from swh.core.api.classes import PagedResult as CorePagedResult
-from swh.model.identifiers import SWHID
from swh.model.model import (
Content,
Directory,
MetadataAuthority,
MetadataAuthorityType,
MetadataFetcher,
- MetadataTargetType,
Origin,
OriginVisit,
OriginVisitStatus,
@@ -31,6 +29,7 @@
Snapshot,
SnapshotBranch,
)
+from swh.model.swhid import SWHID, SWHIDObjectType
class ListOrder(Enum):
@@ -1099,8 +1098,7 @@
@remote_api_endpoint("raw_extrinsic_metadata/get")
def raw_extrinsic_metadata_get(
self,
- type: MetadataTargetType,
- target: Union[str, SWHID],
+ target: SWHID,
authority: MetadataAuthority,
after: Optional[datetime.datetime] = None,
page_token: Optional[bytes] = None,
@@ -1109,8 +1107,7 @@
"""Retrieve list of all raw_extrinsic_metadata entries for the id
Args:
- type: one of the values of swh.model.model.MetadataTargetType
- target: an URL if type is 'origin', else a core SWHID
+ target: a core SWHID
authority: a dict containing keys `type` and `url`.
after: minimum discovery_date for a result to be returned
page_token: opaque token, used to get the next page of results
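
After this change, callers identify the object with a single core SWHID (origins included) instead of a (type, target) pair. A minimal usage sketch, assuming the swh.model.swhid module this diff introduces; storage is any swh.storage client, authority a MetadataAuthority, and the helper name iter_content_metadata is purely illustrative:

from swh.model.swhid import parse_swhid

def iter_content_metadata(storage, authority, sha1_git: bytes):
    # Hypothetical helper: yield every raw extrinsic metadata entry attached
    # to one content object, following PagedResult pagination.
    target = parse_swhid(f"swh:1:cnt:{sha1_git.hex()}")
    page_token = None
    while True:
        page = storage.raw_extrinsic_metadata_get(
            target, authority, page_token=page_token
        )
        yield from page.results
        if page.next_page_token is None:
            break
        page_token = page.next_page_token
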
diff --git a/swh/storage/postgresql/converters.py b/swh/storage/postgresql/converters.py
--- a/swh/storage/postgresql/converters.py
+++ b/swh/storage/postgresql/converters.py
@@ -4,15 +4,16 @@
# See top-level LICENSE file for more information
import datetime
-from typing import Any, Dict, Optional
+from typing import Any, Dict, Optional, Tuple
+
+import attr
from swh.core.utils import encode_with_unescape
-from swh.model.identifiers import parse_swhid
from swh.model.model import (
+ SWHID,
MetadataAuthority,
MetadataAuthorityType,
MetadataFetcher,
- MetadataTargetType,
ObjectType,
Person,
RawExtrinsicMetadata,
@@ -22,6 +23,7 @@
Timestamp,
TimestampWithTimezone,
)
+from swh.model.swhid import SWHIDObjectType, parse_swhid
from ..utils import map_optional
@@ -142,6 +144,24 @@
}
+def swhid_to_db(swhid: SWHID) -> Optional[Tuple[int, str, bytes]]:
+ if swhid is not None:
+ assert not swhid.metadata
+ assert swhid.namespace == "swh"
+ return (swhid.scheme_version, swhid.object_type.value, swhid.object_id)
+
+
+def db_to_swhid(db_swhid: Optional[Tuple[int, str, bytes]]) -> SWHID:
+ if db_swhid:
+ return SWHID(
+ namespace="swh",
+ scheme_version=db_swhid[0],
+ object_type=db_swhid[1],
+ object_id=db_swhid[2],
+ metadata={},
+ )
+
+
def revision_to_db(revision: Revision) -> Dict[str, Any]:
"""Convert a swh-model revision to its database representation.
"""
@@ -295,13 +315,8 @@
def db_to_raw_extrinsic_metadata(row) -> RawExtrinsicMetadata:
- type_ = MetadataTargetType(row["raw_extrinsic_metadata.type"])
- target = row["raw_extrinsic_metadata.target"]
- if type_ != MetadataTargetType.ORIGIN:
- target = parse_swhid(target)
return RawExtrinsicMetadata(
- type=type_,
- target=target,
+ target=row["raw_extrinsic_metadata.target"],
authority=MetadataAuthority(
type=MetadataAuthorityType(row["metadata_authority.type"]),
url=row["metadata_authority.url"],
@@ -314,9 +329,9 @@
metadata=row["raw_extrinsic_metadata.metadata"],
origin=row["origin"],
visit=row["visit"],
- snapshot=map_optional(parse_swhid, row["snapshot"]),
- release=map_optional(parse_swhid, row["release"]),
- revision=map_optional(parse_swhid, row["revision"]),
+ snapshot=row["snapshot"],
+ release=row["release"],
+ revision=row["revision"],
path=row["path"],
- directory=map_optional(parse_swhid, row["directory"]),
+ directory=row["directory"],
)
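
swhid_to_db and db_to_swhid above map a SWHID to and from the (version, object_type, object_id) triple stored in the new composite column. A rough round-trip sketch, assuming the swh.model.swhid module this diff relies on (object_id is raw bytes, not hex text):

from swh.model.swhid import parse_swhid
from swh.storage.postgresql.converters import db_to_swhid, swhid_to_db

swhid = parse_swhid("swh:1:rev:aafb16d69fd30ff58afdd69036a26047f3aebdc6")
triple = swhid_to_db(swhid)
# triple == (1, 'rev', b'\xaa\xfb\x16...'): the shape of the SQL composite value
restored = db_to_swhid(triple)
# restored carries the same namespace, scheme version, object type and object id
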
diff --git a/swh/storage/postgresql/db.py b/swh/storage/postgresql/db.py
--- a/swh/storage/postgresql/db.py
+++ b/swh/storage/postgresql/db.py
@@ -6,15 +6,22 @@
import datetime
import logging
import random
+import re
import select
from typing import Any, Dict, Iterable, List, Optional, Tuple
+import psycopg2
+
from swh.core.db import BaseDb
from swh.core.db.db_utils import execute_values_generator
from swh.core.db.db_utils import jsonize as _jsonize
from swh.core.db.db_utils import stored_procedure
+from swh.model.hashutil import hash_to_bytes
from swh.model.model import SHA1_SIZE, OriginVisit, OriginVisitStatus
+from swh.model.swhid import SWHID
+from swh.model.swhid import _swhid_type_map as swhid_typemap
from swh.storage.interface import ListOrder
+from swh.storage.postgresql.converters import swhid_to_db
logger = logging.getLogger(__name__)
@@ -23,6 +30,35 @@
return _jsonize(dict(d) if d is not None else None)
+def typecast_swhid(value, cur):
+ if value is None:
+ return None
+ m = re.match(r'\(([^)]+),([^)]+),"([^)]+)"\)', value)
+ if m:
+ return SWHID(
+ scheme_version=int(m.group(1)),
+ object_type=swhid_typemap[m.group(2)],
+ object_id=hash_to_bytes(m.group(3)[3:]),
+ )
+ else:
+ raise psycopg2.InterfaceError("bad SWHID representation: %r" % value)
+
+
+def adapt_swhid(swhid: SWHID):
+ value = psycopg2.extensions.AsIs(
+ (
+ b"ROW(%d, '%s'::swhid_type, '\\x%s'::bytea)"
+ % (
+ swhid.scheme_version,
+ swhid.object_type.value.encode(),
+ swhid.object_id.hex().encode(),
+ )
+ ).decode()
+ )
+ return value
+
+
class Db(BaseDb):
"""Proxy to the SWH DB, with wrappers around stored procedures
@@ -30,6 +66,30 @@
current_version = 166
+ def __init__(
+ self,
+ conn: psycopg2.extensions.connection,
+ pool: Optional[psycopg2.pool.AbstractConnectionPool] = None,
+ ):
+ super().__init__(conn, pool)
+ with conn.cursor() as cur:
+ cur.execute(
+ """
+ SELECT pg_type.oid
+ FROM pg_type
+ JOIN pg_namespace
+ ON typnamespace = pg_namespace.oid
+ WHERE typname = %(typename)s
+ AND nspname = %(namespace)s""",
+ {"typename": "swhid", "namespace": "public"},
+ )
+
+ oid = cur.fetchone()[0]
+
+ t_SWHID = psycopg2.extensions.new_type((oid,), "SWHID", typecast_swhid)
+ psycopg2.extensions.register_type(t_SWHID, conn)
+ psycopg2.extensions.register_adapter(SWHID, adapt_swhid)
+
def mktemp_dir_entry(self, entry_type, cur=None):
self._cursor(cur).execute(
"SELECT swh_mktemp_dir_entry(%s)", (("directory_entry_%s" % entry_type),)
@@ -1136,7 +1196,6 @@
"""The list of context columns for all artifact types."""
_raw_extrinsic_metadata_insert_cols = [
- "type",
"target",
"authority_id",
"fetcher_id",
@@ -1158,7 +1217,6 @@
raw_extrinsic_metadata_get_cols = [
"raw_extrinsic_metadata.target",
- "raw_extrinsic_metadata.type",
"discovery_date",
"metadata_authority.type",
"metadata_authority.url",
@@ -1179,30 +1237,29 @@
INNER JOIN metadata_authority
ON (metadata_authority.id=authority_id)
INNER JOIN metadata_fetcher ON (metadata_fetcher.id=fetcher_id)
- WHERE raw_extrinsic_metadata.target=%s AND authority_id=%s
+ WHERE (raw_extrinsic_metadata.target)=%s
+ AND authority_id=%s
"""
def raw_extrinsic_metadata_add(
self,
- type: str,
- target: str,
+ target: SWHID,
discovery_date: datetime.datetime,
authority_id: int,
fetcher_id: int,
format: str,
metadata: bytes,
- origin: Optional[str],
+ origin: Optional[SWHID],
visit: Optional[int],
- snapshot: Optional[str],
- release: Optional[str],
- revision: Optional[str],
+ snapshot: Optional[SWHID],
+ release: Optional[SWHID],
+ revision: Optional[SWHID],
path: Optional[bytes],
- directory: Optional[str],
+ directory: Optional[SWHID],
cur,
):
query = self._raw_extrinsic_metadata_insert_query
args: Dict[str, Any] = dict(
- type=type,
target=target,
authority_id=authority_id,
fetcher_id=fetcher_id,
@@ -1224,8 +1281,7 @@
def raw_extrinsic_metadata_get(
self,
- type: str,
- target: str,
+ target: SWHID,
authority_id: int,
after_time: Optional[datetime.datetime],
after_fetcher: Optional[int],
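
typecast_swhid and adapt_swhid above parse and render the composite value textually. A possible alternative for the read side, sketched with psycopg2's built-in composite support (psycopg2.extras.register_composite plus a CompositeCaster subclass); it assumes the same swh.model.swhid types used by typecast_swhid and has not been checked against the bulk COPY paths:

import psycopg2.extras

from swh.model.swhid import SWHID
from swh.model.swhid import _swhid_type_map as swhid_typemap


class SWHIDCaster(psycopg2.extras.CompositeCaster):
    # Build a SWHID from an already-parsed (version, object_type, object_id) row.
    def make(self, values):
        version, object_type, object_id = values
        return SWHID(
            scheme_version=version,
            object_type=swhid_typemap[object_type],
            object_id=bytes(object_id),
        )


def register_swhid_caster(conn):
    # Register a typecaster for the public.swhid composite type created in
    # 30-schema.sql, so fetched values come back as SWHID instances.
    psycopg2.extras.register_composite("public.swhid", conn, factory=SWHIDCaster)

Writing SWHID parameters out would still need the register_adapter() call above; register_composite only covers the SQL-to-Python direction.
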
diff --git a/swh/storage/postgresql/storage.py b/swh/storage/postgresql/storage.py
--- a/swh/storage/postgresql/storage.py
+++ b/swh/storage/postgresql/storage.py
@@ -19,7 +19,6 @@
from swh.core.api.serializers import msgpack_dumps, msgpack_loads
from swh.core.db.common import db_transaction, db_transaction_generator
from swh.model.hashutil import DEFAULT_ALGORITHMS, hash_to_bytes, hash_to_hex
-from swh.model.identifiers import SWHID
from swh.model.model import (
SHA1_SIZE,
Content,
@@ -27,7 +26,6 @@
MetadataAuthority,
MetadataAuthorityType,
MetadataFetcher,
- MetadataTargetType,
Origin,
OriginVisit,
OriginVisitStatus,
@@ -41,6 +39,7 @@
SnapshotBranch,
TargetType,
)
+from swh.model.swhid import SWHID, SWHIDObjectType
from swh.storage.exc import HashCollision, StorageArgumentException, StorageDBError
from swh.storage.interface import (
VISIT_STATUSES,
@@ -1256,26 +1255,18 @@
) -> None:
metadata = list(metadata)
self.journal_writer.raw_extrinsic_metadata_add(metadata)
- counter = Counter[MetadataTargetType]()
+ counter = Counter[SWHIDObjectType]()
for metadata_entry in metadata:
- authority_id = self._get_authority_id(metadata_entry.authority, db, cur)
- fetcher_id = self._get_fetcher_id(metadata_entry.fetcher, db, cur)
+ d = attr.asdict(
+ metadata_entry, recurse=False
+ ) # we want SWHID not converted
+ d.pop("authority")
+ d.pop("fetcher")
db.raw_extrinsic_metadata_add(
- type=metadata_entry.type.value,
- target=str(metadata_entry.target),
- discovery_date=metadata_entry.discovery_date,
- authority_id=authority_id,
- fetcher_id=fetcher_id,
- format=metadata_entry.format,
- metadata=metadata_entry.metadata,
- origin=metadata_entry.origin,
- visit=metadata_entry.visit,
- snapshot=map_optional(str, metadata_entry.snapshot),
- release=map_optional(str, metadata_entry.release),
- revision=map_optional(str, metadata_entry.revision),
- path=metadata_entry.path,
- directory=map_optional(str, metadata_entry.directory),
+ **d,
+ authority_id=self._get_authority_id(metadata_entry.authority, db, cur),
+ fetcher_id=self._get_fetcher_id(metadata_entry.fetcher, db, cur),
cur=cur,
)
counter[metadata_entry.type] += 1
@@ -1290,8 +1281,7 @@
@db_transaction()
def raw_extrinsic_metadata_get(
self,
- type: MetadataTargetType,
- target: Union[str, SWHID],
+ target: SWHID,
authority: MetadataAuthority,
after: Optional[datetime.datetime] = None,
page_token: Optional[bytes] = None,
@@ -1299,19 +1289,6 @@
db=None,
cur=None,
) -> PagedResult[RawExtrinsicMetadata]:
- if type == MetadataTargetType.ORIGIN:
- if isinstance(target, SWHID):
- raise StorageArgumentException(
- f"raw_extrinsic_metadata_get called with type='origin', "
- f"but provided target is a SWHID: {target!r}"
- )
- else:
- if not isinstance(target, SWHID):
- raise StorageArgumentException(
- f"raw_extrinsic_metadata_get called with type!='origin', "
- f"but provided target is not a SWHID: {target!r}"
- )
-
if page_token:
(after_time, after_fetcher) = msgpack_loads(base64.b64decode(page_token))
if after and after_time < after:
@@ -1327,12 +1304,13 @@
return PagedResult(next_page_token=None, results=[],)
rows = db.raw_extrinsic_metadata_get(
- type, str(target), authority_id, after_time, after_fetcher, limit + 1, cur,
+ target, authority_id, after_time, after_fetcher, limit + 1, cur,
)
+
rows = [dict(zip(db.raw_extrinsic_metadata_get_cols, row)) for row in rows]
results = []
for row in rows:
- assert str(target) == row["raw_extrinsic_metadata.target"]
+ assert target == row["raw_extrinsic_metadata.target"]
results.append(converters.db_to_raw_extrinsic_metadata(row))
if len(results) > limit:
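
raw_extrinsic_metadata_add now forwards the model object's fields with attr.asdict(..., recurse=False) so that nested SWHID values reach the psycopg2 adapter intact. A self-contained illustration with toy attrs classes (not the swh model) of why recurse=False matters:

import attr


@attr.s(auto_attribs=True, frozen=True)
class Inner:
    object_type: str
    object_id: bytes


@attr.s(auto_attribs=True, frozen=True)
class Outer:
    target: Inner
    format: str


row = Outer(target=Inner("rev", b"\x01" * 20), format="json")
assert isinstance(attr.asdict(row, recurse=False)["target"], Inner)  # instance kept
assert isinstance(attr.asdict(row)["target"], dict)  # default recursion flattens it
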
diff --git a/swh/storage/sql/20-enums.sql b/swh/storage/sql/20-enums.sql
--- a/swh/storage/sql/20-enums.sql
+++ b/swh/storage/sql/20-enums.sql
@@ -23,3 +23,5 @@
'failed'
);
comment on type origin_visit_state IS 'Possible origin visit status values';
+
+create type swhid_type as enum ('cnt', 'dir', 'rev', 'rel', 'snp', 'ori');
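
The labels of the new swhid_type enum are meant to match the values written by converters.swhid_to_db and read back by db.typecast_swhid. A small sanity check that could live in the tests, assuming SWHIDObjectType.value holds the three-letter abbreviation as those helpers do:

from swh.model.swhid import SWHIDObjectType

SQL_SWHID_TYPE_LABELS = {"cnt", "dir", "rev", "rel", "snp", "ori"}
assert {t.value for t in SWHIDObjectType} <= SQL_SWHID_TYPE_LABELS
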
diff --git a/swh/storage/sql/30-schema.sql b/swh/storage/sql/30-schema.sql
--- a/swh/storage/sql/30-schema.sql
+++ b/swh/storage/sql/30-schema.sql
@@ -38,7 +38,9 @@
create domain file_perms as int;
-- an SWHID
-create domain swhid as text check (value ~ '^swh:[0-9]+:.*');
+-- create domain swhid as text check (value ~ '^swh:[0-9]+:.*');
+
+create type swhid as (version int, object_type swhid_type, object_id bytea);
-- Checksums about actual file content. Note that the content itself is not
@@ -430,8 +432,7 @@
-- Extrinsic metadata on a DAG objects and origins.
create table raw_extrinsic_metadata
(
- type text not null,
- target text not null,
+ target swhid not null,
-- metadata source
authority_id bigint not null,
@@ -443,7 +444,7 @@
metadata bytea not null,
-- context
- origin text,
+ origin swhid,
visit bigint,
snapshot swhid,
release swhid,
@@ -453,7 +454,6 @@
);
comment on table raw_extrinsic_metadata is 'keeps all metadata found concerning an object';
-comment on column raw_extrinsic_metadata.type is 'the type of object (content/directory/revision/release/snapshot/origin) the metadata is on';
comment on column raw_extrinsic_metadata.target is 'the SWHID or origin URL for which the metadata was found';
comment on column raw_extrinsic_metadata.discovery_date is 'the date of retrieval';
comment on column raw_extrinsic_metadata.authority_id is 'the metadata provider: github, openhub, deposit, etc.';
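
With target now a composite swhid rather than text, bound SWHID parameters are rendered by the adapt_swhid adapter as ROW(version, 'type'::swhid_type, '\x...'::bytea), and fetched targets come back as SWHID instances through the registered typecaster. A minimal query sketch, assuming db is a Db instance from swh.storage.postgresql.db (so the type registration in its __init__ has already run):

from swh.model.swhid import parse_swhid

def find_metadata_formats(db, swhid_str: str):
    # db: a Db instance, so the SWHID typecaster/adapter are registered on db.conn.
    target = parse_swhid(swhid_str)
    with db.conn.cursor() as cur:
        # Composite equality compares the whole (version, type, id) row.
        cur.execute(
            "select format, target from raw_extrinsic_metadata where target = %s",
            (target,),
        )
        return [fmt for fmt, _target in cur]
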
diff --git a/swh/storage/tests/storage_data.py b/swh/storage/tests/storage_data.py
--- a/swh/storage/tests/storage_data.py
+++ b/swh/storage/tests/storage_data.py
@@ -10,7 +10,6 @@
from swh.model import from_disk
from swh.model.hashutil import hash_to_bytes, hash_to_hex
-from swh.model.identifiers import parse_swhid
from swh.model.model import (
Content,
Directory,
@@ -18,7 +17,6 @@
MetadataAuthority,
MetadataAuthorityType,
MetadataFetcher,
- MetadataTargetType,
ObjectType,
Origin,
OriginVisit,
@@ -34,6 +32,23 @@
Timestamp,
TimestampWithTimezone,
)
+from swh.model.swhid import SWHID, SWHIDObjectType
+from swh.model.swhid import _swhid_type_map as swhid_type_map
+from swh.model.swhid import parse_swhid
+
+
+class SWHIDProvider:
+ def __init__(self, data):
+ self._data = data
+
+ def __getattr__(self, name):
+ return mkswhid(getattr(self._data, name))
+
+
+def mkswhid(obj):
+ object_type = swhid_type_map.get(obj.object_type)
+ if object_type:
+ return SWHID(object_type=object_type, object_id=obj.id)
class StorageData:
@@ -463,9 +478,8 @@
snapshots: Tuple[Snapshot, ...] = (snapshot, empty_snapshot, complete_snapshot)
content_metadata1 = RawExtrinsicMetadata(
- type=MetadataTargetType.CONTENT,
target=parse_swhid(f"swh:1:cnt:{hash_to_hex(content.sha1_git)}"),
- origin=origin.url,
+ origin=mkswhid(origin),
discovery_date=datetime.datetime(
2015, 1, 1, 21, 0, 0, tzinfo=datetime.timezone.utc
),
@@ -475,9 +489,8 @@
metadata=b'{"foo": "bar"}',
)
content_metadata2 = RawExtrinsicMetadata(
- type=MetadataTargetType.CONTENT,
target=parse_swhid(f"swh:1:cnt:{hash_to_hex(content.sha1_git)}"),
- origin=origin2.url,
+ origin=mkswhid(origin2),
discovery_date=datetime.datetime(
2017, 1, 1, 22, 0, 0, tzinfo=datetime.timezone.utc
),
@@ -487,7 +500,6 @@
metadata=b"foo: bar",
)
content_metadata3 = RawExtrinsicMetadata(
- type=MetadataTargetType.CONTENT,
target=parse_swhid(f"swh:1:cnt:{hash_to_hex(content.sha1_git)}"),
discovery_date=datetime.datetime(
2017, 1, 1, 22, 0, 0, tzinfo=datetime.timezone.utc
@@ -496,7 +508,7 @@
fetcher=attr.evolve(metadata_fetcher2, metadata=None),
format="yaml",
metadata=b"foo: bar",
- origin=origin.url,
+ origin=mkswhid(origin),
visit=42,
snapshot=parse_swhid(f"swh:1:snp:{hash_to_hex(snapshot.id)}"),
release=parse_swhid(f"swh:1:rel:{hash_to_hex(release.id)}"),
@@ -512,8 +524,7 @@
)
origin_metadata1 = RawExtrinsicMetadata(
- type=MetadataTargetType.ORIGIN,
- target=origin.url,
+ target=mkswhid(origin),
discovery_date=datetime.datetime(
2015, 1, 1, 21, 0, 0, tzinfo=datetime.timezone.utc
),
@@ -523,8 +534,7 @@
metadata=b'{"foo": "bar"}',
)
origin_metadata2 = RawExtrinsicMetadata(
- type=MetadataTargetType.ORIGIN,
- target=origin.url,
+ target=mkswhid(origin),
discovery_date=datetime.datetime(
2017, 1, 1, 22, 0, 0, tzinfo=datetime.timezone.utc
),
@@ -534,8 +544,7 @@
metadata=b"foo: bar",
)
origin_metadata3 = RawExtrinsicMetadata(
- type=MetadataTargetType.ORIGIN,
- target=origin.url,
+ target=mkswhid(origin),
discovery_date=datetime.datetime(
2017, 1, 1, 22, 0, 0, tzinfo=datetime.timezone.utc
),
@@ -550,3 +559,6 @@
origin_metadata2,
origin_metadata3,
)
+
+
+StorageData.swhid = SWHIDProvider(StorageData)
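
The SWHIDProvider/mkswhid helpers let the test data expose, for any sample object carrying object_type and id, the matching core SWHID, so the tests below can write sample_data.swhid.content or sample_data.swhid.origin instead of constructing SWHID objects by hand. Illustrative equivalences (sketch):

content_swhid = StorageData.swhid.content  # same as mkswhid(StorageData.content)
origin_swhid = StorageData.swhid.origin    # origin SWHID, replacing origin.url targets
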
diff --git a/swh/storage/tests/storage_tests.py b/swh/storage/tests/storage_tests.py
--- a/swh/storage/tests/storage_tests.py
+++ b/swh/storage/tests/storage_tests.py
@@ -19,11 +19,9 @@
from swh.model import from_disk
from swh.model.hashutil import hash_to_bytes
from swh.model.hypothesis_strategies import objects
-from swh.model.identifiers import SWHID
from swh.model.model import (
Content,
Directory,
- MetadataTargetType,
Origin,
OriginVisit,
OriginVisitStatus,
@@ -33,6 +31,7 @@
Snapshot,
TargetType,
)
+from swh.model.swhid import SWHID, SWHIDObjectType
from swh.storage import get_storage
from swh.storage.common import origin_url_to_sha1 as sha1
from swh.storage.exc import HashCollision, StorageArgumentException
@@ -3324,22 +3323,17 @@
def test_content_metadata_add(self, swh_storage, sample_data):
content = sample_data.content
+ content_swhid = sample_data.swhid.content
fetcher = sample_data.metadata_fetcher
authority = sample_data.metadata_authority
content_metadata = sample_data.content_metadata[:2]
- content_swhid = SWHID(
- object_type="content", object_id=hash_to_bytes(content.sha1_git)
- )
-
swh_storage.metadata_fetcher_add([fetcher])
swh_storage.metadata_authority_add([authority])
swh_storage.raw_extrinsic_metadata_add(content_metadata)
- result = swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.CONTENT, content_swhid, authority
- )
+ result = swh_storage.raw_extrinsic_metadata_get(content_swhid, authority)
assert result.next_page_token is None
assert list(sorted(result.results, key=lambda x: x.discovery_date,)) == list(
content_metadata
@@ -3357,12 +3351,10 @@
def test_content_metadata_add_duplicate(self, swh_storage, sample_data):
"""Duplicates should be silently updated."""
content = sample_data.content
+ content_swhid = sample_data.swhid.content
fetcher = sample_data.metadata_fetcher
authority = sample_data.metadata_authority
content_metadata, content_metadata2 = sample_data.content_metadata[:2]
- content_swhid = SWHID(
- object_type="content", object_id=hash_to_bytes(content.sha1_git)
- )
new_content_metadata2 = attr.evolve(
content_metadata2, format="new-format", metadata=b"new-metadata",
@@ -3374,9 +3366,7 @@
swh_storage.raw_extrinsic_metadata_add([content_metadata, content_metadata2])
swh_storage.raw_extrinsic_metadata_add([new_content_metadata2])
- result = swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.CONTENT, content_swhid, authority
- )
+ result = swh_storage.raw_extrinsic_metadata_get(content_swhid, authority)
assert result.next_page_token is None
expected_results1 = (content_metadata, new_content_metadata2)
@@ -3397,8 +3387,8 @@
content1_metadata3,
) = sample_data.content_metadata[:3]
- content1_swhid = SWHID(object_type="content", object_id=content.sha1_git)
- content2_swhid = SWHID(object_type="content", object_id=content2.sha1_git)
+ content1_swhid = sample_data.swhid.content
+ content2_swhid = sample_data.swhid.content2
content2_metadata = attr.evolve(content1_metadata2, target=content2_swhid)
swh_storage.metadata_authority_add([authority, authority2])
@@ -3413,43 +3403,35 @@
]
)
- result = swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.CONTENT, content1_swhid, authority
- )
+ result = swh_storage.raw_extrinsic_metadata_get(content1_swhid, authority)
assert result.next_page_token is None
assert [content1_metadata1, content1_metadata2] == list(
sorted(result.results, key=lambda x: x.discovery_date,)
)
- result = swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.CONTENT, content1_swhid, authority2
- )
+ result = swh_storage.raw_extrinsic_metadata_get(content1_swhid, authority2)
assert result.next_page_token is None
assert [content1_metadata3] == list(
sorted(result.results, key=lambda x: x.discovery_date,)
)
- result = swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.CONTENT, content2_swhid, authority
- )
+ result = swh_storage.raw_extrinsic_metadata_get(content2_swhid, authority)
assert result.next_page_token is None
assert [content2_metadata] == list(result.results,)
def test_content_metadata_get_after(self, swh_storage, sample_data):
content = sample_data.content
+ content_swhid = sample_data.swhid.content
fetcher = sample_data.metadata_fetcher
authority = sample_data.metadata_authority
content_metadata, content_metadata2 = sample_data.content_metadata[:2]
- content_swhid = SWHID(object_type="content", object_id=content.sha1_git)
-
swh_storage.metadata_fetcher_add([fetcher])
swh_storage.metadata_authority_add([authority])
swh_storage.raw_extrinsic_metadata_add([content_metadata, content_metadata2])
result = swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.CONTENT,
content_swhid,
authority,
after=content_metadata.discovery_date - timedelta(seconds=1),
@@ -3460,62 +3442,48 @@
)
result = swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.CONTENT,
- content_swhid,
- authority,
- after=content_metadata.discovery_date,
+ content_swhid, authority, after=content_metadata.discovery_date,
)
assert result.next_page_token is None
assert result.results == [content_metadata2]
result = swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.CONTENT,
- content_swhid,
- authority,
- after=content_metadata2.discovery_date,
+ content_swhid, authority, after=content_metadata2.discovery_date,
)
assert result.next_page_token is None
assert result.results == []
def test_content_metadata_get_paginate(self, swh_storage, sample_data):
content = sample_data.content
+ content_swhid = sample_data.swhid.content
fetcher = sample_data.metadata_fetcher
authority = sample_data.metadata_authority
content_metadata, content_metadata2 = sample_data.content_metadata[:2]
- content_swhid = SWHID(object_type="content", object_id=content.sha1_git)
-
swh_storage.metadata_fetcher_add([fetcher])
swh_storage.metadata_authority_add([authority])
swh_storage.raw_extrinsic_metadata_add([content_metadata, content_metadata2])
- swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.CONTENT, content_swhid, authority
- )
+ swh_storage.raw_extrinsic_metadata_get(content_swhid, authority)
result = swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.CONTENT, content_swhid, authority, limit=1
+ content_swhid, authority, limit=1
)
assert result.next_page_token is not None
assert result.results == [content_metadata]
result = swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.CONTENT,
- content_swhid,
- authority,
- limit=1,
- page_token=result.next_page_token,
+ content_swhid, authority, limit=1, page_token=result.next_page_token,
)
assert result.next_page_token is None
assert result.results == [content_metadata2]
def test_content_metadata_get_paginate_same_date(self, swh_storage, sample_data):
content = sample_data.content
+ content_swhid = sample_data.swhid.content
fetcher1, fetcher2 = sample_data.fetchers[:2]
authority = sample_data.metadata_authority
content_metadata, content_metadata2 = sample_data.content_metadata[:2]
- content_swhid = SWHID(object_type="content", object_id=content.sha1_git)
-
swh_storage.metadata_fetcher_add([fetcher1, fetcher2])
swh_storage.metadata_authority_add([authority])
@@ -3530,38 +3498,20 @@
)
result = swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.CONTENT, content_swhid, authority, limit=1
+ content_swhid, authority, limit=1
)
assert result.next_page_token is not None
assert result.results == [content_metadata]
result = swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.CONTENT,
- content_swhid,
- authority,
- limit=1,
- page_token=result.next_page_token,
+ content_swhid, authority, limit=1, page_token=result.next_page_token,
)
assert result.next_page_token is None
assert result.results == [new_content_metadata2]
- def test_content_metadata_get__invalid_id(self, swh_storage, sample_data):
- origin = sample_data.origin
- fetcher = sample_data.metadata_fetcher
- authority = sample_data.metadata_authority
- content_metadata, content_metadata2 = sample_data.content_metadata[:2]
-
- swh_storage.metadata_fetcher_add([fetcher])
- swh_storage.metadata_authority_add([authority])
- swh_storage.raw_extrinsic_metadata_add([content_metadata, content_metadata2])
-
- with pytest.raises(StorageArgumentException, match="SWHID"):
- swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.CONTENT, origin.url, authority
- )
-
def test_origin_metadata_add(self, swh_storage, sample_data):
origin = sample_data.origin
+ origin_swhid = sample_data.swhid.origin
fetcher = sample_data.metadata_fetcher
authority = sample_data.metadata_authority
origin_metadata, origin_metadata2 = sample_data.origin_metadata[:2]
@@ -3573,9 +3523,7 @@
swh_storage.raw_extrinsic_metadata_add([origin_metadata, origin_metadata2])
- result = swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.ORIGIN, origin.url, authority
- )
+ result = swh_storage.raw_extrinsic_metadata_get(origin_swhid, authority)
assert result.next_page_token is None
assert list(sorted(result.results, key=lambda x: x.discovery_date)) == [
origin_metadata,
@@ -3596,6 +3544,7 @@
def test_origin_metadata_add_duplicate(self, swh_storage, sample_data):
"""Duplicates should be silently updated."""
origin = sample_data.origin
+ origin_swhid = sample_data.swhid.origin
fetcher = sample_data.metadata_fetcher
authority = sample_data.metadata_authority
origin_metadata, origin_metadata2 = sample_data.origin_metadata[:2]
@@ -3611,9 +3560,7 @@
swh_storage.raw_extrinsic_metadata_add([origin_metadata, origin_metadata2])
swh_storage.raw_extrinsic_metadata_add([new_origin_metadata2])
- result = swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.ORIGIN, origin.url, authority
- )
+ result = swh_storage.raw_extrinsic_metadata_get(origin_swhid, authority)
assert result.next_page_token is None
# which of the two behavior happens is backend-specific.
@@ -3626,7 +3573,10 @@
)
def test_origin_metadata_get(self, swh_storage, sample_data):
- origin, origin2 = sample_data.origins[:2]
+ origin = sample_data.origin
+ origin_swhid = sample_data.swhid.origin
+ origin2 = sample_data.origin2
+ origin2_swhid = sample_data.swhid.origin2
fetcher, fetcher2 = sample_data.fetchers[:2]
authority, authority2 = sample_data.authorities[:2]
(
@@ -3637,7 +3587,7 @@
assert swh_storage.origin_add([origin, origin2]) == {"origin:add": 2}
- origin2_metadata = attr.evolve(origin1_metadata2, target=origin2.url)
+ origin2_metadata = attr.evolve(origin1_metadata2, target=origin2_swhid)
swh_storage.metadata_authority_add([authority, authority2])
swh_storage.metadata_fetcher_add([fetcher, fetcher2])
@@ -3646,30 +3596,25 @@
[origin1_metadata1, origin1_metadata2, origin1_metadata3, origin2_metadata]
)
- result = swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.ORIGIN, origin.url, authority
- )
+ result = swh_storage.raw_extrinsic_metadata_get(origin_swhid, authority)
assert result.next_page_token is None
assert [origin1_metadata1, origin1_metadata2] == list(
sorted(result.results, key=lambda x: x.discovery_date,)
)
- result = swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.ORIGIN, origin.url, authority2
- )
+ result = swh_storage.raw_extrinsic_metadata_get(origin_swhid, authority2)
assert result.next_page_token is None
assert [origin1_metadata3] == list(
sorted(result.results, key=lambda x: x.discovery_date,)
)
- result = swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.ORIGIN, origin2.url, authority
- )
+ result = swh_storage.raw_extrinsic_metadata_get(origin2_swhid, authority)
assert result.next_page_token is None
assert [origin2_metadata] == list(result.results,)
def test_origin_metadata_get_after(self, swh_storage, sample_data):
origin = sample_data.origin
+ origin_swhid = sample_data.swhid.origin
fetcher = sample_data.metadata_fetcher
authority = sample_data.metadata_authority
origin_metadata, origin_metadata2 = sample_data.origin_metadata[:2]
@@ -3681,8 +3626,7 @@
swh_storage.raw_extrinsic_metadata_add([origin_metadata, origin_metadata2])
result = swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.ORIGIN,
- origin.url,
+ origin_swhid,
authority,
after=origin_metadata.discovery_date - timedelta(seconds=1),
)
@@ -3693,25 +3637,20 @@
]
result = swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.ORIGIN,
- origin.url,
- authority,
- after=origin_metadata.discovery_date,
+ origin_swhid, authority, after=origin_metadata.discovery_date,
)
assert result.next_page_token is None
assert result.results == [origin_metadata2]
result = swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.ORIGIN,
- origin.url,
- authority,
- after=origin_metadata2.discovery_date,
+ origin_swhid, authority, after=origin_metadata2.discovery_date,
)
assert result.next_page_token is None
assert result.results == []
def test_origin_metadata_get_paginate(self, swh_storage, sample_data):
origin = sample_data.origin
+ origin_swhid = sample_data.swhid.origin
fetcher = sample_data.metadata_fetcher
authority = sample_data.metadata_authority
origin_metadata, origin_metadata2 = sample_data.origin_metadata[:2]
@@ -3722,28 +3661,23 @@
swh_storage.raw_extrinsic_metadata_add([origin_metadata, origin_metadata2])
- swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.ORIGIN, origin.url, authority
- )
+ swh_storage.raw_extrinsic_metadata_get(origin_swhid, authority)
result = swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.ORIGIN, origin.url, authority, limit=1
+ origin_swhid, authority, limit=1
)
assert result.next_page_token is not None
assert result.results == [origin_metadata]
result = swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.ORIGIN,
- origin.url,
- authority,
- limit=1,
- page_token=result.next_page_token,
+ origin_swhid, authority, limit=1, page_token=result.next_page_token,
)
assert result.next_page_token is None
assert result.results == [origin_metadata2]
def test_origin_metadata_get_paginate_same_date(self, swh_storage, sample_data):
origin = sample_data.origin
+ origin_swhid = sample_data.swhid.origin
fetcher1, fetcher2 = sample_data.fetchers[:2]
authority = sample_data.metadata_authority
origin_metadata, origin_metadata2 = sample_data.origin_metadata[:2]
@@ -3761,17 +3695,13 @@
swh_storage.raw_extrinsic_metadata_add([origin_metadata, new_origin_metadata2])
result = swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.ORIGIN, origin.url, authority, limit=1
+ origin_swhid, authority, limit=1
)
assert result.next_page_token is not None
assert result.results == [origin_metadata]
result = swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.ORIGIN,
- origin.url,
- authority,
- limit=1,
- page_token=result.next_page_token,
+ origin_swhid, authority, limit=1, page_token=result.next_page_token,
)
assert result.next_page_token is None
assert result.results == [new_origin_metadata2]
@@ -3798,24 +3728,6 @@
with pytest.raises(StorageArgumentException, match="fetcher"):
swh_storage.raw_extrinsic_metadata_add([origin_metadata, origin_metadata2])
- def test_origin_metadata_get__invalid_id_type(self, swh_storage, sample_data):
- origin = sample_data.origin
- authority = sample_data.metadata_authority
- fetcher = sample_data.metadata_fetcher
- origin_metadata, origin_metadata2 = sample_data.origin_metadata[:2]
- content_metadata = sample_data.content_metadata[0]
- assert swh_storage.origin_add([origin]) == {"origin:add": 1}
-
- swh_storage.metadata_fetcher_add([fetcher])
- swh_storage.metadata_authority_add([authority])
-
- swh_storage.raw_extrinsic_metadata_add([origin_metadata, origin_metadata2])
-
- with pytest.raises(StorageArgumentException, match="SWHID"):
- swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.ORIGIN, content_metadata.target, authority,
- )
-
class TestStorageGeneratedData:
def test_generate_content_get_data(self, swh_storage, swh_contents):
