Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7123844
D4986.id17785.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
37 KB
Subscribers
None
D4986.id17785.diff
View Options
diff --git a/swh/storage/interface.py b/swh/storage/interface.py
--- a/swh/storage/interface.py
+++ b/swh/storage/interface.py
@@ -11,14 +11,12 @@
from swh.core.api import remote_api_endpoint
from swh.core.api.classes import PagedResult as CorePagedResult
-from swh.model.identifiers import SWHID
from swh.model.model import (
Content,
Directory,
MetadataAuthority,
MetadataAuthorityType,
MetadataFetcher,
- MetadataTargetType,
Origin,
OriginVisit,
OriginVisitStatus,
@@ -31,6 +29,7 @@
Snapshot,
SnapshotBranch,
)
+from swh.model.swhid import SWHID, SWHIDObjectType
class ListOrder(Enum):
@@ -1099,8 +1098,7 @@
@remote_api_endpoint("raw_extrinsic_metadata/get")
def raw_extrinsic_metadata_get(
self,
- type: MetadataTargetType,
- target: Union[str, SWHID],
+ target: SWHID,
authority: MetadataAuthority,
after: Optional[datetime.datetime] = None,
page_token: Optional[bytes] = None,
@@ -1109,8 +1107,7 @@
"""Retrieve list of all raw_extrinsic_metadata entries for the id
Args:
- type: one of the values of swh.model.model.MetadataTargetType
- target: an URL if type is 'origin', else a core SWHID
+ target: a core SWHID
authority: a dict containing keys `type` and `url`.
after: minimum discovery_date for a result to be returned
page_token: opaque token, used to get the next page of results
diff --git a/swh/storage/postgresql/converters.py b/swh/storage/postgresql/converters.py
--- a/swh/storage/postgresql/converters.py
+++ b/swh/storage/postgresql/converters.py
@@ -4,15 +4,16 @@
# See top-level LICENSE file for more information
import datetime
-from typing import Any, Dict, Optional
+from typing import Any, Dict, Optional, Tuple
+
+import attr
from swh.core.utils import encode_with_unescape
-from swh.model.identifiers import parse_swhid
from swh.model.model import (
+ SWHID,
MetadataAuthority,
MetadataAuthorityType,
MetadataFetcher,
- MetadataTargetType,
ObjectType,
Person,
RawExtrinsicMetadata,
@@ -22,6 +23,7 @@
Timestamp,
TimestampWithTimezone,
)
+from swh.model.swhid import SWHIDObjectType, parse_swhid
from ..utils import map_optional
@@ -142,6 +144,24 @@
}
+def swhid_to_db(swhid: Optional[SWHID]) -> Optional[Tuple[int, str, bytes]]:
+ if swhid is not None:
+ assert not swhid.metadata
+ assert swhid.namespace == "swh"
+ return (swhid.scheme_version, swhid.object_type.value, swhid.object_id)
+
+
+def db_to_swhid(db_swhid: Optional[Tuple[int, str, bytes]]) -> Optional[SWHID]:
+ if db_swhid:
+ return SWHID(
+ namespace="swh",
+ scheme_version=db_swhid[0],
+ object_type=db_swhid[1],
+ object_id=db_swhid[2],
+ metadata={},
+ )
+
+
def revision_to_db(revision: Revision) -> Dict[str, Any]:
"""Convert a swh-model revision to its database representation.
"""
@@ -295,13 +315,8 @@
def db_to_raw_extrinsic_metadata(row) -> RawExtrinsicMetadata:
- type_ = MetadataTargetType(row["raw_extrinsic_metadata.type"])
- target = row["raw_extrinsic_metadata.target"]
- if type_ != MetadataTargetType.ORIGIN:
- target = parse_swhid(target)
return RawExtrinsicMetadata(
- type=type_,
- target=target,
+ target=row["raw_extrinsic_metadata.target"],
authority=MetadataAuthority(
type=MetadataAuthorityType(row["metadata_authority.type"]),
url=row["metadata_authority.url"],
@@ -314,9 +329,9 @@
metadata=row["raw_extrinsic_metadata.metadata"],
origin=row["origin"],
visit=row["visit"],
- snapshot=map_optional(parse_swhid, row["snapshot"]),
- release=map_optional(parse_swhid, row["release"]),
- revision=map_optional(parse_swhid, row["revision"]),
+ snapshot=row["snapshot"],
+ release=row["release"],
+ revision=row["revision"],
path=row["path"],
- directory=map_optional(parse_swhid, row["directory"]),
+ directory=row["directory"],
)
diff --git a/swh/storage/postgresql/db.py b/swh/storage/postgresql/db.py
--- a/swh/storage/postgresql/db.py
+++ b/swh/storage/postgresql/db.py
@@ -6,15 +6,22 @@
import datetime
import logging
import random
+import re
import select
from typing import Any, Dict, Iterable, List, Optional, Tuple
+import psycopg2
+
from swh.core.db import BaseDb
from swh.core.db.db_utils import execute_values_generator
from swh.core.db.db_utils import jsonize as _jsonize
from swh.core.db.db_utils import stored_procedure
+from swh.model.hashutil import hash_to_bytes
from swh.model.model import SHA1_SIZE, OriginVisit, OriginVisitStatus
+from swh.model.swhid import SWHID
+from swh.model.swhid import _swhid_type_map as swhid_typemap
from swh.storage.interface import ListOrder
+from swh.storage.postgresql.converters import swhid_to_db
logger = logging.getLogger(__name__)
@@ -23,6 +30,35 @@
return _jsonize(dict(d) if d is not None else None)
+def typecast_swhid(value, cur):
+ if value is None:
+ return None
+ m = re.match(r'\(([^)]+),([^)]+),"([^)]+)"\)', value)
+ if m:
+ return SWHID(
+ scheme_version=int(m.group(1)),
+ object_type=swhid_typemap[m.group(2)],
+ object_id=hash_to_bytes(m.group(3)[3:]),
+ )
+ else:
+ raise psycopg2.InterfaceError("bad SWHID representation: %r" % value)
+
+
+def adapt_swhid(swhid: SWHID):
+ value = psycopg2.extensions.AsIs(
+ (
+ b"ROW(%d, '%s'::swhid_type, '\\x%s'::bytea)"
+ % (
+ swhid.scheme_version,
+ swhid.object_type.value.encode(),
+ swhid.object_id.hex().encode(),
+ )
+ ).decode()
+ )
+ return value
+
+
class Db(BaseDb):
"""Proxy to the SWH DB, with wrappers around stored procedures
@@ -30,6 +66,30 @@
current_version = 166
+ def __init__(
+ self,
+ conn: psycopg2.extensions.connection,
+ pool: Optional[psycopg2.pool.AbstractConnectionPool] = None,
+ ):
+ super().__init__(conn, pool)
+ with conn.cursor() as cur:
+ cur.execute(
+ """
+ SELECT pg_type.oid
+ FROM pg_type
+ JOIN pg_namespace
+ ON typnamespace = pg_namespace.oid
+ WHERE typname = %(typename)s
+ AND nspname = %(namespace)s""",
+ {"typename": "swhid", "namespace": "public"},
+ )
+
+ oid = cur.fetchone()[0]
+
+ t_SWHID = psycopg2.extensions.new_type((oid,), "SWHID", typecast_swhid)
+ psycopg2.extensions.register_type(t_SWHID, conn)
+ psycopg2.extensions.register_adapter(SWHID, adapt_swhid)
+
def mktemp_dir_entry(self, entry_type, cur=None):
self._cursor(cur).execute(
"SELECT swh_mktemp_dir_entry(%s)", (("directory_entry_%s" % entry_type),)
@@ -1136,7 +1196,6 @@
"""The list of context columns for all artifact types."""
_raw_extrinsic_metadata_insert_cols = [
- "type",
"target",
"authority_id",
"fetcher_id",
@@ -1158,7 +1217,6 @@
raw_extrinsic_metadata_get_cols = [
"raw_extrinsic_metadata.target",
- "raw_extrinsic_metadata.type",
"discovery_date",
"metadata_authority.type",
"metadata_authority.url",
@@ -1179,30 +1237,29 @@
INNER JOIN metadata_authority
ON (metadata_authority.id=authority_id)
INNER JOIN metadata_fetcher ON (metadata_fetcher.id=fetcher_id)
- WHERE raw_extrinsic_metadata.target=%s AND authority_id=%s
+ WHERE (raw_extrinsic_metadata.target)=%s
+ AND authority_id=%s
"""
def raw_extrinsic_metadata_add(
self,
- type: str,
- target: str,
+ target: SWHID,
discovery_date: datetime.datetime,
authority_id: int,
fetcher_id: int,
format: str,
metadata: bytes,
- origin: Optional[str],
+ origin: Optional[SWHID],
visit: Optional[int],
- snapshot: Optional[str],
- release: Optional[str],
- revision: Optional[str],
+ snapshot: Optional[SWHID],
+ release: Optional[SWHID],
+ revision: Optional[SWHID],
path: Optional[bytes],
- directory: Optional[str],
+ directory: Optional[SWHID],
cur,
):
query = self._raw_extrinsic_metadata_insert_query
args: Dict[str, Any] = dict(
- type=type,
target=target,
authority_id=authority_id,
fetcher_id=fetcher_id,
@@ -1224,8 +1281,7 @@
def raw_extrinsic_metadata_get(
self,
- type: str,
- target: str,
+ target: SWHID,
authority_id: int,
after_time: Optional[datetime.datetime],
after_fetcher: Optional[int],
diff --git a/swh/storage/postgresql/storage.py b/swh/storage/postgresql/storage.py
--- a/swh/storage/postgresql/storage.py
+++ b/swh/storage/postgresql/storage.py
@@ -19,7 +19,6 @@
from swh.core.api.serializers import msgpack_dumps, msgpack_loads
from swh.core.db.common import db_transaction, db_transaction_generator
from swh.model.hashutil import DEFAULT_ALGORITHMS, hash_to_bytes, hash_to_hex
-from swh.model.identifiers import SWHID
from swh.model.model import (
SHA1_SIZE,
Content,
@@ -27,7 +26,6 @@
MetadataAuthority,
MetadataAuthorityType,
MetadataFetcher,
- MetadataTargetType,
Origin,
OriginVisit,
OriginVisitStatus,
@@ -41,6 +39,7 @@
SnapshotBranch,
TargetType,
)
+from swh.model.swhid import SWHID, SWHIDObjectType
from swh.storage.exc import HashCollision, StorageArgumentException, StorageDBError
from swh.storage.interface import (
VISIT_STATUSES,
@@ -1256,26 +1255,18 @@
) -> None:
metadata = list(metadata)
self.journal_writer.raw_extrinsic_metadata_add(metadata)
- counter = Counter[MetadataTargetType]()
+ counter = Counter[SWHIDObjectType]()
for metadata_entry in metadata:
- authority_id = self._get_authority_id(metadata_entry.authority, db, cur)
- fetcher_id = self._get_fetcher_id(metadata_entry.fetcher, db, cur)
+ d = attr.asdict(
+ metadata_entry, recurse=False
+ ) # we want SWHID not converted
+ d.pop("authority")
+ d.pop("fetcher")
db.raw_extrinsic_metadata_add(
- type=metadata_entry.type.value,
- target=str(metadata_entry.target),
- discovery_date=metadata_entry.discovery_date,
- authority_id=authority_id,
- fetcher_id=fetcher_id,
- format=metadata_entry.format,
- metadata=metadata_entry.metadata,
- origin=metadata_entry.origin,
- visit=metadata_entry.visit,
- snapshot=map_optional(str, metadata_entry.snapshot),
- release=map_optional(str, metadata_entry.release),
- revision=map_optional(str, metadata_entry.revision),
- path=metadata_entry.path,
- directory=map_optional(str, metadata_entry.directory),
+ **d,
+ authority_id=self._get_authority_id(metadata_entry.authority, db, cur),
+ fetcher_id=self._get_fetcher_id(metadata_entry.fetcher, db, cur),
cur=cur,
)
counter[metadata_entry.type] += 1
@@ -1290,8 +1281,7 @@
@db_transaction()
def raw_extrinsic_metadata_get(
self,
- type: MetadataTargetType,
- target: Union[str, SWHID],
+ target: SWHID,
authority: MetadataAuthority,
after: Optional[datetime.datetime] = None,
page_token: Optional[bytes] = None,
@@ -1299,19 +1289,6 @@
db=None,
cur=None,
) -> PagedResult[RawExtrinsicMetadata]:
- if type == MetadataTargetType.ORIGIN:
- if isinstance(target, SWHID):
- raise StorageArgumentException(
- f"raw_extrinsic_metadata_get called with type='origin', "
- f"but provided target is a SWHID: {target!r}"
- )
- else:
- if not isinstance(target, SWHID):
- raise StorageArgumentException(
- f"raw_extrinsic_metadata_get called with type!='origin', "
- f"but provided target is not a SWHID: {target!r}"
- )
-
if page_token:
(after_time, after_fetcher) = msgpack_loads(base64.b64decode(page_token))
if after and after_time < after:
@@ -1327,12 +1304,13 @@
return PagedResult(next_page_token=None, results=[],)
rows = db.raw_extrinsic_metadata_get(
- type, str(target), authority_id, after_time, after_fetcher, limit + 1, cur,
+ target, authority_id, after_time, after_fetcher, limit + 1, cur,
)
+
rows = [dict(zip(db.raw_extrinsic_metadata_get_cols, row)) for row in rows]
results = []
for row in rows:
- assert str(target) == row["raw_extrinsic_metadata.target"]
+ assert target == row["raw_extrinsic_metadata.target"]
results.append(converters.db_to_raw_extrinsic_metadata(row))
if len(results) > limit:
diff --git a/swh/storage/sql/20-enums.sql b/swh/storage/sql/20-enums.sql
--- a/swh/storage/sql/20-enums.sql
+++ b/swh/storage/sql/20-enums.sql
@@ -23,3 +23,5 @@
'failed'
);
comment on type origin_visit_state IS 'Possible origin visit status values';
+
+create type swhid_type as enum ('cnt', 'dir', 'rev', 'rel', 'snp', 'ori');
diff --git a/swh/storage/sql/30-schema.sql b/swh/storage/sql/30-schema.sql
--- a/swh/storage/sql/30-schema.sql
+++ b/swh/storage/sql/30-schema.sql
@@ -38,7 +38,9 @@
create domain file_perms as int;
-- an SWHID
-create domain swhid as text check (value ~ '^swh:[0-9]+:.*');
+-- create domain swhid as text check (value ~ '^swh:[0-9]+:.*');
+
+create type swhid as (version int, object_type swhid_type, object_id bytea);
-- Checksums about actual file content. Note that the content itself is not
@@ -430,8 +432,7 @@
-- Extrinsic metadata on a DAG objects and origins.
create table raw_extrinsic_metadata
(
- type text not null,
- target text not null,
+ target swhid not null,
-- metadata source
authority_id bigint not null,
@@ -443,7 +444,7 @@
metadata bytea not null,
-- context
- origin text,
+ origin swhid,
visit bigint,
snapshot swhid,
release swhid,
@@ -453,7 +454,6 @@
);
comment on table raw_extrinsic_metadata is 'keeps all metadata found concerning an object';
-comment on column raw_extrinsic_metadata.type is 'the type of object (content/directory/revision/release/snapshot/origin) the metadata is on';
comment on column raw_extrinsic_metadata.target is 'the SWHID or origin URL for which the metadata was found';
comment on column raw_extrinsic_metadata.discovery_date is 'the date of retrieval';
comment on column raw_extrinsic_metadata.authority_id is 'the metadata provider: github, openhub, deposit, etc.';
diff --git a/swh/storage/tests/storage_data.py b/swh/storage/tests/storage_data.py
--- a/swh/storage/tests/storage_data.py
+++ b/swh/storage/tests/storage_data.py
@@ -10,7 +10,6 @@
from swh.model import from_disk
from swh.model.hashutil import hash_to_bytes, hash_to_hex
-from swh.model.identifiers import parse_swhid
from swh.model.model import (
Content,
Directory,
@@ -18,7 +17,6 @@
MetadataAuthority,
MetadataAuthorityType,
MetadataFetcher,
- MetadataTargetType,
ObjectType,
Origin,
OriginVisit,
@@ -34,6 +32,23 @@
Timestamp,
TimestampWithTimezone,
)
+from swh.model.swhid import SWHID, SWHIDObjectType
+from swh.model.swhid import _swhid_type_map as swhid_type_map
+from swh.model.swhid import parse_swhid
+
+
+class SWHIDProvider:
+ def __init__(self, data):
+ self._data = data
+
+ def __getattr__(self, name):
+ return mkswhid(getattr(self._data, name))
+
+
+def mkswhid(obj):
+ object_type = swhid_type_map.get(obj.object_type)
+ if object_type:
+ return SWHID(object_type=object_type, object_id=obj.id)
class StorageData:
@@ -463,9 +478,8 @@
snapshots: Tuple[Snapshot, ...] = (snapshot, empty_snapshot, complete_snapshot)
content_metadata1 = RawExtrinsicMetadata(
- type=MetadataTargetType.CONTENT,
target=parse_swhid(f"swh:1:cnt:{hash_to_hex(content.sha1_git)}"),
- origin=origin.url,
+ origin=mkswhid(origin),
discovery_date=datetime.datetime(
2015, 1, 1, 21, 0, 0, tzinfo=datetime.timezone.utc
),
@@ -475,9 +489,8 @@
metadata=b'{"foo": "bar"}',
)
content_metadata2 = RawExtrinsicMetadata(
- type=MetadataTargetType.CONTENT,
target=parse_swhid(f"swh:1:cnt:{hash_to_hex(content.sha1_git)}"),
- origin=origin2.url,
+ origin=mkswhid(origin2),
discovery_date=datetime.datetime(
2017, 1, 1, 22, 0, 0, tzinfo=datetime.timezone.utc
),
@@ -487,7 +500,6 @@
metadata=b"foo: bar",
)
content_metadata3 = RawExtrinsicMetadata(
- type=MetadataTargetType.CONTENT,
target=parse_swhid(f"swh:1:cnt:{hash_to_hex(content.sha1_git)}"),
discovery_date=datetime.datetime(
2017, 1, 1, 22, 0, 0, tzinfo=datetime.timezone.utc
@@ -496,7 +508,7 @@
fetcher=attr.evolve(metadata_fetcher2, metadata=None),
format="yaml",
metadata=b"foo: bar",
- origin=origin.url,
+ origin=mkswhid(origin),
visit=42,
snapshot=parse_swhid(f"swh:1:snp:{hash_to_hex(snapshot.id)}"),
release=parse_swhid(f"swh:1:rel:{hash_to_hex(release.id)}"),
@@ -512,8 +524,7 @@
)
origin_metadata1 = RawExtrinsicMetadata(
- type=MetadataTargetType.ORIGIN,
- target=origin.url,
+ target=mkswhid(origin),
discovery_date=datetime.datetime(
2015, 1, 1, 21, 0, 0, tzinfo=datetime.timezone.utc
),
@@ -523,8 +534,7 @@
metadata=b'{"foo": "bar"}',
)
origin_metadata2 = RawExtrinsicMetadata(
- type=MetadataTargetType.ORIGIN,
- target=origin.url,
+ target=mkswhid(origin),
discovery_date=datetime.datetime(
2017, 1, 1, 22, 0, 0, tzinfo=datetime.timezone.utc
),
@@ -534,8 +544,7 @@
metadata=b"foo: bar",
)
origin_metadata3 = RawExtrinsicMetadata(
- type=MetadataTargetType.ORIGIN,
- target=origin.url,
+ target=mkswhid(origin),
discovery_date=datetime.datetime(
2017, 1, 1, 22, 0, 0, tzinfo=datetime.timezone.utc
),
@@ -550,3 +559,6 @@
origin_metadata2,
origin_metadata3,
)
+
+
+StorageData.swhid = SWHIDProvider(StorageData)
diff --git a/swh/storage/tests/storage_tests.py b/swh/storage/tests/storage_tests.py
--- a/swh/storage/tests/storage_tests.py
+++ b/swh/storage/tests/storage_tests.py
@@ -19,11 +19,9 @@
from swh.model import from_disk
from swh.model.hashutil import hash_to_bytes
from swh.model.hypothesis_strategies import objects
-from swh.model.identifiers import SWHID
from swh.model.model import (
Content,
Directory,
- MetadataTargetType,
Origin,
OriginVisit,
OriginVisitStatus,
@@ -33,6 +31,7 @@
Snapshot,
TargetType,
)
+from swh.model.swhid import SWHID, SWHIDObjectType
from swh.storage import get_storage
from swh.storage.common import origin_url_to_sha1 as sha1
from swh.storage.exc import HashCollision, StorageArgumentException
@@ -3324,22 +3323,17 @@
def test_content_metadata_add(self, swh_storage, sample_data):
content = sample_data.content
+ content_swhid = sample_data.swhid.content
fetcher = sample_data.metadata_fetcher
authority = sample_data.metadata_authority
content_metadata = sample_data.content_metadata[:2]
- content_swhid = SWHID(
- object_type="content", object_id=hash_to_bytes(content.sha1_git)
- )
-
swh_storage.metadata_fetcher_add([fetcher])
swh_storage.metadata_authority_add([authority])
swh_storage.raw_extrinsic_metadata_add(content_metadata)
- result = swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.CONTENT, content_swhid, authority
- )
+ result = swh_storage.raw_extrinsic_metadata_get(content_swhid, authority)
assert result.next_page_token is None
assert list(sorted(result.results, key=lambda x: x.discovery_date,)) == list(
content_metadata
@@ -3357,12 +3351,10 @@
def test_content_metadata_add_duplicate(self, swh_storage, sample_data):
"""Duplicates should be silently updated."""
content = sample_data.content
+ content_swhid = sample_data.swhid.content
fetcher = sample_data.metadata_fetcher
authority = sample_data.metadata_authority
content_metadata, content_metadata2 = sample_data.content_metadata[:2]
- content_swhid = SWHID(
- object_type="content", object_id=hash_to_bytes(content.sha1_git)
- )
new_content_metadata2 = attr.evolve(
content_metadata2, format="new-format", metadata=b"new-metadata",
@@ -3374,9 +3366,7 @@
swh_storage.raw_extrinsic_metadata_add([content_metadata, content_metadata2])
swh_storage.raw_extrinsic_metadata_add([new_content_metadata2])
- result = swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.CONTENT, content_swhid, authority
- )
+ result = swh_storage.raw_extrinsic_metadata_get(content_swhid, authority)
assert result.next_page_token is None
expected_results1 = (content_metadata, new_content_metadata2)
@@ -3397,8 +3387,8 @@
content1_metadata3,
) = sample_data.content_metadata[:3]
- content1_swhid = SWHID(object_type="content", object_id=content.sha1_git)
- content2_swhid = SWHID(object_type="content", object_id=content2.sha1_git)
+ content1_swhid = sample_data.swhid.content
+ content2_swhid = sample_data.swhid.content2
content2_metadata = attr.evolve(content1_metadata2, target=content2_swhid)
swh_storage.metadata_authority_add([authority, authority2])
@@ -3413,43 +3403,35 @@
]
)
- result = swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.CONTENT, content1_swhid, authority
- )
+ result = swh_storage.raw_extrinsic_metadata_get(content1_swhid, authority)
assert result.next_page_token is None
assert [content1_metadata1, content1_metadata2] == list(
sorted(result.results, key=lambda x: x.discovery_date,)
)
- result = swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.CONTENT, content1_swhid, authority2
- )
+ result = swh_storage.raw_extrinsic_metadata_get(content1_swhid, authority2)
assert result.next_page_token is None
assert [content1_metadata3] == list(
sorted(result.results, key=lambda x: x.discovery_date,)
)
- result = swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.CONTENT, content2_swhid, authority
- )
+ result = swh_storage.raw_extrinsic_metadata_get(content2_swhid, authority)
assert result.next_page_token is None
assert [content2_metadata] == list(result.results,)
def test_content_metadata_get_after(self, swh_storage, sample_data):
content = sample_data.content
+ content_swhid = sample_data.swhid.content
fetcher = sample_data.metadata_fetcher
authority = sample_data.metadata_authority
content_metadata, content_metadata2 = sample_data.content_metadata[:2]
- content_swhid = SWHID(object_type="content", object_id=content.sha1_git)
-
swh_storage.metadata_fetcher_add([fetcher])
swh_storage.metadata_authority_add([authority])
swh_storage.raw_extrinsic_metadata_add([content_metadata, content_metadata2])
result = swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.CONTENT,
content_swhid,
authority,
after=content_metadata.discovery_date - timedelta(seconds=1),
@@ -3460,62 +3442,48 @@
)
result = swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.CONTENT,
- content_swhid,
- authority,
- after=content_metadata.discovery_date,
+ content_swhid, authority, after=content_metadata.discovery_date,
)
assert result.next_page_token is None
assert result.results == [content_metadata2]
result = swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.CONTENT,
- content_swhid,
- authority,
- after=content_metadata2.discovery_date,
+ content_swhid, authority, after=content_metadata2.discovery_date,
)
assert result.next_page_token is None
assert result.results == []
def test_content_metadata_get_paginate(self, swh_storage, sample_data):
content = sample_data.content
+ content_swhid = sample_data.swhid.content
fetcher = sample_data.metadata_fetcher
authority = sample_data.metadata_authority
content_metadata, content_metadata2 = sample_data.content_metadata[:2]
- content_swhid = SWHID(object_type="content", object_id=content.sha1_git)
-
swh_storage.metadata_fetcher_add([fetcher])
swh_storage.metadata_authority_add([authority])
swh_storage.raw_extrinsic_metadata_add([content_metadata, content_metadata2])
- swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.CONTENT, content_swhid, authority
- )
+ swh_storage.raw_extrinsic_metadata_get(content_swhid, authority)
result = swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.CONTENT, content_swhid, authority, limit=1
+ content_swhid, authority, limit=1
)
assert result.next_page_token is not None
assert result.results == [content_metadata]
result = swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.CONTENT,
- content_swhid,
- authority,
- limit=1,
- page_token=result.next_page_token,
+ content_swhid, authority, limit=1, page_token=result.next_page_token,
)
assert result.next_page_token is None
assert result.results == [content_metadata2]
def test_content_metadata_get_paginate_same_date(self, swh_storage, sample_data):
content = sample_data.content
+ content_swhid = sample_data.swhid.content
fetcher1, fetcher2 = sample_data.fetchers[:2]
authority = sample_data.metadata_authority
content_metadata, content_metadata2 = sample_data.content_metadata[:2]
- content_swhid = SWHID(object_type="content", object_id=content.sha1_git)
-
swh_storage.metadata_fetcher_add([fetcher1, fetcher2])
swh_storage.metadata_authority_add([authority])
@@ -3530,38 +3498,20 @@
)
result = swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.CONTENT, content_swhid, authority, limit=1
+ content_swhid, authority, limit=1
)
assert result.next_page_token is not None
assert result.results == [content_metadata]
result = swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.CONTENT,
- content_swhid,
- authority,
- limit=1,
- page_token=result.next_page_token,
+ content_swhid, authority, limit=1, page_token=result.next_page_token,
)
assert result.next_page_token is None
assert result.results == [new_content_metadata2]
- def test_content_metadata_get__invalid_id(self, swh_storage, sample_data):
- origin = sample_data.origin
- fetcher = sample_data.metadata_fetcher
- authority = sample_data.metadata_authority
- content_metadata, content_metadata2 = sample_data.content_metadata[:2]
-
- swh_storage.metadata_fetcher_add([fetcher])
- swh_storage.metadata_authority_add([authority])
- swh_storage.raw_extrinsic_metadata_add([content_metadata, content_metadata2])
-
- with pytest.raises(StorageArgumentException, match="SWHID"):
- swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.CONTENT, origin.url, authority
- )
-
def test_origin_metadata_add(self, swh_storage, sample_data):
origin = sample_data.origin
+ origin_swhid = sample_data.swhid.origin
fetcher = sample_data.metadata_fetcher
authority = sample_data.metadata_authority
origin_metadata, origin_metadata2 = sample_data.origin_metadata[:2]
@@ -3573,9 +3523,7 @@
swh_storage.raw_extrinsic_metadata_add([origin_metadata, origin_metadata2])
- result = swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.ORIGIN, origin.url, authority
- )
+ result = swh_storage.raw_extrinsic_metadata_get(origin_swhid, authority)
assert result.next_page_token is None
assert list(sorted(result.results, key=lambda x: x.discovery_date)) == [
origin_metadata,
@@ -3596,6 +3544,7 @@
def test_origin_metadata_add_duplicate(self, swh_storage, sample_data):
"""Duplicates should be silently updated."""
origin = sample_data.origin
+ origin_swhid = sample_data.swhid.origin
fetcher = sample_data.metadata_fetcher
authority = sample_data.metadata_authority
origin_metadata, origin_metadata2 = sample_data.origin_metadata[:2]
@@ -3611,9 +3560,7 @@
swh_storage.raw_extrinsic_metadata_add([origin_metadata, origin_metadata2])
swh_storage.raw_extrinsic_metadata_add([new_origin_metadata2])
- result = swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.ORIGIN, origin.url, authority
- )
+ result = swh_storage.raw_extrinsic_metadata_get(origin_swhid, authority)
assert result.next_page_token is None
# which of the two behavior happens is backend-specific.
@@ -3626,7 +3573,10 @@
)
def test_origin_metadata_get(self, swh_storage, sample_data):
- origin, origin2 = sample_data.origins[:2]
+ origin = sample_data.origin
+ origin_swhid = sample_data.swhid.origin
+ origin2 = sample_data.origin2
+ origin2_swhid = sample_data.swhid.origin2
fetcher, fetcher2 = sample_data.fetchers[:2]
authority, authority2 = sample_data.authorities[:2]
(
@@ -3637,7 +3587,7 @@
assert swh_storage.origin_add([origin, origin2]) == {"origin:add": 2}
- origin2_metadata = attr.evolve(origin1_metadata2, target=origin2.url)
+ origin2_metadata = attr.evolve(origin1_metadata2, target=origin2_swhid)
swh_storage.metadata_authority_add([authority, authority2])
swh_storage.metadata_fetcher_add([fetcher, fetcher2])
@@ -3646,30 +3596,25 @@
[origin1_metadata1, origin1_metadata2, origin1_metadata3, origin2_metadata]
)
- result = swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.ORIGIN, origin.url, authority
- )
+ result = swh_storage.raw_extrinsic_metadata_get(origin_swhid, authority)
assert result.next_page_token is None
assert [origin1_metadata1, origin1_metadata2] == list(
sorted(result.results, key=lambda x: x.discovery_date,)
)
- result = swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.ORIGIN, origin.url, authority2
- )
+ result = swh_storage.raw_extrinsic_metadata_get(origin_swhid, authority2)
assert result.next_page_token is None
assert [origin1_metadata3] == list(
sorted(result.results, key=lambda x: x.discovery_date,)
)
- result = swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.ORIGIN, origin2.url, authority
- )
+ result = swh_storage.raw_extrinsic_metadata_get(origin2_swhid, authority)
assert result.next_page_token is None
assert [origin2_metadata] == list(result.results,)
def test_origin_metadata_get_after(self, swh_storage, sample_data):
origin = sample_data.origin
+ origin_swhid = sample_data.swhid.origin
fetcher = sample_data.metadata_fetcher
authority = sample_data.metadata_authority
origin_metadata, origin_metadata2 = sample_data.origin_metadata[:2]
@@ -3681,8 +3626,7 @@
swh_storage.raw_extrinsic_metadata_add([origin_metadata, origin_metadata2])
result = swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.ORIGIN,
- origin.url,
+ origin_swhid,
authority,
after=origin_metadata.discovery_date - timedelta(seconds=1),
)
@@ -3693,25 +3637,20 @@
]
result = swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.ORIGIN,
- origin.url,
- authority,
- after=origin_metadata.discovery_date,
+ origin_swhid, authority, after=origin_metadata.discovery_date,
)
assert result.next_page_token is None
assert result.results == [origin_metadata2]
result = swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.ORIGIN,
- origin.url,
- authority,
- after=origin_metadata2.discovery_date,
+ origin_swhid, authority, after=origin_metadata2.discovery_date,
)
assert result.next_page_token is None
assert result.results == []
def test_origin_metadata_get_paginate(self, swh_storage, sample_data):
origin = sample_data.origin
+ origin_swhid = sample_data.swhid.origin
fetcher = sample_data.metadata_fetcher
authority = sample_data.metadata_authority
origin_metadata, origin_metadata2 = sample_data.origin_metadata[:2]
@@ -3722,28 +3661,23 @@
swh_storage.raw_extrinsic_metadata_add([origin_metadata, origin_metadata2])
- swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.ORIGIN, origin.url, authority
- )
+ swh_storage.raw_extrinsic_metadata_get(origin_swhid, authority)
result = swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.ORIGIN, origin.url, authority, limit=1
+ origin_swhid, authority, limit=1
)
assert result.next_page_token is not None
assert result.results == [origin_metadata]
result = swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.ORIGIN,
- origin.url,
- authority,
- limit=1,
- page_token=result.next_page_token,
+ origin_swhid, authority, limit=1, page_token=result.next_page_token,
)
assert result.next_page_token is None
assert result.results == [origin_metadata2]
def test_origin_metadata_get_paginate_same_date(self, swh_storage, sample_data):
origin = sample_data.origin
+ origin_swhid = sample_data.swhid.origin
fetcher1, fetcher2 = sample_data.fetchers[:2]
authority = sample_data.metadata_authority
origin_metadata, origin_metadata2 = sample_data.origin_metadata[:2]
@@ -3761,17 +3695,13 @@
swh_storage.raw_extrinsic_metadata_add([origin_metadata, new_origin_metadata2])
result = swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.ORIGIN, origin.url, authority, limit=1
+ origin_swhid, authority, limit=1
)
assert result.next_page_token is not None
assert result.results == [origin_metadata]
result = swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.ORIGIN,
- origin.url,
- authority,
- limit=1,
- page_token=result.next_page_token,
+ origin_swhid, authority, limit=1, page_token=result.next_page_token,
)
assert result.next_page_token is None
assert result.results == [new_origin_metadata2]
@@ -3798,24 +3728,6 @@
with pytest.raises(StorageArgumentException, match="fetcher"):
swh_storage.raw_extrinsic_metadata_add([origin_metadata, origin_metadata2])
- def test_origin_metadata_get__invalid_id_type(self, swh_storage, sample_data):
- origin = sample_data.origin
- authority = sample_data.metadata_authority
- fetcher = sample_data.metadata_fetcher
- origin_metadata, origin_metadata2 = sample_data.origin_metadata[:2]
- content_metadata = sample_data.content_metadata[0]
- assert swh_storage.origin_add([origin]) == {"origin:add": 1}
-
- swh_storage.metadata_fetcher_add([fetcher])
- swh_storage.metadata_authority_add([authority])
-
- swh_storage.raw_extrinsic_metadata_add([origin_metadata, origin_metadata2])
-
- with pytest.raises(StorageArgumentException, match="SWHID"):
- swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.ORIGIN, content_metadata.target, authority,
- )
-
class TestStorageGeneratedData:
def test_generate_content_get_data(self, swh_storage, swh_contents):
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Dec 20 2024, 4:22 AM (11 w, 3 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3217344
Attached To
D4986: [WIP] Add a composite swhid type in postgresql
Event Timeline
Log In to Comment