Page Menu · Home · Software Heritage

D4986.diff
No One · Temporary

D4986.diff

diff --git a/swh/storage/api/serializers.py b/swh/storage/api/serializers.py
--- a/swh/storage/api/serializers.py
+++ b/swh/storage/api/serializers.py
@@ -7,8 +7,8 @@
from typing import Callable, Dict, List, Tuple
-from swh.model.identifiers import SWHID, parse_swhid
import swh.model.model as model
+from swh.model.swhid import SWHID, parse_swhid
from swh.storage import interface
@@ -36,7 +36,6 @@
ENCODERS: List[Tuple[type, str, Callable]] = [
(model.BaseModel, "model", _encode_model_object),
(SWHID, "swhid", str),
- (model.MetadataTargetType, "model_enum", _encode_enum),
(model.MetadataAuthorityType, "model_enum", _encode_enum),
(interface.ListOrder, "storage_enum", _encode_enum),
]
@@ -46,6 +45,5 @@
"swhid": parse_swhid,
"model": lambda d: getattr(model, d.pop("__type__")).from_dict(d),
"model_enum": _decode_model_enum,
- "model_enum": _decode_model_enum,
"storage_enum": _decode_storage_enum,
}
diff --git a/swh/storage/backfill.py b/swh/storage/backfill.py
--- a/swh/storage/backfill.py
+++ b/swh/storage/backfill.py
@@ -35,6 +35,7 @@
db_to_release,
db_to_revision,
)
+from swh.storage.postgresql.db import register_swhid_type
from swh.storage.replay import object_converter_fn
from swh.storage.writer import JournalWriter
@@ -79,7 +80,6 @@
"metadata_authority": ["type", "url", "metadata",],
"metadata_fetcher": ["name", "version", "metadata",],
"raw_extrinsic_metadata": [
- "raw_extrinsic_metadata.type",
"raw_extrinsic_metadata.target",
"metadata_authority.type",
"metadata_authority.url",
@@ -533,6 +533,8 @@
)
db = BaseDb.connect(self.config["storage"]["db"])
+ register_swhid_type(db.conn)
+
writer = JournalWriter({"cls": "kafka", **self.config["journal_writer"]})
assert writer.journal is not None
diff --git a/swh/storage/cassandra/model.py b/swh/storage/cassandra/model.py
--- a/swh/storage/cassandra/model.py
+++ b/swh/storage/cassandra/model.py
@@ -250,7 +250,6 @@
"fetcher_version",
)
- type: str
target: str
authority_type: str
diff --git a/swh/storage/cassandra/schema.py b/swh/storage/cassandra/schema.py
--- a/swh/storage/cassandra/schema.py
+++ b/swh/storage/cassandra/schema.py
@@ -188,7 +188,6 @@
);""",
"""
CREATE TABLE IF NOT EXISTS raw_extrinsic_metadata (
- type text,
target text,
-- metadata source
diff --git a/swh/storage/cassandra/storage.py b/swh/storage/cassandra/storage.py
--- a/swh/storage/cassandra/storage.py
+++ b/swh/storage/cassandra/storage.py
@@ -26,7 +26,6 @@
from swh.core.api.serializers import msgpack_dumps, msgpack_loads
from swh.model.hashutil import DEFAULT_ALGORITHMS
-from swh.model.identifiers import SWHID, parse_swhid
from swh.model.model import (
Content,
Directory,
@@ -34,7 +33,6 @@
MetadataAuthority,
MetadataAuthorityType,
MetadataFetcher,
- MetadataTargetType,
Origin,
OriginVisit,
OriginVisitStatus,
@@ -47,6 +45,7 @@
SnapshotBranch,
TargetType,
)
+from swh.model.swhid import SWHID, parse_swhid
from swh.storage.interface import (
VISIT_STATUSES,
ListOrder,
@@ -1157,7 +1156,6 @@
try:
row = RawExtrinsicMetadataRow(
- type=metadata_entry.type.value,
target=str(metadata_entry.target),
authority_type=metadata_entry.authority.type.value,
authority_url=metadata_entry.authority.url,
@@ -1166,7 +1164,7 @@
fetcher_version=metadata_entry.fetcher.version,
format=metadata_entry.format,
metadata=metadata_entry.metadata,
- origin=metadata_entry.origin,
+ origin=map_optional(str, metadata_entry.origin),
visit=metadata_entry.visit,
snapshot=map_optional(str, metadata_entry.snapshot),
release=map_optional(str, metadata_entry.release),
@@ -1180,26 +1178,12 @@
def raw_extrinsic_metadata_get(
self,
- type: MetadataTargetType,
- target: Union[str, SWHID],
+ target: SWHID,
authority: MetadataAuthority,
after: Optional[datetime.datetime] = None,
page_token: Optional[bytes] = None,
limit: int = 1000,
) -> PagedResult[RawExtrinsicMetadata]:
- if type == MetadataTargetType.ORIGIN:
- if isinstance(target, SWHID):
- raise StorageArgumentException(
- f"raw_extrinsic_metadata_get called with type='origin', "
- f"but provided target is a SWHID: {target!r}"
- )
- else:
- if not isinstance(target, SWHID):
- raise StorageArgumentException(
- f"raw_extrinsic_metadata_get called with type!='origin', "
- f"but provided target is not a SWHID: {target!r}"
- )
-
if page_token is not None:
(after_date, after_fetcher_name, after_fetcher_url) = msgpack_loads(
base64.b64decode(page_token)
@@ -1235,7 +1219,6 @@
assert str(target) == entry.target
result = RawExtrinsicMetadata(
- type=MetadataTargetType(entry.type),
target=target,
authority=MetadataAuthority(
type=MetadataAuthorityType(entry.authority_type),
@@ -1247,7 +1230,7 @@
discovery_date=discovery_date,
format=entry.format,
metadata=entry.metadata,
- origin=entry.origin,
+ origin=map_optional(parse_swhid, entry.origin),
visit=entry.visit,
snapshot=map_optional(parse_swhid, entry.snapshot),
release=map_optional(parse_swhid, entry.release),
diff --git a/swh/storage/interface.py b/swh/storage/interface.py
--- a/swh/storage/interface.py
+++ b/swh/storage/interface.py
@@ -5,20 +5,18 @@
import datetime
from enum import Enum
-from typing import Any, Dict, Iterable, List, Optional, Sequence, Tuple, TypeVar, Union
+from typing import Any, Dict, Iterable, List, Optional, Sequence, Tuple, TypeVar
from typing_extensions import Protocol, TypedDict, runtime_checkable
from swh.core.api import remote_api_endpoint
from swh.core.api.classes import PagedResult as CorePagedResult
-from swh.model.identifiers import SWHID
from swh.model.model import (
Content,
Directory,
MetadataAuthority,
MetadataAuthorityType,
MetadataFetcher,
- MetadataTargetType,
Origin,
OriginVisit,
OriginVisitStatus,
@@ -31,6 +29,7 @@
Snapshot,
SnapshotBranch,
)
+from swh.model.swhid import SWHID
class ListOrder(Enum):
@@ -1096,8 +1095,7 @@
@remote_api_endpoint("raw_extrinsic_metadata/get")
def raw_extrinsic_metadata_get(
self,
- type: MetadataTargetType,
- target: Union[str, SWHID],
+ target: SWHID,
authority: MetadataAuthority,
after: Optional[datetime.datetime] = None,
page_token: Optional[bytes] = None,
@@ -1106,8 +1104,7 @@
"""Retrieve list of all raw_extrinsic_metadata entries for the id
Args:
- type: one of the values of swh.model.model.MetadataTargetType
- target: an URL if type is 'origin', else a core SWHID
+ target: a core SWHID
authority: a dict containing keys `type` and `url`.
after: minimum discovery_date for a result to be returned
page_token: opaque token, used to get the next page of results
diff --git a/swh/storage/migrate_extrinsic_metadata.py b/swh/storage/migrate_extrinsic_metadata.py
old mode 100644
new mode 100755
--- a/swh/storage/migrate_extrinsic_metadata.py
+++ b/swh/storage/migrate_extrinsic_metadata.py
@@ -38,15 +38,14 @@
from swh.core.db import BaseDb
from swh.model.hashutil import hash_to_hex
-from swh.model.identifiers import SWHID, parse_swhid
from swh.model.model import (
MetadataAuthority,
MetadataAuthorityType,
MetadataFetcher,
- MetadataTargetType,
RawExtrinsicMetadata,
Sha1Git,
)
+from swh.model.swhid import SWHID, SWHIDObjectType, parse_swhid
from swh.storage import get_storage
from swh.storage.algos.origin import iter_origin_visit_statuses, iter_origin_visits
from swh.storage.algos.snapshot import snapshot_get_all_branches
@@ -412,19 +411,27 @@
dry_run: bool,
):
"""Does the actual loading to swh-storage."""
+ origin_swhid: Optional[SWHID]
+ if origin is not None:
+ origin_swhid = SWHID(
+ object_type=SWHIDObjectType.ORIGIN, object_id=origin.encode()
+ )
+ else:
+ origin_swhid = None
directory_swhid = SWHID(
- object_type="directory", object_id=hash_to_hex(directory_id)
+ object_type=SWHIDObjectType.DIRECTORY, object_id=hash_to_hex(directory_id)
+ )
+ revision_swhid = SWHID(
+ object_type=SWHIDObjectType.REVISION, object_id=hash_to_hex(revision_id)
)
- revision_swhid = SWHID(object_type="revision", object_id=hash_to_hex(revision_id))
obj = RawExtrinsicMetadata(
- type=MetadataTargetType.DIRECTORY,
target=directory_swhid,
discovery_date=discovery_date,
authority=authority,
fetcher=FETCHER,
format=format,
metadata=json.dumps(metadata).encode(),
- origin=origin,
+ origin=origin_swhid,
revision=revision_swhid,
)
if not dry_run:
@@ -516,7 +523,6 @@
if discovery_date is None:
discovery_date = max(dates)
-
# Sanity checks to make sure deposit requests are consistent with each other
assert len(metadata_entries) >= 1, deposit_id
assert len(provider_urls) == 1, f"expected 1 provider url, got {provider_urls}"
diff --git a/swh/storage/postgresql/converters.py b/swh/storage/postgresql/converters.py
--- a/swh/storage/postgresql/converters.py
+++ b/swh/storage/postgresql/converters.py
@@ -7,12 +7,10 @@
from typing import Any, Dict, Optional
from swh.core.utils import encode_with_unescape
-from swh.model.identifiers import parse_swhid
from swh.model.model import (
MetadataAuthority,
MetadataAuthorityType,
MetadataFetcher,
- MetadataTargetType,
ObjectType,
Person,
RawExtrinsicMetadata,
@@ -23,8 +21,6 @@
TimestampWithTimezone,
)
-from ..utils import map_optional
-
DEFAULT_AUTHOR = {
"fullname": None,
"name": None,
@@ -295,13 +291,8 @@
def db_to_raw_extrinsic_metadata(row) -> RawExtrinsicMetadata:
- type_ = MetadataTargetType(row["raw_extrinsic_metadata.type"])
- target = row["raw_extrinsic_metadata.target"]
- if type_ != MetadataTargetType.ORIGIN:
- target = parse_swhid(target)
return RawExtrinsicMetadata(
- type=type_,
- target=target,
+ target=row["raw_extrinsic_metadata.target"],
authority=MetadataAuthority(
type=MetadataAuthorityType(row["metadata_authority.type"]),
url=row["metadata_authority.url"],
@@ -314,9 +305,9 @@
metadata=row["raw_extrinsic_metadata.metadata"],
origin=row["origin"],
visit=row["visit"],
- snapshot=map_optional(parse_swhid, row["snapshot"]),
- release=map_optional(parse_swhid, row["release"]),
- revision=map_optional(parse_swhid, row["revision"]),
+ snapshot=row["snapshot"],
+ release=row["release"],
+ revision=row["revision"],
path=row["path"],
- directory=map_optional(parse_swhid, row["directory"]),
+ directory=row["directory"],
)
diff --git a/swh/storage/postgresql/db.py b/swh/storage/postgresql/db.py
--- a/swh/storage/postgresql/db.py
+++ b/swh/storage/postgresql/db.py
@@ -6,14 +6,20 @@
import datetime
import logging
import random
+import re
import select
from typing import Any, Dict, Iterable, List, Optional, Tuple
+import psycopg2
+
from swh.core.db import BaseDb
from swh.core.db.db_utils import execute_values_generator
from swh.core.db.db_utils import jsonize as _jsonize
from swh.core.db.db_utils import stored_procedure
+from swh.model.hashutil import hash_to_bytes
from swh.model.model import SHA1_SIZE, OriginVisit, OriginVisitStatus
+from swh.model.swhid import SWHID
+from swh.model.swhid import _swhid_type_map as swhid_typemap
from swh.storage.interface import ListOrder
logger = logging.getLogger(__name__)
@@ -23,6 +29,55 @@
return _jsonize(dict(d) if d is not None else None)
+def typecast_swhid(value, cur):
+ if value is None:
+ return None
+ m = re.match(r'\(([^)]+),([^)]+),"([^)]+)"\)', value)
+ if m:
+ return SWHID(
+ scheme_version=int(m.group(1)),
+ object_type=swhid_typemap[m.group(2)],
+ object_id=hash_to_bytes(m.group(3)[3:]),
+ )
+ else:
+ raise psycopg2.InterfaceError("bad SWHID representation: %r" % value)
+
+
+def adapt_swhid(swhid: SWHID):
+ value = psycopg2.extensions.AsIs(
+ (
+ b"ROW(%d, '%s'::swhid_type, '\\x%s'::bytea)"
+ % (
+ swhid.scheme_version,
+ swhid.object_type.value.encode(),
+ swhid.object_id.encode(),
+ )
+ ).decode()
+ )
+ print("VALUE", value)
+ return value
+
+
+def register_swhid_type(conn):
+ with conn.cursor() as cur:
+ cur.execute(
+ """
+ SELECT pg_type.oid
+ FROM pg_type
+ JOIN pg_namespace
+ ON typnamespace = pg_namespace.oid
+ WHERE typname = %(typename)s
+ AND nspname = %(namespace)s""",
+ {"typename": "swhid", "namespace": "public"},
+ )
+
+ oid = cur.fetchone()[0]
+
+ t_SWHID = psycopg2.extensions.new_type((oid,), "SWHID", typecast_swhid)
+ psycopg2.extensions.register_type(t_SWHID, conn)
+ psycopg2.extensions.register_adapter(SWHID, adapt_swhid)
+
+
class Db(BaseDb):
"""Proxy to the SWH DB, with wrappers around stored procedures
@@ -30,6 +85,14 @@
current_version = 166
+ def __init__(
+ self,
+ conn: psycopg2.extensions.connection,
+ pool: Optional[psycopg2.pool.AbstractConnectionPool] = None,
+ ):
+ super().__init__(conn, pool)
+ register_swhid_type(conn)
+
def mktemp_dir_entry(self, entry_type, cur=None):
self._cursor(cur).execute(
"SELECT swh_mktemp_dir_entry(%s)", (("directory_entry_%s" % entry_type),)
@@ -1136,7 +1199,6 @@
"""The list of context columns for all artifact types."""
_raw_extrinsic_metadata_insert_cols = [
- "type",
"target",
"authority_id",
"fetcher_id",
@@ -1158,7 +1220,6 @@
raw_extrinsic_metadata_get_cols = [
"raw_extrinsic_metadata.target",
- "raw_extrinsic_metadata.type",
"discovery_date",
"metadata_authority.type",
"metadata_authority.url",
@@ -1179,30 +1240,29 @@
INNER JOIN metadata_authority
ON (metadata_authority.id=authority_id)
INNER JOIN metadata_fetcher ON (metadata_fetcher.id=fetcher_id)
- WHERE raw_extrinsic_metadata.target=%s AND authority_id=%s
+ WHERE (raw_extrinsic_metadata.target)=%s
+ AND authority_id=%s
"""
def raw_extrinsic_metadata_add(
self,
- type: str,
- target: str,
+ target: SWHID,
discovery_date: datetime.datetime,
authority_id: int,
fetcher_id: int,
format: str,
metadata: bytes,
- origin: Optional[str],
+ origin: Optional[SWHID],
visit: Optional[int],
- snapshot: Optional[str],
- release: Optional[str],
- revision: Optional[str],
+ snapshot: Optional[SWHID],
+ release: Optional[SWHID],
+ revision: Optional[SWHID],
path: Optional[bytes],
- directory: Optional[str],
+ directory: Optional[SWHID],
cur,
):
query = self._raw_extrinsic_metadata_insert_query
args: Dict[str, Any] = dict(
- type=type,
target=target,
authority_id=authority_id,
fetcher_id=fetcher_id,
@@ -1224,8 +1284,7 @@
def raw_extrinsic_metadata_get(
self,
- type: str,
- target: str,
+ target: SWHID,
authority_id: int,
after_time: Optional[datetime.datetime],
after_fetcher: Optional[int],
diff --git a/swh/storage/postgresql/storage.py b/swh/storage/postgresql/storage.py
--- a/swh/storage/postgresql/storage.py
+++ b/swh/storage/postgresql/storage.py
@@ -9,7 +9,7 @@
from contextlib import contextmanager
import datetime
import itertools
-from typing import Any, Counter, Dict, Iterable, List, Optional, Sequence, Tuple, Union
+from typing import Any, Counter, Dict, Iterable, List, Optional, Sequence, Tuple
import attr
import psycopg2
@@ -19,7 +19,6 @@
from swh.core.api.serializers import msgpack_dumps, msgpack_loads
from swh.core.db.common import db_transaction, db_transaction_generator
from swh.model.hashutil import DEFAULT_ALGORITHMS, hash_to_bytes, hash_to_hex
-from swh.model.identifiers import SWHID
from swh.model.model import (
SHA1_SIZE,
Content,
@@ -27,7 +26,6 @@
MetadataAuthority,
MetadataAuthorityType,
MetadataFetcher,
- MetadataTargetType,
Origin,
OriginVisit,
OriginVisitStatus,
@@ -41,6 +39,7 @@
SnapshotBranch,
TargetType,
)
+from swh.model.swhid import SWHID, SWHIDObjectType
from swh.storage.exc import HashCollision, StorageArgumentException, StorageDBError
from swh.storage.interface import (
VISIT_STATUSES,
@@ -50,12 +49,7 @@
)
from swh.storage.metrics import process_metrics, send_metric, timed
from swh.storage.objstorage import ObjStorage
-from swh.storage.utils import (
- extract_collision_hash,
- get_partition_bounds_bytes,
- map_optional,
- now,
-)
+from swh.storage.utils import extract_collision_hash, get_partition_bounds_bytes, now
from swh.storage.writer import JournalWriter
from . import converters
@@ -1250,26 +1244,22 @@
) -> None:
metadata = list(metadata)
self.journal_writer.raw_extrinsic_metadata_add(metadata)
- counter = Counter[MetadataTargetType]()
+ counter = Counter[SWHIDObjectType]()
for metadata_entry in metadata:
- authority_id = self._get_authority_id(metadata_entry.authority, db, cur)
- fetcher_id = self._get_fetcher_id(metadata_entry.fetcher, db, cur)
-
db.raw_extrinsic_metadata_add(
- type=metadata_entry.type.value,
- target=str(metadata_entry.target),
+ target=metadata_entry.target,
+ authority_id=self._get_authority_id(metadata_entry.authority, db, cur),
discovery_date=metadata_entry.discovery_date,
- authority_id=authority_id,
- fetcher_id=fetcher_id,
+ fetcher_id=self._get_fetcher_id(metadata_entry.fetcher, db, cur),
format=metadata_entry.format,
metadata=metadata_entry.metadata,
origin=metadata_entry.origin,
visit=metadata_entry.visit,
- snapshot=map_optional(str, metadata_entry.snapshot),
- release=map_optional(str, metadata_entry.release),
- revision=map_optional(str, metadata_entry.revision),
+ snapshot=metadata_entry.snapshot,
+ release=metadata_entry.release,
+ revision=metadata_entry.revision,
path=metadata_entry.path,
- directory=map_optional(str, metadata_entry.directory),
+ directory=metadata_entry.directory,
cur=cur,
)
counter[metadata_entry.type] += 1
@@ -1284,8 +1274,7 @@
@db_transaction()
def raw_extrinsic_metadata_get(
self,
- type: MetadataTargetType,
- target: Union[str, SWHID],
+ target: SWHID,
authority: MetadataAuthority,
after: Optional[datetime.datetime] = None,
page_token: Optional[bytes] = None,
@@ -1293,19 +1282,6 @@
db=None,
cur=None,
) -> PagedResult[RawExtrinsicMetadata]:
- if type == MetadataTargetType.ORIGIN:
- if isinstance(target, SWHID):
- raise StorageArgumentException(
- f"raw_extrinsic_metadata_get called with type='origin', "
- f"but provided target is a SWHID: {target!r}"
- )
- else:
- if not isinstance(target, SWHID):
- raise StorageArgumentException(
- f"raw_extrinsic_metadata_get called with type!='origin', "
- f"but provided target is not a SWHID: {target!r}"
- )
-
if page_token:
(after_time, after_fetcher) = msgpack_loads(base64.b64decode(page_token))
if after and after_time < after:
@@ -1321,12 +1297,13 @@
return PagedResult(next_page_token=None, results=[],)
rows = db.raw_extrinsic_metadata_get(
- type, str(target), authority_id, after_time, after_fetcher, limit + 1, cur,
+ target, authority_id, after_time, after_fetcher, limit + 1, cur,
)
+
rows = [dict(zip(db.raw_extrinsic_metadata_get_cols, row)) for row in rows]
results = []
for row in rows:
- assert str(target) == row["raw_extrinsic_metadata.target"]
+ assert target == row["raw_extrinsic_metadata.target"]
results.append(converters.db_to_raw_extrinsic_metadata(row))
if len(results) > limit:
diff --git a/swh/storage/sql/20-enums.sql b/swh/storage/sql/20-enums.sql
--- a/swh/storage/sql/20-enums.sql
+++ b/swh/storage/sql/20-enums.sql
@@ -23,3 +23,5 @@
'failed'
);
comment on type origin_visit_state IS 'Possible origin visit status values';
+
+create type swhid_type as enum ('cnt', 'dir', 'rev', 'rel', 'snp', 'ori');
diff --git a/swh/storage/sql/30-schema.sql b/swh/storage/sql/30-schema.sql
--- a/swh/storage/sql/30-schema.sql
+++ b/swh/storage/sql/30-schema.sql
@@ -37,8 +37,8 @@
-- a set of UNIX-like access permissions, as manipulated by, e.g., chmod
create domain file_perms as int;
--- an SWHID
-create domain swhid as text check (value ~ '^swh:[0-9]+:.*');
+-- a SWHID
+create type swhid as (version int, object_type swhid_type, object_id bytea);
-- Checksums about actual file content. Note that the content itself is not
@@ -430,8 +430,7 @@
-- Extrinsic metadata on a DAG objects and origins.
create table raw_extrinsic_metadata
(
- type text not null,
- target text not null,
+ target swhid not null,
-- metadata source
authority_id bigint not null,
@@ -443,7 +442,7 @@
metadata bytea not null,
-- context
- origin text,
+ origin swhid,
visit bigint,
snapshot swhid,
release swhid,
@@ -453,7 +452,6 @@
);
comment on table raw_extrinsic_metadata is 'keeps all metadata found concerning an object';
-comment on column raw_extrinsic_metadata.type is 'the type of object (content/directory/revision/release/snapshot/origin) the metadata is on';
comment on column raw_extrinsic_metadata.target is 'the SWHID or origin URL for which the metadata was found';
comment on column raw_extrinsic_metadata.discovery_date is 'the date of retrieval';
comment on column raw_extrinsic_metadata.authority_id is 'the metadata provider: github, openhub, deposit, etc.';
diff --git a/swh/storage/tests/migrate_extrinsic_metadata/test_cran.py b/swh/storage/tests/migrate_extrinsic_metadata/test_cran.py
--- a/swh/storage/tests/migrate_extrinsic_metadata/test_cran.py
+++ b/swh/storage/tests/migrate_extrinsic_metadata/test_cran.py
@@ -11,15 +11,14 @@
import json
from unittest.mock import Mock, call
-from swh.model.identifiers import parse_swhid
from swh.model.model import (
MetadataAuthority,
MetadataAuthorityType,
MetadataFetcher,
- MetadataTargetType,
Origin,
RawExtrinsicMetadata,
)
+from swh.model.swhid import SWHID, SWHIDObjectType, parse_swhid
from swh.storage.migrate_extrinsic_metadata import cran_package_from_url, handle_row
FETCHER = MetadataFetcher(
@@ -102,7 +101,9 @@
}
origin_url = "https://cran.r-project.org/package=ExtremeRisks"
-
+ origin_swhid = SWHID(
+ object_type=SWHIDObjectType.ORIGIN, object_id=origin_url.encode()
+ )
storage = Mock()
def origin_get(urls):
@@ -118,7 +119,6 @@
call.raw_extrinsic_metadata_add(
[
RawExtrinsicMetadata(
- type=MetadataTargetType.DIRECTORY,
target=DIRECTORY_SWHID,
discovery_date=datetime.datetime(
2020, 5, 7, 15, 27, 38, 652281, tzinfo=datetime.timezone.utc,
@@ -127,7 +127,7 @@
fetcher=FETCHER,
format="original-artifacts-json",
metadata=json.dumps(dest_original_artifacts).encode(),
- origin=origin_url,
+ origin=origin_swhid,
revision=parse_swhid(
"swh:1:rev:000361aa33842cbdea5fa6e77db696b937ebd269"
),
@@ -191,6 +191,9 @@
}
origin_url = "https://cran.r-project.org/package=gofgamma"
+ origin_swhid = SWHID(
+ object_type=SWHIDObjectType.ORIGIN, object_id=origin_url.encode()
+ )
storage = Mock()
@@ -207,7 +210,6 @@
call.raw_extrinsic_metadata_add(
[
RawExtrinsicMetadata(
- type=MetadataTargetType.DIRECTORY,
target=DIRECTORY_SWHID,
discovery_date=datetime.datetime(
2020, 4, 30, 11, 1, 57, 832481, tzinfo=datetime.timezone.utc,
@@ -216,7 +218,7 @@
fetcher=FETCHER,
format="original-artifacts-json",
metadata=json.dumps(dest_original_artifacts).encode(),
- origin=origin_url,
+ origin=origin_swhid,
revision=parse_swhid(
"swh:1:rev:0000d4ef5e166122aee6862ad38a18ce5386cc3e"
),
@@ -269,6 +271,9 @@
}
origin_url = "https://cran.r-project.org/package=r2mlm"
+ origin_swhid = SWHID(
+ object_type=SWHIDObjectType.ORIGIN, object_id=origin_url.encode()
+ )
storage = Mock()
@@ -285,7 +290,6 @@
call.raw_extrinsic_metadata_add(
[
RawExtrinsicMetadata(
- type=MetadataTargetType.DIRECTORY,
target=DIRECTORY_SWHID,
discovery_date=datetime.datetime(
2020, 9, 25, 14, 4, 20, 926667, tzinfo=datetime.timezone.utc,
@@ -294,7 +298,7 @@
fetcher=FETCHER,
format="original-artifacts-json",
metadata=json.dumps(original_artifacts).encode(),
- origin=origin_url,
+ origin=origin_swhid,
revision=parse_swhid(
"swh:1:rev:2e223782ee4ba152e4c886f797976241c39a9aab"
),
diff --git a/swh/storage/tests/migrate_extrinsic_metadata/test_debian.py b/swh/storage/tests/migrate_extrinsic_metadata/test_debian.py
--- a/swh/storage/tests/migrate_extrinsic_metadata/test_debian.py
+++ b/swh/storage/tests/migrate_extrinsic_metadata/test_debian.py
@@ -15,12 +15,10 @@
import attr
import pytest
-from swh.model.identifiers import parse_swhid
from swh.model.model import (
MetadataAuthority,
MetadataAuthorityType,
MetadataFetcher,
- MetadataTargetType,
Origin,
OriginVisit,
OriginVisitStatus,
@@ -34,6 +32,7 @@
Timestamp,
TimestampWithTimezone,
)
+from swh.model.swhid import SWHID, SWHIDObjectType, parse_swhid
from swh.storage import get_storage
from swh.storage.interface import ListOrder, PagedResult
from swh.storage.migrate_extrinsic_metadata import debian_origins_from_row, handle_row
@@ -425,6 +424,9 @@
}
origin_url = "deb://Debian/packages/kalgebra"
+ origin_swhid = SWHID(
+ object_type=SWHIDObjectType.ORIGIN, object_id=origin_url.encode()
+ )
storage = Mock()
@@ -437,7 +439,6 @@
call.raw_extrinsic_metadata_add(
[
RawExtrinsicMetadata(
- type=MetadataTargetType.DIRECTORY,
target=DIRECTORY_SWHID,
discovery_date=datetime.datetime(
2020, 1, 26, 22, 3, 24, tzinfo=datetime.timezone.utc,
@@ -446,7 +447,7 @@
fetcher=FETCHER,
format="original-artifacts-json",
metadata=json.dumps(dest_original_artifacts).encode(),
- origin=origin_url,
+ origin=origin_swhid,
revision=parse_swhid(
"swh:1:rev:0000036c311ef33a281b05688f6eadcfc0943aee"
),
@@ -545,6 +546,9 @@
storage = Mock()
origin_url = "http://snapshot.debian.org/package/pymongo"
+ origin_swhid = SWHID(
+ object_type=SWHIDObjectType.ORIGIN, object_id=origin_url.encode()
+ )
deposit_cur = None
with patch("debian_origins_from_row", return_value=[origin_url]):
@@ -554,7 +558,6 @@
call.raw_extrinsic_metadata_add(
[
RawExtrinsicMetadata(
- type=MetadataTargetType.DIRECTORY,
target=DIRECTORY_SWHID,
discovery_date=datetime.datetime(
2011, 3, 31, 20, 17, 41, tzinfo=datetime.timezone.utc
@@ -563,7 +566,7 @@
fetcher=FETCHER,
format="original-artifacts-json",
metadata=json.dumps(dest_original_artifacts).encode(),
- origin=origin_url,
+ origin=origin_swhid,
revision=parse_swhid(
"swh:1:rev:000001c28c8fca01b904de92a2640a866ce03cb7"
),
diff --git a/swh/storage/tests/migrate_extrinsic_metadata/test_deposit.py b/swh/storage/tests/migrate_extrinsic_metadata/test_deposit.py
--- a/swh/storage/tests/migrate_extrinsic_metadata/test_deposit.py
+++ b/swh/storage/tests/migrate_extrinsic_metadata/test_deposit.py
@@ -11,15 +11,14 @@
import json
from unittest.mock import MagicMock, Mock, call
-from swh.model.identifiers import parse_swhid
from swh.model.model import (
MetadataAuthority,
MetadataAuthorityType,
MetadataFetcher,
- MetadataTargetType,
Origin,
RawExtrinsicMetadata,
)
+from swh.model.swhid import SWHID, SWHIDObjectType, parse_swhid
from swh.storage.migrate_extrinsic_metadata import (
DEPOSIT_COLS,
cran_package_from_url,
@@ -130,6 +129,9 @@
origin_url = (
"https://www.softwareheritage.org/check-deposit-2020-03-11T11:07:18.424476"
)
+ origin_swhid = SWHID(
+ object_type=SWHIDObjectType.ORIGIN, object_id=origin_url.encode()
+ )
swhid = (
f"swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea"
@@ -186,7 +188,6 @@
call.raw_extrinsic_metadata_add(
[
RawExtrinsicMetadata(
- type=MetadataTargetType.DIRECTORY,
target=DIRECTORY_SWHID,
discovery_date=datetime.datetime(
2020, 3, 11, 11, 7, 18, 688410, tzinfo=datetime.timezone.utc
@@ -195,7 +196,7 @@
fetcher=FETCHER,
format="sword-v2-atom-codemeta-v2-in-json",
metadata=json.dumps(extrinsic_metadata).encode(),
- origin=origin_url,
+ origin=origin_swhid,
revision=parse_swhid(
"swh:1:rev:022310df16fd9e4d4f81fe36a142e82db977c01d"
),
@@ -205,7 +206,6 @@
call.raw_extrinsic_metadata_add(
[
RawExtrinsicMetadata(
- type=MetadataTargetType.DIRECTORY,
target=DIRECTORY_SWHID,
discovery_date=datetime.datetime(
2020, 3, 11, 11, 11, 36, 336283, tzinfo=datetime.timezone.utc
@@ -214,7 +214,7 @@
fetcher=FETCHER,
format="original-artifacts-json",
metadata=json.dumps(original_artifacts).encode(),
- origin=origin_url,
+ origin=origin_swhid,
revision=parse_swhid(
"swh:1:rev:022310df16fd9e4d4f81fe36a142e82db977c01d"
),
@@ -333,6 +333,9 @@
]
origin_url = "https://hal.archives-ouvertes.fr/hal-01243573"
+ origin_swhid = SWHID(
+ object_type=SWHIDObjectType.ORIGIN, object_id=origin_url.encode()
+ )
storage = Mock()
@@ -352,7 +355,6 @@
call.raw_extrinsic_metadata_add(
[
RawExtrinsicMetadata(
- type=MetadataTargetType.DIRECTORY,
target=DIRECTORY_SWHID,
discovery_date=datetime.datetime(
2018, 1, 17, 12, 54, 0, 413748, tzinfo=datetime.timezone.utc
@@ -361,7 +363,7 @@
fetcher=FETCHER,
format="sword-v2-atom-codemeta-v2-in-json-with-expanded-namespaces",
metadata=json.dumps(extrinsic_metadata).encode(),
- origin=origin_url,
+ origin=origin_swhid,
revision=parse_swhid(
"swh:1:rev:0116cab71964d59c8570b4c5729b28bdd63c9b46"
),
@@ -371,7 +373,6 @@
call.raw_extrinsic_metadata_add(
[
RawExtrinsicMetadata(
- type=MetadataTargetType.DIRECTORY,
target=DIRECTORY_SWHID,
discovery_date=datetime.datetime(
2020, 5, 15, 14, 27, 21, 462270, tzinfo=datetime.timezone.utc
@@ -380,7 +381,7 @@
fetcher=FETCHER,
format="original-artifacts-json",
metadata=json.dumps(original_artifacts).encode(),
- origin=origin_url,
+ origin=origin_swhid,
revision=parse_swhid(
"swh:1:rev:0116cab71964d59c8570b4c5729b28bdd63c9b46"
),
@@ -498,6 +499,9 @@
origin_url = (
"https://www.softwareheritage.org/check-deposit-2020-06-26T13:50:07.564420"
)
+ origin_swhid = SWHID(
+ object_type=SWHIDObjectType.ORIGIN, object_id=origin_url.encode()
+ )
storage = Mock()
@@ -517,7 +521,6 @@
call.raw_extrinsic_metadata_add(
[
RawExtrinsicMetadata(
- type=MetadataTargetType.DIRECTORY,
target=DIRECTORY_SWHID,
discovery_date=datetime.datetime(
2020, 6, 26, 13, 50, 8, 216113, tzinfo=datetime.timezone.utc
@@ -526,7 +529,7 @@
fetcher=FETCHER,
format="sword-v2-atom-codemeta-v2-in-json",
metadata=json.dumps(extrinsic_metadata).encode(),
- origin=origin_url,
+ origin=origin_swhid,
revision=parse_swhid(
"swh:1:rev:0122966e509317aece6a41d0f088da733cc09d0f"
),
@@ -536,7 +539,6 @@
call.raw_extrinsic_metadata_add(
[
RawExtrinsicMetadata(
- type=MetadataTargetType.DIRECTORY,
target=DIRECTORY_SWHID,
discovery_date=datetime.datetime(
2020, 6, 26, 13, 50, 22, 640625, tzinfo=datetime.timezone.utc
@@ -545,7 +547,7 @@
fetcher=FETCHER,
format="original-artifacts-json",
metadata=json.dumps(original_artifacts).encode(),
- origin=origin_url,
+ origin=origin_swhid,
revision=parse_swhid(
"swh:1:rev:0122966e509317aece6a41d0f088da733cc09d0f"
),
@@ -662,6 +664,9 @@
]
origin_url = "https://hal.archives-ouvertes.fr/hal-02960679"
+ origin_swhid = SWHID(
+ object_type=SWHIDObjectType.ORIGIN, object_id=origin_url.encode()
+ )
storage = Mock()
@@ -681,7 +686,6 @@
call.raw_extrinsic_metadata_add(
[
RawExtrinsicMetadata(
- type=MetadataTargetType.DIRECTORY,
target=DIRECTORY_SWHID,
discovery_date=datetime.datetime(
2020, 10, 9, 13, 38, 7, 394544, tzinfo=datetime.timezone.utc
@@ -690,7 +694,7 @@
fetcher=FETCHER,
format="sword-v2-atom-codemeta-v2-in-json",
metadata=json.dumps(extrinsic_metadata).encode(),
- origin=origin_url,
+ origin=origin_swhid,
revision=parse_swhid(
"swh:1:rev:4a9d637ba507a2b93365250428e6e3f021f194d0"
),
@@ -700,7 +704,6 @@
call.raw_extrinsic_metadata_add(
[
RawExtrinsicMetadata(
- type=MetadataTargetType.DIRECTORY,
target=DIRECTORY_SWHID,
discovery_date=datetime.datetime(
2020, 10, 9, 13, 38, 25, 888646, tzinfo=datetime.timezone.utc
@@ -709,7 +712,7 @@
fetcher=FETCHER,
format="original-artifacts-json",
metadata=json.dumps(original_artifacts).encode(),
- origin=origin_url,
+ origin=origin_swhid,
revision=parse_swhid(
"swh:1:rev:4a9d637ba507a2b93365250428e6e3f021f194d0"
),
@@ -837,6 +840,9 @@
]
origin_url = "https://software.intel.com/f80482de-90a8-4c32-bce4-6f6918d492ff"
+ origin_swhid = SWHID(
+ object_type=SWHIDObjectType.ORIGIN, object_id=origin_url.encode()
+ )
storage = Mock()
@@ -856,7 +862,6 @@
call.raw_extrinsic_metadata_add(
[
RawExtrinsicMetadata(
- type=MetadataTargetType.DIRECTORY,
target=DIRECTORY_SWHID,
discovery_date=datetime.datetime(
2019, 5, 14, 7, 49, 36, 775072, tzinfo=datetime.timezone.utc
@@ -865,7 +870,7 @@
fetcher=FETCHER,
format="sword-v2-atom-codemeta-v2-in-json",
metadata=json.dumps(extrinsic_metadata).encode(),
- origin=origin_url,
+ origin=origin_swhid,
revision=parse_swhid(
"swh:1:rev:09356053c49ad0f9e62e51c29d3e617c791140df"
),
@@ -875,7 +880,6 @@
call.raw_extrinsic_metadata_add(
[
RawExtrinsicMetadata(
- type=MetadataTargetType.DIRECTORY,
target=DIRECTORY_SWHID,
discovery_date=datetime.datetime(
2019, 5, 14, 7, 28, 33, 210100, tzinfo=datetime.timezone.utc
@@ -884,7 +888,7 @@
fetcher=FETCHER,
format="sword-v2-atom-codemeta-v2-in-json",
metadata=json.dumps(extrinsic_metadata).encode(),
- origin=origin_url,
+ origin=origin_swhid,
revision=parse_swhid(
"swh:1:rev:09356053c49ad0f9e62e51c29d3e617c791140df"
),
@@ -894,7 +898,6 @@
call.raw_extrinsic_metadata_add(
[
RawExtrinsicMetadata(
- type=MetadataTargetType.DIRECTORY,
target=DIRECTORY_SWHID,
discovery_date=datetime.datetime(
2019, 5, 14, 7, 49, 36, 775072, tzinfo=datetime.timezone.utc
@@ -903,7 +906,7 @@
fetcher=FETCHER,
format="original-artifacts-json",
metadata=json.dumps(dest_original_artifacts).encode(),
- origin=origin_url,
+ origin=origin_swhid,
revision=parse_swhid(
"swh:1:rev:09356053c49ad0f9e62e51c29d3e617c791140df"
),
@@ -1056,6 +1059,9 @@
]
origin_url = "https://hal.archives-ouvertes.fr/hal-01243573"
+ origin_swhid = SWHID(
+ object_type=SWHIDObjectType.ORIGIN, object_id=origin_url.encode()
+ )
storage = Mock()
@@ -1075,7 +1081,6 @@
call.raw_extrinsic_metadata_add(
[
RawExtrinsicMetadata(
- type=MetadataTargetType.DIRECTORY,
target=DIRECTORY_SWHID,
discovery_date=datetime.datetime(
2018, 1, 17, 12, 49, 30, 645576, tzinfo=datetime.timezone.utc
@@ -1084,7 +1089,7 @@
fetcher=FETCHER,
format="sword-v2-atom-codemeta-v2-in-json-with-expanded-namespaces",
metadata=json.dumps(extrinsic_metadata).encode(),
- origin=origin_url,
+ origin=origin_swhid,
revision=parse_swhid(
"swh:1:rev:03987f056eaf4596cd20d7b2ee01c9b84ceddfa8"
),
@@ -1164,6 +1169,9 @@
]
origin_url = "https://inria.halpreprod.archives-ouvertes.fr/hal-01588781"
+ origin_swhid = SWHID(
+ object_type=SWHIDObjectType.ORIGIN, object_id=origin_url.encode()
+ )
storage = Mock()
@@ -1183,7 +1191,6 @@
call.raw_extrinsic_metadata_add(
[
RawExtrinsicMetadata(
- type=MetadataTargetType.DIRECTORY,
target=DIRECTORY_SWHID,
discovery_date=datetime.datetime(
2018, 1, 10, 13, 14, 50, 555143, tzinfo=datetime.timezone.utc
@@ -1192,7 +1199,7 @@
fetcher=FETCHER,
format="sword-v2-atom-codemeta-v2-in-json-with-expanded-namespaces",
metadata=json.dumps(extrinsic_metadata).encode(),
- origin=origin_url,
+ origin=origin_swhid,
revision=parse_swhid(
"swh:1:rev:2d7bce631fc791080311eb835c47428e586a6ea4"
),
@@ -1203,7 +1210,7 @@
]
-def test_deposit_missing_metadata_in_revision():
+def test_deposit_ignore_origin_in_metadata():
extrinsic_metadata = {
"id": "hal-01243573",
"@xmlns": "http://www.w3.org/2005/Atom",
@@ -1297,6 +1304,9 @@
origin_url = "https://hal.archives-ouvertes.fr/hal-01243573"
# /!\ not https://hal-test.archives-ouvertes.fr/hal-01243573
# do not trust the metadata!
+ origin_swhid = SWHID(
+ object_type=SWHIDObjectType.ORIGIN, object_id=origin_url.encode()
+ )
storage = Mock()
@@ -1311,12 +1321,11 @@
deposit_cur.execute.assert_called_once()
deposit_cur.__iter__.assert_called_once()
- assert storage.method_calls == [
+ expected = [
call.origin_get([origin_url]),
call.raw_extrinsic_metadata_add(
[
RawExtrinsicMetadata(
- type=MetadataTargetType.DIRECTORY,
target=DIRECTORY_SWHID,
discovery_date=datetime.datetime(
2019, 2, 25, 15, 49, 12, 302745, tzinfo=datetime.timezone.utc
@@ -1325,7 +1334,7 @@
fetcher=FETCHER,
format="sword-v2-atom-codemeta-v2-in-json",
metadata=json.dumps(extrinsic_metadata).encode(),
- origin=origin_url,
+ origin=origin_swhid,
revision=parse_swhid(
"swh:1:rev:034076f3f41ee1204eb9f64082cbe6e950d7bb8a"
),
@@ -1335,7 +1344,6 @@
call.raw_extrinsic_metadata_add(
[
RawExtrinsicMetadata(
- type=MetadataTargetType.DIRECTORY,
target=DIRECTORY_SWHID,
discovery_date=datetime.datetime(
2019, 2, 25, 15, 54, 30, 102072, tzinfo=datetime.timezone.utc
@@ -1344,7 +1352,7 @@
fetcher=FETCHER,
format="original-artifacts-json",
metadata=json.dumps(dest_original_artifacts).encode(),
- origin=origin_url,
+ origin=origin_swhid,
revision=parse_swhid(
"swh:1:rev:034076f3f41ee1204eb9f64082cbe6e950d7bb8a"
),
@@ -1352,3 +1360,4 @@
]
),
]
+ assert storage.method_calls == expected
diff --git a/swh/storage/tests/migrate_extrinsic_metadata/test_gnu.py b/swh/storage/tests/migrate_extrinsic_metadata/test_gnu.py
--- a/swh/storage/tests/migrate_extrinsic_metadata/test_gnu.py
+++ b/swh/storage/tests/migrate_extrinsic_metadata/test_gnu.py
@@ -11,15 +11,14 @@
import json
from unittest.mock import Mock, call
-from swh.model.identifiers import parse_swhid
from swh.model.model import (
MetadataAuthority,
MetadataAuthorityType,
MetadataFetcher,
- MetadataTargetType,
Origin,
RawExtrinsicMetadata,
)
+from swh.model.swhid import SWHID, SWHIDObjectType, parse_swhid
from swh.storage.migrate_extrinsic_metadata import cran_package_from_url, handle_row
FETCHER = MetadataFetcher(
@@ -76,6 +75,9 @@
}
origin_url = "https://ftp.gnu.org/gnu/gperf/"
+ origin_swhid = SWHID(
+ object_type=SWHIDObjectType.ORIGIN, object_id=origin_url.encode()
+ )
storage = Mock()
@@ -92,7 +94,6 @@
call.raw_extrinsic_metadata_add(
[
RawExtrinsicMetadata(
- type=MetadataTargetType.DIRECTORY,
target=DIRECTORY_SWHID,
discovery_date=datetime.datetime(
2019, 11, 27, 11, 17, 38, 318997, tzinfo=datetime.timezone.utc
@@ -101,7 +102,7 @@
fetcher=FETCHER,
format="original-artifacts-json",
metadata=json.dumps(original_artifacts).encode(),
- origin=origin_url,
+ origin=origin_swhid,
revision=parse_swhid(
"swh:1:rev:001c71458e405b25baccc80b99f6634dff9d2b18"
),
diff --git a/swh/storage/tests/migrate_extrinsic_metadata/test_nixguix.py b/swh/storage/tests/migrate_extrinsic_metadata/test_nixguix.py
--- a/swh/storage/tests/migrate_extrinsic_metadata/test_nixguix.py
+++ b/swh/storage/tests/migrate_extrinsic_metadata/test_nixguix.py
@@ -11,15 +11,14 @@
import json
from unittest.mock import Mock, call
-from swh.model.identifiers import parse_swhid
from swh.model.model import (
MetadataAuthority,
MetadataAuthorityType,
MetadataFetcher,
- MetadataTargetType,
Origin,
RawExtrinsicMetadata,
)
+from swh.model.swhid import SWHID, SWHIDObjectType, parse_swhid
from swh.storage.migrate_extrinsic_metadata import cran_package_from_url, handle_row
FETCHER = MetadataFetcher(
@@ -74,6 +73,9 @@
}
origin_url = "https://nix-community.github.io/nixpkgs-swh/sources-unstable.json"
+ origin_swhid = SWHID(
+ object_type=SWHIDObjectType.ORIGIN, object_id=origin_url.encode()
+ )
storage = Mock()
@@ -90,7 +92,6 @@
call.raw_extrinsic_metadata_add(
[
RawExtrinsicMetadata(
- type=MetadataTargetType.DIRECTORY,
target=DIRECTORY_SWHID,
discovery_date=datetime.datetime(
2020, 6, 3, 11, 25, 5, 259341, tzinfo=datetime.timezone.utc
@@ -99,7 +100,7 @@
fetcher=FETCHER,
format="nixguix-sources-json",
metadata=json.dumps(extrinsic_metadata).encode(),
- origin=origin_url,
+ origin=origin_swhid,
revision=parse_swhid(
"swh:1:rev:0001ba4dd05394850211d7b3854d9913d23ae379"
),
@@ -109,7 +110,6 @@
call.raw_extrinsic_metadata_add(
[
RawExtrinsicMetadata(
- type=MetadataTargetType.DIRECTORY,
target=DIRECTORY_SWHID,
discovery_date=datetime.datetime(
2020, 6, 3, 11, 25, 5, 259341, tzinfo=datetime.timezone.utc
@@ -118,7 +118,7 @@
fetcher=FETCHER,
format="original-artifacts-json",
metadata=json.dumps(original_artifacts).encode(),
- origin=origin_url,
+ origin=origin_swhid,
revision=parse_swhid(
"swh:1:rev:0001ba4dd05394850211d7b3854d9913d23ae379"
),
diff --git a/swh/storage/tests/migrate_extrinsic_metadata/test_npm.py b/swh/storage/tests/migrate_extrinsic_metadata/test_npm.py
--- a/swh/storage/tests/migrate_extrinsic_metadata/test_npm.py
+++ b/swh/storage/tests/migrate_extrinsic_metadata/test_npm.py
@@ -11,15 +11,14 @@
import json
from unittest.mock import Mock, call
-from swh.model.identifiers import parse_swhid
from swh.model.model import (
MetadataAuthority,
MetadataAuthorityType,
MetadataFetcher,
- MetadataTargetType,
Origin,
RawExtrinsicMetadata,
)
+from swh.model.swhid import SWHID, SWHIDObjectType, parse_swhid
from swh.storage.migrate_extrinsic_metadata import (
handle_row,
npm_package_from_source_url,
@@ -126,6 +125,9 @@
}
origin_url = "https://www.npmjs.com/package/@l3ilkojr/jdinsults"
+ origin_swhid = SWHID(
+ object_type=SWHIDObjectType.ORIGIN, object_id=origin_url.encode()
+ )
storage = Mock()
@@ -142,7 +144,6 @@
call.raw_extrinsic_metadata_add(
[
RawExtrinsicMetadata(
- type=MetadataTargetType.DIRECTORY,
target=DIRECTORY_SWHID,
discovery_date=datetime.datetime(
2020, 2, 27, 1, 35, 47, 965375, tzinfo=datetime.timezone.utc,
@@ -151,7 +152,7 @@
fetcher=FETCHER,
format="replicate-npm-package-json",
metadata=json.dumps(extrinsic_metadata).encode(),
- origin=origin_url,
+ origin=origin_swhid,
revision=parse_swhid(
"swh:1:rev:000002a49bba17ca8cf37f5f3d16aaacf95360fc"
),
@@ -161,7 +162,6 @@
call.raw_extrinsic_metadata_add(
[
RawExtrinsicMetadata(
- type=MetadataTargetType.DIRECTORY,
target=DIRECTORY_SWHID,
discovery_date=datetime.datetime(
2020, 2, 27, 1, 35, 47, 965375, tzinfo=datetime.timezone.utc,
@@ -170,7 +170,7 @@
fetcher=FETCHER,
format="original-artifacts-json",
metadata=json.dumps(original_artifacts).encode(),
- origin=origin_url,
+ origin=origin_swhid,
revision=parse_swhid(
"swh:1:rev:000002a49bba17ca8cf37f5f3d16aaacf95360fc"
),
@@ -231,6 +231,9 @@
}
origin_url = "https://www.npmjs.com/package/simplemaps"
+ origin_swhid = SWHID(
+ object_type=SWHIDObjectType.ORIGIN, object_id=origin_url.encode()
+ )
storage = Mock()
@@ -247,7 +250,6 @@
call.raw_extrinsic_metadata_add(
[
RawExtrinsicMetadata(
- type=MetadataTargetType.DIRECTORY,
target=DIRECTORY_SWHID,
discovery_date=datetime.datetime(
2016, 12, 23, 7, 21, 29, tzinfo=datetime.timezone.utc,
@@ -256,7 +258,7 @@
fetcher=FETCHER,
format="replicate-npm-package-json",
metadata=json.dumps(extrinsic_metadata).encode(),
- origin=origin_url,
+ origin=origin_swhid,
revision=parse_swhid(
"swh:1:rev:000004aeed09ee089c781264c04d2564fd58feb5"
),
@@ -266,7 +268,6 @@
call.raw_extrinsic_metadata_add(
[
RawExtrinsicMetadata(
- type=MetadataTargetType.DIRECTORY,
target=DIRECTORY_SWHID,
discovery_date=datetime.datetime(
2016, 12, 23, 7, 21, 29, tzinfo=datetime.timezone.utc,
@@ -275,7 +276,7 @@
fetcher=FETCHER,
format="original-artifacts-json",
metadata=json.dumps(original_artifacts).encode(),
- origin=origin_url,
+ origin=origin_swhid,
revision=parse_swhid(
"swh:1:rev:000004aeed09ee089c781264c04d2564fd58feb5"
),
@@ -332,6 +333,9 @@
}
origin_url = "https://www.npmjs.com/package/@piximi/components"
+ origin_swhid = SWHID(
+ object_type=SWHIDObjectType.ORIGIN, object_id=origin_url.encode()
+ )
storage = Mock()
@@ -348,7 +352,6 @@
call.raw_extrinsic_metadata_add(
[
RawExtrinsicMetadata(
- type=MetadataTargetType.DIRECTORY,
target=DIRECTORY_SWHID,
discovery_date=datetime.datetime(
2019, 6, 7, 19, 56, 4, tzinfo=datetime.timezone.utc,
@@ -357,7 +360,7 @@
fetcher=FETCHER,
format="replicate-npm-package-json",
metadata=json.dumps(extrinsic_metadata).encode(),
- origin=origin_url,
+ origin=origin_swhid,
revision=parse_swhid(
"swh:1:rev:00002019c5775874bced007a529bd3b78bf60457"
),
@@ -367,7 +370,6 @@
call.raw_extrinsic_metadata_add(
[
RawExtrinsicMetadata(
- type=MetadataTargetType.DIRECTORY,
target=DIRECTORY_SWHID,
discovery_date=datetime.datetime(
2019, 6, 7, 19, 56, 4, tzinfo=datetime.timezone.utc,
@@ -376,7 +378,7 @@
fetcher=FETCHER,
format="original-artifacts-json",
metadata=json.dumps(original_artifacts).encode(),
- origin=origin_url,
+ origin=origin_swhid,
revision=parse_swhid(
"swh:1:rev:00002019c5775874bced007a529bd3b78bf60457"
),
diff --git a/swh/storage/tests/migrate_extrinsic_metadata/test_pypi.py b/swh/storage/tests/migrate_extrinsic_metadata/test_pypi.py
--- a/swh/storage/tests/migrate_extrinsic_metadata/test_pypi.py
+++ b/swh/storage/tests/migrate_extrinsic_metadata/test_pypi.py
@@ -13,12 +13,10 @@
import attr
-from swh.model.identifiers import parse_swhid
from swh.model.model import (
MetadataAuthority,
MetadataAuthorityType,
MetadataFetcher,
- MetadataTargetType,
Origin,
OriginVisit,
OriginVisitStatus,
@@ -27,6 +25,7 @@
SnapshotBranch,
TargetType,
)
+from swh.model.swhid import SWHID, SWHIDObjectType, parse_swhid
from swh.storage import get_storage
from swh.storage.interface import PagedResult
from swh.storage.migrate_extrinsic_metadata import (
@@ -266,6 +265,9 @@
}
origin_url = "https://pypi.org/project/m3-ui/"
+ origin_swhid = SWHID(
+ object_type=SWHIDObjectType.ORIGIN, object_id=origin_url.encode()
+ )
storage = get_storage("memory")
storage.origin_add([Origin(url=origin_url)])
@@ -282,11 +284,10 @@
revision_swhid = parse_swhid("swh:1:rev:000007617b53e7b1458f695dd07de4ce55af1517")
assert storage.raw_extrinsic_metadata_get(
- MetadataTargetType.DIRECTORY, DIRECTORY_SWHID, authority=PYPI_AUTHORITY,
+ DIRECTORY_SWHID, authority=PYPI_AUTHORITY,
) == PagedResult(
results=[
RawExtrinsicMetadata(
- type=MetadataTargetType.DIRECTORY,
target=DIRECTORY_SWHID,
discovery_date=datetime.datetime(
2020, 1, 23, 18, 43, 9, 109407, tzinfo=datetime.timezone.utc,
@@ -295,18 +296,17 @@
fetcher=FETCHER,
format="pypi-project-json",
metadata=json.dumps(extrinsic_metadata).encode(),
- origin=origin_url,
+ origin=origin_swhid,
revision=revision_swhid,
),
],
next_page_token=None,
)
assert storage.raw_extrinsic_metadata_get(
- MetadataTargetType.DIRECTORY, DIRECTORY_SWHID, authority=SWH_AUTHORITY,
+ DIRECTORY_SWHID, authority=SWH_AUTHORITY,
) == PagedResult(
results=[
RawExtrinsicMetadata(
- type=MetadataTargetType.DIRECTORY,
target=DIRECTORY_SWHID,
discovery_date=datetime.datetime(
2020, 1, 23, 18, 43, 9, 109407, tzinfo=datetime.timezone.utc,
@@ -315,7 +315,7 @@
fetcher=FETCHER,
format="original-artifacts-json",
metadata=json.dumps(original_artifacts).encode(),
- origin=origin_url,
+ origin=origin_swhid,
revision=revision_swhid,
),
],
@@ -405,11 +405,10 @@
revision_swhid = parse_swhid("swh:1:rev:000004d6382c4ad4c0519266626c36551f0e51ca")
assert storage.raw_extrinsic_metadata_get(
- MetadataTargetType.DIRECTORY, DIRECTORY_SWHID, authority=PYPI_AUTHORITY,
+ DIRECTORY_SWHID, authority=PYPI_AUTHORITY,
) == PagedResult(
results=[
RawExtrinsicMetadata(
- type=MetadataTargetType.DIRECTORY,
target=DIRECTORY_SWHID,
discovery_date=datetime.datetime(
2019, 1, 23, 22, 10, 55, tzinfo=datetime.timezone.utc,
@@ -425,11 +424,10 @@
next_page_token=None,
)
assert storage.raw_extrinsic_metadata_get(
- MetadataTargetType.DIRECTORY, DIRECTORY_SWHID, authority=SWH_AUTHORITY,
+ DIRECTORY_SWHID, authority=SWH_AUTHORITY,
) == PagedResult(
results=[
RawExtrinsicMetadata(
- type=MetadataTargetType.DIRECTORY,
target=DIRECTORY_SWHID,
discovery_date=datetime.datetime(
2019, 1, 23, 22, 10, 55, tzinfo=datetime.timezone.utc,
@@ -512,14 +510,13 @@
revision_swhid = parse_swhid("swh:1:rev:4ea9917cdf53cd13534a042e4eb3787b86c834d2")
assert storage.raw_extrinsic_metadata_get(
- MetadataTargetType.DIRECTORY, DIRECTORY_SWHID, authority=PYPI_AUTHORITY,
+ DIRECTORY_SWHID, authority=PYPI_AUTHORITY,
) == PagedResult(results=[], next_page_token=None,)
assert storage.raw_extrinsic_metadata_get(
- MetadataTargetType.DIRECTORY, DIRECTORY_SWHID, authority=SWH_AUTHORITY,
+ DIRECTORY_SWHID, authority=SWH_AUTHORITY,
) == PagedResult(
results=[
RawExtrinsicMetadata(
- type=MetadataTargetType.DIRECTORY,
target=DIRECTORY_SWHID,
discovery_date=datetime.datetime(
2014, 5, 7, 22, 3, tzinfo=datetime.timezone.utc,
@@ -580,6 +577,9 @@
}
origin_url = "https://pypi.org/project/PyPDFLite/"
+ origin_swhid = SWHID(
+ object_type=SWHIDObjectType.ORIGIN, object_id=origin_url.encode()
+ )
storage = get_storage("memory")
@@ -624,14 +624,13 @@
revision_swhid = parse_swhid("swh:1:rev:4ea9917cdf53cd13534a042e4eb3787b86c834d2")
assert storage.raw_extrinsic_metadata_get(
- MetadataTargetType.DIRECTORY, DIRECTORY_SWHID, authority=PYPI_AUTHORITY,
+ DIRECTORY_SWHID, authority=PYPI_AUTHORITY,
) == PagedResult(results=[], next_page_token=None,)
assert storage.raw_extrinsic_metadata_get(
- MetadataTargetType.DIRECTORY, DIRECTORY_SWHID, authority=SWH_AUTHORITY,
+ DIRECTORY_SWHID, authority=SWH_AUTHORITY,
) == PagedResult(
results=[
RawExtrinsicMetadata(
- type=MetadataTargetType.DIRECTORY,
target=DIRECTORY_SWHID,
discovery_date=datetime.datetime(
2014, 5, 7, 22, 3, tzinfo=datetime.timezone.utc,
@@ -640,7 +639,7 @@
fetcher=FETCHER,
format="original-artifacts-json",
metadata=json.dumps(dest_original_artifacts).encode(),
- origin=origin_url,
+ origin=origin_swhid,
revision=revision_swhid,
),
],
diff --git a/swh/storage/tests/storage_data.py b/swh/storage/tests/storage_data.py
--- a/swh/storage/tests/storage_data.py
+++ b/swh/storage/tests/storage_data.py
@@ -10,7 +10,6 @@
from swh.model import from_disk
from swh.model.hashutil import hash_to_bytes, hash_to_hex
-from swh.model.identifiers import parse_swhid
from swh.model.model import (
Content,
Directory,
@@ -18,7 +17,6 @@
MetadataAuthority,
MetadataAuthorityType,
MetadataFetcher,
- MetadataTargetType,
ObjectType,
Origin,
OriginVisit,
@@ -34,6 +32,23 @@
Timestamp,
TimestampWithTimezone,
)
+from swh.model.swhid import SWHID
+from swh.model.swhid import _swhid_type_map as swhid_type_map
+from swh.model.swhid import parse_swhid
+
+
+class SWHIDProvider:
+ def __init__(self, data):
+ self._data = data
+
+ def __getattr__(self, name):
+ return mkswhid(getattr(self._data, name))
+
+
+def mkswhid(obj):
+ object_type = swhid_type_map.get(obj.object_type)
+ if object_type:
+ return SWHID(object_type=object_type, object_id=obj.id)
class StorageData:
@@ -41,6 +56,8 @@
"""
+ swhid: SWHIDProvider
+
content = Content(
data=b"42\n",
length=3,
@@ -463,9 +480,8 @@
snapshots: Tuple[Snapshot, ...] = (snapshot, empty_snapshot, complete_snapshot)
content_metadata1 = RawExtrinsicMetadata(
- type=MetadataTargetType.CONTENT,
target=parse_swhid(f"swh:1:cnt:{hash_to_hex(content.sha1_git)}"),
- origin=origin.url,
+ origin=mkswhid(origin),
discovery_date=datetime.datetime(
2015, 1, 1, 21, 0, 0, tzinfo=datetime.timezone.utc
),
@@ -475,9 +491,8 @@
metadata=b'{"foo": "bar"}',
)
content_metadata2 = RawExtrinsicMetadata(
- type=MetadataTargetType.CONTENT,
target=parse_swhid(f"swh:1:cnt:{hash_to_hex(content.sha1_git)}"),
- origin=origin2.url,
+ origin=mkswhid(origin2),
discovery_date=datetime.datetime(
2017, 1, 1, 22, 0, 0, tzinfo=datetime.timezone.utc
),
@@ -487,7 +502,6 @@
metadata=b"foo: bar",
)
content_metadata3 = RawExtrinsicMetadata(
- type=MetadataTargetType.CONTENT,
target=parse_swhid(f"swh:1:cnt:{hash_to_hex(content.sha1_git)}"),
discovery_date=datetime.datetime(
2017, 1, 1, 22, 0, 0, tzinfo=datetime.timezone.utc
@@ -496,7 +510,7 @@
fetcher=attr.evolve(metadata_fetcher2, metadata=None),
format="yaml",
metadata=b"foo: bar",
- origin=origin.url,
+ origin=mkswhid(origin),
visit=42,
snapshot=parse_swhid(f"swh:1:snp:{hash_to_hex(snapshot.id)}"),
release=parse_swhid(f"swh:1:rel:{hash_to_hex(release.id)}"),
@@ -512,8 +526,7 @@
)
origin_metadata1 = RawExtrinsicMetadata(
- type=MetadataTargetType.ORIGIN,
- target=origin.url,
+ target=mkswhid(origin),
discovery_date=datetime.datetime(
2015, 1, 1, 21, 0, 0, tzinfo=datetime.timezone.utc
),
@@ -523,8 +536,7 @@
metadata=b'{"foo": "bar"}',
)
origin_metadata2 = RawExtrinsicMetadata(
- type=MetadataTargetType.ORIGIN,
- target=origin.url,
+ target=mkswhid(origin),
discovery_date=datetime.datetime(
2017, 1, 1, 22, 0, 0, tzinfo=datetime.timezone.utc
),
@@ -534,8 +546,7 @@
metadata=b"foo: bar",
)
origin_metadata3 = RawExtrinsicMetadata(
- type=MetadataTargetType.ORIGIN,
- target=origin.url,
+ target=mkswhid(origin),
discovery_date=datetime.datetime(
2017, 1, 1, 22, 0, 0, tzinfo=datetime.timezone.utc
),
@@ -550,3 +561,6 @@
origin_metadata2,
origin_metadata3,
)
+
+
+StorageData.swhid = SWHIDProvider(StorageData)
diff --git a/swh/storage/tests/storage_tests.py b/swh/storage/tests/storage_tests.py
--- a/swh/storage/tests/storage_tests.py
+++ b/swh/storage/tests/storage_tests.py
@@ -19,11 +19,9 @@
from swh.model import from_disk
from swh.model.hashutil import hash_to_bytes
from swh.model.hypothesis_strategies import objects
-from swh.model.identifiers import SWHID
from swh.model.model import (
Content,
Directory,
- MetadataTargetType,
Origin,
OriginVisit,
OriginVisitStatus,
@@ -3323,23 +3321,17 @@
swh_storage.metadata_authority_add([])
def test_content_metadata_add(self, swh_storage, sample_data):
- content = sample_data.content
+ content_swhid = sample_data.swhid.content
fetcher = sample_data.metadata_fetcher
authority = sample_data.metadata_authority
content_metadata = sample_data.content_metadata[:2]
- content_swhid = SWHID(
- object_type="content", object_id=hash_to_bytes(content.sha1_git)
- )
-
swh_storage.metadata_fetcher_add([fetcher])
swh_storage.metadata_authority_add([authority])
swh_storage.raw_extrinsic_metadata_add(content_metadata)
- result = swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.CONTENT, content_swhid, authority
- )
+ result = swh_storage.raw_extrinsic_metadata_get(content_swhid, authority)
assert result.next_page_token is None
assert list(sorted(result.results, key=lambda x: x.discovery_date,)) == list(
content_metadata
@@ -3356,13 +3348,10 @@
def test_content_metadata_add_duplicate(self, swh_storage, sample_data):
"""Duplicates should be silently updated."""
- content = sample_data.content
+ content_swhid = sample_data.swhid.content
fetcher = sample_data.metadata_fetcher
authority = sample_data.metadata_authority
content_metadata, content_metadata2 = sample_data.content_metadata[:2]
- content_swhid = SWHID(
- object_type="content", object_id=hash_to_bytes(content.sha1_git)
- )
new_content_metadata2 = attr.evolve(
content_metadata2, format="new-format", metadata=b"new-metadata",
@@ -3374,9 +3363,7 @@
swh_storage.raw_extrinsic_metadata_add([content_metadata, content_metadata2])
swh_storage.raw_extrinsic_metadata_add([new_content_metadata2])
- result = swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.CONTENT, content_swhid, authority
- )
+ result = swh_storage.raw_extrinsic_metadata_get(content_swhid, authority)
assert result.next_page_token is None
expected_results1 = (content_metadata, new_content_metadata2)
@@ -3397,8 +3384,8 @@
content1_metadata3,
) = sample_data.content_metadata[:3]
- content1_swhid = SWHID(object_type="content", object_id=content.sha1_git)
- content2_swhid = SWHID(object_type="content", object_id=content2.sha1_git)
+ content1_swhid = sample_data.swhid.content
+ content2_swhid = sample_data.swhid.content2
content2_metadata = attr.evolve(content1_metadata2, target=content2_swhid)
swh_storage.metadata_authority_add([authority, authority2])
@@ -3413,43 +3400,34 @@
]
)
- result = swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.CONTENT, content1_swhid, authority
- )
+ result = swh_storage.raw_extrinsic_metadata_get(content1_swhid, authority)
assert result.next_page_token is None
assert [content1_metadata1, content1_metadata2] == list(
sorted(result.results, key=lambda x: x.discovery_date,)
)
- result = swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.CONTENT, content1_swhid, authority2
- )
+ result = swh_storage.raw_extrinsic_metadata_get(content1_swhid, authority2)
assert result.next_page_token is None
assert [content1_metadata3] == list(
sorted(result.results, key=lambda x: x.discovery_date,)
)
- result = swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.CONTENT, content2_swhid, authority
- )
+ result = swh_storage.raw_extrinsic_metadata_get(content2_swhid, authority)
assert result.next_page_token is None
assert [content2_metadata] == list(result.results,)
def test_content_metadata_get_after(self, swh_storage, sample_data):
- content = sample_data.content
+ content_swhid = sample_data.swhid.content
fetcher = sample_data.metadata_fetcher
authority = sample_data.metadata_authority
content_metadata, content_metadata2 = sample_data.content_metadata[:2]
- content_swhid = SWHID(object_type="content", object_id=content.sha1_git)
-
swh_storage.metadata_fetcher_add([fetcher])
swh_storage.metadata_authority_add([authority])
swh_storage.raw_extrinsic_metadata_add([content_metadata, content_metadata2])
result = swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.CONTENT,
content_swhid,
authority,
after=content_metadata.discovery_date - timedelta(seconds=1),
@@ -3460,62 +3438,46 @@
)
result = swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.CONTENT,
- content_swhid,
- authority,
- after=content_metadata.discovery_date,
+ content_swhid, authority, after=content_metadata.discovery_date,
)
assert result.next_page_token is None
assert result.results == [content_metadata2]
result = swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.CONTENT,
- content_swhid,
- authority,
- after=content_metadata2.discovery_date,
+ content_swhid, authority, after=content_metadata2.discovery_date,
)
assert result.next_page_token is None
assert result.results == []
def test_content_metadata_get_paginate(self, swh_storage, sample_data):
- content = sample_data.content
+ content_swhid = sample_data.swhid.content
fetcher = sample_data.metadata_fetcher
authority = sample_data.metadata_authority
content_metadata, content_metadata2 = sample_data.content_metadata[:2]
- content_swhid = SWHID(object_type="content", object_id=content.sha1_git)
-
swh_storage.metadata_fetcher_add([fetcher])
swh_storage.metadata_authority_add([authority])
swh_storage.raw_extrinsic_metadata_add([content_metadata, content_metadata2])
- swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.CONTENT, content_swhid, authority
- )
+ swh_storage.raw_extrinsic_metadata_get(content_swhid, authority)
result = swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.CONTENT, content_swhid, authority, limit=1
+ content_swhid, authority, limit=1
)
assert result.next_page_token is not None
assert result.results == [content_metadata]
result = swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.CONTENT,
- content_swhid,
- authority,
- limit=1,
- page_token=result.next_page_token,
+ content_swhid, authority, limit=1, page_token=result.next_page_token,
)
assert result.next_page_token is None
assert result.results == [content_metadata2]
def test_content_metadata_get_paginate_same_date(self, swh_storage, sample_data):
- content = sample_data.content
+ content_swhid = sample_data.swhid.content
fetcher1, fetcher2 = sample_data.fetchers[:2]
authority = sample_data.metadata_authority
content_metadata, content_metadata2 = sample_data.content_metadata[:2]
- content_swhid = SWHID(object_type="content", object_id=content.sha1_git)
-
swh_storage.metadata_fetcher_add([fetcher1, fetcher2])
swh_storage.metadata_authority_add([authority])
@@ -3530,38 +3492,20 @@
)
result = swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.CONTENT, content_swhid, authority, limit=1
+ content_swhid, authority, limit=1
)
assert result.next_page_token is not None
assert result.results == [content_metadata]
result = swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.CONTENT,
- content_swhid,
- authority,
- limit=1,
- page_token=result.next_page_token,
+ content_swhid, authority, limit=1, page_token=result.next_page_token,
)
assert result.next_page_token is None
assert result.results == [new_content_metadata2]
- def test_content_metadata_get__invalid_id(self, swh_storage, sample_data):
- origin = sample_data.origin
- fetcher = sample_data.metadata_fetcher
- authority = sample_data.metadata_authority
- content_metadata, content_metadata2 = sample_data.content_metadata[:2]
-
- swh_storage.metadata_fetcher_add([fetcher])
- swh_storage.metadata_authority_add([authority])
- swh_storage.raw_extrinsic_metadata_add([content_metadata, content_metadata2])
-
- with pytest.raises(StorageArgumentException, match="SWHID"):
- swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.CONTENT, origin.url, authority
- )
-
def test_origin_metadata_add(self, swh_storage, sample_data):
origin = sample_data.origin
+ origin_swhid = sample_data.swhid.origin
fetcher = sample_data.metadata_fetcher
authority = sample_data.metadata_authority
origin_metadata, origin_metadata2 = sample_data.origin_metadata[:2]
@@ -3573,9 +3517,7 @@
swh_storage.raw_extrinsic_metadata_add([origin_metadata, origin_metadata2])
- result = swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.ORIGIN, origin.url, authority
- )
+ result = swh_storage.raw_extrinsic_metadata_get(origin_swhid, authority)
assert result.next_page_token is None
assert list(sorted(result.results, key=lambda x: x.discovery_date)) == [
origin_metadata,
@@ -3596,6 +3538,7 @@
def test_origin_metadata_add_duplicate(self, swh_storage, sample_data):
"""Duplicates should be silently updated."""
origin = sample_data.origin
+ origin_swhid = sample_data.swhid.origin
fetcher = sample_data.metadata_fetcher
authority = sample_data.metadata_authority
origin_metadata, origin_metadata2 = sample_data.origin_metadata[:2]
@@ -3611,9 +3554,7 @@
swh_storage.raw_extrinsic_metadata_add([origin_metadata, origin_metadata2])
swh_storage.raw_extrinsic_metadata_add([new_origin_metadata2])
- result = swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.ORIGIN, origin.url, authority
- )
+ result = swh_storage.raw_extrinsic_metadata_get(origin_swhid, authority)
assert result.next_page_token is None
# which of the two behavior happens is backend-specific.
@@ -3626,7 +3567,10 @@
)
def test_origin_metadata_get(self, swh_storage, sample_data):
- origin, origin2 = sample_data.origins[:2]
+ origin = sample_data.origin
+ origin_swhid = sample_data.swhid.origin
+ origin2 = sample_data.origin2
+ origin2_swhid = sample_data.swhid.origin2
fetcher, fetcher2 = sample_data.fetchers[:2]
authority, authority2 = sample_data.authorities[:2]
(
@@ -3637,7 +3581,7 @@
assert swh_storage.origin_add([origin, origin2]) == {"origin:add": 2}
- origin2_metadata = attr.evolve(origin1_metadata2, target=origin2.url)
+ origin2_metadata = attr.evolve(origin1_metadata2, target=origin2_swhid)
swh_storage.metadata_authority_add([authority, authority2])
swh_storage.metadata_fetcher_add([fetcher, fetcher2])
@@ -3646,30 +3590,25 @@
[origin1_metadata1, origin1_metadata2, origin1_metadata3, origin2_metadata]
)
- result = swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.ORIGIN, origin.url, authority
- )
+ result = swh_storage.raw_extrinsic_metadata_get(origin_swhid, authority)
assert result.next_page_token is None
assert [origin1_metadata1, origin1_metadata2] == list(
sorted(result.results, key=lambda x: x.discovery_date,)
)
- result = swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.ORIGIN, origin.url, authority2
- )
+ result = swh_storage.raw_extrinsic_metadata_get(origin_swhid, authority2)
assert result.next_page_token is None
assert [origin1_metadata3] == list(
sorted(result.results, key=lambda x: x.discovery_date,)
)
- result = swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.ORIGIN, origin2.url, authority
- )
+ result = swh_storage.raw_extrinsic_metadata_get(origin2_swhid, authority)
assert result.next_page_token is None
assert [origin2_metadata] == list(result.results,)
def test_origin_metadata_get_after(self, swh_storage, sample_data):
origin = sample_data.origin
+ origin_swhid = sample_data.swhid.origin
fetcher = sample_data.metadata_fetcher
authority = sample_data.metadata_authority
origin_metadata, origin_metadata2 = sample_data.origin_metadata[:2]
@@ -3681,8 +3620,7 @@
swh_storage.raw_extrinsic_metadata_add([origin_metadata, origin_metadata2])
result = swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.ORIGIN,
- origin.url,
+ origin_swhid,
authority,
after=origin_metadata.discovery_date - timedelta(seconds=1),
)
@@ -3693,25 +3631,20 @@
]
result = swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.ORIGIN,
- origin.url,
- authority,
- after=origin_metadata.discovery_date,
+ origin_swhid, authority, after=origin_metadata.discovery_date,
)
assert result.next_page_token is None
assert result.results == [origin_metadata2]
result = swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.ORIGIN,
- origin.url,
- authority,
- after=origin_metadata2.discovery_date,
+ origin_swhid, authority, after=origin_metadata2.discovery_date,
)
assert result.next_page_token is None
assert result.results == []
def test_origin_metadata_get_paginate(self, swh_storage, sample_data):
origin = sample_data.origin
+ origin_swhid = sample_data.swhid.origin
fetcher = sample_data.metadata_fetcher
authority = sample_data.metadata_authority
origin_metadata, origin_metadata2 = sample_data.origin_metadata[:2]
@@ -3722,28 +3655,23 @@
swh_storage.raw_extrinsic_metadata_add([origin_metadata, origin_metadata2])
- swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.ORIGIN, origin.url, authority
- )
+ swh_storage.raw_extrinsic_metadata_get(origin_swhid, authority)
result = swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.ORIGIN, origin.url, authority, limit=1
+ origin_swhid, authority, limit=1
)
assert result.next_page_token is not None
assert result.results == [origin_metadata]
result = swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.ORIGIN,
- origin.url,
- authority,
- limit=1,
- page_token=result.next_page_token,
+ origin_swhid, authority, limit=1, page_token=result.next_page_token,
)
assert result.next_page_token is None
assert result.results == [origin_metadata2]
def test_origin_metadata_get_paginate_same_date(self, swh_storage, sample_data):
origin = sample_data.origin
+ origin_swhid = sample_data.swhid.origin
fetcher1, fetcher2 = sample_data.fetchers[:2]
authority = sample_data.metadata_authority
origin_metadata, origin_metadata2 = sample_data.origin_metadata[:2]
@@ -3761,17 +3689,13 @@
swh_storage.raw_extrinsic_metadata_add([origin_metadata, new_origin_metadata2])
result = swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.ORIGIN, origin.url, authority, limit=1
+ origin_swhid, authority, limit=1
)
assert result.next_page_token is not None
assert result.results == [origin_metadata]
result = swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.ORIGIN,
- origin.url,
- authority,
- limit=1,
- page_token=result.next_page_token,
+ origin_swhid, authority, limit=1, page_token=result.next_page_token,
)
assert result.next_page_token is None
assert result.results == [new_origin_metadata2]
@@ -3798,24 +3722,6 @@
with pytest.raises(StorageArgumentException, match="fetcher"):
swh_storage.raw_extrinsic_metadata_add([origin_metadata, origin_metadata2])
- def test_origin_metadata_get__invalid_id_type(self, swh_storage, sample_data):
- origin = sample_data.origin
- authority = sample_data.metadata_authority
- fetcher = sample_data.metadata_fetcher
- origin_metadata, origin_metadata2 = sample_data.origin_metadata[:2]
- content_metadata = sample_data.content_metadata[0]
- assert swh_storage.origin_add([origin]) == {"origin:add": 1}
-
- swh_storage.metadata_fetcher_add([fetcher])
- swh_storage.metadata_authority_add([authority])
-
- swh_storage.raw_extrinsic_metadata_add([origin_metadata, origin_metadata2])
-
- with pytest.raises(StorageArgumentException, match="SWHID"):
- swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.ORIGIN, content_metadata.target, authority,
- )
-
class TestStorageGeneratedData:
def test_generate_content_get_data(self, swh_storage, swh_contents):

File Metadata

Mime Type
text/plain
Expires
Dec 19 2024, 10:16 PM (11 w, 3 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3217345

Event Timeline