Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7123767
D4986.id17931.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
82 KB
Subscribers
None
D4986.id17931.diff
View Options
diff --git a/swh/storage/api/serializers.py b/swh/storage/api/serializers.py
--- a/swh/storage/api/serializers.py
+++ b/swh/storage/api/serializers.py
@@ -7,8 +7,8 @@
from typing import Callable, Dict, List, Tuple
-from swh.model.identifiers import SWHID, parse_swhid
import swh.model.model as model
+from swh.model.swhid import SWHID, parse_swhid
from swh.storage import interface
@@ -36,7 +36,6 @@
ENCODERS: List[Tuple[type, str, Callable]] = [
(model.BaseModel, "model", _encode_model_object),
(SWHID, "swhid", str),
- (model.MetadataTargetType, "model_enum", _encode_enum),
(model.MetadataAuthorityType, "model_enum", _encode_enum),
(interface.ListOrder, "storage_enum", _encode_enum),
]
@@ -46,6 +45,5 @@
"swhid": parse_swhid,
"model": lambda d: getattr(model, d.pop("__type__")).from_dict(d),
"model_enum": _decode_model_enum,
- "model_enum": _decode_model_enum,
"storage_enum": _decode_storage_enum,
}
diff --git a/swh/storage/backfill.py b/swh/storage/backfill.py
--- a/swh/storage/backfill.py
+++ b/swh/storage/backfill.py
@@ -35,6 +35,7 @@
db_to_release,
db_to_revision,
)
+from swh.storage.postgresql.db import register_swhid_type
from swh.storage.replay import object_converter_fn
from swh.storage.writer import JournalWriter
@@ -79,7 +80,6 @@
"metadata_authority": ["type", "url", "metadata",],
"metadata_fetcher": ["name", "version", "metadata",],
"raw_extrinsic_metadata": [
- "raw_extrinsic_metadata.type",
"raw_extrinsic_metadata.target",
"metadata_authority.type",
"metadata_authority.url",
@@ -533,6 +533,8 @@
)
db = BaseDb.connect(self.config["storage"]["db"])
+ register_swhid_type(db.conn)
+
writer = JournalWriter({"cls": "kafka", **self.config["journal_writer"]})
assert writer.journal is not None
diff --git a/swh/storage/cassandra/model.py b/swh/storage/cassandra/model.py
--- a/swh/storage/cassandra/model.py
+++ b/swh/storage/cassandra/model.py
@@ -250,7 +250,6 @@
"fetcher_version",
)
- type: str
target: str
authority_type: str
diff --git a/swh/storage/cassandra/schema.py b/swh/storage/cassandra/schema.py
--- a/swh/storage/cassandra/schema.py
+++ b/swh/storage/cassandra/schema.py
@@ -188,7 +188,6 @@
);""",
"""
CREATE TABLE IF NOT EXISTS raw_extrinsic_metadata (
- type text,
target text,
-- metadata source
diff --git a/swh/storage/cassandra/storage.py b/swh/storage/cassandra/storage.py
--- a/swh/storage/cassandra/storage.py
+++ b/swh/storage/cassandra/storage.py
@@ -26,7 +26,6 @@
from swh.core.api.serializers import msgpack_dumps, msgpack_loads
from swh.model.hashutil import DEFAULT_ALGORITHMS
-from swh.model.identifiers import SWHID, parse_swhid
from swh.model.model import (
Content,
Directory,
@@ -34,7 +33,6 @@
MetadataAuthority,
MetadataAuthorityType,
MetadataFetcher,
- MetadataTargetType,
Origin,
OriginVisit,
OriginVisitStatus,
@@ -47,6 +45,7 @@
SnapshotBranch,
TargetType,
)
+from swh.model.swhid import SWHID, parse_swhid
from swh.storage.interface import (
VISIT_STATUSES,
ListOrder,
@@ -1157,7 +1156,6 @@
try:
row = RawExtrinsicMetadataRow(
- type=metadata_entry.type.value,
target=str(metadata_entry.target),
authority_type=metadata_entry.authority.type.value,
authority_url=metadata_entry.authority.url,
@@ -1166,7 +1164,7 @@
fetcher_version=metadata_entry.fetcher.version,
format=metadata_entry.format,
metadata=metadata_entry.metadata,
- origin=metadata_entry.origin,
+ origin=map_optional(str, metadata_entry.origin),
visit=metadata_entry.visit,
snapshot=map_optional(str, metadata_entry.snapshot),
release=map_optional(str, metadata_entry.release),
@@ -1180,26 +1178,12 @@
def raw_extrinsic_metadata_get(
self,
- type: MetadataTargetType,
- target: Union[str, SWHID],
+ target: SWHID,
authority: MetadataAuthority,
after: Optional[datetime.datetime] = None,
page_token: Optional[bytes] = None,
limit: int = 1000,
) -> PagedResult[RawExtrinsicMetadata]:
- if type == MetadataTargetType.ORIGIN:
- if isinstance(target, SWHID):
- raise StorageArgumentException(
- f"raw_extrinsic_metadata_get called with type='origin', "
- f"but provided target is a SWHID: {target!r}"
- )
- else:
- if not isinstance(target, SWHID):
- raise StorageArgumentException(
- f"raw_extrinsic_metadata_get called with type!='origin', "
- f"but provided target is not a SWHID: {target!r}"
- )
-
if page_token is not None:
(after_date, after_fetcher_name, after_fetcher_url) = msgpack_loads(
base64.b64decode(page_token)
@@ -1235,7 +1219,6 @@
assert str(target) == entry.target
result = RawExtrinsicMetadata(
- type=MetadataTargetType(entry.type),
target=target,
authority=MetadataAuthority(
type=MetadataAuthorityType(entry.authority_type),
@@ -1247,7 +1230,7 @@
discovery_date=discovery_date,
format=entry.format,
metadata=entry.metadata,
- origin=entry.origin,
+ origin=map_optional(parse_swhid, entry.origin),
visit=entry.visit,
snapshot=map_optional(parse_swhid, entry.snapshot),
release=map_optional(parse_swhid, entry.release),
diff --git a/swh/storage/interface.py b/swh/storage/interface.py
--- a/swh/storage/interface.py
+++ b/swh/storage/interface.py
@@ -5,20 +5,18 @@
import datetime
from enum import Enum
-from typing import Any, Dict, Iterable, List, Optional, Sequence, Tuple, TypeVar, Union
+from typing import Any, Dict, Iterable, List, Optional, Sequence, Tuple, TypeVar
from typing_extensions import Protocol, TypedDict, runtime_checkable
from swh.core.api import remote_api_endpoint
from swh.core.api.classes import PagedResult as CorePagedResult
-from swh.model.identifiers import SWHID
from swh.model.model import (
Content,
Directory,
MetadataAuthority,
MetadataAuthorityType,
MetadataFetcher,
- MetadataTargetType,
Origin,
OriginVisit,
OriginVisitStatus,
@@ -31,6 +29,7 @@
Snapshot,
SnapshotBranch,
)
+from swh.model.swhid import SWHID
class ListOrder(Enum):
@@ -1096,8 +1095,7 @@
@remote_api_endpoint("raw_extrinsic_metadata/get")
def raw_extrinsic_metadata_get(
self,
- type: MetadataTargetType,
- target: Union[str, SWHID],
+ target: SWHID,
authority: MetadataAuthority,
after: Optional[datetime.datetime] = None,
page_token: Optional[bytes] = None,
@@ -1106,8 +1104,7 @@
"""Retrieve list of all raw_extrinsic_metadata entries for the id
Args:
- type: one of the values of swh.model.model.MetadataTargetType
- target: an URL if type is 'origin', else a core SWHID
+ target: a core SWHID
authority: a dict containing keys `type` and `url`.
after: minimum discovery_date for a result to be returned
page_token: opaque token, used to get the next page of results
diff --git a/swh/storage/migrate_extrinsic_metadata.py b/swh/storage/migrate_extrinsic_metadata.py
old mode 100644
new mode 100755
--- a/swh/storage/migrate_extrinsic_metadata.py
+++ b/swh/storage/migrate_extrinsic_metadata.py
@@ -38,15 +38,14 @@
from swh.core.db import BaseDb
from swh.model.hashutil import hash_to_hex
-from swh.model.identifiers import SWHID, parse_swhid
from swh.model.model import (
MetadataAuthority,
MetadataAuthorityType,
MetadataFetcher,
- MetadataTargetType,
RawExtrinsicMetadata,
Sha1Git,
)
+from swh.model.swhid import SWHID, SWHIDObjectType, parse_swhid
from swh.storage import get_storage
from swh.storage.algos.origin import iter_origin_visit_statuses, iter_origin_visits
from swh.storage.algos.snapshot import snapshot_get_all_branches
@@ -412,19 +411,27 @@
dry_run: bool,
):
"""Does the actual loading to swh-storage."""
+ origin_swhid: Optional[SWHID]
+ if origin is not None:
+ origin_swhid = SWHID(
+ object_type=SWHIDObjectType.ORIGIN, object_id=origin.encode()
+ )
+ else:
+ origin_swhid = None
directory_swhid = SWHID(
- object_type="directory", object_id=hash_to_hex(directory_id)
+ object_type=SWHIDObjectType.DIRECTORY, object_id=hash_to_hex(directory_id)
+ )
+ revision_swhid = SWHID(
+ object_type=SWHIDObjectType.REVISION, object_id=hash_to_hex(revision_id)
)
- revision_swhid = SWHID(object_type="revision", object_id=hash_to_hex(revision_id))
obj = RawExtrinsicMetadata(
- type=MetadataTargetType.DIRECTORY,
target=directory_swhid,
discovery_date=discovery_date,
authority=authority,
fetcher=FETCHER,
format=format,
metadata=json.dumps(metadata).encode(),
- origin=origin,
+ origin=origin_swhid,
revision=revision_swhid,
)
if not dry_run:
@@ -516,7 +523,6 @@
if discovery_date is None:
discovery_date = max(dates)
-
# Sanity checks to make sure deposit requests are consistent with each other
assert len(metadata_entries) >= 1, deposit_id
assert len(provider_urls) == 1, f"expected 1 provider url, got {provider_urls}"
diff --git a/swh/storage/postgresql/converters.py b/swh/storage/postgresql/converters.py
--- a/swh/storage/postgresql/converters.py
+++ b/swh/storage/postgresql/converters.py
@@ -7,12 +7,10 @@
from typing import Any, Dict, Optional
from swh.core.utils import encode_with_unescape
-from swh.model.identifiers import parse_swhid
from swh.model.model import (
MetadataAuthority,
MetadataAuthorityType,
MetadataFetcher,
- MetadataTargetType,
ObjectType,
Person,
RawExtrinsicMetadata,
@@ -23,8 +21,6 @@
TimestampWithTimezone,
)
-from ..utils import map_optional
-
DEFAULT_AUTHOR = {
"fullname": None,
"name": None,
@@ -295,13 +291,8 @@
def db_to_raw_extrinsic_metadata(row) -> RawExtrinsicMetadata:
- type_ = MetadataTargetType(row["raw_extrinsic_metadata.type"])
- target = row["raw_extrinsic_metadata.target"]
- if type_ != MetadataTargetType.ORIGIN:
- target = parse_swhid(target)
return RawExtrinsicMetadata(
- type=type_,
- target=target,
+ target=row["raw_extrinsic_metadata.target"],
authority=MetadataAuthority(
type=MetadataAuthorityType(row["metadata_authority.type"]),
url=row["metadata_authority.url"],
@@ -314,9 +305,9 @@
metadata=row["raw_extrinsic_metadata.metadata"],
origin=row["origin"],
visit=row["visit"],
- snapshot=map_optional(parse_swhid, row["snapshot"]),
- release=map_optional(parse_swhid, row["release"]),
- revision=map_optional(parse_swhid, row["revision"]),
+ snapshot=row["snapshot"],
+ release=row["release"],
+ revision=row["revision"],
path=row["path"],
- directory=map_optional(parse_swhid, row["directory"]),
+ directory=row["directory"],
)
diff --git a/swh/storage/postgresql/db.py b/swh/storage/postgresql/db.py
--- a/swh/storage/postgresql/db.py
+++ b/swh/storage/postgresql/db.py
@@ -6,14 +6,20 @@
import datetime
import logging
import random
+import re
import select
from typing import Any, Dict, Iterable, List, Optional, Tuple
+import psycopg2
+
from swh.core.db import BaseDb
from swh.core.db.db_utils import execute_values_generator
from swh.core.db.db_utils import jsonize as _jsonize
from swh.core.db.db_utils import stored_procedure
+from swh.model.hashutil import hash_to_bytes
from swh.model.model import SHA1_SIZE, OriginVisit, OriginVisitStatus
+from swh.model.swhid import SWHID
+from swh.model.swhid import _swhid_type_map as swhid_typemap
from swh.storage.interface import ListOrder
logger = logging.getLogger(__name__)
@@ -23,6 +29,55 @@
return _jsonize(dict(d) if d is not None else None)
+def typecast_swhid(value, cur):
+ if value is None:
+ return None
+ m = re.match(r'\(([^)]+),([^)]+),"([^)]+)"\)', value)
+ if m:
+ return SWHID(
+ scheme_version=int(m.group(1)),
+ object_type=swhid_typemap[m.group(2)],
+ object_id=hash_to_bytes(m.group(3)[3:]),
+ )
+ else:
+ raise psycopg2.InterfaceError("bad SWHID representation: %r" % value)
+
+
+def adapt_swhid(swhid: SWHID):
+ value = psycopg2.extensions.AsIs(
+ (
+ b"ROW(%d, '%s'::swhid_type, '\\x%s'::bytea)"
+ % (
+ swhid.scheme_version,
+ swhid.object_type.value.encode(),
+ swhid.object_id.encode(),
+ )
+ ).decode()
+    )
+    return value
+
+
+def register_swhid_type(conn):
+ with conn.cursor() as cur:
+ cur.execute(
+ """
+ SELECT pg_type.oid
+ FROM pg_type
+ JOIN pg_namespace
+ ON typnamespace = pg_namespace.oid
+ WHERE typname = %(typename)s
+ AND nspname = %(namespace)s""",
+ {"typename": "swhid", "namespace": "public"},
+ )
+
+ oid = cur.fetchone()[0]
+
+ t_SWHID = psycopg2.extensions.new_type((oid,), "SWHID", typecast_swhid)
+ psycopg2.extensions.register_type(t_SWHID, conn)
+ psycopg2.extensions.register_adapter(SWHID, adapt_swhid)
+
+
class Db(BaseDb):
"""Proxy to the SWH DB, with wrappers around stored procedures
@@ -30,6 +85,14 @@
current_version = 166
+ def __init__(
+ self,
+ conn: psycopg2.extensions.connection,
+ pool: Optional[psycopg2.pool.AbstractConnectionPool] = None,
+ ):
+ super().__init__(conn, pool)
+ register_swhid_type(conn)
+
def mktemp_dir_entry(self, entry_type, cur=None):
self._cursor(cur).execute(
"SELECT swh_mktemp_dir_entry(%s)", (("directory_entry_%s" % entry_type),)
@@ -1136,7 +1199,6 @@
"""The list of context columns for all artifact types."""
_raw_extrinsic_metadata_insert_cols = [
- "type",
"target",
"authority_id",
"fetcher_id",
@@ -1158,7 +1220,6 @@
raw_extrinsic_metadata_get_cols = [
"raw_extrinsic_metadata.target",
- "raw_extrinsic_metadata.type",
"discovery_date",
"metadata_authority.type",
"metadata_authority.url",
@@ -1179,30 +1240,29 @@
INNER JOIN metadata_authority
ON (metadata_authority.id=authority_id)
INNER JOIN metadata_fetcher ON (metadata_fetcher.id=fetcher_id)
- WHERE raw_extrinsic_metadata.target=%s AND authority_id=%s
+ WHERE (raw_extrinsic_metadata.target)=%s
+ AND authority_id=%s
"""
def raw_extrinsic_metadata_add(
self,
- type: str,
- target: str,
+ target: SWHID,
discovery_date: datetime.datetime,
authority_id: int,
fetcher_id: int,
format: str,
metadata: bytes,
- origin: Optional[str],
+ origin: Optional[SWHID],
visit: Optional[int],
- snapshot: Optional[str],
- release: Optional[str],
- revision: Optional[str],
+ snapshot: Optional[SWHID],
+ release: Optional[SWHID],
+ revision: Optional[SWHID],
path: Optional[bytes],
- directory: Optional[str],
+ directory: Optional[SWHID],
cur,
):
query = self._raw_extrinsic_metadata_insert_query
args: Dict[str, Any] = dict(
- type=type,
target=target,
authority_id=authority_id,
fetcher_id=fetcher_id,
@@ -1224,8 +1284,7 @@
def raw_extrinsic_metadata_get(
self,
- type: str,
- target: str,
+ target: SWHID,
authority_id: int,
after_time: Optional[datetime.datetime],
after_fetcher: Optional[int],
diff --git a/swh/storage/postgresql/storage.py b/swh/storage/postgresql/storage.py
--- a/swh/storage/postgresql/storage.py
+++ b/swh/storage/postgresql/storage.py
@@ -9,7 +9,7 @@
from contextlib import contextmanager
import datetime
import itertools
-from typing import Any, Counter, Dict, Iterable, List, Optional, Sequence, Tuple, Union
+from typing import Any, Counter, Dict, Iterable, List, Optional, Sequence, Tuple
import attr
import psycopg2
@@ -19,7 +19,6 @@
from swh.core.api.serializers import msgpack_dumps, msgpack_loads
from swh.core.db.common import db_transaction, db_transaction_generator
from swh.model.hashutil import DEFAULT_ALGORITHMS, hash_to_bytes, hash_to_hex
-from swh.model.identifiers import SWHID
from swh.model.model import (
SHA1_SIZE,
Content,
@@ -27,7 +26,6 @@
MetadataAuthority,
MetadataAuthorityType,
MetadataFetcher,
- MetadataTargetType,
Origin,
OriginVisit,
OriginVisitStatus,
@@ -41,6 +39,7 @@
SnapshotBranch,
TargetType,
)
+from swh.model.swhid import SWHID, SWHIDObjectType
from swh.storage.exc import HashCollision, StorageArgumentException, StorageDBError
from swh.storage.interface import (
VISIT_STATUSES,
@@ -50,12 +49,7 @@
)
from swh.storage.metrics import process_metrics, send_metric, timed
from swh.storage.objstorage import ObjStorage
-from swh.storage.utils import (
- extract_collision_hash,
- get_partition_bounds_bytes,
- map_optional,
- now,
-)
+from swh.storage.utils import extract_collision_hash, get_partition_bounds_bytes, now
from swh.storage.writer import JournalWriter
from . import converters
@@ -1250,26 +1244,22 @@
) -> None:
metadata = list(metadata)
self.journal_writer.raw_extrinsic_metadata_add(metadata)
- counter = Counter[MetadataTargetType]()
+ counter = Counter[SWHIDObjectType]()
for metadata_entry in metadata:
- authority_id = self._get_authority_id(metadata_entry.authority, db, cur)
- fetcher_id = self._get_fetcher_id(metadata_entry.fetcher, db, cur)
-
db.raw_extrinsic_metadata_add(
- type=metadata_entry.type.value,
- target=str(metadata_entry.target),
+ target=metadata_entry.target,
+ authority_id=self._get_authority_id(metadata_entry.authority, db, cur),
discovery_date=metadata_entry.discovery_date,
- authority_id=authority_id,
- fetcher_id=fetcher_id,
+ fetcher_id=self._get_fetcher_id(metadata_entry.fetcher, db, cur),
format=metadata_entry.format,
metadata=metadata_entry.metadata,
origin=metadata_entry.origin,
visit=metadata_entry.visit,
- snapshot=map_optional(str, metadata_entry.snapshot),
- release=map_optional(str, metadata_entry.release),
- revision=map_optional(str, metadata_entry.revision),
+ snapshot=metadata_entry.snapshot,
+ release=metadata_entry.release,
+ revision=metadata_entry.revision,
path=metadata_entry.path,
- directory=map_optional(str, metadata_entry.directory),
+ directory=metadata_entry.directory,
cur=cur,
)
counter[metadata_entry.type] += 1
@@ -1284,8 +1274,7 @@
@db_transaction()
def raw_extrinsic_metadata_get(
self,
- type: MetadataTargetType,
- target: Union[str, SWHID],
+ target: SWHID,
authority: MetadataAuthority,
after: Optional[datetime.datetime] = None,
page_token: Optional[bytes] = None,
@@ -1293,19 +1282,6 @@
db=None,
cur=None,
) -> PagedResult[RawExtrinsicMetadata]:
- if type == MetadataTargetType.ORIGIN:
- if isinstance(target, SWHID):
- raise StorageArgumentException(
- f"raw_extrinsic_metadata_get called with type='origin', "
- f"but provided target is a SWHID: {target!r}"
- )
- else:
- if not isinstance(target, SWHID):
- raise StorageArgumentException(
- f"raw_extrinsic_metadata_get called with type!='origin', "
- f"but provided target is not a SWHID: {target!r}"
- )
-
if page_token:
(after_time, after_fetcher) = msgpack_loads(base64.b64decode(page_token))
if after and after_time < after:
@@ -1321,12 +1297,13 @@
return PagedResult(next_page_token=None, results=[],)
rows = db.raw_extrinsic_metadata_get(
- type, str(target), authority_id, after_time, after_fetcher, limit + 1, cur,
+ target, authority_id, after_time, after_fetcher, limit + 1, cur,
)
+
rows = [dict(zip(db.raw_extrinsic_metadata_get_cols, row)) for row in rows]
results = []
for row in rows:
- assert str(target) == row["raw_extrinsic_metadata.target"]
+ assert target == row["raw_extrinsic_metadata.target"]
results.append(converters.db_to_raw_extrinsic_metadata(row))
if len(results) > limit:
diff --git a/swh/storage/sql/20-enums.sql b/swh/storage/sql/20-enums.sql
--- a/swh/storage/sql/20-enums.sql
+++ b/swh/storage/sql/20-enums.sql
@@ -23,3 +23,5 @@
'failed'
);
comment on type origin_visit_state IS 'Possible origin visit status values';
+
+create type swhid_type as enum ('cnt', 'dir', 'rev', 'rel', 'snp', 'ori');
diff --git a/swh/storage/sql/30-schema.sql b/swh/storage/sql/30-schema.sql
--- a/swh/storage/sql/30-schema.sql
+++ b/swh/storage/sql/30-schema.sql
@@ -37,8 +37,8 @@
-- a set of UNIX-like access permissions, as manipulated by, e.g., chmod
create domain file_perms as int;
--- an SWHID
-create domain swhid as text check (value ~ '^swh:[0-9]+:.*');
+-- a SWHID
+create type swhid as (version int, object_type swhid_type, object_id bytea);
-- Checksums about actual file content. Note that the content itself is not
@@ -430,8 +430,7 @@
-- Extrinsic metadata on a DAG objects and origins.
create table raw_extrinsic_metadata
(
- type text not null,
- target text not null,
+ target swhid not null,
-- metadata source
authority_id bigint not null,
@@ -443,7 +442,7 @@
metadata bytea not null,
-- context
- origin text,
+ origin swhid,
visit bigint,
snapshot swhid,
release swhid,
@@ -453,7 +452,6 @@
);
comment on table raw_extrinsic_metadata is 'keeps all metadata found concerning an object';
-comment on column raw_extrinsic_metadata.type is 'the type of object (content/directory/revision/release/snapshot/origin) the metadata is on';
comment on column raw_extrinsic_metadata.target is 'the SWHID or origin URL for which the metadata was found';
comment on column raw_extrinsic_metadata.discovery_date is 'the date of retrieval';
comment on column raw_extrinsic_metadata.authority_id is 'the metadata provider: github, openhub, deposit, etc.';
diff --git a/swh/storage/tests/migrate_extrinsic_metadata/test_cran.py b/swh/storage/tests/migrate_extrinsic_metadata/test_cran.py
--- a/swh/storage/tests/migrate_extrinsic_metadata/test_cran.py
+++ b/swh/storage/tests/migrate_extrinsic_metadata/test_cran.py
@@ -11,15 +11,14 @@
import json
from unittest.mock import Mock, call
-from swh.model.identifiers import parse_swhid
from swh.model.model import (
MetadataAuthority,
MetadataAuthorityType,
MetadataFetcher,
- MetadataTargetType,
Origin,
RawExtrinsicMetadata,
)
+from swh.model.swhid import SWHID, SWHIDObjectType, parse_swhid
from swh.storage.migrate_extrinsic_metadata import cran_package_from_url, handle_row
FETCHER = MetadataFetcher(
@@ -102,7 +101,9 @@
}
origin_url = "https://cran.r-project.org/package=ExtremeRisks"
-
+ origin_swhid = SWHID(
+ object_type=SWHIDObjectType.ORIGIN, object_id=origin_url.encode()
+ )
storage = Mock()
def origin_get(urls):
@@ -118,7 +119,6 @@
call.raw_extrinsic_metadata_add(
[
RawExtrinsicMetadata(
- type=MetadataTargetType.DIRECTORY,
target=DIRECTORY_SWHID,
discovery_date=datetime.datetime(
2020, 5, 7, 15, 27, 38, 652281, tzinfo=datetime.timezone.utc,
@@ -127,7 +127,7 @@
fetcher=FETCHER,
format="original-artifacts-json",
metadata=json.dumps(dest_original_artifacts).encode(),
- origin=origin_url,
+ origin=origin_swhid,
revision=parse_swhid(
"swh:1:rev:000361aa33842cbdea5fa6e77db696b937ebd269"
),
@@ -191,6 +191,9 @@
}
origin_url = "https://cran.r-project.org/package=gofgamma"
+ origin_swhid = SWHID(
+ object_type=SWHIDObjectType.ORIGIN, object_id=origin_url.encode()
+ )
storage = Mock()
@@ -207,7 +210,6 @@
call.raw_extrinsic_metadata_add(
[
RawExtrinsicMetadata(
- type=MetadataTargetType.DIRECTORY,
target=DIRECTORY_SWHID,
discovery_date=datetime.datetime(
2020, 4, 30, 11, 1, 57, 832481, tzinfo=datetime.timezone.utc,
@@ -216,7 +218,7 @@
fetcher=FETCHER,
format="original-artifacts-json",
metadata=json.dumps(dest_original_artifacts).encode(),
- origin=origin_url,
+ origin=origin_swhid,
revision=parse_swhid(
"swh:1:rev:0000d4ef5e166122aee6862ad38a18ce5386cc3e"
),
@@ -269,6 +271,9 @@
}
origin_url = "https://cran.r-project.org/package=r2mlm"
+ origin_swhid = SWHID(
+ object_type=SWHIDObjectType.ORIGIN, object_id=origin_url.encode()
+ )
storage = Mock()
@@ -285,7 +290,6 @@
call.raw_extrinsic_metadata_add(
[
RawExtrinsicMetadata(
- type=MetadataTargetType.DIRECTORY,
target=DIRECTORY_SWHID,
discovery_date=datetime.datetime(
2020, 9, 25, 14, 4, 20, 926667, tzinfo=datetime.timezone.utc,
@@ -294,7 +298,7 @@
fetcher=FETCHER,
format="original-artifacts-json",
metadata=json.dumps(original_artifacts).encode(),
- origin=origin_url,
+ origin=origin_swhid,
revision=parse_swhid(
"swh:1:rev:2e223782ee4ba152e4c886f797976241c39a9aab"
),
diff --git a/swh/storage/tests/migrate_extrinsic_metadata/test_debian.py b/swh/storage/tests/migrate_extrinsic_metadata/test_debian.py
--- a/swh/storage/tests/migrate_extrinsic_metadata/test_debian.py
+++ b/swh/storage/tests/migrate_extrinsic_metadata/test_debian.py
@@ -15,12 +15,10 @@
import attr
import pytest
-from swh.model.identifiers import parse_swhid
from swh.model.model import (
MetadataAuthority,
MetadataAuthorityType,
MetadataFetcher,
- MetadataTargetType,
Origin,
OriginVisit,
OriginVisitStatus,
@@ -34,6 +32,7 @@
Timestamp,
TimestampWithTimezone,
)
+from swh.model.swhid import SWHID, SWHIDObjectType, parse_swhid
from swh.storage import get_storage
from swh.storage.interface import ListOrder, PagedResult
from swh.storage.migrate_extrinsic_metadata import debian_origins_from_row, handle_row
@@ -425,6 +424,9 @@
}
origin_url = "deb://Debian/packages/kalgebra"
+ origin_swhid = SWHID(
+ object_type=SWHIDObjectType.ORIGIN, object_id=origin_url.encode()
+ )
storage = Mock()
@@ -437,7 +439,6 @@
call.raw_extrinsic_metadata_add(
[
RawExtrinsicMetadata(
- type=MetadataTargetType.DIRECTORY,
target=DIRECTORY_SWHID,
discovery_date=datetime.datetime(
2020, 1, 26, 22, 3, 24, tzinfo=datetime.timezone.utc,
@@ -446,7 +447,7 @@
fetcher=FETCHER,
format="original-artifacts-json",
metadata=json.dumps(dest_original_artifacts).encode(),
- origin=origin_url,
+ origin=origin_swhid,
revision=parse_swhid(
"swh:1:rev:0000036c311ef33a281b05688f6eadcfc0943aee"
),
@@ -545,6 +546,9 @@
storage = Mock()
origin_url = "http://snapshot.debian.org/package/pymongo"
+ origin_swhid = SWHID(
+ object_type=SWHIDObjectType.ORIGIN, object_id=origin_url.encode()
+ )
deposit_cur = None
with patch("debian_origins_from_row", return_value=[origin_url]):
@@ -554,7 +558,6 @@
call.raw_extrinsic_metadata_add(
[
RawExtrinsicMetadata(
- type=MetadataTargetType.DIRECTORY,
target=DIRECTORY_SWHID,
discovery_date=datetime.datetime(
2011, 3, 31, 20, 17, 41, tzinfo=datetime.timezone.utc
@@ -563,7 +566,7 @@
fetcher=FETCHER,
format="original-artifacts-json",
metadata=json.dumps(dest_original_artifacts).encode(),
- origin=origin_url,
+ origin=origin_swhid,
revision=parse_swhid(
"swh:1:rev:000001c28c8fca01b904de92a2640a866ce03cb7"
),
diff --git a/swh/storage/tests/migrate_extrinsic_metadata/test_deposit.py b/swh/storage/tests/migrate_extrinsic_metadata/test_deposit.py
--- a/swh/storage/tests/migrate_extrinsic_metadata/test_deposit.py
+++ b/swh/storage/tests/migrate_extrinsic_metadata/test_deposit.py
@@ -11,15 +11,14 @@
import json
from unittest.mock import MagicMock, Mock, call
-from swh.model.identifiers import parse_swhid
from swh.model.model import (
MetadataAuthority,
MetadataAuthorityType,
MetadataFetcher,
- MetadataTargetType,
Origin,
RawExtrinsicMetadata,
)
+from swh.model.swhid import SWHID, SWHIDObjectType, parse_swhid
from swh.storage.migrate_extrinsic_metadata import (
DEPOSIT_COLS,
cran_package_from_url,
@@ -130,6 +129,9 @@
origin_url = (
"https://www.softwareheritage.org/check-deposit-2020-03-11T11:07:18.424476"
)
+ origin_swhid = SWHID(
+ object_type=SWHIDObjectType.ORIGIN, object_id=origin_url.encode()
+ )
swhid = (
f"swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea"
@@ -186,7 +188,6 @@
call.raw_extrinsic_metadata_add(
[
RawExtrinsicMetadata(
- type=MetadataTargetType.DIRECTORY,
target=DIRECTORY_SWHID,
discovery_date=datetime.datetime(
2020, 3, 11, 11, 7, 18, 688410, tzinfo=datetime.timezone.utc
@@ -195,7 +196,7 @@
fetcher=FETCHER,
format="sword-v2-atom-codemeta-v2-in-json",
metadata=json.dumps(extrinsic_metadata).encode(),
- origin=origin_url,
+ origin=origin_swhid,
revision=parse_swhid(
"swh:1:rev:022310df16fd9e4d4f81fe36a142e82db977c01d"
),
@@ -205,7 +206,6 @@
call.raw_extrinsic_metadata_add(
[
RawExtrinsicMetadata(
- type=MetadataTargetType.DIRECTORY,
target=DIRECTORY_SWHID,
discovery_date=datetime.datetime(
2020, 3, 11, 11, 11, 36, 336283, tzinfo=datetime.timezone.utc
@@ -214,7 +214,7 @@
fetcher=FETCHER,
format="original-artifacts-json",
metadata=json.dumps(original_artifacts).encode(),
- origin=origin_url,
+ origin=origin_swhid,
revision=parse_swhid(
"swh:1:rev:022310df16fd9e4d4f81fe36a142e82db977c01d"
),
@@ -333,6 +333,9 @@
]
origin_url = "https://hal.archives-ouvertes.fr/hal-01243573"
+ origin_swhid = SWHID(
+ object_type=SWHIDObjectType.ORIGIN, object_id=origin_url.encode()
+ )
storage = Mock()
@@ -352,7 +355,6 @@
call.raw_extrinsic_metadata_add(
[
RawExtrinsicMetadata(
- type=MetadataTargetType.DIRECTORY,
target=DIRECTORY_SWHID,
discovery_date=datetime.datetime(
2018, 1, 17, 12, 54, 0, 413748, tzinfo=datetime.timezone.utc
@@ -361,7 +363,7 @@
fetcher=FETCHER,
format="sword-v2-atom-codemeta-v2-in-json-with-expanded-namespaces",
metadata=json.dumps(extrinsic_metadata).encode(),
- origin=origin_url,
+ origin=origin_swhid,
revision=parse_swhid(
"swh:1:rev:0116cab71964d59c8570b4c5729b28bdd63c9b46"
),
@@ -371,7 +373,6 @@
call.raw_extrinsic_metadata_add(
[
RawExtrinsicMetadata(
- type=MetadataTargetType.DIRECTORY,
target=DIRECTORY_SWHID,
discovery_date=datetime.datetime(
2020, 5, 15, 14, 27, 21, 462270, tzinfo=datetime.timezone.utc
@@ -380,7 +381,7 @@
fetcher=FETCHER,
format="original-artifacts-json",
metadata=json.dumps(original_artifacts).encode(),
- origin=origin_url,
+ origin=origin_swhid,
revision=parse_swhid(
"swh:1:rev:0116cab71964d59c8570b4c5729b28bdd63c9b46"
),
@@ -498,6 +499,9 @@
origin_url = (
"https://www.softwareheritage.org/check-deposit-2020-06-26T13:50:07.564420"
)
+ origin_swhid = SWHID(
+ object_type=SWHIDObjectType.ORIGIN, object_id=origin_url.encode()
+ )
storage = Mock()
@@ -517,7 +521,6 @@
call.raw_extrinsic_metadata_add(
[
RawExtrinsicMetadata(
- type=MetadataTargetType.DIRECTORY,
target=DIRECTORY_SWHID,
discovery_date=datetime.datetime(
2020, 6, 26, 13, 50, 8, 216113, tzinfo=datetime.timezone.utc
@@ -526,7 +529,7 @@
fetcher=FETCHER,
format="sword-v2-atom-codemeta-v2-in-json",
metadata=json.dumps(extrinsic_metadata).encode(),
- origin=origin_url,
+ origin=origin_swhid,
revision=parse_swhid(
"swh:1:rev:0122966e509317aece6a41d0f088da733cc09d0f"
),
@@ -536,7 +539,6 @@
call.raw_extrinsic_metadata_add(
[
RawExtrinsicMetadata(
- type=MetadataTargetType.DIRECTORY,
target=DIRECTORY_SWHID,
discovery_date=datetime.datetime(
2020, 6, 26, 13, 50, 22, 640625, tzinfo=datetime.timezone.utc
@@ -545,7 +547,7 @@
fetcher=FETCHER,
format="original-artifacts-json",
metadata=json.dumps(original_artifacts).encode(),
- origin=origin_url,
+ origin=origin_swhid,
revision=parse_swhid(
"swh:1:rev:0122966e509317aece6a41d0f088da733cc09d0f"
),
@@ -662,6 +664,9 @@
]
origin_url = "https://hal.archives-ouvertes.fr/hal-02960679"
+ origin_swhid = SWHID(
+ object_type=SWHIDObjectType.ORIGIN, object_id=origin_url.encode()
+ )
storage = Mock()
@@ -681,7 +686,6 @@
call.raw_extrinsic_metadata_add(
[
RawExtrinsicMetadata(
- type=MetadataTargetType.DIRECTORY,
target=DIRECTORY_SWHID,
discovery_date=datetime.datetime(
2020, 10, 9, 13, 38, 7, 394544, tzinfo=datetime.timezone.utc
@@ -690,7 +694,7 @@
fetcher=FETCHER,
format="sword-v2-atom-codemeta-v2-in-json",
metadata=json.dumps(extrinsic_metadata).encode(),
- origin=origin_url,
+ origin=origin_swhid,
revision=parse_swhid(
"swh:1:rev:4a9d637ba507a2b93365250428e6e3f021f194d0"
),
@@ -700,7 +704,6 @@
call.raw_extrinsic_metadata_add(
[
RawExtrinsicMetadata(
- type=MetadataTargetType.DIRECTORY,
target=DIRECTORY_SWHID,
discovery_date=datetime.datetime(
2020, 10, 9, 13, 38, 25, 888646, tzinfo=datetime.timezone.utc
@@ -709,7 +712,7 @@
fetcher=FETCHER,
format="original-artifacts-json",
metadata=json.dumps(original_artifacts).encode(),
- origin=origin_url,
+ origin=origin_swhid,
revision=parse_swhid(
"swh:1:rev:4a9d637ba507a2b93365250428e6e3f021f194d0"
),
@@ -837,6 +840,9 @@
]
origin_url = "https://software.intel.com/f80482de-90a8-4c32-bce4-6f6918d492ff"
+ origin_swhid = SWHID(
+ object_type=SWHIDObjectType.ORIGIN, object_id=origin_url.encode()
+ )
storage = Mock()
@@ -856,7 +862,6 @@
call.raw_extrinsic_metadata_add(
[
RawExtrinsicMetadata(
- type=MetadataTargetType.DIRECTORY,
target=DIRECTORY_SWHID,
discovery_date=datetime.datetime(
2019, 5, 14, 7, 49, 36, 775072, tzinfo=datetime.timezone.utc
@@ -865,7 +870,7 @@
fetcher=FETCHER,
format="sword-v2-atom-codemeta-v2-in-json",
metadata=json.dumps(extrinsic_metadata).encode(),
- origin=origin_url,
+ origin=origin_swhid,
revision=parse_swhid(
"swh:1:rev:09356053c49ad0f9e62e51c29d3e617c791140df"
),
@@ -875,7 +880,6 @@
call.raw_extrinsic_metadata_add(
[
RawExtrinsicMetadata(
- type=MetadataTargetType.DIRECTORY,
target=DIRECTORY_SWHID,
discovery_date=datetime.datetime(
2019, 5, 14, 7, 28, 33, 210100, tzinfo=datetime.timezone.utc
@@ -884,7 +888,7 @@
fetcher=FETCHER,
format="sword-v2-atom-codemeta-v2-in-json",
metadata=json.dumps(extrinsic_metadata).encode(),
- origin=origin_url,
+ origin=origin_swhid,
revision=parse_swhid(
"swh:1:rev:09356053c49ad0f9e62e51c29d3e617c791140df"
),
@@ -894,7 +898,6 @@
call.raw_extrinsic_metadata_add(
[
RawExtrinsicMetadata(
- type=MetadataTargetType.DIRECTORY,
target=DIRECTORY_SWHID,
discovery_date=datetime.datetime(
2019, 5, 14, 7, 49, 36, 775072, tzinfo=datetime.timezone.utc
@@ -903,7 +906,7 @@
fetcher=FETCHER,
format="original-artifacts-json",
metadata=json.dumps(dest_original_artifacts).encode(),
- origin=origin_url,
+ origin=origin_swhid,
revision=parse_swhid(
"swh:1:rev:09356053c49ad0f9e62e51c29d3e617c791140df"
),
@@ -1056,6 +1059,9 @@
]
origin_url = "https://hal.archives-ouvertes.fr/hal-01243573"
+ origin_swhid = SWHID(
+ object_type=SWHIDObjectType.ORIGIN, object_id=origin_url.encode()
+ )
storage = Mock()
@@ -1075,7 +1081,6 @@
call.raw_extrinsic_metadata_add(
[
RawExtrinsicMetadata(
- type=MetadataTargetType.DIRECTORY,
target=DIRECTORY_SWHID,
discovery_date=datetime.datetime(
2018, 1, 17, 12, 49, 30, 645576, tzinfo=datetime.timezone.utc
@@ -1084,7 +1089,7 @@
fetcher=FETCHER,
format="sword-v2-atom-codemeta-v2-in-json-with-expanded-namespaces",
metadata=json.dumps(extrinsic_metadata).encode(),
- origin=origin_url,
+ origin=origin_swhid,
revision=parse_swhid(
"swh:1:rev:03987f056eaf4596cd20d7b2ee01c9b84ceddfa8"
),
@@ -1164,6 +1169,9 @@
]
origin_url = "https://inria.halpreprod.archives-ouvertes.fr/hal-01588781"
+ origin_swhid = SWHID(
+ object_type=SWHIDObjectType.ORIGIN, object_id=origin_url.encode()
+ )
storage = Mock()
@@ -1183,7 +1191,6 @@
call.raw_extrinsic_metadata_add(
[
RawExtrinsicMetadata(
- type=MetadataTargetType.DIRECTORY,
target=DIRECTORY_SWHID,
discovery_date=datetime.datetime(
2018, 1, 10, 13, 14, 50, 555143, tzinfo=datetime.timezone.utc
@@ -1192,7 +1199,7 @@
fetcher=FETCHER,
format="sword-v2-atom-codemeta-v2-in-json-with-expanded-namespaces",
metadata=json.dumps(extrinsic_metadata).encode(),
- origin=origin_url,
+ origin=origin_swhid,
revision=parse_swhid(
"swh:1:rev:2d7bce631fc791080311eb835c47428e586a6ea4"
),
@@ -1203,7 +1210,7 @@
]
-def test_deposit_missing_metadata_in_revision():
+def test_deposit_ignore_origin_in_metadata():
extrinsic_metadata = {
"id": "hal-01243573",
"@xmlns": "http://www.w3.org/2005/Atom",
@@ -1297,6 +1304,9 @@
origin_url = "https://hal.archives-ouvertes.fr/hal-01243573"
# /!\ not https://hal-test.archives-ouvertes.fr/hal-01243573
# do not trust the metadata!
+ origin_swhid = SWHID(
+ object_type=SWHIDObjectType.ORIGIN, object_id=origin_url.encode()
+ )
storage = Mock()
@@ -1311,12 +1321,11 @@
deposit_cur.execute.assert_called_once()
deposit_cur.__iter__.assert_called_once()
- assert storage.method_calls == [
+ expected = [
call.origin_get([origin_url]),
call.raw_extrinsic_metadata_add(
[
RawExtrinsicMetadata(
- type=MetadataTargetType.DIRECTORY,
target=DIRECTORY_SWHID,
discovery_date=datetime.datetime(
2019, 2, 25, 15, 49, 12, 302745, tzinfo=datetime.timezone.utc
@@ -1325,7 +1334,7 @@
fetcher=FETCHER,
format="sword-v2-atom-codemeta-v2-in-json",
metadata=json.dumps(extrinsic_metadata).encode(),
- origin=origin_url,
+ origin=origin_swhid,
revision=parse_swhid(
"swh:1:rev:034076f3f41ee1204eb9f64082cbe6e950d7bb8a"
),
@@ -1335,7 +1344,6 @@
call.raw_extrinsic_metadata_add(
[
RawExtrinsicMetadata(
- type=MetadataTargetType.DIRECTORY,
target=DIRECTORY_SWHID,
discovery_date=datetime.datetime(
2019, 2, 25, 15, 54, 30, 102072, tzinfo=datetime.timezone.utc
@@ -1344,7 +1352,7 @@
fetcher=FETCHER,
format="original-artifacts-json",
metadata=json.dumps(dest_original_artifacts).encode(),
- origin=origin_url,
+ origin=origin_swhid,
revision=parse_swhid(
"swh:1:rev:034076f3f41ee1204eb9f64082cbe6e950d7bb8a"
),
@@ -1352,3 +1360,4 @@
]
),
]
+ assert storage.method_calls == expected
diff --git a/swh/storage/tests/migrate_extrinsic_metadata/test_gnu.py b/swh/storage/tests/migrate_extrinsic_metadata/test_gnu.py
--- a/swh/storage/tests/migrate_extrinsic_metadata/test_gnu.py
+++ b/swh/storage/tests/migrate_extrinsic_metadata/test_gnu.py
@@ -11,15 +11,14 @@
import json
from unittest.mock import Mock, call
-from swh.model.identifiers import parse_swhid
from swh.model.model import (
MetadataAuthority,
MetadataAuthorityType,
MetadataFetcher,
- MetadataTargetType,
Origin,
RawExtrinsicMetadata,
)
+from swh.model.swhid import SWHID, SWHIDObjectType, parse_swhid
from swh.storage.migrate_extrinsic_metadata import cran_package_from_url, handle_row
FETCHER = MetadataFetcher(
@@ -76,6 +75,9 @@
}
origin_url = "https://ftp.gnu.org/gnu/gperf/"
+ origin_swhid = SWHID(
+ object_type=SWHIDObjectType.ORIGIN, object_id=origin_url.encode()
+ )
storage = Mock()
@@ -92,7 +94,6 @@
call.raw_extrinsic_metadata_add(
[
RawExtrinsicMetadata(
- type=MetadataTargetType.DIRECTORY,
target=DIRECTORY_SWHID,
discovery_date=datetime.datetime(
2019, 11, 27, 11, 17, 38, 318997, tzinfo=datetime.timezone.utc
@@ -101,7 +102,7 @@
fetcher=FETCHER,
format="original-artifacts-json",
metadata=json.dumps(original_artifacts).encode(),
- origin=origin_url,
+ origin=origin_swhid,
revision=parse_swhid(
"swh:1:rev:001c71458e405b25baccc80b99f6634dff9d2b18"
),
diff --git a/swh/storage/tests/migrate_extrinsic_metadata/test_nixguix.py b/swh/storage/tests/migrate_extrinsic_metadata/test_nixguix.py
--- a/swh/storage/tests/migrate_extrinsic_metadata/test_nixguix.py
+++ b/swh/storage/tests/migrate_extrinsic_metadata/test_nixguix.py
@@ -11,15 +11,14 @@
import json
from unittest.mock import Mock, call
-from swh.model.identifiers import parse_swhid
from swh.model.model import (
MetadataAuthority,
MetadataAuthorityType,
MetadataFetcher,
- MetadataTargetType,
Origin,
RawExtrinsicMetadata,
)
+from swh.model.swhid import SWHID, SWHIDObjectType, parse_swhid
from swh.storage.migrate_extrinsic_metadata import cran_package_from_url, handle_row
FETCHER = MetadataFetcher(
@@ -74,6 +73,9 @@
}
origin_url = "https://nix-community.github.io/nixpkgs-swh/sources-unstable.json"
+ origin_swhid = SWHID(
+ object_type=SWHIDObjectType.ORIGIN, object_id=origin_url.encode()
+ )
storage = Mock()
@@ -90,7 +92,6 @@
call.raw_extrinsic_metadata_add(
[
RawExtrinsicMetadata(
- type=MetadataTargetType.DIRECTORY,
target=DIRECTORY_SWHID,
discovery_date=datetime.datetime(
2020, 6, 3, 11, 25, 5, 259341, tzinfo=datetime.timezone.utc
@@ -99,7 +100,7 @@
fetcher=FETCHER,
format="nixguix-sources-json",
metadata=json.dumps(extrinsic_metadata).encode(),
- origin=origin_url,
+ origin=origin_swhid,
revision=parse_swhid(
"swh:1:rev:0001ba4dd05394850211d7b3854d9913d23ae379"
),
@@ -109,7 +110,6 @@
call.raw_extrinsic_metadata_add(
[
RawExtrinsicMetadata(
- type=MetadataTargetType.DIRECTORY,
target=DIRECTORY_SWHID,
discovery_date=datetime.datetime(
2020, 6, 3, 11, 25, 5, 259341, tzinfo=datetime.timezone.utc
@@ -118,7 +118,7 @@
fetcher=FETCHER,
format="original-artifacts-json",
metadata=json.dumps(original_artifacts).encode(),
- origin=origin_url,
+ origin=origin_swhid,
revision=parse_swhid(
"swh:1:rev:0001ba4dd05394850211d7b3854d9913d23ae379"
),
diff --git a/swh/storage/tests/migrate_extrinsic_metadata/test_npm.py b/swh/storage/tests/migrate_extrinsic_metadata/test_npm.py
--- a/swh/storage/tests/migrate_extrinsic_metadata/test_npm.py
+++ b/swh/storage/tests/migrate_extrinsic_metadata/test_npm.py
@@ -11,15 +11,14 @@
import json
from unittest.mock import Mock, call
-from swh.model.identifiers import parse_swhid
from swh.model.model import (
MetadataAuthority,
MetadataAuthorityType,
MetadataFetcher,
- MetadataTargetType,
Origin,
RawExtrinsicMetadata,
)
+from swh.model.swhid import SWHID, SWHIDObjectType, parse_swhid
from swh.storage.migrate_extrinsic_metadata import (
handle_row,
npm_package_from_source_url,
@@ -126,6 +125,9 @@
}
origin_url = "https://www.npmjs.com/package/@l3ilkojr/jdinsults"
+ origin_swhid = SWHID(
+ object_type=SWHIDObjectType.ORIGIN, object_id=origin_url.encode()
+ )
storage = Mock()
@@ -142,7 +144,6 @@
call.raw_extrinsic_metadata_add(
[
RawExtrinsicMetadata(
- type=MetadataTargetType.DIRECTORY,
target=DIRECTORY_SWHID,
discovery_date=datetime.datetime(
2020, 2, 27, 1, 35, 47, 965375, tzinfo=datetime.timezone.utc,
@@ -151,7 +152,7 @@
fetcher=FETCHER,
format="replicate-npm-package-json",
metadata=json.dumps(extrinsic_metadata).encode(),
- origin=origin_url,
+ origin=origin_swhid,
revision=parse_swhid(
"swh:1:rev:000002a49bba17ca8cf37f5f3d16aaacf95360fc"
),
@@ -161,7 +162,6 @@
call.raw_extrinsic_metadata_add(
[
RawExtrinsicMetadata(
- type=MetadataTargetType.DIRECTORY,
target=DIRECTORY_SWHID,
discovery_date=datetime.datetime(
2020, 2, 27, 1, 35, 47, 965375, tzinfo=datetime.timezone.utc,
@@ -170,7 +170,7 @@
fetcher=FETCHER,
format="original-artifacts-json",
metadata=json.dumps(original_artifacts).encode(),
- origin=origin_url,
+ origin=origin_swhid,
revision=parse_swhid(
"swh:1:rev:000002a49bba17ca8cf37f5f3d16aaacf95360fc"
),
@@ -231,6 +231,9 @@
}
origin_url = "https://www.npmjs.com/package/simplemaps"
+ origin_swhid = SWHID(
+ object_type=SWHIDObjectType.ORIGIN, object_id=origin_url.encode()
+ )
storage = Mock()
@@ -247,7 +250,6 @@
call.raw_extrinsic_metadata_add(
[
RawExtrinsicMetadata(
- type=MetadataTargetType.DIRECTORY,
target=DIRECTORY_SWHID,
discovery_date=datetime.datetime(
2016, 12, 23, 7, 21, 29, tzinfo=datetime.timezone.utc,
@@ -256,7 +258,7 @@
fetcher=FETCHER,
format="replicate-npm-package-json",
metadata=json.dumps(extrinsic_metadata).encode(),
- origin=origin_url,
+ origin=origin_swhid,
revision=parse_swhid(
"swh:1:rev:000004aeed09ee089c781264c04d2564fd58feb5"
),
@@ -266,7 +268,6 @@
call.raw_extrinsic_metadata_add(
[
RawExtrinsicMetadata(
- type=MetadataTargetType.DIRECTORY,
target=DIRECTORY_SWHID,
discovery_date=datetime.datetime(
2016, 12, 23, 7, 21, 29, tzinfo=datetime.timezone.utc,
@@ -275,7 +276,7 @@
fetcher=FETCHER,
format="original-artifacts-json",
metadata=json.dumps(original_artifacts).encode(),
- origin=origin_url,
+ origin=origin_swhid,
revision=parse_swhid(
"swh:1:rev:000004aeed09ee089c781264c04d2564fd58feb5"
),
@@ -332,6 +333,9 @@
}
origin_url = "https://www.npmjs.com/package/@piximi/components"
+ origin_swhid = SWHID(
+ object_type=SWHIDObjectType.ORIGIN, object_id=origin_url.encode()
+ )
storage = Mock()
@@ -348,7 +352,6 @@
call.raw_extrinsic_metadata_add(
[
RawExtrinsicMetadata(
- type=MetadataTargetType.DIRECTORY,
target=DIRECTORY_SWHID,
discovery_date=datetime.datetime(
2019, 6, 7, 19, 56, 4, tzinfo=datetime.timezone.utc,
@@ -357,7 +360,7 @@
fetcher=FETCHER,
format="replicate-npm-package-json",
metadata=json.dumps(extrinsic_metadata).encode(),
- origin=origin_url,
+ origin=origin_swhid,
revision=parse_swhid(
"swh:1:rev:00002019c5775874bced007a529bd3b78bf60457"
),
@@ -367,7 +370,6 @@
call.raw_extrinsic_metadata_add(
[
RawExtrinsicMetadata(
- type=MetadataTargetType.DIRECTORY,
target=DIRECTORY_SWHID,
discovery_date=datetime.datetime(
2019, 6, 7, 19, 56, 4, tzinfo=datetime.timezone.utc,
@@ -376,7 +378,7 @@
fetcher=FETCHER,
format="original-artifacts-json",
metadata=json.dumps(original_artifacts).encode(),
- origin=origin_url,
+ origin=origin_swhid,
revision=parse_swhid(
"swh:1:rev:00002019c5775874bced007a529bd3b78bf60457"
),
diff --git a/swh/storage/tests/migrate_extrinsic_metadata/test_pypi.py b/swh/storage/tests/migrate_extrinsic_metadata/test_pypi.py
--- a/swh/storage/tests/migrate_extrinsic_metadata/test_pypi.py
+++ b/swh/storage/tests/migrate_extrinsic_metadata/test_pypi.py
@@ -13,12 +13,10 @@
import attr
-from swh.model.identifiers import parse_swhid
from swh.model.model import (
MetadataAuthority,
MetadataAuthorityType,
MetadataFetcher,
- MetadataTargetType,
Origin,
OriginVisit,
OriginVisitStatus,
@@ -27,6 +25,7 @@
SnapshotBranch,
TargetType,
)
+from swh.model.swhid import SWHID, SWHIDObjectType, parse_swhid
from swh.storage import get_storage
from swh.storage.interface import PagedResult
from swh.storage.migrate_extrinsic_metadata import (
@@ -266,6 +265,9 @@
}
origin_url = "https://pypi.org/project/m3-ui/"
+ origin_swhid = SWHID(
+ object_type=SWHIDObjectType.ORIGIN, object_id=origin_url.encode()
+ )
storage = get_storage("memory")
storage.origin_add([Origin(url=origin_url)])
@@ -282,11 +284,10 @@
revision_swhid = parse_swhid("swh:1:rev:000007617b53e7b1458f695dd07de4ce55af1517")
assert storage.raw_extrinsic_metadata_get(
- MetadataTargetType.DIRECTORY, DIRECTORY_SWHID, authority=PYPI_AUTHORITY,
+ DIRECTORY_SWHID, authority=PYPI_AUTHORITY,
) == PagedResult(
results=[
RawExtrinsicMetadata(
- type=MetadataTargetType.DIRECTORY,
target=DIRECTORY_SWHID,
discovery_date=datetime.datetime(
2020, 1, 23, 18, 43, 9, 109407, tzinfo=datetime.timezone.utc,
@@ -295,18 +296,17 @@
fetcher=FETCHER,
format="pypi-project-json",
metadata=json.dumps(extrinsic_metadata).encode(),
- origin=origin_url,
+ origin=origin_swhid,
revision=revision_swhid,
),
],
next_page_token=None,
)
assert storage.raw_extrinsic_metadata_get(
- MetadataTargetType.DIRECTORY, DIRECTORY_SWHID, authority=SWH_AUTHORITY,
+ DIRECTORY_SWHID, authority=SWH_AUTHORITY,
) == PagedResult(
results=[
RawExtrinsicMetadata(
- type=MetadataTargetType.DIRECTORY,
target=DIRECTORY_SWHID,
discovery_date=datetime.datetime(
2020, 1, 23, 18, 43, 9, 109407, tzinfo=datetime.timezone.utc,
@@ -315,7 +315,7 @@
fetcher=FETCHER,
format="original-artifacts-json",
metadata=json.dumps(original_artifacts).encode(),
- origin=origin_url,
+ origin=origin_swhid,
revision=revision_swhid,
),
],
@@ -405,11 +405,10 @@
revision_swhid = parse_swhid("swh:1:rev:000004d6382c4ad4c0519266626c36551f0e51ca")
assert storage.raw_extrinsic_metadata_get(
- MetadataTargetType.DIRECTORY, DIRECTORY_SWHID, authority=PYPI_AUTHORITY,
+ DIRECTORY_SWHID, authority=PYPI_AUTHORITY,
) == PagedResult(
results=[
RawExtrinsicMetadata(
- type=MetadataTargetType.DIRECTORY,
target=DIRECTORY_SWHID,
discovery_date=datetime.datetime(
2019, 1, 23, 22, 10, 55, tzinfo=datetime.timezone.utc,
@@ -425,11 +424,10 @@
next_page_token=None,
)
assert storage.raw_extrinsic_metadata_get(
- MetadataTargetType.DIRECTORY, DIRECTORY_SWHID, authority=SWH_AUTHORITY,
+ DIRECTORY_SWHID, authority=SWH_AUTHORITY,
) == PagedResult(
results=[
RawExtrinsicMetadata(
- type=MetadataTargetType.DIRECTORY,
target=DIRECTORY_SWHID,
discovery_date=datetime.datetime(
2019, 1, 23, 22, 10, 55, tzinfo=datetime.timezone.utc,
@@ -512,14 +510,13 @@
revision_swhid = parse_swhid("swh:1:rev:4ea9917cdf53cd13534a042e4eb3787b86c834d2")
assert storage.raw_extrinsic_metadata_get(
- MetadataTargetType.DIRECTORY, DIRECTORY_SWHID, authority=PYPI_AUTHORITY,
+ DIRECTORY_SWHID, authority=PYPI_AUTHORITY,
) == PagedResult(results=[], next_page_token=None,)
assert storage.raw_extrinsic_metadata_get(
- MetadataTargetType.DIRECTORY, DIRECTORY_SWHID, authority=SWH_AUTHORITY,
+ DIRECTORY_SWHID, authority=SWH_AUTHORITY,
) == PagedResult(
results=[
RawExtrinsicMetadata(
- type=MetadataTargetType.DIRECTORY,
target=DIRECTORY_SWHID,
discovery_date=datetime.datetime(
2014, 5, 7, 22, 3, tzinfo=datetime.timezone.utc,
@@ -580,6 +577,9 @@
}
origin_url = "https://pypi.org/project/PyPDFLite/"
+ origin_swhid = SWHID(
+ object_type=SWHIDObjectType.ORIGIN, object_id=origin_url.encode()
+ )
storage = get_storage("memory")
@@ -624,14 +624,13 @@
revision_swhid = parse_swhid("swh:1:rev:4ea9917cdf53cd13534a042e4eb3787b86c834d2")
assert storage.raw_extrinsic_metadata_get(
- MetadataTargetType.DIRECTORY, DIRECTORY_SWHID, authority=PYPI_AUTHORITY,
+ DIRECTORY_SWHID, authority=PYPI_AUTHORITY,
) == PagedResult(results=[], next_page_token=None,)
assert storage.raw_extrinsic_metadata_get(
- MetadataTargetType.DIRECTORY, DIRECTORY_SWHID, authority=SWH_AUTHORITY,
+ DIRECTORY_SWHID, authority=SWH_AUTHORITY,
) == PagedResult(
results=[
RawExtrinsicMetadata(
- type=MetadataTargetType.DIRECTORY,
target=DIRECTORY_SWHID,
discovery_date=datetime.datetime(
2014, 5, 7, 22, 3, tzinfo=datetime.timezone.utc,
@@ -640,7 +639,7 @@
fetcher=FETCHER,
format="original-artifacts-json",
metadata=json.dumps(dest_original_artifacts).encode(),
- origin=origin_url,
+ origin=origin_swhid,
revision=revision_swhid,
),
],
diff --git a/swh/storage/tests/storage_data.py b/swh/storage/tests/storage_data.py
--- a/swh/storage/tests/storage_data.py
+++ b/swh/storage/tests/storage_data.py
@@ -10,7 +10,6 @@
from swh.model import from_disk
from swh.model.hashutil import hash_to_bytes, hash_to_hex
-from swh.model.identifiers import parse_swhid
from swh.model.model import (
Content,
Directory,
@@ -18,7 +17,6 @@
MetadataAuthority,
MetadataAuthorityType,
MetadataFetcher,
- MetadataTargetType,
ObjectType,
Origin,
OriginVisit,
@@ -34,6 +32,23 @@
Timestamp,
TimestampWithTimezone,
)
+from swh.model.swhid import SWHID
+from swh.model.swhid import _swhid_type_map as swhid_type_map
+from swh.model.swhid import parse_swhid
+
+
+class SWHIDProvider:
+ def __init__(self, data):
+ self._data = data
+
+ def __getattr__(self, name):
+ return mkswhid(getattr(self._data, name))
+
+
+def mkswhid(obj):
+ object_type = swhid_type_map.get(obj.object_type)
+ if object_type:
+ return SWHID(object_type=object_type, object_id=obj.id)
class StorageData:
@@ -41,6 +56,8 @@
"""
+ swhid: SWHIDProvider
+
content = Content(
data=b"42\n",
length=3,
@@ -463,9 +480,8 @@
snapshots: Tuple[Snapshot, ...] = (snapshot, empty_snapshot, complete_snapshot)
content_metadata1 = RawExtrinsicMetadata(
- type=MetadataTargetType.CONTENT,
target=parse_swhid(f"swh:1:cnt:{hash_to_hex(content.sha1_git)}"),
- origin=origin.url,
+ origin=mkswhid(origin),
discovery_date=datetime.datetime(
2015, 1, 1, 21, 0, 0, tzinfo=datetime.timezone.utc
),
@@ -475,9 +491,8 @@
metadata=b'{"foo": "bar"}',
)
content_metadata2 = RawExtrinsicMetadata(
- type=MetadataTargetType.CONTENT,
target=parse_swhid(f"swh:1:cnt:{hash_to_hex(content.sha1_git)}"),
- origin=origin2.url,
+ origin=mkswhid(origin2),
discovery_date=datetime.datetime(
2017, 1, 1, 22, 0, 0, tzinfo=datetime.timezone.utc
),
@@ -487,7 +502,6 @@
metadata=b"foo: bar",
)
content_metadata3 = RawExtrinsicMetadata(
- type=MetadataTargetType.CONTENT,
target=parse_swhid(f"swh:1:cnt:{hash_to_hex(content.sha1_git)}"),
discovery_date=datetime.datetime(
2017, 1, 1, 22, 0, 0, tzinfo=datetime.timezone.utc
@@ -496,7 +510,7 @@
fetcher=attr.evolve(metadata_fetcher2, metadata=None),
format="yaml",
metadata=b"foo: bar",
- origin=origin.url,
+ origin=mkswhid(origin),
visit=42,
snapshot=parse_swhid(f"swh:1:snp:{hash_to_hex(snapshot.id)}"),
release=parse_swhid(f"swh:1:rel:{hash_to_hex(release.id)}"),
@@ -512,8 +526,7 @@
)
origin_metadata1 = RawExtrinsicMetadata(
- type=MetadataTargetType.ORIGIN,
- target=origin.url,
+ target=mkswhid(origin),
discovery_date=datetime.datetime(
2015, 1, 1, 21, 0, 0, tzinfo=datetime.timezone.utc
),
@@ -523,8 +536,7 @@
metadata=b'{"foo": "bar"}',
)
origin_metadata2 = RawExtrinsicMetadata(
- type=MetadataTargetType.ORIGIN,
- target=origin.url,
+ target=mkswhid(origin),
discovery_date=datetime.datetime(
2017, 1, 1, 22, 0, 0, tzinfo=datetime.timezone.utc
),
@@ -534,8 +546,7 @@
metadata=b"foo: bar",
)
origin_metadata3 = RawExtrinsicMetadata(
- type=MetadataTargetType.ORIGIN,
- target=origin.url,
+ target=mkswhid(origin),
discovery_date=datetime.datetime(
2017, 1, 1, 22, 0, 0, tzinfo=datetime.timezone.utc
),
@@ -550,3 +561,6 @@
origin_metadata2,
origin_metadata3,
)
+
+
+StorageData.swhid = SWHIDProvider(StorageData)
diff --git a/swh/storage/tests/storage_tests.py b/swh/storage/tests/storage_tests.py
--- a/swh/storage/tests/storage_tests.py
+++ b/swh/storage/tests/storage_tests.py
@@ -19,11 +19,9 @@
from swh.model import from_disk
from swh.model.hashutil import hash_to_bytes
from swh.model.hypothesis_strategies import objects
-from swh.model.identifiers import SWHID
from swh.model.model import (
Content,
Directory,
- MetadataTargetType,
Origin,
OriginVisit,
OriginVisitStatus,
@@ -3323,23 +3321,17 @@
swh_storage.metadata_authority_add([])
def test_content_metadata_add(self, swh_storage, sample_data):
- content = sample_data.content
+ content_swhid = sample_data.swhid.content
fetcher = sample_data.metadata_fetcher
authority = sample_data.metadata_authority
content_metadata = sample_data.content_metadata[:2]
- content_swhid = SWHID(
- object_type="content", object_id=hash_to_bytes(content.sha1_git)
- )
-
swh_storage.metadata_fetcher_add([fetcher])
swh_storage.metadata_authority_add([authority])
swh_storage.raw_extrinsic_metadata_add(content_metadata)
- result = swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.CONTENT, content_swhid, authority
- )
+ result = swh_storage.raw_extrinsic_metadata_get(content_swhid, authority)
assert result.next_page_token is None
assert list(sorted(result.results, key=lambda x: x.discovery_date,)) == list(
content_metadata
@@ -3356,13 +3348,10 @@
def test_content_metadata_add_duplicate(self, swh_storage, sample_data):
"""Duplicates should be silently updated."""
- content = sample_data.content
+ content_swhid = sample_data.swhid.content
fetcher = sample_data.metadata_fetcher
authority = sample_data.metadata_authority
content_metadata, content_metadata2 = sample_data.content_metadata[:2]
- content_swhid = SWHID(
- object_type="content", object_id=hash_to_bytes(content.sha1_git)
- )
new_content_metadata2 = attr.evolve(
content_metadata2, format="new-format", metadata=b"new-metadata",
@@ -3374,9 +3363,7 @@
swh_storage.raw_extrinsic_metadata_add([content_metadata, content_metadata2])
swh_storage.raw_extrinsic_metadata_add([new_content_metadata2])
- result = swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.CONTENT, content_swhid, authority
- )
+ result = swh_storage.raw_extrinsic_metadata_get(content_swhid, authority)
assert result.next_page_token is None
expected_results1 = (content_metadata, new_content_metadata2)
@@ -3397,8 +3384,8 @@
content1_metadata3,
) = sample_data.content_metadata[:3]
- content1_swhid = SWHID(object_type="content", object_id=content.sha1_git)
- content2_swhid = SWHID(object_type="content", object_id=content2.sha1_git)
+ content1_swhid = sample_data.swhid.content
+ content2_swhid = sample_data.swhid.content2
content2_metadata = attr.evolve(content1_metadata2, target=content2_swhid)
swh_storage.metadata_authority_add([authority, authority2])
@@ -3413,43 +3400,34 @@
]
)
- result = swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.CONTENT, content1_swhid, authority
- )
+ result = swh_storage.raw_extrinsic_metadata_get(content1_swhid, authority)
assert result.next_page_token is None
assert [content1_metadata1, content1_metadata2] == list(
sorted(result.results, key=lambda x: x.discovery_date,)
)
- result = swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.CONTENT, content1_swhid, authority2
- )
+ result = swh_storage.raw_extrinsic_metadata_get(content1_swhid, authority2)
assert result.next_page_token is None
assert [content1_metadata3] == list(
sorted(result.results, key=lambda x: x.discovery_date,)
)
- result = swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.CONTENT, content2_swhid, authority
- )
+ result = swh_storage.raw_extrinsic_metadata_get(content2_swhid, authority)
assert result.next_page_token is None
assert [content2_metadata] == list(result.results,)
def test_content_metadata_get_after(self, swh_storage, sample_data):
- content = sample_data.content
+ content_swhid = sample_data.swhid.content
fetcher = sample_data.metadata_fetcher
authority = sample_data.metadata_authority
content_metadata, content_metadata2 = sample_data.content_metadata[:2]
- content_swhid = SWHID(object_type="content", object_id=content.sha1_git)
-
swh_storage.metadata_fetcher_add([fetcher])
swh_storage.metadata_authority_add([authority])
swh_storage.raw_extrinsic_metadata_add([content_metadata, content_metadata2])
result = swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.CONTENT,
content_swhid,
authority,
after=content_metadata.discovery_date - timedelta(seconds=1),
@@ -3460,62 +3438,46 @@
)
result = swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.CONTENT,
- content_swhid,
- authority,
- after=content_metadata.discovery_date,
+ content_swhid, authority, after=content_metadata.discovery_date,
)
assert result.next_page_token is None
assert result.results == [content_metadata2]
result = swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.CONTENT,
- content_swhid,
- authority,
- after=content_metadata2.discovery_date,
+ content_swhid, authority, after=content_metadata2.discovery_date,
)
assert result.next_page_token is None
assert result.results == []
def test_content_metadata_get_paginate(self, swh_storage, sample_data):
- content = sample_data.content
+ content_swhid = sample_data.swhid.content
fetcher = sample_data.metadata_fetcher
authority = sample_data.metadata_authority
content_metadata, content_metadata2 = sample_data.content_metadata[:2]
- content_swhid = SWHID(object_type="content", object_id=content.sha1_git)
-
swh_storage.metadata_fetcher_add([fetcher])
swh_storage.metadata_authority_add([authority])
swh_storage.raw_extrinsic_metadata_add([content_metadata, content_metadata2])
- swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.CONTENT, content_swhid, authority
- )
+ swh_storage.raw_extrinsic_metadata_get(content_swhid, authority)
result = swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.CONTENT, content_swhid, authority, limit=1
+ content_swhid, authority, limit=1
)
assert result.next_page_token is not None
assert result.results == [content_metadata]
result = swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.CONTENT,
- content_swhid,
- authority,
- limit=1,
- page_token=result.next_page_token,
+ content_swhid, authority, limit=1, page_token=result.next_page_token,
)
assert result.next_page_token is None
assert result.results == [content_metadata2]
def test_content_metadata_get_paginate_same_date(self, swh_storage, sample_data):
- content = sample_data.content
+ content_swhid = sample_data.swhid.content
fetcher1, fetcher2 = sample_data.fetchers[:2]
authority = sample_data.metadata_authority
content_metadata, content_metadata2 = sample_data.content_metadata[:2]
- content_swhid = SWHID(object_type="content", object_id=content.sha1_git)
-
swh_storage.metadata_fetcher_add([fetcher1, fetcher2])
swh_storage.metadata_authority_add([authority])
@@ -3530,38 +3492,20 @@
)
result = swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.CONTENT, content_swhid, authority, limit=1
+ content_swhid, authority, limit=1
)
assert result.next_page_token is not None
assert result.results == [content_metadata]
result = swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.CONTENT,
- content_swhid,
- authority,
- limit=1,
- page_token=result.next_page_token,
+ content_swhid, authority, limit=1, page_token=result.next_page_token,
)
assert result.next_page_token is None
assert result.results == [new_content_metadata2]
- def test_content_metadata_get__invalid_id(self, swh_storage, sample_data):
- origin = sample_data.origin
- fetcher = sample_data.metadata_fetcher
- authority = sample_data.metadata_authority
- content_metadata, content_metadata2 = sample_data.content_metadata[:2]
-
- swh_storage.metadata_fetcher_add([fetcher])
- swh_storage.metadata_authority_add([authority])
- swh_storage.raw_extrinsic_metadata_add([content_metadata, content_metadata2])
-
- with pytest.raises(StorageArgumentException, match="SWHID"):
- swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.CONTENT, origin.url, authority
- )
-
def test_origin_metadata_add(self, swh_storage, sample_data):
origin = sample_data.origin
+ origin_swhid = sample_data.swhid.origin
fetcher = sample_data.metadata_fetcher
authority = sample_data.metadata_authority
origin_metadata, origin_metadata2 = sample_data.origin_metadata[:2]
@@ -3573,9 +3517,7 @@
swh_storage.raw_extrinsic_metadata_add([origin_metadata, origin_metadata2])
- result = swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.ORIGIN, origin.url, authority
- )
+ result = swh_storage.raw_extrinsic_metadata_get(origin_swhid, authority)
assert result.next_page_token is None
assert list(sorted(result.results, key=lambda x: x.discovery_date)) == [
origin_metadata,
@@ -3596,6 +3538,7 @@
def test_origin_metadata_add_duplicate(self, swh_storage, sample_data):
"""Duplicates should be silently updated."""
origin = sample_data.origin
+ origin_swhid = sample_data.swhid.origin
fetcher = sample_data.metadata_fetcher
authority = sample_data.metadata_authority
origin_metadata, origin_metadata2 = sample_data.origin_metadata[:2]
@@ -3611,9 +3554,7 @@
swh_storage.raw_extrinsic_metadata_add([origin_metadata, origin_metadata2])
swh_storage.raw_extrinsic_metadata_add([new_origin_metadata2])
- result = swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.ORIGIN, origin.url, authority
- )
+ result = swh_storage.raw_extrinsic_metadata_get(origin_swhid, authority)
assert result.next_page_token is None
# which of the two behavior happens is backend-specific.
@@ -3626,7 +3567,10 @@
)
def test_origin_metadata_get(self, swh_storage, sample_data):
- origin, origin2 = sample_data.origins[:2]
+ origin = sample_data.origin
+ origin_swhid = sample_data.swhid.origin
+ origin2 = sample_data.origin2
+ origin2_swhid = sample_data.swhid.origin2
fetcher, fetcher2 = sample_data.fetchers[:2]
authority, authority2 = sample_data.authorities[:2]
(
@@ -3637,7 +3581,7 @@
assert swh_storage.origin_add([origin, origin2]) == {"origin:add": 2}
- origin2_metadata = attr.evolve(origin1_metadata2, target=origin2.url)
+ origin2_metadata = attr.evolve(origin1_metadata2, target=origin2_swhid)
swh_storage.metadata_authority_add([authority, authority2])
swh_storage.metadata_fetcher_add([fetcher, fetcher2])
@@ -3646,30 +3590,25 @@
[origin1_metadata1, origin1_metadata2, origin1_metadata3, origin2_metadata]
)
- result = swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.ORIGIN, origin.url, authority
- )
+ result = swh_storage.raw_extrinsic_metadata_get(origin_swhid, authority)
assert result.next_page_token is None
assert [origin1_metadata1, origin1_metadata2] == list(
sorted(result.results, key=lambda x: x.discovery_date,)
)
- result = swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.ORIGIN, origin.url, authority2
- )
+ result = swh_storage.raw_extrinsic_metadata_get(origin_swhid, authority2)
assert result.next_page_token is None
assert [origin1_metadata3] == list(
sorted(result.results, key=lambda x: x.discovery_date,)
)
- result = swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.ORIGIN, origin2.url, authority
- )
+ result = swh_storage.raw_extrinsic_metadata_get(origin2_swhid, authority)
assert result.next_page_token is None
assert [origin2_metadata] == list(result.results,)
def test_origin_metadata_get_after(self, swh_storage, sample_data):
origin = sample_data.origin
+ origin_swhid = sample_data.swhid.origin
fetcher = sample_data.metadata_fetcher
authority = sample_data.metadata_authority
origin_metadata, origin_metadata2 = sample_data.origin_metadata[:2]
@@ -3681,8 +3620,7 @@
swh_storage.raw_extrinsic_metadata_add([origin_metadata, origin_metadata2])
result = swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.ORIGIN,
- origin.url,
+ origin_swhid,
authority,
after=origin_metadata.discovery_date - timedelta(seconds=1),
)
@@ -3693,25 +3631,20 @@
]
result = swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.ORIGIN,
- origin.url,
- authority,
- after=origin_metadata.discovery_date,
+ origin_swhid, authority, after=origin_metadata.discovery_date,
)
assert result.next_page_token is None
assert result.results == [origin_metadata2]
result = swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.ORIGIN,
- origin.url,
- authority,
- after=origin_metadata2.discovery_date,
+ origin_swhid, authority, after=origin_metadata2.discovery_date,
)
assert result.next_page_token is None
assert result.results == []
def test_origin_metadata_get_paginate(self, swh_storage, sample_data):
origin = sample_data.origin
+ origin_swhid = sample_data.swhid.origin
fetcher = sample_data.metadata_fetcher
authority = sample_data.metadata_authority
origin_metadata, origin_metadata2 = sample_data.origin_metadata[:2]
@@ -3722,28 +3655,23 @@
swh_storage.raw_extrinsic_metadata_add([origin_metadata, origin_metadata2])
- swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.ORIGIN, origin.url, authority
- )
+ swh_storage.raw_extrinsic_metadata_get(origin_swhid, authority)
result = swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.ORIGIN, origin.url, authority, limit=1
+ origin_swhid, authority, limit=1
)
assert result.next_page_token is not None
assert result.results == [origin_metadata]
result = swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.ORIGIN,
- origin.url,
- authority,
- limit=1,
- page_token=result.next_page_token,
+ origin_swhid, authority, limit=1, page_token=result.next_page_token,
)
assert result.next_page_token is None
assert result.results == [origin_metadata2]
def test_origin_metadata_get_paginate_same_date(self, swh_storage, sample_data):
origin = sample_data.origin
+ origin_swhid = sample_data.swhid.origin
fetcher1, fetcher2 = sample_data.fetchers[:2]
authority = sample_data.metadata_authority
origin_metadata, origin_metadata2 = sample_data.origin_metadata[:2]
@@ -3761,17 +3689,13 @@
swh_storage.raw_extrinsic_metadata_add([origin_metadata, new_origin_metadata2])
result = swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.ORIGIN, origin.url, authority, limit=1
+ origin_swhid, authority, limit=1
)
assert result.next_page_token is not None
assert result.results == [origin_metadata]
result = swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.ORIGIN,
- origin.url,
- authority,
- limit=1,
- page_token=result.next_page_token,
+ origin_swhid, authority, limit=1, page_token=result.next_page_token,
)
assert result.next_page_token is None
assert result.results == [new_origin_metadata2]
@@ -3798,24 +3722,6 @@
with pytest.raises(StorageArgumentException, match="fetcher"):
swh_storage.raw_extrinsic_metadata_add([origin_metadata, origin_metadata2])
- def test_origin_metadata_get__invalid_id_type(self, swh_storage, sample_data):
- origin = sample_data.origin
- authority = sample_data.metadata_authority
- fetcher = sample_data.metadata_fetcher
- origin_metadata, origin_metadata2 = sample_data.origin_metadata[:2]
- content_metadata = sample_data.content_metadata[0]
- assert swh_storage.origin_add([origin]) == {"origin:add": 1}
-
- swh_storage.metadata_fetcher_add([fetcher])
- swh_storage.metadata_authority_add([authority])
-
- swh_storage.raw_extrinsic_metadata_add([origin_metadata, origin_metadata2])
-
- with pytest.raises(StorageArgumentException, match="SWHID"):
- swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.ORIGIN, content_metadata.target, authority,
- )
-
class TestStorageGeneratedData:
def test_generate_content_get_data(self, swh_storage, swh_contents):
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Dec 20 2024, 12:22 AM (11 w, 3 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3217345
Attached To
D4986: [WIP] Add a composite swhid type in postgresql
Event Timeline
Log In to Comment