Page Menu | Home | Software Heritage

D8760.diff
No One | Temporary

D8760.diff

diff --git a/swh/storage/backfill.py b/swh/storage/backfill.py
--- a/swh/storage/backfill.py
+++ b/swh/storage/backfill.py
@@ -80,7 +80,15 @@
"reason",
],
"directory": ["id", "dir_entries", "file_entries", "rev_entries", "raw_manifest"],
- "extid": ["extid_type", "extid", "extid_version", "target_type", "target"],
+ "extid": [
+ "extid_type",
+ "extid",
+ "extid_version",
+ "target_type",
+ "target",
+ "payload_type",
+ "payload",
+ ],
"metadata_authority": ["type", "url"],
"metadata_fetcher": ["name", "version"],
"origin": ["url"],
diff --git a/swh/storage/cassandra/cql.py b/swh/storage/cassandra/cql.py
--- a/swh/storage/cassandra/cql.py
+++ b/swh/storage/cassandra/cql.py
@@ -1352,6 +1352,8 @@
extid: bytes,
extid_version: int,
target: CoreSWHID,
+ payload_type: str,
+ payload: Sha1Git,
*,
statement,
) -> Optional[ExtIDRow]:
@@ -1364,6 +1366,8 @@
extid_version,
target.object_type.value,
target.object_id,
+ payload_type,
+ payload,
],
),
)
diff --git a/swh/storage/cassandra/model.py b/swh/storage/cassandra/model.py
--- a/swh/storage/cassandra/model.py
+++ b/swh/storage/cassandra/model.py
@@ -317,13 +317,23 @@
@dataclasses.dataclass
class ExtIDRow(BaseRow):
TABLE = "extid"
- PARTITION_KEY = ("target", "target_type", "extid_version", "extid", "extid_type")
+ PARTITION_KEY = (
+ "target",
+ "target_type",
+ "extid_version",
+ "extid",
+ "extid_type",
+ "payload_type",
+ "payload",
+ )
extid_type: str
extid: bytes
extid_version: int
target_type: str
target: bytes
+ payload_type: Optional[str]
+ payload: Optional[bytes]
@dataclasses.dataclass
diff --git a/swh/storage/cassandra/schema.py b/swh/storage/cassandra/schema.py
--- a/swh/storage/cassandra/schema.py
+++ b/swh/storage/cassandra/schema.py
@@ -278,7 +278,9 @@
extid_version smallint,
target_type ascii,
target blob,
- PRIMARY KEY ((extid_type, extid), extid_version, target_type, target)
+ payload_type ascii,
+ payload blob,
+ PRIMARY KEY ((extid_type, extid), extid_version, target_type, target, payload_type, payload)
);""",
"""
CREATE TABLE IF NOT EXISTS extid_by_target (
diff --git a/swh/storage/cassandra/storage.py b/swh/storage/cassandra/storage.py
--- a/swh/storage/cassandra/storage.py
+++ b/swh/storage/cassandra/storage.py
@@ -1663,6 +1663,8 @@
extid_version=extid.extid_version,
extid=extid.extid,
target=extid.target,
+ payload_type=extid.payload_type,
+ payload=extid.payload,
)
]
else:
@@ -1676,12 +1678,16 @@
target = extid.target.object_id
extid_version = extid.extid_version
extid_type = extid.extid_type
+ payload_type = extid.payload_type
+ payload = extid.payload
extidrow = ExtIDRow(
extid_type=extid_type,
extid_version=extid_version,
extid=extid.extid,
target_type=target_type,
target=target,
+ payload_type=payload_type,
+ payload=payload,
)
(token, insertion_finalizer) = self._cql_runner.extid_add_prepare(extidrow)
indexrow = ExtIDByTargetRow(
@@ -1714,6 +1720,8 @@
object_type=extidrow.target_type,
object_id=extidrow.target,
),
+ payload_type=extidrow.payload_type,
+ payload=extidrow.payload,
)
for extidrow in extidrows
)
@@ -1749,6 +1757,8 @@
object_type=SwhidObjectType(extidrow.target_type),
object_id=extidrow.target,
),
+ payload_type=extidrow.payload_type,
+ payload=extidrow.payload,
)
for extidrow in extidrows
)
diff --git a/swh/storage/in_memory.py b/swh/storage/in_memory.py
--- a/swh/storage/in_memory.py
+++ b/swh/storage/in_memory.py
@@ -740,6 +740,8 @@
extid: bytes,
extid_version: int,
target: ExtendedSWHID,
+ payload_type: str,
+ payload: Sha1Git,
) -> Optional[ExtIDRow]:
primary_key = self._extid.primary_key_from_dict(
dict(
@@ -748,6 +750,8 @@
extid_version=extid_version,
target_type=target.object_type.value,
target=target.object_id,
+ payload_type=payload_type,
+ payload=payload,
)
)
return self._extid.get_from_primary_key(primary_key)
diff --git a/swh/storage/postgresql/converters.py b/swh/storage/postgresql/converters.py
--- a/swh/storage/postgresql/converters.py
+++ b/swh/storage/postgresql/converters.py
@@ -359,4 +359,6 @@
object_id=row["target"],
object_type=SwhidObjectType[row["target_type"].upper()],
),
+ payload_type=row["payload_type"],
+ payload=row["payload"],
)
diff --git a/swh/storage/postgresql/db.py b/swh/storage/postgresql/db.py
--- a/swh/storage/postgresql/db.py
+++ b/swh/storage/postgresql/db.py
@@ -899,7 +899,15 @@
((sortkey, id) for sortkey, id in enumerate(revisions)),
)
- extid_cols = ["extid", "extid_version", "extid_type", "target", "target_type"]
+ extid_cols = [
+ "extid",
+ "extid_version",
+ "extid_type",
+ "target",
+ "target_type",
+ "payload_type",
+ "payload",
+ ]
def extid_get_from_extid_list(
self, extid_type: str, ids: List[bytes], version: Optional[int] = None, cur=None
diff --git a/swh/storage/postgresql/storage.py b/swh/storage/postgresql/storage.py
--- a/swh/storage/postgresql/storage.py
+++ b/swh/storage/postgresql/storage.py
@@ -108,7 +108,7 @@
class Storage:
"""SWH storage datastore proxy, encompassing DB and object storage"""
- current_version: int = 186
+ current_version: int = 187
def __init__(
self,
@@ -807,6 +807,8 @@
"extid_version": getattr(extid, "extid_version", 0),
"target": extid.target.object_id,
"target_type": extid.target.object_type.name.lower(), # arghh
+ "payload_type": extid.payload_type,
+ "payload": extid.payload,
}
for extid in ids
]
diff --git a/swh/storage/sql/30-schema.sql b/swh/storage/sql/30-schema.sql
--- a/swh/storage/sql/30-schema.sql
+++ b/swh/storage/sql/30-schema.sql
@@ -506,7 +506,9 @@
extid bytea not null,
target_type object_type not null,
target sha1_git not null,
- extid_version bigint not null default 0
+ extid_version bigint not null default 0,
+ payload_type text,
+ payload sha1_git
);
comment on table extid is 'Correspondance SWH object (SWHID) <-> original revision id (vcs id)';
@@ -515,3 +517,5 @@
comment on column extid.target_type is 'Type of SWHID of the referenced SWH object';
comment on column extid.target is 'Value (hash) of SWHID of the refenced SWH object';
comment on column extid.extid_version is 'Version of the extid type for the given original object';
+comment on column extid.payload_type is 'The type of the payload object';
+comment on column extid.payload is 'Content object ID of data associated with the ExtID';
diff --git a/swh/storage/sql/40-funcs.sql b/swh/storage/sql/40-funcs.sql
--- a/swh/storage/sql/40-funcs.sql
+++ b/swh/storage/sql/40-funcs.sql
@@ -598,8 +598,8 @@
language plpgsql
as $$
begin
- insert into extid (extid_type, extid, extid_version, target_type, target)
- select distinct t.extid_type, t.extid, t.extid_version, t.target_type, t.target
+ insert into extid (extid_type, extid, extid_version, target_type, target, payload_type, payload)
+ select distinct t.extid_type, t.extid, t.extid_version, t.target_type, t.target, t.payload_type, t.payload
from tmp_extid t
on conflict do nothing;
return;
diff --git a/swh/storage/sql/60-indexes.sql b/swh/storage/sql/60-indexes.sql
--- a/swh/storage/sql/60-indexes.sql
+++ b/swh/storage/sql/60-indexes.sql
@@ -318,6 +318,14 @@
-- extid
+-- the payload can be null if and only if the payload_type is null
+alter table extid
+ add constraint extid_payload_check
+ check ((payload_type is null) = (payload is null))
+ not valid;
+
-- used to query by (extid_type, extid) + to deduplicate the whole row
-create unique index concurrently on extid(extid_type, extid, extid_version, target_type, target);
+create unique index concurrently extid_no_payload_idx on extid(extid_type, extid, extid_version, target_type, target) where payload_type is null and payload is null;
+create unique index concurrently extid_payload_idx on extid(extid_type, extid, extid_version, target_type, target, payload_type, payload) where payload_type is not null and payload is not null;
+
create index concurrently on extid(target_type, target);
diff --git a/swh/storage/sql/upgrades/187.sql b/swh/storage/sql/upgrades/187.sql
new file mode 100644
--- /dev/null
+++ b/swh/storage/sql/upgrades/187.sql
@@ -0,0 +1,51 @@
+-- SWH DB schema upgrade
+-- from_version: 186
+-- to_version: 187
+-- description: Add payloads to ExtIDs
+
+insert into dbversion(version, release, description)
+ values(187, now(), 'Work In Progress');
+
+-- Add payload_type and payload columns.
+
+alter table extid
+ add column payload_type text;
+comment on column extid.payload_type is 'The type of the payload object';
+
+alter table extid
+ add column payload sha1_git;
+comment on column extid.payload is 'Content object ID of data associated with the ExtID';
+
+-- Add payload constraint.
+
+alter table extid
+ add constraint extid_payload_check
+ check ((payload_type is null) = (payload is null))
+ not valid;
+
+-- Replace the single unique index with two partial ones (rows without / with a payload).
+
+create unique index concurrently extid_no_payload_idx
+ on extid(extid_type, extid, extid_version, target_type, target)
+ where payload_type is null and payload is null;
+
+create unique index concurrently extid_payload_idx
+ on extid(extid_type, extid, extid_version, target_type, target, payload_type, payload)
+ where payload_type is not null and payload is not null;
+-- Drop the old index only after both replacements exist; concurrently, so the table is not locked.
+drop index concurrently if exists extid_extid_type_extid_extid_version_target_type_target_idx;
+
+-- Update the swh_extid_add procedure to include the new columns.
+
+create or replace function swh_extid_add()
+ returns void
+ language plpgsql
+as $$
+begin
+ insert into extid (extid_type, extid, extid_version, target_type, target, payload_type, payload)
+ select distinct t.extid_type, t.extid, t.extid_version, t.target_type, t.target, t.payload_type, t.payload
+ from tmp_extid t
+ on conflict do nothing;
+ return;
+end
+$$;
diff --git a/swh/storage/tests/storage_data.py b/swh/storage/tests/storage_data.py
--- a/swh/storage/tests/storage_data.py
+++ b/swh/storage/tests/storage_data.py
@@ -831,9 +831,20 @@
extid_version=2,
)
+ extid5 = ExtID(
+ target=CoreSWHID(
+ object_type=SwhidObjectType.DIRECTORY, object_id=directory2.id
+ ),
+ extid_type="directory",
+ extid=b"something",
+ payload_type="test",
+ payload=content.sha1_git,
+ )
+
extids: Tuple[ExtID, ...] = (
extid1,
extid2,
extid3,
extid4,
+ extid5,
)
diff --git a/swh/storage/tests/storage_tests.py b/swh/storage/tests/storage_tests.py
--- a/swh/storage/tests/storage_tests.py
+++ b/swh/storage/tests/storage_tests.py
@@ -1695,6 +1695,41 @@
ObjectType.REVISION, [ids[0]], extid_type="git"
)
+ def test_extid_payload(self, swh_storage, sample_data):
+ target = sample_data.directory2.swhid()
+ extids = [
+ ExtID(
+ extid=b"abc123",
+ extid_type="test",
+ target=target,
+ payload_type="test_payload",
+ payload=sample_data.content.sha1_git,
+ ),
+ ]
+ # before extid_add: neither lookup (by extid, by target) returns anything
+ assert swh_storage.extid_get_from_extid("test", [b"abc123"]) == []
+ assert (
+ swh_storage.extid_get_from_target(target.object_type, [target.object_id])
+ == []
+ )
+
+ summary = swh_storage.extid_add(extids)
+ assert summary == {"extid:add": 1}
+ # after extid_add: both lookups return the ExtID that was just added
+ assert swh_storage.extid_get_from_extid("test", [b"abc123"]) == extids
+ assert (
+ swh_storage.extid_get_from_target(target.object_type, [target.object_id])
+ == extids
+ )
+
+ # the journal's "extid" objects must equal the list of ExtIDs added above
+ extids_in_journal = [
+ obj
+ for (obj_type, obj) in swh_storage.journal_writer.journal.objects
+ if obj_type == "extid"
+ ]
+ assert extids == extids_in_journal
+
def test_release_add(self, swh_storage, sample_data):
release, release2 = sample_data.releases[:2]

File Metadata

Mime Type
text/plain
Expires
Thu, Dec 19, 4:51 AM (5 h, 28 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3216578

Event Timeline