Page MenuHomeSoftware Heritage

D6023.id21772.diff
No OneTemporary

D6023.id21772.diff

diff --git a/sql/upgrades/176.sql b/sql/upgrades/176.sql
new file mode 100644
--- /dev/null
+++ b/sql/upgrades/176.sql
@@ -0,0 +1,27 @@
+-- SWH DB schema upgrade
+-- from_version: 175
+-- to_version: 176
+-- description: add storage of the extid.extid_version field
+
+insert into dbversion(version, release, description)
+ values(176, now(), 'Work In Progress');
+
+alter table extid add column extid_version bigint not null default 0;
+
+comment on column extid.extid_version is 'Version of the extid for the given original object';
+
+create or replace function swh_extid_add()  -- insert the distinct rows of tmp_extid into extid
+ returns void
+ language plpgsql
+as $$
+begin
+ insert into extid (extid_type, extid, extid_version, target_type, target)
+ select distinct t.extid_type, t.extid, t.extid_version, t.target_type, t.target  -- extid_version is now part of the copied row
+ from tmp_extid t
+ on conflict do nothing;  -- a row that would violate a unique constraint is skipped, not raised
+ return;
+end
+$$;
+
+create unique index concurrently on extid(extid_type, extid, extid_version, target_type, target);
+drop index extid_extid_type_extid_target_type_target_idx;
diff --git a/swh/storage/backfill.py b/swh/storage/backfill.py
--- a/swh/storage/backfill.py
+++ b/swh/storage/backfill.py
@@ -80,7 +80,7 @@
"reason",
],
"directory": ["id", "dir_entries", "file_entries", "rev_entries"],
- "extid": ["extid_type", "extid", "target_type", "target"],
+ "extid": ["extid_type", "extid", "extid_version", "target_type", "target"],
"metadata_authority": ["type", "url"],
"metadata_fetcher": ["name", "version"],
"origin": ["url"],
diff --git a/swh/storage/cassandra/cql.py b/swh/storage/cassandra/cql.py
--- a/swh/storage/cassandra/cql.py
+++ b/swh/storage/cassandra/cql.py
@@ -1178,15 +1178,29 @@
return (token, finalizer)
@_prepared_select_statement(
- ExtIDRow, "WHERE extid_type=? AND extid=? AND target_type=? AND target=?",
+ ExtIDRow,
+ "WHERE extid_type=? AND extid=? AND extid_version=? "
+ "AND target_type=? AND target=?",
)
def extid_get_from_pk(
- self, extid_type: str, extid: bytes, target: CoreSWHID, *, statement,
+ self,
+ extid_type: str,
+ extid: bytes,
+ extid_version: int,
+ target: CoreSWHID,
+ *,
+ statement,
) -> Optional[ExtIDRow]:
rows = list(
self._execute_with_retries(
statement,
- [extid_type, extid, target.object_type.value, target.object_id],
+ [
+ extid_type,
+ extid,
+ extid_version,
+ target.object_type.value,
+ target.object_id,
+ ],
),
)
assert len(rows) <= 1
diff --git a/swh/storage/cassandra/model.py b/swh/storage/cassandra/model.py
--- a/swh/storage/cassandra/model.py
+++ b/swh/storage/cassandra/model.py
@@ -314,10 +314,11 @@
@dataclasses.dataclass
class ExtIDRow(BaseRow):
TABLE = "extid"
- PARTITION_KEY = ("target", "target_type", "extid", "extid_type")
+ PARTITION_KEY = ("target", "target_type", "extid_version", "extid", "extid_type")
extid_type: str
extid: bytes
+ extid_version: int
target_type: str
target: bytes
diff --git a/swh/storage/cassandra/schema.py b/swh/storage/cassandra/schema.py
--- a/swh/storage/cassandra/schema.py
+++ b/swh/storage/cassandra/schema.py
@@ -277,9 +277,10 @@
CREATE TABLE IF NOT EXISTS extid (
extid_type ascii,
extid blob,
+ extid_version smallint,
target_type ascii,
target blob,
- PRIMARY KEY ((extid_type, extid), target_type, target)
+ PRIMARY KEY ((extid_type, extid), extid_version, target_type, target)
);""",
"""
CREATE TABLE IF NOT EXISTS extid_by_target (
diff --git a/swh/storage/cassandra/storage.py b/swh/storage/cassandra/storage.py
--- a/swh/storage/cassandra/storage.py
+++ b/swh/storage/cassandra/storage.py
@@ -1455,7 +1455,10 @@
extid
for extid in ids
if not self._cql_runner.extid_get_from_pk(
- extid_type=extid.extid_type, extid=extid.extid, target=extid.target,
+ extid_type=extid.extid_type,
+ extid_version=extid.extid_version,
+ extid=extid.extid,
+ target=extid.target,
)
]
else:
@@ -1469,6 +1472,7 @@
target = extid.target.object_id
extidrow = ExtIDRow(
extid_type=extid.extid_type,
+ extid_version=extid.extid_version,
extid=extid.extid,
target_type=target_type,
target=target,
@@ -1489,6 +1493,7 @@
result.extend(
ExtID(
extid_type=extidrow.extid_type,
+ extid_version=extidrow.extid_version,
extid=extidrow.extid,
target=CoreSWHID(
object_type=extidrow.target_type, object_id=extidrow.target,
@@ -1509,6 +1514,7 @@
result.extend(
ExtID(
extid_type=extidrow.extid_type,
+ extid_version=extidrow.extid_version,
extid=extidrow.extid,
target=CoreSWHID(
object_type=SwhidObjectType(extidrow.target_type),
diff --git a/swh/storage/in_memory.py b/swh/storage/in_memory.py
--- a/swh/storage/in_memory.py
+++ b/swh/storage/in_memory.py
@@ -681,12 +681,13 @@
pass
def extid_get_from_pk(
- self, extid_type: str, extid: bytes, target: ExtendedSWHID,
+ self, extid_type: str, extid: bytes, extid_version: int, target: ExtendedSWHID,
) -> Optional[ExtIDRow]:
primary_key = self._extid.primary_key_from_dict(
dict(
extid_type=extid_type,
extid=extid,
+ extid_version=extid_version,
target_type=target.object_type.value,
target=target.object_id,
)
diff --git a/swh/storage/postgresql/converters.py b/swh/storage/postgresql/converters.py
--- a/swh/storage/postgresql/converters.py
+++ b/swh/storage/postgresql/converters.py
@@ -331,6 +331,7 @@
return ExtID(
extid=row["extid"],
extid_type=row["extid_type"],
+ extid_version=row.get("extid_version", 0),
target=CoreSWHID(
object_id=row["target"],
object_type=SwhidObjectType[row["target_type"].upper()],
diff --git a/swh/storage/postgresql/db.py b/swh/storage/postgresql/db.py
--- a/swh/storage/postgresql/db.py
+++ b/swh/storage/postgresql/db.py
@@ -30,7 +30,7 @@
"""
- current_version = 175
+ current_version = 176
def mktemp_dir_entry(self, entry_type, cur=None):
self._cursor(cur).execute(
@@ -843,7 +843,7 @@
((sortkey, id) for sortkey, id in enumerate(revisions)),
)
- extid_cols = ["extid", "extid_type", "target", "target_type"]
+ extid_cols = ["extid", "extid_version", "extid_type", "target", "target_type"]
def extid_get_from_extid_list(self, extid_type, ids, cur=None):
cur = self._cursor(cur)
diff --git a/swh/storage/postgresql/storage.py b/swh/storage/postgresql/storage.py
--- a/swh/storage/postgresql/storage.py
+++ b/swh/storage/postgresql/storage.py
@@ -718,6 +718,7 @@
{
"extid": extid.extid,
"extid_type": extid.extid_type,
+ "extid_version": getattr(extid, "extid_version", 0),
"target": extid.target.object_id,
"target_type": extid.target.object_type.name.lower(), # arghh
}
diff --git a/swh/storage/sql/30-schema.sql b/swh/storage/sql/30-schema.sql
--- a/swh/storage/sql/30-schema.sql
+++ b/swh/storage/sql/30-schema.sql
@@ -17,7 +17,7 @@
-- latest schema version
insert into dbversion(version, release, description)
- values(175, now(), 'Work In Progress');
+ values(176, now(), 'Work In Progress');
-- a SHA1 checksum
create domain sha1 as bytea check (length(value) = 20);
@@ -505,7 +505,8 @@
extid_type text not null,
extid bytea not null,
target_type object_type not null,
- target sha1_git not null
+ target sha1_git not null,
+ extid_version bigint not null default 0
);
comment on table extid is 'Correspondance SWH object (SWHID) <-> original revision id (vcs id)';
@@ -513,3 +514,4 @@
comment on column extid.extid is 'Intrinsic identifier of the object (e.g. hg revision)';
comment on column extid.target_type is 'Type of SWHID of the referenced SWH object';
comment on column extid.target is 'Value (hash) of SWHID of the refenced SWH object';
+comment on column extid.extid_version is 'Version of the extid for the given original object';
diff --git a/swh/storage/sql/40-funcs.sql b/swh/storage/sql/40-funcs.sql
--- a/swh/storage/sql/40-funcs.sql
+++ b/swh/storage/sql/40-funcs.sql
@@ -585,8 +585,8 @@
language plpgsql
as $$
begin
- insert into extid (extid_type, extid, target_type, target)
- select distinct t.extid_type, t.extid, t.target_type, t.target
+ insert into extid (extid_type, extid, extid_version, target_type, target)
+ select distinct t.extid_type, t.extid, t.extid_version, t.target_type, t.target
from tmp_extid t
on conflict do nothing;
return;
diff --git a/swh/storage/sql/60-indexes.sql b/swh/storage/sql/60-indexes.sql
--- a/swh/storage/sql/60-indexes.sql
+++ b/swh/storage/sql/60-indexes.sql
@@ -289,5 +289,5 @@
-- extid
-- used to query by (extid_type, extid) + to deduplicate the whole row
-create unique index concurrently on extid(extid_type, extid, target_type, target);
+create unique index concurrently on extid(extid_type, extid, extid_version, target_type, target);
create index concurrently on extid(target_type, target);
diff --git a/swh/storage/tests/storage_data.py b/swh/storage/tests/storage_data.py
--- a/swh/storage/tests/storage_data.py
+++ b/swh/storage/tests/storage_data.py
@@ -698,9 +698,18 @@
extid_type="directory",
extid=b"something",
)
+ extid4 = ExtID(
+ target=CoreSWHID(
+ object_type=SwhidObjectType.DIRECTORY, object_id=directory2.id
+ ),
+ extid_type="directory",
+ extid=b"something",
+ extid_version=2,
+ )
extids: Tuple[ExtID, ...] = (
extid1,
extid2,
extid3,
+ extid4,
)
diff --git a/swh/storage/tests/storage_tests.py b/swh/storage/tests/storage_tests.py
--- a/swh/storage/tests/storage_tests.py
+++ b/swh/storage/tests/storage_tests.py
@@ -1183,6 +1183,7 @@
ExtID(
extid=hgid,
extid_type="hg",
+ extid_version=1,
target=CoreSWHID(object_id=swhid, object_type=ObjectType.REVISION,),
)
for hgid, swhid in zip(extids, swhids)
@@ -1243,6 +1244,7 @@
ExtID(
extid=extid,
extid_type="git",
+ extid_version=2,
target=CoreSWHID(object_id=extid, object_type=ObjectType.REVISION,),
)
for extid in ids
@@ -1254,6 +1256,7 @@
ExtID(
extid=extid,
extid_type="hg",
+ extid_version=2,
target=CoreSWHID(object_id=extid, object_type=ObjectType.REVISION,),
)
for extid in ids
@@ -1300,6 +1303,42 @@
assert swh_storage.extid_get_from_target(ObjectType.REVISION, ids) == extids
assert swh_storage.extid_get_from_target(ObjectType.RELEASE, ids) == extids2
+ def test_extid_version_behavior(self, swh_storage, sample_data):
+ ids = [
+ revision.id
+ for revision in sample_data.revisions
+ if revision.type.value == "git"
+ ]
+
+ # Add every id twice: extid_version omitted, then extid_version=1
+ extids = [
+ ExtID(
+ extid=extid,
+ extid_type="git",
+ target=CoreSWHID(object_id=extid, object_type=ObjectType.REVISION,),
+ )
+ for extid in ids
+ ] + [
+ ExtID(
+ extid=extid,
+ extid_type="git",
+ extid_version=1,
+ target=CoreSWHID(object_id=extid, object_type=ObjectType.REVISION,),
+ )
+ for extid in ids
+ ]
+ swh_storage.extid_add(extids)
+
+ # Both rows must come back, by extid and by target, as versions {0, 1}
+ for git_id in ids:
+ objs = swh_storage.extid_get_from_extid("git", [git_id])
+ assert len(objs) == 2
+ assert set(obj.extid_version for obj in objs) == {0, 1}  # 0 is what the omitted version reads back as
+ for swhid in ids:  # the same ids double as targets (object_id=extid above)
+ objs = swh_storage.extid_get_from_target(ObjectType.REVISION, [swhid])
+ assert len(objs) == 2
+ assert set(obj.extid_version for obj in objs) == {0, 1}
+
def test_release_add(self, swh_storage, sample_data):
release, release2 = sample_data.releases[:2]
diff --git a/swh/storage/tests/test_cassandra.py b/swh/storage/tests/test_cassandra.py
--- a/swh/storage/tests/test_cassandra.py
+++ b/swh/storage/tests/test_cassandra.py
@@ -469,6 +469,7 @@
ExtIDRow(
extid_type=extid.extid_type,
extid=extid.extid,
+ extid_version=extid.extid_version,
target_type=extid.target.object_type.value,
target=extid.target.object_id,
)
diff --git a/swh/storage/tests/test_storage_data.py b/swh/storage/tests/test_storage_data.py
--- a/swh/storage/tests/test_storage_data.py
+++ b/swh/storage/tests/test_storage_data.py
@@ -24,6 +24,7 @@
"authorities",
"origin_metadata",
"content_metadata",
+ "extids",
]:
for obj in getattr(data, attribute_key):
assert isinstance(obj, BaseModel)

File Metadata

Mime Type
text/plain
Expires
Thu, Jul 3, 3:30 PM (1 w, 2 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3224545

Event Timeline