Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F9345772
D6023.id21772.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
13 KB
Subscribers
None
D6023.id21772.diff
View Options
diff --git a/sql/upgrades/176.sql b/sql/upgrades/176.sql
new file mode 100644
--- /dev/null
+++ b/sql/upgrades/176.sql
@@ -0,0 +1,27 @@
+-- SWH DB schema upgrade
+-- from_version: 175
+-- to_version: 176
+-- description: add storage of the extid.extid_version field
+
+insert into dbversion(version, release, description)
+ values(176, now(), 'Work In Progress');
+
+alter table extid add column extid_version bigint not null default 0;
+
+comment on column extid.extid_version is 'Version of the extid for the given original object';
+
+create or replace function swh_extid_add()
+ returns void
+ language plpgsql
+as $$
+begin
+ insert into extid (extid_type, extid, extid_version, target_type, target)
+ select distinct t.extid_type, t.extid, t.extid_version, t.target_type, t.target
+ from tmp_extid t
+ on conflict do nothing;
+ return;
+end
+$$;
+
+create unique index concurrently on extid(extid_type, extid, extid_version, target_type, target);
+drop index extid_extid_type_extid_target_type_target_idx;
diff --git a/swh/storage/backfill.py b/swh/storage/backfill.py
--- a/swh/storage/backfill.py
+++ b/swh/storage/backfill.py
@@ -80,7 +80,7 @@
"reason",
],
"directory": ["id", "dir_entries", "file_entries", "rev_entries"],
- "extid": ["extid_type", "extid", "target_type", "target"],
+ "extid": ["extid_type", "extid", "extid_version", "target_type", "target"],
"metadata_authority": ["type", "url"],
"metadata_fetcher": ["name", "version"],
"origin": ["url"],
diff --git a/swh/storage/cassandra/cql.py b/swh/storage/cassandra/cql.py
--- a/swh/storage/cassandra/cql.py
+++ b/swh/storage/cassandra/cql.py
@@ -1178,15 +1178,29 @@
return (token, finalizer)
@_prepared_select_statement(
- ExtIDRow, "WHERE extid_type=? AND extid=? AND target_type=? AND target=?",
+ ExtIDRow,
+ "WHERE extid_type=? AND extid=? AND extid_version=? "
+ "AND target_type=? AND target=?",
)
def extid_get_from_pk(
- self, extid_type: str, extid: bytes, target: CoreSWHID, *, statement,
+ self,
+ extid_type: str,
+ extid: bytes,
+ extid_version: int,
+ target: CoreSWHID,
+ *,
+ statement,
) -> Optional[ExtIDRow]:
rows = list(
self._execute_with_retries(
statement,
- [extid_type, extid, target.object_type.value, target.object_id],
+ [
+ extid_type,
+ extid,
+ extid_version,
+ target.object_type.value,
+ target.object_id,
+ ],
),
)
assert len(rows) <= 1
diff --git a/swh/storage/cassandra/model.py b/swh/storage/cassandra/model.py
--- a/swh/storage/cassandra/model.py
+++ b/swh/storage/cassandra/model.py
@@ -314,10 +314,11 @@
@dataclasses.dataclass
class ExtIDRow(BaseRow):
TABLE = "extid"
- PARTITION_KEY = ("target", "target_type", "extid", "extid_type")
+ PARTITION_KEY = ("target", "target_type", "extid_version", "extid", "extid_type")
extid_type: str
extid: bytes
+ extid_version: int
target_type: str
target: bytes
diff --git a/swh/storage/cassandra/schema.py b/swh/storage/cassandra/schema.py
--- a/swh/storage/cassandra/schema.py
+++ b/swh/storage/cassandra/schema.py
@@ -277,9 +277,10 @@
CREATE TABLE IF NOT EXISTS extid (
extid_type ascii,
extid blob,
+ extid_version smallint,
target_type ascii,
target blob,
- PRIMARY KEY ((extid_type, extid), target_type, target)
+ PRIMARY KEY ((extid_type, extid), extid_version, target_type, target)
);""",
"""
CREATE TABLE IF NOT EXISTS extid_by_target (
diff --git a/swh/storage/cassandra/storage.py b/swh/storage/cassandra/storage.py
--- a/swh/storage/cassandra/storage.py
+++ b/swh/storage/cassandra/storage.py
@@ -1455,7 +1455,10 @@
extid
for extid in ids
if not self._cql_runner.extid_get_from_pk(
- extid_type=extid.extid_type, extid=extid.extid, target=extid.target,
+ extid_type=extid.extid_type,
+ extid_version=extid.extid_version,
+ extid=extid.extid,
+ target=extid.target,
)
]
else:
@@ -1469,6 +1472,7 @@
target = extid.target.object_id
extidrow = ExtIDRow(
extid_type=extid.extid_type,
+ extid_version=extid.extid_version,
extid=extid.extid,
target_type=target_type,
target=target,
@@ -1489,6 +1493,7 @@
result.extend(
ExtID(
extid_type=extidrow.extid_type,
+ extid_version=extidrow.extid_version,
extid=extidrow.extid,
target=CoreSWHID(
object_type=extidrow.target_type, object_id=extidrow.target,
@@ -1509,6 +1514,7 @@
result.extend(
ExtID(
extid_type=extidrow.extid_type,
+ extid_version=extidrow.extid_version,
extid=extidrow.extid,
target=CoreSWHID(
object_type=SwhidObjectType(extidrow.target_type),
diff --git a/swh/storage/in_memory.py b/swh/storage/in_memory.py
--- a/swh/storage/in_memory.py
+++ b/swh/storage/in_memory.py
@@ -681,12 +681,13 @@
pass
def extid_get_from_pk(
- self, extid_type: str, extid: bytes, target: ExtendedSWHID,
+ self, extid_type: str, extid: bytes, extid_version: int, target: ExtendedSWHID,
) -> Optional[ExtIDRow]:
primary_key = self._extid.primary_key_from_dict(
dict(
extid_type=extid_type,
extid=extid,
+ extid_version=extid_version,
target_type=target.object_type.value,
target=target.object_id,
)
diff --git a/swh/storage/postgresql/converters.py b/swh/storage/postgresql/converters.py
--- a/swh/storage/postgresql/converters.py
+++ b/swh/storage/postgresql/converters.py
@@ -331,6 +331,7 @@
return ExtID(
extid=row["extid"],
extid_type=row["extid_type"],
+ extid_version=row.get("extid_version", 0),
target=CoreSWHID(
object_id=row["target"],
object_type=SwhidObjectType[row["target_type"].upper()],
diff --git a/swh/storage/postgresql/db.py b/swh/storage/postgresql/db.py
--- a/swh/storage/postgresql/db.py
+++ b/swh/storage/postgresql/db.py
@@ -30,7 +30,7 @@
"""
- current_version = 175
+ current_version = 176
def mktemp_dir_entry(self, entry_type, cur=None):
self._cursor(cur).execute(
@@ -843,7 +843,7 @@
((sortkey, id) for sortkey, id in enumerate(revisions)),
)
- extid_cols = ["extid", "extid_type", "target", "target_type"]
+ extid_cols = ["extid", "extid_version", "extid_type", "target", "target_type"]
def extid_get_from_extid_list(self, extid_type, ids, cur=None):
cur = self._cursor(cur)
diff --git a/swh/storage/postgresql/storage.py b/swh/storage/postgresql/storage.py
--- a/swh/storage/postgresql/storage.py
+++ b/swh/storage/postgresql/storage.py
@@ -718,6 +718,7 @@
{
"extid": extid.extid,
"extid_type": extid.extid_type,
+ "extid_version": getattr(extid, "extid_version", 0),
"target": extid.target.object_id,
"target_type": extid.target.object_type.name.lower(), # arghh
}
diff --git a/swh/storage/sql/30-schema.sql b/swh/storage/sql/30-schema.sql
--- a/swh/storage/sql/30-schema.sql
+++ b/swh/storage/sql/30-schema.sql
@@ -17,7 +17,7 @@
-- latest schema version
insert into dbversion(version, release, description)
- values(175, now(), 'Work In Progress');
+ values(176, now(), 'Work In Progress');
-- a SHA1 checksum
create domain sha1 as bytea check (length(value) = 20);
@@ -505,7 +505,8 @@
extid_type text not null,
extid bytea not null,
target_type object_type not null,
- target sha1_git not null
+ target sha1_git not null,
+ extid_version bigint not null default 0
);
comment on table extid is 'Correspondance SWH object (SWHID) <-> original revision id (vcs id)';
@@ -513,3 +514,4 @@
comment on column extid.extid is 'Intrinsic identifier of the object (e.g. hg revision)';
comment on column extid.target_type is 'Type of SWHID of the referenced SWH object';
comment on column extid.target is 'Value (hash) of SWHID of the refenced SWH object';
+comment on column extid.extid_version is 'Version of the extid for the given original object';
diff --git a/swh/storage/sql/40-funcs.sql b/swh/storage/sql/40-funcs.sql
--- a/swh/storage/sql/40-funcs.sql
+++ b/swh/storage/sql/40-funcs.sql
@@ -585,8 +585,8 @@
language plpgsql
as $$
begin
- insert into extid (extid_type, extid, target_type, target)
- select distinct t.extid_type, t.extid, t.target_type, t.target
+ insert into extid (extid_type, extid, extid_version, target_type, target)
+ select distinct t.extid_type, t.extid, t.extid_version, t.target_type, t.target
from tmp_extid t
on conflict do nothing;
return;
diff --git a/swh/storage/sql/60-indexes.sql b/swh/storage/sql/60-indexes.sql
--- a/swh/storage/sql/60-indexes.sql
+++ b/swh/storage/sql/60-indexes.sql
@@ -289,5 +289,5 @@
-- extid
-- used to query by (extid_type, extid) + to deduplicate the whole row
-create unique index concurrently on extid(extid_type, extid, target_type, target);
+create unique index concurrently on extid(extid_type, extid, extid_version, target_type, target);
create index concurrently on extid(target_type, target);
diff --git a/swh/storage/tests/storage_data.py b/swh/storage/tests/storage_data.py
--- a/swh/storage/tests/storage_data.py
+++ b/swh/storage/tests/storage_data.py
@@ -698,9 +698,18 @@
extid_type="directory",
extid=b"something",
)
+ extid4 = ExtID(
+ target=CoreSWHID(
+ object_type=SwhidObjectType.DIRECTORY, object_id=directory2.id
+ ),
+ extid_type="directory",
+ extid=b"something",
+ extid_version=2,
+ )
extids: Tuple[ExtID, ...] = (
extid1,
extid2,
extid3,
+ extid4,
)
diff --git a/swh/storage/tests/storage_tests.py b/swh/storage/tests/storage_tests.py
--- a/swh/storage/tests/storage_tests.py
+++ b/swh/storage/tests/storage_tests.py
@@ -1183,6 +1183,7 @@
ExtID(
extid=hgid,
extid_type="hg",
+ extid_version=1,
target=CoreSWHID(object_id=swhid, object_type=ObjectType.REVISION,),
)
for hgid, swhid in zip(extids, swhids)
@@ -1243,6 +1244,7 @@
ExtID(
extid=extid,
extid_type="git",
+ extid_version=2,
target=CoreSWHID(object_id=extid, object_type=ObjectType.REVISION,),
)
for extid in ids
@@ -1254,6 +1256,7 @@
ExtID(
extid=extid,
extid_type="hg",
+ extid_version=2,
target=CoreSWHID(object_id=extid, object_type=ObjectType.REVISION,),
)
for extid in ids
@@ -1300,6 +1303,42 @@
assert swh_storage.extid_get_from_target(ObjectType.REVISION, ids) == extids
assert swh_storage.extid_get_from_target(ObjectType.RELEASE, ids) == extids2
+ def test_extid_version_behavior(self, swh_storage, sample_data):
+ ids = [
+ revision.id
+ for revision in sample_data.revisions
+ if revision.type.value == "git"
+ ]
+
+ # Insert each extid twice: once with no extid_version, once with version 1
+ extids = [
+ ExtID(
+ extid=extid,
+ extid_type="git",
+ target=CoreSWHID(object_id=extid, object_type=ObjectType.REVISION,),
+ )
+ for extid in ids
+ ] + [
+ ExtID(
+ extid=extid,
+ extid_type="git",
+ extid_version=1,
+ target=CoreSWHID(object_id=extid, object_type=ObjectType.REVISION,),
+ )
+ for extid in ids
+ ]
+ swh_storage.extid_add(extids)
+
+ # Check that both versions (0 and 1) come back from either lookup
+ for git_id in ids:
+ objs = swh_storage.extid_get_from_extid("git", [git_id])
+ assert len(objs) == 2
+ assert set(obj.extid_version for obj in objs) == {0, 1}
+ for swhid in ids:
+ objs = swh_storage.extid_get_from_target(ObjectType.REVISION, [swhid])
+ assert len(objs) == 2
+ assert set(obj.extid_version for obj in objs) == {0, 1}
+
def test_release_add(self, swh_storage, sample_data):
release, release2 = sample_data.releases[:2]
diff --git a/swh/storage/tests/test_cassandra.py b/swh/storage/tests/test_cassandra.py
--- a/swh/storage/tests/test_cassandra.py
+++ b/swh/storage/tests/test_cassandra.py
@@ -469,6 +469,7 @@
ExtIDRow(
extid_type=extid.extid_type,
extid=extid.extid,
+ extid_version=extid.extid_version,
target_type=extid.target.object_type.value,
target=extid.target.object_id,
)
diff --git a/swh/storage/tests/test_storage_data.py b/swh/storage/tests/test_storage_data.py
--- a/swh/storage/tests/test_storage_data.py
+++ b/swh/storage/tests/test_storage_data.py
@@ -24,6 +24,7 @@
"authorities",
"origin_metadata",
"content_metadata",
+ "extids",
]:
for obj in getattr(data, attribute_key):
assert isinstance(obj, BaseModel)
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Thu, Jul 3, 3:30 PM (1 w, 2 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3224545
Attached To
D6023: Implement storage of the ExtID.extid_version field
Event Timeline
Log In to Comment