Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F9341937
D5363.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
13 KB
Subscribers
None
D5363.diff
View Options
diff --git a/sql/upgrades/173.sql b/sql/upgrades/173.sql
new file mode 100644
--- /dev/null
+++ b/sql/upgrades/173.sql
@@ -0,0 +1,14 @@
+-- SWH DB schema upgrade
+-- from_version: 172
+-- to_version: 173
+-- description: remove unicity on (extid_type, extid) and (target_type, target)
+
+insert into dbversion(version, release, description)
+ values(173, now(), 'Work In Progress');
+
+-- At the time this migratinon runs, the table is empty,
+-- so no need to bother about performance
+drop index extid_extid_type_extid_idx;
+drop index extid_target_type_target_idx;
+create unique index concurrently on extid(extid_type, extid, target_type, target);
+create index concurrently on extid(target_type, target);
diff --git a/swh/storage/cassandra/storage.py b/swh/storage/cassandra/storage.py
--- a/swh/storage/cassandra/storage.py
+++ b/swh/storage/cassandra/storage.py
@@ -1360,63 +1360,46 @@
target=extid.target.object_id,
)
(token, insertion_finalizer) = self._cql_runner.extid_add_prepare(extidrow)
- if (
- self.extid_get_from_extid(extid.extid_type, [extid.extid])[0]
- or self.extid_get_from_target(
- extid.target.object_type, [extid.target.object_id]
- )[0]
- ):
- # on conflict do nothing...
- continue
self._cql_runner.extid_index_add_one(extidrow, token)
insertion_finalizer()
inserted += 1
return {"extid:add": inserted}
- def extid_get_from_extid(
- self, id_type: str, ids: List[bytes]
- ) -> List[Optional[ExtID]]:
- result: List[Optional[ExtID]] = []
+ def extid_get_from_extid(self, id_type: str, ids: List[bytes]) -> List[ExtID]:
+ result: List[ExtID] = []
for extid in ids:
extidrows = list(self._cql_runner.extid_get_from_extid(id_type, extid))
- assert len(extidrows) <= 1
- if extidrows:
- result.append(
- ExtID(
- extid_type=extidrows[0].extid_type,
- extid=extidrows[0].extid,
- target=CoreSWHID(
- object_type=extidrows[0].target_type,
- object_id=extidrows[0].target,
- ),
- )
+ result.extend(
+ ExtID(
+ extid_type=extidrow.extid_type,
+ extid=extidrow.extid,
+ target=CoreSWHID(
+ object_type=extidrow.target_type, object_id=extidrow.target,
+ ),
)
- else:
- result.append(None)
+ for extidrow in extidrows
+ )
return result
def extid_get_from_target(
self, target_type: SwhidObjectType, ids: List[Sha1Git]
- ) -> List[Optional[ExtID]]:
- result: List[Optional[ExtID]] = []
+ ) -> List[ExtID]:
+ result: List[ExtID] = []
for target in ids:
extidrows = list(
self._cql_runner.extid_get_from_target(target_type.value, target)
)
- assert len(extidrows) <= 1
- if extidrows:
- result.append(
- ExtID(
- extid_type=extidrows[0].extid_type,
- extid=extidrows[0].extid,
- target=CoreSWHID(
- object_type=SwhidObjectType(extidrows[0].target_type),
- object_id=extidrows[0].target,
- ),
- )
+ result.extend(
+ ExtID(
+ extid_type=extidrow.extid_type,
+ extid=extidrow.extid,
+ target=CoreSWHID(
+ object_type=SwhidObjectType(extidrow.target_type),
+ object_id=extidrow.target,
+ ),
)
- else:
- result.append(None)
+ for extidrow in extidrows
+ )
return result
# Misc
diff --git a/swh/storage/interface.py b/swh/storage/interface.py
--- a/swh/storage/interface.py
+++ b/swh/storage/interface.py
@@ -502,9 +502,7 @@
...
@remote_api_endpoint("extid/from_extid")
- def extid_get_from_extid(
- self, id_type: str, ids: List[bytes]
- ) -> List[Optional[ExtID]]:
+ def extid_get_from_extid(self, id_type: str, ids: List[bytes]) -> List[ExtID]:
"""Get ExtID objects from external IDs
Args:
@@ -512,7 +510,7 @@
ids: list of external IDs
Returns:
- list of ExtID objects (if the ext ID is known, None otherwise)
+ list of ExtID objects
"""
...
@@ -520,7 +518,7 @@
@remote_api_endpoint("extid/from_target")
def extid_get_from_target(
self, target_type: ObjectType, ids: List[Sha1Git]
- ) -> List[Optional[ExtID]]:
+ ) -> List[ExtID]:
"""Get ExtID objects from target IDs and target_type
Args:
@@ -528,7 +526,7 @@
ids: list of target IDs
Returns:
- list of ExtID objects (if the SWH ID is known, None otherwise)
+ list of ExtID objects
"""
...
diff --git a/swh/storage/postgresql/db.py b/swh/storage/postgresql/db.py
--- a/swh/storage/postgresql/db.py
+++ b/swh/storage/postgresql/db.py
@@ -29,7 +29,7 @@
"""
- current_version = 172
+ current_version = 173
def mktemp_dir_entry(self, entry_type, cur=None):
self._cursor(cur).execute(
diff --git a/swh/storage/postgresql/storage.py b/swh/storage/postgresql/storage.py
--- a/swh/storage/postgresql/storage.py
+++ b/swh/storage/postgresql/storage.py
@@ -643,28 +643,22 @@
@db_transaction()
def extid_get_from_extid(
self, id_type: str, ids: List[bytes], db=None, cur=None
- ) -> List[Optional[ExtID]]:
+ ) -> List[ExtID]:
extids = []
for row in db.extid_get_from_extid_list(id_type, ids, cur):
- extids.append(
- converters.db_to_extid(dict(zip(db.extid_cols, row)))
- if row[0] is not None
- else None
- )
+ if row[0] is not None:
+ extids.append(converters.db_to_extid(dict(zip(db.extid_cols, row))))
return extids
@timed
@db_transaction()
def extid_get_from_target(
self, target_type: ObjectType, ids: List[Sha1Git], db=None, cur=None
- ) -> List[Optional[ExtID]]:
+ ) -> List[ExtID]:
extids = []
for row in db.extid_get_from_swhid_list(target_type.value, ids, cur):
- extids.append(
- converters.db_to_extid(dict(zip(db.extid_cols, row)))
- if row[0] is not None
- else None
- )
+ if row[0] is not None:
+ extids.append(converters.db_to_extid(dict(zip(db.extid_cols, row))))
return extids
@timed
diff --git a/swh/storage/sql/30-schema.sql b/swh/storage/sql/30-schema.sql
--- a/swh/storage/sql/30-schema.sql
+++ b/swh/storage/sql/30-schema.sql
@@ -17,7 +17,7 @@
-- latest schema version
insert into dbversion(version, release, description)
- values(172, now(), 'Work In Progress');
+ values(173, now(), 'Work In Progress');
-- a SHA1 checksum
create domain sha1 as bytea check (length(value) = 20);
diff --git a/swh/storage/sql/60-indexes.sql b/swh/storage/sql/60-indexes.sql
--- a/swh/storage/sql/60-indexes.sql
+++ b/swh/storage/sql/60-indexes.sql
@@ -287,5 +287,7 @@
alter table object_counts_bucketed add primary key using index object_counts_bucketed_pkey;
-- extid
-create unique index concurrently on extid(extid_type, extid);
-create unique index concurrently on extid(target_type, target);
+
+-- used to query by (extid_type, extid) + to deduplicate the whole row
+create unique index concurrently on extid(extid_type, extid, target_type, target);
+create index concurrently on extid(target_type, target);
diff --git a/swh/storage/tests/storage_tests.py b/swh/storage/tests/storage_tests.py
--- a/swh/storage/tests/storage_tests.py
+++ b/swh/storage/tests/storage_tests.py
@@ -1090,7 +1090,6 @@
for revision in sample_data.revisions
if revision.type.value == "git"
]
- nullids = [None] * len(gitids)
extids = [
ExtID(
extid=gitid,
@@ -1100,8 +1099,8 @@
for gitid in gitids
]
- assert swh_storage.extid_get_from_extid("git", gitids) == nullids
- assert swh_storage.extid_get_from_target(ObjectType.REVISION, gitids) == nullids
+ assert swh_storage.extid_get_from_extid("git", gitids) == []
+ assert swh_storage.extid_get_from_target(ObjectType.REVISION, gitids) == []
summary = swh_storage.extid_add(extids)
assert summary == {"extid:add": len(gitids)}
@@ -1109,8 +1108,8 @@
assert swh_storage.extid_get_from_extid("git", gitids) == extids
assert swh_storage.extid_get_from_target(ObjectType.REVISION, gitids) == extids
- assert swh_storage.extid_get_from_extid("hg", gitids) == nullids
- assert swh_storage.extid_get_from_target(ObjectType.RELEASE, gitids) == nullids
+ assert swh_storage.extid_get_from_extid("hg", gitids) == []
+ assert swh_storage.extid_get_from_target(ObjectType.RELEASE, gitids) == []
def test_extid_add_hg(self, swh_storage, sample_data):
def get_node(revision):
@@ -1131,10 +1130,9 @@
for revision in sample_data.revisions
if revision.type.value == "hg"
]
- nullids = [None] * len(swhids)
- assert swh_storage.extid_get_from_extid("hg", extids) == nullids
- assert swh_storage.extid_get_from_target(ObjectType.REVISION, swhids) == nullids
+ assert swh_storage.extid_get_from_extid("hg", extids) == []
+ assert swh_storage.extid_get_from_target(ObjectType.REVISION, swhids) == []
extid_objs = [
ExtID(
@@ -1152,8 +1150,8 @@
swh_storage.extid_get_from_target(ObjectType.REVISION, swhids) == extid_objs
)
- assert swh_storage.extid_get_from_extid("git", extids) == nullids
- assert swh_storage.extid_get_from_target(ObjectType.RELEASE, swhids) == nullids
+ assert swh_storage.extid_get_from_extid("git", extids) == []
+ assert swh_storage.extid_get_from_target(ObjectType.RELEASE, swhids) == []
def test_extid_add_twice(self, swh_storage, sample_data):
@@ -1180,14 +1178,13 @@
assert swh_storage.extid_get_from_extid("git", gitids) == extids
assert swh_storage.extid_get_from_target(ObjectType.REVISION, gitids) == extids
- def test_extid_add_extid_unicity(self, swh_storage, sample_data):
+ def test_extid_add_extid_multicity(self, swh_storage, sample_data):
ids = [
revision.id
for revision in sample_data.revisions
if revision.type.value == "git"
]
- nullids = [None] * len(ids)
extids = [
ExtID(
@@ -1199,7 +1196,7 @@
]
swh_storage.extid_add(extids)
- # try to add "modified-extid" versions, should be noops
+ # try to add "modified-extid" versions, should be added
extids2 = [
ExtID(
extid=extid,
@@ -1211,17 +1208,19 @@
swh_storage.extid_add(extids2)
assert swh_storage.extid_get_from_extid("git", ids) == extids
- assert swh_storage.extid_get_from_extid("hg", ids) == nullids
- assert swh_storage.extid_get_from_target(ObjectType.REVISION, ids) == extids
+ assert swh_storage.extid_get_from_extid("hg", ids) == extids2
+ assert set(swh_storage.extid_get_from_target(ObjectType.REVISION, ids)) == {
+ *extids,
+ *extids2,
+ }
- def test_extid_add_target_unicity(self, swh_storage, sample_data):
+ def test_extid_add_target_multicity(self, swh_storage, sample_data):
ids = [
revision.id
for revision in sample_data.revisions
if revision.type.value == "git"
]
- nullids = [None] * len(ids)
extids = [
ExtID(
@@ -1233,7 +1232,7 @@
]
swh_storage.extid_add(extids)
- # try to add "modified" versions, should be noops
+ # try to add "modified" versions, should be added
extids2 = [
ExtID(
extid=extid,
@@ -1244,9 +1243,9 @@
]
swh_storage.extid_add(extids2)
- assert swh_storage.extid_get_from_extid("git", ids) == extids
+ assert set(swh_storage.extid_get_from_extid("git", ids)) == {*extids, *extids2}
assert swh_storage.extid_get_from_target(ObjectType.REVISION, ids) == extids
- assert swh_storage.extid_get_from_target(ObjectType.RELEASE, ids) == nullids
+ assert swh_storage.extid_get_from_target(ObjectType.RELEASE, ids) == extids2
def test_release_add(self, swh_storage, sample_data):
release, release2 = sample_data.releases[:2]
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Thu, Jul 3, 12:22 PM (2 w, 4 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3224157
Attached To
D5363: extid: remove unicity on (extid_type, extid) and (target_type, target)
Event Timeline
Log In to Comment