Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F9697271
D2771.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
5 KB
Subscribers
None
D2771.diff
View Options
diff --git a/sql/upgrades/156.sql b/sql/upgrades/156.sql
new file mode 100644
--- /dev/null
+++ b/sql/upgrades/156.sql
@@ -0,0 +1,25 @@
+-- SWH DB schema upgrade
+-- from_version: 155
+-- to_version: 156
+-- description: Make swh_release_add properly idempotent
+
+-- latest schema version
+insert into dbversion(version, release, description)
+ values(156, now(), 'Work In Progress');
+
+-- Create entries in release from tmp_release
+create or replace function swh_release_add()
+ returns void
+ language plpgsql
+as $$
+begin
+ perform swh_person_add_from_release();
+
+ insert into release (id, target, target_type, date, date_offset, date_neg_utc_offset, name, comment, author, synthetic)
+ select distinct t.id, t.target, t.target_type, t.date, t.date_offset, t.date_neg_utc_offset, t.name, t.comment, a.id, t.synthetic
+ from tmp_release t
+ left join person a on a.fullname = t.author_fullname
+ where not exists (select 1 from release where t.id = release.id);
+ return;
+end
+$$;
diff --git a/swh/storage/cassandra/storage.py b/swh/storage/cassandra/storage.py
--- a/swh/storage/cassandra/storage.py
+++ b/swh/storage/cassandra/storage.py
@@ -512,16 +512,20 @@
return self._cql_runner.revision_get_random().id
def release_add(self, releases: Iterable[Release]) -> Dict:
- missing = self.release_missing([rel.id for rel in releases])
- releases = [rel for rel in releases if rel.id in missing]
+ to_add = []
+ for rel in releases:
+ if rel not in to_add:
+ to_add.append(rel)
+ missing = set(self.release_missing([rel.id for rel in to_add]))
+ to_add = [rel for rel in to_add if rel.id in missing]
- self.journal_writer.release_add(releases)
+ self.journal_writer.release_add(to_add)
- for release in releases:
+ for release in to_add:
if release:
self._cql_runner.release_add_one(release_to_db(release))
- return {"release:add": len(missing)}
+ return {"release:add": len(to_add)}
def release_missing(self, releases):
return self._cql_runner.release_missing(releases)
diff --git a/swh/storage/in_memory.py b/swh/storage/in_memory.py
--- a/swh/storage/in_memory.py
+++ b/swh/storage/in_memory.py
@@ -523,18 +523,19 @@
return random.choice(list(self._revisions))
def release_add(self, releases: Iterable[Release]) -> Dict:
- releases = [rel for rel in releases if rel.id not in self._releases]
- self.journal_writer.release_add(releases)
-
- count = 0
+ to_add = []
for rel in releases:
+ if rel.id not in self._releases and rel not in to_add:
+ to_add.append(rel)
+ self.journal_writer.release_add(to_add)
+
+ for rel in to_add:
if rel.author:
self._person_add(rel.author)
self._objects[rel.id].append(("release", rel.id))
self._releases[rel.id] = rel
- count += 1
- return {"release:add": count}
+ return {"release:add": len(to_add)}
def release_missing(self, releases):
yield from (rel for rel in releases if rel not in self._releases)
diff --git a/swh/storage/sql/30-swh-schema.sql b/swh/storage/sql/30-swh-schema.sql
--- a/swh/storage/sql/30-swh-schema.sql
+++ b/swh/storage/sql/30-swh-schema.sql
@@ -17,7 +17,7 @@
-- latest schema version
insert into dbversion(version, release, description)
- values(155, now(), 'Work In Progress');
+ values(156, now(), 'Work In Progress');
-- a SHA1 checksum
create domain sha1 as bytea check (length(value) = 20);
diff --git a/swh/storage/sql/40-swh-func.sql b/swh/storage/sql/40-swh-func.sql
--- a/swh/storage/sql/40-swh-func.sql
+++ b/swh/storage/sql/40-swh-func.sql
@@ -568,9 +568,10 @@
perform swh_person_add_from_release();
insert into release (id, target, target_type, date, date_offset, date_neg_utc_offset, name, comment, author, synthetic)
- select t.id, t.target, t.target_type, t.date, t.date_offset, t.date_neg_utc_offset, t.name, t.comment, a.id, t.synthetic
- from tmp_release t
- left join person a on a.fullname = t.author_fullname;
+ select distinct t.id, t.target, t.target_type, t.date, t.date_offset, t.date_neg_utc_offset, t.name, t.comment, a.id, t.synthetic
+ from tmp_release t
+ left join person a on a.fullname = t.author_fullname
+ where not exists (select 1 from release where t.id = release.id);
return;
end
$$;
diff --git a/swh/storage/tests/test_storage.py b/swh/storage/tests/test_storage.py
--- a/swh/storage/tests/test_storage.py
+++ b/swh/storage/tests/test_storage.py
@@ -1227,13 +1227,17 @@
("release", Release.from_dict(data.release))
]
- actual_result = swh_storage.release_add([data.release, data.release2])
+ actual_result = swh_storage.release_add(
+ [data.release, data.release2, data.release, data.release2]
+ )
assert actual_result == {"release:add": 1}
- assert list(swh_storage.journal_writer.journal.objects) == [
- ("release", Release.from_dict(data.release)),
- ("release", Release.from_dict(data.release2)),
- ]
+ assert set(swh_storage.journal_writer.journal.objects) == set(
+ [
+ ("release", Release.from_dict(data.release)),
+ ("release", Release.from_dict(data.release2)),
+ ]
+ )
def test_release_add_name_clash(self, swh_storage):
release1 = data.release.copy()
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Sun, Aug 17, 11:15 PM (1 w, 1 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3216417
Attached To
D2771: Make release_add support adding the same object twice in the same call
Event Timeline
Log In to Comment