Page Menu | Home | Software Heritage

D2771.diff
No One | Temporary

D2771.diff

diff --git a/sql/upgrades/156.sql b/sql/upgrades/156.sql
new file mode 100644
--- /dev/null
+++ b/sql/upgrades/156.sql
@@ -0,0 +1,25 @@
+-- SWH DB schema upgrade
+-- from_version: 155
+-- to_version: 156
+-- description: Make swh_release_add properly idempotent
+
+-- latest schema version
+insert into dbversion(version, release, description)
+ values(156, now(), 'Work In Progress');
+
+-- Create entries in release from tmp_release, skipping duplicate rows and ids already present in release
+create or replace function swh_release_add()
+ returns void
+ language plpgsql
+as $$
+begin
+ perform swh_person_add_from_release();
+
+ insert into release (id, target, target_type, date, date_offset, date_neg_utc_offset, name, comment, author, synthetic)
+ select distinct t.id, t.target, t.target_type, t.date, t.date_offset, t.date_neg_utc_offset, t.name, t.comment, a.id, t.synthetic
+ from tmp_release t
+ left join person a on a.fullname = t.author_fullname
+ where not exists (select 1 from release where t.id = release.id);
+ return;
+end
+$$;
diff --git a/swh/storage/cassandra/storage.py b/swh/storage/cassandra/storage.py
--- a/swh/storage/cassandra/storage.py
+++ b/swh/storage/cassandra/storage.py
@@ -512,16 +512,20 @@
return self._cql_runner.revision_get_random().id
def release_add(self, releases: Iterable[Release]) -> Dict:
- missing = self.release_missing([rel.id for rel in releases])
- releases = [rel for rel in releases if rel.id in missing]
+ to_add = []
+ for rel in releases:
+ if rel not in to_add:
+ to_add.append(rel)
+ missing = set(self.release_missing([rel.id for rel in to_add]))
+ to_add = [rel for rel in to_add if rel.id in missing]
- self.journal_writer.release_add(releases)
+ self.journal_writer.release_add(to_add)
- for release in releases:
+ for release in to_add:
if release:
self._cql_runner.release_add_one(release_to_db(release))
- return {"release:add": len(missing)}
+ return {"release:add": len(to_add)}
def release_missing(self, releases):
return self._cql_runner.release_missing(releases)
diff --git a/swh/storage/in_memory.py b/swh/storage/in_memory.py
--- a/swh/storage/in_memory.py
+++ b/swh/storage/in_memory.py
@@ -523,18 +523,19 @@
return random.choice(list(self._revisions))
def release_add(self, releases: Iterable[Release]) -> Dict:
- releases = [rel for rel in releases if rel.id not in self._releases]
- self.journal_writer.release_add(releases)
-
- count = 0
+ to_add = []
for rel in releases:
+ if rel.id not in self._releases and rel not in to_add:
+ to_add.append(rel)
+ self.journal_writer.release_add(to_add)
+
+ for rel in to_add:
if rel.author:
self._person_add(rel.author)
self._objects[rel.id].append(("release", rel.id))
self._releases[rel.id] = rel
- count += 1
- return {"release:add": count}
+ return {"release:add": len(to_add)}
def release_missing(self, releases):
yield from (rel for rel in releases if rel not in self._releases)
diff --git a/swh/storage/sql/30-swh-schema.sql b/swh/storage/sql/30-swh-schema.sql
--- a/swh/storage/sql/30-swh-schema.sql
+++ b/swh/storage/sql/30-swh-schema.sql
@@ -17,7 +17,7 @@
-- latest schema version
insert into dbversion(version, release, description)
- values(155, now(), 'Work In Progress');
+ values(156, now(), 'Work In Progress');
-- a SHA1 checksum
create domain sha1 as bytea check (length(value) = 20);
diff --git a/swh/storage/sql/40-swh-func.sql b/swh/storage/sql/40-swh-func.sql
--- a/swh/storage/sql/40-swh-func.sql
+++ b/swh/storage/sql/40-swh-func.sql
@@ -568,9 +568,10 @@
perform swh_person_add_from_release();
insert into release (id, target, target_type, date, date_offset, date_neg_utc_offset, name, comment, author, synthetic)
- select t.id, t.target, t.target_type, t.date, t.date_offset, t.date_neg_utc_offset, t.name, t.comment, a.id, t.synthetic
- from tmp_release t
- left join person a on a.fullname = t.author_fullname;
+ select distinct t.id, t.target, t.target_type, t.date, t.date_offset, t.date_neg_utc_offset, t.name, t.comment, a.id, t.synthetic
+ from tmp_release t
+ left join person a on a.fullname = t.author_fullname
+ where not exists (select 1 from release where t.id = release.id);
return;
end
$$;
diff --git a/swh/storage/tests/test_storage.py b/swh/storage/tests/test_storage.py
--- a/swh/storage/tests/test_storage.py
+++ b/swh/storage/tests/test_storage.py
@@ -1227,13 +1227,17 @@
("release", Release.from_dict(data.release))
]
- actual_result = swh_storage.release_add([data.release, data.release2])
+ actual_result = swh_storage.release_add(
+ [data.release, data.release2, data.release, data.release2]
+ )
assert actual_result == {"release:add": 1}
- assert list(swh_storage.journal_writer.journal.objects) == [
- ("release", Release.from_dict(data.release)),
- ("release", Release.from_dict(data.release2)),
- ]
+ assert set(swh_storage.journal_writer.journal.objects) == set(
+ [
+ ("release", Release.from_dict(data.release)),
+ ("release", Release.from_dict(data.release2)),
+ ]
+ )
def test_release_add_name_clash(self, swh_storage):
release1 = data.release.copy()

File Metadata

Mime Type
text/plain
Expires
Sun, Aug 17, 11:15 PM (1 w, 1 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3216417

Event Timeline