diff --git a/swh/storage/db.py b/swh/storage/db.py --- a/swh/storage/db.py +++ b/swh/storage/db.py @@ -482,20 +482,15 @@ """ ov = origin_visit assert ov.visit is not None - # doing an extra query like this is way simpler than trying to join - # the origin id in the query below - origin_id = next(self.origin_id_get_by_url([ov.origin])) - origin_visit_cols = ["origin", "visit", "date", "type"] - cur = self._cursor(cur) - query = """INSERT INTO origin_visit ({cols}) VALUES ({values}) + origin_visit_cols = ["origin", "visit", "date", "type"] + query = """INSERT INTO origin_visit ({cols}) + VALUES ((select id from origin where url=%s), {values}) ON CONFLICT (origin, visit) DO NOTHING""".format( cols=", ".join(origin_visit_cols), - values=", ".join("%s" for col in origin_visit_cols), - ) - cur.execute( - query, (origin_id, ov.visit, ov.date, ov.type), + values=", ".join("%s" for col in origin_visit_cols[1:]), ) + cur.execute(query, (ov.origin, ov.visit, ov.date, ov.type)) origin_visit_get_cols = [ "origin", diff --git a/swh/storage/storage.py b/swh/storage/storage.py --- a/swh/storage/storage.py +++ b/swh/storage/storage.py @@ -822,7 +822,7 @@ ) visit = attr.evolve(visit, visit=visit_id) else: - db.origin_visit_add_with_id(visit) + db.origin_visit_add_with_id(visit, cur=cur) assert visit.visit is not None all_visits.append(visit) # Forced to write after for the case when the visit has no id