diff --git a/swh/provenance/provenance.py b/swh/provenance/provenance.py --- a/swh/provenance/provenance.py +++ b/swh/provenance/provenance.py @@ -251,22 +251,6 @@ LOGGER.warning( "Unable to write directory dates to the storage. Retrying..." ) - - revs = { - sha1: RevisionData(date=None, origin=None) - for sha1, date in self.cache["revision"]["data"].items() - if sha1 in self.cache["revision"]["added"] and date is not None - } - if revs: - while not self.storage.revision_add(revs): - statsd.increment( - metric=BACKEND_OPERATIONS_METRIC, - tags={"method": "flush_revision_content_retry_revision_none"}, - ) - LOGGER.warning( - "Unable to write revision entities to the storage. Retrying..." - ) - paths = { hashlib.sha1(path).digest(): path for _, _, path in self.cache["content_in_revision"] diff --git a/swh/provenance/sql/30-schema.sql b/swh/provenance/sql/30-schema.sql --- a/swh/provenance/sql/30-schema.sql +++ b/swh/provenance/sql/30-schema.sql @@ -20,7 +20,7 @@ ( id bigserial primary key, -- internal identifier of the content blob sha1 sha1_git unique not null, -- intrinsic identifier of the content blob - date timestamptz not null -- timestamp of the revision where the blob appears early + date timestamptz -- timestamp of the revision where the blob appears early ); comment on column content.id is 'Content internal identifier'; comment on column content.sha1 is 'Content intrinsic identifier'; @@ -30,7 +30,7 @@ ( id bigserial primary key, -- internal identifier of the directory appearing in an isochrone inner frontier sha1 sha1_git unique not null, -- intrinsic identifier of the directory - date timestamptz not null, -- max timestamp among those of the directory children's + date timestamptz, -- max timestamp among those of the directory children's flat boolean not null default false -- flag acknowledging if the directory is flattenned in the model ); comment on column directory.id is 'Directory internal identifier'; diff --git a/swh/provenance/sql/40-funcs.sql b/swh/provenance/sql/40-funcs.sql --- a/swh/provenance/sql/40-funcs.sql +++ b/swh/provenance/sql/40-funcs.sql @@ -104,6 +104,27 @@ join_location := ''; end if; + execute format( + 'insert into %s (sha1) + select src + from tmp_relation_add + on conflict do nothing', + src_table); + + execute format( + 'insert into %s (sha1) + select dst + from tmp_relation_add + on conflict do nothing', + dst_table); + + if src_table in ('content'::regclass, 'directory'::regclass) then + insert into location(path) + select distinct path + from tmp_relation_add + on conflict do nothing; + end if; + execute format( 'insert into %s select S.id, ' || select_fields || ' @@ -283,6 +304,29 @@ group_entries text; on_conflict text; begin + + execute format( + 'insert into %s (sha1) + select src + from tmp_relation_add + on conflict do nothing', + src_table); + + execute format( + 'insert into %s (sha1) + select dst + from tmp_relation_add + on conflict do nothing', + dst_table); + + if src_table in ('content'::regclass, 'directory'::regclass) then + insert into location(path) + select distinct path + from tmp_relation_add + on conflict do nothing; + end if; + + if src_table in ('content'::regclass, 'directory'::regclass) then select_fields := 'array_agg(D.id), array_agg(L.id)'; join_location := 'inner join location as L on (digest(L.path,''sha1'') = digest(V.path,''sha1''))'; diff --git a/swh/provenance/sql/upgrades/004.sql b/swh/provenance/sql/upgrades/004.sql --- a/swh/provenance/sql/upgrades/004.sql +++ b/swh/provenance/sql/upgrades/004.sql @@ -24,3 +24,6 @@ $$; drop type database_flavor_old; + +alter table content alter column date drop not null; +alter table directory alter column date drop not null;