diff --git a/sql/swh-func.sql b/sql/swh-func.sql --- a/sql/swh-func.sql +++ b/sql/swh-func.sql @@ -725,36 +725,6 @@ end $$; -create or replace function swh_occurrence_update_for_origin(origin_id bigint) - returns void - language sql -as $$ - delete from occurrence where origin = origin_id; - insert into occurrence (origin, branch, target, target_type) - select origin, branch, target, target_type - from occurrence_history - where origin = origin_id and - (select visit from origin_visit - where origin = origin_id - order by date desc - limit 1) = any(visits); -$$; - -create or replace function swh_occurrence_update_all() - returns void - language plpgsql -as $$ -declare - origin_id origin.id%type; -begin - for origin_id in - select distinct id from origin - loop - perform swh_occurrence_update_for_origin(origin_id); - end loop; - return; -end; -$$; -- add a new origin_visit for origin origin_id at date. -- @@ -805,13 +775,6 @@ select origin, branch, target, target_type, ARRAY[visit] from occurrence_history_id_visit where object_id is null; - - -- update occurrence - for origin_id in - select distinct origin from tmp_occurrence_history - loop - perform swh_occurrence_update_for_origin(origin_id); - end loop; return; end $$; @@ -1282,7 +1245,6 @@ 'directory_entry_dir', 'directory_entry_file', 'directory_entry_rev', - 'occurrence', 'occurrence_history', 'origin', 'origin_visit', diff --git a/sql/swh-indexes.sql b/sql/swh-indexes.sql --- a/sql/swh-indexes.sql +++ b/sql/swh-indexes.sql @@ -204,13 +204,6 @@ alter table occurrence_history add constraint occurrence_history_origin_fkey foreign key (origin) references origin(id) not valid; alter table occurrence_history validate constraint occurrence_history_origin_fkey; --- occurrence -create unique index concurrently occurrence_pkey on occurrence(origin, branch); -alter table occurrence add primary key using index occurrence_pkey; - -alter table occurrence add constraint occurrence_origin_fkey foreign key (origin) references origin(id) not valid; -alter table occurrence validate constraint occurrence_origin_fkey; - -- release create unique index concurrently release_pkey on release(id); alter table release add primary key using index release_pkey; diff --git a/sql/swh-schema.sql b/sql/swh-schema.sql --- a/sql/swh-schema.sql +++ b/sql/swh-schema.sql @@ -12,7 +12,7 @@ -- latest schema version insert into dbversion(version, release, description) - values(122, now(), 'Work In Progress'); + values(123, now(), 'Work In Progress'); -- a SHA1 checksum create domain sha1 as bytea check (length(value) = 20); @@ -337,16 +337,6 @@ snapshot_branch_id bigint ); --- Materialized view of occurrence_history, storing the *current* value of each --- branch, as last seen by SWH. -create table occurrence -( - origin bigint, - branch bytea not null, - target sha1_git not null, - target_type object_type not null -); - -- END legacy section (T830) -- A snapshot represents the entire state of a software origin as crawled by diff --git a/sql/upgrades/123.sql b/sql/upgrades/123.sql new file mode 100644 --- /dev/null +++ b/sql/upgrades/123.sql @@ -0,0 +1,76 @@ +-- SWH DB schema upgrade +-- from_version: 122 +-- to_version: 123 +-- description: Remove the occurrence table + +insert into dbversion(version, release, description) + values(123, now(), 'Work In Progress'); + +CREATE OR REPLACE FUNCTION swh_occurrence_history_add() RETURNS void + LANGUAGE plpgsql + AS $$ +declare + origin_id origin.id%type; +begin + -- Create or update occurrence_history + with occurrence_history_id_visit as ( + select tmp_occurrence_history.*, object_id, visits from tmp_occurrence_history + left join occurrence_history using(origin, branch, target, target_type) + ), + occurrences_to_update as ( + select object_id, visit from occurrence_history_id_visit where object_id is not null + ), + update_occurrences as ( + update occurrence_history + set visits = array(select unnest(occurrence_history.visits) as e + union + select occurrences_to_update.visit as e + order by e) + from occurrences_to_update + where occurrence_history.object_id = occurrences_to_update.object_id + ) + insert into occurrence_history (origin, branch, target, target_type, visits) + select origin, branch, target, target_type, ARRAY[visit] + from occurrence_history_id_visit + where object_id is null; + return; +end +$$; + +CREATE OR REPLACE FUNCTION swh_stat_counters() RETURNS SETOF public.counter + LANGUAGE sql STABLE + AS $$ + select object_type as label, value as value + from object_counts + where object_type in ( + 'content', + 'directory', + 'directory_entry_dir', + 'directory_entry_file', + 'directory_entry_rev', + 'occurrence_history', + 'origin', + 'origin_visit', + 'person', + 'entity', + 'entity_history', + 'release', + 'revision', + 'revision_history', + 'skipped_content', + 'snapshot' + ); +$$; + +DROP FUNCTION swh_occurrence_by_origin_visit(origin_id bigint, visit_id bigint); + +DROP FUNCTION swh_occurrence_update_all(); + +DROP FUNCTION swh_occurrence_update_for_origin(origin_id bigint); + +DROP FUNCTION swh_revision_find_occurrence(revision_id public.sha1_git); + +DROP TABLE occurrence; + +DELETE FROM object_counts where object_type = 'occurrence'; + diff --git a/swh/storage/api/client.py b/swh/storage/api/client.py --- a/swh/storage/api/client.py +++ b/swh/storage/api/client.py @@ -92,9 +92,6 @@ def object_find_by_sha1_git(self, ids): return self.post('object/find_by_sha1_git', {'ids': ids}) - def occurrence_get(self, origin_id): - return self.post('occurrence', {'origin_id': origin_id}) - def occurrence_add(self, occurrences): return self.post('occurrence/add', {'occurrences': occurrences}) diff --git a/swh/storage/api/server.py b/swh/storage/api/server.py --- a/swh/storage/api/server.py +++ b/swh/storage/api/server.py @@ -192,11 +192,6 @@ **decode_request(request))) -@app.route('/occurrence', methods=['POST']) -def occurrence_get(): - return encode_data(get_storage().occurrence_get(**decode_request(request))) - - @app.route('/occurrence/add', methods=['POST']) def occurrence_add(): return encode_data(get_storage().occurrence_add(**decode_request(request))) diff --git a/swh/storage/db.py b/swh/storage/db.py --- a/swh/storage/db.py +++ b/swh/storage/db.py @@ -344,22 +344,6 @@ yield from cursor_to_bytes(cur) - def occurrence_get(self, origin_id, cur=None): - """Retrieve latest occurrence's information by origin_id. - - """ - cur = self._cursor(cur) - - cur.execute("""SELECT origin, branch, target, target_type, - (select max(date) from origin_visit - where origin=%s) as date - FROM occurrence - WHERE origin=%s - """, - (origin_id, origin_id)) - - yield from cursor_to_bytes(cur) - def snapshot_exists(self, snapshot_id, cur=None): """Check whether a snapshot with the given id exists""" cur = self._cursor(cur) diff --git a/swh/storage/storage.py b/swh/storage/storage.py --- a/swh/storage/storage.py +++ b/swh/storage/storage.py @@ -863,25 +863,6 @@ db.occurrence_history_add_from_temp(cur) - @db_transaction_generator(statement_timeout=2000) - def occurrence_get(self, origin_id, db=None, cur=None): - """Retrieve occurrence information per origin_id. - - Args: - origin_id: The occurrence's origin. - - Yields: - List of occurrences matching criterion. - - """ - for line in db.occurrence_get(origin_id, cur): - yield { - 'origin': line[0], - 'branch': line[1], - 'target': line[2], - 'target_type': line[3], - } - @db_transaction() def origin_visit_add(self, origin, ts, db=None, cur=None): """Add an origin_visit for the origin at ts with status 'ongoing'. diff --git a/swh/storage/tests/test_storage.py b/swh/storage/tests/test_storage.py --- a/swh/storage/tests/test_storage.py +++ b/swh/storage/tests/test_storage.py @@ -1583,36 +1583,6 @@ occur2['target_type'], date_visit2)) @istest - def occurrence_get(self): - # given - occur = self.occurrence.copy() - origin_id = self.storage.origin_add_one(self.origin2) - origin_visit1 = self.storage.origin_visit_add(origin_id, - self.date_visit1) - - revision = self.revision.copy() - revision['id'] = occur['target'] - self.storage.revision_add([revision]) - - occur.update({ - 'origin': origin_id, - 'visit': origin_visit1['visit'], - }) - self.storage.occurrence_add([occur]) - self.storage.occurrence_add([occur]) - - # when - actual_occurrence = list(self.storage.occurrence_get(origin_id)) - - # then - expected_occurrence = self.occurrence.copy() - expected_occurrence.update({ - 'origin': origin_id - }) - self.assertEquals(len(actual_occurrence), 1) - self.assertEquals(actual_occurrence[0], expected_occurrence) - - @istest def snapshot_add_get_empty(self): origin_id = self.storage.origin_add_one(self.origin) origin_visit1 = self.storage.origin_visit_add(origin_id, @@ -1827,7 +1797,7 @@ @istest def stat_counters(self): expected_keys = ['content', 'directory', 'directory_entry_dir', - 'occurrence', 'origin', 'person', 'revision'] + 'origin', 'person', 'revision'] for key in expected_keys: self.cursor.execute('select * from swh_update_counter(%s)', (key,))