diff --git a/sql/swh-func.sql b/sql/swh-func.sql --- a/sql/swh-func.sql +++ b/sql/swh-func.sql @@ -91,20 +91,6 @@ alter table tmp_release drop column object_id; $$; --- create a temporary table for occurrence_history -create or replace function swh_mktemp_occurrence_history() - returns void - language sql -as $$ - create temporary table tmp_occurrence_history( - like occurrence_history including defaults, - visit bigint not null - ) on commit drop; - alter table tmp_occurrence_history - drop column visits, - drop column object_id; -$$; - -- create a temporary table for the branches of a snapshot create or replace function swh_mktemp_snapshot_branch() returns void @@ -720,42 +706,6 @@ returning visit; $$; --- add tmp_occurrence_history entries to occurrence_history --- --- operates in bulk: 0. swh_mktemp(occurrence_history), 1. COPY to tmp_occurrence_history, --- 2. call this function -create or replace function swh_occurrence_history_add() - returns void - language plpgsql -as $$ -declare - origin_id origin.id%type; -begin - -- Create or update occurrence_history - with occurrence_history_id_visit as ( - select tmp_occurrence_history.*, object_id, visits from tmp_occurrence_history - left join occurrence_history using(origin, branch, target, target_type) - ), - occurrences_to_update as ( - select object_id, visit from occurrence_history_id_visit where object_id is not null - ), - update_occurrences as ( - update occurrence_history - set visits = array(select unnest(occurrence_history.visits) as e - union - select occurrences_to_update.visit as e - order by e) - from occurrences_to_update - where occurrence_history.object_id = occurrences_to_update.object_id - ) - insert into occurrence_history (origin, branch, target, target_type, visits) - select origin, branch, target, target_type, ARRAY[visit] - from occurrence_history_id_visit - where object_id is null; - return; -end -$$; - create or replace function swh_snapshot_add(origin bigint, visit bigint, snapshot_id 
snapshot.id%type) returns void language plpgsql @@ -926,44 +876,6 @@ order by date desc $$; - --- Retrieve occurrence by filtering on origin_id and optionally on --- branch_name and/or validity range -create or replace function swh_occurrence_get_by( - origin_id bigint, - branch_name bytea default NULL, - date timestamptz default NULL) - returns setof occurrence_history - language plpgsql -as $$ -declare - filters text[] := array[] :: text[]; -- AND-clauses used to filter content - visit_id bigint; - q text; -begin - if origin_id is null then - raise exception 'Needs an origin_id to get an occurrence.'; - end if; - filters := filters || format('origin = %L', origin_id); - if branch_name is not null then - filters := filters || format('branch = %L', branch_name); - end if; - if date is not null then - select visit from swh_visit_find_by_date(origin_id, date) into visit_id; - else - select visit from origin_visit where origin = origin_id order by origin_visit.date desc limit 1 into visit_id; - end if; - if visit_id is null then - return; - end if; - filters := filters || format('%L = any(visits)', visit_id); - - q = format('select * from occurrence_history where %s', - array_to_string(filters, ' and ')); - return query execute q; -end -$$; - -- Object listing by object_id create or replace function swh_content_list_by_object_id( @@ -1114,7 +1026,6 @@ 'directory_entry_dir', 'directory_entry_file', 'directory_entry_rev', - 'occurrence_history', 'origin', 'origin_visit', 'person', diff --git a/sql/swh-indexes.sql b/sql/swh-indexes.sql --- a/sql/swh-indexes.sql +++ b/sql/swh-indexes.sql @@ -138,17 +138,6 @@ alter table origin_visit add constraint origin_visit_snapshot_id_fkey foreign key (snapshot_id) references snapshot(object_id) not valid; alter table origin_visit validate constraint origin_visit_snapshot_id_fkey; --- occurrence_history -create unique index concurrently occurrence_history_pkey on occurrence_history(object_id); -alter table occurrence_history add 
primary key using index occurrence_history_pkey; - -create index concurrently on occurrence_history(target, target_type); -create index concurrently on occurrence_history(origin, branch); -create unique index concurrently on occurrence_history(origin, branch, target, target_type); - -alter table occurrence_history add constraint occurrence_history_origin_fkey foreign key (origin) references origin(id) not valid; -alter table occurrence_history validate constraint occurrence_history_origin_fkey; - -- release create unique index concurrently release_pkey on release(id); alter table release add primary key using index release_pkey; diff --git a/sql/swh-schema.sql b/sql/swh-schema.sql --- a/sql/swh-schema.sql +++ b/sql/swh-schema.sql @@ -12,7 +12,7 @@ -- latest schema version insert into dbversion(version, release, description) - values(126, now(), 'Work In Progress'); + values(127, now(), 'Work In Progress'); -- a SHA1 checksum create domain sha1 as bytea check (length(value) = 20); @@ -222,28 +222,6 @@ comment on column origin_visit.snapshot_id is 'Origin snapshot at visit time'; --- BEGIN legacy section (T830) - --- The content of software origins is indexed starting from top-level pointers --- called "branches". Every time we fetch some origin we store in this table --- where the branches pointed to at fetch time. --- --- Synonyms/mappings: --- * git: ref (in the "git update-ref" sense) -create table occurrence_history -( - origin bigint not null, - branch bytea not null, -- e.g., b"master" (for VCS), or b"sid" (for Debian) - target sha1_git not null, -- ref target, e.g., commit id - target_type object_type not null, -- ref target type - visits bigint[] not null, -- the visits where that occurrence was valid. References - -- origin_visit(visit), where o_h.origin = origin_visit.origin. 
- object_id bigserial not null, -- short object identifier - snapshot_branch_id bigint -); - --- END legacy section (T830) - -- A snapshot represents the entire state of a software origin as crawled by -- Software Heritage. This table is a simple mapping between (public) intrinsic -- snapshot identifiers and (private) numeric sequential identifiers.
diff --git a/sql/upgrades/127.sql b/sql/upgrades/127.sql
new file mode 100644
--- /dev/null
+++ b/sql/upgrades/127.sql
@@ -0,0 +1,47 @@
+-- SWH DB schema upgrade
+-- from_version: 126
+-- to_version: 127
+-- description: Drop the now superseded occurrence_history table
+
+insert into dbversion(version, release, description)
+      values(127, now(), 'Work In Progress');
+
+-- Remove the stored procedures built on occurrence_history first:
+-- swh_occurrence_get_by returns "setof occurrence_history", so it depends on
+-- the table's row type and has to go before the table itself.
+DROP FUNCTION swh_mktemp_occurrence_history();
+
+DROP FUNCTION swh_occurrence_get_by(origin_id bigint, branch_name bytea, "date" timestamp with time zone);
+
+DROP FUNCTION swh_occurrence_history_add();
+
+DROP TABLE occurrence_history;
+
+-- occurrence_history.object_id was declared bigserial, so its sequence is
+-- owned by that column and is normally dropped together with the table.
+-- IF EXISTS keeps the upgrade from aborting in that case, while still
+-- removing the sequence on databases where it was detached from the column.
+DROP SEQUENCE IF EXISTS occurrence_history_object_id_seq;
+
+-- Redefine the counters function without the 'occurrence_history' label.
+CREATE OR REPLACE FUNCTION swh_stat_counters() RETURNS SETOF public.counter
+    LANGUAGE sql STABLE
+    AS $$
+    select object_type as label, value as value
+    from object_counts
+    where object_type in (
+        'content',
+        'directory',
+        'directory_entry_dir',
+        'directory_entry_file',
+        'directory_entry_rev',
+        'origin',
+        'origin_visit',
+        'person',
+        'release',
+        'revision',
+        'revision_history',
+        'skipped_content',
+        'snapshot'
+    );
+$$;
diff --git a/swh/storage/api/client.py b/swh/storage/api/client.py --- a/swh/storage/api/client.py +++ b/swh/storage/api/client.py @@ -80,9 +80,6 @@ def object_find_by_sha1_git(self, ids): return self.post('object/find_by_sha1_git', {'ids': ids}) - def occurrence_add(self, occurrences): - return self.post('occurrence/add', {'occurrences': occurrences}) - def snapshot_add(self, origin, visit, snapshot): return self.post('snapshot/add', { 'origin': origin, 'visit': visit, 'snapshot': snapshot, diff --git a/swh/storage/api/server.py 
b/swh/storage/api/server.py --- a/swh/storage/api/server.py +++ b/swh/storage/api/server.py @@ -180,11 +180,6 @@ **decode_request(request))) -@app.route('/occurrence/add', methods=['POST']) -def occurrence_add(): - return encode_data(get_storage().occurrence_add(**decode_request(request))) - - @app.route('/snapshot/add', methods=['POST']) def snapshot_add(): return encode_data(get_storage().snapshot_add(**decode_request(request))) diff --git a/swh/storage/db.py b/swh/storage/db.py --- a/swh/storage/db.py +++ b/swh/storage/db.py @@ -235,9 +235,6 @@ @stored_procedure('swh_mktemp_release') def mktemp_release(self, cur=None): pass - @stored_procedure('swh_mktemp_occurrence_history') - def mktemp_occurrence_history(self, cur=None): pass - @stored_procedure('swh_mktemp_snapshot_branch') def mktemp_snapshot_branch(self, cur=None): pass @@ -269,9 +266,6 @@ @stored_procedure('swh_release_add') def release_add_from_temp(self, cur=None): pass - @stored_procedure('swh_occurrence_history_add') - def occurrence_history_add_from_temp(self, cur=None): pass - def content_update_from_temp(self, keys_to_update, cur=None): cur = self._cursor(cur) cur.execute("""select swh_content_update(ARRAY[%s] :: text[])""" % @@ -611,8 +605,6 @@ return None return line_to_bytes(r) - occurrence_cols = ['origin', 'branch', 'target', 'target_type'] - @staticmethod def mangle_query_key(key, main_table): if key == 'id': diff --git a/swh/storage/storage.py b/swh/storage/storage.py --- a/swh/storage/storage.py +++ b/swh/storage/storage.py @@ -914,31 +914,6 @@ return None @db_transaction() - def occurrence_add(self, occurrences, db=None, cur=None): - """Add occurrences to the storage - - Args: - occurrences: iterable of dictionaries representing the individual - occurrences to add. 
Each dict has the following keys: - - - origin (int): id of the origin corresponding to the - occurrence - - visit (int): id of the visit corresponding to the - occurrence - - branch (str): the reference name of the occurrence - - target (sha1_git): the id of the object pointed to by - the occurrence - - target_type (str): the type of object pointed to by the - occurrence - - """ - db.mktemp_occurrence_history(cur) - db.copy_to(occurrences, 'tmp_occurrence_history', - ['origin', 'branch', 'target', 'target_type', 'visit'], cur) - - db.occurrence_history_add_from_temp(cur) - - @db_transaction() def origin_visit_add(self, origin, ts, db=None, cur=None): """Add an origin_visit for the origin at ts with status 'ongoing'. diff --git a/swh/storage/tests/test_storage.py b/swh/storage/tests/test_storage.py --- a/swh/storage/tests/test_storage.py +++ b/swh/storage/tests/test_storage.py @@ -376,21 +376,9 @@ self.date_visit1 = datetime.datetime(2015, 1, 1, 23, 0, 0, tzinfo=datetime.timezone.utc) - self.occurrence = { - 'branch': b'master', - 'target': self.revision['id'], - 'target_type': 'revision', - } - self.date_visit2 = datetime.datetime(2017, 1, 1, 23, 0, 0, tzinfo=datetime.timezone.utc) - self.occurrence2 = { - 'branch': b'master', - 'target': self.revision2['id'], - 'target_type': 'revision', - } - self.date_visit3 = datetime.datetime(2018, 1, 1, 23, 0, 0, tzinfo=datetime.timezone.utc) @@ -471,9 +459,9 @@ self.snapshot = { 'id': hash_to_bytes('2498dbf535f882bc7f9a18fb16c9ad27fda7bab7'), 'branches': { - self.occurrence['branch']: { - 'target': self.occurrence['target'], - 'target_type': self.occurrence['target_type'], + b'master': { + 'target': self.revision['id'], + 'target_type': 'revision', }, }, 'next_branch': None @@ -1167,65 +1155,6 @@ self.assertIsNone(actual_origin_visit) - def test_occurrence_add(self): - occur = self.occurrence.copy() - - origin_id = self.storage.origin_add_one(self.origin2) - date_visit1 = self.date_visit1 - origin_visit1 = 
self.storage.origin_visit_add(origin_id, date_visit1) - - revision = self.revision.copy() - revision['id'] = occur['target'] - self.storage.revision_add([revision]) - - occur.update({ - 'origin': origin_id, - 'visit': origin_visit1['visit'], - }) - self.storage.occurrence_add([occur]) - - test_query = ''' - with indiv_occurrences as ( - select origin, branch, target, target_type, unnest(visits) as visit - from occurrence_history - ) - select origin, branch, target, target_type, date - from indiv_occurrences - left join origin_visit using(origin, visit) - order by origin, date''' - - self.cursor.execute(test_query) - ret = self.cursor.fetchall() - self.assertEqual(len(ret), 1) - self.assertEqual( - (ret[0][0], ret[0][1].tobytes(), ret[0][2].tobytes(), - ret[0][3], ret[0][4]), - (occur['origin'], occur['branch'], occur['target'], - occur['target_type'], self.date_visit1)) - - date_visit2 = date_visit1 + datetime.timedelta(hours=10) - - origin_visit2 = self.storage.origin_visit_add(origin_id, date_visit2) - occur2 = occur.copy() - occur2.update({ - 'visit': origin_visit2['visit'], - }) - self.storage.occurrence_add([occur2]) - - self.cursor.execute(test_query) - ret = self.cursor.fetchall() - self.assertEqual(len(ret), 2) - self.assertEqual( - (ret[0][0], ret[0][1].tobytes(), ret[0][2].tobytes(), - ret[0][3], ret[0][4]), - (occur['origin'], occur['branch'], occur['target'], - occur['target_type'], date_visit1)) - self.assertEqual( - (ret[1][0], ret[1][1].tobytes(), ret[1][2].tobytes(), - ret[1][3], ret[1][4]), - (occur2['origin'], occur2['branch'], occur2['target'], - occur2['target_type'], date_visit2)) - def test_snapshot_add_get_empty(self): origin_id = self.storage.origin_add_one(self.origin) origin_visit1 = self.storage.origin_visit_add(origin_id,