diff --git a/sql/swh-func.sql b/sql/swh-func.sql --- a/sql/swh-func.sql +++ b/sql/swh-func.sql @@ -964,31 +964,6 @@ end $$; - --- Retrieve revisions by occurrence criterion filtering -create or replace function swh_revision_get_by( - origin_id bigint, - branch_name bytea default NULL, - date timestamptz default NULL) - returns setof revision_entry - language sql - stable -as $$ - select r.id, r.date, r.date_offset, r.date_neg_utc_offset, - r.committer_date, r.committer_date_offset, r.committer_date_neg_utc_offset, - r.type, r.directory, r.message, - a.id, a.fullname, a.name, a.email, c.id, c.fullname, c.name, c.email, r.metadata, r.synthetic, - array(select rh.parent_id::bytea - from revision_history rh - where rh.id = r.id - order by rh.parent_rank - ) as parents, r.object_id - from swh_occurrence_get_by(origin_id, branch_name, date) as occ - inner join revision r on occ.target = r.id - left join person a on a.id = r.author - left join person c on c.id = r.committer; -$$; - -- Object listing by object_id create or replace function swh_content_list_by_object_id( diff --git a/sql/upgrades/125.sql b/sql/upgrades/125.sql --- a/sql/upgrades/125.sql +++ b/sql/upgrades/125.sql @@ -1,70 +1,9 @@ -- SWH DB schema upgrade -- from_version: 124 -- to_version: 125 --- description: Drop useless entity tables +-- description: drop useless function swh_revision_get_by insert into dbversion(version, release, description) values(125, now(), 'Work In Progress'); -DROP FUNCTION swh_entity_from_tmp_entity_lister(); - -DROP FUNCTION swh_entity_get(entity_uuid uuid); - -DROP FUNCTION swh_entity_history_add(); - -DROP FUNCTION swh_mktemp_entity_history(); - -DROP FUNCTION swh_mktemp_entity_lister(); - -DROP FUNCTION swh_update_entity_from_entity_history(); - -ALTER TABLE origin - DROP CONSTRAINT origin_lister_fkey; - -ALTER TABLE origin - DROP CONSTRAINT origin_project_fkey; - -DROP TABLE entity; - -DROP TABLE entity_equivalence; - -DROP TABLE entity_history; - -DROP TABLE list_history; - -DROP TABLE listable_entity; - -DROP SEQUENCE entity_history_id_seq; - -DROP SEQUENCE list_history_id_seq; - -DROP TYPE entity_type; - -DROP TYPE entity_id; - -ALTER TABLE origin - DROP COLUMN lister, - DROP COLUMN project; - -CREATE OR REPLACE FUNCTION swh_stat_counters() RETURNS SETOF public.counter - LANGUAGE sql STABLE - AS $$ - select object_type as label, value as value - from object_counts - where object_type in ( - 'content', - 'directory', - 'directory_entry_dir', - 'directory_entry_file', - 'directory_entry_rev', - 'occurrence_history', - 'origin', - 'origin_visit', - 'person', - 'release', - 'revision', - 'revision_history', - 'skipped_content', - 'snapshot' - ); -$$; +DROP FUNCTION swh_revision_get_by(origin_id bigint, branch_name bytea, "date" timestamp with time zone); diff --git a/swh/storage/api/client.py b/swh/storage/api/client.py --- a/swh/storage/api/client.py +++ b/swh/storage/api/client.py @@ -54,22 +54,10 @@ def revision_get(self, revisions): return self.post('revision', {'revisions': revisions}) - def revision_get_by(self, origin_id, branch_name, timestamp, limit=None): - return self.post('revision/by', dict(origin_id=origin_id, - branch_name=branch_name, - timestamp=timestamp, - limit=limit)) - def revision_log(self, revisions, limit=None): return self.post('revision/log', {'revisions': revisions, 'limit': limit}) - def revision_log_by(self, origin_id, branch_name, timestamp, limit=None): - return self.post('revision/logby', {'origin_id': origin_id, - 'branch_name': branch_name, - 'timestamp': timestamp, - 'limit': limit}) - def revision_shortlog(self, revisions, limit=None): return self.post('revision/shortlog', {'revisions': revisions, 'limit': limit}) diff --git a/swh/storage/api/server.py b/swh/storage/api/server.py --- a/swh/storage/api/server.py +++ b/swh/storage/api/server.py @@ -141,23 +141,11 @@ return encode_data(get_storage().revision_get(**decode_request(request))) -@app.route('/revision/by', methods=['POST']) -def revision_get_by(): - return encode_data(get_storage().revision_get_by( - **decode_request(request))) - - @app.route('/revision/log', methods=['POST']) def revision_log(): return encode_data(get_storage().revision_log(**decode_request(request))) -@app.route('/revision/logby', methods=['POST']) -def revision_log_by(): - return encode_data(get_storage().revision_log_by( - **decode_request(request))) - - @app.route('/revision/shortlog', methods=['POST']) def revision_shortlog(): return encode_data(get_storage().revision_shortlog( diff --git a/swh/storage/db.py b/swh/storage/db.py --- a/swh/storage/db.py +++ b/swh/storage/db.py @@ -922,34 +922,6 @@ """ % query_keys, ((id,) for id in releases)) - def revision_get_by(self, - origin_id, - branch_name, - datetime, - limit=None, - cur=None): - """Retrieve a revision by occurrence criterion. - - Args: - - origin_id: The origin to look for - - branch_name: the branch name to look for - - datetime: the lower bound of timerange to look for. - - limit: limit number of results to return - The upper bound being now. - """ - cur = self._cursor(cur) - if branch_name and isinstance(branch_name, str): - branch_name = branch_name.encode('utf-8') - - query = ''' - SELECT %s - FROM swh_revision_get_by(%%s, %%s, %%s) - LIMIT %%s - ''' % ', '.join(self.revision_get_cols) - - cur.execute(query, (origin_id, branch_name, datetime, limit)) - yield from cursor_to_bytes(cur) - def origin_metadata_add(self, origin, ts, provider, tool, metadata, cur=None): """ Add an origin_metadata for the origin at ts with provider, tool and diff --git a/swh/storage/storage.py b/swh/storage/storage.py --- a/swh/storage/storage.py +++ b/swh/storage/storage.py @@ -599,37 +599,6 @@ yield from db.revision_shortlog(revisions, limit, cur) - @db_transaction_generator(statement_timeout=2000) - def revision_log_by(self, origin_id, branch_name=None, timestamp=None, - limit=None, db=None, cur=None): - """Fetch revision entry from the actual origin_id's latest revision. - - Args: - origin_id: the origin id from which deriving the revision - branch_name: (optional) occurrence's branch name - timestamp: (optional) occurrence's time - limit: (optional) depth limitation for the - output. Default to None. - - Yields: - The revision log starting from the revision derived from - the (origin, branch_name, timestamp) combination if any. - - Returns: - None if no revision matching this combination is found. - - """ - # Retrieve the revision by criterion - revisions = list(db.revision_get_by( - origin_id, branch_name, timestamp, limit=1, cur=cur)) - - if not revisions: - return None - - revision_id = revisions[0][0] - # otherwise, retrieve the revision log from that revision - yield from self.revision_log([revision_id], limit, db=db, cur=cur) - def release_add(self, releases): """Add releases to the storage @@ -1048,37 +1017,6 @@ return dict(zip(db.origin_visit_get_cols, ori_visit)) - @db_transaction_generator(statement_timeout=500) - def revision_get_by(self, - origin_id, - branch_name=None, - timestamp=None, - limit=None, - db=None, - cur=None): - """Given an origin_id, retrieve occurrences' list per given criterions. - - Args: - origin_id: The origin to filter on. - branch_name: (optional) branch name. - timestamp: (optional) time. - limit: (optional) limit - - Yields: - List of occurrences matching the criterions or None if nothing is - found. - - """ - for line in db.revision_get_by(origin_id, branch_name, timestamp, - limit=limit, cur=cur): - data = converters.db_to_revision( - dict(zip(db.revision_get_cols, line)) - ) - if not data['type']: - yield None - continue - yield data - @db_transaction(statement_timeout=2000) def object_find_by_sha1_git(self, ids, db=None, cur=None): """Return the objects found with the given ids. diff --git a/swh/storage/tests/test_storage.py b/swh/storage/tests/test_storage.py --- a/swh/storage/tests/test_storage.py +++ b/swh/storage/tests/test_storage.py @@ -394,12 +394,6 @@ self.date_visit3 = datetime.datetime(2018, 1, 1, 23, 0, 0, tzinfo=datetime.timezone.utc) - # template occurrence to be filled in test (cf. revision_log_by) - self.occurrence3 = { - 'branch': b'master', - 'target_type': 'revision', - } - self.release = { 'id': b'87659012345678901234', 'name': b'v0.0.1', @@ -822,68 +816,6 @@ self.assertEqual(len(actual_results), 1) self.assertEquals(actual_results[0], self.revision4) - def test_revision_log_by(self): - # given - origin_id = self.storage.origin_add_one(self.origin2) - self.storage.revision_add([self.revision3, - self.revision4]) - - # occurrence3 targets 'revision4' - # with branch 'master' and origin origin_id - occurrence3 = self.occurrence3.copy() - date_visit1 = self.date_visit3 - origin_visit1 = self.storage.origin_visit_add(origin_id, - date_visit1) - occurrence3.update({ - 'origin': origin_id, - 'target': self.revision4['id'], - 'visit': origin_visit1['visit'], - }) - - self.storage.occurrence_add([occurrence3]) - - # self.revision4 -is-child-of-> self.revision3 - # when - actual_results = list(self.storage.revision_log_by( - origin_id, - branch_name=occurrence3['branch'], - timestamp=date_visit1)) - - # hack: ids generated - for actual_result in actual_results: - del actual_result['author']['id'] - del actual_result['committer']['id'] - - self.assertEqual(len(actual_results), 2) - self.assertEquals(actual_results[0], - self.normalize_entity(self.revision4)) - self.assertEquals(actual_results[1], - self.normalize_entity(self.revision3)) - - # when - 2 - actual_results = list(self.storage.revision_log_by( - origin_id, - branch_name=None, - timestamp=None, - limit=1)) - - # then - for actual_result in actual_results: - del actual_result['author']['id'] - del actual_result['committer']['id'] - - self.assertEqual(len(actual_results), 1) - self.assertEquals(actual_results[0], self.revision4) - - # when - 3 (revision not found) - - actual_res = list(self.storage.revision_log_by( - origin_id, - branch_name='inexistant-branch', - timestamp=None)) - - self.assertEquals(actual_res, []) - @staticmethod def _short_revision(revision): return [revision['id'], revision['parents']] @@ -939,148 +871,6 @@ self.assertEqual(len(get), 1) self.assertEqual(get[0]['parents'], []) # no parents on this one - def test_revision_get_by(self): - # given - self.storage.content_add([self.cont2]) - self.storage.directory_add([self.dir2]) # point to self.cont - self.storage.revision_add([self.revision2]) # points to self.dir - origin_id = self.storage.origin_add_one(self.origin2) - - # occurrence2 points to 'revision2' with branch 'master', we - # need to point to the right origin - occurrence2 = self.occurrence2.copy() - date_visit1 = self.date_visit2 - origin_visit1 = self.storage.origin_visit_add(origin_id, date_visit1) - occurrence2.update({ - 'origin': origin_id, - 'visit': origin_visit1['visit'], - }) - self.storage.occurrence_add([occurrence2]) - - # we want only revision 2 - expected_revisions = list(self.storage.revision_get( - [self.revision2['id']])) - - # when - actual_results = list(self.storage.revision_get_by( - origin_id, - occurrence2['branch'], - None)) - - self.assertEqual(actual_results[0], expected_revisions[0]) - - # when (with no branch filtering, it's still ok) - actual_results = list(self.storage.revision_get_by( - origin_id, - None, - None)) - - self.assertEqual(actual_results[0], expected_revisions[0]) - - def test_revision_get_by_multiple_occurrence(self): - # 2 occurrences pointing to 2 different revisions - # each occurrence have 1 day delta - # the api must return the revision whose occurrence is the nearest. - - # given - self.storage.content_add([self.cont2]) - self.storage.directory_add([self.dir2]) - self.storage.revision_add([self.revision2, self.revision3]) - origin_id = self.storage.origin_add_one(self.origin2) - - # occurrence2 points to 'revision2' with branch 'master', we - # need to point to the right origin - date_visit1 = self.date_visit2 - origin_visit1 = self.storage.origin_visit_add(origin_id, date_visit1) - occurrence2 = self.occurrence2.copy() - occurrence2.update({ - 'origin': origin_id, - 'visit': origin_visit1['visit'] - }) - - dt = datetime.timedelta(days=1) - date_visit2 = date_visit1 + dt - origin_visit2 = self.storage.origin_visit_add(origin_id, date_visit2) - occurrence3 = self.occurrence2.copy() - occurrence3.update({ - 'origin': origin_id, - 'visit': origin_visit2['visit'], - 'target': self.revision3['id'], - }) - # 2 occurrences on same revision with lower validity date with 1 day - # delta - self.storage.occurrence_add([occurrence2]) - self.storage.occurrence_add([occurrence3]) - - # when - actual_results0 = list(self.storage.revision_get_by( - origin_id, - occurrence2['branch'], - date_visit1)) - - # hack: ids are generated - del actual_results0[0]['author']['id'] - del actual_results0[0]['committer']['id'] - - self.assertEquals(len(actual_results0), 1) - self.assertEqual(actual_results0, - [self.normalize_entity(self.revision2)]) - - # when - actual_results1 = list(self.storage.revision_get_by( - origin_id, - occurrence2['branch'], - date_visit1 + dt/3)) # closer to first visit - - # hack: ids are generated - del actual_results1[0]['author']['id'] - del actual_results1[0]['committer']['id'] - - self.assertEquals(len(actual_results1), 1) - self.assertEqual(actual_results1, - [self.normalize_entity(self.revision2)]) - - # when - actual_results2 = list(self.storage.revision_get_by( - origin_id, - occurrence2['branch'], - date_visit1 + 2*dt/3)) # closer to second visit - - del actual_results2[0]['author']['id'] - del actual_results2[0]['committer']['id'] - - self.assertEquals(len(actual_results2), 1) - self.assertEqual(actual_results2, - [self.normalize_entity(self.revision3)]) - - # when - actual_results3 = list(self.storage.revision_get_by( - origin_id, - occurrence3['branch'], - date_visit2)) - - # hack: ids are generated - del actual_results3[0]['author']['id'] - del actual_results3[0]['committer']['id'] - - self.assertEquals(len(actual_results3), 1) - self.assertEqual(actual_results3, - [self.normalize_entity(self.revision3)]) - - # when - actual_results4 = list(self.storage.revision_get_by( - origin_id, - None, - None)) - - for actual_result in actual_results4: - del actual_result['author']['id'] - del actual_result['committer']['id'] - - self.assertEquals(len(actual_results4), 1) - self.assertCountEqual(actual_results4, - [self.normalize_entity(self.revision3)]) - def test_release_add(self): init_missing = self.storage.release_missing([self.release['id'], self.release2['id']])