diff --git a/sql/upgrades/141.sql b/sql/upgrades/141.sql
new file mode 100644
--- /dev/null
+++ b/sql/upgrades/141.sql
@@ -0,0 +1,10 @@
+-- SWH DB schema upgrade
+-- from_version: 140
+-- to_version: 141
+-- description: Remove fetch history
+
+insert into dbversion(version, release, description)
+  values(141, now(), 'Work In Progress');
+
+drop table fetch_history;
+
diff --git a/swh/storage/api/client.py b/swh/storage/api/client.py
--- a/swh/storage/api/client.py
+++ b/swh/storage/api/client.py
@@ -197,17 +197,6 @@
             {'origin': origin,
              'allowed_statuses': allowed_statuses,
              'require_snapshot': require_snapshot})
-
-    def fetch_history_start(self, origin_id):
-        return self.post('fetch_history/start', {'origin_id': origin_id})
-
-    def fetch_history_end(self, fetch_history_id, data):
-        return self.post('fetch_history/end',
-                         {'fetch_history_id': fetch_history_id,
-                          'data': data})
-
-    def fetch_history_get(self, fetch_history_id):
-        return self.get('fetch_history', {'id': fetch_history_id})
 
     def stat_counters(self):
         return self.get('stat/counters')
diff --git a/swh/storage/api/server.py b/swh/storage/api/server.py
--- a/swh/storage/api/server.py
+++ b/swh/storage/api/server.py
@@ -440,26 +440,6 @@
         **decode_request(request)))
 
 
-@app.route('/fetch_history', methods=['GET'])
-@timed
-def fetch_history_get():
-    return encode_data(get_storage().fetch_history_get(request.args['id']))
-
-
-@app.route('/fetch_history/start', methods=['POST'])
-@timed
-def fetch_history_start():
-    return encode_data(
-        get_storage().fetch_history_start(**decode_request(request)))
-
-
-@app.route('/fetch_history/end', methods=['POST'])
-@timed
-def fetch_history_end():
-    return encode_data(
-        get_storage().fetch_history_end(**decode_request(request)))
-
-
 @app.route('/tool/data', methods=['POST'])
 @timed
 def tool_get():
diff --git a/swh/storage/db.py b/swh/storage/db.py
--- a/swh/storage/db.py
+++ b/swh/storage/db.py
@@ -604,52 +604,6 @@
         cur.execute('SELECT * FROM swh_stat_counters()')
         yield from cur
 
-    fetch_history_cols = ['origin', 'date', 'status', 'result', 'stdout',
-                          'stderr', 'duration']
-
-    def create_fetch_history(self, fetch_history, cur=None):
-        """Create a fetch_history entry with the data in fetch_history"""
-        cur = self._cursor(cur)
-        query = '''INSERT INTO fetch_history (%s)
-                   VALUES (%s) RETURNING id''' % (
-            ','.join(self.fetch_history_cols),
-            ','.join(['%s'] * len(self.fetch_history_cols))
-        )
-        cur.execute(query, [fetch_history.get(col) for col in
-                            self.fetch_history_cols])
-
-        return cur.fetchone()[0]
-
-    def get_fetch_history(self, fetch_history_id, cur=None):
-        """Get a fetch_history entry with the given id"""
-        cur = self._cursor(cur)
-        query = '''SELECT %s FROM fetch_history WHERE id=%%s''' % (
-            ', '.join(self.fetch_history_cols),
-        )
-        cur.execute(query, (fetch_history_id,))
-
-        data = cur.fetchone()
-
-        if not data:
-            return None
-
-        ret = {'id': fetch_history_id}
-        for i, col in enumerate(self.fetch_history_cols):
-            ret[col] = data[i]
-
-        return ret
-
-    def update_fetch_history(self, fetch_history, cur=None):
-        """Update the fetch_history entry from the data in fetch_history"""
-        cur = self._cursor(cur)
-        query = '''UPDATE fetch_history
-                   SET %s
-                   WHERE id=%%s''' % (
-            ','.join('%s=%%s' % col for col in self.fetch_history_cols)
-        )
-        cur.execute(query, [jsonize(fetch_history.get(col)) for col in
-                            self.fetch_history_cols + ['id']])
-
     def origin_add(self, type, url, cur=None):
         """Insert a new origin and return the new identifier."""
         insert = """INSERT INTO origin (type, url) values (%s, %s)
diff --git a/swh/storage/in_memory.py b/swh/storage/in_memory.py
--- a/swh/storage/in_memory.py
+++ b/swh/storage/in_memory.py
@@ -1222,25 +1222,6 @@
         else:
             return origin.url
 
-    def fetch_history_start(self, origin_id):
-        """Add an entry for origin origin_id in fetch_history. Returns the id
-        of the added fetch_history entry
-        """
-        assert not ENABLE_ORIGIN_IDS, 'origin ids are disabled'
-        pass
-
-    def fetch_history_end(self, fetch_history_id, data):
-        """Close the fetch_history entry with id `fetch_history_id`, replacing
-        its data with `data`.
-        """
-        pass
-
-    def fetch_history_get(self, fetch_history_id):
-        """Get the fetch_history entry with id `fetch_history_id`.
-        """
-        raise NotImplementedError('fetch_history_get is deprecated, use '
-                                  'origin_visit_get instead.')
-
     def origin_visit_add(self, origin, date, type=None):
         """Add an origin_visit for the origin at date with status
         'ongoing'.
diff --git a/swh/storage/sql/30-swh-schema.sql b/swh/storage/sql/30-swh-schema.sql
--- a/swh/storage/sql/30-swh-schema.sql
+++ b/swh/storage/sql/30-swh-schema.sql
@@ -17,7 +17,7 @@
 
 -- latest schema version
 insert into dbversion(version, release, description)
-  values(140, now(), 'Work In Progress');
+  values(141, now(), 'Work In Progress');
 
 -- a SHA1 checksum
 create domain sha1 as bytea check (length(value) = 20);
@@ -119,31 +119,6 @@
 comment on column skipped_content.object_id is 'Skipped content identifier';
 
 
--- Log of all origin fetches (i.e., origin crawling) that have been done in the
--- past, or are still ongoing. Similar to list_history, but for origins.
-create table fetch_history
-(
-  id        bigserial,
-  origin    bigint,
-  date      timestamptz not null,
-  status    boolean,  -- true if and only if the fetch has been successful
-  result    jsonb,    -- more detailed returned values, times, etc...
-  stdout    text,
-  stderr    text,     -- null when status is true, filled otherwise
-  duration  interval  -- fetch duration of NULL if still ongoing
-);
-
-comment on table fetch_history is 'Log of all origin fetches';
-comment on column fetch_history.id is 'Identifier for fetch history';
-comment on column fetch_history.origin is 'Origin table identifier';
-comment on column fetch_history.date is 'Fetch start time';
-comment on column fetch_history.status is 'True indicates successful fetch';
-comment on column fetch_history.result is 'Detailed return values, times etc';
-comment on column fetch_history.stdout is 'Standard output of fetch operation';
-comment on column fetch_history.stderr is 'Standard error of fetch operation';
-comment on column fetch_history.duration is 'Time taken to complete fetch, NULL if ongoing';
-
-
 -- A file-system directory. A directory is a list of directory entries (see
 -- tables: directory_entry_{dir,file}).
 --
diff --git a/swh/storage/sql/60-swh-indexes.sql b/swh/storage/sql/60-swh-indexes.sql
--- a/swh/storage/sql/60-swh-indexes.sql
+++ b/swh/storage/sql/60-swh-indexes.sql
@@ -31,14 +31,6 @@
 alter table skipped_content add constraint skipped_content_origin_fkey foreign key (origin) references origin(id) not valid;
 alter table skipped_content validate constraint skipped_content_origin_fkey;
 
--- fetch_history
-
-create unique index concurrently fetch_history_pkey on fetch_history(id);
-alter table fetch_history add primary key using index fetch_history_pkey;
-
-alter table fetch_history add constraint fetch_history_origin_fkey foreign key (origin) references origin(id) not valid;
-alter table fetch_history validate constraint fetch_history_origin_fkey;
-
 -- directory
 
 create unique index concurrently directory_pkey on directory(id);
diff --git a/swh/storage/storage.py b/swh/storage/storage.py
--- a/swh/storage/storage.py
+++ b/swh/storage/storage.py
@@ -1663,47 +1663,6 @@
 
         return db.origin_add(origin['type'], origin['url'], cur)
 
-    @db_transaction()
-    def fetch_history_start(self, origin_id, db=None, cur=None):
-        """Add an entry for origin origin_id in fetch_history. Returns the id
-        of the added fetch_history entry
-        """
-        if isinstance(origin_id, str):
-            origin = \
-                self.origin_get([{'url': origin_id}], db=db, cur=cur)
-            if not origin:
-                return
-            origin_id = origin[0]['id']
-        fetch_history = {
-            'origin': origin_id,
-            'date': datetime.datetime.now(tz=datetime.timezone.utc),
-        }
-
-        return db.create_fetch_history(fetch_history, cur)
-
-    @db_transaction()
-    def fetch_history_end(self, fetch_history_id, data, db=None, cur=None):
-        """Close the fetch_history entry with id `fetch_history_id`, replacing
-        its data with `data`.
-        """
-        now = datetime.datetime.now(tz=datetime.timezone.utc)
-        fetch_history = db.get_fetch_history(fetch_history_id, cur)
-
-        if not fetch_history:
-            raise ValueError('No fetch_history with id %d' % fetch_history_id)
-
-        fetch_history['duration'] = now - fetch_history['date']
-
-        fetch_history.update(data)
-
-        db.update_fetch_history(fetch_history, cur)
-
-    @db_transaction()
-    def fetch_history_get(self, fetch_history_id, db=None, cur=None):
-        """Get the fetch_history entry with id `fetch_history_id`.
-        """
-        return db.get_fetch_history(fetch_history_id, cur)
-
     @db_transaction(statement_timeout=500)
     def stat_counters(self, db=None, cur=None):
         """compute statistics about the number of tuples in various tables
diff --git a/swh/storage/tests/storage_data.py b/swh/storage/tests/storage_data.py
--- a/swh/storage/tests/storage_data.py
+++ b/swh/storage/tests/storage_data.py
@@ -435,21 +435,6 @@
 
 
 releases = (release, release2, release3)
-
-fetch_history_date = datetime.datetime(
-    2015, 1, 2, 21, 0, 0,
-    tzinfo=datetime.timezone.utc)
-
-fetch_history_end = datetime.datetime(
-    2015, 1, 2, 23, 0, 0,
-    tzinfo=datetime.timezone.utc)
-
-fetch_history_data = {
-    'status': True,
-    'result': {'foo': 'bar'},
-    'stdout': 'blabla',
-    'stderr': 'blablabla',
-}
 snapshot = {
     'id': hash_to_bytes('2498dbf535f882bc7f9a18fb16c9ad27fda7bab7'),
     'branches': {
@@ -528,5 +513,3 @@
         'version': '0.0.1'
     }
 }
-
-fetch_history_duration = (fetch_history_end - fetch_history_date)
diff --git a/swh/storage/tests/test_storage.py b/swh/storage/tests/test_storage.py
--- a/swh/storage/tests/test_storage.py
+++ b/swh/storage/tests/test_storage.py
@@ -10,7 +10,7 @@
 import queue
 import threading
 from collections import defaultdict
-from unittest.mock import Mock, patch
+from unittest.mock import Mock
 
 import psycopg2
 import pytest
@@ -3274,36 +3274,6 @@
     """Test the local storage"""
     _test_origin_ids = True
 
-    # Can only be tested with local storage as you can't mock
-    # datetimes for the remote server
-    @pytest.mark.parametrize('use_url', [True, False])
-    def test_fetch_history(self, swh_storage, use_url):
-        if not self._test_origin_ids and not use_url:
-            return
-
-        origin_id = swh_storage.origin_add_one(data.origin)
-        origin_id_or_url = data.origin['url'] if use_url else origin_id
-        with patch('datetime.datetime'):
-            datetime.datetime.now.return_value = data.fetch_history_date
-            fetch_history_id = swh_storage.fetch_history_start(
-                origin_id_or_url)
-        datetime.datetime.now.assert_called_with(tz=datetime.timezone.utc)
-
-        with patch('datetime.datetime'):
-            datetime.datetime.now.return_value = data.fetch_history_end
-            swh_storage.fetch_history_end(fetch_history_id,
-                                          data.fetch_history_data)
-
-        fetch_history = swh_storage.fetch_history_get(fetch_history_id)
-        expected_fetch_history = data.fetch_history_data.copy()
-
-        expected_fetch_history['id'] = fetch_history_id
-        expected_fetch_history['origin'] = origin_id
-        expected_fetch_history['date'] = data.fetch_history_date
-        expected_fetch_history['duration'] = data.fetch_history_duration
-
-        assert expected_fetch_history == fetch_history
-
     # This test is only relevant on the local storage, with an actual
     # objstorage raising an exception
     def test_content_add_objstorage_exception(self, swh_storage):