diff --git a/swh/storage/db.py b/swh/storage/db.py --- a/swh/storage/db.py +++ b/swh/storage/db.py @@ -827,7 +827,7 @@ 'provider_name', 'provider_type', 'provider_url'] - def origin_metadata_get_by(self, origin_id, provider_type=None, cur=None): + def origin_metadata_get_by(self, origin_url, provider_type=None, cur=None): """Retrieve all origin_metadata entries for one origin_id """ @@ -838,7 +838,7 @@ %%s)''' % (','.join( self.origin_metadata_get_cols)) - cur.execute(query, (origin_id, )) + cur.execute(query, (origin_url, )) else: query = '''SELECT %s @@ -846,7 +846,7 @@ %%s, %%s)''' % (','.join( self.origin_metadata_get_cols)) - cur.execute(query, (origin_id, provider_type)) + cur.execute(query, (origin_url, provider_type)) yield from cur diff --git a/swh/storage/in_memory.py b/swh/storage/in_memory.py --- a/swh/storage/in_memory.py +++ b/swh/storage/in_memory.py @@ -3,7 +3,6 @@ # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -import os import re import bisect import dateutil @@ -32,10 +31,6 @@ return datetime.datetime.now(tz=datetime.timezone.utc) -ENABLE_ORIGIN_IDS = \ - os.environ.get('SWH_STORAGE_IN_MEMORY_ENABLE_ORIGIN_IDS', 'true') == 'true' - - class Storage: def __init__(self, journal_writer=None): self._contents = {} @@ -846,9 +841,7 @@ branches. """ - origin_url = self._get_origin_url(origin) - if not origin_url: - return + origin_url = origin if origin_url not in self._origins or \ visit > len(self._origin_visits[origin_url]): @@ -887,9 +880,7 @@ or :const:`None` if the snapshot has less than 1000 branches. """ - origin_url = self._get_origin_url(origin) - if not origin_url: - return + origin_url = origin visit = self.origin_visit_get_latest( origin_url, @@ -1010,11 +1001,7 @@ def _convert_origin(self, t): if t is None: return None - (origin_id, origin) = t - origin = origin.to_dict() - if ENABLE_ORIGIN_IDS: - origin['id'] = origin_id - return origin + return t.to_dict() def origin_get(self, origins): """Return origins, either all identified by their ids or all @@ -1068,15 +1055,11 @@ for origin in origins: result = None if 'id' in origin: - assert ENABLE_ORIGIN_IDS, 'origin ids are disabled' - if origin['id'] <= len(self._origins_by_id): - result = self._origins[self._origins_by_id[origin['id']-1]] - elif 'url' in origin: - if origin['url'] in self._origins: - result = self._origins[origin['url']] - else: - raise ValueError( - 'Origin must have either id or url.') + raise ValueError('origin ids are no longer supported.') + if 'url' not in origin: + raise ValueError('Missing origin URL are no longer supported.') + if origin['url'] in self._origins: + result = self._origins[origin['url']] results.append(self._convert_origin(result)) if return_single: @@ -1137,9 +1120,6 @@ origins = [orig for orig in origins if len(self._origin_visits[orig['url']]) > 0] - if ENABLE_ORIGIN_IDS: - origins.sort(key=lambda origin: origin['id']) - return origins[offset:offset+limit] def origin_count(self, url_pattern, regexp=False, with_visit=False, @@ -1178,10 +1158,7 @@ """ origins = copy.deepcopy(origins) for origin in origins: - if ENABLE_ORIGIN_IDS: - origin['id'] = self.origin_add_one(origin) - else: - self.origin_add_one(origin) + self.origin_add_one(origin) return origins def origin_add_one(self, origin): @@ -1200,46 +1177,18 @@ """ origin = Origin.from_dict(origin) - if origin.url in self._origins: - if ENABLE_ORIGIN_IDS: - (origin_id, _) = self._origins[origin.url] - else: + if origin.url not in self._origins: if 
self.journal_writer: self.journal_writer.write_addition('origin', origin) - if ENABLE_ORIGIN_IDS: - # origin ids are in the range [1, +inf[ - origin_id = len(self._origins) + 1 - self._origins_by_id.append(origin.url) - assert len(self._origins_by_id) == origin_id - else: - origin_id = None - self._origins[origin.url] = (origin_id, origin) - self._origin_visits[origin.url] = [] - self._objects[origin.url].append(('origin', origin.url)) - if ENABLE_ORIGIN_IDS: - return origin_id - else: - return origin.url + # TODO: remove this, only used for origin_get_range + self._origins_by_id.append(origin.url) - def fetch_history_start(self, origin_id): - """Add an entry for origin origin_id in fetch_history. Returns the id - of the added fetch_history entry - """ - assert not ENABLE_ORIGIN_IDS, 'origin ids are disabled' - pass - - def fetch_history_end(self, fetch_history_id, data): - """Close the fetch_history entry with id `fetch_history_id`, replacing - its data with `data`. - """ - pass + self._origins[origin.url] = origin + self._origin_visits[origin.url] = [] + self._objects[origin.url].append(('origin', origin.url)) - def fetch_history_get(self, fetch_history_id): - """Get the fetch_history entry with id `fetch_history_id`. - """ - raise NotImplementedError('fetch_history_get is deprecated, use ' - 'origin_visit_get instead.') + return origin.url def origin_visit_add(self, origin, date, type=None): """Add an origin_visit for the origin at date with status 'ongoing'. @@ -1259,8 +1208,8 @@ - visit: the visit's identifier for the new visit occurrence """ - origin_url = self._get_origin_url(origin) - if origin_url is None: + origin_url = origin + if origin_url not in self._origins: raise ValueError('Unknown origin.') if isinstance(date, str): @@ -1271,7 +1220,7 @@ visit_ret = None if origin_url in self._origins: - (origin_id, origin) = self._origins[origin_url] + origin = self._origins[origin_url] # visit ids are in the range [1, +inf[ visit_id = len(self._origin_visits[origin_url]) + 1 status = 'ongoing' @@ -1286,7 +1235,7 @@ ) self._origin_visits[origin_url].append(visit) visit_ret = { - 'origin': origin_id if ENABLE_ORIGIN_IDS else origin.url, + 'origin': origin.url, 'visit': visit_id, } @@ -1314,8 +1263,8 @@ None """ - origin_url = self._get_origin_url(origin) - if origin_url is None: + origin_url = origin + if origin_url not in self._origins: raise ValueError('Unknown origin.') try: @@ -1335,7 +1284,6 @@ visit = attr.evolve(visit, **updates) if self.journal_writer: - (_, origin) = self._origins[origin_url] self.journal_writer.write_update('origin_visit', visit) self._origin_visits[origin_url][visit_id-1] = visit @@ -1365,7 +1313,7 @@ if self.journal_writer: for visit in visits: - (_, visit.origin) = self._origins[visit.origin.url] + visit.origin = self._origins[visit.origin.url] self.journal_writer.write_addition('origin_visit', visit) for visit in visits: @@ -1384,12 +1332,9 @@ if visit is None: return - (origin_id, origin) = self._origins[visit.origin.url] + origin = self._origins[visit.origin.url] visit = visit.to_dict() - if ENABLE_ORIGIN_IDS: - visit['origin'] = origin_id - else: - visit['origin'] = origin.url + visit['origin'] = origin.url return visit @@ -1407,7 +1352,7 @@ List of visits. """ - origin_url = self._get_origin_url(origin) + origin_url = origin if origin_url in self._origin_visits: visits = self._origin_visits[origin_url] if last_visit is not None: @@ -1435,7 +1380,7 @@ A visit. 
""" - origin_url = self._get_origin_url(origin) + origin_url = origin if origin_url in self._origin_visits: visits = self._origin_visits[origin_url] visit = min( @@ -1454,7 +1399,7 @@ it does not exist """ - origin_url = self._get_origin_url(origin) + origin_url = origin if origin_url in self._origin_visits and \ visit <= len(self._origin_visits[origin_url]): return self._convert_visit( @@ -1486,10 +1431,9 @@ snapshot (Optional[sha1_git]): identifier of the snapshot associated to the visit """ - res = self._origins.get(origin) - if not res: + origin = self._origins.get(origin) + if not origin: return - (_, origin) = res visits = self._origin_visits[origin.url] if allowed_statuses is not None: visits = [visit for visit in visits @@ -1538,29 +1482,27 @@ metadata. Args: - origin_id (int): the origin's id for which the metadata is added + origin_id (str): the origin URL for which the metadata is added ts (datetime): timestamp of the found metadata provider: id of the provider of metadata (ex:'hal') tool: id of the tool used to extract metadata metadata (jsonb): the metadata retrieved at the time and location """ - if isinstance(origin_id, str): - origin = self.origin_get({'url': origin_id}) - if not origin: - return - origin_id = origin['id'] + origin = self.origin_get({'url': origin_id}) + if not origin: + return if isinstance(ts, str): ts = dateutil.parser.parse(ts) origin_metadata = { - 'origin_id': origin_id, + 'origin_url': origin_id, 'discovery_date': ts, 'tool_id': tool, 'metadata': metadata, 'provider_id': provider, } - self._origin_metadata[origin_id].append(origin_metadata) + self._origin_metadata[origin['url']].append(origin_metadata) return None def origin_metadata_get_by(self, origin_id, provider_type=None, db=None, @@ -1568,13 +1510,13 @@ """Retrieve list of all origin_metadata entries for the origin_id Args: - origin_id (int): the unique origin's identifier + origin_id (str): the origin URL provider_type (str): (optional) type of provider Returns: list of dicts: the origin_metadata dictionary with the keys: - - origin_id (int): origin's identifier + - origin_url (str): origin's identifier - discovery_date (datetime): timestamp of discovery - tool_id (int): metadata's extracting tool - metadata (jsonb) @@ -1584,11 +1526,9 @@ - provider_url (str) """ - if isinstance(origin_id, str): - origin = self.origin_get({'url': origin_id}) - if not origin: - return - origin_id = origin['id'] + origin = self.origin_get({'url': origin_id}) + if not origin: + return metadata = [] for item in self._origin_metadata[origin_id]: @@ -1694,17 +1634,6 @@ key = self._metadata_provider_key(provider) return self._metadata_providers.get(key) - def _get_origin_url(self, origin): - if isinstance(origin, str): - return origin - elif isinstance(origin, int): - if origin <= len(self._origins_by_id): - return self._origins_by_id[origin-1] - else: - return None - else: - raise TypeError('origin must be a string or an integer.') - def _person_add(self, person): """Add a person in storage. 
diff --git a/swh/storage/sql/40-swh-func.sql b/swh/storage/sql/40-swh-func.sql --- a/swh/storage/sql/40-swh-func.sql +++ b/swh/storage/sql/40-swh-func.sql @@ -937,21 +937,22 @@ provider_url text ); create or replace function swh_origin_metadata_get_by_origin( - origin integer) + origin text) returns setof origin_metadata_signature language sql stable as $$ - select om.id as id, origin_id, discovery_date, tool_id, om.metadata, + select o.url as origin_url, discovery_date, tool_id, om.metadata, mp.id as provider_id, provider_name, provider_type, provider_url from origin_metadata as om inner join metadata_provider mp on om.provider_id = mp.id - where om.origin_id = origin + inner join origin o on o.id = om.origin_id + where o.url = origin order by discovery_date desc; $$; create or replace function swh_origin_metadata_get_by_provider_type( - origin integer, + origin text, type text) returns setof origin_metadata_signature language sql @@ -961,7 +962,8 @@ mp.id as provider_id, provider_name, provider_type, provider_url from origin_metadata as om inner join metadata_provider mp on om.provider_id = mp.id - where om.origin_id = origin + inner join origin o on o.id = om.origin_id + where o.url = origin and mp.provider_type = type order by discovery_date desc; $$; diff --git a/swh/storage/storage.py b/swh/storage/storage.py --- a/swh/storage/storage.py +++ b/swh/storage/storage.py @@ -1743,7 +1743,7 @@ metadata. Args: - origin_id (int): the origin's id for which the metadata is added + origin_id (str): the origin URL for which the metadata is added ts (datetime): timestamp of the found metadata provider (int): the provider of metadata (ex:'hal') tool (int): tool used to extract metadata @@ -1752,11 +1752,10 @@ Returns: id (int): the origin_metadata unique id """ - if isinstance(origin_id, str): - origin = self.origin_get({'url': origin_id}, db=db, cur=cur) - if not origin: - return - origin_id = origin['id'] + origin = self.origin_get({'url': origin_id}, db=db, cur=cur) + if not origin: + return + origin_id = origin['id'] if isinstance(ts, str): ts = dateutil.parser.parse(ts) @@ -1770,13 +1769,13 @@ """Retrieve list of all origin_metadata entries for the origin_id Args: - origin_id (int): the unique origin identifier + origin_id (str): the origin URL provider_type (str): (optional) type of provider Returns: list of dicts: the origin_metadata dictionary with the keys: - - origin_id (int): origin's id + - origin_url (str): origin's URL - discovery_date (datetime): timestamp of discovery - tool_id (int): metadata's extracting tool - metadata (jsonb) @@ -1786,13 +1785,12 @@ - provider_url (str) """ - if isinstance(origin_id, str): - origin = self.origin_get({'url': origin_id}, db=db, cur=cur) - if not origin: - return - origin_id = origin['id'] + origin = self.origin_get({'url': origin_id}, db=db, cur=cur) + if not origin: + return + origin_url = origin['url'] - for line in db.origin_metadata_get_by(origin_id, provider_type, cur): + for line in db.origin_metadata_get_by(origin_url, provider_type, cur): yield dict(zip(db.origin_metadata_get_cols, line)) @db_transaction() diff --git a/swh/storage/tests/test_in_memory.py b/swh/storage/tests/test_in_memory.py --- a/swh/storage/tests/test_in_memory.py +++ b/swh/storage/tests/test_in_memory.py @@ -6,7 +6,7 @@ import pytest -from swh.storage.in_memory import Storage, ENABLE_ORIGIN_IDS +from swh.storage.in_memory import Storage from swh.storage.tests.test_storage import \ CommonTestStorage, CommonPropTestStorage @@ -19,8 +19,6 @@ functionality between
local and remote storage. All the tests are therefore defined in CommonTestStorage. """ - _test_origin_ids = ENABLE_ORIGIN_IDS - def setUp(self): super().setUp() self.reset_storage() @@ -37,19 +35,6 @@ def test_content_add_metadata_db(self): pass - if not _test_origin_ids: - @pytest.mark.skip('requires origin ids') - def test_origin_metadata_add(self): - pass - - @pytest.mark.skip('requires origin ids') - def test_origin_metadata_get(self): - pass - - @pytest.mark.skip('requires origin ids') - def test_origin_metadata_get_by_provider_type(self): - pass - def reset_storage(self): self.storage = Storage(journal_writer={'cls': 'memory'}) self.journal_writer = self.storage.journal_writer @@ -63,16 +48,9 @@ functionality between local and remote storage. All the tests are therefore defined in CommonPropTestStorage. """ - _test_origin_ids = ENABLE_ORIGIN_IDS - def setUp(self): super().setUp() self.storage = Storage() def reset_storage(self): self.storage = Storage() - - if not _test_origin_ids: - @pytest.mark.skip('requires origin ids') - def test_origin_get_range(self, new_origins): - pass diff --git a/swh/storage/tests/test_storage.py b/swh/storage/tests/test_storage.py --- a/swh/storage/tests/test_storage.py +++ b/swh/storage/tests/test_storage.py @@ -12,7 +12,7 @@ import threading import unittest from collections import defaultdict -from unittest.mock import Mock, patch +from unittest.mock import Mock import psycopg2 import pytest @@ -565,7 +565,6 @@ """ maxDiff = None # type: ClassVar[Optional[int]] - _test_origin_ids = True @staticmethod def normalize_entity(entity): @@ -1488,8 +1487,6 @@ id = self.storage.origin_add_one(self.origin) actual_origin = self.storage.origin_get({'url': self.origin['url']}) - if self._test_origin_ids: - self.assertEqual(actual_origin['id'], id) self.assertEqual(actual_origin['url'], self.origin['url']) id2 = self.storage.origin_add_one(self.origin) @@ -1505,15 +1502,11 @@ actual_origin = self.storage.origin_get([{ 'url': self.origin['url'], }])[0] - if self._test_origin_ids: - self.assertEqual(actual_origin['id'], origin1['id']) self.assertEqual(actual_origin['url'], origin1['url']) actual_origin2 = self.storage.origin_get([{ 'url': self.origin2['url'], }])[0] - if self._test_origin_ids: - self.assertEqual(actual_origin2['id'], origin2['id']) self.assertEqual(actual_origin2['url'], origin2['url']) if 'id' in actual_origin: @@ -1545,44 +1538,23 @@ def test_origin_get_legacy(self): self.assertIsNone(self.storage.origin_get(self.origin)) - id = self.storage.origin_add_one(self.origin) + self.storage.origin_add_one(self.origin) - # lookup per url (returns id) + # lookup per url actual_origin0 = self.storage.origin_get( {'url': self.origin['url']}) - if self._test_origin_ids: - self.assertEqual(actual_origin0['id'], id) self.assertEqual(actual_origin0['url'], self.origin['url']) - # lookup per id (returns dict) - if self._test_origin_ids: - actual_origin1 = self.storage.origin_get({'id': id}) - - self.assertEqual(actual_origin1, {'id': id, - 'type': self.origin['type'], - 'url': self.origin['url']}) - def test_origin_get(self): self.assertIsNone(self.storage.origin_get(self.origin)) - origin_id = self.storage.origin_add_one(self.origin) + self.storage.origin_add_one(self.origin) - # lookup per url (returns id) + # lookup per url actual_origin0 = self.storage.origin_get( [{'url': self.origin['url']}]) self.assertEqual(len(actual_origin0), 1, actual_origin0) - if self._test_origin_ids: - self.assertEqual(actual_origin0[0]['id'], origin_id) 
self.assertEqual(actual_origin0[0]['url'], self.origin['url']) - if self._test_origin_ids: - # lookup per id (returns dict) - actual_origin1 = self.storage.origin_get([{'id': origin_id}]) - - self.assertEqual(len(actual_origin1), 1, actual_origin1) - self.assertEqual(actual_origin1[0], {'id': origin_id, - 'type': self.origin['type'], - 'url': self.origin['url']}) - def test_origin_get_consistency(self): self.assertIsNone(self.storage.origin_get(self.origin)) id = self.storage.origin_add_one(self.origin) @@ -1722,31 +1694,28 @@ found_origins0 + found_origins1, [origin, origin2]) - @given(strategies.booleans()) - def test_origin_visit_add(self, use_url): - if not self._test_origin_ids and not use_url: - return + def test_origin_visit_add(self): self.reset_storage() # given self.assertIsNone(self.storage.origin_get([self.origin2])[0]) - origin_id = self.storage.origin_add_one(self.origin2) - self.assertIsNotNone(origin_id) + origin_url = self.storage.origin_add_one(self.origin2) + self.assertIsNotNone(origin_url) - origin_id_or_url = self.origin2['url'] if use_url else origin_id + origin_url = self.origin2['url'] # when origin_visit1 = self.storage.origin_visit_add( - origin_id_or_url, + origin_url, type='git', date=self.date_visit2) actual_origin_visits = list(self.storage.origin_visit_get( - origin_id_or_url)) + origin_url)) self.assertEqual(actual_origin_visits, [{ - 'origin': origin_id, + 'origin': origin_url, 'date': self.date_visit2, 'visit': origin_visit1['visit'], 'type': 'git', @@ -1771,39 +1740,33 @@ def test_origin_visit_get__unknown_origin(self): self.assertEqual([], list(self.storage.origin_visit_get('foo'))) - if self._test_origin_ids: - self.assertEqual([], list(self.storage.origin_visit_get(10))) - @given(strategies.booleans()) - def test_origin_visit_add_default_type(self, use_url): - if not self._test_origin_ids and not use_url: - return + def test_origin_visit_add_default_type(self): self.reset_storage() # given self.assertIsNone(self.storage.origin_get([self.origin2])[0]) - origin_id = self.storage.origin_add_one(self.origin2) - origin_id_or_url = self.origin2['url'] if use_url else origin_id - self.assertIsNotNone(origin_id) + origin_url = self.storage.origin_add_one(self.origin2) + self.assertIsNotNone(origin_url) # when origin_visit1 = self.storage.origin_visit_add( - origin_id_or_url, + origin_url, date=self.date_visit2) origin_visit2 = self.storage.origin_visit_add( - origin_id_or_url, + origin_url, date='2018-01-01 23:00:00+00') # then - self.assertEqual(origin_visit1['origin'], origin_id) + self.assertEqual(origin_visit1['origin'], origin_url) self.assertIsNotNone(origin_visit1['visit']) actual_origin_visits = list(self.storage.origin_visit_get( - origin_id_or_url)) + origin_url)) self.assertEqual(actual_origin_visits, [ { - 'origin': origin_id, + 'origin': origin_url, 'date': self.date_visit2, 'visit': origin_visit1['visit'], 'type': 'hg', @@ -1812,7 +1775,7 @@ 'snapshot': None, }, { - 'origin': origin_id, + 'origin': origin_url, 'date': self.date_visit3, 'visit': origin_visit2['visit'], 'type': 'hg', @@ -1847,38 +1810,35 @@ ('origin_visit', data2)]) def test_origin_visit_add_validation(self): - origin_id_or_url = self.storage.origin_add_one(self.origin2) + origin_url = self.storage.origin_add_one(self.origin2) with self.assertRaises((TypeError, psycopg2.ProgrammingError)) as cm: - self.storage.origin_visit_add(origin_id_or_url, date=[b'foo']) + self.storage.origin_visit_add(origin_url, date=[b'foo']) if type(cm.exception) == psycopg2.ProgrammingError: 
self.assertEqual(cm.exception.pgcode, psycopg2.errorcodes.UNDEFINED_FUNCTION) - @given(strategies.booleans()) - def test_origin_visit_update(self, use_url): - if not self._test_origin_ids and not use_url: - return + def test_origin_visit_update(self): self.reset_storage() # given - origin_id = self.storage.origin_add_one(self.origin) - origin_id2 = self.storage.origin_add_one(self.origin2) - origin2_id_or_url = self.origin2['url'] if use_url else origin_id2 + self.storage.origin_add_one(self.origin) + self.storage.origin_add_one(self.origin2) + origin2_url = self.origin2['url'] - origin_id_or_url = self.origin['url'] if use_url else origin_id + origin_url = self.origin['url'] origin_visit1 = self.storage.origin_visit_add( - origin_id_or_url, + origin_url, date=self.date_visit2) origin_visit2 = self.storage.origin_visit_add( - origin_id_or_url, + origin_url, date=self.date_visit3) origin_visit3 = self.storage.origin_visit_add( - origin2_id_or_url, + origin2_url, date=self.date_visit3) # when @@ -1887,16 +1847,16 @@ 'directories': 22, } self.storage.origin_visit_update( - origin_id_or_url, + origin_url, origin_visit1['visit'], status='full', metadata=visit1_metadata) self.storage.origin_visit_update( - origin2_id_or_url, + origin2_url, origin_visit3['visit'], status='partial') # then actual_origin_visits = list(self.storage.origin_visit_get( - origin_id_or_url)) + origin_url)) self.assertEqual(actual_origin_visits, [{ 'origin': origin_visit2['origin'], 'date': self.date_visit2, @@ -1916,7 +1876,7 @@ }]) actual_origin_visits_bis = list(self.storage.origin_visit_get( - origin_id_or_url, + origin_url, limit=1)) self.assertEqual(actual_origin_visits_bis, [{ @@ -1930,7 +1890,7 @@ }]) actual_origin_visits_ter = list(self.storage.origin_visit_get( - origin_id_or_url, + origin_url, last_visit=origin_visit1['visit'])) self.assertEqual(actual_origin_visits_ter, [{ @@ -1944,7 +1904,7 @@ }]) actual_origin_visits2 = list(self.storage.origin_visit_get( - origin2_id_or_url)) + origin2_url)) self.assertEqual(actual_origin_visits2, [{ 'origin': origin_visit3['origin'], @@ -2013,15 +1973,15 @@ ('origin_visit', data5)]) def test_origin_visit_update_validation(self): - origin_id = self.storage.origin_add_one(self.origin) + self.storage.origin_add_one(self.origin) visit = self.storage.origin_visit_add( - origin_id, + self.origin['url'], date=self.date_visit2) with self.assertRaisesRegex( (ValueError, psycopg2.DataError), 'status') as cm: self.storage.origin_visit_update( - origin_id, visit['visit'], status='foobar') + self.origin['url'], visit['visit'], status='foobar') if type(cm.exception) == psycopg2.DataError: self.assertEqual(cm.exception.pgcode, @@ -2056,30 +2016,26 @@ def test_origin_visit_find_by_date__unknown_origin(self): self.storage.origin_visit_find_by_date('foo', self.date_visit2) - @settings(deadline=None) - @given(strategies.booleans()) - def test_origin_visit_update_missing_snapshot(self, use_url): - if not self._test_origin_ids and not use_url: - return + def test_origin_visit_update_missing_snapshot(self): self.reset_storage() # given - origin_id = self.storage.origin_add_one(self.origin) - origin_id_or_url = self.origin['url'] if use_url else origin_id + self.storage.origin_add_one(self.origin) + origin_url = self.origin['url'] origin_visit = self.storage.origin_visit_add( - origin_id_or_url, + origin_url, date=self.date_visit1) # when self.storage.origin_visit_update( - origin_id_or_url, + origin_url, origin_visit['visit'], snapshot=self.snapshot['id']) # then actual_origin_visit = 
self.storage.origin_visit_get_by( - origin_id_or_url, + origin_url, origin_visit['visit']) self.assertEqual(actual_origin_visit['snapshot'], self.snapshot['id']) @@ -2087,35 +2043,31 @@ self.storage.snapshot_add([self.snapshot]) self.assertEqual(actual_origin_visit['snapshot'], self.snapshot['id']) - @settings(deadline=None) - @given(strategies.booleans()) - def test_origin_visit_get_by(self, use_url): - if not self._test_origin_ids and not use_url: - return + def test_origin_visit_get_by(self): self.reset_storage() - origin_id = self.storage.origin_add_one(self.origin) - origin_id2 = self.storage.origin_add_one(self.origin2) + self.storage.origin_add_one(self.origin) + self.storage.origin_add_one(self.origin2) - origin_id_or_url = self.origin['url'] if use_url else origin_id - origin2_id_or_url = self.origin2['url'] if use_url else origin_id2 + origin_url = self.origin['url'] + origin2_url = self.origin2['url'] origin_visit1 = self.storage.origin_visit_add( - origin_id_or_url, + origin_url, date=self.date_visit2) self.storage.snapshot_add([self.snapshot]) self.storage.origin_visit_update( - origin_id_or_url, + origin_url, origin_visit1['visit'], snapshot=self.snapshot['id']) # Add some other {origin, visit} entries self.storage.origin_visit_add( - origin_id_or_url, + origin_url, date=self.date_visit3) self.storage.origin_visit_add( - origin2_id_or_url, + origin2_url, date=self.date_visit3) # when @@ -2125,13 +2077,13 @@ } self.storage.origin_visit_update( - origin_id_or_url, + origin_url, origin_visit1['visit'], status='full', metadata=visit1_metadata) expected_origin_visit = origin_visit1.copy() expected_origin_visit.update({ - 'origin': origin_id, + 'origin': origin_url, 'visit': origin_visit1['visit'], 'date': self.date_visit2, 'type': self.origin['type'], @@ -2142,29 +2094,24 @@ # when actual_origin_visit1 = self.storage.origin_visit_get_by( - origin_id_or_url, + origin_url, origin_visit1['visit']) # then self.assertEqual(actual_origin_visit1, expected_origin_visit) def test_origin_visit_get_by__unknown_origin(self): - if self._test_origin_ids: - self.assertIsNone(self.storage.origin_visit_get_by(2, 10)) self.assertIsNone(self.storage.origin_visit_get_by('foo', 10)) - @given(strategies.booleans()) - def test_origin_visit_upsert_new(self, use_url): - if not self._test_origin_ids and not use_url: - return + def test_origin_visit_upsert_new(self): self.reset_storage() # given self.assertIsNone(self.storage.origin_get([self.origin2])[0]) - origin_id = self.storage.origin_add_one(self.origin2) - origin_id_or_url = self.origin2['url'] if use_url else origin_id - self.assertIsNotNone(origin_id) + self.storage.origin_add_one(self.origin2) + origin_url = self.origin2['url'] + self.assertIsNotNone(origin_url) # when self.storage.origin_visit_upsert([ @@ -2190,10 +2137,10 @@ # then actual_origin_visits = list(self.storage.origin_visit_get( - origin_id_or_url)) + origin_url)) self.assertEqual(actual_origin_visits, [ { - 'origin': origin_id, + 'origin': origin_url, 'date': self.date_visit2, 'visit': 123, 'type': self.origin2['type'], @@ -2202,7 +2149,7 @@ 'snapshot': None, }, { - 'origin': origin_id, + 'origin': origin_url, 'date': self.date_visit3, 'visit': 1234, 'type': self.origin2['type'], @@ -2236,23 +2183,19 @@ ('origin_visit', data1), ('origin_visit', data2)]) - @settings(deadline=None) - @given(strategies.booleans()) - def test_origin_visit_upsert_existing(self, use_url): - if not self._test_origin_ids and not use_url: - return + def test_origin_visit_upsert_existing(self): 
self.reset_storage() # given self.assertIsNone(self.storage.origin_get([self.origin2])[0]) - origin_id = self.storage.origin_add_one(self.origin2) - origin_id_or_url = self.origin2['url'] if use_url else origin_id - self.assertIsNotNone(origin_id) + self.storage.origin_add_one(self.origin2) + origin_url = self.origin2['url'] + self.assertIsNotNone(origin_url) # when origin_visit1 = self.storage.origin_visit_add( - origin_id_or_url, + origin_url, date=self.date_visit2) self.storage.origin_visit_upsert([{ 'origin': self.origin2, @@ -2265,14 +2208,14 @@ }]) # then - self.assertEqual(origin_visit1['origin'], origin_id) + self.assertEqual(origin_visit1['origin'], origin_url) self.assertIsNotNone(origin_visit1['visit']) actual_origin_visits = list(self.storage.origin_visit_get( - origin_id_or_url)) + origin_url)) self.assertEqual(actual_origin_visits, [{ - 'origin': origin_id, + 'origin': origin_url, 'date': self.date_visit2, 'visit': origin_visit1['visit'], 'type': self.origin2['type'], @@ -2306,38 +2249,28 @@ ('origin_visit', data2)]) def test_origin_visit_get_by_no_result(self): - if self._test_origin_ids: - actual_origin_visit = self.storage.origin_visit_get_by( - 10, 999) - self.assertIsNone(actual_origin_visit) - self.storage.origin_add([self.origin]) actual_origin_visit = self.storage.origin_visit_get_by( self.origin['url'], 999) self.assertIsNone(actual_origin_visit) - @settings(deadline=None) # this test is very slow - @given(strategies.booleans()) - def test_origin_visit_get_latest(self, use_url): - if not self._test_origin_ids and not use_url: - return + def test_origin_visit_get_latest(self): self.reset_storage() - origin_id = self.storage.origin_add_one(self.origin) - origin_id_or_url = self.origin['url'] if use_url else origin_id + self.storage.origin_add_one(self.origin) origin_url = self.origin['url'] origin_visit1 = self.storage.origin_visit_add( - origin_id_or_url, + origin_url, self.date_visit1) visit1_id = origin_visit1['visit'] origin_visit2 = self.storage.origin_visit_add( - origin_id_or_url, + origin_url, self.date_visit2) visit2_id = origin_visit2['visit'] # Add a visit with the same date as the previous one origin_visit3 = self.storage.origin_visit_add( - origin_id_or_url, + origin_url, self.date_visit2) visit3_id = origin_visit3['visit'] @@ -2357,7 +2290,7 @@ # visit1 and require_snapshot=False still returns visit2 self.storage.snapshot_add([self.complete_snapshot]) self.storage.origin_visit_update( - origin_id_or_url, visit1_id, + origin_url, visit1_id, snapshot=self.complete_snapshot['id']) self.assertEqual( {**origin_visit1, 'snapshot': self.complete_snapshot['id']}, @@ -2378,7 +2311,7 @@ # Mark the first visit as completed and check status filter again self.storage.origin_visit_update( - origin_id_or_url, + origin_url, visit1_id, status='full') self.assertEqual( { @@ -2396,7 +2329,7 @@ # Add snapshot to visit2 and check that the new snapshot is returned self.storage.snapshot_add([self.empty_snapshot]) self.storage.origin_visit_update( - origin_id_or_url, visit2_id, + origin_url, visit2_id, snapshot=self.empty_snapshot['id']) self.assertEqual( {**origin_visit2, 'snapshot': self.empty_snapshot['id']}, @@ -2421,7 +2354,7 @@ # Add snapshot to visit3 (same date as visit2) self.storage.snapshot_add([self.complete_snapshot]) self.storage.origin_visit_update( - origin_id_or_url, visit3_id, snapshot=self.complete_snapshot['id']) + origin_url, visit3_id, snapshot=self.complete_snapshot['id']) self.assertEqual( { **origin_visit1, @@ -2472,8 +2405,9 @@ 
revisions[1]['committer']) def test_snapshot_add_get_empty(self): - origin_id = self.storage.origin_add_one(self.origin) - origin_visit1 = self.storage.origin_visit_add(origin_id, + self.storage.origin_add_one(self.origin) + origin_url = self.origin['url'] + origin_visit1 = self.storage.origin_visit_add(origin_url, self.date_visit1) visit_id = origin_visit1['visit'] @@ -2481,12 +2415,12 @@ self.assertEqual(actual_result, {'snapshot:add': 1}) self.storage.origin_visit_update( - origin_id, visit_id, snapshot=self.empty_snapshot['id']) + origin_url, visit_id, snapshot=self.empty_snapshot['id']) by_id = self.storage.snapshot_get(self.empty_snapshot['id']) self.assertEqual(by_id, {**self.empty_snapshot, 'next_branch': None}) - by_ov = self.storage.snapshot_get_by_origin_visit(origin_id, visit_id) + by_ov = self.storage.snapshot_get_by_origin_visit(origin_url, visit_id) self.assertEqual(by_ov, {**self.empty_snapshot, 'next_branch': None}) expected_origin = self.origin.copy() @@ -2515,21 +2449,22 @@ ('origin_visit', data2)]) def test_snapshot_add_get_complete(self): - origin_id = self.storage.origin_add_one(self.origin) - origin_visit1 = self.storage.origin_visit_add(origin_id, + self.storage.origin_add_one(self.origin) + origin_url = self.origin['url'] + origin_visit1 = self.storage.origin_visit_add(origin_url, self.date_visit1) visit_id = origin_visit1['visit'] actual_result = self.storage.snapshot_add([self.complete_snapshot]) self.storage.origin_visit_update( - origin_id, visit_id, snapshot=self.complete_snapshot['id']) + origin_url, visit_id, snapshot=self.complete_snapshot['id']) self.assertEqual(actual_result, {'snapshot:add': 1}) by_id = self.storage.snapshot_get(self.complete_snapshot['id']) self.assertEqual(by_id, {**self.complete_snapshot, 'next_branch': None}) - by_ov = self.storage.snapshot_get_by_origin_visit(origin_id, visit_id) + by_ov = self.storage.snapshot_get_by_origin_visit(origin_url, visit_id) self.assertEqual(by_ov, {**self.complete_snapshot, 'next_branch': None}) @@ -2590,14 +2525,15 @@ self.storage.snapshot_add([snap]) def test_snapshot_add_count_branches(self): - origin_id = self.storage.origin_add_one(self.origin) - origin_visit1 = self.storage.origin_visit_add(origin_id, + self.storage.origin_add_one(self.origin) + origin_url = self.origin['url'] + origin_visit1 = self.storage.origin_visit_add(origin_url, self.date_visit1) visit_id = origin_visit1['visit'] actual_result = self.storage.snapshot_add([self.complete_snapshot]) self.storage.origin_visit_update( - origin_id, visit_id, snapshot=self.complete_snapshot['id']) + origin_url, visit_id, snapshot=self.complete_snapshot['id']) self.assertEqual(actual_result, {'snapshot:add': 1}) snp_id = self.complete_snapshot['id'] @@ -2616,14 +2552,15 @@ self.assertEqual(snp_size, expected_snp_size) def test_snapshot_add_get_paginated(self): - origin_id = self.storage.origin_add_one(self.origin) - origin_visit1 = self.storage.origin_visit_add(origin_id, + self.storage.origin_add_one(self.origin) + origin_url = self.origin['url'] + origin_visit1 = self.storage.origin_visit_add(origin_url, self.date_visit1) visit_id = origin_visit1['visit'] self.storage.snapshot_add([self.complete_snapshot]) self.storage.origin_visit_update( - origin_id, visit_id, + origin_url, visit_id, snapshot=self.complete_snapshot['id']) snp_id = self.complete_snapshot['id'] @@ -2679,14 +2616,15 @@ self.assertEqual(snapshot, expected_snapshot) def test_snapshot_add_get_filtered(self): - origin_id = self.storage.origin_add_one(self.origin) - origin_visit1 = 
self.storage.origin_visit_add(origin_id, + self.storage.origin_add_one(self.origin) + origin_url = self.origin['url'] + origin_visit1 = self.storage.origin_visit_add(origin_url, self.date_visit1) visit_id = origin_visit1['visit'] self.storage.snapshot_add([self.complete_snapshot]) self.storage.origin_visit_update( - origin_id, visit_id, snapshot=self.complete_snapshot['id']) + origin_url, visit_id, snapshot=self.complete_snapshot['id']) snp_id = self.complete_snapshot['id'] branches = self.complete_snapshot['branches'] @@ -2722,14 +2660,15 @@ self.assertEqual(snapshot, expected_snapshot) def test_snapshot_add_get_filtered_and_paginated(self): - origin_id = self.storage.origin_add_one(self.origin) - origin_visit1 = self.storage.origin_visit_add(origin_id, + self.storage.origin_add_one(self.origin) + origin_url = self.origin['url'] + origin_visit1 = self.storage.origin_visit_add(origin_url, self.date_visit1) visit_id = origin_visit1['visit'] self.storage.snapshot_add([self.complete_snapshot]) self.storage.origin_visit_update( - origin_id, visit_id, snapshot=self.complete_snapshot['id']) + origin_url, visit_id, snapshot=self.complete_snapshot['id']) snp_id = self.complete_snapshot['id'] branches = self.complete_snapshot['branches'] @@ -2801,27 +2740,29 @@ self.assertEqual(snapshot, expected_snapshot) def test_snapshot_add_get(self): - origin_id = self.storage.origin_add_one(self.origin) - origin_visit1 = self.storage.origin_visit_add(origin_id, + self.storage.origin_add_one(self.origin) + origin_url = self.origin['url'] + origin_visit1 = self.storage.origin_visit_add(origin_url, self.date_visit1) visit_id = origin_visit1['visit'] self.storage.snapshot_add([self.snapshot]) self.storage.origin_visit_update( - origin_id, visit_id, snapshot=self.snapshot['id']) + origin_url, visit_id, snapshot=self.snapshot['id']) by_id = self.storage.snapshot_get(self.snapshot['id']) self.assertEqual(by_id, {**self.snapshot, 'next_branch': None}) - by_ov = self.storage.snapshot_get_by_origin_visit(origin_id, visit_id) + by_ov = self.storage.snapshot_get_by_origin_visit(origin_url, visit_id) self.assertEqual(by_ov, {**self.snapshot, 'next_branch': None}) - origin_visit_info = self.storage.origin_visit_get_by(origin_id, + origin_visit_info = self.storage.origin_visit_get_by(origin_url, visit_id) self.assertEqual(origin_visit_info['snapshot'], self.snapshot['id']) def test_snapshot_add_nonexistent_visit(self): - origin_id = self.storage.origin_add_one(self.origin) + self.storage.origin_add_one(self.origin) + origin_url = self.origin['url'] visit_id = 54164461156 self.journal_writer.objects[:] = [] @@ -2830,33 +2771,34 @@ with self.assertRaises(ValueError): self.storage.origin_visit_update( - origin_id, visit_id, snapshot=self.snapshot['id']) + origin_url, visit_id, snapshot=self.snapshot['id']) self.assertEqual(list(self.journal_writer.objects), [ ('snapshot', self.snapshot)]) def test_snapshot_add_twice__by_origin_visit(self): - origin_id = self.storage.origin_add_one(self.origin) - origin_visit1 = self.storage.origin_visit_add(origin_id, + self.storage.origin_add_one(self.origin) + origin_url = self.origin['url'] + origin_visit1 = self.storage.origin_visit_add(origin_url, self.date_visit1) visit1_id = origin_visit1['visit'] self.storage.snapshot_add([self.snapshot]) self.storage.origin_visit_update( - origin_id, visit1_id, snapshot=self.snapshot['id']) + origin_url, visit1_id, snapshot=self.snapshot['id']) - by_ov1 = self.storage.snapshot_get_by_origin_visit(origin_id, + by_ov1 = 
self.storage.snapshot_get_by_origin_visit(origin_url, visit1_id) self.assertEqual(by_ov1, {**self.snapshot, 'next_branch': None}) - origin_visit2 = self.storage.origin_visit_add(origin_id, + origin_visit2 = self.storage.origin_visit_add(origin_url, self.date_visit2) visit2_id = origin_visit2['visit'] self.storage.snapshot_add([self.snapshot]) self.storage.origin_visit_update( - origin_id, visit2_id, snapshot=self.snapshot['id']) + origin_url, visit2_id, snapshot=self.snapshot['id']) - by_ov2 = self.storage.snapshot_get_by_origin_visit(origin_id, + by_ov2 = self.storage.snapshot_get_by_origin_visit(origin_url, visit2_id) self.assertEqual(by_ov2, {**self.snapshot, 'next_branch': None}) @@ -2905,68 +2847,64 @@ ('origin_visit', data3), ('origin_visit', data4)]) - @settings(deadline=None) # this test is very slow - @given(strategies.booleans()) - def test_snapshot_get_latest(self, use_url): - if not self._test_origin_ids and not use_url: - return + def test_snapshot_get_latest(self): self.reset_storage() - origin_id = self.storage.origin_add_one(self.origin) - origin_id_or_url = self.origin['url'] if use_url else origin_id - origin_visit1 = self.storage.origin_visit_add(origin_id, + self.storage.origin_add_one(self.origin) + origin_url = self.origin['url'] + origin_visit1 = self.storage.origin_visit_add(origin_url, self.date_visit1) visit1_id = origin_visit1['visit'] - origin_visit2 = self.storage.origin_visit_add(origin_id, + origin_visit2 = self.storage.origin_visit_add(origin_url, self.date_visit2) visit2_id = origin_visit2['visit'] # Add a visit with the same date as the previous one - origin_visit3 = self.storage.origin_visit_add(origin_id, + origin_visit3 = self.storage.origin_visit_add(origin_url, self.date_visit2) visit3_id = origin_visit3['visit'] # Two visits, both with no snapshot: latest snapshot is None self.assertIsNone(self.storage.snapshot_get_latest( - origin_id_or_url)) + origin_url)) # Add snapshot to visit1, latest snapshot = visit 1 snapshot self.storage.snapshot_add([self.complete_snapshot]) self.storage.origin_visit_update( - origin_id, visit1_id, snapshot=self.complete_snapshot['id']) + origin_url, visit1_id, snapshot=self.complete_snapshot['id']) self.assertEqual({**self.complete_snapshot, 'next_branch': None}, self.storage.snapshot_get_latest( - origin_id_or_url)) + origin_url)) # Status filter: all three visits are status=ongoing, so no snapshot # returned self.assertIsNone( self.storage.snapshot_get_latest( - origin_id_or_url, + origin_url, allowed_statuses=['full']) ) # Mark the first visit as completed and check status filter again - self.storage.origin_visit_update(origin_id, visit1_id, status='full') + self.storage.origin_visit_update(origin_url, visit1_id, status='full') self.assertEqual( {**self.complete_snapshot, 'next_branch': None}, self.storage.snapshot_get_latest( - origin_id_or_url, + origin_url, allowed_statuses=['full']), ) # Add snapshot to visit2 and check that the new snapshot is returned self.storage.snapshot_add([self.empty_snapshot]) self.storage.origin_visit_update( - origin_id, visit2_id, snapshot=self.empty_snapshot['id']) + origin_url, visit2_id, snapshot=self.empty_snapshot['id']) self.assertEqual({**self.empty_snapshot, 'next_branch': None}, - self.storage.snapshot_get_latest(origin_id)) + self.storage.snapshot_get_latest(origin_url)) # Check that the status filter is still working self.assertEqual( {**self.complete_snapshot, 'next_branch': None}, self.storage.snapshot_get_latest( - origin_id_or_url, + origin_url, allowed_statuses=['full']), 
) @@ -2974,60 +2912,57 @@ # the new snapshot is returned self.storage.snapshot_add([self.complete_snapshot]) self.storage.origin_visit_update( - origin_id, visit3_id, snapshot=self.complete_snapshot['id']) + origin_url, visit3_id, snapshot=self.complete_snapshot['id']) self.assertEqual({**self.complete_snapshot, 'next_branch': None}, self.storage.snapshot_get_latest( - origin_id_or_url)) + origin_url)) - @given(strategies.booleans()) - def test_snapshot_get_latest__missing_snapshot(self, use_url): - if not self._test_origin_ids and not use_url: - return + def test_snapshot_get_latest__missing_snapshot(self): self.reset_storage() # Origin does not exist self.assertIsNone(self.storage.snapshot_get_latest( - self.origin['url'] if use_url else 999)) + self.origin['url'])) - origin_id = self.storage.origin_add_one(self.origin) - origin_id_or_url = self.origin['url'] if use_url else origin_id + self.storage.origin_add_one(self.origin) + origin_url = self.origin['url'] origin_visit1 = self.storage.origin_visit_add( - origin_id_or_url, + origin_url, self.date_visit1) visit1_id = origin_visit1['visit'] origin_visit2 = self.storage.origin_visit_add( - origin_id_or_url, + origin_url, self.date_visit2) visit2_id = origin_visit2['visit'] # Two visits, both with no snapshot: latest snapshot is None self.assertIsNone(self.storage.snapshot_get_latest( - origin_id_or_url)) + origin_url)) # Add unknown snapshot to visit1, check that the inconsistency is # detected self.storage.origin_visit_update( - origin_id_or_url, + origin_url, visit1_id, snapshot=self.complete_snapshot['id']) with self.assertRaises(ValueError): self.storage.snapshot_get_latest( - origin_id_or_url) + origin_url) # Status filter: both visits are status=ongoing, so no snapshot # returned self.assertIsNone( self.storage.snapshot_get_latest( - origin_id_or_url, + origin_url, allowed_statuses=['full']) ) # Mark the first visit as completed and check status filter again self.storage.origin_visit_update( - origin_id_or_url, + origin_url, visit1_id, status='full') with self.assertRaises(ValueError): self.storage.snapshot_get_latest( - origin_id_or_url, + origin_url, allowed_statuses=['full']), # Actually add the snapshot and check status filter again @@ -3035,24 +2970,24 @@ self.assertEqual( {**self.complete_snapshot, 'next_branch': None}, self.storage.snapshot_get_latest( - origin_id_or_url) + origin_url) ) # Add unknown snapshot to visit2 and check that the inconsistency # is detected self.storage.origin_visit_update( - origin_id_or_url, + origin_url, visit2_id, snapshot=self.snapshot['id']) with self.assertRaises(ValueError): self.storage.snapshot_get_latest( - origin_id_or_url) + origin_url) # Actually add that snapshot and check that the new one is returned self.storage.snapshot_add([self.snapshot]) self.assertEqual( {**self.snapshot, 'next_branch': None}, self.storage.snapshot_get_latest( - origin_id_or_url) + origin_url) ) def test_stat_counters(self): @@ -3517,18 +3452,13 @@ # then self.assertTrue(provider_id, actual_provider['id']) - @given(strategies.booleans()) - def test_origin_metadata_add(self, use_url): + def test_origin_metadata_add(self): self.reset_storage() # given origin = self.storage.origin_add([self.origin])[0] - origin_id = origin['id'] - if use_url: - origin = origin['url'] - else: - origin = origin['id'] + origin_url = origin['url'] origin_metadata0 = list(self.storage.origin_metadata_get_by( - origin)) + origin_url)) self.assertEqual(len(origin_metadata0), 0, origin_metadata0) tools = 
self.storage.tool_add([self.metadata_tool]) @@ -3546,28 +3476,28 @@ # when adding for the same origin 2 metadatas self.storage.origin_metadata_add( - origin, + origin_url, self.origin_metadata['discovery_date'], provider['id'], tool['id'], self.origin_metadata['metadata']) self.storage.origin_metadata_add( - origin, + origin_url, '2015-01-01 23:00:00+00', provider['id'], tool['id'], self.origin_metadata2['metadata']) actual_om = list(self.storage.origin_metadata_get_by( - origin)) + origin_url)) # then self.assertCountEqual( - [item['origin_id'] for item in actual_om], - [origin_id, origin_id]) + [item['origin_url'] for item in actual_om], + [origin_url, origin_url]) def test_origin_metadata_get(self): # given - origin_id = self.storage.origin_add([self.origin])[0]['id'] - origin_id2 = self.storage.origin_add([self.origin2])[0]['id'] + origin_url = self.storage.origin_add([self.origin])[0]['url'] + origin_url2 = self.storage.origin_add([self.origin2])[0]['url'] self.storage.metadata_provider_add(self.provider['name'], self.provider['type'], @@ -3580,29 +3510,29 @@ tool = self.storage.tool_add([self.metadata_tool])[0] # when adding for the same origin 2 metadatas self.storage.origin_metadata_add( - origin_id, + origin_url, self.origin_metadata['discovery_date'], provider['id'], tool['id'], self.origin_metadata['metadata']) self.storage.origin_metadata_add( - origin_id2, + origin_url2, self.origin_metadata2['discovery_date'], provider['id'], tool['id'], self.origin_metadata2['metadata']) self.storage.origin_metadata_add( - origin_id, + origin_url, self.origin_metadata2['discovery_date'], provider['id'], tool['id'], self.origin_metadata2['metadata']) all_metadatas = list(self.storage.origin_metadata_get_by( - origin_id)) + origin_url)) metadatas_for_origin2 = list(self.storage.origin_metadata_get_by( - origin_id2)) + origin_url2)) expected_results = [{ - 'origin_id': origin_id, + 'origin_url': origin_url, 'discovery_date': datetime.datetime( 2017, 1, 1, 23, 0, tzinfo=datetime.timezone.utc), @@ -3616,7 +3546,7 @@ 'provider_url': 'http:///hal/inria', 'tool_id': tool['id'] }, { - 'origin_id': origin_id, + 'origin_url': origin_url, 'discovery_date': datetime.datetime( 2015, 1, 1, 23, 0, tzinfo=datetime.timezone.utc), @@ -3660,8 +3590,8 @@ def test_origin_metadata_get_by_provider_type(self): # given - origin_id = self.storage.origin_add([self.origin])[0]['id'] - origin_id2 = self.storage.origin_add([self.origin2])[0]['id'] + origin_url = self.storage.origin_add([self.origin])[0]['url'] + origin_url2 = self.storage.origin_add([self.origin2])[0]['url'] provider1_id = self.storage.metadata_provider_add( self.provider['name'], self.provider['type'], @@ -3693,26 +3623,26 @@ # when adding for the same origin 2 metadatas self.storage.origin_metadata_add( - origin_id, + origin_url, self.origin_metadata['discovery_date'], provider1['id'], tool['id'], self.origin_metadata['metadata']) self.storage.origin_metadata_add( - origin_id2, + origin_url2, self.origin_metadata2['discovery_date'], provider2['id'], tool['id'], self.origin_metadata2['metadata']) provider_type = 'registry' m_by_provider = list(self.storage.origin_metadata_get_by( - origin_id2, + origin_url2, provider_type)) for item in m_by_provider: if 'id' in item: del item['id'] expected_results = [{ - 'origin_id': origin_id2, + 'origin_url': origin_url2, 'discovery_date': datetime.datetime( 2017, 1, 1, 23, 0, tzinfo=datetime.timezone.utc), @@ -3733,8 +3663,6 @@ class CommonPropTestStorage: - _test_origin_ids = True - def assert_contents_ok(self, 
expected_contents, actual_contents, keys_to_check={'sha1', 'data'}): """Assert that a given list of contents matches on a given set of keys. @@ -3881,24 +3809,6 @@ keys_to_check) - def test_origin_get_invalid_id_legacy(self): - if self._test_origin_ids: - invalid_origin_id = 1 - origin_info = self.storage.origin_get({'id': invalid_origin_id}) - self.assertIsNone(origin_info) - - origin_visits = list(self.storage.origin_visit_get( - invalid_origin_id)) - self.assertEqual(origin_visits, []) - - def test_origin_get_invalid_id(self): - if self._test_origin_ids: - origin_info = self.storage.origin_get([{'id': 1}, {'id': 2}]) - self.assertEqual(origin_info, [None, None]) - - origin_visits = list(self.storage.origin_visit_get(1)) - self.assertEqual(origin_visits, []) - @given(strategies.lists(origins().map(lambda x: x.to_dict()), unique_by=lambda x: x['url'], min_size=6, max_size=15)) @@ -3916,9 +3826,6 @@ origin_count=origin_count)) for origin in actual_origins: - del origin['id'] - - for origin in actual_origins: self.assertIn(origin, new_origins) origin_from = -1 @@ -3979,11 +3886,11 @@ for (obj_type, obj) in objects: obj = obj.to_dict() if obj_type == 'origin_visit': - origin_id = self.storage.origin_add_one(obj.pop('origin')) + origin_url = self.storage.origin_add_one(obj.pop('origin')) if 'visit' in obj: del obj['visit'] self.storage.origin_visit_add( - origin_id, obj['date'], obj['type']) + origin_url, obj['date'], obj['type']) else: method = getattr(self.storage, obj_type + '_add') try: @@ -3997,37 +3904,6 @@ unittest.TestCase): """Test the local storage""" - # Can only be tested with local storage as you can't mock - # datetimes for the remote server - @given(strategies.booleans()) - def test_fetch_history(self, use_url): - if not self._test_origin_ids and not use_url: - return - self.reset_storage() - - origin_id = self.storage.origin_add_one(self.origin) - origin_id_or_url = self.origin['url'] if use_url else origin_id - with patch('datetime.datetime'): - datetime.datetime.now.return_value = self.fetch_history_date - fetch_history_id = self.storage.fetch_history_start( - origin_id_or_url) - datetime.datetime.now.assert_called_with(tz=datetime.timezone.utc) - - with patch('datetime.datetime'): - datetime.datetime.now.return_value = self.fetch_history_end - self.storage.fetch_history_end(fetch_history_id, - self.fetch_history_data) - - fetch_history = self.storage.fetch_history_get(fetch_history_id) - expected_fetch_history = self.fetch_history_data.copy() - - expected_fetch_history['id'] = fetch_history_id - expected_fetch_history['origin'] = origin_id - expected_fetch_history['date'] = self.fetch_history_date - expected_fetch_history['duration'] = self.fetch_history_duration - - self.assertEqual(expected_fetch_history, fetch_history) - # This test is only relevant on the local storage, with an actual # objstorage raising an exception def test_content_add_objstorage_exception(self):
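With origin ids gone, the origin_metadata API is keyed on origin URLs as well, as exercised by the tests above. A rough usage sketch against the in-memory backend (not part of the patch): the provider name/type/url and the tool description below are illustrative values only, and the metadata_provider_add/tool_add argument layout is assumed from existing swh.storage conventions rather than defined by this diff.

import datetime

from swh.storage.in_memory import Storage

storage = Storage()
origin_url = storage.origin_add_one(
    {'url': 'https://example.org/repo.git', 'type': 'git'})

# Register a metadata provider and an extraction tool (assumed arguments).
provider_id = storage.metadata_provider_add(
    'hal', 'deposit-client', 'http://hal.example.org/', {})
tool = storage.tool_add([{
    'name': 'swh-metadata-detector',
    'version': '0.0.1',
    'configuration': {},
}])[0]

# Metadata is now attached to, and fetched by, the origin URL.
storage.origin_metadata_add(
    origin_url,
    datetime.datetime.now(tz=datetime.timezone.utc),
    provider_id,
    tool['id'],
    {'description': 'example metadata'})

for entry in storage.origin_metadata_get_by(origin_url):
    assert entry['origin_url'] == origin_url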