Changeset View
Changeset View
Standalone View
Standalone View
swh/storage/tests/test_storage.py
Show All 20 Lines | |||||
import pytest | import pytest | ||||
from hypothesis import given, strategies, settings, HealthCheck | from hypothesis import given, strategies, settings, HealthCheck | ||||
from typing import ClassVar, Optional | from typing import ClassVar, Optional | ||||
from swh.model import from_disk, identifiers | from swh.model import from_disk, identifiers | ||||
from swh.model.hashutil import hash_to_bytes | from swh.model.hashutil import hash_to_bytes | ||||
from swh.model.model import Content, OriginVisit, Release, Revision | from swh.model.model import ( | ||||
Content, OriginVisit, Release, Revision | |||||
) | |||||
from swh.model.hypothesis_strategies import objects | from swh.model.hypothesis_strategies import objects | ||||
from swh.storage import HashCollision, get_storage | from swh.storage import HashCollision, get_storage | ||||
from swh.storage.converters import origin_url_to_sha1 as sha1 | from swh.storage.converters import origin_url_to_sha1 as sha1 | ||||
from swh.storage.exc import StorageArgumentException | from swh.storage.exc import StorageArgumentException | ||||
from swh.storage.interface import StorageInterface | from swh.storage.interface import StorageInterface | ||||
from .storage_data import data | from .storage_data import data | ||||
▲ Show 20 Lines • Show All 1,346 Lines • ▼ Show 20 Lines | |||||
def test_origin_visit_get_random(self, swh_storage): | def test_origin_visit_get_random(self, swh_storage): | ||||
swh_storage.origin_add(data.origins) | swh_storage.origin_add(data.origins) | ||||
# Add some random visits within the selection range | # Add some random visits within the selection range | ||||
visits = self._generate_random_visits() | visits = self._generate_random_visits() | ||||
visit_type = 'git' | visit_type = 'git' | ||||
# Add visits to those origins | # Add visits to those origins | ||||
for origin in data.origins: | for origin in data.origins: | ||||
origin_url = origin['url'] | |||||
for date_visit in visits: | for date_visit in visits: | ||||
visit = swh_storage.origin_visit_add( | visit = swh_storage.origin_visit_add( | ||||
origin['url'], date=date_visit, type=visit_type) | origin_url, date=date_visit, type=visit_type) | ||||
swh_storage.origin_visit_update( | swh_storage.origin_visit_update( | ||||
origin['url'], visit_id=visit['visit'], status='full') | origin_url, visit_id=visit.visit, status='full') | ||||
swh_storage.refresh_stat_counters() | swh_storage.refresh_stat_counters() | ||||
stats = swh_storage.stat_counters() | stats = swh_storage.stat_counters() | ||||
assert stats['origin'] == len(data.origins) | assert stats['origin'] == len(data.origins) | ||||
assert stats['origin_visit'] == len(data.origins) * len(visits) | assert stats['origin_visit'] == len(data.origins) * len(visits) | ||||
random_origin_visit = swh_storage.origin_visit_get_random(visit_type) | random_origin_visit = swh_storage.origin_visit_get_random(visit_type) | ||||
assert random_origin_visit | assert random_origin_visit | ||||
assert random_origin_visit['origin'] is not None | assert random_origin_visit['origin'] is not None | ||||
original_urls = [o['url'] for o in data.origins] | original_urls = [o['url'] for o in data.origins] | ||||
assert random_origin_visit['origin'] in original_urls | assert random_origin_visit['origin'] in original_urls | ||||
def test_origin_visit_get_random_nothing_found(self, swh_storage): | def test_origin_visit_get_random_nothing_found(self, swh_storage): | ||||
swh_storage.origin_add(data.origins) | swh_storage.origin_add(data.origins) | ||||
visit_type = 'hg' | visit_type = 'hg' | ||||
# Add some visits outside of the random generation selection so nothing | # Add some visits outside of the random generation selection so nothing | ||||
# will be found by the random selection | # will be found by the random selection | ||||
visits = self._generate_random_visits(nb_visits=3, start=13, end=24) | visits = self._generate_random_visits(nb_visits=3, start=13, end=24) | ||||
for origin in data.origins: | for origin in data.origins: | ||||
origin_url = origin['url'] | |||||
for date_visit in visits: | for date_visit in visits: | ||||
visit = swh_storage.origin_visit_add( | visit = swh_storage.origin_visit_add( | ||||
origin['url'], date=date_visit, type=visit_type) | origin_url, date=date_visit, type=visit_type) | ||||
swh_storage.origin_visit_update( | swh_storage.origin_visit_update( | ||||
origin['url'], visit_id=visit['visit'], status='full') | origin_url, visit.visit, status='full') | ||||
random_origin_visit = swh_storage.origin_visit_get_random(visit_type) | random_origin_visit = swh_storage.origin_visit_get_random(visit_type) | ||||
assert random_origin_visit is None | assert random_origin_visit is None | ||||
def test_origin_get_by_sha1(self, swh_storage): | def test_origin_get_by_sha1(self, swh_storage): | ||||
assert swh_storage.origin_get(data.origin) is None | assert swh_storage.origin_get(data.origin) is None | ||||
swh_storage.origin_add_one(data.origin) | swh_storage.origin_add_one(data.origin) | ||||
▲ Show 20 Lines • Show All 119 Lines • ▼ Show 20 Lines | def test_origin_search_regexp_fullstring(self, swh_storage): | ||||
assert len(found_origins1) == 1 | assert len(found_origins1) == 1 | ||||
assert found_origins1[0] in [origin, origin2] | assert found_origins1[0] in [origin, origin2] | ||||
# check both origins were returned | # check both origins were returned | ||||
assert found_origins0 != found_origins1 | assert found_origins0 != found_origins1 | ||||
def test_origin_visit_add(self, swh_storage): | def test_origin_visit_add(self, swh_storage): | ||||
# given | # given | ||||
swh_storage.origin_add_one(data.origin2) | origin_url = swh_storage.origin_add_one(data.origin2) | ||||
origin_url = data.origin2['url'] | |||||
date_visit = datetime.datetime.now(datetime.timezone.utc) | date_visit = datetime.datetime.now(datetime.timezone.utc) | ||||
# Round to milliseconds before insertion, so equality doesn't fail | # Round to milliseconds before insertion, so equality doesn't fail | ||||
# after a round-trip through a DB (eg. Cassandra) | # after a round-trip through a DB (eg. Cassandra) | ||||
date_visit = date_visit.replace( | date_visit = date_visit.replace( | ||||
microsecond=round(date_visit.microsecond, -3)) | microsecond=round(date_visit.microsecond, -3)) | ||||
# when | # when | ||||
origin_visit1 = swh_storage.origin_visit_add( | origin_visit = swh_storage.origin_visit_add( | ||||
vlorentz: rename to `origin_visit`. | |||||
origin_url, | origin_url, type=data.type_visit1, date=date_visit) | ||||
type=data.type_visit1, | |||||
date=date_visit) | |||||
actual_origin_visits = list(swh_storage.origin_visit_get( | |||||
origin_url)) | |||||
assert { | |||||
'origin': origin_url, | |||||
'date': date_visit, | |||||
'visit': origin_visit1['visit'], | |||||
'type': data.type_visit1, | |||||
'status': 'ongoing', | |||||
'metadata': None, | |||||
'snapshot': None, | |||||
} in actual_origin_visits | |||||
origin_visit = { | expected_origin_visit = { | ||||
Done (inline comment) — vlorentz: rename to `expected_origin_visit`, and assert it's equal to what `origin_visit_add` returned. | |||||
'origin': origin_url, | 'origin': origin_url, | ||||
'date': date_visit, | 'date': date_visit, | ||||
'visit': origin_visit1['visit'], | 'visit': origin_visit.visit, | ||||
'type': data.type_visit1, | 'type': data.type_visit1, | ||||
'status': 'ongoing', | 'status': 'ongoing', | ||||
'metadata': None, | 'metadata': None, | ||||
'snapshot': None, | 'snapshot': None, | ||||
} | } | ||||
assert origin_visit == OriginVisit.from_dict(expected_origin_visit) | |||||
actual_origin_visits = list(swh_storage.origin_visit_get( | |||||
origin_url)) | |||||
assert expected_origin_visit in actual_origin_visits | |||||
objects = list(swh_storage.journal_writer.journal.objects) | objects = list(swh_storage.journal_writer.journal.objects) | ||||
assert ('origin', data.origin2) in objects | assert ('origin', data.origin2) in objects | ||||
assert ('origin_visit', origin_visit) in objects | assert ('origin_visit', expected_origin_visit) in objects | ||||
def test_origin_visit_get__unknown_origin(self, swh_storage): | def test_origin_visit_get__unknown_origin(self, swh_storage): | ||||
assert [] == list(swh_storage.origin_visit_get('foo')) | assert [] == list(swh_storage.origin_visit_get('foo')) | ||||
def test_origin_visit_add_default_type(self, swh_storage): | def test_origin_visit_add_default_type(self, swh_storage): | ||||
# given | # given | ||||
swh_storage.origin_add_one(data.origin2) | origin_url = swh_storage.origin_add_one(data.origin2) | ||||
origin_url = data.origin2['url'] | |||||
# when | # when | ||||
date_visit = datetime.datetime.now(datetime.timezone.utc) | date_visit = datetime.datetime.now(datetime.timezone.utc) | ||||
date_visit2 = date_visit + datetime.timedelta(minutes=1) | date_visit2 = date_visit + datetime.timedelta(minutes=1) | ||||
# Round to milliseconds before insertion, so equality doesn't fail | # Round to milliseconds before insertion, so equality doesn't fail | ||||
# after a round-trip through a DB (eg. Cassandra) | # after a round-trip through a DB (eg. Cassandra) | ||||
date_visit = date_visit.replace( | date_visit = date_visit.replace( | ||||
microsecond=round(date_visit.microsecond, -3)) | microsecond=round(date_visit.microsecond, -3)) | ||||
date_visit2 = date_visit2.replace( | date_visit2 = date_visit2.replace( | ||||
microsecond=round(date_visit2.microsecond, -3)) | microsecond=round(date_visit2.microsecond, -3)) | ||||
origin_visit1 = swh_storage.origin_visit_add( | origin_visit1 = swh_storage.origin_visit_add( | ||||
origin_url, | origin_url, date=date_visit, type=data.type_visit1) | ||||
date=date_visit, | |||||
type=data.type_visit1, | |||||
) | |||||
origin_visit2 = swh_storage.origin_visit_add( | origin_visit2 = swh_storage.origin_visit_add( | ||||
origin_url, | origin_url, date=date_visit2, type=data.type_visit2) | ||||
date=date_visit2, | |||||
type=data.type_visit2, | |||||
) | |||||
# then | # then | ||||
assert origin_visit1['origin'] == origin_url | assert origin_visit1.origin == origin_url | ||||
assert origin_visit1['visit'] is not None | assert origin_visit1.visit is not None | ||||
actual_origin_visits = list(swh_storage.origin_visit_get( | actual_origin_visits = list(swh_storage.origin_visit_get( | ||||
origin_url)) | origin_url)) | ||||
expected_visits = [ | expected_visits = [ | ||||
{ | { | ||||
'origin': origin_url, | 'origin': origin_url, | ||||
'date': date_visit, | 'date': date_visit, | ||||
'visit': origin_visit1['visit'], | 'visit': origin_visit1.visit, | ||||
'type': data.type_visit1, | 'type': data.type_visit1, | ||||
'status': 'ongoing', | 'status': 'ongoing', | ||||
'metadata': None, | 'metadata': None, | ||||
'snapshot': None, | 'snapshot': None, | ||||
}, | }, | ||||
{ | { | ||||
'origin': origin_url, | 'origin': origin_url, | ||||
'date': date_visit2, | 'date': date_visit2, | ||||
'visit': origin_visit2['visit'], | 'visit': origin_visit2.visit, | ||||
'type': data.type_visit2, | 'type': data.type_visit2, | ||||
'status': 'ongoing', | 'status': 'ongoing', | ||||
'metadata': None, | 'metadata': None, | ||||
'snapshot': None, | 'snapshot': None, | ||||
}, | }, | ||||
] | ] | ||||
for visit in expected_visits: | for visit in expected_visits: | ||||
assert visit in actual_origin_visits | assert visit in actual_origin_visits | ||||
objects = list(swh_storage.journal_writer.journal.objects) | objects = list(swh_storage.journal_writer.journal.objects) | ||||
assert ('origin', data.origin2) in objects | assert ('origin', data.origin2) in objects | ||||
for visit in expected_visits: | for visit in expected_visits: | ||||
assert ('origin_visit', visit) in objects | assert ('origin_visit', visit) in objects | ||||
def test_origin_visit_add_validation(self, swh_storage): | def test_origin_visit_add_validation(self, swh_storage): | ||||
origin_url = swh_storage.origin_add_one(data.origin2) | origin_url = swh_storage.origin_add_one(data.origin2) | ||||
with pytest.raises(StorageArgumentException) as cm: | with pytest.raises(StorageArgumentException) as cm: | ||||
swh_storage.origin_visit_add(origin_url, date=[b'foo'], type='git') | swh_storage.origin_visit_add(origin_url, date=[b'foo'], type='git') | ||||
if type(cm.value) == psycopg2.ProgrammingError: | if type(cm.value) == psycopg2.ProgrammingError: | ||||
assert cm.value.pgcode \ | assert cm.value.pgcode \ | ||||
== psycopg2.errorcodes.UNDEFINED_FUNCTION | == psycopg2.errorcodes.UNDEFINED_FUNCTION | ||||
def test_origin_visit_update(self, swh_storage): | def test_origin_visit_update(self, swh_storage): | ||||
# given | # given | ||||
swh_storage.origin_add_one(data.origin) | origin_url = swh_storage.origin_add_one(data.origin) | ||||
origin_url = data.origin['url'] | origin_url2 = swh_storage.origin_add_one(data.origin2) | ||||
date_visit = datetime.datetime.now(datetime.timezone.utc) | date_visit = datetime.datetime.now(datetime.timezone.utc) | ||||
date_visit2 = date_visit + datetime.timedelta(minutes=1) | date_visit2 = date_visit + datetime.timedelta(minutes=1) | ||||
# Round to milliseconds before insertion, so equality doesn't fail | # Round to milliseconds before insertion, so equality doesn't fail | ||||
# after a round-trip through a DB (eg. Cassandra) | # after a round-trip through a DB (eg. Cassandra) | ||||
date_visit = date_visit.replace( | date_visit = date_visit.replace( | ||||
microsecond=round(date_visit.microsecond, -3)) | microsecond=round(date_visit.microsecond, -3)) | ||||
date_visit2 = date_visit2.replace( | date_visit2 = date_visit2.replace( | ||||
microsecond=round(date_visit2.microsecond, -3)) | microsecond=round(date_visit2.microsecond, -3)) | ||||
origin_visit1 = swh_storage.origin_visit_add( | origin_visit1 = swh_storage.origin_visit_add( | ||||
origin_url, | origin_url, date=date_visit, type=data.type_visit1) | ||||
date=date_visit, | |||||
type=data.type_visit1, | |||||
) | |||||
origin_visit2 = swh_storage.origin_visit_add( | origin_visit2 = swh_storage.origin_visit_add( | ||||
origin_url, | origin_url, date=date_visit2, type=data.type_visit2) | ||||
date=date_visit2, | |||||
type=data.type_visit2 | |||||
) | |||||
swh_storage.origin_add_one(data.origin2) | |||||
origin_url2 = data.origin2['url'] | |||||
origin_visit3 = swh_storage.origin_visit_add( | origin_visit3 = swh_storage.origin_visit_add( | ||||
origin_url2, | origin_url2, date=date_visit2, type=data.type_visit3) | ||||
date=date_visit2, | |||||
type=data.type_visit3 | |||||
) | |||||
# when | # when | ||||
visit1_metadata = { | visit1_metadata = { | ||||
'contents': 42, | 'contents': 42, | ||||
'directories': 22, | 'directories': 22, | ||||
} | } | ||||
swh_storage.origin_visit_update( | swh_storage.origin_visit_update( | ||||
origin_url, | origin_url, origin_visit1.visit, status='full', | ||||
origin_visit1['visit'], status='full', | |||||
metadata=visit1_metadata) | metadata=visit1_metadata) | ||||
swh_storage.origin_visit_update( | swh_storage.origin_visit_update( | ||||
origin_url2, | origin_url2, origin_visit3.visit, status='partial') | ||||
origin_visit3['visit'], status='partial') | |||||
# then | # then | ||||
actual_origin_visits = list(swh_storage.origin_visit_get( | actual_origin_visits = list(swh_storage.origin_visit_get( | ||||
origin_url)) | origin_url)) | ||||
expected_visits = [{ | expected_visits = [{ | ||||
'origin': origin_url, | 'origin': origin_url, | ||||
'date': date_visit, | 'date': date_visit, | ||||
'visit': origin_visit1['visit'], | 'visit': origin_visit1.visit, | ||||
'type': data.type_visit1, | 'type': data.type_visit1, | ||||
'status': 'full', | 'status': 'full', | ||||
'metadata': visit1_metadata, | 'metadata': visit1_metadata, | ||||
'snapshot': None, | 'snapshot': None, | ||||
}, { | }, { | ||||
'origin': origin_url, | 'origin': origin_url, | ||||
'date': date_visit2, | 'date': date_visit2, | ||||
'visit': origin_visit2['visit'], | 'visit': origin_visit2.visit, | ||||
'type': data.type_visit2, | 'type': data.type_visit2, | ||||
'status': 'ongoing', | 'status': 'ongoing', | ||||
'metadata': None, | 'metadata': None, | ||||
'snapshot': None, | 'snapshot': None, | ||||
}] | }] | ||||
for visit in expected_visits: | for visit in expected_visits: | ||||
assert visit in actual_origin_visits | assert visit in actual_origin_visits | ||||
actual_origin_visits_bis = list(swh_storage.origin_visit_get( | actual_origin_visits_bis = list(swh_storage.origin_visit_get( | ||||
origin_url, | origin_url, limit=1)) | ||||
limit=1)) | |||||
assert actual_origin_visits_bis == [ | assert actual_origin_visits_bis == [ | ||||
{ | { | ||||
'origin': origin_url, | 'origin': origin_url, | ||||
'date': date_visit, | 'date': date_visit, | ||||
'visit': origin_visit1['visit'], | 'visit': origin_visit1.visit, | ||||
'type': data.type_visit1, | 'type': data.type_visit1, | ||||
'status': 'full', | 'status': 'full', | ||||
'metadata': visit1_metadata, | 'metadata': visit1_metadata, | ||||
'snapshot': None, | 'snapshot': None, | ||||
}] | }] | ||||
actual_origin_visits_ter = list(swh_storage.origin_visit_get( | actual_origin_visits_ter = list(swh_storage.origin_visit_get( | ||||
origin_url, | origin_url, last_visit=origin_visit1.visit)) | ||||
last_visit=origin_visit1['visit'])) | |||||
assert actual_origin_visits_ter == [ | assert actual_origin_visits_ter == [ | ||||
{ | { | ||||
'origin': origin_url, | 'origin': origin_url, | ||||
'date': date_visit2, | 'date': date_visit2, | ||||
'visit': origin_visit2['visit'], | 'visit': origin_visit2.visit, | ||||
'type': data.type_visit2, | 'type': data.type_visit2, | ||||
'status': 'ongoing', | 'status': 'ongoing', | ||||
'metadata': None, | 'metadata': None, | ||||
'snapshot': None, | 'snapshot': None, | ||||
}] | }] | ||||
actual_origin_visits2 = list(swh_storage.origin_visit_get( | actual_origin_visits2 = list(swh_storage.origin_visit_get( | ||||
origin_url2)) | origin_url2)) | ||||
assert actual_origin_visits2 == [ | assert actual_origin_visits2 == [ | ||||
{ | { | ||||
'origin': origin_url2, | 'origin': origin_url2, | ||||
'date': date_visit2, | 'date': date_visit2, | ||||
'visit': origin_visit3['visit'], | 'visit': origin_visit3.visit, | ||||
'type': data.type_visit3, | 'type': data.type_visit3, | ||||
'status': 'partial', | 'status': 'partial', | ||||
'metadata': None, | 'metadata': None, | ||||
'snapshot': None, | 'snapshot': None, | ||||
}] | }] | ||||
data1 = { | data1 = { | ||||
'origin': origin_url, | 'origin': origin_url, | ||||
'date': date_visit, | 'date': date_visit, | ||||
'visit': origin_visit1['visit'], | 'visit': origin_visit1.visit, | ||||
'type': data.type_visit1, | 'type': data.type_visit1, | ||||
'status': 'ongoing', | 'status': 'ongoing', | ||||
'metadata': None, | 'metadata': None, | ||||
'snapshot': None, | 'snapshot': None, | ||||
} | } | ||||
data2 = { | data2 = { | ||||
'origin': origin_url, | 'origin': origin_url, | ||||
'date': date_visit2, | 'date': date_visit2, | ||||
'visit': origin_visit2['visit'], | 'visit': origin_visit2.visit, | ||||
'type': data.type_visit2, | 'type': data.type_visit2, | ||||
'status': 'ongoing', | 'status': 'ongoing', | ||||
'metadata': None, | 'metadata': None, | ||||
'snapshot': None, | 'snapshot': None, | ||||
} | } | ||||
data3 = { | data3 = { | ||||
'origin': origin_url2, | 'origin': origin_url2, | ||||
'date': date_visit2, | 'date': date_visit2, | ||||
'visit': origin_visit3['visit'], | 'visit': origin_visit3.visit, | ||||
'type': data.type_visit3, | 'type': data.type_visit3, | ||||
'status': 'ongoing', | 'status': 'ongoing', | ||||
'metadata': None, | 'metadata': None, | ||||
'snapshot': None, | 'snapshot': None, | ||||
} | } | ||||
data4 = { | data4 = { | ||||
'origin': origin_url, | 'origin': origin_url, | ||||
'date': date_visit, | 'date': date_visit, | ||||
'visit': origin_visit1['visit'], | 'visit': origin_visit1.visit, | ||||
'type': data.type_visit1, | 'type': data.type_visit1, | ||||
'metadata': visit1_metadata, | 'metadata': visit1_metadata, | ||||
'status': 'full', | 'status': 'full', | ||||
'snapshot': None, | 'snapshot': None, | ||||
} | } | ||||
data5 = { | data5 = { | ||||
'origin': origin_url2, | 'origin': origin_url2, | ||||
'date': date_visit2, | 'date': date_visit2, | ||||
'visit': origin_visit3['visit'], | 'visit': origin_visit3.visit, | ||||
'type': data.type_visit3, | 'type': data.type_visit3, | ||||
'status': 'partial', | 'status': 'partial', | ||||
'metadata': None, | 'metadata': None, | ||||
'snapshot': None, | 'snapshot': None, | ||||
} | } | ||||
objects = list(swh_storage.journal_writer.journal.objects) | objects = list(swh_storage.journal_writer.journal.objects) | ||||
assert ('origin', data.origin) in objects | assert ('origin', data.origin) in objects | ||||
assert ('origin', data.origin2) in objects | assert ('origin', data.origin2) in objects | ||||
assert ('origin_visit', data1) in objects | assert ('origin_visit', data1) in objects | ||||
assert ('origin_visit', data2) in objects | assert ('origin_visit', data2) in objects | ||||
assert ('origin_visit', data3) in objects | assert ('origin_visit', data3) in objects | ||||
assert ('origin_visit', data4) in objects | assert ('origin_visit', data4) in objects | ||||
assert ('origin_visit', data5) in objects | assert ('origin_visit', data5) in objects | ||||
def test_origin_visit_update_validation(self, swh_storage): | def test_origin_visit_update_validation(self, swh_storage): | ||||
origin_url = data.origin['url'] | origin_url = swh_storage.origin_add_one(data.origin) | ||||
swh_storage.origin_add_one(data.origin) | |||||
visit = swh_storage.origin_visit_add( | visit = swh_storage.origin_visit_add( | ||||
origin_url, | origin_url, date=data.date_visit2, type=data.type_visit2) | ||||
date=data.date_visit2, | |||||
type=data.type_visit2, | |||||
) | |||||
with pytest.raises(StorageArgumentException, match='status') as cm: | with pytest.raises(StorageArgumentException, match='status') as cm: | ||||
swh_storage.origin_visit_update( | swh_storage.origin_visit_update( | ||||
origin_url, visit['visit'], status='foobar') | origin_url, visit.visit, status='foobar') | ||||
if type(cm.value) == psycopg2.DataError: | if type(cm.value) == psycopg2.DataError: | ||||
assert cm.value.pgcode == \ | assert cm.value.pgcode == \ | ||||
psycopg2.errorcodes.INVALID_TEXT_REPRESENTATION | psycopg2.errorcodes.INVALID_TEXT_REPRESENTATION | ||||
def test_origin_visit_find_by_date(self, swh_storage): | def test_origin_visit_find_by_date(self, swh_storage): | ||||
# given | # given | ||||
swh_storage.origin_add_one(data.origin) | origin_url = swh_storage.origin_add_one(data.origin) | ||||
swh_storage.origin_visit_add( | swh_storage.origin_visit_add( | ||||
data.origin['url'], | origin_url, date=data.date_visit2, type=data.type_visit1) | ||||
date=data.date_visit2, | |||||
type=data.type_visit1, | |||||
) | |||||
origin_visit2 = swh_storage.origin_visit_add( | origin_visit2 = swh_storage.origin_visit_add( | ||||
data.origin['url'], | origin_url, date=data.date_visit3, type=data.type_visit2) | ||||
date=data.date_visit3, | |||||
type=data.type_visit2, | |||||
) | |||||
origin_visit3 = swh_storage.origin_visit_add( | origin_visit3 = swh_storage.origin_visit_add( | ||||
data.origin['url'], | origin_url, date=data.date_visit2, type=data.type_visit3) | ||||
date=data.date_visit2, | |||||
type=data.type_visit3, | |||||
) | |||||
# Simple case | # Simple case | ||||
visit = swh_storage.origin_visit_find_by_date( | visit = swh_storage.origin_visit_find_by_date( | ||||
data.origin['url'], data.date_visit3) | origin_url, data.date_visit3) | ||||
assert visit['visit'] == origin_visit2['visit'] | assert visit['visit'] == origin_visit2.visit | ||||
# There are two visits at the same date, the latest must be returned | # There are two visits at the same date, the latest must be returned | ||||
visit = swh_storage.origin_visit_find_by_date( | visit = swh_storage.origin_visit_find_by_date( | ||||
data.origin['url'], data.date_visit2) | origin_url, data.date_visit2) | ||||
assert visit['visit'] == origin_visit3['visit'] | assert visit['visit'] == origin_visit3.visit | ||||
def test_origin_visit_find_by_date__unknown_origin(self, swh_storage): | def test_origin_visit_find_by_date__unknown_origin(self, swh_storage): | ||||
swh_storage.origin_visit_find_by_date('foo', data.date_visit2) | swh_storage.origin_visit_find_by_date('foo', data.date_visit2) | ||||
def test_origin_visit_update_missing_snapshot(self, swh_storage): | def test_origin_visit_update_missing_snapshot(self, swh_storage): | ||||
# given | # given | ||||
swh_storage.origin_add_one(data.origin) | origin_url = swh_storage.origin_add_one(data.origin) | ||||
origin_url = data.origin['url'] | |||||
origin_visit = swh_storage.origin_visit_add( | origin_visit = swh_storage.origin_visit_add( | ||||
origin_url, | origin_url, date=data.date_visit1, type=data.type_visit1) | ||||
date=data.date_visit1, | |||||
type=data.type_visit1, | |||||
) | |||||
# when | # when | ||||
swh_storage.origin_visit_update( | swh_storage.origin_visit_update( | ||||
origin_url, | origin_url, | ||||
origin_visit['visit'], | origin_visit.visit, | ||||
snapshot=data.snapshot['id']) | snapshot=data.snapshot['id']) | ||||
# then | # then | ||||
actual_origin_visit = swh_storage.origin_visit_get_by( | actual_origin_visit = swh_storage.origin_visit_get_by( | ||||
origin_url, | origin_url, origin_visit.visit) | ||||
origin_visit['visit']) | |||||
assert actual_origin_visit['snapshot'] == data.snapshot['id'] | assert actual_origin_visit['snapshot'] == data.snapshot['id'] | ||||
# when | # when | ||||
swh_storage.snapshot_add([data.snapshot]) | swh_storage.snapshot_add([data.snapshot]) | ||||
assert actual_origin_visit['snapshot'] == data.snapshot['id'] | assert actual_origin_visit['snapshot'] == data.snapshot['id'] | ||||
def test_origin_visit_get_by(self, swh_storage): | def test_origin_visit_get_by(self, swh_storage): | ||||
swh_storage.origin_add_one(data.origin) | origin_url = swh_storage.origin_add_one(data.origin) | ||||
swh_storage.origin_add_one(data.origin2) | origin_url2 = swh_storage.origin_add_one(data.origin2) | ||||
origin_url = data.origin['url'] | |||||
origin2_url = data.origin2['url'] | |||||
origin_visit1 = swh_storage.origin_visit_add( | origin_visit1 = swh_storage.origin_visit_add( | ||||
origin_url, | origin_url, date=data.date_visit2, type=data.type_visit2) | ||||
date=data.date_visit2, | |||||
type=data.type_visit2, | |||||
) | |||||
swh_storage.snapshot_add([data.snapshot]) | swh_storage.snapshot_add([data.snapshot]) | ||||
swh_storage.origin_visit_update( | swh_storage.origin_visit_update( | ||||
origin_url, | origin_url, | ||||
origin_visit1['visit'], | origin_visit1.visit, | ||||
snapshot=data.snapshot['id']) | snapshot=data.snapshot['id']) | ||||
# Add some other {origin, visit} entries | # Add some other {origin, visit} entries | ||||
swh_storage.origin_visit_add( | swh_storage.origin_visit_add( | ||||
origin_url, | origin_url, date=data.date_visit3, type=data.type_visit3) | ||||
date=data.date_visit3, | |||||
type=data.type_visit3, | |||||
) | |||||
swh_storage.origin_visit_add( | swh_storage.origin_visit_add( | ||||
origin2_url, | origin_url2, date=data.date_visit3, type=data.type_visit3) | ||||
date=data.date_visit3, | |||||
type=data.type_visit3, | |||||
) | |||||
# when | # when | ||||
visit1_metadata = { | visit1_metadata = { | ||||
'contents': 42, | 'contents': 42, | ||||
'directories': 22, | 'directories': 22, | ||||
} | } | ||||
swh_storage.origin_visit_update( | swh_storage.origin_visit_update( | ||||
origin_url, | origin_url, origin_visit1.visit, | ||||
origin_visit1['visit'], status='full', | status='full', metadata=visit1_metadata) | ||||
metadata=visit1_metadata) | |||||
expected_origin_visit = origin_visit1.copy() | expected_origin_visit = origin_visit1.to_dict() | ||||
expected_origin_visit.update({ | expected_origin_visit.update({ | ||||
'origin': origin_url, | 'origin': origin_url, | ||||
'visit': origin_visit1['visit'], | 'visit': origin_visit1.visit, | ||||
'date': data.date_visit2, | 'date': data.date_visit2, | ||||
'type': data.type_visit2, | 'type': data.type_visit2, | ||||
'metadata': visit1_metadata, | 'metadata': visit1_metadata, | ||||
'status': 'full', | 'status': 'full', | ||||
'snapshot': data.snapshot['id'], | 'snapshot': data.snapshot['id'], | ||||
}) | }) | ||||
# when | # when | ||||
actual_origin_visit1 = swh_storage.origin_visit_get_by( | actual_origin_visit1 = swh_storage.origin_visit_get_by( | ||||
origin_url, | origin_url, | ||||
origin_visit1['visit']) | origin_visit1.visit) | ||||
# then | # then | ||||
assert actual_origin_visit1 == expected_origin_visit | assert actual_origin_visit1 == expected_origin_visit | ||||
def test_origin_visit_get_by__unknown_origin(self, swh_storage): | def test_origin_visit_get_by__unknown_origin(self, swh_storage): | ||||
assert swh_storage.origin_visit_get_by('foo', 10) is None | assert swh_storage.origin_visit_get_by('foo', 10) is None | ||||
def test_origin_visit_upsert_new(self, swh_storage): | def test_origin_visit_upsert_new(self, swh_storage): | ||||
# given | # given | ||||
swh_storage.origin_add_one(data.origin2) | origin_url = swh_storage.origin_add_one(data.origin2) | ||||
origin_url = data.origin2['url'] | |||||
# when | # when | ||||
swh_storage.origin_visit_upsert([ | swh_storage.origin_visit_upsert([ | ||||
OriginVisit.from_dict({ | OriginVisit.from_dict({ | ||||
'origin': origin_url, | 'origin': origin_url, | ||||
'date': data.date_visit2, | 'date': data.date_visit2, | ||||
'visit': 123, | 'visit': 123, | ||||
'type': data.type_visit2, | 'type': data.type_visit2, | ||||
▲ Show 20 Lines • Show All 56 Lines • ▼ Show 20 Lines | def test_origin_visit_upsert_new(self, swh_storage): | ||||
} | } | ||||
assert list(swh_storage.journal_writer.journal.objects) == [ | assert list(swh_storage.journal_writer.journal.objects) == [ | ||||
('origin', data.origin2), | ('origin', data.origin2), | ||||
('origin_visit', data1), | ('origin_visit', data1), | ||||
('origin_visit', data2)] | ('origin_visit', data2)] | ||||
def test_origin_visit_upsert_existing(self, swh_storage): | def test_origin_visit_upsert_existing(self, swh_storage): | ||||
# given | # given | ||||
swh_storage.origin_add_one(data.origin2) | origin_url = swh_storage.origin_add_one(data.origin2) | ||||
origin_url = data.origin2['url'] | |||||
# when | # when | ||||
origin_visit1 = swh_storage.origin_visit_add( | origin_visit1 = swh_storage.origin_visit_add( | ||||
origin_url, | origin_url, date=data.date_visit2, type=data.type_visit1) | ||||
date=data.date_visit2, | |||||
type=data.type_visit1, | |||||
) | |||||
swh_storage.origin_visit_upsert([OriginVisit.from_dict({ | swh_storage.origin_visit_upsert([OriginVisit.from_dict({ | ||||
'origin': origin_url, | 'origin': origin_url, | ||||
'date': data.date_visit2, | 'date': data.date_visit2, | ||||
'visit': origin_visit1['visit'], | 'visit': origin_visit1.visit, | ||||
'type': data.type_visit1, | 'type': data.type_visit1, | ||||
'status': 'full', | 'status': 'full', | ||||
'metadata': None, | 'metadata': None, | ||||
'snapshot': None, | 'snapshot': None, | ||||
})]) | })]) | ||||
# then | # then | ||||
assert origin_visit1['origin'] == origin_url | assert origin_visit1.origin == origin_url | ||||
assert origin_visit1['visit'] is not None | assert origin_visit1.visit is not None | ||||
actual_origin_visits = list(swh_storage.origin_visit_get( | actual_origin_visits = list(swh_storage.origin_visit_get( | ||||
origin_url)) | origin_url)) | ||||
assert actual_origin_visits == [ | assert actual_origin_visits == [ | ||||
{ | { | ||||
'origin': origin_url, | 'origin': origin_url, | ||||
'date': data.date_visit2, | 'date': data.date_visit2, | ||||
'visit': origin_visit1['visit'], | 'visit': origin_visit1.visit, | ||||
'type': data.type_visit1, | 'type': data.type_visit1, | ||||
'status': 'full', | 'status': 'full', | ||||
'metadata': None, | 'metadata': None, | ||||
'snapshot': None, | 'snapshot': None, | ||||
}] | }] | ||||
data1 = { | data1 = { | ||||
'origin': origin_url, | 'origin': origin_url, | ||||
'date': data.date_visit2, | 'date': data.date_visit2, | ||||
'visit': origin_visit1['visit'], | 'visit': origin_visit1.visit, | ||||
'type': data.type_visit1, | 'type': data.type_visit1, | ||||
'status': 'ongoing', | 'status': 'ongoing', | ||||
'metadata': None, | 'metadata': None, | ||||
'snapshot': None, | 'snapshot': None, | ||||
} | } | ||||
data2 = { | data2 = { | ||||
'origin': origin_url, | 'origin': origin_url, | ||||
'date': data.date_visit2, | 'date': data.date_visit2, | ||||
'visit': origin_visit1['visit'], | 'visit': origin_visit1.visit, | ||||
'type': data.type_visit1, | 'type': data.type_visit1, | ||||
'status': 'full', | 'status': 'full', | ||||
'metadata': None, | 'metadata': None, | ||||
'snapshot': None, | 'snapshot': None, | ||||
} | } | ||||
assert list(swh_storage.journal_writer.journal.objects) == [ | assert list(swh_storage.journal_writer.journal.objects) == [ | ||||
('origin', data.origin2), | ('origin', data.origin2), | ||||
('origin_visit', data1), | ('origin_visit', data1), | ||||
('origin_visit', data2)] | ('origin_visit', data2)] | ||||
def test_origin_visit_get_by_no_result(self, swh_storage): | def test_origin_visit_get_by_no_result(self, swh_storage): | ||||
swh_storage.origin_add([data.origin]) | swh_storage.origin_add([data.origin]) | ||||
actual_origin_visit = swh_storage.origin_visit_get_by( | actual_origin_visit = swh_storage.origin_visit_get_by( | ||||
data.origin['url'], 999) | data.origin['url'], 999) | ||||
assert actual_origin_visit is None | assert actual_origin_visit is None | ||||
def test_origin_visit_get_latest(self, swh_storage): | def test_origin_visit_get_latest(self, swh_storage): | ||||
swh_storage.origin_add_one(data.origin) | origin_url = swh_storage.origin_add_one(data.origin) | ||||
origin_url = data.origin['url'] | ov1 = swh_storage.origin_visit_add( | ||||
origin_visit1 = swh_storage.origin_visit_add( | origin_url, date=data.date_visit1, type=data.type_visit1) | ||||
origin=origin_url, | ov2 = swh_storage.origin_visit_add( | ||||
date=data.date_visit1, | origin_url, date=data.date_visit2, type=data.type_visit2) | ||||
type=data.type_visit1, | |||||
) | |||||
visit1_id = origin_visit1['visit'] | |||||
origin_visit2 = swh_storage.origin_visit_add( | |||||
origin=origin_url, | |||||
date=data.date_visit2, | |||||
type=data.type_visit2, | |||||
) | |||||
visit2_id = origin_visit2['visit'] | |||||
# Add a visit with the same date as the previous one | # Add a visit with the same date as the previous one | ||||
origin_visit3 = swh_storage.origin_visit_add( | ov3 = swh_storage.origin_visit_add( | ||||
origin=origin_url, | origin_url, date=data.date_visit2, type=data.type_visit2) | ||||
date=data.date_visit2, | |||||
type=data.type_visit2, | |||||
) | |||||
visit3_id = origin_visit3['visit'] | |||||
origin_visit1 = swh_storage.origin_visit_get_by(origin_url, visit1_id) | |||||
origin_visit2 = swh_storage.origin_visit_get_by(origin_url, visit2_id) | |||||
origin_visit3 = swh_storage.origin_visit_get_by(origin_url, visit3_id) | |||||
origin_visit1 = swh_storage.origin_visit_get_by( | |||||
origin_url, ov1.visit) | |||||
origin_visit2 = swh_storage.origin_visit_get_by( | |||||
origin_url, ov2.visit) | |||||
origin_visit3 = swh_storage.origin_visit_get_by( | |||||
origin_url, ov3.visit) | |||||
# Three visits, all with no snapshot | # Three visits, all with no snapshot | ||||
assert origin_visit3 == swh_storage.origin_visit_get_latest(origin_url) | assert origin_visit3 == swh_storage.origin_visit_get_latest(origin_url) | ||||
assert swh_storage.origin_visit_get_latest( | assert swh_storage.origin_visit_get_latest( | ||||
origin_url, require_snapshot=True) is None | origin_url, require_snapshot=True) is None | ||||
# Add snapshot to visit1; require_snapshot=True makes it return | # Add snapshot to visit1; require_snapshot=True makes it return | ||||
# visit1 and require_snapshot=False still returns visit3 | # visit1 and require_snapshot=False still returns visit3 | ||||
swh_storage.snapshot_add([data.complete_snapshot]) | swh_storage.snapshot_add([data.complete_snapshot]) | ||||
swh_storage.origin_visit_update( | swh_storage.origin_visit_update( | ||||
origin_url, visit1_id, | origin_url, ov1.visit, snapshot=data.complete_snapshot['id']) | ||||
snapshot=data.complete_snapshot['id']) | assert {**origin_visit1, | ||||
assert {**origin_visit1, 'snapshot': data.complete_snapshot['id']} \ | 'snapshot': data.complete_snapshot['id']} \ | ||||
== swh_storage.origin_visit_get_latest( | == swh_storage.origin_visit_get_latest( | ||||
origin_url, require_snapshot=True) | origin_url, require_snapshot=True) | ||||
assert origin_visit3 == swh_storage.origin_visit_get_latest(origin_url) | assert origin_visit3 == swh_storage.origin_visit_get_latest(origin_url) | ||||
# Status filter: all three visits are status=ongoing, so no visit | # Status filter: all three visits are status=ongoing, so no visit | ||||
# returned | # returned | ||||
assert swh_storage.origin_visit_get_latest( | assert swh_storage.origin_visit_get_latest( | ||||
origin_url, allowed_statuses=['full']) is None | origin_url, allowed_statuses=['full']) is None | ||||
# Mark the first visit as completed and check status filter again | # Mark the first visit as completed and check status filter again | ||||
swh_storage.origin_visit_update( | swh_storage.origin_visit_update( | ||||
origin_url, | origin_url, ov1.visit, status='full') | ||||
visit1_id, status='full') | |||||
assert { | assert { | ||||
**origin_visit1, | **origin_visit1, | ||||
'snapshot': data.complete_snapshot['id'], | 'snapshot': data.complete_snapshot['id'], | ||||
'status': 'full'} == swh_storage.origin_visit_get_latest( | 'status': 'full'} == swh_storage.origin_visit_get_latest( | ||||
origin_url, allowed_statuses=['full']) | origin_url, allowed_statuses=['full']) | ||||
assert origin_visit3 == swh_storage.origin_visit_get_latest(origin_url) | assert origin_visit3 == swh_storage.origin_visit_get_latest(origin_url) | ||||
# Add snapshot to visit2 and check that the new snapshot is returned | # Add snapshot to visit2 and check that the new snapshot is returned | ||||
swh_storage.snapshot_add([data.empty_snapshot]) | swh_storage.snapshot_add([data.empty_snapshot]) | ||||
swh_storage.origin_visit_update( | swh_storage.origin_visit_update( | ||||
origin_url, visit2_id, | origin_url, ov2.visit, snapshot=data.empty_snapshot['id']) | ||||
snapshot=data.empty_snapshot['id']) | |||||
assert {**origin_visit2, 'snapshot': data.empty_snapshot['id']} == \ | assert {**origin_visit2, 'snapshot': data.empty_snapshot['id']} == \ | ||||
swh_storage.origin_visit_get_latest( | swh_storage.origin_visit_get_latest( | ||||
origin_url, require_snapshot=True) | origin_url, require_snapshot=True) | ||||
assert origin_visit3 == swh_storage.origin_visit_get_latest(origin_url) | assert origin_visit3 == swh_storage.origin_visit_get_latest(origin_url) | ||||
# Check that the status filter is still working | # Check that the status filter is still working | ||||
assert { | assert { | ||||
**origin_visit1, | **origin_visit1, | ||||
'snapshot': data.complete_snapshot['id'], | 'snapshot': data.complete_snapshot['id'], | ||||
'status': 'full'} == swh_storage.origin_visit_get_latest( | 'status': 'full'} == swh_storage.origin_visit_get_latest( | ||||
origin_url, allowed_statuses=['full']) | origin_url, allowed_statuses=['full']) | ||||
# Add snapshot to visit3 (same date as visit2) | # Add snapshot to visit3 (same date as visit2) | ||||
swh_storage.snapshot_add([data.complete_snapshot]) | swh_storage.snapshot_add([data.complete_snapshot]) | ||||
swh_storage.origin_visit_update( | swh_storage.origin_visit_update( | ||||
origin_url, visit3_id, snapshot=data.complete_snapshot['id']) | origin_url, ov3.visit, snapshot=data.complete_snapshot['id']) | ||||
assert { | assert { | ||||
**origin_visit1, | **origin_visit1, | ||||
'snapshot': data.complete_snapshot['id'], | 'snapshot': data.complete_snapshot['id'], | ||||
'status': 'full'} == swh_storage.origin_visit_get_latest( | 'status': 'full'} == swh_storage.origin_visit_get_latest( | ||||
origin_url, allowed_statuses=['full']) | origin_url, allowed_statuses=['full']) | ||||
assert { | assert { | ||||
**origin_visit1, | **origin_visit1, | ||||
'snapshot': data.complete_snapshot['id'], | 'snapshot': data.complete_snapshot['id'], | ||||
Show All 27 Lines | def test_person_fullname_unicity(self, swh_storage): | ||||
revisions = list( | revisions = list( | ||||
swh_storage.revision_get([revision['id'], revision2['id']])) | swh_storage.revision_get([revision['id'], revision2['id']])) | ||||
# then | # then | ||||
# check committers are the same | # check committers are the same | ||||
assert revisions[0]['committer'] == revisions[1]['committer'] | assert revisions[0]['committer'] == revisions[1]['committer'] | ||||
def test_snapshot_add_get_empty(self, swh_storage): | def test_snapshot_add_get_empty(self, swh_storage): | ||||
origin_url = data.origin['url'] | origin_url = swh_storage.origin_add_one(data.origin) | ||||
swh_storage.origin_add_one(data.origin) | |||||
origin_visit1 = swh_storage.origin_visit_add( | origin_visit1 = swh_storage.origin_visit_add( | ||||
origin=origin_url, | origin_url, date=data.date_visit1, type=data.type_visit1) | ||||
date=data.date_visit1, | visit_id = origin_visit1.visit | ||||
type=data.type_visit1, | |||||
) | |||||
visit_id = origin_visit1['visit'] | |||||
actual_result = swh_storage.snapshot_add([data.empty_snapshot]) | actual_result = swh_storage.snapshot_add([data.empty_snapshot]) | ||||
assert actual_result == {'snapshot:add': 1} | assert actual_result == {'snapshot:add': 1} | ||||
swh_storage.origin_visit_update( | swh_storage.origin_visit_update( | ||||
origin_url, visit_id, snapshot=data.empty_snapshot['id']) | origin_url, origin_visit1.visit, | ||||
snapshot=data.empty_snapshot['id']) | |||||
by_id = swh_storage.snapshot_get(data.empty_snapshot['id']) | by_id = swh_storage.snapshot_get(data.empty_snapshot['id']) | ||||
assert by_id == {**data.empty_snapshot, 'next_branch': None} | assert by_id == {**data.empty_snapshot, 'next_branch': None} | ||||
by_ov = swh_storage.snapshot_get_by_origin_visit(origin_url, visit_id) | by_ov = swh_storage.snapshot_get_by_origin_visit(origin_url, visit_id) | ||||
assert by_ov == {**data.empty_snapshot, 'next_branch': None} | assert by_ov == {**data.empty_snapshot, 'next_branch': None} | ||||
data1 = { | data1 = { | ||||
'origin': origin_url, | 'origin': origin_url, | ||||
'date': data.date_visit1, | 'date': data.date_visit1, | ||||
'visit': origin_visit1['visit'], | 'visit': origin_visit1.visit, | ||||
'type': data.type_visit1, | 'type': data.type_visit1, | ||||
'status': 'ongoing', | 'status': 'ongoing', | ||||
'metadata': None, | 'metadata': None, | ||||
'snapshot': None, | 'snapshot': None, | ||||
} | } | ||||
data2 = { | data2 = { | ||||
'origin': origin_url, | 'origin': origin_url, | ||||
'date': data.date_visit1, | 'date': data.date_visit1, | ||||
'visit': origin_visit1['visit'], | 'visit': origin_visit1.visit, | ||||
'type': data.type_visit1, | 'type': data.type_visit1, | ||||
'status': 'ongoing', | 'status': 'ongoing', | ||||
'metadata': None, | 'metadata': None, | ||||
'snapshot': data.empty_snapshot['id'], | 'snapshot': data.empty_snapshot['id'], | ||||
} | } | ||||
assert list(swh_storage.journal_writer.journal.objects) == \ | assert list(swh_storage.journal_writer.journal.objects) == \ | ||||
[('origin', data.origin), | [('origin', data.origin), | ||||
('origin_visit', data1), | ('origin_visit', data1), | ||||
('snapshot', data.empty_snapshot), | ('snapshot', data.empty_snapshot), | ||||
('origin_visit', data2)] | ('origin_visit', data2)] | ||||
def test_snapshot_add_get_complete(self, swh_storage): | def test_snapshot_add_get_complete(self, swh_storage): | ||||
origin_url = data.origin['url'] | origin_url = data.origin['url'] | ||||
swh_storage.origin_add_one(data.origin) | origin_url = swh_storage.origin_add_one(data.origin) | ||||
origin_visit1 = swh_storage.origin_visit_add( | origin_visit1 = swh_storage.origin_visit_add( | ||||
origin=origin_url, | origin_url, date=data.date_visit1, type=data.type_visit1) | ||||
date=data.date_visit1, | visit_id = origin_visit1.visit | ||||
type=data.type_visit1, | |||||
) | |||||
visit_id = origin_visit1['visit'] | |||||
actual_result = swh_storage.snapshot_add([data.complete_snapshot]) | actual_result = swh_storage.snapshot_add([data.complete_snapshot]) | ||||
swh_storage.origin_visit_update( | swh_storage.origin_visit_update( | ||||
origin_url, visit_id, snapshot=data.complete_snapshot['id']) | origin_url, origin_visit1.visit, | ||||
snapshot=data.complete_snapshot['id']) | |||||
assert actual_result == {'snapshot:add': 1} | assert actual_result == {'snapshot:add': 1} | ||||
by_id = swh_storage.snapshot_get(data.complete_snapshot['id']) | by_id = swh_storage.snapshot_get(data.complete_snapshot['id']) | ||||
assert by_id == {**data.complete_snapshot, 'next_branch': None} | assert by_id == {**data.complete_snapshot, 'next_branch': None} | ||||
by_ov = swh_storage.snapshot_get_by_origin_visit(origin_url, visit_id) | by_ov = swh_storage.snapshot_get_by_origin_visit(origin_url, visit_id) | ||||
assert by_ov == {**data.complete_snapshot, 'next_branch': None} | assert by_ov == {**data.complete_snapshot, 'next_branch': None} | ||||
▲ Show 20 Lines • Show All 130 Lines • ▼ Show 20 Lines | def test_snapshot_add_get_paginated(self, swh_storage): | ||||
for name in branch_names[dir_idx:dir_idx + 3] | for name in branch_names[dir_idx:dir_idx + 3] | ||||
}, | }, | ||||
'next_branch': branch_names[dir_idx + 3], | 'next_branch': branch_names[dir_idx + 3], | ||||
} | } | ||||
assert snapshot == expected_snapshot | assert snapshot == expected_snapshot | ||||
def test_snapshot_add_get_filtered(self, swh_storage): | def test_snapshot_add_get_filtered(self, swh_storage): | ||||
origin_url = data.origin['url'] | origin_url = swh_storage.origin_add_one(data.origin) | ||||
swh_storage.origin_add_one(data.origin) | |||||
origin_visit1 = swh_storage.origin_visit_add( | origin_visit1 = swh_storage.origin_visit_add( | ||||
origin=origin_url, | origin_url, date=data.date_visit1, type=data.type_visit1) | ||||
date=data.date_visit1, | |||||
type=data.type_visit1, | |||||
) | |||||
visit_id = origin_visit1['visit'] | |||||
swh_storage.snapshot_add([data.complete_snapshot]) | swh_storage.snapshot_add([data.complete_snapshot]) | ||||
swh_storage.origin_visit_update( | swh_storage.origin_visit_update( | ||||
origin_url, visit_id, snapshot=data.complete_snapshot['id']) | origin_url, origin_visit1.visit, | ||||
snapshot=data.complete_snapshot['id']) | |||||
snp_id = data.complete_snapshot['id'] | snp_id = data.complete_snapshot['id'] | ||||
branches = data.complete_snapshot['branches'] | branches = data.complete_snapshot['branches'] | ||||
snapshot = swh_storage.snapshot_get_branches( | snapshot = swh_storage.snapshot_get_branches( | ||||
snp_id, target_types=['release', 'revision']) | snp_id, target_types=['release', 'revision']) | ||||
expected_snapshot = { | expected_snapshot = { | ||||
▲ Show 20 Lines • Show All 91 Lines • ▼ Show 20 Lines | def test_snapshot_add_get_filtered_and_paginated(self, swh_storage): | ||||
branch_names[dir_idx]: branches[branch_names[dir_idx]], | branch_names[dir_idx]: branches[branch_names[dir_idx]], | ||||
}, | }, | ||||
'next_branch': b'release', | 'next_branch': b'release', | ||||
} | } | ||||
assert snapshot == expected_snapshot | assert snapshot == expected_snapshot | ||||
def test_snapshot_add_get(self, swh_storage): | def test_snapshot_add_get(self, swh_storage): | ||||
origin_url = data.origin['url'] | origin_url = swh_storage.origin_add_one(data.origin) | ||||
swh_storage.origin_add_one(data.origin) | |||||
origin_visit1 = swh_storage.origin_visit_add( | origin_visit1 = swh_storage.origin_visit_add( | ||||
origin=origin_url, | origin_url, date=data.date_visit1, type=data.type_visit1) | ||||
date=data.date_visit1, | visit_id = origin_visit1.visit | ||||
type=data.type_visit1, | |||||
) | |||||
visit_id = origin_visit1['visit'] | |||||
swh_storage.snapshot_add([data.snapshot]) | swh_storage.snapshot_add([data.snapshot]) | ||||
swh_storage.origin_visit_update( | swh_storage.origin_visit_update( | ||||
origin_url, visit_id, snapshot=data.snapshot['id']) | origin_url, origin_visit1.visit, snapshot=data.snapshot['id']) | ||||
by_id = swh_storage.snapshot_get(data.snapshot['id']) | by_id = swh_storage.snapshot_get(data.snapshot['id']) | ||||
assert by_id == {**data.snapshot, 'next_branch': None} | assert by_id == {**data.snapshot, 'next_branch': None} | ||||
by_ov = swh_storage.snapshot_get_by_origin_visit(origin_url, visit_id) | by_ov = swh_storage.snapshot_get_by_origin_visit(origin_url, visit_id) | ||||
assert by_ov == {**data.snapshot, 'next_branch': None} | assert by_ov == {**data.snapshot, 'next_branch': None} | ||||
origin_visit_info = swh_storage.origin_visit_get_by( | origin_visit_info = swh_storage.origin_visit_get_by( | ||||
origin_url, visit_id) | origin_url, visit_id) | ||||
assert origin_visit_info['snapshot'] == data.snapshot['id'] | assert origin_visit_info['snapshot'] == data.snapshot['id'] | ||||
def test_snapshot_add_nonexistent_visit(self, swh_storage): | def test_snapshot_add_nonexistent_visit(self, swh_storage): | ||||
origin_url = data.origin['url'] | origin_url = swh_storage.origin_add_one(data.origin) | ||||
swh_storage.origin_add_one(data.origin) | # unknown visit | ||||
visit_id = 54164461156 | visit_id = 54164461156 | ||||
swh_storage.journal_writer.journal.objects[:] = [] | swh_storage.journal_writer.journal.objects[:] = [] | ||||
swh_storage.snapshot_add([data.snapshot]) | swh_storage.snapshot_add([data.snapshot]) | ||||
with pytest.raises(StorageArgumentException): | with pytest.raises(StorageArgumentException): | ||||
swh_storage.origin_visit_update( | swh_storage.origin_visit_update( | ||||
origin_url, visit_id, snapshot=data.snapshot['id']) | origin_url, visit_id, snapshot=data.snapshot['id']) | ||||
Done Inline Actions — vlorentz: Why create a complete object if you're just using the id? | |||||
Done Inline Actions — ardumont: That's a leftover from the gazillion rebases I did over the last 2 days ;) | |||||
assert list(swh_storage.journal_writer.journal.objects) == [ | assert list(swh_storage.journal_writer.journal.objects) == [ | ||||
('snapshot', data.snapshot)] | ('snapshot', data.snapshot)] | ||||
def test_snapshot_add_twice__by_origin_visit(self, swh_storage): | def test_snapshot_add_twice__by_origin_visit(self, swh_storage): | ||||
origin_url = data.origin['url'] | origin_url = swh_storage.origin_add_one(data.origin) | ||||
swh_storage.origin_add_one(data.origin) | |||||
origin_visit1 = swh_storage.origin_visit_add( | origin_visit1 = swh_storage.origin_visit_add( | ||||
origin=origin_url, | origin_url, date=data.date_visit1, type=data.type_visit1) | ||||
date=data.date_visit1, | visit1_id = origin_visit1.visit | ||||
type=data.type_visit1, | |||||
) | |||||
visit1_id = origin_visit1['visit'] | |||||
swh_storage.snapshot_add([data.snapshot]) | swh_storage.snapshot_add([data.snapshot]) | ||||
swh_storage.origin_visit_update( | swh_storage.origin_visit_update( | ||||
origin_url, visit1_id, snapshot=data.snapshot['id']) | origin_url, origin_visit1.visit, snapshot=data.snapshot['id']) | ||||
by_ov1 = swh_storage.snapshot_get_by_origin_visit( | by_ov1 = swh_storage.snapshot_get_by_origin_visit( | ||||
origin_url, visit1_id) | origin_url, visit1_id) | ||||
assert by_ov1 == {**data.snapshot, 'next_branch': None} | assert by_ov1 == {**data.snapshot, 'next_branch': None} | ||||
origin_visit2 = swh_storage.origin_visit_add( | origin_visit2 = swh_storage.origin_visit_add( | ||||
origin=origin_url, | origin_url, date=data.date_visit2, type=data.type_visit2) | ||||
date=data.date_visit2, | visit2_id = origin_visit2.visit | ||||
type=data.type_visit2, | |||||
) | |||||
visit2_id = origin_visit2['visit'] | |||||
swh_storage.snapshot_add([data.snapshot]) | swh_storage.snapshot_add([data.snapshot]) | ||||
swh_storage.origin_visit_update( | swh_storage.origin_visit_update( | ||||
origin_url, visit2_id, snapshot=data.snapshot['id']) | origin_url, origin_visit2.visit, snapshot=data.snapshot['id']) | ||||
by_ov2 = swh_storage.snapshot_get_by_origin_visit( | by_ov2 = swh_storage.snapshot_get_by_origin_visit( | ||||
origin_url, visit2_id) | origin_url, visit2_id) | ||||
assert by_ov2 == {**data.snapshot, 'next_branch': None} | assert by_ov2 == {**data.snapshot, 'next_branch': None} | ||||
data1 = { | data1 = { | ||||
'origin': origin_url, | 'origin': origin_url, | ||||
'date': data.date_visit1, | 'date': data.date_visit1, | ||||
'visit': origin_visit1['visit'], | 'visit': origin_visit1.visit, | ||||
'type': data.type_visit1, | 'type': data.type_visit1, | ||||
'status': 'ongoing', | 'status': 'ongoing', | ||||
'metadata': None, | 'metadata': None, | ||||
'snapshot': None, | 'snapshot': None, | ||||
} | } | ||||
data2 = { | data2 = { | ||||
'origin': origin_url, | 'origin': origin_url, | ||||
'date': data.date_visit1, | 'date': data.date_visit1, | ||||
'visit': origin_visit1['visit'], | 'visit': origin_visit1.visit, | ||||
'type': data.type_visit1, | 'type': data.type_visit1, | ||||
'status': 'ongoing', | 'status': 'ongoing', | ||||
'metadata': None, | 'metadata': None, | ||||
'snapshot': data.snapshot['id'], | 'snapshot': data.snapshot['id'], | ||||
} | } | ||||
data3 = { | data3 = { | ||||
'origin': origin_url, | 'origin': origin_url, | ||||
'date': data.date_visit2, | 'date': data.date_visit2, | ||||
'visit': origin_visit2['visit'], | 'visit': origin_visit2.visit, | ||||
'type': data.type_visit2, | 'type': data.type_visit2, | ||||
'status': 'ongoing', | 'status': 'ongoing', | ||||
'metadata': None, | 'metadata': None, | ||||
'snapshot': None, | 'snapshot': None, | ||||
} | } | ||||
data4 = { | data4 = { | ||||
'origin': origin_url, | 'origin': origin_url, | ||||
'date': data.date_visit2, | 'date': data.date_visit2, | ||||
'visit': origin_visit2['visit'], | 'visit': origin_visit2.visit, | ||||
'type': data.type_visit2, | 'type': data.type_visit2, | ||||
'status': 'ongoing', | 'status': 'ongoing', | ||||
'metadata': None, | 'metadata': None, | ||||
'snapshot': data.snapshot['id'], | 'snapshot': data.snapshot['id'], | ||||
} | } | ||||
assert list(swh_storage.journal_writer.journal.objects) \ | assert list(swh_storage.journal_writer.journal.objects) \ | ||||
== [('origin', data.origin), | == [('origin', data.origin), | ||||
('origin_visit', data1), | ('origin_visit', data1), | ||||
('snapshot', data.snapshot), | ('snapshot', data.snapshot), | ||||
('origin_visit', data2), | ('origin_visit', data2), | ||||
('origin_visit', data3), | ('origin_visit', data3), | ||||
('origin_visit', data4)] | ('origin_visit', data4)] | ||||
def test_snapshot_get_latest(self, swh_storage): | def test_snapshot_get_latest(self, swh_storage): | ||||
origin_url = data.origin['url'] | origin_url = swh_storage.origin_add_one(data.origin) | ||||
swh_storage.origin_add_one(data.origin) | |||||
origin_url = data.origin['url'] | |||||
origin_visit1 = swh_storage.origin_visit_add( | origin_visit1 = swh_storage.origin_visit_add( | ||||
origin=origin_url, | origin_url, date=data.date_visit1, type=data.type_visit1) | ||||
date=data.date_visit1, | |||||
type=data.type_visit1, | |||||
) | |||||
visit1_id = origin_visit1['visit'] | |||||
origin_visit2 = swh_storage.origin_visit_add( | origin_visit2 = swh_storage.origin_visit_add( | ||||
origin=origin_url, | origin_url, date=data.date_visit2, type=data.type_visit2) | ||||
date=data.date_visit2, | |||||
type=data.type_visit2, | |||||
) | |||||
visit2_id = origin_visit2['visit'] | |||||
# Add a visit with the same date as the previous one | # Add a visit with the same date as the previous one | ||||
origin_visit3 = swh_storage.origin_visit_add( | origin_visit3 = swh_storage.origin_visit_add( | ||||
origin=origin_url, | origin_url, date=data.date_visit2, type=data.type_visit3) | ||||
date=data.date_visit2, | |||||
type=data.type_visit3, | |||||
) | |||||
visit3_id = origin_visit3['visit'] | |||||
# Three visits, all with no snapshot: latest snapshot is None | # Three visits, all with no snapshot: latest snapshot is None | ||||
assert swh_storage.snapshot_get_latest(origin_url) is None | assert swh_storage.snapshot_get_latest(origin_url) is None | ||||
# Add snapshot to visit1, latest snapshot = visit 1 snapshot | # Add snapshot to visit1, latest snapshot = visit 1 snapshot | ||||
swh_storage.snapshot_add([data.complete_snapshot]) | swh_storage.snapshot_add([data.complete_snapshot]) | ||||
swh_storage.origin_visit_update( | swh_storage.origin_visit_update( | ||||
origin_url, visit1_id, snapshot=data.complete_snapshot['id']) | origin_url, origin_visit1.visit, | ||||
snapshot=data.complete_snapshot['id']) | |||||
assert {**data.complete_snapshot, 'next_branch': None} \ | assert {**data.complete_snapshot, 'next_branch': None} \ | ||||
== swh_storage.snapshot_get_latest(origin_url) | == swh_storage.snapshot_get_latest(origin_url) | ||||
# Status filter: all three visits are status=ongoing, so no snapshot | # Status filter: all three visits are status=ongoing, so no snapshot | ||||
# returned | # returned | ||||
assert swh_storage.snapshot_get_latest( | assert swh_storage.snapshot_get_latest( | ||||
origin_url, | origin_url, | ||||
allowed_statuses=['full']) is None | allowed_statuses=['full']) is None | ||||
# Mark the first visit as completed and check status filter again | # Mark the first visit as completed and check status filter again | ||||
swh_storage.origin_visit_update(origin_url, visit1_id, status='full') | swh_storage.origin_visit_update( | ||||
origin_url, origin_visit1.visit, status='full') | |||||
assert {**data.complete_snapshot, 'next_branch': None} \ | assert {**data.complete_snapshot, 'next_branch': None} \ | ||||
== swh_storage.snapshot_get_latest( | == swh_storage.snapshot_get_latest( | ||||
origin_url, | origin_url, | ||||
allowed_statuses=['full']) | allowed_statuses=['full']) | ||||
# Add snapshot to visit2 and check that the new snapshot is returned | # Add snapshot to visit2 and check that the new snapshot is returned | ||||
swh_storage.snapshot_add([data.empty_snapshot]) | swh_storage.snapshot_add([data.empty_snapshot]) | ||||
swh_storage.origin_visit_update( | swh_storage.origin_visit_update( | ||||
origin_url, visit2_id, snapshot=data.empty_snapshot['id']) | origin_url, origin_visit2.visit, | ||||
snapshot=data.empty_snapshot['id']) | |||||
assert {**data.empty_snapshot, 'next_branch': None} \ | assert {**data.empty_snapshot, 'next_branch': None} \ | ||||
== swh_storage.snapshot_get_latest(origin_url) | == swh_storage.snapshot_get_latest(origin_url) | ||||
# Check that the status filter is still working | # Check that the status filter is still working | ||||
assert {**data.complete_snapshot, 'next_branch': None} \ | assert {**data.complete_snapshot, 'next_branch': None} \ | ||||
== swh_storage.snapshot_get_latest( | == swh_storage.snapshot_get_latest( | ||||
origin_url, | origin_url, | ||||
allowed_statuses=['full']) | allowed_statuses=['full']) | ||||
# Add snapshot to visit3 (same date as visit2) and check that | # Add snapshot to visit3 (same date as visit2) and check that | ||||
# the new snapshot is returned | # the new snapshot is returned | ||||
swh_storage.snapshot_add([data.complete_snapshot]) | swh_storage.snapshot_add([data.complete_snapshot]) | ||||
swh_storage.origin_visit_update( | swh_storage.origin_visit_update( | ||||
origin_url, visit3_id, snapshot=data.complete_snapshot['id']) | origin_url, origin_visit3.visit, | ||||
snapshot=data.complete_snapshot['id']) | |||||
assert {**data.complete_snapshot, 'next_branch': None} \ | assert {**data.complete_snapshot, 'next_branch': None} \ | ||||
== swh_storage.snapshot_get_latest(origin_url) | == swh_storage.snapshot_get_latest(origin_url) | ||||
def test_snapshot_get_latest__missing_snapshot(self, swh_storage): | def test_snapshot_get_latest__missing_snapshot(self, swh_storage): | ||||
# Origin does not exist | origin_url = swh_storage.origin_add_one(data.origin) | ||||
origin_url = data.origin['url'] | |||||
assert swh_storage.snapshot_get_latest(origin_url) is None | assert swh_storage.snapshot_get_latest(origin_url) is None | ||||
swh_storage.origin_add_one(data.origin) | |||||
origin_visit1 = swh_storage.origin_visit_add( | origin_visit1 = swh_storage.origin_visit_add( | ||||
origin=origin_url, | origin_url, date=data.date_visit1, type=data.type_visit1) | ||||
date=data.date_visit1, | |||||
type=data.type_visit1, | |||||
) | |||||
visit1_id = origin_visit1['visit'] | |||||
origin_visit2 = swh_storage.origin_visit_add( | origin_visit2 = swh_storage.origin_visit_add( | ||||
origin=origin_url, | origin_url, date=data.date_visit2, type=data.type_visit2) | ||||
date=data.date_visit2, | |||||
type=data.type_visit2, | |||||
) | |||||
visit2_id = origin_visit2['visit'] | |||||
# Two visits, both with no snapshot: latest snapshot is None | # Two visits, both with no snapshot: latest snapshot is None | ||||
assert swh_storage.snapshot_get_latest(origin_url) is None | assert swh_storage.snapshot_get_latest(origin_url) is None | ||||
# Add unknown snapshot to visit1, check that the inconsistency is | # Add unknown snapshot to visit1, check that the inconsistency is | ||||
# detected | # detected | ||||
swh_storage.origin_visit_update( | swh_storage.origin_visit_update( | ||||
origin_url, | origin_url, origin_visit1.visit, | ||||
visit1_id, snapshot=data.complete_snapshot['id']) | snapshot=data.complete_snapshot['id']) | ||||
with pytest.raises(Exception): | with pytest.raises(Exception): | ||||
# XXX: should the exception be more specific than this? | # XXX: should the exception be more specific than this? | ||||
swh_storage.snapshot_get_latest(origin_url) | swh_storage.snapshot_get_latest(origin_url) | ||||
# Status filter: both visits are status=ongoing, so no snapshot | # Status filter: both visits are status=ongoing, so no snapshot | ||||
# returned | # returned | ||||
assert swh_storage.snapshot_get_latest( | assert swh_storage.snapshot_get_latest( | ||||
origin_url, | origin_url, allowed_statuses=['full']) is None | ||||
allowed_statuses=['full']) is None | |||||
# Mark the first visit as completed and check status filter again | # Mark the first visit as completed and check status filter again | ||||
swh_storage.origin_visit_update( | swh_storage.origin_visit_update( | ||||
origin_url, | origin_url, origin_visit1.visit, status='full') | ||||
visit1_id, status='full') | |||||
with pytest.raises(Exception): | with pytest.raises(Exception): | ||||
# XXX: should the exception be more specific than this? | # XXX: should the exception be more specific than this? | ||||
swh_storage.snapshot_get_latest( | swh_storage.snapshot_get_latest( | ||||
origin_url, | origin_url, | ||||
allowed_statuses=['full']), | allowed_statuses=['full']), | ||||
# Actually add the snapshot and check status filter again | # Actually add the snapshot and check status filter again | ||||
swh_storage.snapshot_add([data.complete_snapshot]) | swh_storage.snapshot_add([data.complete_snapshot]) | ||||
assert {**data.complete_snapshot, 'next_branch': None} \ | assert {**data.complete_snapshot, 'next_branch': None} \ | ||||
== swh_storage.snapshot_get_latest(origin_url) | == swh_storage.snapshot_get_latest(origin_url) | ||||
# Add unknown snapshot to visit2 and check that the inconsistency | # Add unknown snapshot to visit2 and check that the inconsistency | ||||
# is detected | # is detected | ||||
swh_storage.origin_visit_update( | swh_storage.origin_visit_update( | ||||
origin_url, | origin_url, origin_visit2.visit, snapshot=data.snapshot['id']) | ||||
visit2_id, snapshot=data.snapshot['id']) | |||||
with pytest.raises(Exception): | with pytest.raises(Exception): | ||||
# XXX: should the exception be more specific than this? | # XXX: should the exception be more specific than this? | ||||
swh_storage.snapshot_get_latest( | swh_storage.snapshot_get_latest( | ||||
origin_url) | origin_url) | ||||
# Actually add that snapshot and check that the new one is returned | # Actually add that snapshot and check that the new one is returned | ||||
swh_storage.snapshot_add([data.snapshot]) | swh_storage.snapshot_add([data.snapshot]) | ||||
assert{**data.snapshot, 'next_branch': None} \ | assert{**data.snapshot, 'next_branch': None} \ | ||||
Show All 39 Lines | def test_stat_counters(self, swh_storage): | ||||
assert set(expected_keys) <= set(counters) | assert set(expected_keys) <= set(counters) | ||||
for key in expected_keys: | for key in expected_keys: | ||||
if key != 'content': | if key != 'content': | ||||
assert counters[key] == 0 | assert counters[key] == 0 | ||||
assert counters['content'] == 1 | assert counters['content'] == 1 | ||||
# Add other objects. Check their counter increased as well. | # Add other objects. Check their counter increased as well. | ||||
swh_storage.origin_add_one(data.origin2) | origin_url = swh_storage.origin_add_one(data.origin2) | ||||
origin_visit1 = swh_storage.origin_visit_add( | origin_visit1 = swh_storage.origin_visit_add( | ||||
origin=data.origin2['url'], | origin_url, date=data.date_visit2, type=data.type_visit2) | ||||
date=data.date_visit2, | |||||
type=data.type_visit2, | |||||
) | |||||
swh_storage.snapshot_add([data.snapshot]) | swh_storage.snapshot_add([data.snapshot]) | ||||
swh_storage.origin_visit_update( | swh_storage.origin_visit_update( | ||||
data.origin2['url'], origin_visit1['visit'], | origin_url, origin_visit1.visit, snapshot=data.snapshot['id']) | ||||
snapshot=data.snapshot['id']) | |||||
swh_storage.directory_add([data.dir]) | swh_storage.directory_add([data.dir]) | ||||
swh_storage.revision_add([data.revision]) | swh_storage.revision_add([data.revision]) | ||||
swh_storage.release_add([data.release]) | swh_storage.release_add([data.release]) | ||||
swh_storage.refresh_stat_counters() | swh_storage.refresh_stat_counters() | ||||
counters = swh_storage.stat_counters() | counters = swh_storage.stat_counters() | ||||
assert counters['content'] == 1 | assert counters['content'] == 1 | ||||
assert counters['directory'] == 1 | assert counters['directory'] == 1 | ||||
▲ Show 20 Lines • Show All 845 Lines • ▼ Show 20 Lines | def test_origin_count_with_visit_no_visits(self, swh_storage): | ||||
assert swh_storage.origin_count('.*user1.*', regexp=False, | assert swh_storage.origin_count('.*user1.*', regexp=False, | ||||
with_visit=True) == 0 | with_visit=True) == 0 | ||||
def test_origin_count_with_visit_with_visits_no_snapshot( | def test_origin_count_with_visit_with_visits_no_snapshot( | ||||
self, swh_storage): | self, swh_storage): | ||||
swh_storage.origin_add([{'url': url} for url in self.ORIGINS]) | swh_storage.origin_add([{'url': url} for url in self.ORIGINS]) | ||||
now = datetime.datetime.now(tz=datetime.timezone.utc) | now = datetime.datetime.now(tz=datetime.timezone.utc) | ||||
origin_url = 'https://github.com/user1/repo1' | |||||
swh_storage.origin_visit_add( | swh_storage.origin_visit_add( | ||||
origin='https://github.com/user1/repo1', date=now, type='git') | origin_url, date=now, type='git') | ||||
assert swh_storage.origin_count('github', with_visit=False) == 3 | assert swh_storage.origin_count('github', with_visit=False) == 3 | ||||
# it has a visit, but no snapshot, so with_visit=True => 0 | # it has a visit, but no snapshot, so with_visit=True => 0 | ||||
assert swh_storage.origin_count('github', with_visit=True) == 0 | assert swh_storage.origin_count('github', with_visit=True) == 0 | ||||
assert swh_storage.origin_count('gitlab', with_visit=False) == 2 | assert swh_storage.origin_count('gitlab', with_visit=False) == 2 | ||||
# these gitlab origins have no visit | # these gitlab origins have no visit | ||||
assert swh_storage.origin_count('gitlab', with_visit=True) == 0 | assert swh_storage.origin_count('gitlab', with_visit=True) == 0 | ||||
assert swh_storage.origin_count('github.*user1', regexp=True, | assert swh_storage.origin_count('github.*user1', regexp=True, | ||||
with_visit=False) == 1 | with_visit=False) == 1 | ||||
assert swh_storage.origin_count('github.*user1', regexp=True, | assert swh_storage.origin_count('github.*user1', regexp=True, | ||||
with_visit=True) == 0 | with_visit=True) == 0 | ||||
assert swh_storage.origin_count('github', regexp=True, | assert swh_storage.origin_count('github', regexp=True, | ||||
with_visit=True) == 0 | with_visit=True) == 0 | ||||
def test_origin_count_with_visit_with_visits_and_snapshot( | def test_origin_count_with_visit_with_visits_and_snapshot( | ||||
self, swh_storage): | self, swh_storage): | ||||
swh_storage.origin_add([{'url': url} for url in self.ORIGINS]) | swh_storage.origin_add([{'url': url} for url in self.ORIGINS]) | ||||
now = datetime.datetime.now(tz=datetime.timezone.utc) | now = datetime.datetime.now(tz=datetime.timezone.utc) | ||||
swh_storage.snapshot_add([data.snapshot]) | swh_storage.snapshot_add([data.snapshot]) | ||||
origin_url = 'https://github.com/user1/repo1' | |||||
visit = swh_storage.origin_visit_add( | visit = swh_storage.origin_visit_add( | ||||
origin='https://github.com/user1/repo1', date=now, type='git') | origin_url, date=now, type='git') | ||||
swh_storage.origin_visit_update( | swh_storage.origin_visit_update( | ||||
origin='https://github.com/user1/repo1', visit_id=visit['visit'], | origin_url, visit.visit, snapshot=data.snapshot['id']) | ||||
snapshot=data.snapshot['id']) | |||||
assert swh_storage.origin_count('github', with_visit=False) == 3 | assert swh_storage.origin_count('github', with_visit=False) == 3 | ||||
# github/user1 has a visit and a snapshot, so with_visit=True => 1 | # github/user1 has a visit and a snapshot, so with_visit=True => 1 | ||||
assert swh_storage.origin_count('github', with_visit=True) == 1 | assert swh_storage.origin_count('github', with_visit=True) == 1 | ||||
assert swh_storage.origin_count('github.*user1', regexp=True, | assert swh_storage.origin_count('github.*user1', regexp=True, | ||||
with_visit=False) == 1 | with_visit=False) == 1 | ||||
assert swh_storage.origin_count('github.*user1', regexp=True, | assert swh_storage.origin_count('github.*user1', regexp=True, | ||||
with_visit=True) == 1 | with_visit=True) == 1 | ||||
assert swh_storage.origin_count('github', regexp=True, | assert swh_storage.origin_count('github', regexp=True, | ||||
with_visit=True) == 1 | with_visit=True) == 1 | ||||
@settings(suppress_health_check=[HealthCheck.too_slow]) | @settings(suppress_health_check=[HealthCheck.too_slow]) | ||||
@given(strategies.lists(objects(), max_size=2)) | @given(strategies.lists(objects(), max_size=2)) | ||||
def test_add_arbitrary(self, swh_storage, objects): | def test_add_arbitrary(self, swh_storage, objects): | ||||
for (obj_type, obj) in objects: | for (obj_type, obj) in objects: | ||||
obj = obj.to_dict() | obj = obj.to_dict() | ||||
if obj_type == 'origin_visit': | if obj_type == 'origin_visit': | ||||
origin = obj.pop('origin') | origin_url = obj.pop('origin') | ||||
swh_storage.origin_add_one({'url': origin}) | swh_storage.origin_add_one({'url': origin_url}) | ||||
if 'visit' in obj: | if 'visit' in obj: | ||||
del obj['visit'] | del obj['visit'] | ||||
swh_storage.origin_visit_add( | swh_storage.origin_visit_add( | ||||
origin, obj['date'], obj['type']) | origin_url, obj['date'], obj['type']) | ||||
else: | else: | ||||
if obj_type == 'content' and obj['status'] == 'absent': | if obj_type == 'content' and obj['status'] == 'absent': | ||||
obj_type = 'skipped_content' | obj_type = 'skipped_content' | ||||
method = getattr(swh_storage, obj_type + '_add') | method = getattr(swh_storage, obj_type + '_add') | ||||
try: | try: | ||||
method([obj]) | method([obj]) | ||||
except HashCollision: | except HashCollision: | ||||
pass | pass | ||||
▲ Show 20 Lines • Show All 174 Lines • Show Last 20 Lines |
Rename to `origin_visit`.