Changeset View
Standalone View
swh/storage/tests/test_storage.py
Show First 20 Lines • Show All 1,641 Lines • ▼ Show 20 Lines | def test_origin_visit_status_add(self, swh_storage, sample_data): | ||||
ovs2 = OriginVisitStatus( | ovs2 = OriginVisitStatus( | ||||
origin=origin2.url, | origin=origin2.url, | ||||
visit=ov2.visit, | visit=ov2.visit, | ||||
date=sample_data.date_visit2, | date=sample_data.date_visit2, | ||||
status="created", | status="created", | ||||
snapshot=None, | snapshot=None, | ||||
) | ) | ||||
date_visit_now = now() | date_visit_now = round_to_milliseconds(now()) | ||||
vlorentz: why wasn't the rounding needed before? | |||||
Done Inline ActionsNo idea. ardumont: No idea. | |||||
Done Inline ActionsTo be clear, that's been bothering me for a while... I have no idea why it's kinda not needed anywhere near we deal with date... Also, I don't really know what that rounding in itself entails for the day we really switch to cass instead of pg... (in terms of data). ardumont: To be clear, that's been bothering me for a while...
I have no idea why it's kinda not needed… | |||||
Not Done Inline Actions
It's only needed when doing equality comparisons.
Yes, but it is not useful data. vlorentz: > I have no idea why it's kinda not needed anywhere near we deal with date...
It's only needed… | |||||
Done Inline Actions
How so? If people wants to retrieve revision data and compute the hashes themselves, won't that impact them? ardumont: > Yes, but it is not useful data.
How so?
Isn't say, revision for example, concerned by this… | |||||
Not Done Inline ActionsThese are stored in a TimestampWithTimezone, which preserves exact dates (by storing them as integers in the DB), including microseconds and negative UTC. For visits we don't need exact dates, so we use datetime.datetime (and store them as the native timestamp type of the DB) vlorentz: These are stored in a TimestampWithTimezone, which preserves exact dates (by storing them as… | |||||
Done Inline Actionsoh right, thank you ;) ardumont: oh right, thank you ;) | |||||
Not Done Inline ActionsOh actually, the answer to my original question: it wasn't needed before because we only compared some fields, so date truncation didn't matter. vlorentz: Oh actually, the answer to my original question: it wasn't needed before because we only… | |||||
Done Inline Actionsoh, yeah, cool then. ardumont: oh, yeah, cool then. | |||||
visit_status1 = OriginVisitStatus( | visit_status1 = OriginVisitStatus( | ||||
origin=ov1.origin, | origin=ov1.origin, | ||||
visit=ov1.visit, | visit=ov1.visit, | ||||
date=date_visit_now, | date=date_visit_now, | ||||
status="full", | status="full", | ||||
snapshot=snapshot.id, | snapshot=snapshot.id, | ||||
) | ) | ||||
date_visit_now = now() | date_visit_now = round_to_milliseconds(now()) | ||||
visit_status2 = OriginVisitStatus( | visit_status2 = OriginVisitStatus( | ||||
origin=ov2.origin, | origin=ov2.origin, | ||||
visit=ov2.visit, | visit=ov2.visit, | ||||
date=date_visit_now, | date=date_visit_now, | ||||
status="ongoing", | status="ongoing", | ||||
snapshot=None, | snapshot=None, | ||||
metadata={"intrinsic": "something"}, | metadata={"intrinsic": "something"}, | ||||
) | ) | ||||
swh_storage.origin_visit_status_add([visit_status1, visit_status2]) | swh_storage.origin_visit_status_add([visit_status1, visit_status2]) | ||||
origin_visit1 = swh_storage.origin_visit_get_latest( | visit = swh_storage.origin_visit_get_latest(origin1.url, require_snapshot=True) | ||||
origin1.url, require_snapshot=True | visit_status = swh_storage.origin_visit_status_get_latest( | ||||
) | origin1.url, visit.visit, require_snapshot=True | ||||
assert origin_visit1 | ) | ||||
assert origin_visit1["status"] == "full" | assert visit_status == visit_status1 | ||||
assert origin_visit1["snapshot"] == snapshot.id | |||||
visit = swh_storage.origin_visit_get_latest(origin2.url, require_snapshot=False) | |||||
origin_visit2 = swh_storage.origin_visit_get_latest( | visit_status = swh_storage.origin_visit_status_get_latest( | ||||
origin2.url, require_snapshot=False | origin2.url, visit.visit, require_snapshot=False | ||||
) | ) | ||||
assert origin2.url != origin1.url | assert origin2.url != origin1.url | ||||
assert origin_visit2 | assert visit_status == visit_status2 | ||||
assert origin_visit2["status"] == "ongoing" | |||||
assert origin_visit2["snapshot"] is None | |||||
assert origin_visit2["metadata"] == {"intrinsic": "something"} | |||||
actual_objects = list(swh_storage.journal_writer.journal.objects) | actual_objects = list(swh_storage.journal_writer.journal.objects) | ||||
expected_origins = [origin1, origin2] | expected_origins = [origin1, origin2] | ||||
expected_visits = [ov1, ov2] | expected_visits = [ov1, ov2] | ||||
expected_visit_statuses = [ovs1, ovs2, visit_status1, visit_status2] | expected_visit_statuses = [ovs1, ovs2, visit_status1, visit_status2] | ||||
expected_objects = ( | expected_objects = ( | ||||
▲ Show 20 Lines • Show All 226 Lines • ▼ Show 20 Lines | |||||
def test_origin_visit_get_latest_filter_type(self, swh_storage, sample_data): | def test_origin_visit_get_latest_filter_type(self, swh_storage, sample_data): | ||||
"""Filtering origin visit get latest with filter type should be ok | """Filtering origin visit get latest with filter type should be ok | ||||
""" | """ | ||||
origin = sample_data.origin | origin = sample_data.origin | ||||
swh_storage.origin_add([origin]) | swh_storage.origin_add([origin]) | ||||
visit1 = OriginVisit( | visit1 = OriginVisit( | ||||
origin=origin.url, | origin=origin.url, date=sample_data.date_visit1, type="git", | ||||
date=sample_data.date_visit1, | |||||
type=sample_data.type_visit1, | |||||
) | ) | ||||
visit2 = OriginVisit( | visit2 = OriginVisit( | ||||
origin=origin.url, | origin=origin.url, date=sample_data.date_visit2, type="hg", | ||||
date=sample_data.date_visit2, | |||||
type=sample_data.type_visit2, | |||||
) | ) | ||||
# Add a visit with the same date as the previous one | date_now = round_to_milliseconds(now()) | ||||
visit3 = OriginVisit( | visit3 = OriginVisit(origin=origin.url, date=date_now, type="hg",) | ||||
origin=origin.url, | |||||
date=sample_data.date_visit2, | |||||
type=sample_data.type_visit2, | |||||
) | |||||
assert sample_data.type_visit1 != sample_data.type_visit2 | |||||
assert sample_data.date_visit1 < sample_data.date_visit2 | assert sample_data.date_visit1 < sample_data.date_visit2 | ||||
assert sample_data.date_visit2 < date_now | |||||
ov1, ov2, ov3 = swh_storage.origin_visit_add([visit1, visit2, visit3]) | ov1, ov2, ov3 = swh_storage.origin_visit_add([visit1, visit2, visit3]) | ||||
origin_visit1 = swh_storage.origin_visit_get_by(origin.url, ov1.visit) | |||||
origin_visit3 = swh_storage.origin_visit_get_by(origin.url, ov3.visit) | |||||
assert sample_data.type_visit1 != sample_data.type_visit2 | |||||
# Check type filter is ok | # Check type filter is ok | ||||
actual_ov1 = swh_storage.origin_visit_get_latest( | actual_visit = swh_storage.origin_visit_get_latest(origin.url, type="git") | ||||
origin.url, type=sample_data.type_visit1, | assert actual_visit == ov1 | ||||
) | actual_visit = swh_storage.origin_visit_get_latest(origin.url, type="hg") | ||||
assert actual_ov1 == origin_visit1 | assert actual_visit == ov3 | ||||
actual_visit_unknown_type = swh_storage.origin_visit_get_latest( | |||||
actual_ov3 = swh_storage.origin_visit_get_latest( | origin.url, type="npm", # no visit matching that type | ||||
origin.url, type=sample_data.type_visit2, | |||||
) | |||||
assert actual_ov3 == origin_visit3 | |||||
new_type = "npm" | |||||
assert new_type not in [sample_data.type_visit1, sample_data.type_visit2] | |||||
assert ( | |||||
swh_storage.origin_visit_get_latest( | |||||
origin.url, type=new_type, # no visit matching that type | |||||
) | |||||
is None | |||||
) | ) | ||||
assert actual_visit_unknown_type is None | |||||
def test_origin_visit_get_latest(self, swh_storage, sample_data): | def test_origin_visit_get_latest(self, swh_storage, sample_data): | ||||
empty_snapshot, complete_snapshot = sample_data.snapshots[1:3] | empty_snapshot, complete_snapshot = sample_data.snapshots[1:3] | ||||
origin = sample_data.origin | origin = sample_data.origin | ||||
swh_storage.origin_add([origin]) | swh_storage.origin_add([origin]) | ||||
visit1 = OriginVisit( | visit1 = OriginVisit( | ||||
origin=origin.url, | origin=origin.url, date=sample_data.date_visit1, type="git", | ||||
date=sample_data.date_visit1, | |||||
type=sample_data.type_visit1, | |||||
) | ) | ||||
visit2 = OriginVisit( | visit2 = OriginVisit( | ||||
origin=origin.url, | origin=origin.url, date=sample_data.date_visit2, type="hg", | ||||
date=sample_data.date_visit2, | |||||
type=sample_data.type_visit2, | |||||
) | |||||
# Add a visit with the same date as the previous one | |||||
visit3 = OriginVisit( | |||||
origin=origin.url, | |||||
date=sample_data.date_visit2, | |||||
type=sample_data.type_visit2, | |||||
) | ) | ||||
date_now = round_to_milliseconds(now()) | |||||
visit3 = OriginVisit(origin=origin.url, date=date_now, type="hg",) | |||||
assert visit1.date < visit2.date | |||||
assert visit2.date < visit3.date | |||||
ov1, ov2, ov3 = swh_storage.origin_visit_add([visit1, visit2, visit3]) | ov1, ov2, ov3 = swh_storage.origin_visit_add([visit1, visit2, visit3]) | ||||
origin_visit1 = swh_storage.origin_visit_get_by(origin.url, ov1.visit) | # no filters, latest visit is the last one (whose date is most recent) | ||||
origin_visit2 = swh_storage.origin_visit_get_by(origin.url, ov2.visit) | actual_visit = swh_storage.origin_visit_get_latest(origin.url) | ||||
origin_visit3 = swh_storage.origin_visit_get_by(origin.url, ov3.visit) | assert actual_visit == ov3 | ||||
# Two visits, both with no snapshot | |||||
assert origin_visit3 == swh_storage.origin_visit_get_latest(origin.url) | # 3 visits, none has snapshot so nothing is returned | ||||
assert ( | actual_visit = swh_storage.origin_visit_get_latest( | ||||
swh_storage.origin_visit_get_latest(origin.url, require_snapshot=True) | origin.url, require_snapshot=True | ||||
is None | |||||
) | ) | ||||
assert actual_visit is None | |||||
# Add snapshot to visit1; require_snapshot=True makes it return | # visit are created with "created" status, so nothing will get returned | ||||
# visit1 and require_snapshot=False still returns visit2 | actual_visit = swh_storage.origin_visit_get_latest( | ||||
origin.url, allowed_statuses=["partial"] | |||||
) | |||||
assert actual_visit is None | |||||
# visit are created with "created" status, so most recent again | |||||
actual_visit = swh_storage.origin_visit_get_latest( | |||||
origin.url, allowed_statuses=["created"] | |||||
) | |||||
assert actual_visit == ov3 | |||||
# Add snapshot to visit1; require_snapshot=True makes it return first visit | |||||
swh_storage.snapshot_add([complete_snapshot]) | swh_storage.snapshot_add([complete_snapshot]) | ||||
swh_storage.origin_visit_status_add( | visit_status_with_snapshot = OriginVisitStatus( | ||||
[ | |||||
OriginVisitStatus( | |||||
origin=origin.url, | origin=origin.url, | ||||
visit=ov1.visit, | visit=ov1.visit, | ||||
date=now(), | date=round_to_milliseconds(now()), | ||||
status="ongoing", | status="ongoing", | ||||
snapshot=complete_snapshot.id, | snapshot=complete_snapshot.id, | ||||
) | ) | ||||
] | swh_storage.origin_visit_status_add([visit_status_with_snapshot]) | ||||
) | # only the first visit has a snapshot now | ||||
actual_visit = swh_storage.origin_visit_get_latest( | actual_visit = swh_storage.origin_visit_get_latest( | ||||
origin.url, require_snapshot=True | origin.url, require_snapshot=True | ||||
) | ) | ||||
assert actual_visit == { | assert actual_visit == ov1 | ||||
**origin_visit1, | |||||
"snapshot": complete_snapshot.id, | # only the first visit has a status ongoing now | ||||
"status": "ongoing", # visit1 has status created now | actual_visit = swh_storage.origin_visit_get_latest( | ||||
} | origin.url, allowed_statuses=["ongoing"] | ||||
) | |||||
assert actual_visit == ov1 | |||||
assert origin_visit3 == swh_storage.origin_visit_get_latest(origin.url) | actual_visit_status = swh_storage.origin_visit_status_get_latest( | ||||
origin.url, ov1.visit, require_snapshot=True | |||||
) | |||||
Not Done Inline ActionsYou could switch the equality operands to make it more readable imo (same comment for other instances below) vlorentz: You could switch the equality operands to make it more readable imo (same comment for other… | |||||
Done Inline ActionsI did it that way because it matches most of what we do elsewhere (in other modules or even here i mean). actual == expected. Which i now found clearer (force of habits i guess). ardumont: I did it that way because it matches most of what we do elsewhere (in other modules or even… | |||||
Not Done Inline ActionsThen could you add temporary variables (eg. with names containing actual and expected) and compare them? vlorentz: Then could you add temporary variables (eg. with names containing `actual` and `expected`) and… | |||||
Done Inline ActionsThat's indeed specifically how it's usually done. I thought the indirection was unneeded here as we do it a lot... Note: ardumont: That's indeed specifically how it's usually done.
I thought the indirection was unneeded here… | |||||
Not Done Inline ActionsUsually it is, but Black abuses parentheses here. Compare: assert ( swh_storage.origin_visit_status_get_latest( origin.url, ov1.visit, require_snapshot=True ) == visit_status_with_snapshot ) with: actual_visit_status = swh_storage.origin_visit_status_get_latest( origin.url, ov1.visit, require_snapshot=True ) assert actual_visit_status == visit_status_with_snapshot vlorentz: Usually it is, but Black abuses parentheses here. Compare:
```
assert… | |||||
Done Inline Actionsyou convinced me, thanks ;) ardumont: you convinced me, thanks ;) | |||||
assert actual_visit_status == visit_status_with_snapshot | |||||
# ... and require_snapshot=False (defaults) still returns latest visit (3rd) | |||||
actual_visit = swh_storage.origin_visit_get_latest( | |||||
origin.url, require_snapshot=False | |||||
) | |||||
assert actual_visit == ov3 | |||||
# no specific filter, this returns as before the latest visit | |||||
actual_visit = swh_storage.origin_visit_get_latest(origin.url) | |||||
assert actual_visit == ov3 | |||||
# Status filter: all three visits are status=ongoing, so no visit | # Status filter: all three visits are status=ongoing, so no visit | ||||
# returned | # returned | ||||
assert ( | actual_visit = swh_storage.origin_visit_get_latest( | ||||
swh_storage.origin_visit_get_latest(origin.url, allowed_statuses=["full"]) | origin.url, allowed_statuses=["full"] | ||||
is None | |||||
) | ) | ||||
assert actual_visit is None | |||||
# Mark the first visit as completed and check status filter again | visit_status1_full = OriginVisitStatus( | ||||
swh_storage.origin_visit_status_add( | |||||
[ | |||||
OriginVisitStatus( | |||||
origin=origin.url, | origin=origin.url, | ||||
visit=ov1.visit, | visit=ov1.visit, | ||||
date=now(), | date=round_to_milliseconds(now()), | ||||
status="full", | status="full", | ||||
snapshot=complete_snapshot.id, | snapshot=complete_snapshot.id, | ||||
) | ) | ||||
] | # Mark the first visit as completed and check status filter again | ||||
swh_storage.origin_visit_status_add([visit_status1_full]) | |||||
# only the first visit has the full status | |||||
actual_visit = swh_storage.origin_visit_get_latest( | |||||
origin.url, allowed_statuses=["full"] | |||||
) | ) | ||||
assert actual_visit == ov1 | |||||
assert { | actual_visit_status = swh_storage.origin_visit_status_get_latest( | ||||
**origin_visit1, | origin.url, ov1.visit, allowed_statuses=["full"] | ||||
"snapshot": complete_snapshot.id, | ) | ||||
"status": "full", | assert actual_visit_status == visit_status1_full | ||||
} == swh_storage.origin_visit_get_latest(origin.url, allowed_statuses=["full"]) | |||||
assert origin_visit3 == swh_storage.origin_visit_get_latest(origin.url) | # no specific filter, this returns as before the latest visit | ||||
actual_visit = swh_storage.origin_visit_get_latest(origin.url) | |||||
assert actual_visit == ov3 | |||||
# Add snapshot to visit2 and check that the new snapshot is returned | # Add snapshot to visit2 and check that the new snapshot is returned | ||||
swh_storage.snapshot_add([empty_snapshot]) | swh_storage.snapshot_add([empty_snapshot]) | ||||
swh_storage.origin_visit_status_add( | visit_status2_full = OriginVisitStatus( | ||||
[ | |||||
OriginVisitStatus( | |||||
origin=origin.url, | origin=origin.url, | ||||
visit=ov2.visit, | visit=ov2.visit, | ||||
date=now(), | date=round_to_milliseconds(now()), | ||||
status="ongoing", | status="ongoing", | ||||
snapshot=empty_snapshot.id, | snapshot=empty_snapshot.id, | ||||
) | ) | ||||
] | swh_storage.origin_visit_status_add([visit_status2_full]) | ||||
actual_visit = swh_storage.origin_visit_get_latest( | |||||
origin.url, require_snapshot=True | |||||
) | ) | ||||
assert { | # 2nd visit is most recent with a snapshot | ||||
**origin_visit2, | assert actual_visit == ov2 | ||||
"snapshot": empty_snapshot.id, | actual_visit_status = swh_storage.origin_visit_status_get_latest( | ||||
"status": "ongoing", | origin.url, ov2.visit, require_snapshot=True | ||||
} == swh_storage.origin_visit_get_latest(origin.url, require_snapshot=True) | ) | ||||
assert actual_visit_status == visit_status2_full | |||||
assert origin_visit3 == swh_storage.origin_visit_get_latest(origin.url) | |||||
# no specific filter, this returns as before the latest visit, 3rd one | |||||
actual_origin = swh_storage.origin_visit_get_latest(origin.url) | |||||
assert actual_origin == ov3 | |||||
# Check that the status filter is still working | # full status is still the first visit | ||||
assert { | actual_visit = swh_storage.origin_visit_get_latest( | ||||
**origin_visit1, | origin.url, allowed_statuses=["full"] | ||||
"snapshot": complete_snapshot.id, | ) | ||||
"status": "full", | assert actual_visit == ov1 | ||||
} == swh_storage.origin_visit_get_latest(origin.url, allowed_statuses=["full"]) | |||||
# Add snapshot to visit3 (same date as visit2) | # Add snapshot to visit3 (same date as visit2) | ||||
swh_storage.origin_visit_status_add( | visit_status3_with_snapshot = OriginVisitStatus( | ||||
[ | |||||
OriginVisitStatus( | |||||
origin=origin.url, | origin=origin.url, | ||||
visit=ov3.visit, | visit=ov3.visit, | ||||
date=now(), | date=round_to_milliseconds(now()), | ||||
status="ongoing", | status="ongoing", | ||||
snapshot=complete_snapshot.id, | snapshot=complete_snapshot.id, | ||||
) | ) | ||||
] | swh_storage.origin_visit_status_add([visit_status3_with_snapshot]) | ||||
# full status is still the first visit | |||||
actual_visit = swh_storage.origin_visit_get_latest( | |||||
origin.url, allowed_statuses=["full"], require_snapshot=True, | |||||
) | ) | ||||
assert { | assert actual_visit == ov1 | ||||
**origin_visit1, | |||||
"snapshot": complete_snapshot.id, | actual_visit_status = swh_storage.origin_visit_status_get_latest( | ||||
"status": "full", | origin.url, | ||||
} == swh_storage.origin_visit_get_latest(origin.url, allowed_statuses=["full"]) | visit=actual_visit.visit, | ||||
assert { | allowed_statuses=["full"], | ||||
**origin_visit1, | require_snapshot=True, | ||||
"snapshot": complete_snapshot.id, | |||||
"status": "full", | |||||
} == swh_storage.origin_visit_get_latest( | |||||
origin.url, allowed_statuses=["full"], require_snapshot=True | |||||
) | ) | ||||
assert { | assert actual_visit_status == visit_status1_full | ||||
**origin_visit3, | |||||
"snapshot": complete_snapshot.id, | |||||
"status": "ongoing", | |||||
} == swh_storage.origin_visit_get_latest(origin.url) | |||||
assert { | # most recent is still the 3rd visit | ||||
**origin_visit3, | actual_visit = swh_storage.origin_visit_get_latest(origin.url) | ||||
"snapshot": complete_snapshot.id, | assert actual_visit == ov3 | ||||
"status": "ongoing", | |||||
} == swh_storage.origin_visit_get_latest(origin.url, require_snapshot=True) | # 3rd visit has a snapshot now, so it's elected | ||||
actual_visit = swh_storage.origin_visit_get_latest( | |||||
origin.url, require_snapshot=True | |||||
) | |||||
assert actual_visit == ov3 | |||||
actual_visit_status = swh_storage.origin_visit_status_get_latest( | |||||
origin.url, ov3.visit, require_snapshot=True | |||||
) | |||||
assert actual_visit_status == visit_status3_with_snapshot | |||||
def test_origin_visit_status_get_latest(self, swh_storage, sample_data): | def test_origin_visit_status_get_latest(self, swh_storage, sample_data): | ||||
snapshot = sample_data.snapshots[2] | snapshot = sample_data.snapshots[2] | ||||
origin1 = sample_data.origin | origin1 = sample_data.origin | ||||
swh_storage.origin_add([origin1]) | swh_storage.origin_add([origin1]) | ||||
# to have some reference visits | # to have some reference visits | ||||
ov1, ov2 = swh_storage.origin_visit_add( | ov1, ov2 = swh_storage.origin_visit_add( | ||||
[ | [ | ||||
OriginVisit( | OriginVisit( | ||||
origin=origin1.url, | origin=origin1.url, | ||||
date=sample_data.date_visit1, | date=sample_data.date_visit1, | ||||
type=sample_data.type_visit1, | type=sample_data.type_visit1, | ||||
), | ), | ||||
OriginVisit( | OriginVisit( | ||||
origin=origin1.url, | origin=origin1.url, | ||||
date=sample_data.date_visit2, | date=sample_data.date_visit2, | ||||
type=sample_data.type_visit2, | type=sample_data.type_visit2, | ||||
), | ), | ||||
] | ] | ||||
) | ) | ||||
swh_storage.snapshot_add([snapshot]) | swh_storage.snapshot_add([snapshot]) | ||||
date_now = now() | date_now = round_to_milliseconds(now()) | ||||
date_now = round_to_milliseconds(date_now) | |||||
assert sample_data.date_visit1 < sample_data.date_visit2 | assert sample_data.date_visit1 < sample_data.date_visit2 | ||||
assert sample_data.date_visit2 < date_now | assert sample_data.date_visit2 < date_now | ||||
ovs1 = OriginVisitStatus( | ovs1 = OriginVisitStatus( | ||||
origin=origin1.url, | origin=origin1.url, | ||||
visit=ov1.visit, | visit=ov1.visit, | ||||
date=sample_data.date_visit1, | date=sample_data.date_visit1, | ||||
status="partial", | status="partial", | ||||
▲ Show 20 Lines • Show All 2,014 Lines • Show Last 20 Lines |
why wasn't the rounding needed before?