Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7124865
D5978.id21889.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
8 KB
Subscribers
None
D5978.id21889.diff
View Options
diff --git a/sql/updates/29.sql b/sql/updates/29.sql
--- a/sql/updates/29.sql
+++ b/sql/updates/29.sql
@@ -12,8 +12,12 @@
alter table origin_visit_stats
add column next_position_offset int not null default 4;
+alter table origin_visit_stats
+add column successive_visits int not null default 0;
+
comment on column origin_visit_stats.next_visit_queue_position is 'Time at which some new objects are expected to be found';
comment on column origin_visit_stats.next_position_offset is 'Duration that we expect to wait between visits of this origin';
+comment on column origin_visit_stats.successive_visits is 'number of successive visits with the same status';
create table visit_scheduler_queue_position (
visit_type text not null,
diff --git a/swh/scheduler/journal_client.py b/swh/scheduler/journal_client.py
--- a/swh/scheduler/journal_client.py
+++ b/swh/scheduler/journal_client.py
@@ -220,6 +220,8 @@
# eventfulness
last_visit_status, eventful = get_last_status(msg_dict, visit_stats_d)
+ increment_successive_visits = False
+
# Update the position offset according to the visit status,
# if we had already visited this origin before.
@@ -230,6 +232,10 @@
visit_stats_d["next_position_offset"] = max(
0, visit_stats_d["next_position_offset"] + increment
)
+ # flag the counter for increment (applied below) when the new status matches the previously recorded last_visit_status
+ increment_successive_visits = (
+ last_visit_status == visit_stats_d["last_visit_status"]
+ )
# Record current visit date as highest known date (we've rejected out of order
# messages earlier).
@@ -250,6 +256,11 @@
queue_position_per_visit_type, visit_stats_d
)
+ if increment_successive_visits:
+ visit_stats_d["successive_visits"] += 1
+ else:
+ visit_stats_d["successive_visits"] = 0
+
scheduler.origin_visit_stats_upsert(
OriginVisitStats(**ovs) for ovs in origin_visit_stats.values()
)
diff --git a/swh/scheduler/model.py b/swh/scheduler/model.py
--- a/swh/scheduler/model.py
+++ b/swh/scheduler/model.py
@@ -226,6 +226,8 @@
)
next_position_offset = attr.ib(type=int, validator=type_validator(), default=4)
+ successive_visits = attr.ib(type=int, validator=type_validator(), default=0)
+
@last_successful.validator
def check_last_successful(self, attribute, value):
check_timestamptz(value)
diff --git a/swh/scheduler/sql/30-schema.sql b/swh/scheduler/sql/30-schema.sql
--- a/swh/scheduler/sql/30-schema.sql
+++ b/swh/scheduler/sql/30-schema.sql
@@ -178,6 +178,7 @@
next_visit_queue_position timestamptz,
-- duration that we expect to wait between visits of this origin
next_position_offset int not null default 4,
+ successive_visits int not null default 0,
primary key (url, visit_type)
);
@@ -193,6 +194,7 @@
comment on column origin_visit_stats.next_visit_queue_position is 'Time at which some new objects are expected to be found';
comment on column origin_visit_stats.next_position_offset is 'Duration that we expect to wait between visits of this origin';
+comment on column origin_visit_stats.successive_visits is 'number of successive visits with the same status';
create table visit_scheduler_queue_position (
visit_type text not null,
diff --git a/swh/scheduler/tests/test_journal_client.py b/swh/scheduler/tests/test_journal_client.py
--- a/swh/scheduler/tests/test_journal_client.py
+++ b/swh/scheduler/tests/test_journal_client.py
@@ -171,6 +171,7 @@
last_visit=visit_status["date"],
last_visit_status=LastVisitStatus.not_found,
next_position_offset=4,
+ successive_visits=0,
),
)
@@ -206,6 +207,7 @@
last_visit=DATE3,
last_visit_status=LastVisitStatus.not_found,
next_position_offset=6,
+ successive_visits=2,
),
)
@@ -259,6 +261,7 @@
last_visit=DATE3,
last_visit_status=LastVisitStatus.failed,
next_position_offset=6,
+ successive_visits=2,
),
)
@@ -296,6 +299,7 @@
last_visit=DATE2,
last_visit_status=LastVisitStatus.failed,
next_position_offset=5,
+ successive_visits=1,
),
)
@@ -351,6 +355,7 @@
last_visit_status=LastVisitStatus.successful,
last_snapshot=hash_to_bytes("dddcc0710eb6cf9efd5b920a8453e1e07157bddd"),
next_position_offset=0,
+ successive_visits=2,
),
)
@@ -377,6 +382,7 @@
last_snapshot=visit_status["snapshot"],
next_visit_queue_position=None,
next_position_offset=4,
+ successive_visits=1,
)
]
)
@@ -388,6 +394,7 @@
actual_origin_visit_stats = swh_scheduler.origin_visit_stats_get(
[(visit_status["origin"], visit_status["type"])]
)
+
assert_visit_stats_ok(
actual_origin_visit_stats[0],
OriginVisitStats(
@@ -399,6 +406,7 @@
last_snapshot=visit_status["snapshot"],
next_visit_queue_position=None,
next_position_offset=5,
+ successive_visits=0,
),
)
@@ -463,13 +471,19 @@
last_visit_status=LastVisitStatus.successful,
last_snapshot=hash_to_bytes("d81cc0710eb6cf9efd5b920a8453e1e07157b6cd"),
),
- ignore_fields=["next_visit_queue_position", "next_position_offset"],
+ ignore_fields=[
+ "next_visit_queue_position",
+ "next_position_offset",
+ "successive_visits",
+ ],
)
# We ignore out of order messages, so the next_position_offset isn't exact
# depending on the permutation. What matters is consistency of the final
# dates (last_visit and last_successful).
assert 4 <= visit_stats.next_position_offset <= 5
+ # same goes for successive_visits
+ assert 0 <= visit_stats.successive_visits <= 1
VISIT_STATUSES_1 = [
@@ -533,13 +547,19 @@
last_visit_status=LastVisitStatus.successful,
last_snapshot=hash_to_bytes("aaaaaabbbeb6cf9efd5b920a8453e1e07157b6cd"),
),
- ignore_fields=["next_visit_queue_position", "next_position_offset"],
+ ignore_fields=[
+ "next_visit_queue_position",
+ "next_position_offset",
+ "successive_visits",
+ ],
)
# We ignore out of order messages, so the next_position_offset isn't exact
# depending on the permutation. What matters is consistency of the final
# dates (last_visit and last_successful).
assert 2 <= visit_stats.next_position_offset <= 5
+ # same goes for successive_visits
+ assert 0 <= visit_stats.successive_visits <= 3
VISIT_STATUSES_2 = [
@@ -680,6 +700,7 @@
last_visit=DATE1,
last_visit_status=LastVisitStatus.successful,
last_snapshot=hash_to_bytes("aaaaaabbbeb6cf9efd5b920a8453e1e07157b6cd"),
+ successive_visits=0,
),
)
@@ -725,6 +746,7 @@
last_visit_status=LastVisitStatus.successful,
last_snapshot=hash_to_bytes("aaaaaabbbeb6cf9efd5b920a8453e1e07157b6cd"),
next_position_offset=4,
+ successive_visits=0,
),
)
@@ -787,13 +809,19 @@
last_visit_status=LastVisitStatus.successful,
last_snapshot=hash_to_bytes("aaaaaabbbeb6cf9efd5b920a8453e1e07157b6cd"),
),
- ignore_fields=["next_visit_queue_position", "next_position_offset"],
+ ignore_fields=[
+ "next_visit_queue_position",
+ "next_position_offset",
+ "successive_visits",
+ ],
)
# We ignore out of order messages, so the next_position_offset isn't exact
# depending on the permutation. What matters is consistency of the final
# dates (last_visit and last_successful).
assert 4 <= visit_stats.next_position_offset <= 6
+ # same goes for successive_visits
+ assert 0 <= visit_stats.successive_visits <= 2
@pytest.mark.parametrize(
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Dec 21 2024, 10:00 PM (11 w, 4 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3231994
Attached To
D5978: Add a successive_visits counter to origin visit stats
Event Timeline
Log In to Comment