Page MenuHomeSoftware Heritage

D2295.diff
No OneTemporary

D2295.diff

diff --git a/swh/storage/db.py b/swh/storage/db.py
--- a/swh/storage/db.py
+++ b/swh/storage/db.py
@@ -688,7 +688,12 @@
WHERE """
if with_visit:
query += """
- EXISTS (SELECT 1 from origin_visit WHERE origin=origin.id)
+ EXISTS (
+ SELECT 1
+ FROM origin_visit
+ INNER JOIN snapshot ON snapshot=snapshot.id
+ WHERE origin=origin.id
+ )
AND """
query += 'url %s %%s '
if not count:
diff --git a/swh/storage/in_memory.py b/swh/storage/in_memory.py
--- a/swh/storage/in_memory.py
+++ b/swh/storage/in_memory.py
@@ -1122,8 +1122,13 @@
else:
origins = [orig for orig in origins if url_pattern in orig['url']]
if with_visit:
- origins = [orig for orig in origins
- if len(self._origin_visits[orig['url']]) > 0]
+ origins = [
+ orig for orig in origins
+ if len(self._origin_visits[orig['url']]) > 0 and
+ set(ov.snapshot
+ for ov in self._origin_visits[orig['url']]
+ if ov.snapshot) &
+ set(self._snapshots)]
return origins[offset:offset+limit]
diff --git a/swh/storage/tests/test_storage.py b/swh/storage/tests/test_storage.py
--- a/swh/storage/tests/test_storage.py
+++ b/swh/storage/tests/test_storage.py
@@ -3165,31 +3165,17 @@
assert actual_origins[-1]['id'] == 100
assert actual_origins[-1]['url'] == swh_origins[99]['url']
- def test_origin_count(self, swh_storage):
- new_origins = [
- {
- 'type': 'git',
- 'url': 'https://github.com/user1/repo1'
- },
- {
- 'type': 'git',
- 'url': 'https://github.com/user2/repo1'
- },
- {
- 'type': 'git',
- 'url': 'https://github.com/user3/repo1'
- },
- {
- 'type': 'git',
- 'url': 'https://gitlab.com/user1/repo1'
- },
- {
- 'type': 'git',
- 'url': 'https://gitlab.com/user2/repo1'
- }
- ]
+ ORIGINS = [
+ 'https://github.com/user1/repo1',
+ 'https://github.com/user2/repo1',
+ 'https://github.com/user3/repo1',
+ 'https://gitlab.com/user1/repo1',
+ 'https://gitlab.com/user2/repo1',
+ 'https://forge.softwareheritage.org/source/repo1',
+ ]
- swh_storage.origin_add(new_origins)
+ def test_origin_count(self, swh_storage):
+ swh_storage.origin_add([{'url': url} for url in self.ORIGINS])
assert swh_storage.origin_count('github') == 3
assert swh_storage.origin_count('gitlab') == 2
@@ -3198,6 +3184,67 @@
assert swh_storage.origin_count('.*user1.*', regexp=True) == 2
assert swh_storage.origin_count('.*user1.*', regexp=False) == 0
+ def test_origin_count_with_visit_no_visits(self, swh_storage):
+ swh_storage.origin_add([{'url': url} for url in self.ORIGINS])
+
+ # none of them have visits, so with_visit=True => 0
+ assert swh_storage.origin_count('github', with_visit=True) == 0
+ assert swh_storage.origin_count('gitlab', with_visit=True) == 0
+ assert swh_storage.origin_count('.*user.*', regexp=True,
+ with_visit=True) == 0
+ assert swh_storage.origin_count('.*user.*', regexp=False,
+ with_visit=True) == 0
+ assert swh_storage.origin_count('.*user1.*', regexp=True,
+ with_visit=True) == 0
+ assert swh_storage.origin_count('.*user1.*', regexp=False,
+ with_visit=True) == 0
+
+ def test_origin_count_with_visit_with_visits_no_snapshot(
+ self, swh_storage):
+ swh_storage.origin_add([{'url': url} for url in self.ORIGINS])
+ now = datetime.datetime.now(tz=datetime.timezone.utc)
+
+ swh_storage.origin_visit_add(
+ origin='https://github.com/user1/repo1', date=now, type='git')
+
+ assert swh_storage.origin_count('github', with_visit=False) == 3
+ # it has a visit, but no snapshot, so with_visit=True => 0
+ assert swh_storage.origin_count('github', with_visit=True) == 0
+
+ assert swh_storage.origin_count('gitlab', with_visit=False) == 2
+ # these gitlab origins have no visit
+ assert swh_storage.origin_count('gitlab', with_visit=True) == 0
+
+ assert swh_storage.origin_count('github.*user1', regexp=True,
+ with_visit=False) == 1
+ assert swh_storage.origin_count('github.*user1', regexp=True,
+ with_visit=True) == 0
+ assert swh_storage.origin_count('github', regexp=True,
+ with_visit=True) == 0
+
+ def test_origin_count_with_visit_with_visits_and_snapshot(
+ self, swh_storage):
+ swh_storage.origin_add([{'url': url} for url in self.ORIGINS])
+ now = datetime.datetime.now(tz=datetime.timezone.utc)
+
+ swh_storage.snapshot_add([data.snapshot])
+ visit = swh_storage.origin_visit_add(
+ origin='https://github.com/user1/repo1', date=now, type='git')
+ swh_storage.origin_visit_update(
+ origin='https://github.com/user1/repo1', visit_id=visit['visit'],
+ snapshot=data.snapshot['id'])
+
+ assert swh_storage.origin_count('github', with_visit=False) == 3
+ # github/user1 has a visit and a snapshot, so with_visit=True => 1
+ assert swh_storage.origin_count('github', with_visit=True) == 1
+
+ assert swh_storage.origin_count('github.*user1', regexp=True,
+ with_visit=False) == 1
+ assert swh_storage.origin_count('github.*user1', regexp=True,
+ with_visit=True) == 1
+ assert swh_storage.origin_count('github', regexp=True,
+ with_visit=True) == 1
+
@settings(suppress_health_check=[HealthCheck.too_slow])
@given(strategies.lists(objects(), max_size=2))
def test_add_arbitrary(self, swh_storage, objects):

File Metadata

Mime Type
text/plain
Expires
Jul 3 2025, 8:11 AM (10 w, 9 h ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3220684

Event Timeline