Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7123271
D1581.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
16 KB
Subscribers
None
D1581.diff
View Options
diff --git a/swh/storage/api/client.py b/swh/storage/api/client.py
--- a/swh/storage/api/client.py
+++ b/swh/storage/api/client.py
@@ -208,6 +208,13 @@
return self.post('origin/visit/getby', {'origin': origin,
'visit': visit})
+ def origin_visit_get_latest(self, origin, allowed_statuses=None,
+ require_snapshot=False):
+ return self.post(
+ 'origin/visit/get_latest',
+ {'origin': origin, 'allowed_statuses': allowed_statuses,
+ 'require_snapshot': require_snapshot})
+
def person_get(self, person):
return self.post('person', {'person': person})
diff --git a/swh/storage/api/server.py b/swh/storage/api/server.py
--- a/swh/storage/api/server.py
+++ b/swh/storage/api/server.py
@@ -397,6 +397,13 @@
get_storage().origin_visit_get_by(**decode_request(request)))
+@app.route('/origin/visit/get_latest', methods=['POST'])
+@timed
+def origin_visit_get_latest():
+ return encode_data(
+ get_storage().origin_visit_get_latest(**decode_request(request)))
+
+
@app.route('/origin/visit/add', methods=['POST'])
@timed
@encode
diff --git a/swh/storage/db.py b/swh/storage/db.py
--- a/swh/storage/db.py
+++ b/swh/storage/db.py
@@ -431,34 +431,45 @@
return bool(cur.fetchone())
- def origin_visit_get_latest_snapshot(self, origin_id,
- allowed_statuses=None,
- cur=None):
- """Retrieve the most recent origin_visit which references a snapshot
+ def origin_visit_get_latest(
+ self, origin_id, allowed_statuses=None, require_snapshot=False,
+ cur=None):
+ """Retrieve the most recent origin_visit of the given origin,
+ with optional filters.
Args:
origin_id: the origin concerned
allowed_statuses: the visit statuses allowed for the returned visit
+ require_snapshot (bool): If True, only a visit with a known
+ snapshot will be returned.
Returns:
The origin_visit information, or None if no visit matches.
"""
cur = self._cursor(cur)
- extra_clause = ""
+ query_parts = [
+ 'SELECT %s' % ', '.join(self.origin_visit_get_cols),
+ 'FROM origin_visit']
+
+ if require_snapshot:
+ # Makes sure the snapshot is known
+ query_parts.append(
+ 'INNER JOIN snapshot ON (origin_visit.snapshot=snapshot.id)')
+
+ query_parts.append('WHERE origin = %s')
+
+ if require_snapshot:
+ query_parts.append('AND snapshot is not null')
+
if allowed_statuses:
- extra_clause = cur.mogrify("AND status IN %s",
- (tuple(allowed_statuses),)).decode()
+ query_parts.append(
+ cur.mogrify('AND status IN %s',
+ (tuple(allowed_statuses),)).decode())
- query = """\
- SELECT %s
- FROM origin_visit
- INNER JOIN snapshot ON (origin_visit.snapshot=snapshot.id)
- WHERE
- origin = %%s AND snapshot is not null %s
- ORDER BY date DESC, visit DESC
- LIMIT 1
- """ % (', '.join(self.origin_visit_get_cols), extra_clause)
+ query_parts.append('ORDER BY date DESC, visit DESC LIMIT 1')
+
+ query = '\n'.join(query_parts)
cur.execute(query, (origin_id,))
r = cur.fetchone()
diff --git a/swh/storage/in_memory.py b/swh/storage/in_memory.py
--- a/swh/storage/in_memory.py
+++ b/swh/storage/in_memory.py
@@ -792,13 +792,13 @@
.. warning:: At most 1000 branches contained in the snapshot will be
returned for performance reasons. In order to browse the whole
- set of branches, the method :meth:`snapshot_get_branches`
- should be used instead.
+ set of branches, the methods :meth:`origin_visit_get_latest`
+ and :meth:`snapshot_get_branches` should be used instead.
Args:
origin (Union[str,int]): the origin's URL or identifier
allowed_statuses (list of str): list of visit statuses considered
- to find the latest snapshot for the visit. For instance,
+ to find the latest snapshot for the origin. For instance,
``allowed_statuses=['full']`` will only consider visits that
have successfully run to completion.
Returns:
@@ -810,21 +810,13 @@
or :const:`None` if the snapshot has less than 1000
branches.
"""
- if isinstance(origin, str):
- origin = self.origin_get({'url': origin})['id']
- visits = self._origin_visits[origin-1]
- if allowed_statuses is not None:
- visits = [visit for visit in visits
- if visit['status'] in allowed_statuses]
- snapshot = None
- for visit in sorted(visits, key=lambda v: (v['date'], v['visit']),
- reverse=True):
- snapshot_id = visit['snapshot']
- snapshot = self.snapshot_get(snapshot_id)
- if snapshot:
- break
+ if isinstance(origin, int):
+ origin = self.origin_get({'id': origin})['url']
- return snapshot
+ visit = self.origin_visit_get_latest(
+ origin, allowed_statuses=allowed_statuses, require_snapshot=True)
+ if visit and visit['snapshot']:
+ return self.snapshot_get(visit['snapshot'])
def snapshot_count_branches(self, snapshot_id, db=None, cur=None):
"""Count the number of branches in the snapshot with the given id
@@ -1323,12 +1315,52 @@
it does not exist
"""
+ if isinstance(origin, str):
+ origin = self.origin_get({'url': origin})['id']
origin_visit = None
if origin <= len(self._origin_visits) and \
visit <= len(self._origin_visits[origin-1]):
origin_visit = self._origin_visits[origin-1][visit-1]
return copy.deepcopy(origin_visit)
+ def origin_visit_get_latest(
+ self, origin, allowed_statuses=None, require_snapshot=False):
+ """Get the latest origin visit for the given origin, optionally
+ looking only for those with one of the given allowed_statuses
+ or for those with a known snapshot.
+
+ Args:
+ origin (str): the origin's URL
+ allowed_statuses (list of str): list of visit statuses considered
+ to find the latest visit. For instance,
+ ``allowed_statuses=['full']`` will only consider visits that
+ have successfully run to completion.
+ require_snapshot (bool): If True, only a visit with a snapshot
+ will be returned.
+ Returns:
+ dict: a dict with the following keys:
+
+ origin: the URL of the origin
+ visit: origin visit id
+ type: type of loader used for the visit
+ date: timestamp of such visit
+ status: Visit's new status
+ metadata: Data associated to the visit
+ snapshot (Optional[sha1_git]): identifier of the snapshot
+ associated to the visit
+ """
+ origin = self.origin_get({'url': origin})['id']
+ visits = self._origin_visits[origin-1]
+ if allowed_statuses is not None:
+ visits = [visit for visit in visits
+ if visit['status'] in allowed_statuses]
+ if require_snapshot:
+ visits = [visit for visit in visits
+ if visit['snapshot']
+ and visit['snapshot'] in self._snapshots]
+
+ return max(visits, key=lambda v: (v['date'], v['visit']), default=None)
+
def person_get(self, person):
"""Return the persons identified by their ids.
diff --git a/swh/storage/storage.py b/swh/storage/storage.py
--- a/swh/storage/storage.py
+++ b/swh/storage/storage.py
@@ -1063,13 +1063,13 @@
or :const:`None` if the snapshot has less than 1000
branches.
"""
- if isinstance(origin, str):
- origin = self.origin_get({'url': origin})['id']
+ if isinstance(origin, int):
+ origin = self.origin_get({'id': origin}, db=db, cur=cur)['url']
- origin_visit = db.origin_visit_get_latest_snapshot(
- origin, allowed_statuses=allowed_statuses, cur=cur)
- if origin_visit:
- origin_visit = dict(zip(db.origin_visit_get_cols, origin_visit))
+ origin_visit = self.origin_visit_get_latest(
+ origin, allowed_statuses=allowed_statuses, require_snapshot=True,
+ db=db, cur=cur)
+ if origin_visit and origin_visit['snapshot']:
return self.snapshot_get(origin_visit['snapshot'], db=db, cur=cur)
@db_transaction(statement_timeout=2000)
@@ -1328,12 +1328,50 @@
it does not exist
"""
+ if isinstance(origin, str):
+ origin = self.origin_get({'url': origin}, db=db, cur=cur)['id']
ori_visit = db.origin_visit_get(origin, visit, cur)
if not ori_visit:
return None
return dict(zip(db.origin_visit_get_cols, ori_visit))
+ @db_transaction(statement_timeout=4000)
+ def origin_visit_get_latest(
+ self, origin, allowed_statuses=None, require_snapshot=False,
+ db=None, cur=None):
+ """Get the latest origin visit for the given origin, optionally
+ looking only for those with one of the given allowed_statuses
+ or for those with a known snapshot.
+
+ Args:
+ origin (str): the origin's URL
+ allowed_statuses (list of str): list of visit statuses considered
+ to find the latest visit. For instance,
+ ``allowed_statuses=['full']`` will only consider visits that
+ have successfully run to completion.
+ require_snapshot (bool): If True, only a visit with a snapshot
+ will be returned.
+ Returns:
+ dict: a dict with the following keys:
+
+ origin: the URL of the origin
+ visit: origin visit id
+ type: type of loader used for the visit
+ date: timestamp of such visit
+ status: Visit's new status
+ metadata: Data associated to the visit
+ snapshot (Optional[sha1_git]): identifier of the snapshot
+ associated to the visit
+ """
+ origin = self.origin_get({'url': origin}, db=db, cur=cur)['id']
+
+ origin_visit = db.origin_visit_get_latest(
+ origin, allowed_statuses=allowed_statuses,
+ require_snapshot=require_snapshot, cur=cur)
+ if origin_visit:
+ return dict(zip(db.origin_visit_get_cols, origin_visit))
+
@db_transaction(statement_timeout=2000)
def object_find_by_sha1_git(self, ids, db=None, cur=None):
"""Return the objects found with the given ids.
diff --git a/swh/storage/tests/test_storage.py b/swh/storage/tests/test_storage.py
--- a/swh/storage/tests/test_storage.py
+++ b/swh/storage/tests/test_storage.py
@@ -2089,6 +2089,125 @@
expected_persons.reverse()
self.assertEqual(list(actual_persons), expected_persons)
+ def test_origin_visit_get_latest(self):
+ origin_id = self.storage.origin_add_one(self.origin)
+ origin_url = self.origin['url']
+ origin_visit1 = self.storage.origin_visit_add(origin_id,
+ self.date_visit1)
+ visit1_id = origin_visit1['visit']
+ origin_visit2 = self.storage.origin_visit_add(origin_id,
+ self.date_visit2)
+ visit2_id = origin_visit2['visit']
+
+ # Add a visit with the same date as the previous one
+ origin_visit3 = self.storage.origin_visit_add(origin_id,
+ self.date_visit2)
+ visit3_id = origin_visit3['visit']
+
+ origin_visit1 = self.storage.origin_visit_get_by(origin_url, visit1_id)
+ origin_visit2 = self.storage.origin_visit_get_by(origin_url, visit2_id)
+ origin_visit3 = self.storage.origin_visit_get_by(origin_url, visit3_id)
+
+ # Two visits, both with no snapshot
+ self.assertEqual(
+ origin_visit3,
+ self.storage.origin_visit_get_latest(origin_url))
+ self.assertIsNone(
+ self.storage.origin_visit_get_latest(origin_url,
+ require_snapshot=True))
+
+ # Add snapshot to visit1; require_snapshot=True makes it return
+ # visit1 and require_snapshot=False still returns visit2
+ self.storage.snapshot_add([self.complete_snapshot])
+ self.storage.origin_visit_update(
+ origin_id, visit1_id, snapshot=self.complete_snapshot['id'])
+ self.assertEqual(
+ {**origin_visit1, 'snapshot': self.complete_snapshot['id']},
+ self.storage.origin_visit_get_latest(
+ origin_url, require_snapshot=True)
+ )
+ self.assertEqual(
+ origin_visit3,
+ self.storage.origin_visit_get_latest(origin_url)
+ )
+
+ # Status filter: all three visits are status=ongoing, so no visit
+ # returned
+ self.assertIsNone(
+ self.storage.origin_visit_get_latest(
+ origin_url, allowed_statuses=['full'])
+ )
+
+ # Mark the first visit as completed and check status filter again
+ self.storage.origin_visit_update(origin_id, visit1_id, status='full')
+ self.assertEqual(
+ {
+ **origin_visit1,
+ 'snapshot': self.complete_snapshot['id'],
+ 'status': 'full'},
+ self.storage.origin_visit_get_latest(
+ origin_url, allowed_statuses=['full']),
+ )
+ self.assertEqual(
+ origin_visit3,
+ self.storage.origin_visit_get_latest(origin_url),
+ )
+
+ # Add snapshot to visit2 and check that the new snapshot is returned
+ self.storage.snapshot_add([self.empty_snapshot])
+ self.storage.origin_visit_update(
+ origin_id, visit2_id, snapshot=self.empty_snapshot['id'])
+ self.assertEqual(
+ {**origin_visit2, 'snapshot': self.empty_snapshot['id']},
+ self.storage.origin_visit_get_latest(
+ origin_url, require_snapshot=True),
+ )
+ self.assertEqual(
+ origin_visit3,
+ self.storage.origin_visit_get_latest(origin_url),
+ )
+
+ # Check that the status filter is still working
+ self.assertEqual(
+ {
+ **origin_visit1,
+ 'snapshot': self.complete_snapshot['id'],
+ 'status': 'full'},
+ self.storage.origin_visit_get_latest(
+ origin_url, allowed_statuses=['full']),
+ )
+
+ # Add snapshot to visit3 (same date as visit2)
+ self.storage.snapshot_add([self.complete_snapshot])
+ self.storage.origin_visit_update(
+ origin_id, visit3_id, snapshot=self.complete_snapshot['id'])
+ self.assertEqual(
+ {
+ **origin_visit1,
+ 'snapshot': self.complete_snapshot['id'],
+ 'status': 'full'},
+ self.storage.origin_visit_get_latest(
+ origin_url, allowed_statuses=['full']),
+ )
+ self.assertEqual(
+ {
+ **origin_visit1,
+ 'snapshot': self.complete_snapshot['id'],
+ 'status': 'full'},
+ self.storage.origin_visit_get_latest(
+ origin_url, allowed_statuses=['full'], require_snapshot=True),
+ )
+ self.assertEqual(
+ {**origin_visit3, 'snapshot': self.complete_snapshot['id']},
+ self.storage.origin_visit_get_latest(
+ origin_url),
+ )
+ self.assertEqual(
+ {**origin_visit3, 'snapshot': self.complete_snapshot['id']},
+ self.storage.origin_visit_get_latest(
+ origin_url, require_snapshot=True),
+ )
+
def test_person_get_fullname_unicity(self):
# given (person injection through revisions for example)
revision = self.revision
@@ -2670,7 +2789,7 @@
self.assertEqual(self.complete_snapshot,
self.storage.snapshot_get_latest(origin_id))
- # Status filter: both visits are status=ongoing, so no snapshot
+ # Status filter: all three visits are status=ongoing, so no snapshot
# returned
self.assertIsNone(
self.storage.snapshot_get_latest(origin_id,
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Wed, Dec 18, 9:53 AM (16 h, 56 m ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3224016
Attached To
D1581: Add a new origin_visit_get_latest endpoint.
Event Timeline
Log In to Comment