diff --git a/sql/swh-func.sql b/sql/swh-func.sql --- a/sql/swh-func.sql +++ b/sql/swh-func.sql @@ -832,7 +832,22 @@ end; $$; - +-- Add a new origin_visit with status 'ongoing' for origin origin_id at date. +-- +-- Returns the new visit id: one more than the origin's highest existing visit, or 1 if it has none. +create or replace function swh_origin_visit_add(origin_id bigint, date timestamptz) + returns bigint + language sql +as $$ + with last_known_visit as ( + select coalesce(max(visit), 0) as visit + from origin_visit + where origin = origin_id + ) + insert into origin_visit (origin, date, visit, status) + values (origin_id, date, (select visit from last_known_visit) + 1, 'ongoing') + returning visit; +$$; -- add tmp_occurrence_history entries to occurrence_history -- @@ -987,7 +1002,7 @@ language sql stable as $$ - select origin, visit, date + select origin, visit, date, status from origin_visit where origin=origin order by date desc diff --git a/sql/swh-schema.sql b/sql/swh-schema.sql --- a/sql/swh-schema.sql +++ b/sql/swh-schema.sql @@ -14,7 +14,7 @@ ); insert into dbversion(version, release, description) - values(74, now(), 'Work In Progress'); + values(75, now(), 'Work In Progress'); -- a SHA1 checksum (not necessarily originating from Git) create domain sha1 as bytea check (length(value) = 20); @@ -452,15 +452,29 @@ create index on revision_history(parent_id); +create type origin_visit_status as enum ( + 'ongoing', + 'full', + 'partial' +); + +comment on type origin_visit_status IS 'Possible visit status'; + -- The timestamps at which Software Heritage has made a visit of the given origin. 
create table origin_visit ( origin bigint not null references origin(id), visit bigint not null, date timestamptz not null, + status origin_visit_status not null, primary key (origin, visit) ); +comment on column origin_visit.origin is 'Visited origin'; +comment on column origin_visit.visit is 'The numbered visit occurrence for that origin'; +comment on column origin_visit.date is 'Visit date for that origin'; +comment on column origin_visit.status is 'Visit status for that origin'; + create index on origin_visit(date); -- Asynchronous notification of new origin visits diff --git a/sql/upgrades/075.sql b/sql/upgrades/075.sql new file mode 100644 --- /dev/null +++ b/sql/upgrades/075.sql @@ -0,0 +1,47 @@ +-- SWH DB schema upgrade +-- from_version: 74 +-- to_version: 75 +-- description: Add completion information to origin_visit + +INSERT INTO dbversion(version, release, description) + VALUES(75, now(), 'Work In Progress'); + +CREATE TYPE origin_visit_status AS ENUM ( + 'ongoing', + 'full', + 'partial' +); + +COMMENT ON TYPE origin_visit_status IS 'Possible visit status'; + +ALTER TABLE origin_visit + ADD COLUMN status origin_visit_status; + +-- Already visited origins are considered full +UPDATE origin_visit SET status = 'full'; + +-- providing a status for visits is mandatory +ALTER TABLE origin_visit + ALTER COLUMN status SET NOT NULL; + +comment on column origin_visit.origin is 'Visited origin'; +comment on column origin_visit.visit is 'The numbered visit occurrence for that origin'; +comment on column origin_visit.date is 'Visit date for that origin'; +comment on column origin_visit.status is 'Visit status for that origin'; + +-- Add a new origin_visit with status 'ongoing' for origin origin_id at date. +-- +-- Returns the new visit id: one more than the origin's highest existing visit, or 1 if it has none. 
+create or replace function swh_origin_visit_add(origin_id bigint, date timestamptz) + returns bigint + language sql +as $$ + with last_known_visit as ( + select coalesce(max(visit), 0) as visit + from origin_visit + where origin = origin_id + ) + insert into origin_visit (origin, date, visit, status) + values (origin_id, date, (select visit from last_known_visit) + 1, 'ongoing') + returning visit; +$$; diff --git a/swh/storage/api/client.py b/swh/storage/api/client.py --- a/swh/storage/api/client.py +++ b/swh/storage/api/client.py @@ -151,6 +151,14 @@ def origin_add_one(self, origin): return self.post('origin/add', {'origin': origin}) + def origin_visit_add(self, origin, ts): + return self.post('origin/visit/add', {'origin': origin, 'ts': ts}) + + def origin_visit_update(self, origin, visit_id, status): + return self.post('origin/visit/update', {'origin': origin, + 'visit_id': visit_id, + 'status': status}) + def origin_visit_get(self, origin): return self.post('origin/visit/get', {'origin': origin}) diff --git a/swh/storage/api/server.py b/swh/storage/api/server.py --- a/swh/storage/api/server.py +++ b/swh/storage/api/server.py @@ -186,6 +186,17 @@ return encode_data(g.storage.origin_visit_get(**decode_request(request))) +@app.route('/origin/visit/add', methods=['POST']) +def origin_visit_add(): + return encode_data(g.storage.origin_visit_add(**decode_request(request))) + + +@app.route('/origin/visit/update', methods=['POST']) +def origin_visit_update(): + return encode_data(g.storage.origin_visit_update( + **decode_request(request))) + + @app.route('/person', methods=['POST']) def person_get(): return encode_data(g.storage.person_get(**decode_request(request))) diff --git a/swh/storage/db.py b/swh/storage/db.py --- a/swh/storage/db.py +++ b/swh/storage/db.py @@ -1,4 +1,4 @@ -# Copyright (C) 2015 The Software Heritage developers +# Copyright (C) 2015-2016 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # 
License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -344,12 +344,37 @@ revision_get_cols = revision_add_cols + [ 'author_id', 'committer_id', 'parents'] + def origin_visit_add(self, origin, ts, cur=None): + """Add a new origin_visit for origin origin at timestamp ts with + status 'ongoing'. + + Args: + origin: origin concerned by the visit + ts: the date of the visit + + Returns: + The new visit id for that origin + + """ + cur = self._cursor(cur) + cur.execute('SELECT swh_origin_visit_add(%s, %s)', + (origin, ts)) + return cur.fetchone()[0] + + def origin_visit_update(self, origin, visit_id, status, cur=None): + """Set the status of visit visit_id of origin origin to status.""" + cur = self._cursor(cur) + update = """UPDATE origin_visit + SET status=%s + WHERE origin=%s AND visit=%s""" + cur.execute(update, (status, origin, visit_id)) + origin_visit_get_cols = [ - 'origin', 'visit', 'date' + 'origin', 'visit', 'date', 'status' ] def origin_visit_get(self, origin_id, cur=None): - """Retrieve occurrence's history information by origin_id. + """Retrieve all visits for origin with id origin_id. Args: origin_id: The occurrence's origin @@ -361,7 +386,9 @@ cur = self._cursor(cur) cur.execute( - 'SELECT origin, visit, date FROM origin_visit where origin=%s', + """SELECT origin, visit, date, status + FROM origin_visit + WHERE origin=%s""", (origin_id, )) yield from cursor_to_bytes(cur) diff --git a/swh/storage/storage.py b/swh/storage/storage.py --- a/swh/storage/storage.py +++ b/swh/storage/storage.py @@ -738,6 +738,40 @@ 'target_type': line[3], } + @db_transaction + def origin_visit_add(self, origin, ts, cur=None): + """Add an origin_visit for the origin at ts with status 'ongoing'. 
+ + Args: + origin: Visited Origin id + ts: timestamp of the visit + + Returns: + Dict with keys origin and visit where: + - origin: origin identifier + - visit: the visit identifier for the new visit occurrence + + """ + return { + 'origin': origin, + 'visit': self.db.origin_visit_add(origin, ts, cur) + } + + @db_transaction + def origin_visit_update(self, origin, visit_id, status, cur=None): + """Update an origin_visit's status. + + Args: + origin: Visited Origin id + visit_id: Visit's id + status: Visit's new status ('ongoing', 'full' or 'partial') + + Returns: + None + + """ + return self.db.origin_visit_update(origin, visit_id, status, cur) + @db_transaction_generator def origin_visit_get(self, origin, cur=None): """Retrieve origin's visit dates. diff --git a/swh/storage/tests/test_storage.py b/swh/storage/tests/test_storage.py --- a/swh/storage/tests/test_storage.py +++ b/swh/storage/tests/test_storage.py @@ -1,4 +1,4 @@ -# Copyright (C) 2015 The Software Heritage developers +# Copyright (C) 2015-2016 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -1085,56 +1085,79 @@ 'project': None}) @istest - def origin_visit_get(self): - # 1- given + def origin_visit_add(self): + # given self.assertIsNone(self.storage.origin_get(self.origin2)) - self.storage.content_add([self.cont2]) - self.storage.directory_add([self.dir2]) - self.storage.revision_add([self.revision2, self.revision3]) origin_id = self.storage.origin_add_one(self.origin2) + self.assertIsNotNone(origin_id) - # occurrence2 points to 'revision2' with branch 'master', we - # need to point to the right origin - occurrence2 = self.occurrence2.copy() - occurrence2.update({'origin': origin_id, - 'date': occurrence2['date']}) - - dt = datetime.timedelta(days=1) - - occurrence3 = self.occurrence2.copy() - occurrence3.update({'origin': origin_id, - 'date': 
occurrence3['date'] + dt, - 'target': self.revision3['id']}) + # when + origin_visit1 = self.storage.origin_visit_add( + origin_id, + ts=self.occurrence2['date']) - # 2 occurrences on same revision with lower validity date with 1h delta - self.storage.occurrence_add([occurrence2]) + # then + self.assertEquals(origin_visit1['origin'], origin_id) + self.assertIsNotNone(origin_visit1['visit']) + self.assertTrue(origin_visit1['visit'] > 0) - # when actual_origin_visits = list(self.storage.origin_visit_get(origin_id)) - self.assertEquals(len(actual_origin_visits), 1) self.assertEquals(actual_origin_visits, [{ 'origin': origin_id, - 'date': occurrence2['date'], - 'visit': 1 + 'date': self.occurrence2['date'], + 'visit': origin_visit1['visit'], + 'status': 'ongoing', }]) - # 2- given - self.storage.occurrence_add([occurrence3]) + @istest + def origin_visit_update(self): + # given + origin_id = self.storage.origin_add_one(self.origin2) + origin_id2 = self.storage.origin_add_one(self.origin) + + origin_visit1 = self.storage.origin_visit_add( + origin_id, + ts=self.occurrence2['date']) + + origin_visit2 = self.storage.origin_visit_add( + origin_id, + ts=self.occurrence3['date']) + + origin_visit3 = self.storage.origin_visit_add( + origin_id2, + ts=self.occurrence3['date']) # when + self.storage.origin_visit_update(origin_id, origin_visit1['visit'], + status='full') + self.storage.origin_visit_update(origin_id2, origin_visit3['visit'], + status='partial') + + # then actual_origin_visits = list(self.storage.origin_visit_get(origin_id)) - self.assertEquals(len(actual_origin_visits), 2) self.assertEquals(actual_origin_visits, [{ - 'origin': origin_id, - 'date': occurrence2['date'], - 'visit': 1 - }, { - 'origin': origin_id, - 'date': occurrence3['date'], - 'visit': 2 + 'origin': origin_visit2['origin'], + 'date': self.occurrence2['date'], + 'visit': origin_visit1['visit'], + 'status': 'full' + }, + { + 'origin': origin_visit2['origin'], + 'date': self.occurrence3['date'], 
+ 'visit': origin_visit2['visit'], + 'status': 'ongoing' + }]) + + actual_origin_visits2 = list(self.storage.origin_visit_get(origin_id2)) + self.assertEquals(actual_origin_visits2, + [{ + 'origin': origin_visit3['origin'], + 'date': self.occurrence3['date'], + 'visit': origin_visit3['visit'], + 'status': 'partial' }]) @istest