diff --git a/sql/upgrades/136.sql b/sql/upgrades/136.sql new file mode 100644 --- /dev/null +++ b/sql/upgrades/136.sql @@ -0,0 +1,41 @@ +-- SWH DB schema upgrade +-- from_version: 135 +-- to_version: 136 +-- description: Add a 'type' column to the origin_visit table. + +insert into dbversion(version, release, description) + values(136, now(), 'Work In Progress'); + +-- Stop swh-storage first + +alter table origin_visit + add column type text; + +comment on column origin_visit.type is 'Type of loader that did the visit (hg, git, ...)'; + + +create or replace function swh_origin_visit_add(origin_id bigint, date timestamptz, type text) + returns bigint + language sql +as $$ + with last_known_visit as ( + select coalesce(max(visit), 0) as visit + from origin_visit + where origin = origin_id + ) + insert into origin_visit (origin, date, type, visit, status) + values (origin_id, date, type, (select visit from last_known_visit) + 1, 'ongoing') + returning visit; +$$; + + +-- Start swh-storage here + + +update origin_visit + set type = origin.type + from origin + where origin_visit.origin = origin.id; + +alter table origin_visit + alter column type set not null; diff --git a/swh/storage/api/client.py b/swh/storage/api/client.py --- a/swh/storage/api/client.py +++ b/swh/storage/api/client.py @@ -175,7 +175,7 @@ def origin_add_one(self, origin): return self.post('origin/add', {'origin': origin}) - def origin_visit_add(self, origin, date, *, ts=None): + def origin_visit_add(self, origin, date, type=None, *, ts=None): if ts is None: if date is None: raise TypeError('origin_visit_add expected 2 arguments.') @@ -185,7 +185,9 @@ "to 'date' in v0.0.109.", DeprecationWarning) date = ts - return self.post('origin/visit/add', {'origin': origin, 'date': date}) + return self.post( + 'origin/visit/add', + {'origin': origin, 'date': date, 'type': type}) def origin_visit_update(self, origin, visit_id, status=None, metadata=None, snapshot=None): diff --git
a/swh/storage/db.py b/swh/storage/db.py --- a/swh/storage/db.py +++ b/swh/storage/db.py @@ -301,21 +301,22 @@ revision_get_cols = revision_add_cols + [ 'author_id', 'committer_id', 'parents'] - def origin_visit_add(self, origin, ts, cur=None): + def origin_visit_add(self, origin, ts, type, cur=None): """Add a new origin_visit for origin origin at timestamp ts with status 'ongoing'. Args: origin: origin concerned by the visit ts: the date of the visit + type: type of loader for the visit Returns: The new visit index step for that origin """ cur = self._cursor(cur) - self._cursor(cur).execute('SELECT swh_origin_visit_add(%s, %s)', - (origin, ts)) + cur.execute('SELECT swh_origin_visit_add(%s, %s, %s)', + (origin, ts, type)) return cur.fetchone()[0] def origin_visit_update(self, origin_id, visit_id, updates, cur=None): @@ -346,7 +347,7 @@ }) cur.execute(query, (*values, *where_values)) - def origin_visit_upsert(self, origin, visit, date, status, + def origin_visit_upsert(self, origin, visit, date, type, status, metadata, snapshot, cur=None): cur = self._cursor(cur) query = """INSERT INTO origin_visit ({cols}) VALUES ({values}) @@ -356,10 +357,11 @@ values=', '.join('%s' for col in self.origin_visit_get_cols), updates=', '.join('{0}=excluded.{0}'.format(col) for col in self.origin_visit_get_cols)) - cur.execute(query, (origin, visit, date, status, metadata, snapshot)) + cur.execute( + query, (origin, visit, date, type, status, metadata, snapshot)) - origin_visit_get_cols = ['origin', 'visit', 'date', 'status', 'metadata', - 'snapshot'] + origin_visit_get_cols = ['origin', 'visit', 'date', 'type', 'status', + 'metadata', 'snapshot'] def origin_visit_get_all(self, origin_id, last_visit=None, limit=None, cur=None): diff --git a/swh/storage/in_memory.py b/swh/storage/in_memory.py --- a/swh/storage/in_memory.py +++ b/swh/storage/in_memory.py @@ -1139,12 +1139,16 @@ raise NotImplementedError('fetch_history_get is deprecated, use ' 'origin_visit_get instead.') -
def origin_visit_add(self, origin, date=None, *, ts=None): + def origin_visit_add(self, origin, date=None, type=None, *, ts=None): """Add an origin_visit for the origin at date with status 'ongoing'. + For backward compatibility, `type` is optional and defaults to + the origin's type. + Args: origin (int): visited origin's identifier date: timestamp of such visit + type (str): the type of loader used for the visit (hg, git, ...) Returns: dict: dictionary with keys origin and visit where: @@ -1176,6 +1180,7 @@ visit = { 'origin': origin_id, 'date': date, + 'type': type or self._origins[origin_id-1]['type'], 'status': status, 'snapshot': None, 'metadata': None, @@ -1221,7 +1226,8 @@ origin = self.origin_get([{'id': origin_id}])[0] del origin['id'] self.journal_writer.write_update('origin_visit', { - 'origin': origin, 'visit': visit_id, + 'origin': origin, 'type': visit['type'], + 'visit': visit_id, 'status': status or visit['status'], 'date': visit['date'], 'metadata': metadata or visit['metadata'], @@ -1246,6 +1252,7 @@ origin: Visited Origin id visit: origin visit id + type: type of loader used for the visit date: timestamp of such visit status: Visit's new status metadata: Data associated to the visit diff --git a/swh/storage/sql/30-swh-schema.sql b/swh/storage/sql/30-swh-schema.sql --- a/swh/storage/sql/30-swh-schema.sql +++ b/swh/storage/sql/30-swh-schema.sql @@ -209,6 +209,7 @@ origin bigint not null, visit bigint not null, date timestamptz not null, + type text not null, status origin_visit_status not null, metadata jsonb, snapshot sha1_git @@ -217,6 +218,7 @@ comment on column origin_visit.origin is 'Visited origin'; comment on column origin_visit.visit is 'Sequential visit number for the origin'; comment on column origin_visit.date is 'Visit timestamp'; +comment on column origin_visit.type is 'Type of loader that did the visit (hg, git, ...)'; comment on column origin_visit.status is 'Visit result'; comment on column origin_visit.metadata is 'Origin
metadata at visit time'; comment on column origin_visit.snapshot is 'Origin snapshot at visit time'; diff --git a/swh/storage/sql/40-swh-func.sql b/swh/storage/sql/40-swh-func.sql --- a/swh/storage/sql/40-swh-func.sql +++ b/swh/storage/sql/40-swh-func.sql @@ -693,7 +693,7 @@ -- add a new origin_visit for origin origin_id at date. -- -- Returns the new visit id. -create or replace function swh_origin_visit_add(origin_id bigint, date timestamptz) +create or replace function swh_origin_visit_add(origin_id bigint, date timestamptz, type text) returns bigint language sql as $$ @@ -702,8 +702,8 @@ from origin_visit where origin = origin_id ) - insert into origin_visit (origin, date, visit, status) - values (origin_id, date, (select visit from last_known_visit) + 1, 'ongoing') + insert into origin_visit (origin, date, type, visit, status) + values (origin_id, date, type, (select visit from last_known_visit) + 1, 'ongoing') returning visit; $$; diff --git a/swh/storage/storage.py b/swh/storage/storage.py --- a/swh/storage/storage.py +++ b/swh/storage/storage.py @@ -1149,13 +1149,17 @@ return None @db_transaction() - def origin_visit_add(self, origin, date=None, db=None, cur=None, *, - ts=None): + def origin_visit_add(self, origin, date=None, type=None, + db=None, cur=None, *, ts=None): """Add an origin_visit for the origin at ts with status 'ongoing'. + For backward compatibility, `type` is optional and defaults to + the origin's type. + Args: origin: Visited Origin id date: timestamp of such visit + type (str): the type of loader used for the visit (hg, git, ...) 
Returns: dict: dictionary with keys origin and visit where: @@ -1179,7 +1183,11 @@ if isinstance(date, str): date = dateutil.parser.parse(date) - visit_id = db.origin_visit_add(origin_id, date, cur) + if type is None: + origin = self.origin_get({'id': origin}, db=db, cur=cur) + type = origin['type'] + + visit_id = db.origin_visit_add(origin_id, date, type, cur) if self.journal_writer: # We can write to the journal only after inserting to the @@ -1187,7 +1195,8 @@ origin = self.origin_get([{'id': origin_id}], db=db, cur=cur)[0] del origin['id'] self.journal_writer.write_addition('origin_visit', { - 'origin': origin, 'date': date, 'visit': visit_id, + 'origin': origin, 'date': date, 'type': type, + 'visit': visit_id, 'status': 'ongoing', 'metadata': None, 'snapshot': None}) return { @@ -1265,8 +1274,11 @@ if self.journal_writer: for visit in visits: visit = visit.copy() - visit['origin'] = self.origin_get( + origin = self.origin_get( [{'id': visit['origin']}], db=db, cur=cur)[0] + visit['origin'] = origin + if visit.get('type') is None: + visit['type'] = origin['type'] del visit['origin']['id'] self.journal_writer.write_addition('origin_visit', visit) diff --git a/swh/storage/tests/test_storage.py b/swh/storage/tests/test_storage.py --- a/swh/storage/tests/test_storage.py +++ b/swh/storage/tests/test_storage.py @@ -355,7 +355,7 @@ self.origin2 = { 'url': 'file:///dev/zero', - 'type': 'git', + 'type': 'hg', } self.provider = { @@ -1461,6 +1461,45 @@ # when origin_visit1 = self.storage.origin_visit_add( origin_id, + type='git', + date=self.date_visit2) + + actual_origin_visits = list(self.storage.origin_visit_get(origin_id)) + self.assertEqual(actual_origin_visits, + [{ + 'origin': origin_id, + 'date': self.date_visit2, + 'visit': origin_visit1['visit'], + 'type': 'git', + 'status': 'ongoing', + 'metadata': None, + 'snapshot': None, + }]) + + expected_origin = self.origin2.copy() + data = { + 'origin': expected_origin, + 'date': self.date_visit2, + 'visit': 
origin_visit1['visit'], + 'type': 'git', + 'status': 'ongoing', + 'metadata': None, + 'snapshot': None, + } + self.assertEqual(list(self.journal_writer.objects), + [('origin', expected_origin), + ('origin_visit', data)]) + + def test_origin_visit_add_default_type(self): + # given + self.assertIsNone(self.storage.origin_get([self.origin2])[0]) + + origin_id = self.storage.origin_add_one(self.origin2) + self.assertIsNotNone(origin_id) + + # when + origin_visit1 = self.storage.origin_visit_add( + origin_id, date=self.date_visit2) # then @@ -1473,6 +1512,7 @@ 'origin': origin_id, 'date': self.date_visit2, 'visit': origin_visit1['visit'], + 'type': 'hg', 'status': 'ongoing', 'metadata': None, 'snapshot': None, @@ -1483,6 +1523,7 @@ 'origin': expected_origin, 'date': self.date_visit2, 'visit': origin_visit1['visit'], + 'type': 'hg', 'status': 'ongoing', 'metadata': None, 'snapshot': None, @@ -1493,8 +1534,8 @@ def test_origin_visit_update(self): # given - origin_id = self.storage.origin_add_one(self.origin2) - origin_id2 = self.storage.origin_add_one(self.origin) + origin_id = self.storage.origin_add_one(self.origin) + origin_id2 = self.storage.origin_add_one(self.origin2) origin_visit1 = self.storage.origin_visit_add( origin_id, @@ -1525,6 +1566,7 @@ 'origin': origin_visit2['origin'], 'date': self.date_visit2, 'visit': origin_visit1['visit'], + 'type': self.origin['type'], 'status': 'full', 'metadata': visit1_metadata, 'snapshot': None, @@ -1532,6 +1574,7 @@ 'origin': origin_visit2['origin'], 'date': self.date_visit3, 'visit': origin_visit2['visit'], + 'type': self.origin['type'], 'status': 'ongoing', 'metadata': None, 'snapshot': None, @@ -1544,6 +1587,7 @@ 'origin': origin_visit2['origin'], 'date': self.date_visit2, 'visit': origin_visit1['visit'], + 'type': self.origin['type'], 'status': 'full', 'metadata': visit1_metadata, 'snapshot': None, @@ -1556,6 +1600,7 @@ 'origin': origin_visit2['origin'], 'date': self.date_visit3, 'visit': origin_visit2['visit'], + 'type': 
self.origin['type'], 'status': 'ongoing', 'metadata': None, 'snapshot': None, @@ -1567,17 +1612,19 @@ 'origin': origin_visit3['origin'], 'date': self.date_visit3, 'visit': origin_visit3['visit'], + 'type': self.origin2['type'], 'status': 'partial', 'metadata': None, 'snapshot': None, }]) - expected_origin = self.origin2.copy() - expected_origin2 = self.origin.copy() + expected_origin = self.origin.copy() + expected_origin2 = self.origin2.copy() data1 = { 'origin': expected_origin, 'date': self.date_visit2, 'visit': origin_visit1['visit'], + 'type': self.origin['type'], 'status': 'ongoing', 'metadata': None, 'snapshot': None, @@ -1586,6 +1633,7 @@ 'origin': expected_origin, 'date': self.date_visit3, 'visit': origin_visit2['visit'], + 'type': self.origin['type'], 'status': 'ongoing', 'metadata': None, 'snapshot': None, @@ -1594,6 +1642,7 @@ 'origin': expected_origin2, 'date': self.date_visit3, 'visit': origin_visit3['visit'], + 'type': self.origin2['type'], 'status': 'ongoing', 'metadata': None, 'snapshot': None, @@ -1602,6 +1651,7 @@ 'origin': expected_origin, 'date': self.date_visit2, 'visit': origin_visit1['visit'], + 'type': self.origin['type'], 'metadata': visit1_metadata, 'status': 'full', 'snapshot': None, @@ -1610,6 +1660,7 @@ 'origin': expected_origin2, 'date': self.date_visit3, 'visit': origin_visit3['visit'], + 'type': self.origin2['type'], 'status': 'partial', 'metadata': None, 'snapshot': None, @@ -1646,8 +1697,8 @@ self.assertEqual(actual_origin_visit['snapshot'], self.snapshot['id']) def test_origin_visit_get_by(self): - origin_id = self.storage.origin_add_one(self.origin2) - origin_id2 = self.storage.origin_add_one(self.origin) + origin_id = self.storage.origin_add_one(self.origin) + origin_id2 = self.storage.origin_add_one(self.origin2) origin_visit1 = self.storage.origin_visit_add( origin_id, @@ -1675,6 +1726,7 @@ 'origin': origin_id, 'visit': origin_visit1['visit'], 'date': self.date_visit2, + 'type': self.origin['type'], 'metadata': 
visit1_metadata, 'status': 'full', 'snapshot': self.snapshot['id'], @@ -1700,6 +1752,7 @@ 'origin': origin_id, 'date': self.date_visit2, 'visit': 123, + 'type': self.origin2['type'], 'status': 'full', 'metadata': None, 'snapshot': None, @@ -1708,6 +1761,7 @@ 'origin': origin_id, 'date': '2018-01-01 23:00:00+00', 'visit': 1234, + 'type': self.origin2['type'], 'status': 'full', 'metadata': None, 'snapshot': None, @@ -1721,6 +1775,7 @@ 'origin': origin_id, 'date': self.date_visit2, 'visit': 123, + 'type': self.origin2['type'], 'status': 'full', 'metadata': None, 'snapshot': None, @@ -1729,6 +1784,7 @@ 'origin': origin_id, 'date': self.date_visit3, 'visit': 1234, + 'type': self.origin2['type'], 'status': 'full', 'metadata': None, 'snapshot': None, @@ -1740,6 +1796,7 @@ 'origin': expected_origin, 'date': self.date_visit2, 'visit': 123, + 'type': self.origin2['type'], 'status': 'full', 'metadata': None, 'snapshot': None, @@ -1748,6 +1805,7 @@ 'origin': expected_origin, 'date': self.date_visit3, 'visit': 1234, + 'type': self.origin2['type'], 'status': 'full', 'metadata': None, 'snapshot': None, @@ -1772,6 +1830,7 @@ 'origin': origin_id, 'date': self.date_visit2, 'visit': origin_visit1['visit'], + 'type': self.origin2['type'], 'status': 'full', 'metadata': None, 'snapshot': None, @@ -1787,6 +1846,7 @@ 'origin': origin_id, 'date': self.date_visit2, 'visit': origin_visit1['visit'], + 'type': self.origin2['type'], 'status': 'full', 'metadata': None, 'snapshot': None, @@ -1797,6 +1857,7 @@ 'origin': expected_origin, 'date': self.date_visit2, 'visit': origin_visit1['visit'], + 'type': self.origin2['type'], 'status': 'ongoing', 'metadata': None, 'snapshot': None, @@ -1805,6 +1866,7 @@ 'origin': expected_origin, 'date': self.date_visit2, 'visit': origin_visit1['visit'], + 'type': self.origin2['type'], 'status': 'full', 'metadata': None, 'snapshot': None, @@ -1913,6 +1975,7 @@ 'origin': expected_origin, 'date': self.date_visit1, 'visit': origin_visit1['visit'], + 'type': 
self.origin['type'], 'status': 'ongoing', 'metadata': None, 'snapshot': None, @@ -1921,6 +1984,7 @@ 'origin': expected_origin, 'date': self.date_visit1, 'visit': origin_visit1['visit'], + 'type': self.origin['type'], 'status': 'ongoing', 'metadata': None, 'snapshot': self.empty_snapshot['id'], @@ -1950,6 +2014,7 @@ 'origin': expected_origin, 'date': self.date_visit1, 'visit': origin_visit1['visit'], + 'type': self.origin['type'], 'status': 'ongoing', 'metadata': None, 'snapshot': None, @@ -1958,6 +2023,7 @@ 'origin': expected_origin, 'date': self.date_visit1, 'visit': origin_visit1['visit'], + 'type': self.origin['type'], 'status': 'ongoing', 'metadata': None, 'snapshot': self.empty_snapshot['id'], @@ -2211,6 +2277,7 @@ 'origin': expected_origin, 'date': self.date_visit1, 'visit': origin_visit1['visit'], + 'type': self.origin['type'], 'status': 'ongoing', 'metadata': None, 'snapshot': None, @@ -2219,6 +2286,7 @@ 'origin': expected_origin, 'date': self.date_visit1, 'visit': origin_visit1['visit'], + 'type': self.origin['type'], 'status': 'ongoing', 'metadata': None, 'snapshot': self.snapshot['id'], @@ -2227,6 +2295,7 @@ 'origin': expected_origin, 'date': self.date_visit2, 'visit': origin_visit2['visit'], + 'type': self.origin['type'], 'status': 'ongoing', 'metadata': None, 'snapshot': None, @@ -2235,6 +2304,7 @@ 'origin': expected_origin, 'date': self.date_visit2, 'visit': origin_visit2['visit'], + 'type': self.origin['type'], 'status': 'ongoing', 'metadata': None, 'snapshot': self.snapshot['id'], @@ -2273,6 +2343,7 @@ 'origin': expected_origin, 'date': self.date_visit1, 'visit': origin_visit1['visit'], + 'type': self.origin['type'], 'status': 'ongoing', 'metadata': None, 'snapshot': None, @@ -2281,6 +2352,7 @@ 'origin': expected_origin, 'date': self.date_visit1, 'visit': origin_visit1['visit'], + 'type': self.origin['type'], 'status': 'ongoing', 'metadata': None, 'snapshot': self.snapshot['id'], @@ -2289,6 +2361,7 @@ 'origin': expected_origin, 'date': 
self.date_visit2, 'visit': origin_visit2['visit'], + 'type': self.origin['type'], 'status': 'ongoing', 'metadata': None, 'snapshot': None, @@ -2297,6 +2370,7 @@ 'origin': expected_origin, 'date': self.date_visit2, 'visit': origin_visit2['visit'], + 'type': self.origin['type'], 'status': 'ongoing', 'metadata': None, 'snapshot': self.snapshot['id'],