diff --git a/sql/upgrades/142.sql b/sql/upgrades/142.sql
new file mode 100644
--- /dev/null
+++ b/sql/upgrades/142.sql
@@ -0,0 +1,10 @@
+-- SWH DB schema upgrade
+-- from_version: 141
+-- to_version: 142
+-- description: Remove origin.type
+
+insert into dbversion(version, release, description)
+      values(142, now(), 'Work In Progress');
+
+alter table origin drop column type;
+
diff --git a/swh/storage/db.py b/swh/storage/db.py
--- a/swh/storage/db.py
+++ b/swh/storage/db.py
@@ -604,15 +604,15 @@
         cur.execute('SELECT * FROM swh_stat_counters()')
         yield from cur
 
-    def origin_add(self, type, url, cur=None):
+    def origin_add(self, url, cur=None):
         """Insert a new origin and return the new identifier."""
-        insert = """INSERT INTO origin (type, url) values (%s, %s)
+        insert = """INSERT INTO origin (url) values (%s)
                     RETURNING id"""
 
-        cur.execute(insert, (type, url))
+        cur.execute(insert, (url,))
         return cur.fetchone()[0]
 
-    origin_cols = ['id', 'type', 'url']
+    origin_cols = ['id', 'url']
 
     def origin_get_by_url(self, origins, cur=None):
         """Retrieve origin `(id, type, url)` from urls if found."""
diff --git a/swh/storage/in_memory.py b/swh/storage/in_memory.py
--- a/swh/storage/in_memory.py
+++ b/swh/storage/in_memory.py
@@ -1014,21 +1014,21 @@
         origin = origin.to_dict()
         if ENABLE_ORIGIN_IDS:
             origin['id'] = origin_id
+
+        if 'type' in origin:
+            del origin['type']
+
         return origin
 
     def origin_get(self, origins):
         """Return origins, either all identified by their ids or all
-        identified by tuples (type, url).
-
-        If the url is given and the type is omitted, one of the origins with
-        that url is returned.
+        identified by urls.
 
         Args:
             origin: a list of dictionaries representing the individual
                 origins to find.
-                These dicts have either the key url (and optionally type):
+                These dicts have either the key url:
 
-                - type (FIXME: enum TBD): the origin type ('git', 'wget', ...)
                 - url (bytes): the url the origin points to
 
                 or the id:
@@ -1039,7 +1039,6 @@
             dict: the origin dictionary with the keys:
 
             - id: origin's id
-            - type: origin's type
             - url: origin's url
 
         Raises:
@@ -1169,7 +1168,6 @@
             origins: list of dictionaries representing the individual origins,
                 with the following keys:
 
-                - type: the origin type ('git', 'svn', 'deb', ...)
                 - url (bytes): the url the origin points to
 
         Returns:
@@ -1191,7 +1189,6 @@
             origin: dictionary representing the individual origin to add. This
                 dict has the following keys:
 
-                - type (FIXME: enum TBD): the origin type ('git', 'wget', ...)
                 - url (bytes): the url the origin points to
 
         Returns:
@@ -1200,6 +1197,7 @@
 
         """
         origin = Origin.from_dict(origin)
+
         if origin.url in self._origins:
             if ENABLE_ORIGIN_IDS:
                 (origin_id, _) = self._origins[origin.url]
diff --git a/swh/storage/sql/30-swh-schema.sql b/swh/storage/sql/30-swh-schema.sql
--- a/swh/storage/sql/30-swh-schema.sql
+++ b/swh/storage/sql/30-swh-schema.sql
@@ -17,7 +17,7 @@
 
 -- latest schema version
 insert into dbversion(version, release, description)
-      values(141, now(), 'Work In Progress');
+      values(142, now(), 'Work In Progress');
 
 -- a SHA1 checksum
 create domain sha1 as bytea check (length(value) = 20);
@@ -76,12 +76,10 @@
 create table origin
 (
   id       bigserial not null,
-  type     text, -- TODO use an enum here (?)
   url      text not null
 );
 
 comment on column origin.id is 'Artifact origin id';
-comment on column origin.type is 'Type of origin';
 comment on column origin.url is 'URL of origin';
 
 
diff --git a/swh/storage/storage.py b/swh/storage/storage.py
--- a/swh/storage/storage.py
+++ b/swh/storage/storage.py
@@ -1475,7 +1475,7 @@
 
         return ret
 
-    origin_keys = ['id', 'type', 'url']
+    origin_keys = ['id', 'url']
 
     @db_transaction(statement_timeout=500)
     def origin_get(self, origins, db=None, cur=None):
@@ -1488,9 +1488,8 @@
         Args:
             origin: a list of dictionaries representing the individual
                 origins to find.
-                These dicts have either the key url (and optionally type):
+                These dicts have either the key url:
 
-                - type (FIXME: enum TBD): the origin type ('git', 'wget', ...)
                 - url (bytes): the url the origin points to
 
                 or the id:
@@ -1501,11 +1500,10 @@
             dict: the origin dictionary with the keys:
 
             - id: origin's id
-            - type: origin's type
             - url: origin's url
 
         Raises:
-            ValueError: if the keys does not match (url and type) nor id.
+            ValueError: if the url or the id don't exist.
 
         """
         if isinstance(origins, dict):
@@ -1655,7 +1653,7 @@
         if self.journal_writer:
             self.journal_writer.write_addition('origin', origin)
 
-        return db.origin_add(origin['type'], origin['url'], cur)
+        return db.origin_add(origin['url'], cur)
 
     @db_transaction(statement_timeout=500)
     def stat_counters(self, db=None, cur=None):
diff --git a/swh/storage/tests/algos/test_origin.py b/swh/storage/tests/algos/test_origin.py
--- a/swh/storage/tests/algos/test_origin.py
+++ b/swh/storage/tests/algos/test_origin.py
@@ -16,9 +16,9 @@
 def test_iter_origins():
     storage = Storage()
     origins = storage.origin_add([
-        {'type': 'foo', 'url': 'bar'},
-        {'type': 'baz', 'url': 'qux'},
-        {'type': 'quux', 'url': 'quuz'},
+        {'url': 'bar'},
+        {'url': 'qux'},
+        {'url': 'quuz'},
     ])
     assert_list_eq(iter_origins(storage), origins)
     assert_list_eq(iter_origins(storage, batch_size=1), origins)
diff --git a/swh/storage/tests/storage_data.py b/swh/storage/tests/storage_data.py
--- a/swh/storage/tests/storage_data.py
+++ b/swh/storage/tests/storage_data.py
@@ -338,12 +338,10 @@
 
 origin = {
     'url': 'file:///dev/null',
-    'type': 'git',
 }
 
 origin2 = {
     'url': 'file:///dev/zero',
-    'type': 'hg',
 }
 
 origins = (origin, origin2)
diff --git a/swh/storage/tests/test_storage.py b/swh/storage/tests/test_storage.py
--- a/swh/storage/tests/test_storage.py
+++ b/swh/storage/tests/test_storage.py
@@ -941,7 +941,6 @@
         actual_origin1 = swh_storage.origin_get({'id': id})
 
         assert actual_origin1 == {'id': id,
-                                  'type': data.origin['type'],
                                   'url': data.origin['url']}
 
     def test_origin_get(self, swh_storage):
@@ -960,7 +959,6 @@
 
         assert len(actual_origin1) == 1
         assert actual_origin1[0] == {'id': origin_id,
-                                     'type': data.origin['type'],
                                      'url': data.origin['url']}
 
     def test_origin_get_consistency(self, swh_storage):
@@ -982,7 +980,6 @@
 
         swh_storage.origin_add_one(data.origin)
         origin_data = {
-            'type': data.origin['type'],
             'url': data.origin['url']}
         found_origins = list(swh_storage.origin_search(data.origin['url']))
         assert len(found_origins) == 1
@@ -998,9 +995,7 @@
         assert found_origins[0] == origin_data
 
         swh_storage.origin_add_one(data.origin2)
-        origin2_data = {
-            'type': data.origin2['type'],
-            'url': data.origin2['url']}
+        origin2_data = {'url': data.origin2['url']}
         found_origins = list(swh_storage.origin_search(data.origin2['url']))
         assert len(found_origins) == 1
         if 'id' in found_origins[0]: