diff --git a/sql/Makefile b/sql/Makefile --- a/sql/Makefile +++ b/sql/Makefile @@ -7,10 +7,9 @@ SQL_ENUMS = swh-enums.sql SQL_SCHEMA = swh-schema.sql SQL_FUNC = swh-func.sql -SQL_DATA = swh-data.sql SQL_INDEX = swh-indexes.sql SQL_TRIGGER = swh-triggers.sql -SQLS = $(SQL_INIT) $(SQL_ENUMS) $(SQL_SCHEMA) $(SQL_FUNC) $(SQL_INDEX) $(SQL_TRIGGER) $(SQL_DATA) +SQLS = $(SQL_INIT) $(SQL_ENUMS) $(SQL_SCHEMA) $(SQL_FUNC) $(SQL_INDEX) $(SQL_TRIGGER) PSQL_BIN = psql PSQL_FLAGS = --echo-all -X -v ON_ERROR_STOP=1 diff --git a/sql/swh-data.sql b/sql/swh-data.sql deleted file mode 100644 --- a/sql/swh-data.sql +++ /dev/null @@ -1,39 +0,0 @@ -insert into entity_history - (uuid, parent, name, type, description, homepage, active, generated, validity) -values - ('5f4d4c51-498a-4e28-88b3-b3e4e8396cba', NULL, 'softwareheritage', - 'organization', 'Software Heritage', - 'http://www.softwareheritage.org/', true, false, ARRAY[now()]), - ('6577984d-64c8-4fab-b3ea-3cf63ebb8589', NULL, 'gnu', 'organization', - 'GNU is not UNIX', 'https://gnu.org/', true, false, ARRAY[now()]), - ('7c33636b-8f11-4bda-89d9-ba8b76a42cec', '6577984d-64c8-4fab-b3ea-3cf63ebb8589', - 'GNU Hosting', 'group_of_entities', - 'GNU Hosting facilities', NULL, true, false, ARRAY[now()]), - ('4706c92a-8173-45d9-93d7-06523f249398', '6577984d-64c8-4fab-b3ea-3cf63ebb8589', - 'GNU rsync mirror', 'hosting', - 'GNU rsync mirror', 'rsync://mirror.gnu.org/', true, false, ARRAY[now()]), - ('5cb20137-c052-4097-b7e9-e1020172c48e', '6577984d-64c8-4fab-b3ea-3cf63ebb8589', - 'GNU Projects', 'group_of_entities', - 'GNU Projects', 'https://gnu.org/software/', true, false, ARRAY[now()]), - ('4bfb38f6-f8cd-4bc2-b256-5db689bb8da4', NULL, 'GitHub', 'organization', - 'GitHub', 'https://github.org/', true, false, ARRAY[now()]), - ('aee991a0-f8d7-4295-a201-d1ce2efc9fb2', '4bfb38f6-f8cd-4bc2-b256-5db689bb8da4', - 'GitHub Hosting', 'group_of_entities', - 'GitHub Hosting facilities', 'https://github.org/', true, false, ARRAY[now()]), - 
('34bd6b1b-463f-43e5-a697-785107f598e4', 'aee991a0-f8d7-4295-a201-d1ce2efc9fb2', - 'GitHub git hosting', 'hosting', - 'GitHub git hosting', 'https://github.org/', true, false, ARRAY[now()]), - ('e8c3fc2e-a932-4fd7-8f8e-c40645eb35a7', 'aee991a0-f8d7-4295-a201-d1ce2efc9fb2', - 'GitHub asset hosting', 'hosting', - 'GitHub asset hosting', 'https://github.org/', true, false, ARRAY[now()]), - ('9f7b34d9-aa98-44d4-8907-b332c1036bc3', '4bfb38f6-f8cd-4bc2-b256-5db689bb8da4', - 'GitHub Organizations', 'group_of_entities', - 'GitHub Organizations', 'https://github.org/', true, false, ARRAY[now()]), - ('ad6df473-c1d2-4f40-bc58-2b091d4a750e', '4bfb38f6-f8cd-4bc2-b256-5db689bb8da4', - 'GitHub Users', 'group_of_entities', - 'GitHub Users', 'https://github.org/', true, false, ARRAY[now()]); - -insert into listable_entity - (uuid, list_engine) -values - ('34bd6b1b-463f-43e5-a697-785107f598e4', 'swh.lister.github'); diff --git a/sql/swh-enums.sql b/sql/swh-enums.sql --- a/sql/swh-enums.sql +++ b/sql/swh-enums.sql @@ -5,38 +5,6 @@ create type content_status as enum ('absent', 'visible', 'hidden'); comment on type content_status is 'Content visibility'; --- Types of entities. --- --- - organization: a root entity, usually backed by a non-profit, a --- company, or another kind of "association". (examples: Software --- Heritage, Debian, GNU, GitHub) --- --- - group_of_entities: used for hierarchies, doesn't need to have a --- concrete existence. (examples: GNU hosting facilities, Debian --- hosting facilities, GitHub users, ...) --- --- - hosting: a hosting facility, can usually be listed to generate --- other data. (examples: GitHub git hosting, alioth.debian.org, --- snapshot.debian.org) --- --- - group_of_persons: an entity representing a group of --- persons. (examples: a GitHub organization, a Debian team) --- --- - person: an entity representing a person. (examples: --- a GitHub user, a Debian developer) --- --- - project: an entity representing a software project. 
(examples: a --- GitHub project, Apache httpd, a Debian source package, ...) -create type entity_type as enum ( - 'organization', - 'group_of_entities', - 'hosting', - 'group_of_persons', - 'person', - 'project' -); -comment on type entity_type is 'Entity types'; - create type revision_type as enum ('git', 'tar', 'dsc', 'svn', 'hg'); comment on type revision_type is 'Possible revision types'; diff --git a/sql/swh-func.sql b/sql/swh-func.sql --- a/sql/swh-func.sql +++ b/sql/swh-func.sql @@ -105,29 +105,6 @@ drop column object_id; $$; --- create a temporary table for entity_history, sans id -create or replace function swh_mktemp_entity_history() - returns void - language sql -as $$ - create temporary table tmp_entity_history ( - like entity_history including defaults) on commit drop; - alter table tmp_entity_history drop column id; -$$; - --- create a temporary table for entities called tmp_entity_lister, --- with only the columns necessary for retrieving the uuid of a listed --- entity. -create or replace function swh_mktemp_entity_lister() - returns void - language sql -as $$ - create temporary table tmp_entity_lister ( - id bigint, - lister_metadata jsonb - ) on commit drop; -$$; - -- create a temporary table for the branches of a snapshot create or replace function swh_mktemp_snapshot_branch() returns void @@ -1012,109 +989,6 @@ left join person c on c.id = r.committer; $$; --- Create entries in entity_history from tmp_entity_history --- --- TODO: do something smarter to compress the entries if the data --- didn't change. 
-create or replace function swh_entity_history_add() - returns void - language plpgsql -as $$ -begin - insert into entity_history ( - uuid, parent, name, type, description, homepage, active, generated, lister_metadata, metadata, validity - ) select * from tmp_entity_history; - return; -end -$$; - - -create or replace function swh_update_entity_from_entity_history() - returns trigger - language plpgsql -as $$ -begin - insert into entity (uuid, parent, name, type, description, homepage, active, generated, - lister_metadata, metadata, last_seen, last_id) - select uuid, parent, name, type, description, homepage, active, generated, - lister_metadata, metadata, unnest(validity), id - from entity_history - where uuid = NEW.uuid - order by unnest(validity) desc limit 1 - on conflict (uuid) do update set - parent = EXCLUDED.parent, - name = EXCLUDED.name, - type = EXCLUDED.type, - description = EXCLUDED.description, - homepage = EXCLUDED.homepage, - active = EXCLUDED.active, - generated = EXCLUDED.generated, - lister_metadata = EXCLUDED.lister_metadata, - metadata = EXCLUDED.metadata, - last_seen = EXCLUDED.last_seen, - last_id = EXCLUDED.last_id; - - return null; -end -$$; - -create trigger update_entity - after insert or update - on entity_history - for each row - execute procedure swh_update_entity_from_entity_history(); - --- map an id of tmp_entity_lister to a full entity -create type entity_id as ( - id bigint, - uuid uuid, - parent uuid, - name text, - type entity_type, - description text, - homepage text, - active boolean, - generated boolean, - lister_metadata jsonb, - metadata jsonb, - last_seen timestamptz, - last_id bigint -); - --- find out the uuid of the entries of entity with the metadata --- contained in tmp_entity_lister -create or replace function swh_entity_from_tmp_entity_lister() - returns setof entity_id - language plpgsql -as $$ -begin - return query - select t.id, e.* - from tmp_entity_lister t - left join entity e - on e.lister_metadata @> 
t.lister_metadata; - return; -end -$$; - -create or replace function swh_entity_get(entity_uuid uuid) - returns setof entity - language sql - stable -as $$ - with recursive entity_hierarchy as ( - select e.* - from entity e where uuid = entity_uuid - union - select p.* - from entity_hierarchy e - join entity p on e.parent = p.uuid - ) - select * - from entity_hierarchy; -$$; - - -- Object listing by object_id create or replace function swh_content_list_by_object_id( @@ -1269,8 +1143,6 @@ 'origin', 'origin_visit', 'person', - 'entity', - 'entity_history', 'release', 'revision', 'revision_history', diff --git a/sql/swh-indexes.sql b/sql/swh-indexes.sql --- a/sql/swh-indexes.sql +++ b/sql/swh-indexes.sql @@ -10,67 +10,12 @@ alter table content add primary key using index content_pkey; --- entity_history - -create unique index concurrently entity_history_pkey on entity_history(id); -create index concurrently on entity_history(uuid); -create index concurrently on entity_history(name); - -alter table entity_history add primary key using index entity_history_pkey; - --- entity - -create unique index concurrently entity_pkey on entity(uuid); - -create index concurrently on entity(name); -create index concurrently on entity using gin(lister_metadata jsonb_path_ops); - -alter table entity add primary key using index entity_pkey; -alter table entity add constraint entity_parent_fkey foreign key (parent) references entity(uuid) deferrable initially deferred not valid; -alter table entity validate constraint entity_parent_fkey; -alter table entity add constraint entity_last_id_fkey foreign key (last_id) references entity_history(id) not valid; -alter table entity validate constraint entity_last_id_fkey; - --- entity_equivalence - -create unique index concurrently entity_equivalence_pkey on entity_equivalence(entity1, entity2); -alter table entity_equivalence add primary key using index entity_equivalence_pkey; - - -alter table entity_equivalence add constraint 
"entity_equivalence_entity1_fkey" foreign key (entity1) references entity(uuid) not valid; -alter table entity_equivalence validate constraint entity_equivalence_entity1_fkey; -alter table entity_equivalence add constraint "entity_equivalence_entity2_fkey" foreign key (entity2) references entity(uuid) not valid; -alter table entity_equivalence validate constraint entity_equivalence_entity2_fkey; -alter table entity_equivalence add constraint "order_entities" check (entity1 < entity2) not valid; -alter table entity_equivalence validate constraint order_entities; - --- listable_entity - -create unique index concurrently listable_entity_pkey on listable_entity(uuid); -alter table listable_entity add primary key using index listable_entity_pkey; - -alter table listable_entity add constraint listable_entity_uuid_fkey foreign key (uuid) references entity(uuid) not valid; -alter table listable_entity validate constraint listable_entity_uuid_fkey; - --- list_history - -create unique index concurrently list_history_pkey on list_history(id); -alter table list_history add primary key using index list_history_pkey; - -alter table list_history add constraint list_history_entity_fkey foreign key (entity) references listable_entity(uuid) not valid; -alter table list_history validate constraint list_history_entity_fkey; - -- origin create unique index concurrently origin_pkey on origin(id); alter table origin add primary key using index origin_pkey; create index concurrently on origin(type, url); -alter table origin add constraint origin_lister_fkey foreign key (lister) references listable_entity(uuid) not valid; -alter table origin validate constraint origin_lister_fkey; - -alter table origin add constraint origin_project_fkey foreign key (project) references entity(uuid) not valid; -alter table origin validate constraint origin_project_fkey; -- skipped_content diff --git a/sql/swh-schema.sql b/sql/swh-schema.sql --- a/sql/swh-schema.sql +++ b/sql/swh-schema.sql @@ -12,7 +12,7 @@ 
-- latest schema version insert into dbversion(version, release, description) - values(124, now(), 'Work In Progress'); + values(125, now(), 'Work In Progress'); -- a SHA1 checksum create domain sha1 as bytea check (length(value) = 20); @@ -51,99 +51,6 @@ ); --- Entities constitute a typed hierarchy of organization, hosting --- facilities, groups, people and software projects. --- --- Examples of entities: Software Heritage, Debian, GNU, GitHub, --- Apache, The Linux Foundation, the Debian Python Modules Team, the --- torvalds GitHub user, the torvalds/linux GitHub project. --- --- The data model is hierarchical (via the parent attribute) and might --- store sub-branches of existing entities. The key feature of an --- entity is might be *listed* (if it is available in listable_entity) --- to retrieve information about its content, i.e: sub-entities, --- projects, origins. - --- The history of entities. Allows us to keep historical metadata --- about entities. The temporal invariant is the uuid. Root --- organization uuids are manually generated (and available in --- swh-data.sql). --- --- For generated entities (generated = true), we can provide --- generation_metadata to allow listers to retrieve the uuids of previous --- iterations of the entity. --- --- Inactive entities that have been active in the past (active = --- false) should register the timestamp at which we saw them --- deactivate, in a new entry of entity_history. 
-create table entity_history -( - id bigserial not null, - uuid uuid, - parent uuid, -- should reference entity_history(uuid) - name text not null, - type entity_type not null, - description text, - homepage text, - active boolean not null, -- whether the entity was seen on the last listing - generated boolean not null, -- whether this entity has been generated by a lister - lister_metadata jsonb, -- lister-specific metadata, used for queries - metadata jsonb, - validity timestamptz[] -- timestamps at which we have seen this entity -); - --- The entity table provides a view of the latest information on a --- given entity. It is updated via a trigger on entity_history. -create table entity -( - uuid uuid not null, - parent uuid, - name text not null, - type entity_type not null, - description text, - homepage text, - active boolean not null, -- whether the entity was seen on the last listing - generated boolean not null, -- whether this entity has been generated by a lister - lister_metadata jsonb, -- lister-specific metadata, used for queries - metadata jsonb, - last_seen timestamptz, -- last listing time or disappearance time for active=false - last_id bigint -- last listing id -); - --- Register the equivalence between two entities. Allows sideways --- navigation in the entity table -create table entity_equivalence -( - entity1 uuid, - entity2 uuid -); - --- Register a lister for a specific entity. -create table listable_entity -( - uuid uuid, - enabled boolean not null default true, -- do we list this entity automatically? - list_engine text, -- crawler to be used to list entity's content - list_url text, -- root URL to start the listing - list_params jsonb, -- org-specific listing parameter - latest_list timestamptz -- last time the entity's content has been listed -); - --- Log of all entity listings (i.e., entity crawling) that have been --- done in the past, or are still ongoing. 
-create table list_history -( - id bigserial not null, - date timestamptz not null, - status boolean, -- true if and only if the listing has been successful - result jsonb, -- more detailed return value, depending on status - stdout text, - stderr text, - duration interval, -- fetch duration of NULL if still ongoing - entity uuid -); - - -- An origin is a place, identified by an URL, where software source code -- artifacts can be found. We support different kinds of origins, e.g., git and -- other VCS repositories, web pages that list tarballs URLs (e.g., @@ -155,9 +62,7 @@ ( id bigserial not null, type text, -- TODO use an enum here (?) - url text not null, - lister uuid, - project uuid + url text not null );
diff --git a/sql/upgrades/125.sql b/sql/upgrades/125.sql
new file mode 100644
--- /dev/null
+++ b/sql/upgrades/125.sql
@@ -0,0 +1,84 @@
+-- SWH DB schema upgrade
+-- from_version: 124
+-- to_version: 125
+-- description: Drop useless entity tables
+
+insert into dbversion(version, release, description)
+      values(125, now(), 'Work In Progress');
+
+-- Every object is dropped explicitly and in dependency order (no CASCADE),
+-- so an unexpected leftover dependency makes the upgrade fail loudly.
+
+-- swh_entity_get returns rows of the entity table and
+-- swh_entity_from_tmp_entity_lister returns entity_id values, so both must
+-- be dropped before the table and the type they depend on.
+DROP FUNCTION swh_entity_from_tmp_entity_lister();
+
+DROP FUNCTION swh_entity_get(entity_uuid uuid);
+
+DROP FUNCTION swh_entity_history_add();
+
+DROP FUNCTION swh_mktemp_entity_history();
+
+DROP FUNCTION swh_mktemp_entity_lister();
+
+-- Detach origin from the entity tables before dropping them.
+ALTER TABLE origin
+    DROP CONSTRAINT origin_lister_fkey;
+
+ALTER TABLE origin
+    DROP CONSTRAINT origin_project_fkey;
+
+ALTER TABLE origin
+    DROP COLUMN lister,
+    DROP COLUMN project;
+
+-- Referencing tables go first: list_history -> listable_entity -> entity,
+-- entity_equivalence -> entity, entity -> entity_history.
+DROP TABLE list_history;
+
+DROP TABLE listable_entity;
+
+DROP TABLE entity_equivalence;
+
+DROP TABLE entity;
+
+-- Dropping entity_history also drops its update_entity trigger...
+DROP TABLE entity_history;
+
+-- ...which is required before the trigger function itself can be dropped.
+DROP FUNCTION swh_update_entity_from_entity_history();
+
+-- Sequences created by bigserial are owned by their column and vanish with
+-- the table; IF EXISTS only covers databases where that ownership was lost.
+DROP SEQUENCE IF EXISTS entity_history_id_seq;
+
+DROP SEQUENCE IF EXISTS list_history_id_seq;
+
+-- entity_id has a column of type entity_type, so it has to go first.
+DROP TYPE entity_id;
+
+DROP TYPE entity_type;
+
+CREATE OR REPLACE FUNCTION swh_stat_counters() RETURNS SETOF public.counter
+    LANGUAGE sql STABLE
+    AS $$
+  select object_type as label, value as value
+  from object_counts
+  where object_type in (
+    'content',
+    'directory',
+    'directory_entry_dir',
+    'directory_entry_file',
+    'directory_entry_rev',
+    'occurrence_history',
+    'origin',
+    'origin_visit',
+    'person',
+    'release',
+    'revision',
+    'revision_history',
+    'skipped_content',
+    'snapshot'
+  );
+$$;
diff --git a/swh/storage/db.py b/swh/storage/db.py --- a/swh/storage/db.py +++ b/swh/storage/db.py @@ -241,12 +241,6 @@ @stored_procedure('swh_mktemp_snapshot_branch') def mktemp_snapshot_branch(self, cur=None): pass - @stored_procedure('swh_mktemp_entity_lister') - def mktemp_entity_lister(self, cur=None): pass - - @stored_procedure('swh_mktemp_entity_history') - def mktemp_entity_history(self, cur=None): pass - def register_listener(self, notify_queue, cur=None): """Register a listener for NOTIFY queue `notify_queue`""" self._cursor(cur).execute("LISTEN %s" % notify_queue) @@ -278,9 +272,6 @@ @stored_procedure('swh_occurrence_history_add') def occurrence_history_add_from_temp(self, cur=None): pass - @stored_procedure('swh_entity_history_add') - def entity_history_add_from_temp(self, cur=None): pass - def content_update_from_temp(self, keys_to_update, cur=None): cur = self._cursor(cur) cur.execute("""select swh_content_update(ARRAY[%s] :: text[])""" % @@ -792,14 +783,6 @@ cur.execute(query, [jsonize(fetch_history.get(col)) for col in self.fetch_history_cols + ['id']]) - base_entity_cols = ['uuid', 'parent', 'name', 'type', - 'description', 'homepage', 'active', - 'generated', 'lister_metadata', - 'metadata'] - - entity_cols = base_entity_cols + ['last_seen', 'last_id'] - entity_history_cols = base_entity_cols + ['id', 'validity'] - def origin_add(self, type, url, cur=None): """Insert a new origin and return the new identifier.""" insert = """INSERT INTO origin (type, url) values (%s, %s) @@ -808,7 +791,7 @@ cur.execute(insert, (type, url)) return cur.fetchone()[0] - origin_cols = ['id', 'type', 'url', 'lister', 'project'] + origin_cols = ['id', 'type', 'url'] def
origin_get_with(self, type, url, cur=None): """Retrieve the origin id from its type and url if found.""" @@ -967,32 +950,6 @@ cur.execute(query, (origin_id, branch_name, datetime, limit)) yield from cursor_to_bytes(cur) - def entity_get(self, uuid, cur=None): - """Retrieve the entity and its parent hierarchy chain per uuid. - - """ - cur = self._cursor(cur) - cur.execute("""SELECT %s - FROM swh_entity_get(%%s)""" % ( - ', '.join(self.entity_cols)), - (uuid, )) - yield from cursor_to_bytes(cur) - - def entity_get_one(self, uuid, cur=None): - """Retrieve a single entity given its uuid. - - """ - cur = self._cursor(cur) - cur.execute("""SELECT %s - FROM entity - WHERE uuid = %%s""" % ( - ', '.join(self.entity_cols)), - (uuid, )) - data = cur.fetchone() - if not data: - return None - return line_to_bytes(data) - def origin_metadata_add(self, origin, ts, provider, tool, metadata, cur=None): """ Add an origin_metadata for the origin at ts with provider, tool and diff --git a/swh/storage/storage.py b/swh/storage/storage.py --- a/swh/storage/storage.py +++ b/swh/storage/storage.py @@ -701,8 +701,6 @@ - id: origin's id - revision: origin's type - url: origin's url - - lister: lister's uuid - - project: project's uuid (FIXME, retrieve this information) Raises: ValueError: if the keys does not match (url and type) nor id. @@ -1107,7 +1105,7 @@ return ret - origin_keys = ['id', 'type', 'url', 'lister', 'project'] + origin_keys = ['id', 'type', 'url'] @db_transaction(statement_timeout=500) def origin_get(self, origin, db=None, cur=None): @@ -1131,8 +1129,6 @@ - id: origin's id - type: origin's type - url: origin's url - - lister: lister's uuid - - project: project's uuid (FIXME, retrieve this information) Raises: ValueError: if the keys does not match (url and type) nor id. 
@@ -1282,113 +1278,6 @@ """ return db.get_fetch_history(fetch_history_id, cur) - @db_transaction() - def entity_add(self, entities, db=None, cur=None): - """Add the given entitites to the database (in entity_history). - - Args: - entities (iterable): iterable of dictionaries with the following - keys: - - - uuid (uuid): id of the entity - - parent (uuid): id of the parent entity - - name (str): name of the entity - - type (str): type of entity (one of 'organization', - 'group_of_entities', 'hosting', 'group_of_persons', 'person', - 'project') - - description (str, optional): description of the entity - - homepage (str): url of the entity's homepage - - active (bool): whether the entity is active - - generated (bool): whether the entity was generated - - lister_metadata (dict): lister-specific entity metadata - - metadata (dict): other metadata for the entity - - validity (datetime.DateTime array): timestamps at which we - listed the entity. - - """ - cols = list(db.entity_history_cols) - cols.remove('id') - - db.mktemp_entity_history() - db.copy_to(entities, 'tmp_entity_history', cols, cur) - db.entity_history_add_from_temp() - - @db_transaction_generator() - def entity_get_from_lister_metadata(self, entities, db=None, cur=None): - """Fetch entities from the database, matching with the lister and - associated metadata. - - Args: - entities (iterable): dictionaries containing the lister metadata to - look for. Useful keys are 'lister', 'type', 'id', ... - - Yields: - fetched entities with all their attributes. If no match was found, - the returned entity is None. 
- - """ - - db.mktemp_entity_lister(cur) - - mapped_entities = [] - for i, entity in enumerate(entities): - mapped_entity = { - 'id': i, - 'lister_metadata': entity, - } - mapped_entities.append(mapped_entity) - - db.copy_to(mapped_entities, 'tmp_entity_lister', - ['id', 'lister_metadata'], cur) - - cur.execute('''select id, %s - from swh_entity_from_tmp_entity_lister() - order by id''' % - ','.join(db.entity_cols)) - - for id, *entity_vals in cur: - fetched_entity = dict(zip(db.entity_cols, entity_vals)) - if fetched_entity['uuid']: - yield fetched_entity - else: - yield { - 'uuid': None, - 'lister_metadata': entities[i], - } - - @db_transaction_generator(statement_timeout=2000) - def entity_get(self, uuid, db=None, cur=None): - """Returns the list of entity per its uuid identifier and also its - parent hierarchy. - - Args: - uuid: entity's identifier - - Returns: - List of entities starting with entity with uuid and the parent - hierarchy from such entity. - - """ - for entity in db.entity_get(uuid, cur): - yield dict(zip(db.entity_cols, entity)) - - @db_transaction(statement_timeout=500) - def entity_get_one(self, uuid, db=None, cur=None): - """Returns one entity using its uuid identifier. 
- - Args: - uuid: entity's identifier - - Returns: - the object corresponding to the given entity - - """ - entity = db.entity_get_one(uuid, cur) - if entity: - return dict(zip(db.entity_cols, entity)) - else: - return None - @db_transaction(statement_timeout=500) def stat_counters(self, db=None, cur=None): """compute statistics about the number of tuples in various tables diff --git a/swh/storage/tests/storage_testing.py b/swh/storage/tests/storage_testing.py --- a/swh/storage/tests/storage_testing.py +++ b/swh/storage/tests/storage_testing.py @@ -59,15 +59,5 @@ super().tearDown() def reset_storage_tables(self): - excluded = {'dbversion', 'entity', 'entity_history', 'listable_entity', - 'tool'} + excluded = {'dbversion', 'tool'} self.reset_db_tables(self.TEST_STORAGE_DB_NAME, excluded=excluded) - - db = self.test_db[self.TEST_STORAGE_DB_NAME] - conn = db.conn - cursor = db.cursor - - cursor.execute('delete from entity where generated=true') - cursor.execute('delete from entity_history where generated=true') - - conn.commit() diff --git a/swh/storage/tests/test_storage.py b/swh/storage/tests/test_storage.py --- a/swh/storage/tests/test_storage.py +++ b/swh/storage/tests/test_storage.py @@ -9,8 +9,6 @@ from operator import itemgetter import psycopg2 import unittest -from uuid import UUID - from unittest.mock import Mock, patch from nose.tools import istest @@ -477,114 +475,6 @@ 'stderr': 'blablabla', } - self.entity1 = { - 'uuid': UUID('f96a7ec1-0058-4920-90cc-7327e4b5a4bf'), - # GitHub users - 'parent': UUID('ad6df473-c1d2-4f40-bc58-2b091d4a750e'), - 'name': 'github:user:olasd', - 'type': 'person', - 'description': 'Nicolas Dandrimont', - 'homepage': 'http://example.com', - 'active': True, - 'generated': True, - 'lister_metadata': { - # swh.lister.github - 'lister': '34bd6b1b-463f-43e5-a697-785107f598e4', - 'id': 12877, - 'type': 'user', - 'last_activity': '2015-11-03', - }, - 'metadata': None, - 'validity': [ - datetime.datetime(2015, 11, 3, 11, 0, 0, - 
tzinfo=datetime.timezone.utc), - ] - } - - self.entity1_query = { - 'lister': '34bd6b1b-463f-43e5-a697-785107f598e4', - 'id': 12877, - 'type': 'user', - } - - self.entity2 = { - 'uuid': UUID('3903d075-32d6-46d4-9e29-0aef3612c4eb'), - # GitHub users - 'parent': UUID('ad6df473-c1d2-4f40-bc58-2b091d4a750e'), - 'name': 'github:user:zacchiro', - 'type': 'person', - 'description': 'Stefano Zacchiroli', - 'homepage': 'http://example.com', - 'active': True, - 'generated': True, - 'lister_metadata': { - # swh.lister.github - 'lister': '34bd6b1b-463f-43e5-a697-785107f598e4', - 'id': 216766, - 'type': 'user', - 'last_activity': '2015-11-03', - }, - 'metadata': None, - 'validity': [ - datetime.datetime(2015, 11, 3, 11, 0, 0, - tzinfo=datetime.timezone.utc), - ] - } - - self.entity3 = { - 'uuid': UUID('111df473-c1d2-4f40-bc58-2b091d4a7111'), - # GitHub users - 'parent': UUID('222df473-c1d2-4f40-bc58-2b091d4a7222'), - 'name': 'github:user:ardumont', - 'type': 'person', - 'description': 'Antoine R. Dumont a.k.a tony', - 'homepage': 'https://ardumont.github.io', - 'active': True, - 'generated': True, - 'lister_metadata': { - 'lister': '34bd6b1b-463f-43e5-a697-785107f598e4', - 'id': 666, - 'type': 'user', - 'last_activity': '2016-01-15', - }, - 'metadata': None, - 'validity': [ - datetime.datetime(2015, 11, 3, 11, 0, 0, - tzinfo=datetime.timezone.utc), - ] - } - - self.entity4 = { - 'uuid': UUID('222df473-c1d2-4f40-bc58-2b091d4a7222'), - # GitHub users - 'parent': None, - 'name': 'github:user:ToNyX', - 'type': 'person', - 'description': 'ToNyX', - 'homepage': 'https://ToNyX.github.io', - 'active': True, - 'generated': True, - 'lister_metadata': { - 'lister': '34bd6b1b-463f-43e5-a697-785107f598e4', - 'id': 999, - 'type': 'user', - 'last_activity': '2015-12-24', - }, - 'metadata': None, - 'validity': [ - datetime.datetime(2015, 11, 3, 11, 0, 0, - tzinfo=datetime.timezone.utc), - ] - } - - self.entity2_query = { - 'lister_metadata': { - 'lister': 
'34bd6b1b-463f-43e5-a697-785107f598e4', - 'id': 216766, - 'type': 'user', - }, - } - self.snapshot = { 'id': hash_to_bytes('2498dbf535f882bc7f9a18fb16c9ad27fda7bab7'), 'branches': { @@ -1298,9 +1188,7 @@ self.assertEqual(actual_origin1, {'id': id, 'type': self.origin['type'], - 'url': self.origin['url'], - 'lister': None, - 'project': None}) + 'url': self.origin['url']}) @istest def origin_search(self): @@ -1314,9 +1202,7 @@ id = self.storage.origin_add_one(self.origin) origin_data = {'id': id, 'type': self.origin['type'], - 'url': self.origin['url'], - 'lister': None, - 'project': None} + 'url': self.origin['url']} found_origins = list(self.storage.origin_search(self.origin['url'])) self.assertEqual(len(found_origins), 1) self.assertEqual(found_origins[0], origin_data) @@ -1329,9 +1215,7 @@ id2 = self.storage.origin_add_one(self.origin2) origin2_data = {'id': id2, 'type': self.origin2['type'], - 'url': self.origin2['url'], - 'lister': None, - 'project': None} + 'url': self.origin2['url']} found_origins = list(self.storage.origin_search(self.origin2['url'])) self.assertEqual(len(found_origins), 1) self.assertEqual(found_origins[0], origin2_data) @@ -1814,89 +1698,6 @@ ) @istest - def entity_get_from_lister_metadata(self): - self.storage.entity_add([self.entity1]) - - fetched_entities = list( - self.storage.entity_get_from_lister_metadata( - [self.entity1_query, self.entity2_query])) - - # Entity 1 should have full metadata, with last_seen/last_id instead - # of validity - entity1 = self.entity1.copy() - entity1['last_seen'] = entity1['validity'][0] - del fetched_entities[0]['last_id'] - del entity1['validity'] - # Entity 2 should have no metadata - entity2 = { - 'uuid': None, - 'lister_metadata': self.entity2_query.copy(), - } - - self.assertEquals(fetched_entities, [entity1, entity2]) - - @istest - def entity_get_from_lister_metadata_twice(self): - self.storage.entity_add([self.entity1]) - - fetched_entities1 = list( - self.storage.entity_get_from_lister_metadata( 
- [self.entity1_query])) - fetched_entities2 = list( - self.storage.entity_get_from_lister_metadata( - [self.entity1_query])) - - self.assertEquals(fetched_entities1, fetched_entities2) - - @istest - def entity_get(self): - # given - self.storage.entity_add([self.entity4]) - self.storage.entity_add([self.entity3]) - - # when: entity3 -child-of-> entity4 - actual_entity3 = list(self.storage.entity_get(self.entity3['uuid'])) - - self.assertEquals(len(actual_entity3), 2) - # remove dynamic data (modified by db) - entity3 = self.entity3.copy() - entity4 = self.entity4.copy() - del entity3['validity'] - del entity4['validity'] - del actual_entity3[0]['last_seen'] - del actual_entity3[0]['last_id'] - del actual_entity3[1]['last_seen'] - del actual_entity3[1]['last_id'] - self.assertEquals(actual_entity3, [entity3, entity4]) - - # when: entity4 only child - actual_entity4 = list(self.storage.entity_get(self.entity4['uuid'])) - - self.assertEquals(len(actual_entity4), 1) - # remove dynamic data (modified by db) - entity4 = self.entity4.copy() - del entity4['validity'] - del actual_entity4[0]['last_id'] - del actual_entity4[0]['last_seen'] - - self.assertEquals(actual_entity4, [entity4]) - - @istest - def entity_get_one(self): - # given - self.storage.entity_add([self.entity3, self.entity4]) - - # when: entity3 -child-of-> entity4 - actual_entity3 = self.storage.entity_get_one(self.entity3['uuid']) - - # remove dynamic data (modified by db) - entity3 = self.entity3.copy() - del entity3['validity'] - del actual_entity3['last_seen'] - del actual_entity3['last_id'] - self.assertEquals(actual_entity3, entity3) - - @istest def stat_counters(self): expected_keys = ['content', 'directory', 'directory_entry_dir', 'origin', 'person', 'revision']