Page MenuHomeSoftware Heritage

D268.id931.diff
No OneTemporary

D268.id931.diff

diff --git a/sql/swh-func.sql b/sql/swh-func.sql
--- a/sql/swh-func.sql
+++ b/sql/swh-func.sql
@@ -130,6 +130,17 @@
) on commit drop;
$$;
+-- Set up tmp_snapshot_branch, the staging table from which
+-- swh_snapshot_add() reads the branches of a snapshot.
+create or replace function swh_mktemp_snapshot_branch()
+    returns void
+    language sql
+as $$
+    create temp table tmp_snapshot_branch (
+        name        bytea not null,
+        target      bytea,
+        target_type object_type
+    )
+    on commit drop;  -- the table goes away when the transaction commits
+$$;
create or replace function swh_mktemp_tool()
returns void
@@ -937,6 +948,88 @@
end
$$;
+-- Attach the snapshot identified by `snapshot_id` to an origin visit,
+-- creating the snapshot first when it is not known yet.
+--
+-- The branches of a new snapshot are read from tmp_snapshot_branch (see
+-- swh_mktemp_snapshot_branch); the table is not read when the snapshot
+-- already exists.
+create or replace function swh_snapshot_add(origin bigint, visit bigint, snapshot_id snapshot.id%type)
+  returns void
+  language plpgsql
+as $$
+declare
+  snapshot_object_id snapshot.object_id%type;
+begin
+  select object_id from snapshot where id = snapshot_id into snapshot_object_id;
+  if snapshot_object_id is null then
+    insert into snapshot (id) values (snapshot_id) returning object_id into snapshot_object_id;
+    -- all_branches: the staged branches, with each target id resolved to the
+    -- object_id of the targeted row.
+    with all_branches(name, target_type, target_id) as (
+      select tsb.name, tsb.target_type, (
+        -- A simple CASE compares with "=", which never matches NULL, so
+        -- branches without a target type are handled by the else arm.
+        -- Every column is qualified so that a same-named column of the
+        -- looked-up table cannot capture the reference to tsb.target.
+        case tsb.target_type
+          when 'revision' then (select revision.object_id from revision where revision.id = tsb.target)
+          when 'release' then (select release.object_id from release where release.id = tsb.target)
+          when 'directory' then (select directory.object_id from directory where directory.id = tsb.target)
+          when 'content' then (select content.object_id from content where content.sha1_git = tsb.target)
+          else null
+        end
+      ) from tmp_snapshot_branch tsb
+    ), inserted as (
+      -- create the branches that do not exist yet
+      insert into snapshot_branch (name, target_type, target_id)
+      select name, target_type, target_id from all_branches
+      on conflict do nothing
+      returning object_id
+    )
+    -- Link the snapshot to the branches created above and to the ones that
+    -- already existed. Rows inserted by a data-modifying CTE are not visible
+    -- to the rest of the same statement, so the join below only finds
+    -- pre-existing branches and the two halves do not overlap.
+    insert into snapshot_branches (snapshot_id, branch_id)
+    select snapshot_object_id, object_id as branch_id from inserted
+    union all
+    select snapshot_object_id, sb.object_id as branch_id
+    from all_branches ab
+    join snapshot_branch sb
+      on sb.name = ab.name
+      and sb.target_type is not distinct from ab.target_type
+      and sb.target_id is not distinct from ab.target_id;
+  end if;
+  -- parameters are qualified with the function name because origin_visit
+  -- has columns called origin and visit as well
+  update origin_visit ov
+    set snapshot_id = snapshot_object_id
+    where ov.origin=swh_snapshot_add.origin and ov.visit=swh_snapshot_add.visit;
+end;
+$$;
+
+create type snapshot_result as ( -- row type of swh_snapshot_get_by_id: one row per branch
+ snapshot_id sha1_git, -- intrinsic id of the snapshot the branch belongs to
+ name bytea, -- branch name
+ target bytea, -- id of the object the branch points to (sha1_git for contents)
+ target_type object_type -- kind of object the branch points to
+);
+
+-- Return one row per branch of the snapshot whose intrinsic id is `id`,
+-- translating each target from its object_id back to its id.
+-- No row is returned for an unknown snapshot or a snapshot without branches.
+create or replace function swh_snapshot_get_by_id(id snapshot.id%type)
+  returns setof snapshot_result
+  language sql
+  stable
+as $$
+  select
+    swh_snapshot_get_by_id.id as snapshot_id,
+    sb.name,
+    -- A simple CASE compares with "=", which never matches NULL: branches
+    -- without a target type fall through to the else arm. Columns are
+    -- qualified so that neither the `id` parameter nor a column of the
+    -- looked-up table can shadow the intended reference.
+    (case sb.target_type
+       when 'revision' then (select revision.id from revision where revision.object_id = sb.target_id)
+       when 'release' then (select release.id from release where release.object_id = sb.target_id)
+       when 'directory' then (select directory.id from directory where directory.object_id = sb.target_id)
+       when 'content' then (select content.sha1_git from content where content.object_id = sb.target_id)
+       else null
+     end
+    ) as target,
+    sb.target_type
+  from snapshot_branches sbs
+  inner join snapshot_branch sb on sbs.branch_id = sb.object_id
+  where sbs.snapshot_id = (select snapshot.object_id from snapshot where snapshot.id = swh_snapshot_get_by_id.id)
+$$;
+
+-- Look up the intrinsic id of the snapshot attached to visit `visit_id` of
+-- origin `origin_id`. The result is null when the visit has no snapshot.
+create or replace function swh_snapshot_get_by_origin_visit(origin_id bigint, visit_id bigint)
+  returns snapshot.id%type
+  language sql
+  stable
+as $$
+  select s.id
+  from origin_visit ov
+  left join snapshot s
+    on s.object_id = ov.snapshot_id
+  where ov.origin = swh_snapshot_get_by_origin_visit.origin_id
+    and ov.visit = swh_snapshot_get_by_origin_visit.visit_id;
+$$;
-- Absolute path: directory reference + complete path relative to it
create type content_dir as (
diff --git a/sql/swh-indexes.sql b/sql/swh-indexes.sql
--- a/sql/swh-indexes.sql
+++ b/sql/swh-indexes.sql
@@ -153,6 +153,32 @@
alter table revision_history add constraint revision_history_id_fkey foreign key (id) references revision(id) not valid;
alter table revision_history validate constraint revision_history_id_fkey;
+-- snapshot: primary key on the internal object_id, unique intrinsic id
+create unique index concurrently snapshot_pkey on snapshot(object_id);
+alter table snapshot add primary key using index snapshot_pkey;
+
+create unique index concurrently on snapshot(id); -- at most one row per intrinsic snapshot id
+
+-- snapshot_branch
+create unique index concurrently snapshot_branch_pkey on snapshot_branch(object_id);
+alter table snapshot_branch add primary key using index snapshot_branch_pkey;
+
+create unique index concurrently on snapshot_branch (target_type, target_id, name); -- deduplicates branches that have a target
+alter table snapshot_branch add constraint snapshot_branch_target_check check ((target_type is null) = (target_id is null)) not valid; -- target_type and target_id are either both set or both null
+alter table snapshot_branch validate constraint snapshot_branch_target_check;
+
+create unique index concurrently on snapshot_branch (name) where target_type is null and target_id is null; -- nulls never collide in the index above, so target-less branches are deduplicated by name here
+
+-- snapshot_branches: many-to-many link between snapshot and snapshot_branch
+create unique index concurrently snapshot_branches_pkey on snapshot_branches(snapshot_id, branch_id);
+alter table snapshot_branches add primary key using index snapshot_branches_pkey;
+
+alter table snapshot_branches add constraint snapshot_branches_snapshot_id_fkey foreign key (snapshot_id) references snapshot(object_id) not valid; -- added not valid, then validated in a separate statement
+alter table snapshot_branches validate constraint snapshot_branches_snapshot_id_fkey;
+
+alter table snapshot_branches add constraint snapshot_branches_branch_id_fkey foreign key (branch_id) references snapshot_branch(object_id) not valid;
+alter table snapshot_branches validate constraint snapshot_branches_branch_id_fkey;
+
-- origin_visit
create unique index concurrently origin_visit_pkey on origin_visit(origin, visit);
alter table origin_visit add primary key using index origin_visit_pkey;
@@ -162,6 +188,9 @@
alter table origin_visit add constraint origin_visit_origin_fkey foreign key (origin) references origin(id) not valid;
alter table origin_visit validate constraint origin_visit_origin_fkey;
+alter table origin_visit add constraint origin_visit_snapshot_id_fkey foreign key (snapshot_id) references snapshot(object_id) not valid; -- origin_visit.snapshot_id holds snapshot.object_id, not the intrinsic snapshot id
+alter table origin_visit validate constraint origin_visit_snapshot_id_fkey;
+
-- occurrence_history
create unique index concurrently occurrence_history_pkey on occurrence_history(object_id);
alter table occurrence_history add primary key using index occurrence_history_pkey;
@@ -180,7 +209,6 @@
alter table occurrence add constraint occurrence_origin_fkey foreign key (origin) references origin(id) not valid;
alter table occurrence validate constraint occurrence_origin_fkey;
-
-- release
create unique index concurrently release_pkey on release(id);
alter table release add primary key using index release_pkey;
diff --git a/sql/swh-schema.sql b/sql/swh-schema.sql
--- a/sql/swh-schema.sql
+++ b/sql/swh-schema.sql
@@ -14,7 +14,7 @@
);
insert into dbversion(version, release, description)
- values(114, now(), 'Work In Progress');
+ values(115, now(), 'Work In Progress');
-- a SHA1 checksum (not necessarily originating from Git)
create domain sha1 as bytea check (length(value) = 20);
@@ -293,11 +293,12 @@
-- The timestamps at which Software Heritage has made a visit of the given origin.
create table origin_visit
(
- origin bigint not null,
- visit bigint not null,
- date timestamptz not null,
- status origin_visit_status not null,
- metadata jsonb
+ origin bigint not null,
+ visit bigint not null,
+ date timestamptz not null,
+ status origin_visit_status not null,
+ metadata jsonb,
+ snapshot_id bigint
);
comment on column origin_visit.origin is 'Visited origin';
@@ -305,6 +306,7 @@
comment on column origin_visit.date is 'Visit date for that origin';
comment on column origin_visit.status is 'Visit status for that origin';
comment on column origin_visit.metadata is 'Metadata associated with the visit';
+comment on column origin_visit.snapshot_id is 'id of the snapshot associated with the visit';
-- The content of software origins is indexed starting from top-level pointers
@@ -321,7 +323,8 @@
target_type object_type not null, -- ref target type
visits bigint[] not null, -- the visits where that occurrence was valid. References
-- origin_visit(visit), where o_h.origin = origin_visit.origin.
- object_id bigserial not null -- short object identifier
+ object_id bigserial not null, -- short object identifier
+ snapshot_branch_id bigint
);
-- Materialized view of occurrence_history, storing the *current* value of each
@@ -334,6 +337,25 @@
target_type object_type not null
);
+
+create table snapshot ( -- one row per known snapshot
+ object_id bigserial not null, -- internal identifier; primary key, referenced by snapshot_branches and origin_visit
+ id sha1_git -- intrinsic identifier of the snapshot; covered by a unique index
+);
+
+create table snapshot_branch ( -- a named pointer to an object; linked to snapshots through snapshot_branches
+ object_id bigserial not null, -- internal identifier; primary key
+ name bytea not null, -- branch name
+ target_id bigint, -- object_id of the targeted revision/release/directory/content row; null together with target_type
+ target_type object_type -- kind of the targeted object; null together with target_id
+);
+
+create table snapshot_branches ( -- association between a snapshot and the branches it contains
+ snapshot_id bigint not null, -- references snapshot(object_id)
+ branch_id bigint not null -- references snapshot_branch(object_id)
+);
+
+
-- A "memorable" point in the development history of a project.
--
-- Synonyms/mappings:
diff --git a/swh/storage/api/client.py b/swh/storage/api/client.py
--- a/swh/storage/api/client.py
+++ b/swh/storage/api/client.py
@@ -104,6 +104,17 @@
def occurrence_add(self, occurrences):
return self.post('occurrence/add', {'occurrences': occurrences})
+ def snapshot_add(self, origin, visit, snapshot):
+ return self.post('snapshot/add', {'origin': origin, 'visit': visit,
+ 'snapshot': snapshot})
+
+ def snapshot_get(self, snapshot_id):
+ return self.post('snapshot', {'snapshot_id': snapshot_id})
+
+ def snapshot_get_by_origin_visit(self, origin, visit):
+ return self.post('snapshot/by_origin_visit', {'origin': origin,
+ 'visit': visit})
+
def origin_get(self, origin):
return self.post('origin/get', {'origin': origin})
diff --git a/swh/storage/api/server.py b/swh/storage/api/server.py
--- a/swh/storage/api/server.py
+++ b/swh/storage/api/server.py
@@ -192,6 +192,22 @@
return encode_data(g.storage.occurrence_add(**decode_request(request)))
+@app.route('/snapshot/add', methods=['POST'])
+def snapshot_add():
+ return encode_data(g.storage.snapshot_add(**decode_request(request)))
+
+
+@app.route('/snapshot', methods=['POST'])
+def snapshot_get():
+ return encode_data(g.storage.snapshot_get(**decode_request(request)))
+
+
+@app.route('/snapshot/by_origin_visit', methods=['POST'])
+def snapshot_get_by_origin_visit():
+ return encode_data(g.storage.snapshot_get_by_origin_visit(
+ **decode_request(request)))
+
+
@app.route('/origin/get', methods=['POST'])
def origin_get():
return encode_data(g.storage.origin_get(**decode_request(request)))
diff --git a/swh/storage/db.py b/swh/storage/db.py
--- a/swh/storage/db.py
+++ b/swh/storage/db.py
@@ -205,6 +205,9 @@
@stored_procedure('swh_mktemp_occurrence_history')
def mktemp_occurrence_history(self, cur=None): pass
+ @stored_procedure('swh_mktemp_snapshot_branch')  # SQL function that creates tmp_snapshot_branch
+ def mktemp_snapshot_branch(self, cur=None): pass  # NOTE(review): presumably the decorator supplies the actual call - confirm
+
@stored_procedure('swh_mktemp_entity_lister')
def mktemp_entity_lister(self, cur=None): pass
@@ -316,6 +319,44 @@
yield from cursor_to_bytes(cur)
+ def snapshot_exists(self, snapshot_id, cur=None):
+ """Check whether a snapshot with the given id exists"""
+ cur = self._cursor(cur)
+
+ cur.execute("""SELECT 1 FROM snapshot where id=%s""", (snapshot_id,))
+
+ return bool(cur.fetchone())
+
+ def snapshot_add(self, origin, visit, snapshot_id, cur=None):
+ """Add a snapshot for origin/visit from the temporary table"""
+ cur = self._cursor(cur)
+
+ cur.execute("""SELECT swh_snapshot_add(%s, %s, %s)""",
+ (origin, visit, snapshot_id))
+
+ snapshot_get_cols = ['snapshot_id', 'name', 'target', 'target_type']
+
+ def snapshot_get_by_id(self, snapshot_id, cur=None):
+ cur = self._cursor(cur)
+ query = """\
+ SELECT %s FROM swh_snapshot_get_by_id(%%s)
+ """ % ', '.join(self.snapshot_get_cols)
+
+ cur.execute(query, (snapshot_id,))
+
+ yield from cursor_to_bytes(cur)
+
+ def snapshot_get_by_origin_visit(self, origin_id, visit_id, cur=None):
+ cur = self._cursor(cur)
+ query = """\
+ SELECT swh_snapshot_get_by_origin_visit(%s, %s)
+ """
+
+ cur.execute(query, (origin_id, visit_id))
+ ret = cur.fetchone()
+ if ret:
+ return line_to_bytes(ret)[0]
+
content_find_cols = ['sha1', 'sha1_git', 'sha256', 'blake2s256', 'length',
'ctime', 'status']
diff --git a/swh/storage/storage.py b/swh/storage/storage.py
--- a/swh/storage/storage.py
+++ b/swh/storage/storage.py
@@ -759,6 +759,111 @@
dict(zip(db.release_get_cols, release))
)
+ @db_transaction
+ def snapshot_add(self, origin, visit, snapshot, cur=None):
+ """Add a snapshot for the given origin/visit couple
+
+ Args:
+ origin (int): id of the origin
+ visit (int): id of the visit
+ snapshot (dict): Dictionary with the following keys::
+ id (bytes): id of the snapshot
+ branches (dict): branches the snapshot contains, mapping the
+ branch name (str) to the branch target, each a dict with the
+ following keys::
+ target_type (str): one of 'content', 'directory', 'revision',
+ 'release'
+ target (bytes): identifier of the target (currently a
+ sha1_git for all object kinds)
+ """
+ db = self.db
+
+ if not db.snapshot_exists(snapshot['id'], cur):
+ db.mktemp_snapshot_branch(cur)
+ db.copy_to((
+ {
+ 'name': name,
+ 'target': info['target'],
+ 'target_type': info['target_type']
+ }
+ for name, info in snapshot['branches'].items()
+ ), 'tmp_snapshot_branch',
+ ['name', 'target', 'target_type'], cur)
+
+ db.snapshot_add(origin, visit, snapshot['id'], cur)
+
+ # TODO: drop this compat feature
+ self.occurrence_add([
+ {
+ 'origin': origin,
+ 'visit': visit,
+ 'branch': name,
+ 'target': info['target'],
+ 'target_type': info['target_type'],
+ } for name, info in snapshot['branches'].items()
+ ])
+
+ @db_transaction
+ def snapshot_get(self, snapshot_id, cur=None):
+ """Get the snapshot with the given id
+
+ Args:
+ snapshot_id (bytes): id of the snapshot
+ Returns:
+ dict: a snapshot with two keys:
+ id:: identifier for the snapshot
+ branches:: a list of branches contained by the snapshot
+
+ """
+ db = self.db
+
+ branches = {}
+ for branch in db.snapshot_get_by_id(snapshot_id, cur):
+ branch = dict(zip(db.snapshot_get_cols, branch))
+ assert branch['snapshot_id'] == snapshot_id
+ del branch['snapshot_id']
+ branches[branch.pop('name')] = branch
+
+ if branches:
+ return {'id': snapshot_id, 'branches': branches}
+
+ if db.snapshot_exists(snapshot_id, cur):
+ # empty snapshot
+ return {'id': snapshot_id, 'branches': {}}
+
+ return None
+
+ @db_transaction
+ def snapshot_get_by_origin_visit(self, origin, visit, cur=None):
+ """Get the snapshot for the given origin visit
+
+ Args:
+ origin (int): the origin identifier
+ visit (int): the visit identifier
+ Returns:
+ dict: a snapshot with two keys:
+ id:: identifier for the snapshot
+ branches:: a dictionary containing the snapshot branch information
+
+ """
+ db = self.db
+
+ snapshot_id = db.snapshot_get_by_origin_visit(origin, visit, cur)
+
+ if snapshot_id:
+ return self.snapshot_get(snapshot_id, cur=cur)
+ else:
+ # compatibility code during the snapshot migration
+ origin_visit_info = self.origin_visit_get_by(origin, visit,
+ cur=cur)
+ if origin_visit_info is None:
+ return None
+ ret = {'id': None}
+ ret['branches'] = origin_visit_info['occurrences']
+ return ret
+
+ return None
+
@db_transaction
def occurrence_add(self, occurrences, cur=None):
"""Add occurrences to the storage
diff --git a/swh/storage/tests/test_storage.py b/swh/storage/tests/test_storage.py
--- a/swh/storage/tests/test_storage.py
+++ b/swh/storage/tests/test_storage.py
@@ -380,7 +380,7 @@
self.occurrence = {
'branch': b'master',
- 'target': b'67890123456789012345',
+ 'target': self.revision['id'],
'target_type': 'revision',
}
@@ -584,6 +584,21 @@
},
}
+ self.snapshot = {
+ 'id': b'23456654322345665432',
+ 'branches': {
+ self.occurrence['branch']: {
+ 'target': self.occurrence['target'],
+ 'target_type': self.occurrence['target_type'],
+ },
+ },
+ }
+
+ self.empty_snapshot = {
+ 'id': b'12345543211234554321',
+ 'branches': {},
+ }
+
def tearDown(self):
self.reset_storage_tables()
super().tearDown()
@@ -1603,6 +1618,93 @@
self.assertEquals(len(actual_occurrence), 1)
self.assertEquals(actual_occurrence[0], expected_occurrence)
+ @istest
+ def snapshot_add_get_empty(self):
+ origin_id = self.storage.origin_add_one(self.origin)
+ origin_visit1 = self.storage.origin_visit_add(origin_id,
+ self.date_visit1)
+ visit_id = origin_visit1['visit']
+
+ self.storage.snapshot_add(origin_id, visit_id, self.empty_snapshot)
+
+ by_id = self.storage.snapshot_get(self.empty_snapshot['id'])
+ self.assertEqual(by_id, self.empty_snapshot)
+
+ by_ov = self.storage.snapshot_get_by_origin_visit(origin_id, visit_id)
+ self.assertEqual(by_ov, self.empty_snapshot)
+
+ @istest
+ def snapshot_add_get(self):
+ origin_id = self.storage.origin_add_one(self.origin)
+ origin_visit1 = self.storage.origin_visit_add(origin_id,
+ self.date_visit1)
+ visit_id = origin_visit1['visit']
+ self.storage.revision_add([self.revision])
+
+ self.storage.snapshot_add(origin_id, visit_id, self.snapshot)
+
+ by_id = self.storage.snapshot_get(self.snapshot['id'])
+ self.assertEqual(by_id, self.snapshot)
+
+ by_ov = self.storage.snapshot_get_by_origin_visit(origin_id, visit_id)
+ self.assertEqual(by_ov, self.snapshot)
+
+ # retrocompat test
+ origin_visit_info = self.storage.origin_visit_get_by(origin_id, visit_id)
+ self.assertEqual(origin_visit_info['occurrences'], self.snapshot['branches'])
+
+
+ @istest
+ def snapshot_get_nonexistent(self):
+ bogus_snapshot_id = b'bogus snapshot id 00'
+ bogus_origin_id = 1
+ bogus_visit_id = 1
+
+ by_id = self.storage.snapshot_get(bogus_snapshot_id)
+ self.assertIsNone(by_id)
+
+ by_ov = self.storage.snapshot_get_by_origin_visit(bogus_origin_id,
+ bogus_visit_id)
+ self.assertIsNone(by_ov)
+
+ @istest
+ def snapshot_get_retrocompat(self):
+ empty_retro_snapshot = {
+ 'id': None,
+ 'branches': {},
+ }
+ origin_id = self.storage.origin_add_one(self.origin)
+ origin_visit1 = self.storage.origin_visit_add(origin_id,
+ self.date_visit1)
+ visit_id = origin_visit1['visit']
+
+ by_ov = self.storage.snapshot_get_by_origin_visit(origin_id, visit_id)
+
+ self.assertEqual(by_ov, empty_retro_snapshot)
+
+ self.storage.revision_add([self.revision])
+ self.storage.occurrence_add([{
+ 'origin': origin_id,
+ 'visit': visit_id,
+ 'branch': self.occurrence['branch'],
+ 'target': self.occurrence['target'],
+ 'target_type': self.occurrence['target_type'],
+ }])
+
+ one_branch_retro_snapshot = {
+ 'id': None,
+ 'branches': {
+ self.occurrence['branch']: {
+ 'target': self.occurrence['target'],
+ 'target_type': self.occurrence['target_type'],
+ },
+ },
+ }
+
+ by_ov = self.storage.snapshot_get_by_origin_visit(origin_id, visit_id)
+ self.assertEqual(by_ov, one_branch_retro_snapshot)
+
+
@istest
def entity_get_from_lister_metadata(self):
self.storage.entity_add([self.entity1])

File Metadata

Mime Type
text/plain
Expires
Nov 4 2024, 6:13 PM (11 w, 1 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3220273

Event Timeline