Page MenuHomeSoftware Heritage

D268.diff
No OneTemporary

D268.diff

diff --git a/sql/swh-enums.sql b/sql/swh-enums.sql
--- a/sql/swh-enums.sql
+++ b/sql/swh-enums.sql
@@ -40,9 +40,12 @@
create type revision_type as enum ('git', 'tar', 'dsc', 'svn');
comment on type revision_type is 'Possible revision types';
-create type object_type as enum ('content', 'directory', 'revision', 'release');
+create type object_type as enum ('content', 'directory', 'revision', 'release', 'snapshot');
comment on type object_type is 'Data object types stored in data model';
+create type snapshot_target as enum ('content', 'directory', 'revision', 'release', 'snapshot', 'alias');
+comment on type snapshot_target is 'Types of targets for snapshot branches';
+
create type origin_visit_status as enum (
'ongoing',
'full',
diff --git a/sql/swh-func.sql b/sql/swh-func.sql
--- a/sql/swh-func.sql
+++ b/sql/swh-func.sql
@@ -130,6 +130,17 @@
) on commit drop;
$$;
+-- create a temporary table for the branches of a snapshot
+create or replace function swh_mktemp_snapshot_branch()
+ returns void
+ language sql
+as $$
+ create temporary table tmp_snapshot_branch (
+ name bytea not null,
+ target bytea,
+ target_type snapshot_target
+ ) on commit drop;
+$$;
create or replace function swh_mktemp_tool()
returns void
@@ -937,6 +948,70 @@
end
$$;
+create or replace function swh_snapshot_add(origin bigint, visit bigint, snapshot_id snapshot.id%type)
+ returns void
+ language plpgsql
+as $$
+declare
+ snapshot_object_id snapshot.object_id%type;
+begin
+ select object_id from snapshot where id = snapshot_id into snapshot_object_id;
+ if snapshot_object_id is null then
+ insert into snapshot (id) values (snapshot_id) returning object_id into snapshot_object_id;
+ with all_branches(name, target_type, target) as (
+ select name, target_type, target from tmp_snapshot_branch
+ ), inserted as (
+ insert into snapshot_branch (name, target_type, target)
+ select name, target_type, target from all_branches
+ on conflict do nothing
+ returning object_id
+ )
+ insert into snapshot_branches (snapshot_id, branch_id)
+ select snapshot_object_id, object_id as branch_id from inserted
+ union all
+ select snapshot_object_id, object_id as branch_id
+ from all_branches ab
+ join snapshot_branch sb
+ on sb.name = ab.name
+ and sb.target_type is not distinct from ab.target_type
+ and sb.target is not distinct from ab.target;
+ end if;
+ update origin_visit ov
+ set snapshot_id = snapshot_object_id
+ where ov.origin=swh_snapshot_add.origin and ov.visit=swh_snapshot_add.visit;
+end;
+$$;
+
+create type snapshot_result as (
+ snapshot_id sha1_git,
+ name bytea,
+ target bytea,
+ target_type snapshot_target
+);
+
+create or replace function swh_snapshot_get_by_id(id snapshot.id%type)
+ returns setof snapshot_result
+ language sql
+ stable
+as $$
+ select
+ swh_snapshot_get_by_id.id as snapshot_id, name, target, target_type
+ from snapshot_branches
+ inner join snapshot_branch on snapshot_branches.branch_id = snapshot_branch.object_id
+ where snapshot_id = (select object_id from snapshot where snapshot.id = swh_snapshot_get_by_id.id)
+$$;
+
+create or replace function swh_snapshot_get_by_origin_visit(origin_id bigint, visit_id bigint)
+ returns snapshot.id%type
+ language sql
+ stable
+as $$
+ select snapshot.id
+ from origin_visit
+ left join snapshot
+ on snapshot.object_id = origin_visit.snapshot_id
+ where origin_visit.origin=origin_id and origin_visit.visit=visit_id;
+$$;
-- Absolute path: directory reference + complete path relative to it
create type content_dir as (
diff --git a/sql/swh-indexes.sql b/sql/swh-indexes.sql
--- a/sql/swh-indexes.sql
+++ b/sql/swh-indexes.sql
@@ -153,6 +153,34 @@
alter table revision_history add constraint revision_history_id_fkey foreign key (id) references revision(id) not valid;
alter table revision_history validate constraint revision_history_id_fkey;
+-- snapshot
+create unique index concurrently snapshot_pkey on snapshot(object_id);
+alter table snapshot add primary key using index snapshot_pkey;
+
+create unique index concurrently on snapshot(id);
+
+-- snapshot_branch
+create unique index concurrently snapshot_branch_pkey on snapshot_branch(object_id);
+alter table snapshot_branch add primary key using index snapshot_branch_pkey;
+
+create unique index concurrently on snapshot_branch (target_type, target, name);
+alter table snapshot_branch add constraint snapshot_branch_target_check check ((target_type is null) = (target is null)) not valid;
+alter table snapshot_branch validate constraint snapshot_branch_target_check;
+alter table snapshot_branch add constraint snapshot_target_check check (target_type not in ('content', 'directory', 'revision', 'release', 'snapshot') or length(target) = 20) not valid;
+alter table snapshot_branch validate constraint snapshot_target_check;
+
+create unique index concurrently on snapshot_branch (name) where target_type is null and target is null;
+
+-- snapshot_branches
+create unique index concurrently snapshot_branches_pkey on snapshot_branches(snapshot_id, branch_id);
+alter table snapshot_branches add primary key using index snapshot_branches_pkey;
+
+alter table snapshot_branches add constraint snapshot_branches_snapshot_id_fkey foreign key (snapshot_id) references snapshot(object_id) not valid;
+alter table snapshot_branches validate constraint snapshot_branches_snapshot_id_fkey;
+
+alter table snapshot_branches add constraint snapshot_branches_branch_id_fkey foreign key (branch_id) references snapshot_branch(object_id) not valid;
+alter table snapshot_branches validate constraint snapshot_branches_branch_id_fkey;
+
-- origin_visit
create unique index concurrently origin_visit_pkey on origin_visit(origin, visit);
alter table origin_visit add primary key using index origin_visit_pkey;
@@ -162,6 +190,9 @@
alter table origin_visit add constraint origin_visit_origin_fkey foreign key (origin) references origin(id) not valid;
alter table origin_visit validate constraint origin_visit_origin_fkey;
+alter table origin_visit add constraint origin_visit_snapshot_id_fkey foreign key (snapshot_id) references snapshot(object_id) not valid;
+alter table origin_visit validate constraint origin_visit_snapshot_id_fkey;
+
-- occurrence_history
create unique index concurrently occurrence_history_pkey on occurrence_history(object_id);
alter table occurrence_history add primary key using index occurrence_history_pkey;
@@ -180,7 +211,6 @@
alter table occurrence add constraint occurrence_origin_fkey foreign key (origin) references origin(id) not valid;
alter table occurrence validate constraint occurrence_origin_fkey;
-
-- release
create unique index concurrently release_pkey on release(id);
alter table release add primary key using index release_pkey;
diff --git a/sql/swh-schema.sql b/sql/swh-schema.sql
--- a/sql/swh-schema.sql
+++ b/sql/swh-schema.sql
@@ -14,7 +14,7 @@
);
insert into dbversion(version, release, description)
- values(114, now(), 'Work In Progress');
+ values(115, now(), 'Work In Progress');
-- a SHA1 checksum (not necessarily originating from Git)
create domain sha1 as bytea check (length(value) = 20);
@@ -293,11 +293,12 @@
-- The timestamps at which Software Heritage has made a visit of the given origin.
create table origin_visit
(
- origin bigint not null,
- visit bigint not null,
- date timestamptz not null,
- status origin_visit_status not null,
- metadata jsonb
+ origin bigint not null,
+ visit bigint not null,
+ date timestamptz not null,
+ status origin_visit_status not null,
+ metadata jsonb,
+ snapshot_id bigint
);
comment on column origin_visit.origin is 'Visited origin';
@@ -305,6 +306,7 @@
comment on column origin_visit.date is 'Visit date for that origin';
comment on column origin_visit.status is 'Visit status for that origin';
comment on column origin_visit.metadata is 'Metadata associated with the visit';
+comment on column origin_visit.snapshot_id is 'id of the snapshot associated with the visit';
-- The content of software origins is indexed starting from top-level pointers
@@ -321,7 +323,8 @@
target_type object_type not null, -- ref target type
visits bigint[] not null, -- the visits where that occurrence was valid. References
-- origin_visit(visit), where o_h.origin = origin_visit.origin.
- object_id bigserial not null -- short object identifier
+ object_id bigserial not null, -- short object identifier
+ snapshot_branch_id bigint
);
-- Materialized view of occurrence_history, storing the *current* value of each
@@ -334,6 +337,25 @@
target_type object_type not null
);
+
+create table snapshot (
+ object_id bigserial not null,
+ id sha1_git
+);
+
+create table snapshot_branch (
+ object_id bigserial not null,
+ name bytea not null,
+ target bytea,
+ target_type snapshot_target
+);
+
+create table snapshot_branches (
+ snapshot_id bigint not null,
+ branch_id bigint not null
+);
+
+
-- A "memorable" point in the development history of a project.
--
-- Synonyms/mappings:
diff --git a/swh/storage/api/client.py b/swh/storage/api/client.py
--- a/swh/storage/api/client.py
+++ b/swh/storage/api/client.py
@@ -104,6 +104,17 @@
def occurrence_add(self, occurrences):
return self.post('occurrence/add', {'occurrences': occurrences})
+ def snapshot_add(self, origin, visit, snapshot):
+ return self.post('snapshot/add', {'origin': origin, 'visit': visit,
+ 'snapshot': snapshot})
+
+ def snapshot_get(self, snapshot_id):
+ return self.post('snapshot', {'snapshot_id': snapshot_id})
+
+ def snapshot_get_by_origin_visit(self, origin, visit):
+ return self.post('snapshot/by_origin_visit', {'origin': origin,
+ 'visit': visit})
+
def origin_get(self, origin):
return self.post('origin/get', {'origin': origin})
diff --git a/swh/storage/api/server.py b/swh/storage/api/server.py
--- a/swh/storage/api/server.py
+++ b/swh/storage/api/server.py
@@ -192,6 +192,22 @@
return encode_data(g.storage.occurrence_add(**decode_request(request)))
+@app.route('/snapshot/add', methods=['POST'])
+def snapshot_add():
+ return encode_data(g.storage.snapshot_add(**decode_request(request)))
+
+
+@app.route('/snapshot', methods=['POST'])
+def snapshot_get():
+ return encode_data(g.storage.snapshot_get(**decode_request(request)))
+
+
+@app.route('/snapshot/by_origin_visit', methods=['POST'])
+def snapshot_get_by_origin_visit():
+ return encode_data(g.storage.snapshot_get_by_origin_visit(
+ **decode_request(request)))
+
+
@app.route('/origin/get', methods=['POST'])
def origin_get():
return encode_data(g.storage.origin_get(**decode_request(request)))
diff --git a/swh/storage/db.py b/swh/storage/db.py
--- a/swh/storage/db.py
+++ b/swh/storage/db.py
@@ -205,6 +205,9 @@
@stored_procedure('swh_mktemp_occurrence_history')
def mktemp_occurrence_history(self, cur=None): pass
+ @stored_procedure('swh_mktemp_snapshot_branch')
+ def mktemp_snapshot_branch(self, cur=None): pass
+
@stored_procedure('swh_mktemp_entity_lister')
def mktemp_entity_lister(self, cur=None): pass
@@ -316,6 +319,44 @@
yield from cursor_to_bytes(cur)
+ def snapshot_exists(self, snapshot_id, cur=None):
+ """Check whether a snapshot with the given id exists"""
+ cur = self._cursor(cur)
+
+ cur.execute("""SELECT 1 FROM snapshot where id=%s""", (snapshot_id,))
+
+ return bool(cur.fetchone())
+
+ def snapshot_add(self, origin, visit, snapshot_id, cur=None):
+ """Add a snapshot for origin/visit from the temporary table"""
+ cur = self._cursor(cur)
+
+ cur.execute("""SELECT swh_snapshot_add(%s, %s, %s)""",
+ (origin, visit, snapshot_id))
+
+ snapshot_get_cols = ['snapshot_id', 'name', 'target', 'target_type']
+
+ def snapshot_get_by_id(self, snapshot_id, cur=None):
+ cur = self._cursor(cur)
+ query = """\
+ SELECT %s FROM swh_snapshot_get_by_id(%%s)
+ """ % ', '.join(self.snapshot_get_cols)
+
+ cur.execute(query, (snapshot_id,))
+
+ yield from cursor_to_bytes(cur)
+
+ def snapshot_get_by_origin_visit(self, origin_id, visit_id, cur=None):
+ cur = self._cursor(cur)
+ query = """\
+ SELECT swh_snapshot_get_by_origin_visit(%s, %s)
+ """
+
+ cur.execute(query, (origin_id, visit_id))
+ ret = cur.fetchone()
+ if ret:
+ return line_to_bytes(ret)[0]
+
content_find_cols = ['sha1', 'sha1_git', 'sha256', 'blake2s256', 'length',
'ctime', 'status']
diff --git a/swh/storage/storage.py b/swh/storage/storage.py
--- a/swh/storage/storage.py
+++ b/swh/storage/storage.py
@@ -760,6 +760,134 @@
)
@db_transaction
+ def snapshot_add(self, origin, visit, snapshot, cur=None):
+ """Add a snapshot for the given origin/visit couple
+
+ Args:
+ origin (int): id of the origin
+ visit (int): id of the visit
+ snapshot (dict): the snapshot to add to the visit, containing the
+ following keys:
+
+ - **id** (:class:`bytes`): id of the snapshot
+ - **branches** (:class:`dict`): branches the snapshot contains,
+ mapping the branch name (:class:`bytes`) to the branch target,
+ itself a :class:`dict` (or ``None`` if the branch points to an
+ unknown object)
+
+ - **target_type** (:class:`str`): one of ``content``,
+ ``directory``, ``revision``, ``release``,
+ ``snapshot``, ``alias``
+ - **target** (:class:`bytes`): identifier of the target
+ (currently a ``sha1_git`` for all object kinds, or the name
+ of the target branch for aliases)
+ """
+ db = self.db
+
+ if not db.snapshot_exists(snapshot['id'], cur):
+ db.mktemp_snapshot_branch(cur)
+ db.copy_to(
+ (
+ {
+ 'name': name,
+ 'target': info['target'] if info else None,
+ 'target_type': info['target_type'] if info else None,
+ }
+ for name, info in snapshot['branches'].items()
+ ),
+ 'tmp_snapshot_branch',
+ ['name', 'target', 'target_type'],
+ cur,
+ )
+
+ db.snapshot_add(origin, visit, snapshot['id'], cur)
+
+ # TODO: drop this compat feature
+ occurrences = []
+ for name, info in snapshot['branches'].items():
+ if not info:
+ target = b'\x00' * 20
+ target_type = 'revision'
+ elif info['target_type'] == 'alias':
+ continue
+ else:
+ target = info['target']
+ target_type = info['target_type']
+
+ occurrences.append({
+ 'origin': origin,
+ 'visit': visit,
+ 'branch': name,
+ 'target': target,
+ 'target_type': target_type,
+ })
+
+ self.occurrence_add(occurrences)
+
+ @db_transaction
+ def snapshot_get(self, snapshot_id, cur=None):
+ """Get the snapshot with the given id
+
+ Args:
+ snapshot_id (bytes): id of the snapshot
+ Returns:
+ dict: a snapshot with two keys:
+ id:: identifier for the snapshot
+ branches:: a list of branches contained by the snapshot
+
+ """
+ db = self.db
+
+ branches = {}
+ for branch in db.snapshot_get_by_id(snapshot_id, cur):
+ branch = dict(zip(db.snapshot_get_cols, branch))
+ del branch['snapshot_id']
+ name = branch.pop('name')
+ if branch == {'target': None, 'target_type': None}:
+ branch = None
+ branches[name] = branch
+
+ if branches:
+ return {'id': snapshot_id, 'branches': branches}
+
+ if db.snapshot_exists(snapshot_id, cur):
+ # empty snapshot
+ return {'id': snapshot_id, 'branches': {}}
+
+ return None
+
+ @db_transaction
+ def snapshot_get_by_origin_visit(self, origin, visit, cur=None):
+ """Get the snapshot for the given origin visit
+
+ Args:
+ origin (int): the origin identifier
+ visit (int): the visit identifier
+ Returns:
+ dict: a snapshot with two keys:
+ id:: identifier for the snapshot
+ branches:: a dictionary containing the snapshot branch information
+
+ """
+ db = self.db
+
+ snapshot_id = db.snapshot_get_by_origin_visit(origin, visit, cur)
+
+ if snapshot_id:
+ return self.snapshot_get(snapshot_id, cur=cur)
+ else:
+ # compatibility code during the snapshot migration
+ origin_visit_info = self.origin_visit_get_by(origin, visit,
+ cur=cur)
+ if origin_visit_info is None:
+ return None
+ ret = {'id': None}
+ ret['branches'] = origin_visit_info['occurrences']
+ return ret
+
+ return None
+
+ @db_transaction
def occurrence_add(self, occurrences, cur=None):
"""Add occurrences to the storage
diff --git a/swh/storage/tests/test_storage.py b/swh/storage/tests/test_storage.py
--- a/swh/storage/tests/test_storage.py
+++ b/swh/storage/tests/test_storage.py
@@ -380,7 +380,7 @@
self.occurrence = {
'branch': b'master',
- 'target': b'67890123456789012345',
+ 'target': self.revision['id'],
'target_type': 'revision',
}
@@ -584,6 +584,57 @@
},
}
+ self.snapshot = {
+ 'id': hash_to_bytes('2498dbf535f882bc7f9a18fb16c9ad27fda7bab7'),
+ 'branches': {
+ self.occurrence['branch']: {
+ 'target': self.occurrence['target'],
+ 'target_type': self.occurrence['target_type'],
+ },
+ },
+ }
+
+ self.empty_snapshot = {
+ 'id': hash_to_bytes('1a8893e6a86f444e8be8e7bda6cb34fb1735a00e'),
+ 'branches': {},
+ }
+
+ self.complete_snapshot = {
+ 'id': hash_to_bytes('6e65b86363953b780d92b0a928f3e8fcdd10db36'),
+ 'branches': {
+ b'directory': {
+ 'target': hash_to_bytes(
+ '1bd0e65f7d2ff14ae994de17a1e7fe65111dcad8'),
+ 'target_type': 'directory',
+ },
+ b'content': {
+ 'target': hash_to_bytes(
+ 'fe95a46679d128ff167b7c55df5d02356c5a1ae1'),
+ 'target_type': 'content',
+ },
+ b'alias': {
+ 'target': b'revision',
+ 'target_type': 'alias',
+ },
+ b'revision': {
+ 'target': hash_to_bytes(
+ 'aafb16d69fd30ff58afdd69036a26047f3aebdc6'),
+ 'target_type': 'revision',
+ },
+ b'release': {
+ 'target': hash_to_bytes(
+ '7045404f3d1c54e6473c71bbb716529fbad4be24'),
+ 'target_type': 'release',
+ },
+ b'snapshot': {
+ 'target': hash_to_bytes(
+ '1a8893e6a86f444e8be8e7bda6cb34fb1735a00e'),
+ 'target_type': 'snapshot',
+ },
+ b'dangling': None,
+ }
+ }
+
def tearDown(self):
self.reset_storage_tables()
super().tearDown()
@@ -1604,6 +1655,129 @@
self.assertEquals(actual_occurrence[0], expected_occurrence)
@istest
+ def snapshot_add_get_empty(self):
+ origin_id = self.storage.origin_add_one(self.origin)
+ origin_visit1 = self.storage.origin_visit_add(origin_id,
+ self.date_visit1)
+ visit_id = origin_visit1['visit']
+
+ self.storage.snapshot_add(origin_id, visit_id, self.empty_snapshot)
+
+ by_id = self.storage.snapshot_get(self.empty_snapshot['id'])
+ self.assertEqual(by_id, self.empty_snapshot)
+
+ by_ov = self.storage.snapshot_get_by_origin_visit(origin_id, visit_id)
+ self.assertEqual(by_ov, self.empty_snapshot)
+
+ @istest
+ def snapshot_add_get_complete(self):
+ origin_id = self.storage.origin_add_one(self.origin)
+ origin_visit1 = self.storage.origin_visit_add(origin_id,
+ self.date_visit1)
+ visit_id = origin_visit1['visit']
+
+ self.storage.snapshot_add(origin_id, visit_id, self.complete_snapshot)
+
+ by_id = self.storage.snapshot_get(self.complete_snapshot['id'])
+ self.assertEqual(by_id, self.complete_snapshot)
+
+ by_ov = self.storage.snapshot_get_by_origin_visit(origin_id, visit_id)
+ self.assertEqual(by_ov, self.complete_snapshot)
+
+ @istest
+ def snapshot_add_get(self):
+ origin_id = self.storage.origin_add_one(self.origin)
+ origin_visit1 = self.storage.origin_visit_add(origin_id,
+ self.date_visit1)
+ visit_id = origin_visit1['visit']
+
+ self.storage.snapshot_add(origin_id, visit_id, self.snapshot)
+
+ by_id = self.storage.snapshot_get(self.snapshot['id'])
+ self.assertEqual(by_id, self.snapshot)
+
+ by_ov = self.storage.snapshot_get_by_origin_visit(origin_id, visit_id)
+ self.assertEqual(by_ov, self.snapshot)
+
+ # retrocompat test
+ origin_visit_info = self.storage.origin_visit_get_by(origin_id,
+ visit_id)
+ self.assertEqual(origin_visit_info['occurrences'],
+ self.snapshot['branches'])
+
+ @istest
+ def snapshot_add_twice(self):
+ origin_id = self.storage.origin_add_one(self.origin)
+ origin_visit1 = self.storage.origin_visit_add(origin_id,
+ self.date_visit1)
+ visit1_id = origin_visit1['visit']
+ self.storage.snapshot_add(origin_id, visit1_id, self.snapshot)
+
+ by_ov1 = self.storage.snapshot_get_by_origin_visit(origin_id,
+ visit1_id)
+ self.assertEqual(by_ov1, self.snapshot)
+
+ origin_visit2 = self.storage.origin_visit_add(origin_id,
+ self.date_visit2)
+ visit2_id = origin_visit2['visit']
+
+ self.storage.snapshot_add(origin_id, visit2_id, self.snapshot)
+
+ by_ov2 = self.storage.snapshot_get_by_origin_visit(origin_id,
+ visit2_id)
+ self.assertEqual(by_ov2, self.snapshot)
+
+ @istest
+ def snapshot_get_nonexistent(self):
+ bogus_snapshot_id = b'bogus snapshot id 00'
+ bogus_origin_id = 1
+ bogus_visit_id = 1
+
+ by_id = self.storage.snapshot_get(bogus_snapshot_id)
+ self.assertIsNone(by_id)
+
+ by_ov = self.storage.snapshot_get_by_origin_visit(bogus_origin_id,
+ bogus_visit_id)
+ self.assertIsNone(by_ov)
+
+ @istest
+ def snapshot_get_retrocompat(self):
+ empty_retro_snapshot = {
+ 'id': None,
+ 'branches': {},
+ }
+ origin_id = self.storage.origin_add_one(self.origin)
+ origin_visit1 = self.storage.origin_visit_add(origin_id,
+ self.date_visit1)
+ visit_id = origin_visit1['visit']
+
+ by_ov = self.storage.snapshot_get_by_origin_visit(origin_id, visit_id)
+
+ self.assertEqual(by_ov, empty_retro_snapshot)
+
+ self.storage.revision_add([self.revision])
+ self.storage.occurrence_add([{
+ 'origin': origin_id,
+ 'visit': visit_id,
+ 'branch': self.occurrence['branch'],
+ 'target': self.occurrence['target'],
+ 'target_type': self.occurrence['target_type'],
+ }])
+
+ one_branch_retro_snapshot = {
+ 'id': None,
+ 'branches': {
+ self.occurrence['branch']: {
+ 'target': self.occurrence['target'],
+ 'target_type': self.occurrence['target_type'],
+ },
+ },
+ }
+
+ by_ov = self.storage.snapshot_get_by_origin_visit(origin_id, visit_id)
+ self.assertEqual(by_ov, one_branch_retro_snapshot)
+
+ @istest
def entity_get_from_lister_metadata(self):
self.storage.entity_add([self.entity1])

File Metadata

Mime Type
text/plain
Expires
Nov 4 2024, 5:54 PM (9 w, 5 h ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3217559

Event Timeline