Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7066134
D268.id944.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
24 KB
Subscribers
None
D268.id944.diff
View Options
diff --git a/sql/swh-enums.sql b/sql/swh-enums.sql
--- a/sql/swh-enums.sql
+++ b/sql/swh-enums.sql
@@ -40,9 +40,12 @@
create type revision_type as enum ('git', 'tar', 'dsc', 'svn');
comment on type revision_type is 'Possible revision types';
-create type object_type as enum ('content', 'directory', 'revision', 'release');
+create type object_type as enum ('content', 'directory', 'revision', 'release', 'snapshot');
comment on type object_type is 'Data object types stored in data model';
+-- Kinds of targets a snapshot branch can point to.
+-- NOTE(review): 'alias' presumably means the target holds the name of another
+-- branch rather than an object id (it is the only kind exempt from the 20-byte
+-- length check in snapshot_target_check) -- confirm.
+create type snapshot_target as enum ('content', 'directory', 'revision', 'release', 'snapshot', 'alias');
+comment on type snapshot_target is 'Types of targets for snapshot branches';
+
create type origin_visit_status as enum (
'ongoing',
'full',
diff --git a/sql/swh-func.sql b/sql/swh-func.sql
--- a/sql/swh-func.sql
+++ b/sql/swh-func.sql
@@ -130,6 +130,17 @@
) on commit drop;
$$;
+-- create a temporary table for the branches of a snapshot
+--
+-- The table is created with "on commit drop", so it must be created, filled
+-- and consumed (swh_snapshot_add reads from it) within a single transaction.
+-- target and target_type are nullable: both are left null for a branch that
+-- has no target.
+create or replace function swh_mktemp_snapshot_branch()
+ returns void
+ language sql
+as $$
+ create temporary table tmp_snapshot_branch (
+ name bytea not null,
+ target bytea,
+ target_type snapshot_target
+ ) on commit drop;
+$$;
create or replace function swh_mktemp_tool()
returns void
@@ -937,6 +948,70 @@
end
$$;
+-- Record the snapshot identified by snapshot_id and attach it to the given
+-- origin visit.
+--
+-- If no snapshot row with that id exists yet, one is created and its branches
+-- are loaded from tmp_snapshot_branch (which the caller must have created and
+-- filled beforehand). If the snapshot already exists, the temporary table is
+-- not read at all. In both cases origin_visit.snapshot_id is updated to the
+-- snapshot's internal object_id.
+create or replace function swh_snapshot_add(origin bigint, visit bigint, snapshot_id snapshot.id%type)
+ returns void
+ language plpgsql
+as $$
+declare
+ snapshot_object_id snapshot.object_id%type;
+begin
+ -- look up the internal identifier of an already-known snapshot, if any
+ select object_id from snapshot where id = snapshot_id into snapshot_object_id;
+ if snapshot_object_id is null then
+ insert into snapshot (id) values (snapshot_id) returning object_id into snapshot_object_id;
+ -- Branch rows are shared between snapshots: insert the ones that do not
+ -- exist yet ("inserted"), then link the snapshot to both the new rows and
+ -- the pre-existing ones. Rows written by the "inserted" CTE are not visible
+ -- to the join on snapshot_branch within the same statement, so the two
+ -- halves of the "union all" do not overlap.
+ -- NOTE(review): a branch row committed concurrently by another transaction
+ -- would be skipped by "on conflict do nothing" and may also be invisible to
+ -- the join below -- confirm whether callers can race here.
+ with all_branches(name, target_type, target) as (
+ select name, target_type, target from tmp_snapshot_branch
+ ), inserted as (
+ insert into snapshot_branch (name, target_type, target)
+ select name, target_type, target from all_branches
+ on conflict do nothing
+ returning object_id
+ )
+ insert into snapshot_branches (snapshot_id, branch_id)
+ select snapshot_object_id, object_id as branch_id from inserted
+ union all
+ select snapshot_object_id, object_id as branch_id
+ from all_branches ab
+ join snapshot_branch sb
+ on sb.name = ab.name
+ -- "is not distinct from" so that null target/target_type also match
+ and sb.target_type is not distinct from ab.target_type
+ and sb.target is not distinct from ab.target;
+ end if;
+ -- parameters are qualified with the function name to tell them apart from
+ -- the origin_visit columns of the same name
+ update origin_visit ov
+ set snapshot_id = snapshot_object_id
+ where ov.origin=swh_snapshot_add.origin and ov.visit=swh_snapshot_add.visit;
+end;
+$$;
+
+-- Row type returned by swh_snapshot_get_by_id: one row per branch, each
+-- carrying the id of the snapshot it belongs to.
+create type snapshot_result as (
+ snapshot_id sha1_git,
+ name bytea,
+ target bytea,
+ target_type snapshot_target
+);
+
+-- Return the branches of the snapshot with the given id, one row per branch.
+--
+-- The result is empty both when the snapshot has no branches and when no
+-- snapshot with that id exists; callers that need to tell the two cases apart
+-- have to check the snapshot table separately.
+create or replace function swh_snapshot_get_by_id(id snapshot.id%type)
+ returns setof snapshot_result
+ language sql
+ stable
+as $$
+ select
+ swh_snapshot_get_by_id.id as snapshot_id, name, target, target_type
+ from snapshot_branches
+ inner join snapshot_branch on snapshot_branches.branch_id = snapshot_branch.object_id
+ -- snapshot_id here is the snapshot_branches column (an internal object_id),
+ -- not the output alias defined in the select list
+ where snapshot_id = (select object_id from snapshot where snapshot.id = swh_snapshot_get_by_id.id)
+$$;
+
+-- Return the id of the snapshot attached to the given origin visit.
+--
+-- The result is null when the visit does not exist, and also (because of the
+-- left join) when the visit exists but has no snapshot attached.
+create or replace function swh_snapshot_get_by_origin_visit(origin_id bigint, visit_id bigint)
+ returns snapshot.id%type
+ language sql
+ stable
+as $$
+ select snapshot.id
+ from origin_visit
+ left join snapshot
+ on snapshot.object_id = origin_visit.snapshot_id
+ where origin_visit.origin=origin_id and origin_visit.visit=visit_id;
+$$;
-- Absolute path: directory reference + complete path relative to it
create type content_dir as (
diff --git a/sql/swh-indexes.sql b/sql/swh-indexes.sql
--- a/sql/swh-indexes.sql
+++ b/sql/swh-indexes.sql
@@ -153,6 +153,34 @@
alter table revision_history add constraint revision_history_id_fkey foreign key (id) references revision(id) not valid;
alter table revision_history validate constraint revision_history_id_fkey;
+-- snapshot
+create unique index concurrently snapshot_pkey on snapshot(object_id);
+alter table snapshot add primary key using index snapshot_pkey;
+
+-- the intrinsic identifier is unique as well; swh_snapshot_add looks snapshots up by it
+create unique index concurrently on snapshot(id);
+
+-- snapshot_branch
+create unique index concurrently snapshot_branch_pkey on snapshot_branch(object_id);
+alter table snapshot_branch add primary key using index snapshot_branch_pkey;
+
+-- one row per distinct (target_type, target, name) triple
+create unique index concurrently on snapshot_branch (target_type, target, name);
+-- target and target_type are either both set or both null
+alter table snapshot_branch add constraint snapshot_branch_target_check check ((target_type is null) = (target is null)) not valid;
+alter table snapshot_branch validate constraint snapshot_branch_target_check;
+-- every target kind listed here must carry a 20-byte identifier; 'alias' is not listed
+alter table snapshot_branch add constraint snapshot_target_check check (target_type not in ('content', 'directory', 'revision', 'release', 'snapshot') or length(target) = 20) not valid;
+alter table snapshot_branch validate constraint snapshot_target_check;
+
+-- a unique index treats nulls as distinct, so the three-column index above
+-- does not deduplicate branches without a target; this partial index does
+create unique index concurrently on snapshot_branch (name) where target_type is null and target is null;
+
+-- snapshot_branches
+create unique index concurrently snapshot_branches_pkey on snapshot_branches(snapshot_id, branch_id);
+alter table snapshot_branches add primary key using index snapshot_branches_pkey;
+
+alter table snapshot_branches add constraint snapshot_branches_snapshot_id_fkey foreign key (snapshot_id) references snapshot(object_id) not valid;
+alter table snapshot_branches validate constraint snapshot_branches_snapshot_id_fkey;
+
+alter table snapshot_branches add constraint snapshot_branches_branch_id_fkey foreign key (branch_id) references snapshot_branch(object_id) not valid;
+alter table snapshot_branches validate constraint snapshot_branches_branch_id_fkey;
+
-- origin_visit
create unique index concurrently origin_visit_pkey on origin_visit(origin, visit);
alter table origin_visit add primary key using index origin_visit_pkey;
@@ -162,6 +190,9 @@
alter table origin_visit add constraint origin_visit_origin_fkey foreign key (origin) references origin(id) not valid;
alter table origin_visit validate constraint origin_visit_origin_fkey;
+alter table origin_visit add constraint origin_visit_snapshot_id_fkey foreign key (snapshot_id) references snapshot(object_id) not valid;
+alter table origin_visit validate constraint origin_visit_snapshot_id_fkey;
+
-- occurrence_history
create unique index concurrently occurrence_history_pkey on occurrence_history(object_id);
alter table occurrence_history add primary key using index occurrence_history_pkey;
@@ -180,7 +211,6 @@
alter table occurrence add constraint occurrence_origin_fkey foreign key (origin) references origin(id) not valid;
alter table occurrence validate constraint occurrence_origin_fkey;
-
-- release
create unique index concurrently release_pkey on release(id);
alter table release add primary key using index release_pkey;
diff --git a/sql/swh-schema.sql b/sql/swh-schema.sql
--- a/sql/swh-schema.sql
+++ b/sql/swh-schema.sql
@@ -14,7 +14,7 @@
);
insert into dbversion(version, release, description)
- values(114, now(), 'Work In Progress');
+ values(115, now(), 'Work In Progress');
-- a SHA1 checksum (not necessarily originating from Git)
create domain sha1 as bytea check (length(value) = 20);
@@ -293,11 +293,12 @@
-- The timestamps at which Software Heritage has made a visit of the given origin.
create table origin_visit
(
- origin bigint not null,
- visit bigint not null,
- date timestamptz not null,
- status origin_visit_status not null,
- metadata jsonb
+ origin bigint not null,
+ visit bigint not null,
+ date timestamptz not null,
+ status origin_visit_status not null,
+ metadata jsonb,
+ snapshot_id bigint
);
comment on column origin_visit.origin is 'Visited origin';
@@ -305,6 +306,7 @@
comment on column origin_visit.date is 'Visit date for that origin';
comment on column origin_visit.status is 'Visit status for that origin';
comment on column origin_visit.metadata is 'Metadata associated with the visit';
+comment on column origin_visit.snapshot_id is 'id of the snapshot associated with the visit';
-- The content of software origins is indexed starting from top-level pointers
@@ -321,7 +323,8 @@
target_type object_type not null, -- ref target type
visits bigint[] not null, -- the visits where that occurrence was valid. References
-- origin_visit(visit), where o_h.origin = origin_visit.origin.
- object_id bigserial not null -- short object identifier
+ object_id bigserial not null, -- short object identifier
+ snapshot_branch_id bigint
);
-- Materialized view of occurrence_history, storing the *current* value of each
@@ -334,6 +337,25 @@
target_type object_type not null
);
+
+-- A snapshot, known internally by object_id and externally by id.
+-- NOTE(review): id is declared nullable here although swh_snapshot_add always
+-- provides it -- confirm whether "not null" was intended.
+create table snapshot (
+ object_id bigserial not null,
+ id sha1_git
+);
+
+-- A named branch and what it points to. target and target_type are both null
+-- for a branch without a target.
+create table snapshot_branch (
+ object_id bigserial not null,
+ name bytea not null,
+ target bytea,
+ target_type snapshot_target
+);
+
+-- Association between snapshots and their branches; both columns hold
+-- internal object_id values (snapshot.object_id, snapshot_branch.object_id).
+create table snapshot_branches (
+ snapshot_id bigint not null,
+ branch_id bigint not null
+);
+
+
-- A "memorable" point in the development history of a project.
--
-- Synonyms/mappings:
diff --git a/swh/storage/api/client.py b/swh/storage/api/client.py
--- a/swh/storage/api/client.py
+++ b/swh/storage/api/client.py
@@ -104,6 +104,17 @@
def occurrence_add(self, occurrences):
return self.post('occurrence/add', {'occurrences': occurrences})
+ def snapshot_add(self, origin, visit, snapshot):
+ """POST origin, visit and snapshot to 'snapshot/add'; return self.post's result."""
+ return self.post('snapshot/add', {'origin': origin, 'visit': visit,
+ 'snapshot': snapshot})
+
+ def snapshot_get(self, snapshot_id):
+ """POST snapshot_id to the 'snapshot' endpoint; return self.post's result."""
+ return self.post('snapshot', {'snapshot_id': snapshot_id})
+
+ def snapshot_get_by_origin_visit(self, origin, visit):
+ """POST origin and visit to 'snapshot/by_origin_visit'; return self.post's result."""
+ return self.post('snapshot/by_origin_visit', {'origin': origin,
+ 'visit': visit})
+
def origin_get(self, origin):
return self.post('origin/get', {'origin': origin})
diff --git a/swh/storage/api/server.py b/swh/storage/api/server.py
--- a/swh/storage/api/server.py
+++ b/swh/storage/api/server.py
@@ -192,6 +192,22 @@
return encode_data(g.storage.occurrence_add(**decode_request(request)))
+@app.route('/snapshot/add', methods=['POST'])
+def snapshot_add():
+ """Decode the request into keyword arguments for storage.snapshot_add."""
+ return encode_data(g.storage.snapshot_add(**decode_request(request)))
+
+
+@app.route('/snapshot', methods=['POST'])
+def snapshot_get():
+ """Decode the request into keyword arguments for storage.snapshot_get."""
+ return encode_data(g.storage.snapshot_get(**decode_request(request)))
+
+
+@app.route('/snapshot/by_origin_visit', methods=['POST'])
+def snapshot_get_by_origin_visit():
+ """Decode the request into keyword arguments for
+ storage.snapshot_get_by_origin_visit."""
+ return encode_data(g.storage.snapshot_get_by_origin_visit(
+ **decode_request(request)))
+
+
@app.route('/origin/get', methods=['POST'])
def origin_get():
return encode_data(g.storage.origin_get(**decode_request(request)))
diff --git a/swh/storage/db.py b/swh/storage/db.py
--- a/swh/storage/db.py
+++ b/swh/storage/db.py
@@ -205,6 +205,9 @@
@stored_procedure('swh_mktemp_occurrence_history')
def mktemp_occurrence_history(self, cur=None): pass
+ # Bound to the swh_mktemp_snapshot_branch SQL function, which creates the
+ # tmp_snapshot_branch temporary table.
+ @stored_procedure('swh_mktemp_snapshot_branch')
+ def mktemp_snapshot_branch(self, cur=None): pass
+
@stored_procedure('swh_mktemp_entity_lister')
def mktemp_entity_lister(self, cur=None): pass
@@ -316,6 +319,44 @@
yield from cursor_to_bytes(cur)
+ def snapshot_exists(self, snapshot_id, cur=None):
+ """Check whether a snapshot with the given id exists
+
+ Returns:
+ bool: True if the snapshot table has a row with this id
+ """
+ cur = self._cursor(cur)
+
+ cur.execute("""SELECT 1 FROM snapshot where id=%s""", (snapshot_id,))
+
+ return bool(cur.fetchone())
+
+ def snapshot_add(self, origin, visit, snapshot_id, cur=None):
+ """Add a snapshot for origin/visit from the temporary table
+
+ Calls the swh_snapshot_add SQL function, which reads the branches from
+ tmp_snapshot_branch only when the snapshot does not exist yet.
+ """
+ cur = self._cursor(cur)
+
+ cur.execute("""SELECT swh_snapshot_add(%s, %s, %s)""",
+ (origin, visit, snapshot_id))
+
+ # columns selected by snapshot_get_by_id, in the order they are yielded
+ snapshot_get_cols = ['snapshot_id', 'name', 'target', 'target_type']
+
+ def snapshot_get_by_id(self, snapshot_id, cur=None):
+ """Yield one row per branch of the given snapshot
+
+ Each row follows the column order of snapshot_get_cols. Nothing is
+ yielded when swh_snapshot_get_by_id returns no rows.
+ """
+ cur = self._cursor(cur)
+ # the column list is a class constant; only snapshot_id is a bound value
+ query = """\
+ SELECT %s FROM swh_snapshot_get_by_id(%%s)
+ """ % ', '.join(self.snapshot_get_cols)
+
+ cur.execute(query, (snapshot_id,))
+
+ yield from cursor_to_bytes(cur)
+
+ def snapshot_get_by_origin_visit(self, origin_id, visit_id, cur=None):
+ """Return the id of the snapshot attached to the given origin visit
+
+ Returns the first column of the row produced by
+ swh_snapshot_get_by_origin_visit (after line_to_bytes), or None when
+ no row is fetched.
+ """
+ cur = self._cursor(cur)
+ query = """\
+ SELECT swh_snapshot_get_by_origin_visit(%s, %s)
+ """
+
+ cur.execute(query, (origin_id, visit_id))
+ ret = cur.fetchone()
+ if ret:
+ return line_to_bytes(ret)[0]
+
content_find_cols = ['sha1', 'sha1_git', 'sha256', 'blake2s256', 'length',
'ctime', 'status']
diff --git a/swh/storage/storage.py b/swh/storage/storage.py
--- a/swh/storage/storage.py
+++ b/swh/storage/storage.py
@@ -759,6 +759,128 @@
dict(zip(db.release_get_cols, release))
)
+    @db_transaction
+    def snapshot_add(self, origin, visit, snapshot, cur=None):
+        """Add a snapshot for the given origin/visit couple
+
+        Args:
+            origin (int): id of the origin
+            visit (int): id of the visit
+            snapshot (dict): Dictionary with the following keys::
+                id (bytes): id of the snapshot
+                branches (dict): branches the snapshot contains, mapping the
+                  branch name (bytes) to the branch target, which is either
+                  None (branch without a target) or a dict with the
+                  following keys::
+                    target_type (str): one of 'content', 'directory',
+                      'revision', 'release', 'snapshot', 'alias'
+                    target (bytes): identifier of the target (a sha1_git
+                      for every kind except 'alias')
+
+        For backwards compatibility, every branch except aliases is also
+        recorded as an occurrence of the origin visit.
+        """
+        db = self.db
+
+        # The branches only need to be loaded when the snapshot is new;
+        # swh_snapshot_add ignores the temporary table otherwise.
+        if not db.snapshot_exists(snapshot['id'], cur):
+            db.mktemp_snapshot_branch(cur)
+            db.copy_to(
+                (
+                    {
+                        'name': name,
+                        'target': info['target'] if info else None,
+                        'target_type': info['target_type'] if info else None,
+                    }
+                    for name, info in snapshot['branches'].items()
+                ),
+                'tmp_snapshot_branch',
+                ['name', 'target', 'target_type'],
+                cur,
+            )
+
+        db.snapshot_add(origin, visit, snapshot['id'], cur)
+
+        # TODO: drop this compat feature
+        occurrences = []
+        for name, info in snapshot['branches'].items():
+            if not info:
+                # a branch without a target is stored as a revision
+                # occurrence pointing at twenty null bytes
+                target = b'\x00' * 20
+                target_type = 'revision'
+            elif info['target_type'] == 'alias':
+                # aliases are not recorded as occurrences
+                continue
+            else:
+                target = info['target']
+                target_type = info['target_type']
+
+            occurrences.append({
+                'origin': origin,
+                'visit': visit,
+                'branch': name,
+                'target': target,
+                'target_type': target_type,
+            })
+
+        # Reuse the current cursor so the occurrences are written in the same
+        # transaction as the snapshot itself.
+        self.occurrence_add(occurrences, cur=cur)
+
+ @db_transaction
+ def snapshot_get(self, snapshot_id, cur=None):
+ """Get the snapshot with the given id
+
+ Args:
+ snapshot_id (bytes): id of the snapshot
+ Returns:
+ dict: a snapshot with two keys:
+ id:: identifier for the snapshot
+ branches:: a dict mapping each branch name to a dict with keys
+ 'target' and 'target_type', or to None when both are unset
+ None: if no snapshot with this id exists
+
+ """
+ db = self.db
+
+ branches = {}
+ for branch in db.snapshot_get_by_id(snapshot_id, cur):
+ branch = dict(zip(db.snapshot_get_cols, branch))
+ del branch['snapshot_id']
+ name = branch.pop('name')
+ # a branch with neither target nor target type is reported as None
+ if branch == {'target': None, 'target_type': None}:
+ branch = None
+ branches[name] = branch
+
+ if branches:
+ return {'id': snapshot_id, 'branches': branches}
+
+ # no branch rows: either the snapshot is empty or it does not exist
+ if db.snapshot_exists(snapshot_id, cur):
+ # empty snapshot
+ return {'id': snapshot_id, 'branches': {}}
+
+ return None
+
+    @db_transaction
+    def snapshot_get_by_origin_visit(self, origin, visit, cur=None):
+        """Get the snapshot for the given origin visit
+
+        Args:
+            origin (int): the origin identifier
+            visit (int): the visit identifier
+        Returns:
+            dict: a snapshot with two keys:
+                id:: identifier for the snapshot, or None when the visit has
+                  no snapshot attached (compatibility mode)
+                branches:: a dictionary containing the snapshot branch
+                  information; in compatibility mode, the occurrences of
+                  the visit
+            None: if the origin visit does not exist
+
+        """
+        db = self.db
+
+        snapshot_id = db.snapshot_get_by_origin_visit(origin, visit, cur)
+
+        if snapshot_id:
+            return self.snapshot_get(snapshot_id, cur=cur)
+
+        # compatibility code during the snapshot migration
+        origin_visit_info = self.origin_visit_get_by(origin, visit, cur=cur)
+        if origin_visit_info is None:
+            return None
+
+        return {
+            'id': None,
+            'branches': origin_visit_info['occurrences'],
+        }
+
@db_transaction
def occurrence_add(self, occurrences, cur=None):
"""Add occurrences to the storage
diff --git a/swh/storage/tests/test_storage.py b/swh/storage/tests/test_storage.py
--- a/swh/storage/tests/test_storage.py
+++ b/swh/storage/tests/test_storage.py
@@ -380,7 +380,7 @@
self.occurrence = {
'branch': b'master',
- 'target': b'67890123456789012345',
+ 'target': self.revision['id'],
'target_type': 'revision',
}
@@ -584,6 +584,57 @@
},
}
+ self.snapshot = {
+ 'id': hash_to_bytes('2498dbf535f882bc7f9a18fb16c9ad27fda7bab7'),
+ 'branches': {
+ self.occurrence['branch']: {
+ 'target': self.occurrence['target'],
+ 'target_type': self.occurrence['target_type'],
+ },
+ },
+ }
+
+ self.empty_snapshot = {
+ 'id': hash_to_bytes('1a8893e6a86f444e8be8e7bda6cb34fb1735a00e'),
+ 'branches': {},
+ }
+
+ self.complete_snapshot = {
+ 'id': hash_to_bytes('6e65b86363953b780d92b0a928f3e8fcdd10db36'),
+ 'branches': {
+ b'directory': {
+ 'target': hash_to_bytes(
+ '1bd0e65f7d2ff14ae994de17a1e7fe65111dcad8'),
+ 'target_type': 'directory',
+ },
+ b'content': {
+ 'target': hash_to_bytes(
+ 'fe95a46679d128ff167b7c55df5d02356c5a1ae1'),
+ 'target_type': 'content',
+ },
+ b'alias': {
+ 'target': b'revision',
+ 'target_type': 'alias',
+ },
+ b'revision': {
+ 'target': hash_to_bytes(
+ 'aafb16d69fd30ff58afdd69036a26047f3aebdc6'),
+ 'target_type': 'revision',
+ },
+ b'release': {
+ 'target': hash_to_bytes(
+ '7045404f3d1c54e6473c71bbb716529fbad4be24'),
+ 'target_type': 'release',
+ },
+ b'snapshot': {
+ 'target': hash_to_bytes(
+ '1a8893e6a86f444e8be8e7bda6cb34fb1735a00e'),
+ 'target_type': 'snapshot',
+ },
+ b'dangling': None,
+ }
+ }
+
def tearDown(self):
self.reset_storage_tables()
super().tearDown()
@@ -1603,6 +1654,129 @@
self.assertEquals(len(actual_occurrence), 1)
self.assertEquals(actual_occurrence[0], expected_occurrence)
+ @istest
+ def snapshot_add_get_empty(self):
+ # A snapshot without branches must round-trip unchanged, both by id and
+ # through its origin visit.
+ origin_id = self.storage.origin_add_one(self.origin)
+ origin_visit1 = self.storage.origin_visit_add(origin_id,
+ self.date_visit1)
+ visit_id = origin_visit1['visit']
+
+ self.storage.snapshot_add(origin_id, visit_id, self.empty_snapshot)
+
+ by_id = self.storage.snapshot_get(self.empty_snapshot['id'])
+ self.assertEqual(by_id, self.empty_snapshot)
+
+ by_ov = self.storage.snapshot_get_by_origin_visit(origin_id, visit_id)
+ self.assertEqual(by_ov, self.empty_snapshot)
+
+ @istest
+ def snapshot_add_get_complete(self):
+ # A snapshot with one branch of every target type, plus a branch set to
+ # None, must round-trip unchanged.
+ origin_id = self.storage.origin_add_one(self.origin)
+ origin_visit1 = self.storage.origin_visit_add(origin_id,
+ self.date_visit1)
+ visit_id = origin_visit1['visit']
+
+ self.storage.snapshot_add(origin_id, visit_id, self.complete_snapshot)
+
+ by_id = self.storage.snapshot_get(self.complete_snapshot['id'])
+ self.assertEqual(by_id, self.complete_snapshot)
+
+ by_ov = self.storage.snapshot_get_by_origin_visit(origin_id, visit_id)
+ self.assertEqual(by_ov, self.complete_snapshot)
+
+ @istest
+ def snapshot_add_get(self):
+ # A single-branch snapshot must round-trip unchanged and must also be
+ # visible through the occurrences of the origin visit.
+ origin_id = self.storage.origin_add_one(self.origin)
+ origin_visit1 = self.storage.origin_visit_add(origin_id,
+ self.date_visit1)
+ visit_id = origin_visit1['visit']
+
+ self.storage.snapshot_add(origin_id, visit_id, self.snapshot)
+
+ by_id = self.storage.snapshot_get(self.snapshot['id'])
+ self.assertEqual(by_id, self.snapshot)
+
+ by_ov = self.storage.snapshot_get_by_origin_visit(origin_id, visit_id)
+ self.assertEqual(by_ov, self.snapshot)
+
+ # retrocompat test
+ origin_visit_info = self.storage.origin_visit_get_by(origin_id,
+ visit_id)
+ self.assertEqual(origin_visit_info['occurrences'],
+ self.snapshot['branches'])
+
+ @istest
+ def snapshot_add_twice(self):
+ # Adding the same snapshot for two visits of one origin must attach it
+ # to both visits.
+ origin_id = self.storage.origin_add_one(self.origin)
+ origin_visit1 = self.storage.origin_visit_add(origin_id,
+ self.date_visit1)
+ visit1_id = origin_visit1['visit']
+ self.storage.snapshot_add(origin_id, visit1_id, self.snapshot)
+
+ by_ov1 = self.storage.snapshot_get_by_origin_visit(origin_id,
+ visit1_id)
+ self.assertEqual(by_ov1, self.snapshot)
+
+ origin_visit2 = self.storage.origin_visit_add(origin_id,
+ self.date_visit2)
+ visit2_id = origin_visit2['visit']
+
+ self.storage.snapshot_add(origin_id, visit2_id, self.snapshot)
+
+ by_ov2 = self.storage.snapshot_get_by_origin_visit(origin_id,
+ visit2_id)
+ self.assertEqual(by_ov2, self.snapshot)
+
+ @istest
+ def snapshot_get_nonexistent(self):
+ # Both lookups must return None when nothing has been stored.
+ bogus_snapshot_id = b'bogus snapshot id 00'
+ bogus_origin_id = 1
+ bogus_visit_id = 1
+
+ by_id = self.storage.snapshot_get(bogus_snapshot_id)
+ self.assertIsNone(by_id)
+
+ by_ov = self.storage.snapshot_get_by_origin_visit(bogus_origin_id,
+ bogus_visit_id)
+ self.assertIsNone(by_ov)
+
+ @istest
+ def snapshot_get_retrocompat(self):
+ # A visit with no snapshot attached must be reported with id None and
+ # with branches taken from the occurrences recorded for the visit.
+ empty_retro_snapshot = {
+ 'id': None,
+ 'branches': {},
+ }
+ origin_id = self.storage.origin_add_one(self.origin)
+ origin_visit1 = self.storage.origin_visit_add(origin_id,
+ self.date_visit1)
+ visit_id = origin_visit1['visit']
+
+ by_ov = self.storage.snapshot_get_by_origin_visit(origin_id, visit_id)
+
+ self.assertEqual(by_ov, empty_retro_snapshot)
+
+ self.storage.revision_add([self.revision])
+ self.storage.occurrence_add([{
+ 'origin': origin_id,
+ 'visit': visit_id,
+ 'branch': self.occurrence['branch'],
+ 'target': self.occurrence['target'],
+ 'target_type': self.occurrence['target_type'],
+ }])
+
+ one_branch_retro_snapshot = {
+ 'id': None,
+ 'branches': {
+ self.occurrence['branch']: {
+ 'target': self.occurrence['target'],
+ 'target_type': self.occurrence['target_type'],
+ },
+ },
+ }
+
+ by_ov = self.storage.snapshot_get_by_origin_visit(origin_id, visit_id)
+ self.assertEqual(by_ov, one_branch_retro_snapshot)
+
@istest
def entity_get_from_lister_metadata(self):
self.storage.entity_add([self.entity1])
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Nov 4 2024, 6:10 PM (11 w, 1 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3220641
Attached To
D268: Add snapshot models
Event Timeline
Log In to Comment