Page MenuHomeSoftware Heritage

D535.diff
No OneTemporary

D535.diff

diff --git a/sql/swh-func.sql b/sql/swh-func.sql
--- a/sql/swh-func.sql
+++ b/sql/swh-func.sql
@@ -91,20 +91,6 @@
alter table tmp_release drop column object_id;
$$;
--- create a temporary table for occurrence_history
-create or replace function swh_mktemp_occurrence_history()
- returns void
- language sql
-as $$
- create temporary table tmp_occurrence_history(
- like occurrence_history including defaults,
- visit bigint not null
- ) on commit drop;
- alter table tmp_occurrence_history
- drop column visits,
- drop column object_id;
-$$;
-
-- create a temporary table for the branches of a snapshot
create or replace function swh_mktemp_snapshot_branch()
returns void
@@ -720,42 +706,6 @@
returning visit;
$$;
--- add tmp_occurrence_history entries to occurrence_history
---
--- operates in bulk: 0. swh_mktemp(occurrence_history), 1. COPY to tmp_occurrence_history,
--- 2. call this function
-create or replace function swh_occurrence_history_add()
- returns void
- language plpgsql
-as $$
-declare
- origin_id origin.id%type;
-begin
- -- Create or update occurrence_history
- with occurrence_history_id_visit as (
- select tmp_occurrence_history.*, object_id, visits from tmp_occurrence_history
- left join occurrence_history using(origin, branch, target, target_type)
- ),
- occurrences_to_update as (
- select object_id, visit from occurrence_history_id_visit where object_id is not null
- ),
- update_occurrences as (
- update occurrence_history
- set visits = array(select unnest(occurrence_history.visits) as e
- union
- select occurrences_to_update.visit as e
- order by e)
- from occurrences_to_update
- where occurrence_history.object_id = occurrences_to_update.object_id
- )
- insert into occurrence_history (origin, branch, target, target_type, visits)
- select origin, branch, target, target_type, ARRAY[visit]
- from occurrence_history_id_visit
- where object_id is null;
- return;
-end
-$$;
-
create or replace function swh_snapshot_add(origin bigint, visit bigint, snapshot_id snapshot.id%type)
returns void
language plpgsql
@@ -926,44 +876,6 @@
order by date desc
$$;
-
--- Retrieve occurrence by filtering on origin_id and optionally on
--- branch_name and/or validity range
-create or replace function swh_occurrence_get_by(
- origin_id bigint,
- branch_name bytea default NULL,
- date timestamptz default NULL)
- returns setof occurrence_history
- language plpgsql
-as $$
-declare
- filters text[] := array[] :: text[]; -- AND-clauses used to filter content
- visit_id bigint;
- q text;
-begin
- if origin_id is null then
- raise exception 'Needs an origin_id to get an occurrence.';
- end if;
- filters := filters || format('origin = %L', origin_id);
- if branch_name is not null then
- filters := filters || format('branch = %L', branch_name);
- end if;
- if date is not null then
- select visit from swh_visit_find_by_date(origin_id, date) into visit_id;
- else
- select visit from origin_visit where origin = origin_id order by origin_visit.date desc limit 1 into visit_id;
- end if;
- if visit_id is null then
- return;
- end if;
- filters := filters || format('%L = any(visits)', visit_id);
-
- q = format('select * from occurrence_history where %s',
- array_to_string(filters, ' and '));
- return query execute q;
-end
-$$;
-
-- Object listing by object_id
create or replace function swh_content_list_by_object_id(
@@ -1114,7 +1026,6 @@
'directory_entry_dir',
'directory_entry_file',
'directory_entry_rev',
- 'occurrence_history',
'origin',
'origin_visit',
'person',
diff --git a/sql/swh-indexes.sql b/sql/swh-indexes.sql
--- a/sql/swh-indexes.sql
+++ b/sql/swh-indexes.sql
@@ -138,17 +138,6 @@
alter table origin_visit add constraint origin_visit_snapshot_id_fkey foreign key (snapshot_id) references snapshot(object_id) not valid;
alter table origin_visit validate constraint origin_visit_snapshot_id_fkey;
--- occurrence_history
-create unique index concurrently occurrence_history_pkey on occurrence_history(object_id);
-alter table occurrence_history add primary key using index occurrence_history_pkey;
-
-create index concurrently on occurrence_history(target, target_type);
-create index concurrently on occurrence_history(origin, branch);
-create unique index concurrently on occurrence_history(origin, branch, target, target_type);
-
-alter table occurrence_history add constraint occurrence_history_origin_fkey foreign key (origin) references origin(id) not valid;
-alter table occurrence_history validate constraint occurrence_history_origin_fkey;
-
-- release
create unique index concurrently release_pkey on release(id);
alter table release add primary key using index release_pkey;
diff --git a/sql/swh-schema.sql b/sql/swh-schema.sql
--- a/sql/swh-schema.sql
+++ b/sql/swh-schema.sql
@@ -12,7 +12,7 @@
-- latest schema version
insert into dbversion(version, release, description)
- values(126, now(), 'Work In Progress');
+ values(127, now(), 'Work In Progress');
-- a SHA1 checksum
create domain sha1 as bytea check (length(value) = 20);
@@ -222,28 +222,6 @@
comment on column origin_visit.snapshot_id is 'Origin snapshot at visit time';
--- BEGIN legacy section (T830)
-
--- The content of software origins is indexed starting from top-level pointers
--- called "branches". Every time we fetch some origin we store in this table
--- where the branches pointed to at fetch time.
---
--- Synonyms/mappings:
--- * git: ref (in the "git update-ref" sense)
-create table occurrence_history
-(
- origin bigint not null,
- branch bytea not null, -- e.g., b"master" (for VCS), or b"sid" (for Debian)
- target sha1_git not null, -- ref target, e.g., commit id
- target_type object_type not null, -- ref target type
- visits bigint[] not null, -- the visits where that occurrence was valid. References
- -- origin_visit(visit), where o_h.origin = origin_visit.origin.
- object_id bigserial not null, -- short object identifier
- snapshot_branch_id bigint
-);
-
--- END legacy section (T830)
-
-- A snapshot represents the entire state of a software origin as crawled by
-- Software Heritage. This table is a simple mapping between (public) intrinsic
-- snapshot identifiers and (private) numeric sequential identifiers.
diff --git a/sql/upgrades/127.sql b/sql/upgrades/127.sql
new file mode 100644
--- /dev/null
+++ b/sql/upgrades/127.sql
@@ -0,0 +1,39 @@
+-- SWH DB schema upgrade
+-- from_version: 126
+-- to_version: 127
+-- description: Drop the now superseded occurrence_history table
+
+insert into dbversion(version, release, description)
+ values(127, now(), 'Work In Progress');
+
+DROP FUNCTION swh_mktemp_occurrence_history();
+
+DROP FUNCTION swh_occurrence_get_by(origin_id bigint, branch_name bytea, "date" timestamp with time zone);
+
+DROP FUNCTION swh_occurrence_history_add();
+
+DROP TABLE occurrence_history;
+-- object_id was a bigserial column, so DROP TABLE normally removes its owned sequence already; IF EXISTS keeps the upgrade from aborting in that case.
+DROP SEQUENCE IF EXISTS occurrence_history_object_id_seq;
+-- Redefine swh_stat_counters so it no longer reports the 'occurrence_history' label.
+CREATE OR REPLACE FUNCTION swh_stat_counters() RETURNS SETOF public.counter
+ LANGUAGE sql STABLE
+ AS $$
+ select object_type as label, value as value
+ from object_counts
+ where object_type in (
+ 'content',
+ 'directory',
+ 'directory_entry_dir',
+ 'directory_entry_file',
+ 'directory_entry_rev',
+ 'origin',
+ 'origin_visit',
+ 'person',
+ 'release',
+ 'revision',
+ 'revision_history',
+ 'skipped_content',
+ 'snapshot'
+ );
+$$;
diff --git a/swh/storage/api/client.py b/swh/storage/api/client.py
--- a/swh/storage/api/client.py
+++ b/swh/storage/api/client.py
@@ -80,9 +80,6 @@
def object_find_by_sha1_git(self, ids):
return self.post('object/find_by_sha1_git', {'ids': ids})
- def occurrence_add(self, occurrences):
- return self.post('occurrence/add', {'occurrences': occurrences})
-
def snapshot_add(self, origin, visit, snapshot):
return self.post('snapshot/add', {
'origin': origin, 'visit': visit, 'snapshot': snapshot,
diff --git a/swh/storage/api/server.py b/swh/storage/api/server.py
--- a/swh/storage/api/server.py
+++ b/swh/storage/api/server.py
@@ -180,11 +180,6 @@
**decode_request(request)))
-@app.route('/occurrence/add', methods=['POST'])
-def occurrence_add():
- return encode_data(get_storage().occurrence_add(**decode_request(request)))
-
-
@app.route('/snapshot/add', methods=['POST'])
def snapshot_add():
return encode_data(get_storage().snapshot_add(**decode_request(request)))
diff --git a/swh/storage/db.py b/swh/storage/db.py
--- a/swh/storage/db.py
+++ b/swh/storage/db.py
@@ -235,9 +235,6 @@
@stored_procedure('swh_mktemp_release')
def mktemp_release(self, cur=None): pass
- @stored_procedure('swh_mktemp_occurrence_history')
- def mktemp_occurrence_history(self, cur=None): pass
-
@stored_procedure('swh_mktemp_snapshot_branch')
def mktemp_snapshot_branch(self, cur=None): pass
@@ -269,9 +266,6 @@
@stored_procedure('swh_release_add')
def release_add_from_temp(self, cur=None): pass
- @stored_procedure('swh_occurrence_history_add')
- def occurrence_history_add_from_temp(self, cur=None): pass
-
def content_update_from_temp(self, keys_to_update, cur=None):
cur = self._cursor(cur)
cur.execute("""select swh_content_update(ARRAY[%s] :: text[])""" %
@@ -611,8 +605,6 @@
return None
return line_to_bytes(r)
- occurrence_cols = ['origin', 'branch', 'target', 'target_type']
-
@staticmethod
def mangle_query_key(key, main_table):
if key == 'id':
diff --git a/swh/storage/storage.py b/swh/storage/storage.py
--- a/swh/storage/storage.py
+++ b/swh/storage/storage.py
@@ -914,31 +914,6 @@
return None
@db_transaction()
- def occurrence_add(self, occurrences, db=None, cur=None):
- """Add occurrences to the storage
-
- Args:
- occurrences: iterable of dictionaries representing the individual
- occurrences to add. Each dict has the following keys:
-
- - origin (int): id of the origin corresponding to the
- occurrence
- - visit (int): id of the visit corresponding to the
- occurrence
- - branch (str): the reference name of the occurrence
- - target (sha1_git): the id of the object pointed to by
- the occurrence
- - target_type (str): the type of object pointed to by the
- occurrence
-
- """
- db.mktemp_occurrence_history(cur)
- db.copy_to(occurrences, 'tmp_occurrence_history',
- ['origin', 'branch', 'target', 'target_type', 'visit'], cur)
-
- db.occurrence_history_add_from_temp(cur)
-
- @db_transaction()
def origin_visit_add(self, origin, ts, db=None, cur=None):
"""Add an origin_visit for the origin at ts with status 'ongoing'.
diff --git a/swh/storage/tests/test_storage.py b/swh/storage/tests/test_storage.py
--- a/swh/storage/tests/test_storage.py
+++ b/swh/storage/tests/test_storage.py
@@ -376,21 +376,9 @@
self.date_visit1 = datetime.datetime(2015, 1, 1, 23, 0, 0,
tzinfo=datetime.timezone.utc)
- self.occurrence = {
- 'branch': b'master',
- 'target': self.revision['id'],
- 'target_type': 'revision',
- }
-
self.date_visit2 = datetime.datetime(2017, 1, 1, 23, 0, 0,
tzinfo=datetime.timezone.utc)
- self.occurrence2 = {
- 'branch': b'master',
- 'target': self.revision2['id'],
- 'target_type': 'revision',
- }
-
self.date_visit3 = datetime.datetime(2018, 1, 1, 23, 0, 0,
tzinfo=datetime.timezone.utc)
@@ -471,9 +459,9 @@
self.snapshot = {
'id': hash_to_bytes('2498dbf535f882bc7f9a18fb16c9ad27fda7bab7'),
'branches': {
- self.occurrence['branch']: {
- 'target': self.occurrence['target'],
- 'target_type': self.occurrence['target_type'],
+ b'master': {
+ 'target': self.revision['id'],
+ 'target_type': 'revision',
},
},
'next_branch': None
@@ -1167,65 +1155,6 @@
self.assertIsNone(actual_origin_visit)
- def test_occurrence_add(self):
- occur = self.occurrence.copy()
-
- origin_id = self.storage.origin_add_one(self.origin2)
- date_visit1 = self.date_visit1
- origin_visit1 = self.storage.origin_visit_add(origin_id, date_visit1)
-
- revision = self.revision.copy()
- revision['id'] = occur['target']
- self.storage.revision_add([revision])
-
- occur.update({
- 'origin': origin_id,
- 'visit': origin_visit1['visit'],
- })
- self.storage.occurrence_add([occur])
-
- test_query = '''
- with indiv_occurrences as (
- select origin, branch, target, target_type, unnest(visits) as visit
- from occurrence_history
- )
- select origin, branch, target, target_type, date
- from indiv_occurrences
- left join origin_visit using(origin, visit)
- order by origin, date'''
-
- self.cursor.execute(test_query)
- ret = self.cursor.fetchall()
- self.assertEqual(len(ret), 1)
- self.assertEqual(
- (ret[0][0], ret[0][1].tobytes(), ret[0][2].tobytes(),
- ret[0][3], ret[0][4]),
- (occur['origin'], occur['branch'], occur['target'],
- occur['target_type'], self.date_visit1))
-
- date_visit2 = date_visit1 + datetime.timedelta(hours=10)
-
- origin_visit2 = self.storage.origin_visit_add(origin_id, date_visit2)
- occur2 = occur.copy()
- occur2.update({
- 'visit': origin_visit2['visit'],
- })
- self.storage.occurrence_add([occur2])
-
- self.cursor.execute(test_query)
- ret = self.cursor.fetchall()
- self.assertEqual(len(ret), 2)
- self.assertEqual(
- (ret[0][0], ret[0][1].tobytes(), ret[0][2].tobytes(),
- ret[0][3], ret[0][4]),
- (occur['origin'], occur['branch'], occur['target'],
- occur['target_type'], date_visit1))
- self.assertEqual(
- (ret[1][0], ret[1][1].tobytes(), ret[1][2].tobytes(),
- ret[1][3], ret[1][4]),
- (occur2['origin'], occur2['branch'], occur2['target'],
- occur2['target_type'], date_visit2))
-
def test_snapshot_add_get_empty(self):
origin_id = self.storage.origin_add_one(self.origin)
origin_visit1 = self.storage.origin_visit_add(origin_id,

File Metadata

Mime Type
text/plain
Expires
Nov 5 2024, 5:14 PM (11 w, 17 h ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3218130

Event Timeline