Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7066646
D535.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
14 KB
Subscribers
None
D535.diff
View Options
diff --git a/sql/swh-func.sql b/sql/swh-func.sql
--- a/sql/swh-func.sql
+++ b/sql/swh-func.sql
@@ -91,20 +91,6 @@
alter table tmp_release drop column object_id;
$$;
--- create a temporary table for occurrence_history
-create or replace function swh_mktemp_occurrence_history()
- returns void
- language sql
-as $$
- create temporary table tmp_occurrence_history(
- like occurrence_history including defaults,
- visit bigint not null
- ) on commit drop;
- alter table tmp_occurrence_history
- drop column visits,
- drop column object_id;
-$$;
-
-- create a temporary table for the branches of a snapshot
create or replace function swh_mktemp_snapshot_branch()
returns void
@@ -720,42 +706,6 @@
returning visit;
$$;
--- add tmp_occurrence_history entries to occurrence_history
---
--- operates in bulk: 0. swh_mktemp(occurrence_history), 1. COPY to tmp_occurrence_history,
--- 2. call this function
-create or replace function swh_occurrence_history_add()
- returns void
- language plpgsql
-as $$
-declare
- origin_id origin.id%type;
-begin
- -- Create or update occurrence_history
- with occurrence_history_id_visit as (
- select tmp_occurrence_history.*, object_id, visits from tmp_occurrence_history
- left join occurrence_history using(origin, branch, target, target_type)
- ),
- occurrences_to_update as (
- select object_id, visit from occurrence_history_id_visit where object_id is not null
- ),
- update_occurrences as (
- update occurrence_history
- set visits = array(select unnest(occurrence_history.visits) as e
- union
- select occurrences_to_update.visit as e
- order by e)
- from occurrences_to_update
- where occurrence_history.object_id = occurrences_to_update.object_id
- )
- insert into occurrence_history (origin, branch, target, target_type, visits)
- select origin, branch, target, target_type, ARRAY[visit]
- from occurrence_history_id_visit
- where object_id is null;
- return;
-end
-$$;
-
create or replace function swh_snapshot_add(origin bigint, visit bigint, snapshot_id snapshot.id%type)
returns void
language plpgsql
@@ -926,44 +876,6 @@
order by date desc
$$;
-
--- Retrieve occurrence by filtering on origin_id and optionally on
--- branch_name and/or validity range
-create or replace function swh_occurrence_get_by(
- origin_id bigint,
- branch_name bytea default NULL,
- date timestamptz default NULL)
- returns setof occurrence_history
- language plpgsql
-as $$
-declare
- filters text[] := array[] :: text[]; -- AND-clauses used to filter content
- visit_id bigint;
- q text;
-begin
- if origin_id is null then
- raise exception 'Needs an origin_id to get an occurrence.';
- end if;
- filters := filters || format('origin = %L', origin_id);
- if branch_name is not null then
- filters := filters || format('branch = %L', branch_name);
- end if;
- if date is not null then
- select visit from swh_visit_find_by_date(origin_id, date) into visit_id;
- else
- select visit from origin_visit where origin = origin_id order by origin_visit.date desc limit 1 into visit_id;
- end if;
- if visit_id is null then
- return;
- end if;
- filters := filters || format('%L = any(visits)', visit_id);
-
- q = format('select * from occurrence_history where %s',
- array_to_string(filters, ' and '));
- return query execute q;
-end
-$$;
-
-- Object listing by object_id
create or replace function swh_content_list_by_object_id(
@@ -1114,7 +1026,6 @@
'directory_entry_dir',
'directory_entry_file',
'directory_entry_rev',
- 'occurrence_history',
'origin',
'origin_visit',
'person',
diff --git a/sql/swh-indexes.sql b/sql/swh-indexes.sql
--- a/sql/swh-indexes.sql
+++ b/sql/swh-indexes.sql
@@ -138,17 +138,6 @@
alter table origin_visit add constraint origin_visit_snapshot_id_fkey foreign key (snapshot_id) references snapshot(object_id) not valid;
alter table origin_visit validate constraint origin_visit_snapshot_id_fkey;
--- occurrence_history
-create unique index concurrently occurrence_history_pkey on occurrence_history(object_id);
-alter table occurrence_history add primary key using index occurrence_history_pkey;
-
-create index concurrently on occurrence_history(target, target_type);
-create index concurrently on occurrence_history(origin, branch);
-create unique index concurrently on occurrence_history(origin, branch, target, target_type);
-
-alter table occurrence_history add constraint occurrence_history_origin_fkey foreign key (origin) references origin(id) not valid;
-alter table occurrence_history validate constraint occurrence_history_origin_fkey;
-
-- release
create unique index concurrently release_pkey on release(id);
alter table release add primary key using index release_pkey;
diff --git a/sql/swh-schema.sql b/sql/swh-schema.sql
--- a/sql/swh-schema.sql
+++ b/sql/swh-schema.sql
@@ -12,7 +12,7 @@
-- latest schema version
insert into dbversion(version, release, description)
- values(126, now(), 'Work In Progress');
+ values(127, now(), 'Work In Progress');
-- a SHA1 checksum
create domain sha1 as bytea check (length(value) = 20);
@@ -222,28 +222,6 @@
comment on column origin_visit.snapshot_id is 'Origin snapshot at visit time';
--- BEGIN legacy section (T830)
-
--- The content of software origins is indexed starting from top-level pointers
--- called "branches". Every time we fetch some origin we store in this table
--- where the branches pointed to at fetch time.
---
--- Synonyms/mappings:
--- * git: ref (in the "git update-ref" sense)
-create table occurrence_history
-(
- origin bigint not null,
- branch bytea not null, -- e.g., b"master" (for VCS), or b"sid" (for Debian)
- target sha1_git not null, -- ref target, e.g., commit id
- target_type object_type not null, -- ref target type
- visits bigint[] not null, -- the visits where that occurrence was valid. References
- -- origin_visit(visit), where o_h.origin = origin_visit.origin.
- object_id bigserial not null, -- short object identifier
- snapshot_branch_id bigint
-);
-
--- END legacy section (T830)
-
-- A snapshot represents the entire state of a software origin as crawled by
-- Software Heritage. This table is a simple mapping between (public) intrinsic
-- snapshot identifiers and (private) numeric sequential identifiers.
diff --git a/sql/upgrades/127.sql b/sql/upgrades/127.sql
new file mode 100644
--- /dev/null
+++ b/sql/upgrades/127.sql
@@ -0,0 +1,46 @@
+-- SWH DB schema upgrade
+-- from_version: 126
+-- to_version: 127
+-- description: Drop the now superseded occurrence_history table
+
+insert into dbversion(version, release, description)
+      values(127, now(), 'Work In Progress');
+
+-- Drop the functions that reference occurrence_history before the table:
+-- swh_occurrence_get_by returns setof occurrence_history (the table's row type).
+DROP FUNCTION swh_mktemp_occurrence_history();
+
+DROP FUNCTION swh_occurrence_get_by(origin_id bigint, branch_name bytea, "date" timestamp with time zone);
+
+DROP FUNCTION swh_occurrence_history_add();
+
+DROP TABLE occurrence_history;
+
+-- occurrence_history.object_id was declared bigserial, so its sequence is owned
+-- by that column and is already removed by the DROP TABLE above. IF EXISTS
+-- keeps this statement from aborting the upgrade when the sequence is gone.
+DROP SEQUENCE IF EXISTS occurrence_history_object_id_seq;
+
+-- Redefine swh_stat_counters() so that it no longer lists the
+-- 'occurrence_history' label among the reported object counts.
+CREATE OR REPLACE FUNCTION swh_stat_counters() RETURNS SETOF public.counter
+    LANGUAGE sql STABLE
+    AS $$
+    select object_type as label, value as value
+    from object_counts
+    where object_type in (
+        'content',
+        'directory',
+        'directory_entry_dir',
+        'directory_entry_file',
+        'directory_entry_rev',
+        'origin',
+        'origin_visit',
+        'person',
+        'release',
+        'revision',
+        'revision_history',
+        'skipped_content',
+        'snapshot'
+    );
+$$;
diff --git a/swh/storage/api/client.py b/swh/storage/api/client.py
--- a/swh/storage/api/client.py
+++ b/swh/storage/api/client.py
@@ -80,9 +80,6 @@
def object_find_by_sha1_git(self, ids):
return self.post('object/find_by_sha1_git', {'ids': ids})
- def occurrence_add(self, occurrences):
- return self.post('occurrence/add', {'occurrences': occurrences})
-
def snapshot_add(self, origin, visit, snapshot):
return self.post('snapshot/add', {
'origin': origin, 'visit': visit, 'snapshot': snapshot,
diff --git a/swh/storage/api/server.py b/swh/storage/api/server.py
--- a/swh/storage/api/server.py
+++ b/swh/storage/api/server.py
@@ -180,11 +180,6 @@
**decode_request(request)))
-@app.route('/occurrence/add', methods=['POST'])
-def occurrence_add():
- return encode_data(get_storage().occurrence_add(**decode_request(request)))
-
-
@app.route('/snapshot/add', methods=['POST'])
def snapshot_add():
return encode_data(get_storage().snapshot_add(**decode_request(request)))
diff --git a/swh/storage/db.py b/swh/storage/db.py
--- a/swh/storage/db.py
+++ b/swh/storage/db.py
@@ -235,9 +235,6 @@
@stored_procedure('swh_mktemp_release')
def mktemp_release(self, cur=None): pass
- @stored_procedure('swh_mktemp_occurrence_history')
- def mktemp_occurrence_history(self, cur=None): pass
-
@stored_procedure('swh_mktemp_snapshot_branch')
def mktemp_snapshot_branch(self, cur=None): pass
@@ -269,9 +266,6 @@
@stored_procedure('swh_release_add')
def release_add_from_temp(self, cur=None): pass
- @stored_procedure('swh_occurrence_history_add')
- def occurrence_history_add_from_temp(self, cur=None): pass
-
def content_update_from_temp(self, keys_to_update, cur=None):
cur = self._cursor(cur)
cur.execute("""select swh_content_update(ARRAY[%s] :: text[])""" %
@@ -611,8 +605,6 @@
return None
return line_to_bytes(r)
- occurrence_cols = ['origin', 'branch', 'target', 'target_type']
-
@staticmethod
def mangle_query_key(key, main_table):
if key == 'id':
diff --git a/swh/storage/storage.py b/swh/storage/storage.py
--- a/swh/storage/storage.py
+++ b/swh/storage/storage.py
@@ -914,31 +914,6 @@
return None
@db_transaction()
- def occurrence_add(self, occurrences, db=None, cur=None):
- """Add occurrences to the storage
-
- Args:
- occurrences: iterable of dictionaries representing the individual
- occurrences to add. Each dict has the following keys:
-
- - origin (int): id of the origin corresponding to the
- occurrence
- - visit (int): id of the visit corresponding to the
- occurrence
- - branch (str): the reference name of the occurrence
- - target (sha1_git): the id of the object pointed to by
- the occurrence
- - target_type (str): the type of object pointed to by the
- occurrence
-
- """
- db.mktemp_occurrence_history(cur)
- db.copy_to(occurrences, 'tmp_occurrence_history',
- ['origin', 'branch', 'target', 'target_type', 'visit'], cur)
-
- db.occurrence_history_add_from_temp(cur)
-
- @db_transaction()
def origin_visit_add(self, origin, ts, db=None, cur=None):
"""Add an origin_visit for the origin at ts with status 'ongoing'.
diff --git a/swh/storage/tests/test_storage.py b/swh/storage/tests/test_storage.py
--- a/swh/storage/tests/test_storage.py
+++ b/swh/storage/tests/test_storage.py
@@ -376,21 +376,9 @@
self.date_visit1 = datetime.datetime(2015, 1, 1, 23, 0, 0,
tzinfo=datetime.timezone.utc)
- self.occurrence = {
- 'branch': b'master',
- 'target': self.revision['id'],
- 'target_type': 'revision',
- }
-
self.date_visit2 = datetime.datetime(2017, 1, 1, 23, 0, 0,
tzinfo=datetime.timezone.utc)
- self.occurrence2 = {
- 'branch': b'master',
- 'target': self.revision2['id'],
- 'target_type': 'revision',
- }
-
self.date_visit3 = datetime.datetime(2018, 1, 1, 23, 0, 0,
tzinfo=datetime.timezone.utc)
@@ -471,9 +459,9 @@
self.snapshot = {
'id': hash_to_bytes('2498dbf535f882bc7f9a18fb16c9ad27fda7bab7'),
'branches': {
- self.occurrence['branch']: {
- 'target': self.occurrence['target'],
- 'target_type': self.occurrence['target_type'],
+ b'master': {
+ 'target': self.revision['id'],
+ 'target_type': 'revision',
},
},
'next_branch': None
@@ -1167,65 +1155,6 @@
self.assertIsNone(actual_origin_visit)
- def test_occurrence_add(self):
- occur = self.occurrence.copy()
-
- origin_id = self.storage.origin_add_one(self.origin2)
- date_visit1 = self.date_visit1
- origin_visit1 = self.storage.origin_visit_add(origin_id, date_visit1)
-
- revision = self.revision.copy()
- revision['id'] = occur['target']
- self.storage.revision_add([revision])
-
- occur.update({
- 'origin': origin_id,
- 'visit': origin_visit1['visit'],
- })
- self.storage.occurrence_add([occur])
-
- test_query = '''
- with indiv_occurrences as (
- select origin, branch, target, target_type, unnest(visits) as visit
- from occurrence_history
- )
- select origin, branch, target, target_type, date
- from indiv_occurrences
- left join origin_visit using(origin, visit)
- order by origin, date'''
-
- self.cursor.execute(test_query)
- ret = self.cursor.fetchall()
- self.assertEqual(len(ret), 1)
- self.assertEqual(
- (ret[0][0], ret[0][1].tobytes(), ret[0][2].tobytes(),
- ret[0][3], ret[0][4]),
- (occur['origin'], occur['branch'], occur['target'],
- occur['target_type'], self.date_visit1))
-
- date_visit2 = date_visit1 + datetime.timedelta(hours=10)
-
- origin_visit2 = self.storage.origin_visit_add(origin_id, date_visit2)
- occur2 = occur.copy()
- occur2.update({
- 'visit': origin_visit2['visit'],
- })
- self.storage.occurrence_add([occur2])
-
- self.cursor.execute(test_query)
- ret = self.cursor.fetchall()
- self.assertEqual(len(ret), 2)
- self.assertEqual(
- (ret[0][0], ret[0][1].tobytes(), ret[0][2].tobytes(),
- ret[0][3], ret[0][4]),
- (occur['origin'], occur['branch'], occur['target'],
- occur['target_type'], date_visit1))
- self.assertEqual(
- (ret[1][0], ret[1][1].tobytes(), ret[1][2].tobytes(),
- ret[1][3], ret[1][4]),
- (occur2['origin'], occur2['branch'], occur2['target'],
- occur2['target_type'], date_visit2))
-
def test_snapshot_add_get_empty(self):
origin_id = self.storage.origin_add_one(self.origin)
origin_visit1 = self.storage.origin_visit_add(origin_id,
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Nov 5 2024, 5:14 PM (11 w, 17 h ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3218130
Attached To
D535: Drop table occurrence_history
Event Timeline
Log In to Comment