Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7066694
D1556.id5118.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
7 KB
Subscribers
None
D1556.id5118.diff
View Options
diff --git a/sql/upgrades/135.sql b/sql/upgrades/135.sql
new file mode 100644
--- /dev/null
+++ b/sql/upgrades/135.sql
@@ -0,0 +1,13 @@
+-- SWH DB schema upgrade
+-- from_version: 134
+-- to_version: 135
+-- description: Add an index on origin.url, drop the index on (origin.type, origin.url)
+
+insert into dbversion(version, release, description)
+ values(135, now(), 'Work In Progress');
+
+create extension if not exists pg_trgm;
+
+drop index origin_type_url_idx;
+create index concurrently on origin using gin (url gin_trgm_ops);
+create index concurrently on origin using hash (url);
diff --git a/swh/storage/db.py b/swh/storage/db.py
--- a/swh/storage/db.py
+++ b/swh/storage/db.py
@@ -650,7 +650,8 @@
query = """SELECT %s FROM (VALUES %%s) as t(type, url)
LEFT JOIN origin
- ON (t.type=origin.type AND t.url=origin.url)
+ ON ((t.type IS NULL OR t.type=origin.type)
+ AND t.url=origin.url)
""" % ','.join('origin.' + col for col in self.origin_cols)
yield from execute_values_generator(
diff --git a/swh/storage/in_memory.py b/swh/storage/in_memory.py
--- a/swh/storage/in_memory.py
+++ b/swh/storage/in_memory.py
@@ -929,10 +929,13 @@
"""Return origins, either all identified by their ids or all
identified by tuples (type, url).
+ If the url is given and the type is omitted, one of the origins with
+ that url is returned.
+
Args:
origin: a list of dictionaries representing the individual
origins to find.
- These dicts have either the keys type and url:
+ These dicts have either the key url (and optionally type):
- type (FIXME: enum TBD): the origin type ('git', 'wget', ...)
- url (bytes): the url the origin points to
@@ -964,18 +967,17 @@
and not all('id' in origin for origin in origins):
raise ValueError(
'Either all origins or none at all should have an "id".')
- if any('type' in origin and 'url' in origin for origin in origins) \
- and not all('type' in origin and 'url' in origin
- for origin in origins):
+ if any('url' in origin for origin in origins) \
+ and not all('url' in origin for origin in origins):
raise ValueError(
- 'Either all origins or none at all should have a '
- '"type" and an "url".')
+ 'Either all origins or none at all should have '
+ 'an "url" key.')
results = []
for origin in origins:
if 'id' in origin:
origin_id = origin['id']
- elif 'type' in origin and 'url' in origin:
+ elif 'url' in origin:
origin_id = self._origin_id(origin)
else:
raise ValueError(
@@ -1512,8 +1514,9 @@
def _origin_id(self, origin):
origin_id = None
for stored_origin in self._origins:
- if stored_origin['type'] == origin['type'] and \
- stored_origin['url'] == origin['url']:
+ if stored_origin['url'] == origin['url'] \
+ and ('type' not in origin
+ or stored_origin['type'] == origin['type']):
origin_id = stored_origin['id']
break
return origin_id
diff --git a/swh/storage/sql/10-swh-init.sql b/swh/storage/sql/10-swh-init.sql
--- a/swh/storage/sql/10-swh-init.sql
+++ b/swh/storage/sql/10-swh-init.sql
@@ -2,5 +2,6 @@
create extension if not exists btree_gist;
create extension if not exists pgcrypto;
+create extension if not exists pg_trgm;
create or replace language plpgsql;
diff --git a/swh/storage/sql/60-swh-indexes.sql b/swh/storage/sql/60-swh-indexes.sql
--- a/swh/storage/sql/60-swh-indexes.sql
+++ b/swh/storage/sql/60-swh-indexes.sql
@@ -14,7 +14,8 @@
create unique index concurrently origin_pkey on origin(id);
alter table origin add primary key using index origin_pkey;
-create index concurrently on origin(type, url);
+create index concurrently on origin using gin (url gin_trgm_ops);
+create index concurrently on origin using hash (url);
-- skipped_content
diff --git a/swh/storage/storage.py b/swh/storage/storage.py
--- a/swh/storage/storage.py
+++ b/swh/storage/storage.py
@@ -1346,10 +1346,13 @@
"""Return origins, either all identified by their ids or all
identified by tuples (type, url).
+ If the url is given and the type is omitted, one of the origins with
+ that url is returned.
+
Args:
origin: a list of dictionaries representing the individual
origins to find.
- These dicts have either the keys type and url:
+ These dicts have either the key url (and optionally type):
- type (FIXME: enum TBD): the origin type ('git', 'wget', ...)
- url (bytes): the url the origin points to
@@ -1388,16 +1391,16 @@
else:
raise ValueError(
'Either all origins or none at all should have an "id".')
- elif any(type_ and url for (type_, url) in origin_types_and_urls):
+ elif any(url for (type_, url) in origin_types_and_urls):
# Lookup per type + URL
- if all(type_ and url for (type_, url) in origin_types_and_urls):
+ if all(url for (type_, url) in origin_types_and_urls):
results = db.origin_get_with(origin_types_and_urls, cur)
else:
raise ValueError(
- 'Either all origins or none at all should have a '
- '"type" and an "url".')
+ 'Either all origins or none at all should have '
+ 'an "url" key.')
else: # unsupported lookup
- raise ValueError('Origin must have either id or (type and url).')
+ raise ValueError('Origin must have either id or url.')
results = [dict(zip(self.origin_keys, result))
for result in results]
diff --git a/swh/storage/tests/test_storage.py b/swh/storage/tests/test_storage.py
--- a/swh/storage/tests/test_storage.py
+++ b/swh/storage/tests/test_storage.py
@@ -1290,6 +1290,37 @@
self.assertEqual(add1, add2)
+ def test_origin_get_without_type(self):
+ origin0 = self.storage.origin_get([self.origin])[0]
+ self.assertIsNone(origin0)
+
+ origin3 = self.origin2.copy()
+ origin3['type'] += 'foo'
+
+ origin1, origin2, origin3 = self.storage.origin_add(
+ [self.origin, self.origin2, origin3])
+
+ actual_origin = self.storage.origin_get([{
+ 'url': self.origin['url'],
+ }])[0]
+ self.assertEqual(actual_origin['id'], origin1['id'])
+
+ actual_origin_2_or_3 = self.storage.origin_get([{
+ 'url': self.origin2['url'],
+ }])[0]
+ self.assertIn(
+ actual_origin_2_or_3['id'],
+ [origin2['id'], origin3['id']])
+
+ del actual_origin['id']
+ del actual_origin_2_or_3['id']
+ del origin3['id']
+
+ self.assertEqual(list(self.journal_writer.objects),
+ [('origin', self.origin),
+ ('origin', self.origin2),
+ ('origin', origin3)])
+
def test_origin_get_legacy(self):
self.assertIsNone(self.storage.origin_get(self.origin))
id = self.storage.origin_add_one(self.origin)
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Nov 5 2024, 6:35 PM (11 w, 14 h ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3222770
Attached To
D1556: Add support for omitting origin type when calling origin_get.
Event Timeline
Log In to Comment