Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7066288
D1010.id.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
15 KB
Subscribers
None
D1010.id.diff
View Options
diff --git a/sql/upgrades/119.sql b/sql/upgrades/119.sql
new file mode 100644
--- /dev/null
+++ b/sql/upgrades/119.sql
@@ -0,0 +1,19 @@
+-- SWH Indexer DB schema upgrade
+-- from_version: 118
+-- to_version: 119
+-- description: metadata tables: add 'mappings' column
+
+insert into dbversion(version, release, description)
+values(119, now(), 'Work In Progress');
+
+alter table revision_metadata
+  add column mappings text array not null default '{}';  -- temporary default so existing rows satisfy NOT NULL
+alter table revision_metadata
+  alter column mappings
+    drop default;  -- matches 30-swh-schema.sql, which declares no default
+
+alter table origin_intrinsic_metadata
+  add column mappings text array not null default '{}';  -- temporary default so existing rows satisfy NOT NULL
+alter table origin_intrinsic_metadata
+  alter column mappings
+    drop default;  -- matches 30-swh-schema.sql, which declares no default
diff --git a/swh/indexer/sql/30-swh-schema.sql b/swh/indexer/sql/30-swh-schema.sql
--- a/swh/indexer/sql/30-swh-schema.sql
+++ b/swh/indexer/sql/30-swh-schema.sql
@@ -14,7 +14,7 @@
);
insert into dbversion(version, release, description)
- values(118, now(), 'Work In Progress');
+ values(119, now(), 'Work In Progress');
-- Computing metadata on sha1's contents
-- a SHA1 checksum (not necessarily originating from Git)
@@ -118,7 +118,8 @@
create table revision_metadata(
id sha1_git not null,
translated_metadata jsonb not null,
- indexer_configuration_id bigint not null
+ indexer_configuration_id bigint not null,
+ mappings text array not null
);
comment on table revision_metadata is 'metadata semantically detected and translated in a revision';
@@ -131,7 +132,8 @@
metadata jsonb,
indexer_configuration_id bigint not null,
from_revision sha1_git not null,
- metadata_tsvector tsvector
+ metadata_tsvector tsvector,
+ mappings text array not null
);
comment on table origin_intrinsic_metadata is 'keeps intrinsic metadata for an origin';
diff --git a/swh/indexer/sql/40-swh-func.sql b/swh/indexer/sql/40-swh-func.sql
--- a/swh/indexer/sql/40-swh-func.sql
+++ b/swh/indexer/sql/40-swh-func.sql
@@ -315,15 +315,15 @@
as $$
begin
if conflict_update then
- insert into revision_metadata (id, translated_metadata, indexer_configuration_id)
- select id, translated_metadata, indexer_configuration_id
+ insert into revision_metadata (id, translated_metadata, mappings, indexer_configuration_id)
+ select id, translated_metadata, mappings, indexer_configuration_id
from tmp_revision_metadata tcm
on conflict(id, indexer_configuration_id)
do update set translated_metadata = excluded.translated_metadata;
else
- insert into revision_metadata (id, translated_metadata, indexer_configuration_id)
- select id, translated_metadata, indexer_configuration_id
+ insert into revision_metadata (id, translated_metadata, mappings, indexer_configuration_id)
+ select id, translated_metadata, mappings, indexer_configuration_id
from tmp_revision_metadata tcm
on conflict(id, indexer_configuration_id)
do nothing;
@@ -410,17 +410,17 @@
begin
perform swh_origin_intrinsic_metadata_compute_tsvector();
if conflict_update then
- insert into origin_intrinsic_metadata (origin_id, metadata, indexer_configuration_id, from_revision, metadata_tsvector)
+ insert into origin_intrinsic_metadata (origin_id, metadata, indexer_configuration_id, from_revision, metadata_tsvector, mappings)
select origin_id, metadata, indexer_configuration_id, from_revision,
- metadata_tsvector
+ metadata_tsvector, mappings
from tmp_origin_intrinsic_metadata
on conflict(origin_id, indexer_configuration_id)
do update set metadata = excluded.metadata;
else
- insert into origin_intrinsic_metadata (origin_id, metadata, indexer_configuration_id, from_revision, metadata_tsvector)
+ insert into origin_intrinsic_metadata (origin_id, metadata, indexer_configuration_id, from_revision, metadata_tsvector, mappings)
select origin_id, metadata, indexer_configuration_id, from_revision,
- metadata_tsvector
+ metadata_tsvector, mappings
from tmp_origin_intrinsic_metadata
on conflict(origin_id, indexer_configuration_id)
do nothing;
diff --git a/swh/indexer/storage/__init__.py b/swh/indexer/storage/__init__.py
--- a/swh/indexer/storage/__init__.py
+++ b/swh/indexer/storage/__init__.py
@@ -586,6 +586,8 @@
- **id** (bytes)
- **translated_metadata** (str): associated metadata
- **tool** (dict): tool used to compute metadata
+ - **mappings** (List[str]): list of mappings used to translate
+ these metadata
"""
for c in db.revision_metadata_get_from_list(ids, cur):
@@ -604,6 +606,8 @@
- **id**: sha1_git of revision
- **translated_metadata**: arbitrary dict
- **indexer_configuration_id**: tool used to compute metadata
+ - **mappings** (List[str]): list of mappings used to translate
+ these metadata
conflict_update: Flag to determine if we want to overwrite (true)
or skip duplicates (false, the default)
@@ -612,7 +616,8 @@
db.mktemp_revision_metadata(cur)
db.copy_to(metadata, 'tmp_revision_metadata',
- ['id', 'translated_metadata', 'indexer_configuration_id'],
+ ['id', 'translated_metadata', 'mappings',
+ 'indexer_configuration_id'],
cur)
db.revision_metadata_add_from_temp(conflict_update, cur)
@@ -630,6 +635,8 @@
- **origin_id** (int)
- **metadata** (str): associated metadata
- **tool** (dict): tool used to compute metadata
+ - **mappings** (List[str]): list of mappings used to translate
+ these metadata
"""
for c in db.origin_intrinsic_metadata_get_from_list(ids, cur):
@@ -651,6 +658,8 @@
these metadata.
- **metadata**: arbitrary dict
- **indexer_configuration_id**: tool used to compute metadata
+ - **mappings** (List[str]): list of mappings used to translate
+ these metadata
conflict_update: Flag to determine if we want to overwrite (true)
or skip duplicates (false, the default)
@@ -660,7 +669,7 @@
db.copy_to(metadata, 'tmp_origin_intrinsic_metadata',
['origin_id', 'metadata', 'indexer_configuration_id',
- 'from_revision'],
+ 'from_revision', 'mappings'],
cur)
db.origin_intrinsic_metadata_add_from_temp(conflict_update, cur)
@@ -680,6 +689,8 @@
- **id** (int)
- **metadata** (str): associated metadata
- **tool** (dict): tool used to compute metadata
+ - **mappings** (List[str]): list of mappings used to translate
+ these metadata
"""
for c in db.origin_intrinsic_metadata_search_fulltext(
diff --git a/swh/indexer/storage/db.py b/swh/indexer/storage/db.py
--- a/swh/indexer/storage/db.py
+++ b/swh/indexer/storage/db.py
@@ -295,7 +295,8 @@
yield from self._get_from_list(
'content_metadata', ids, self.content_metadata_cols, cur=cur)
- revision_metadata_hash_keys = ['id', 'indexer_configuration_id']
+ revision_metadata_hash_keys = [
+ 'id', 'indexer_configuration_id']
def revision_metadata_missing_from_list(self, metadata, cur=None):
"""List missing metadata.
@@ -306,7 +307,7 @@
cur=cur)
revision_metadata_cols = [
- 'id', 'translated_metadata',
+ 'id', 'translated_metadata', 'mappings',
'tool_id', 'tool_name', 'tool_version', 'tool_configuration']
@stored_procedure('swh_mktemp_revision_metadata')
@@ -321,7 +322,7 @@
'revision_metadata', ids, self.revision_metadata_cols, cur=cur)
origin_intrinsic_metadata_cols = [
- 'origin_id', 'metadata', 'from_revision',
+ 'origin_id', 'metadata', 'from_revision', 'mappings',
'tool_id', 'tool_name', 'tool_version', 'tool_configuration']
origin_intrinsic_metadata_regconfig = 'pg_catalog.simple'
diff --git a/swh/indexer/tests/storage/test_storage.py b/swh/indexer/tests/storage/test_storage.py
--- a/swh/indexer/tests/storage/test_storage.py
+++ b/swh/indexer/tests/storage/test_storage.py
@@ -1217,6 +1217,7 @@
'softwareRequirements': None,
'identifier': None
},
+ 'mappings': [],
'indexer_configuration_id': tool_id
}])
@@ -1250,6 +1251,7 @@
'softwareRequirements': None,
'identifier': None
},
+ 'mappings': ['mapping1', 'mapping2'],
'indexer_configuration_id': tool_id
}
@@ -1263,9 +1265,11 @@
expected_metadata = [{
'id': self.revision_id_2,
'translated_metadata': metadata_rev['translated_metadata'],
+ 'mappings': ['mapping1', 'mapping2'],
'tool': self.tools['swh-metadata-detector']
}]
+ self.maxDiff = None
self.assertEqual(actual_metadata, expected_metadata)
def test_revision_metadata_add_drop_duplicate(self):
@@ -1291,6 +1295,7 @@
'softwareRequirements': None,
'identifier': None
},
+ 'mappings': [],
'indexer_configuration_id': tool_id,
}
@@ -1304,6 +1309,7 @@
expected_metadata_v1 = [{
'id': self.revision_id_1,
'translated_metadata': metadata_v1['translated_metadata'],
+ 'mappings': [],
'tool': self.tools['swh-metadata-detector']
}]
@@ -1350,6 +1356,7 @@
'softwareRequirements': None,
'identifier': None
},
+ 'mappings': [],
'indexer_configuration_id': tool_id,
}
@@ -1364,6 +1371,7 @@
expected_metadata_v1 = [{
'id': self.revision_id_2,
'translated_metadata': metadata_v1['translated_metadata'],
+ 'mappings': [],
'tool': self.tools['swh-metadata-detector']
}]
self.assertEqual(actual_metadata, expected_metadata_v1)
@@ -1384,6 +1392,7 @@
expected_metadata_v2 = [{
'id': self.revision_id_2,
'translated_metadata': metadata_v2['translated_metadata'],
+ 'mappings': [],
'tool': self.tools['swh-metadata-detector']
}]
@@ -1414,12 +1423,14 @@
metadata_rev = {
'id': self.revision_id_2,
'translated_metadata': metadata,
+ 'mappings': ['mapping1'],
'indexer_configuration_id': tool_id,
}
metadata_origin = {
'origin_id': self.origin_id_1,
'metadata': metadata,
'indexer_configuration_id': tool_id,
+ 'mappings': ['mapping1'],
'from_revision': self.revision_id_2,
}
@@ -1436,6 +1447,7 @@
'metadata': metadata,
'tool': self.tools['swh-metadata-detector'],
'from_revision': self.revision_id_2,
+ 'mappings': ['mapping1'],
}]
self.assertEqual(actual_metadata, expected_metadata)
@@ -1464,12 +1476,14 @@
metadata_rev_v1 = {
'id': self.revision_id_1,
'translated_metadata': metadata_v1.copy(),
+ 'mappings': [],
'indexer_configuration_id': tool_id,
}
metadata_origin_v1 = {
'origin_id': self.origin_id_1,
'metadata': metadata_v1.copy(),
'indexer_configuration_id': tool_id,
+ 'mappings': [],
'from_revision': self.revision_id_1,
}
@@ -1486,6 +1500,7 @@
'metadata': metadata_v1,
'tool': self.tools['swh-metadata-detector'],
'from_revision': self.revision_id_1,
+ 'mappings': [],
}]
self.assertEqual(actual_metadata, expected_metadata_v1)
@@ -1535,12 +1550,14 @@
metadata_rev_v1 = {
'id': self.revision_id_2,
'translated_metadata': metadata_v1,
+ 'mappings': [],
'indexer_configuration_id': tool_id,
}
metadata_origin_v1 = {
'origin_id': self.origin_id_1,
'metadata': metadata_v1.copy(),
'indexer_configuration_id': tool_id,
+ 'mappings': [],
'from_revision': self.revision_id_2,
}
@@ -1558,6 +1575,7 @@
'metadata': metadata_v1,
'tool': self.tools['swh-metadata-detector'],
'from_revision': self.revision_id_2,
+ 'mappings': [],
}]
self.assertEqual(actual_metadata, expected_metadata_v1)
@@ -1585,6 +1603,7 @@
'metadata': metadata_v2,
'tool': self.tools['swh-metadata-detector'],
'from_revision': self.revision_id_2,
+ 'mappings': [],
}]
# metadata did change as the v2 was used to overwrite v1
@@ -1600,11 +1619,13 @@
metadata1_rev = {
'id': self.revision_id_1,
'translated_metadata': metadata1,
+ 'mappings': [],
'indexer_configuration_id': tool_id,
}
metadata1_origin = {
'origin_id': self.origin_id_1,
'metadata': metadata1,
+ 'mappings': [],
'indexer_configuration_id': tool_id,
'from_revision': self.revision_id_1,
}
@@ -1614,11 +1635,13 @@
metadata2_rev = {
'id': self.revision_id_2,
'translated_metadata': metadata2,
+ 'mappings': [],
'indexer_configuration_id': tool_id,
}
metadata2_origin = {
'origin_id': self.origin_id_2,
'metadata': metadata2,
+ 'mappings': [],
'indexer_configuration_id': tool_id,
'from_revision': self.revision_id_2,
}
@@ -1662,11 +1685,13 @@
metadata1_rev = {
'id': self.revision_id_1,
'translated_metadata': metadata1,
+ 'mappings': [],
'indexer_configuration_id': tool_id,
}
metadata1_origin = {
'origin_id': self.origin_id_1,
'metadata': metadata1,
+ 'mappings': [],
'indexer_configuration_id': tool_id,
'from_revision': self.revision_id_1,
}
@@ -1679,11 +1704,13 @@
metadata2_rev = {
'id': self.revision_id_2,
'translated_metadata': metadata2,
+ 'mappings': [],
'indexer_configuration_id': tool_id,
}
metadata2_origin = {
'origin_id': self.origin_id_2,
'metadata': metadata2,
+ 'mappings': [],
'indexer_configuration_id': tool_id,
'from_revision': self.revision_id_2,
}
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Nov 5 2024, 5:23 AM (8 w, 4 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3217524
Attached To
D1010: Make metadata indexers store the mappings used to translate metadata.
Event Timeline
Log In to Comment