Page MenuHomeSoftware Heritage

D1010.id.diff
No OneTemporary

D1010.id.diff

diff --git a/sql/upgrades/119.sql b/sql/upgrades/119.sql
new file mode 100644
--- /dev/null
+++ b/sql/upgrades/119.sql
@@ -0,0 +1,24 @@
+-- SWH Indexer DB schema upgrade
+-- from_version: 118
+-- to_version: 119
+-- description: metadata tables: add 'mappings' column
+
+insert into dbversion(version, release, description)
+values(119, now(), 'Work In Progress');
+
+-- Add the column with a temporary empty-array default so that pre-existing
+-- rows satisfy the not-null constraint, then drop the default so the table
+-- ends up with a plain "text array not null" column.
+-- The empty array has to be spelled as the quoted literal '{}'.
+alter table revision_metadata
+ add column mappings text array not null default '{}';
+alter table revision_metadata
+ alter column mappings
+ drop default;
+
+-- Same two-step change for origin_intrinsic_metadata.
+alter table origin_intrinsic_metadata
+ add column mappings text array not null default '{}';
+alter table origin_intrinsic_metadata
+ alter column mappings
+ drop default;
diff --git a/swh/indexer/sql/30-swh-schema.sql b/swh/indexer/sql/30-swh-schema.sql
--- a/swh/indexer/sql/30-swh-schema.sql
+++ b/swh/indexer/sql/30-swh-schema.sql
@@ -14,7 +14,7 @@
);
insert into dbversion(version, release, description)
- values(118, now(), 'Work In Progress');
+ values(119, now(), 'Work In Progress');
-- Computing metadata on sha1's contents
-- a SHA1 checksum (not necessarily originating from Git)
@@ -118,7 +118,8 @@
create table revision_metadata(
id sha1_git not null,
translated_metadata jsonb not null,
- indexer_configuration_id bigint not null
+ indexer_configuration_id bigint not null,
+ mappings text array not null
);
comment on table revision_metadata is 'metadata semantically detected and translated in a revision';
@@ -131,7 +132,8 @@
metadata jsonb,
indexer_configuration_id bigint not null,
from_revision sha1_git not null,
- metadata_tsvector tsvector
+ metadata_tsvector tsvector,
+ mappings text array not null
);
comment on table origin_intrinsic_metadata is 'keeps intrinsic metadata for an origin';
diff --git a/swh/indexer/sql/40-swh-func.sql b/swh/indexer/sql/40-swh-func.sql
--- a/swh/indexer/sql/40-swh-func.sql
+++ b/swh/indexer/sql/40-swh-func.sql
@@ -315,15 +315,17 @@
as $$
begin
if conflict_update then
- insert into revision_metadata (id, translated_metadata, indexer_configuration_id)
- select id, translated_metadata, indexer_configuration_id
+ insert into revision_metadata (id, translated_metadata, mappings, indexer_configuration_id)
+ select id, translated_metadata, mappings, indexer_configuration_id
from tmp_revision_metadata tcm
on conflict(id, indexer_configuration_id)
- do update set translated_metadata = excluded.translated_metadata;
+ -- overwrite mode: refresh mappings together with the metadata they describe
+ do update set translated_metadata = excluded.translated_metadata,
+ mappings = excluded.mappings;
else
- insert into revision_metadata (id, translated_metadata, indexer_configuration_id)
- select id, translated_metadata, indexer_configuration_id
+ insert into revision_metadata (id, translated_metadata, mappings, indexer_configuration_id)
+ select id, translated_metadata, mappings, indexer_configuration_id
from tmp_revision_metadata tcm
on conflict(id, indexer_configuration_id)
do nothing;
@@ -410,17 +412,19 @@
begin
perform swh_origin_intrinsic_metadata_compute_tsvector();
if conflict_update then
- insert into origin_intrinsic_metadata (origin_id, metadata, indexer_configuration_id, from_revision, metadata_tsvector)
+ insert into origin_intrinsic_metadata (origin_id, metadata, indexer_configuration_id, from_revision, metadata_tsvector, mappings)
select origin_id, metadata, indexer_configuration_id, from_revision,
- metadata_tsvector
+ metadata_tsvector, mappings
from tmp_origin_intrinsic_metadata
on conflict(origin_id, indexer_configuration_id)
- do update set metadata = excluded.metadata;
+ -- overwrite mode: refresh mappings together with the metadata they describe
+ do update set metadata = excluded.metadata,
+ mappings = excluded.mappings;
else
- insert into origin_intrinsic_metadata (origin_id, metadata, indexer_configuration_id, from_revision, metadata_tsvector)
+ insert into origin_intrinsic_metadata (origin_id, metadata, indexer_configuration_id, from_revision, metadata_tsvector, mappings)
select origin_id, metadata, indexer_configuration_id, from_revision,
- metadata_tsvector
+ metadata_tsvector, mappings
from tmp_origin_intrinsic_metadata
on conflict(origin_id, indexer_configuration_id)
do nothing;
diff --git a/swh/indexer/storage/__init__.py b/swh/indexer/storage/__init__.py
--- a/swh/indexer/storage/__init__.py
+++ b/swh/indexer/storage/__init__.py
@@ -586,6 +586,8 @@
- **id** (bytes)
- **translated_metadata** (str): associated metadata
- **tool** (dict): tool used to compute metadata
+ - **mappings** (List[str]): list of mappings used to translate
+ these metadata
"""
for c in db.revision_metadata_get_from_list(ids, cur):
@@ -604,6 +606,8 @@
- **id**: sha1_git of revision
- **translated_metadata**: arbitrary dict
- **indexer_configuration_id**: tool used to compute metadata
+ - **mappings** (List[str]): list of mappings used to translate
+ these metadata
conflict_update: Flag to determine if we want to overwrite (true)
or skip duplicates (false, the default)
@@ -612,7 +616,8 @@
db.mktemp_revision_metadata(cur)
db.copy_to(metadata, 'tmp_revision_metadata',
- ['id', 'translated_metadata', 'indexer_configuration_id'],
+ ['id', 'translated_metadata', 'mappings',
+ 'indexer_configuration_id'],
cur)
db.revision_metadata_add_from_temp(conflict_update, cur)
@@ -630,6 +635,8 @@
- **origin_id** (int)
- **metadata** (str): associated metadata
- **tool** (dict): tool used to compute metadata
+ - **mappings** (List[str]): list of mappings used to translate
+ these metadata
"""
for c in db.origin_intrinsic_metadata_get_from_list(ids, cur):
@@ -651,6 +658,8 @@
these metadata.
- **metadata**: arbitrary dict
- **indexer_configuration_id**: tool used to compute metadata
+ - **mappings** (List[str]): list of mappings used to translate
+ these metadata
conflict_update: Flag to determine if we want to overwrite (true)
or skip duplicates (false, the default)
@@ -660,7 +669,7 @@
db.copy_to(metadata, 'tmp_origin_intrinsic_metadata',
['origin_id', 'metadata', 'indexer_configuration_id',
- 'from_revision'],
+ 'from_revision', 'mappings'],
cur)
db.origin_intrinsic_metadata_add_from_temp(conflict_update, cur)
@@ -680,6 +689,8 @@
- **id** (int)
- **metadata** (str): associated metadata
- **tool** (dict): tool used to compute metadata
+ - **mappings** (List[str]): list of mappings used to translate
+ these metadata
"""
for c in db.origin_intrinsic_metadata_search_fulltext(
diff --git a/swh/indexer/storage/db.py b/swh/indexer/storage/db.py
--- a/swh/indexer/storage/db.py
+++ b/swh/indexer/storage/db.py
@@ -295,7 +295,8 @@
yield from self._get_from_list(
'content_metadata', ids, self.content_metadata_cols, cur=cur)
- revision_metadata_hash_keys = ['id', 'indexer_configuration_id']
+ revision_metadata_hash_keys = [
+ 'id', 'indexer_configuration_id']
def revision_metadata_missing_from_list(self, metadata, cur=None):
"""List missing metadata.
@@ -306,7 +307,7 @@
cur=cur)
revision_metadata_cols = [
- 'id', 'translated_metadata',
+ 'id', 'translated_metadata', 'mappings',
'tool_id', 'tool_name', 'tool_version', 'tool_configuration']
@stored_procedure('swh_mktemp_revision_metadata')
@@ -321,7 +322,7 @@
'revision_metadata', ids, self.revision_metadata_cols, cur=cur)
origin_intrinsic_metadata_cols = [
- 'origin_id', 'metadata', 'from_revision',
+ 'origin_id', 'metadata', 'from_revision', 'mappings',
'tool_id', 'tool_name', 'tool_version', 'tool_configuration']
origin_intrinsic_metadata_regconfig = 'pg_catalog.simple'
diff --git a/swh/indexer/tests/storage/test_storage.py b/swh/indexer/tests/storage/test_storage.py
--- a/swh/indexer/tests/storage/test_storage.py
+++ b/swh/indexer/tests/storage/test_storage.py
@@ -1217,6 +1217,7 @@
'softwareRequirements': None,
'identifier': None
},
+ 'mappings': [],
'indexer_configuration_id': tool_id
}])
@@ -1250,6 +1251,7 @@
'softwareRequirements': None,
'identifier': None
},
+ 'mappings': ['mapping1', 'mapping2'],
'indexer_configuration_id': tool_id
}
@@ -1263,9 +1265,11 @@
expected_metadata = [{
'id': self.revision_id_2,
'translated_metadata': metadata_rev['translated_metadata'],
+ 'mappings': ['mapping1', 'mapping2'],
'tool': self.tools['swh-metadata-detector']
}]
+ self.maxDiff = None
self.assertEqual(actual_metadata, expected_metadata)
def test_revision_metadata_add_drop_duplicate(self):
@@ -1291,6 +1295,7 @@
'softwareRequirements': None,
'identifier': None
},
+ 'mappings': [],
'indexer_configuration_id': tool_id,
}
@@ -1304,6 +1309,7 @@
expected_metadata_v1 = [{
'id': self.revision_id_1,
'translated_metadata': metadata_v1['translated_metadata'],
+ 'mappings': [],
'tool': self.tools['swh-metadata-detector']
}]
@@ -1350,6 +1356,7 @@
'softwareRequirements': None,
'identifier': None
},
+ 'mappings': [],
'indexer_configuration_id': tool_id,
}
@@ -1364,6 +1371,7 @@
expected_metadata_v1 = [{
'id': self.revision_id_2,
'translated_metadata': metadata_v1['translated_metadata'],
+ 'mappings': [],
'tool': self.tools['swh-metadata-detector']
}]
self.assertEqual(actual_metadata, expected_metadata_v1)
@@ -1384,6 +1392,7 @@
expected_metadata_v2 = [{
'id': self.revision_id_2,
'translated_metadata': metadata_v2['translated_metadata'],
+ 'mappings': [],
'tool': self.tools['swh-metadata-detector']
}]
@@ -1414,12 +1423,14 @@
metadata_rev = {
'id': self.revision_id_2,
'translated_metadata': metadata,
+ 'mappings': ['mapping1'],
'indexer_configuration_id': tool_id,
}
metadata_origin = {
'origin_id': self.origin_id_1,
'metadata': metadata,
'indexer_configuration_id': tool_id,
+ 'mappings': ['mapping1'],
'from_revision': self.revision_id_2,
}
@@ -1436,6 +1447,7 @@
'metadata': metadata,
'tool': self.tools['swh-metadata-detector'],
'from_revision': self.revision_id_2,
+ 'mappings': ['mapping1'],
}]
self.assertEqual(actual_metadata, expected_metadata)
@@ -1464,12 +1476,14 @@
metadata_rev_v1 = {
'id': self.revision_id_1,
'translated_metadata': metadata_v1.copy(),
+ 'mappings': [],
'indexer_configuration_id': tool_id,
}
metadata_origin_v1 = {
'origin_id': self.origin_id_1,
'metadata': metadata_v1.copy(),
'indexer_configuration_id': tool_id,
+ 'mappings': [],
'from_revision': self.revision_id_1,
}
@@ -1486,6 +1500,7 @@
'metadata': metadata_v1,
'tool': self.tools['swh-metadata-detector'],
'from_revision': self.revision_id_1,
+ 'mappings': [],
}]
self.assertEqual(actual_metadata, expected_metadata_v1)
@@ -1535,12 +1550,14 @@
metadata_rev_v1 = {
'id': self.revision_id_2,
'translated_metadata': metadata_v1,
+ 'mappings': [],
'indexer_configuration_id': tool_id,
}
metadata_origin_v1 = {
'origin_id': self.origin_id_1,
'metadata': metadata_v1.copy(),
'indexer_configuration_id': tool_id,
+ 'mappings': [],
'from_revision': self.revision_id_2,
}
@@ -1558,6 +1575,7 @@
'metadata': metadata_v1,
'tool': self.tools['swh-metadata-detector'],
'from_revision': self.revision_id_2,
+ 'mappings': [],
}]
self.assertEqual(actual_metadata, expected_metadata_v1)
@@ -1585,6 +1603,7 @@
'metadata': metadata_v2,
'tool': self.tools['swh-metadata-detector'],
'from_revision': self.revision_id_2,
+ 'mappings': [],
}]
# metadata did change as the v2 was used to overwrite v1
@@ -1600,11 +1619,13 @@
metadata1_rev = {
'id': self.revision_id_1,
'translated_metadata': metadata1,
+ 'mappings': [],
'indexer_configuration_id': tool_id,
}
metadata1_origin = {
'origin_id': self.origin_id_1,
'metadata': metadata1,
+ 'mappings': [],
'indexer_configuration_id': tool_id,
'from_revision': self.revision_id_1,
}
@@ -1614,11 +1635,13 @@
metadata2_rev = {
'id': self.revision_id_2,
'translated_metadata': metadata2,
+ 'mappings': [],
'indexer_configuration_id': tool_id,
}
metadata2_origin = {
'origin_id': self.origin_id_2,
'metadata': metadata2,
+ 'mappings': [],
'indexer_configuration_id': tool_id,
'from_revision': self.revision_id_2,
}
@@ -1662,11 +1685,13 @@
metadata1_rev = {
'id': self.revision_id_1,
'translated_metadata': metadata1,
+ 'mappings': [],
'indexer_configuration_id': tool_id,
}
metadata1_origin = {
'origin_id': self.origin_id_1,
'metadata': metadata1,
+ 'mappings': [],
'indexer_configuration_id': tool_id,
'from_revision': self.revision_id_1,
}
@@ -1679,11 +1704,13 @@
metadata2_rev = {
'id': self.revision_id_2,
'translated_metadata': metadata2,
+ 'mappings': [],
'indexer_configuration_id': tool_id,
}
metadata2_origin = {
'origin_id': self.origin_id_2,
'metadata': metadata2,
+ 'mappings': [],
'indexer_configuration_id': tool_id,
'from_revision': self.revision_id_2,
}

File Metadata

Mime Type
text/plain
Expires
Nov 5 2024, 5:23 AM (8 w, 4 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3217524

Event Timeline