Page MenuHomeSoftware Heritage

D714.id2259.diff
No OneTemporary

D714.id2259.diff

diff --git a/sql/upgrades/117.sql b/sql/upgrades/117.sql
--- a/sql/upgrades/117.sql
+++ b/sql/upgrades/117.sql
@@ -3,6 +3,9 @@
-- to_version: 117
-- description: Add fulltext search index for origin intrinsic metadata
+insert into dbversion(version, release, description)
+values(117, now(), 'Work In Progress');
+
alter table origin_intrinsic_metadata add column metadata_tsvector tsvector;
update origin_intrinsic_metadata set metadata_tsvector = to_tsvector('pg_catalog.simple', metadata);
create index origin_intrinsic_metadata_fulltext_idx on origin_intrinsic_metadata using gin (metadata_tsvector);
diff --git a/sql/upgrades/118.sql b/sql/upgrades/118.sql
new file mode 100644
--- /dev/null
+++ b/sql/upgrades/118.sql
@@ -0,0 +1,16 @@
+-- SWH Indexer DB schema upgrade
+-- from_version: 117
+-- to_version: 118
+-- description: content_mimetype: Migrate bytes column to text
+
+insert into dbversion(version, release, description)
+values(118, now(), 'Work In Progress');
+
+-- The existing bytea values are UTF-8 encoded strings. Without a USING clause
+-- the conversion would go through bytea's text output format (hex-escaped by
+-- default, e.g. '\x74657874...'), so decode the stored bytes explicitly.
+alter table content_mimetype
+ alter column mimetype set data type text
+ using convert_from(mimetype, 'UTF8'),
+ alter column encoding set data type text
+ using convert_from(encoding, 'UTF8');
diff --git a/swh/indexer/mimetype.py b/swh/indexer/mimetype.py
--- a/swh/indexer/mimetype.py
+++ b/swh/indexer/mimetype.py
@@ -23,8 +23,8 @@
"""
r = magic.detect_from_content(raw_content)
return {
- 'mimetype': r.mime_type.encode('utf-8'),
- 'encoding': r.encoding.encode('utf-8'),
+ 'mimetype': r.mime_type,
+ 'encoding': r.encoding,
}
diff --git a/swh/indexer/sql/30-swh-schema.sql b/swh/indexer/sql/30-swh-schema.sql
--- a/swh/indexer/sql/30-swh-schema.sql
+++ b/swh/indexer/sql/30-swh-schema.sql
@@ -14,7 +14,7 @@
);
insert into dbversion(version, release, description)
- values(116, now(), 'Work In Progress');
+ values(118, now(), 'Work In Progress'); -- must match the newest sql/upgrades/NNN.sql (118: content_mimetype text columns)
-- Computing metadata on sha1's contents
-- a SHA1 checksum (not necessarily originating from Git)
@@ -39,8 +39,8 @@
-- Properties (mimetype, encoding, etc...)
create table content_mimetype (
id sha1 not null,
- mimetype bytea not null,
- encoding bytea not null,
+ mimetype text not null,
+ encoding text not null,
indexer_configuration_id bigint not null
);
diff --git a/swh/indexer/tests/storage/test_storage.py b/swh/indexer/tests/storage/test_storage.py
--- a/swh/indexer/tests/storage/test_storage.py
+++ b/swh/indexer/tests/storage/test_storage.py
@@ -116,8 +116,8 @@
# given
self.storage.content_mimetype_add([{
'id': self.sha1_2,
- 'mimetype': b'text/plain',
- 'encoding': b'utf-8',
+ 'mimetype': 'text/plain',
+ 'encoding': 'utf-8',
'indexer_configuration_id': tool_id,
}])
@@ -133,8 +133,8 @@
mimetype_v1 = {
'id': self.sha1_2,
- 'mimetype': b'text/plain',
- 'encoding': b'utf-8',
+ 'mimetype': 'text/plain',
+ 'encoding': 'utf-8',
'indexer_configuration_id': tool_id,
}
@@ -148,8 +148,8 @@
# then
expected_mimetypes_v1 = [{
'id': self.sha1_2,
- 'mimetype': b'text/plain',
- 'encoding': b'utf-8',
+ 'mimetype': 'text/plain',
+ 'encoding': 'utf-8',
'tool': self.tools['file'],
}]
self.assertEqual(actual_mimetypes, expected_mimetypes_v1)
@@ -157,8 +157,8 @@
# given
mimetype_v2 = mimetype_v1.copy()
mimetype_v2.update({
- 'mimetype': b'text/html',
- 'encoding': b'us-ascii',
+ 'mimetype': 'text/html',
+ 'encoding': 'us-ascii',
})
self.storage.content_mimetype_add([mimetype_v2])
@@ -175,8 +175,8 @@
mimetype_v1 = {
'id': self.sha1_2,
- 'mimetype': b'text/plain',
- 'encoding': b'utf-8',
+ 'mimetype': 'text/plain',
+ 'encoding': 'utf-8',
'indexer_configuration_id': tool_id,
}
@@ -189,8 +189,8 @@
expected_mimetypes_v1 = [{
'id': self.sha1_2,
- 'mimetype': b'text/plain',
- 'encoding': b'utf-8',
+ 'mimetype': 'text/plain',
+ 'encoding': 'utf-8',
'tool': self.tools['file'],
}]
@@ -200,8 +200,8 @@
# given
mimetype_v2 = mimetype_v1.copy()
mimetype_v2.update({
- 'mimetype': b'text/html',
- 'encoding': b'us-ascii',
+ 'mimetype': 'text/html',
+ 'encoding': 'us-ascii',
})
self.storage.content_mimetype_add([mimetype_v2], conflict_update=True)
@@ -211,8 +211,8 @@
expected_mimetypes_v2 = [{
'id': self.sha1_2,
- 'mimetype': b'text/html',
- 'encoding': b'us-ascii',
+ 'mimetype': 'text/html',
+ 'encoding': 'us-ascii',
'tool': {
'id': 2,
'name': 'file',
@@ -232,8 +232,8 @@
mimetype1 = {
'id': self.sha1_2,
- 'mimetype': b'text/plain',
- 'encoding': b'utf-8',
+ 'mimetype': 'text/plain',
+ 'encoding': 'utf-8',
'indexer_configuration_id': tool_id,
}
@@ -246,8 +246,8 @@
# then
expected_mimetypes = [{
'id': self.sha1_2,
- 'mimetype': b'text/plain',
- 'encoding': b'utf-8',
+ 'mimetype': 'text/plain',
+ 'encoding': 'utf-8',
'tool': self.tools['file']
}]
diff --git a/swh/indexer/tests/test_mimetype.py b/swh/indexer/tests/test_mimetype.py
--- a/swh/indexer/tests/test_mimetype.py
+++ b/swh/indexer/tests/test_mimetype.py
@@ -37,8 +37,8 @@
actual_result = compute_mimetype_encoding(_input)
self.assertEqual(actual_result, {
- 'mimetype': _mimetype.encode('utf-8'),
- 'encoding': _encoding.encode('utf-8'),
+ 'mimetype': _mimetype,
+ 'encoding': _encoding
})
@@ -83,20 +83,20 @@
self.id0: {
'id': self.id0,
'indexer_configuration_id': tool_id,
- 'mimetype': b'text/plain',
- 'encoding': b'us-ascii',
+ 'mimetype': 'text/plain',
+ 'encoding': 'us-ascii',
},
self.id1: {
'id': self.id1,
'indexer_configuration_id': tool_id,
- 'mimetype': b'text/plain',
- 'encoding': b'us-ascii',
+ 'mimetype': 'text/plain',
+ 'encoding': 'us-ascii',
},
self.id2: {
'id': self.id2,
'indexer_configuration_id': tool_id,
- 'mimetype': b'application/x-empty',
- 'encoding': b'binary',
+ 'mimetype': 'application/x-empty',
+ 'encoding': 'binary',
}
}
@@ -154,20 +154,20 @@
self.expected_results = {
self.id0: {
- 'encoding': b'us-ascii',
+ 'encoding': 'us-ascii',
'id': self.id0,
'indexer_configuration_id': tool_id,
- 'mimetype': b'text/plain'},
+ 'mimetype': 'text/plain'},
self.id1: {
- 'encoding': b'us-ascii',
+ 'encoding': 'us-ascii',
'id': self.id1,
'indexer_configuration_id': tool_id,
- 'mimetype': b'text/x-python'},
+ 'mimetype': 'text/x-python'},
self.id2: {
- 'encoding': b'us-ascii',
+ 'encoding': 'us-ascii',
'id': self.id2,
'indexer_configuration_id': tool_id,
- 'mimetype': b'text/plain'}
+ 'mimetype': 'text/plain'}
}

File Metadata

Mime Type
text/plain
Expires
Jul 3 2025, 9:51 AM (5 w, 3 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3231703

Event Timeline