Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F9339690
D714.id2259.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
7 KB
Subscribers
None
D714.id2259.diff
View Options
diff --git a/sql/upgrades/117.sql b/sql/upgrades/117.sql
--- a/sql/upgrades/117.sql
+++ b/sql/upgrades/117.sql
@@ -3,6 +3,9 @@
-- to_version: 117
-- description: Add fulltext search index for origin intrinsic metadata
+insert into dbversion(version, release, description)
+values(117, now(), 'Work In Progress');
+
alter table origin_intrinsic_metadata add column metadata_tsvector tsvector;
update origin_intrinsic_metadata set metadata_tsvector = to_tsvector('pg_catalog.simple', metadata);
create index origin_intrinsic_metadata_fulltext_idx on origin_intrinsic_metadata using gin (metadata_tsvector);
diff --git a/sql/upgrades/118.sql b/sql/upgrades/118.sql
new file mode 100644
--- /dev/null
+++ b/sql/upgrades/118.sql
@@ -0,0 +1,11 @@
+-- SWH Indexer DB schema upgrade
+-- from_version: 117
+-- to_version: 118
+-- description: content_mimetype: Migrate bytes column to text
+
+insert into dbversion(version, release, description)
+values(118, now(), 'Work In Progress');
+
+alter table content_mimetype
+ alter column mimetype set data type text,
+ alter column encoding set data type text;
diff --git a/swh/indexer/mimetype.py b/swh/indexer/mimetype.py
--- a/swh/indexer/mimetype.py
+++ b/swh/indexer/mimetype.py
@@ -23,8 +23,8 @@
"""
r = magic.detect_from_content(raw_content)
return {
- 'mimetype': r.mime_type.encode('utf-8'),
- 'encoding': r.encoding.encode('utf-8'),
+ 'mimetype': r.mime_type,
+ 'encoding': r.encoding,
}
diff --git a/swh/indexer/sql/30-swh-schema.sql b/swh/indexer/sql/30-swh-schema.sql
--- a/swh/indexer/sql/30-swh-schema.sql
+++ b/swh/indexer/sql/30-swh-schema.sql
@@ -14,7 +14,7 @@
);
insert into dbversion(version, release, description)
- values(116, now(), 'Work In Progress');
+ values(117, now(), 'Work In Progress');
-- Computing metadata on sha1's contents
-- a SHA1 checksum (not necessarily originating from Git)
@@ -39,8 +39,8 @@
-- Properties (mimetype, encoding, etc...)
create table content_mimetype (
id sha1 not null,
- mimetype bytea not null,
- encoding bytea not null,
+ mimetype text not null,
+ encoding text not null,
indexer_configuration_id bigint not null
);
diff --git a/swh/indexer/tests/storage/test_storage.py b/swh/indexer/tests/storage/test_storage.py
--- a/swh/indexer/tests/storage/test_storage.py
+++ b/swh/indexer/tests/storage/test_storage.py
@@ -116,8 +116,8 @@
# given
self.storage.content_mimetype_add([{
'id': self.sha1_2,
- 'mimetype': b'text/plain',
- 'encoding': b'utf-8',
+ 'mimetype': 'text/plain',
+ 'encoding': 'utf-8',
'indexer_configuration_id': tool_id,
}])
@@ -133,8 +133,8 @@
mimetype_v1 = {
'id': self.sha1_2,
- 'mimetype': b'text/plain',
- 'encoding': b'utf-8',
+ 'mimetype': 'text/plain',
+ 'encoding': 'utf-8',
'indexer_configuration_id': tool_id,
}
@@ -148,8 +148,8 @@
# then
expected_mimetypes_v1 = [{
'id': self.sha1_2,
- 'mimetype': b'text/plain',
- 'encoding': b'utf-8',
+ 'mimetype': 'text/plain',
+ 'encoding': 'utf-8',
'tool': self.tools['file'],
}]
self.assertEqual(actual_mimetypes, expected_mimetypes_v1)
@@ -157,8 +157,8 @@
# given
mimetype_v2 = mimetype_v1.copy()
mimetype_v2.update({
- 'mimetype': b'text/html',
- 'encoding': b'us-ascii',
+ 'mimetype': 'text/html',
+ 'encoding': 'us-ascii',
})
self.storage.content_mimetype_add([mimetype_v2])
@@ -175,8 +175,8 @@
mimetype_v1 = {
'id': self.sha1_2,
- 'mimetype': b'text/plain',
- 'encoding': b'utf-8',
+ 'mimetype': 'text/plain',
+ 'encoding': 'utf-8',
'indexer_configuration_id': tool_id,
}
@@ -189,8 +189,8 @@
expected_mimetypes_v1 = [{
'id': self.sha1_2,
- 'mimetype': b'text/plain',
- 'encoding': b'utf-8',
+ 'mimetype': 'text/plain',
+ 'encoding': 'utf-8',
'tool': self.tools['file'],
}]
@@ -200,8 +200,8 @@
# given
mimetype_v2 = mimetype_v1.copy()
mimetype_v2.update({
- 'mimetype': b'text/html',
- 'encoding': b'us-ascii',
+ 'mimetype': 'text/html',
+ 'encoding': 'us-ascii',
})
self.storage.content_mimetype_add([mimetype_v2], conflict_update=True)
@@ -211,8 +211,8 @@
expected_mimetypes_v2 = [{
'id': self.sha1_2,
- 'mimetype': b'text/html',
- 'encoding': b'us-ascii',
+ 'mimetype': 'text/html',
+ 'encoding': 'us-ascii',
'tool': {
'id': 2,
'name': 'file',
@@ -232,8 +232,8 @@
mimetype1 = {
'id': self.sha1_2,
- 'mimetype': b'text/plain',
- 'encoding': b'utf-8',
+ 'mimetype': 'text/plain',
+ 'encoding': 'utf-8',
'indexer_configuration_id': tool_id,
}
@@ -246,8 +246,8 @@
# then
expected_mimetypes = [{
'id': self.sha1_2,
- 'mimetype': b'text/plain',
- 'encoding': b'utf-8',
+ 'mimetype': 'text/plain',
+ 'encoding': 'utf-8',
'tool': self.tools['file']
}]
diff --git a/swh/indexer/tests/test_mimetype.py b/swh/indexer/tests/test_mimetype.py
--- a/swh/indexer/tests/test_mimetype.py
+++ b/swh/indexer/tests/test_mimetype.py
@@ -37,8 +37,8 @@
actual_result = compute_mimetype_encoding(_input)
self.assertEqual(actual_result, {
- 'mimetype': _mimetype.encode('utf-8'),
- 'encoding': _encoding.encode('utf-8'),
+ 'mimetype': _mimetype,
+ 'encoding': _encoding
})
@@ -83,20 +83,20 @@
self.id0: {
'id': self.id0,
'indexer_configuration_id': tool_id,
- 'mimetype': b'text/plain',
- 'encoding': b'us-ascii',
+ 'mimetype': 'text/plain',
+ 'encoding': 'us-ascii',
},
self.id1: {
'id': self.id1,
'indexer_configuration_id': tool_id,
- 'mimetype': b'text/plain',
- 'encoding': b'us-ascii',
+ 'mimetype': 'text/plain',
+ 'encoding': 'us-ascii',
},
self.id2: {
'id': self.id2,
'indexer_configuration_id': tool_id,
- 'mimetype': b'application/x-empty',
- 'encoding': b'binary',
+ 'mimetype': 'application/x-empty',
+ 'encoding': 'binary',
}
}
@@ -154,20 +154,20 @@
self.expected_results = {
self.id0: {
- 'encoding': b'us-ascii',
+ 'encoding': 'us-ascii',
'id': self.id0,
'indexer_configuration_id': tool_id,
- 'mimetype': b'text/plain'},
+ 'mimetype': 'text/plain'},
self.id1: {
- 'encoding': b'us-ascii',
+ 'encoding': 'us-ascii',
'id': self.id1,
'indexer_configuration_id': tool_id,
- 'mimetype': b'text/x-python'},
+ 'mimetype': 'text/x-python'},
self.id2: {
- 'encoding': b'us-ascii',
+ 'encoding': 'us-ascii',
'id': self.id2,
'indexer_configuration_id': tool_id,
- 'mimetype': b'text/plain'}
+ 'mimetype': 'text/plain'}
}
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Jul 3 2025, 9:51 AM (5 w, 3 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3231703
Attached To
D714: mimetype: Migrate to indexed data as text
Event Timeline
Log In to Comment