Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F9345948
D4722.id16726.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
4 KB
Subscribers
None
D4722.id16726.diff
View Options
diff --git a/requirements-swh.txt b/requirements-swh.txt
--- a/requirements-swh.txt
+++ b/requirements-swh.txt
@@ -1,4 +1,5 @@
# Add here internal Software Heritage dependencies, one per line.
swh.core[http] >= 0.3.0
+swh.indexer
swh.journal >= 0.1.0
swh.model
diff --git a/swh/search/elasticsearch.py b/swh/search/elasticsearch.py
--- a/swh/search/elasticsearch.py
+++ b/swh/search/elasticsearch.py
@@ -10,6 +10,7 @@
from elasticsearch.helpers import bulk, scan
import msgpack
+from swh.indexer import codemeta
from swh.model import model
from swh.model.identifiers import origin_identifier
from swh.search.interface import PagedResult
@@ -21,6 +22,8 @@
for field_name in ("intrinsic_metadata", "has_visits"):
if field_name in origin:
res[field_name] = origin.pop(field_name)
+ if "intrinsic_metadata" in res:
+ res["intrinsic_metadata"] = codemeta.expand(res["intrinsic_metadata"])
return res
diff --git a/swh/search/tests/test_in_memory.py b/swh/search/tests/test_in_memory.py
--- a/swh/search/tests/test_in_memory.py
+++ b/swh/search/tests/test_in_memory.py
@@ -39,3 +39,7 @@
@pytest.mark.skip("Not implemented in the in-memory search")
def test_origin_intrinsic_metadata_paging(self):
pass
+
+ @pytest.mark.skip("Not implemented in the in-memory search")
+ def test_origin_intrinsic_metadata_inconsistent_type(self):
+ pass
diff --git a/swh/search/tests/test_search.py b/swh/search/tests/test_search.py
--- a/swh/search/tests/test_search.py
+++ b/swh/search/tests/test_search.py
@@ -205,6 +205,75 @@
assert actual_page.next_page_token is None
assert actual_page.results == [origin3_barbaz]
+ def test_origin_intrinsic_metadata_inconsistent_type(self):
+ """Checks the same field can have a concrete value, an object, or an array
+ in different documents."""
+ origin1_foobar = {"url": "http://origin1"}
+ origin2_barbaz = {"url": "http://origin2"}
+ origin3_bazqux = {"url": "http://origin3"}
+
+ self.search.origin_update(
+ [
+ {
+ **origin1_foobar,
+ "intrinsic_metadata": {
+ "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
+ "author": {"familyName": "Foo", "givenName": "Bar",},
+ },
+ },
+ ]
+ )
+ self.search.flush()
+ self.search.origin_update(
+ [
+ {
+ **origin2_barbaz,
+ "intrinsic_metadata": {
+ "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
+ "author": "Bar Baz",
+ },
+ },
+ {
+ **origin3_bazqux,
+ "intrinsic_metadata": {
+ "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
+ "author": ["Baz", "Qux"],
+ },
+ },
+ ]
+ )
+ self.search.flush()
+
+ actual_page = self.search.origin_search(metadata_pattern="bar")
+ assert actual_page.next_page_token is None
+ assert actual_page.results == [origin2_barbaz, origin1_foobar]
+
+ actual_page = self.search.origin_search(metadata_pattern="baz")
+ assert actual_page.next_page_token is None
+ assert actual_page.results == [origin2_barbaz, origin3_bazqux]
+
+ actual_page = self.search.origin_search(metadata_pattern="foo")
+ assert actual_page.next_page_token is None
+ assert actual_page.results == [origin1_foobar]
+
+ actual_page = self.search.origin_search(metadata_pattern="bar baz")
+ assert actual_page.next_page_token is None
+ assert actual_page.results == [origin2_barbaz]
+
+ actual_page = self.search.origin_search(metadata_pattern="qux")
+ assert actual_page.next_page_token is None
+ assert actual_page.results == [origin3_bazqux]
+
+ actual_page = self.search.origin_search(metadata_pattern="baz qux")
+ assert actual_page.next_page_token is None
+ assert actual_page.results == [origin3_bazqux]
+
+ # FIXME: the following won't work because "foo" and "bar" are not in the
+ # same field.
+ # actual_page = self.search.origin_search(metadata_pattern="foo bar")
+ # assert actual_page.next_page_token is None
+ # assert actual_page.results == [origin1_foobar]
+
# TODO: add more tests with more codemeta terms
# TODO: add more tests with edge cases
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Thu, Jul 3, 3:37 PM (1 w, 2 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3218276
Attached To
D4722: Normalize Codemeta documents by expanding them.
Event Timeline
Log In to Comment