Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7343083
D5070.id18090.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
4 KB
Subscribers
None
D5070.id18090.diff
View Options
diff --git a/requirements.txt b/requirements.txt
--- a/requirements.txt
+++ b/requirements.txt
@@ -3,3 +3,4 @@
# dependency lines, see https://pip.readthedocs.org/en/1.1/requirements.html
click
elasticsearch>=7.0.0,<8.0.0
+typing-extensions
diff --git a/swh/search/elasticsearch.py b/swh/search/elasticsearch.py
--- a/swh/search/elasticsearch.py
+++ b/swh/search/elasticsearch.py
@@ -13,7 +13,7 @@
from swh.indexer import codemeta
from swh.model import model
from swh.model.identifiers import origin_identifier
-from swh.search.interface import PagedResult
+from swh.search.interface import MinimalOriginDict, OriginDict, PagedResult
def _sanitize_origin(origin):
@@ -121,7 +121,7 @@
def flush(self) -> None:
self._backend.indices.refresh(index=self.origin_index)
- def origin_update(self, documents: Iterable[Dict]) -> None:
+ def origin_update(self, documents: Iterable[OriginDict]) -> None:
documents = map(_sanitize_origin, documents)
documents_with_sha1 = (
(origin_identifier(document), document) for document in documents
@@ -176,7 +176,7 @@
visit_types: Optional[List[str]] = None,
page_token: Optional[str] = None,
limit: int = 50,
- ) -> PagedResult[Dict[str, Any]]:
+ ) -> PagedResult[MinimalOriginDict]:
query_clauses: List[Dict[str, Any]] = []
if url_pattern:
diff --git a/swh/search/in_memory.py b/swh/search/in_memory.py
--- a/swh/search/in_memory.py
+++ b/swh/search/in_memory.py
@@ -9,7 +9,7 @@
from typing import Any, Dict, Iterable, Iterator, List, Optional
from swh.model.identifiers import origin_identifier
-from swh.search.interface import PagedResult
+from swh.search.interface import MinimalOriginDict, OriginDict, PagedResult
class InMemorySearch:
@@ -33,14 +33,15 @@
_url_splitter = re.compile(r"\W")
- def origin_update(self, documents: Iterable[Dict]) -> None:
- for document in documents:
- document = document.copy()
+ def origin_update(self, documents: Iterable[OriginDict]) -> None:
+ for source_document in documents:
+ document = dict(source_document)
id_ = origin_identifier(document)
if "url" in document:
- document["_url_tokens"] = set(self._url_splitter.split(document["url"]))
+ document["_url_tokens"] = set(
+ self._url_splitter.split(source_document["url"])
+ )
if "visit_types" in self._origins[id_]:
- document = dict(document)
document["visit_types"] = list(
set(
self._origins[id_]["visit_types"]
@@ -61,7 +62,7 @@
visit_types: Optional[List[str]] = None,
page_token: Optional[str] = None,
limit: int = 50,
- ) -> PagedResult[Dict[str, Any]]:
+ ) -> PagedResult[MinimalOriginDict]:
hits: Iterator[Dict[str, Any]] = (
self._origins[id_] for id_ in self._origin_ids
)
diff --git a/swh/search/interface.py b/swh/search/interface.py
--- a/swh/search/interface.py
+++ b/swh/search/interface.py
@@ -3,7 +3,9 @@
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
-from typing import Any, Dict, Iterable, List, Optional, TypeVar
+from typing import Iterable, List, Optional, TypeVar
+
+from typing_extensions import TypedDict
from swh.core.api import remote_api_endpoint
from swh.core.api.classes import PagedResult as CorePagedResult
@@ -12,6 +14,19 @@
PagedResult = CorePagedResult[TResult, str]
+class MinimalOriginDict(TypedDict):
+ """Mandatory keys of an :class:`OriginDict`."""
+
+ url: str
+
+
+class OriginDict(MinimalOriginDict, total=False):
+ """Argument passed to :meth:`SearchInterface.origin_update`."""
+
+ visit_types: List[str]
+ has_visits: bool
+
+
class SearchInterface:
@remote_api_endpoint("check")
def check(self):
@@ -29,7 +44,7 @@
...
@remote_api_endpoint("origin/update")
- def origin_update(self, documents: Iterable[Dict]) -> None:
+ def origin_update(self, documents: Iterable[OriginDict]) -> None:
"""Persist documents to the search backend.
"""
@@ -45,7 +60,7 @@
visit_types: Optional[List[str]] = None,
page_token: Optional[str] = None,
limit: int = 50,
- ) -> PagedResult[Dict[str, Any]]:
+ ) -> PagedResult[MinimalOriginDict]:
"""Searches for origins matching the `url_pattern`.
Args:
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Mar 17 2025, 7:16 PM (7 w, 3 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3231835
Attached To
D5070: Add typing to origin_update's argument and origin_search's return
Event Timeline
Log In to Comment