Changeset View
Standalone View
swh/graphql/resolvers/content.py
Show All 12 Lines | |||||
from .snapshot_branch import SnapshotBranchNode | from .snapshot_branch import SnapshotBranchNode | ||||
class BaseContentNode(BaseSWHNode): | class BaseContentNode(BaseSWHNode): | ||||
""" | """ | ||||
Base resolver for all the content nodes | Base resolver for all the content nodes | ||||
""" | """ | ||||
def _get_content_by_id(self, content_id): | def _get_content_by_hash(self, checksums: dict): | ||||
content = archive.Archive().get_content(content_id) | content = archive.Archive().get_contents(checksums) | ||||
# in case of a conflict, return the first element | |||||
return content[0] if content else None | return content[0] if content else None | ||||
@property | @property | ||||
def checksum(self): | def checksum(self): | ||||
# FIXME, return a Node object | # FIXME, use a Node instead | ||||
return {k: v.hex() for (k, v) in self._node.hashes().items()} | return {k: v.hex() for (k, v) in self._node.hashes().items()} | ||||
@property | @property | ||||
def id(self): | def id(self): | ||||
return self._node.sha1_git | return self._node.sha1_git | ||||
@property | |||||
def data(self): | |||||
# FIXME, return a Node object | |||||
# FIXME, add more ways to retrieve data (eg: a static URL) | |||||
content_sha1 = self._node.hashes()["sha1"] | |||||
anlambert: I think it will be safer to reference content data by an URL here instead of raw bytes, you… | |||||
Done Inline Actions I agree. Or is there a way to have a static URL instead, like an S3 one? jayeshv: I agree.
Do you think adding a link to https://archive.softwareheritage. | |||||
Not Done Inline Actions
I think so. By the way, this endpoint does not use SWHIDs, as they were not yet in place when it was implemented.
For the record, you can define default config values using Python code (see swh-web config.py).
Not that I know of. anlambert: > Do you think adding a link to https://archive.softwareheritage.org/api/1/content/<swhid>/raw/… | |||||
Done Inline Actions
OK. Here, too, the data is served by an application server (Django in this case), so too many requests could bring that down as well. About rate limiting: a basic rate limiter is in place for GraphQL, and I'm planning to improve it soon. jayeshv: > > Do you think adding a link to https://archive.softwareheritage. | |||||
Done Inline Actions @anlambert For the time being, I have added both the raw binary data and a URL to get file data. For now, I have added the API URL in the code instead of in a config. I will move that to a config in another diff that improves the config. jayeshv: @anlambert For the time being I have added both the raw binary data and a URL to get file data. | |||||
Not Done Inline Actions I still think that raw content data should not be included in GraphQL responses, as it remains a flaw that makes it easy to DDoS us with a single request. In Web API responses (whether REST or GraphQL) we should only return metadata about the archived objects; getting raw content data should be done separately. Anyway, do as you want, but my guess is that we will not keep that feature in the future.
Ideally, the credits for getting raw content should be proportional to its size in bytes. anlambert: I am still thinking that raw content data should not be included in GraphQL responses as it… | |||||
return {"raw": archive.Archive().get_content_data(content_sha1)} | |||||
def ConetentFileType(self): | |||||
Not Done Inline Actions s/Conetent/Content/ anlambert: s/Conetent/Content/ | |||||
# FIXME, fetch data from the indexers | |||||
return None | |||||
def ConetentLanguage(self): | |||||
# FIXME, fetch data from the indexers | |||||
return None | |||||
def ConetentLicense(self): | |||||
# FIXME, fetch data from the indexers | |||||
Not Done Inline Actions s/Conetent/Content/ anlambert: s/Conetent/Content/ | |||||
return None | |||||
def is_type_of(self): | def is_type_of(self): | ||||
# is_type_of is required only when resolving a UNION type | # is_type_of is required only when resolving a UNION type | ||||
# This is for ariadne to return the right type | # This is for ariadne to return the right type | ||||
Not Done Inline Actions s/Conetent/Content/ anlambert: s/Conetent/Content/ | |||||
return "Content" | return "Content" | ||||
class ContentNode(BaseContentNode): | class ContentNode(BaseContentNode): | ||||
""" | """ | ||||
Node resolver for a content requested directly with its SWHID | Node resolver for a content requested directly with its SWHID | ||||
""" | """ | ||||
def _get_node_data(self): | def _get_node_data(self): | ||||
return self._get_content_by_id(self.kwargs.get("swhid").object_id) | checksums = {"sha1_git": self.kwargs.get("swhid").object_id} | ||||
return self._get_content_by_hash(checksums) | |||||
class HashContentNode(BaseContentNode): | |||||
""" | |||||
Node resolver for a content requested with one or more checksums | |||||
""" | |||||
def _get_node_data(self): | |||||
checksums = dict(self.kwargs.get("checksums")) | |||||
return self._get_content_by_hash(checksums) | |||||
class TargetContentNode(BaseContentNode): | class TargetContentNode(BaseContentNode): | ||||
""" | """ | ||||
Node resolver for a content requested from a | Node resolver for a content requested as a target | ||||
directory entry or from a release target | This request could be from directory entry, release or a branch | ||||
""" | """ | ||||
obj: Union[DirectoryEntryNode, BaseReleaseNode, SnapshotBranchNode] | obj: Union[DirectoryEntryNode, BaseReleaseNode, SnapshotBranchNode] | ||||
def _get_node_data(self): | def _get_node_data(self): | ||||
content_id = self.obj.target_hash | return self._get_content_by_hash(checksums={"sha1_git": self.obj.target_hash}) | ||||
return self._get_content_by_id(content_id) |
I think it would be safer to reference content data by a URL here instead of returning raw bytes; you could use this Web API endpoint, for instance.
Fetching content data could put some pressure on our object storage, depending on the input GraphQL query, and responses could also be very large, depending on content sizes,
so it is better to offer links here.