Changeset View
Changeset View
Standalone View
Standalone View
swh/lister/functional_package/lister.py
- This file was added.
# Copyright (C) 2019 the Software Heritage developers | |||||
# License: GNU General Public License version 3, or any later version | |||||
# See top-level LICENSE file for more information | |||||
import json | |||||
import requests | |||||
from .models import FunctionalPackageModel | |||||
from swh.scheduler import utils | |||||
from swh.lister.core.simple_lister import SimpleLister | |||||
class FunctionalPackageLister(SimpleLister): | |||||
MODEL = FunctionalPackageModel | |||||
LISTER_NAME = 'functional_package' | |||||
instance = 'functional_package' | |||||
def __init__(self, override_config=None):
    """Record the URL of the sources file, then initialise the parent.

    Args:
        override_config: forwarded unchanged to the parent constructor

    """
    # The URL is assigned before the parent constructor is invoked.
    sources_url = "https://sources.nixos.org/sources.json"
    self.url = sources_url
    super().__init__(override_config=override_config)
def task_dict(self, origin_type, origin_url, **kwargs):
    """Return the scheduler task dict used to ingest one listed source.

    The generated task is always a 'load-tar' task. It carries exactly
    one artifact, whose 'archive' entry is the origin URL itself, and
    'archive' is declared as the key identifying that artifact.

    Args:
        origin_type: origin type of the listed source (unused: the task
            type is hard-coded to 'load-tar')
        origin_url: URL of the listed source; used both as the task url
            and as the artifact's 'archive' value
        **kwargs: only 'policy' is read; it defaults to 'oneshot'

    Returns:
        The value returned by utils.create_task_dict

    """
    policy = kwargs.get('policy', 'oneshot')
    return utils.create_task_dict(
        'load-tar', policy,
        # Keyword arguments, matching the archive loader task signature
        # (url, artifacts, identity_artifact_keys) quoted in the review.
        url=origin_url,
        # Only the archive location is known for a listed source.
        artifacts=[{'archive': origin_url}],
        identity_artifact_keys=['archive'],
        # NOTE(review): per the review, the task fails at run time
        # without retries_left -- confirm against the scheduler.
        retries_left=3)
ardumontUnsubmitted Not Done Inline Actions
So now, this needs to be changed. The actual loader to use now is swh.loader.core.package.archive.ArchiveLoader.

The task referring to this is:

    @shared_task(name=__name__ + '.LoadArchive')
    def load_archive(url=None, artifacts=None, identity_artifact_keys=None):
        return ArchiveLoader(url, artifacts,
                             identity_artifact_keys=identity_artifact_keys).load()
    ...

So your code can change to something like (untested):

    return utils.create_task_dict(
        'load-tar', kwargs.get('policy', 'oneshot'),
        url=origin_url,  # <- prefer to use kwargs instead of args
        artifacts=[{'archive': origin_url}],  # <- only provide what you can
        identity_artifact_keys=['archive'],  # <- unicity key
        retries_left=3)  # <- that will fail otherwise when actually running

^ then you'd need to adapt the test below.
ardumont: So now, this needs to be changed.
The actual loader to use now is swh.loader.core.package. | |||||
lewoAuthorUnsubmitted Done Inline ActionsThanks ;) lewo: Thanks ;) | |||||
def safely_issue_request(self, identifier):
    """Download the JSON file at ``self.url`` and return it decoded.

    Args:
        identifier: resource identifier (unused)

    Returns:
        The object decoded from the JSON document found in the response
        body (not the raw server response).

    Raises:
        requests.HTTPError: if the server answers with an error status

    """
    response = requests.get(self.url,
                            allow_redirects=True)
    # Fail loudly on an HTTP error instead of handing an error page to
    # the JSON parser, which would hide the real cause.
    response.raise_for_status()
    # TODO: support gzip content as well
    return json.loads(response.content.decode('utf-8'))
def list_packages(self, response):
    """Keep only the sources whose type is "url".

    Args:
        response: mapping whose "sources" entry is an iterable of
            source entries, each providing a "type" key

    Returns:
        list of the entries whose "type" equals "url", in input order

    """
    url_sources = []
    for entry in response["sources"]:
        if entry["type"] == "url":
            url_sources.append(entry)
    return url_sources
def get_model_from_repo(self, source):
    """Build the model fields for one source entry.

    Args:
        source: mapping providing a 'url' key

    Returns:
        dict where 'uid', 'name', 'full_name', 'html_url' and
        'origin_url' all hold the source URL, and 'origin_type' is 'tar'

    """
    url = source['url']
    # We could use the content hash if it is provided
    url_fields = ('uid', 'name', 'full_name', 'html_url', 'origin_url')
    model = {field: url for field in url_fields}
    model['origin_type'] = 'tar'
    return model
Please drop this duplicated comment and explain a bit what the generated tasks are.
Why is there only one artifact per package, for example? (I don't remember the details, so this serves a double purpose here ;)