Changeset View
Standalone View
swh/loader/base/dowload.py
- This file was added.
# Copyright (C) 2019 The Software Heritage developers | |||||
anlambert: I am not a big fan of using inheritance in derived package loaders implementation (CRAN, GNU, .. | |||||
Not Done Inline Actions
Yes, thank you. I did not find the right way to explicit that. We must try and keep separated what's logically separated (artefacts retrieval, ingestion, etc...).
Yes, that's what i call a collaborator. ardumont: > I am not a big fan of using inheritance in derived package loaders implementation (CRAN, GNU… | |||||
# See the AUTHORS file at the top-level directory of this distribution | |||||
# License: GNU General Public License version 3, or any later version | |||||
# See top-level LICENSE file for more information | |||||
import requests | |||||
from .abstractattribute import AbstractAttribute | |||||
try: | |||||
from _version import __version__ | |||||
except ImportError: | |||||
__version__ = 'devel' | |||||
# This wil check and download and then sent it to hash fuction | |||||
''' | |||||
class ArchiveFetcher: | |||||
"""Http/Local client in charge of downloading archives from a | |||||
remote/local server. | |||||
Args: | |||||
temp_directory (str): Path to the temporary disk location used | |||||
for downloading the release artifacts | |||||
""" | |||||
def __init__(self): | |||||
self.session = requests.session() | |||||
self.params = { | |||||
'headers': { | |||||
'User-Agent': 'Software Heritage Tar Loader (%s)' % ( | |||||
__version__ | |||||
) | |||||
} | |||||
} | |||||
def download(self, url): | |||||
"""Download the remote tarball url locally. | |||||
Args: | |||||
url (str): Url (file or http*) | |||||
Raises: | |||||
ValueError in case of failing to query | |||||
Returns: | |||||
Tuple of local (filepath, hashes of filepath) | |||||
""" | |||||
response = self.session.get(url, **self.params, stream=True) | |||||
if response.status_code != 200: | |||||
raise ValueError("Fail to query '%s'. Reason: %s" % ( | |||||
url, response.status_code)) | |||||
''' | |||||
class if_modified_then: | |||||
""" | |||||
""" | |||||
def __init__(self): | |||||
self.session = requests.session() | |||||
self.params = { | |||||
'headers': { | |||||
'User-Agent': 'Software Heritage Tar Loader (%s)' % ( | |||||
__version__ | |||||
) | |||||
} | |||||
} | |||||
def make_request(self, url): | |||||
"""Request the remote tarball url. | |||||
Args: | |||||
url (str): Url (file or http*) | |||||
Raises: | |||||
ValueError in case of failing to query | |||||
Returns: | |||||
Tuple of local (filepath, hashes of filepath) | |||||
""" | |||||
response = self.session.get(url, **self.params, stream=True) | |||||
if response.status_code != 200: | |||||
raise ValueError("Fail to query '%s'. Reason: %s" % ( | |||||
url, response.status_code)) | |||||
return self.generate_hash(response, url) | |||||
def filter_release_artifact(): | |||||
pass | |||||
class compare_field: | |||||
""" | |||||
""" | |||||
compare_field = AbstractAttribute("Field used to identify if the package" | |||||
"version is previously archived") | |||||
# eg for pypi loader compare_field = 'sha' | |||||
def __init__(self): | |||||
self.session = requests.session() | |||||
self.params = { | |||||
'headers': { | |||||
'User-Agent': 'Software Heritage Tar Loader (%s)' % ( | |||||
__version__ | |||||
) | |||||
} | |||||
} | |||||
def download_new_releases(): | |||||
pass | |||||
def make_request(self, url): | |||||
"""Request the remote tarball url locally. | |||||
Args: | |||||
url (str): Url (file or http*) | |||||
Raises: | |||||
ValueError in case of failing to query | |||||
Returns: | |||||
Tuple of local (filepath, hashes of filepath) | |||||
""" | |||||
response = self.session.get(url, **self.params, stream=True) | |||||
if response.status_code != 200 and response.status_code != 304: | |||||
raise ValueError("Fail to query '%s'. Reason: %s" % ( | |||||
url, response.status_code)) | |||||
return self.generate_hash(response, url) | |||||
def filter_release_artifacts(): | |||||
pass | |||||
''' | |||||
This is implemented in loader but called here | |||||
def generate_hash(self, response): | |||||
"""Store file in temp directory and generate its hash | |||||
""" | |||||
length = int(response.headers['content-length']) | |||||
filepath = os.path.join(self.temp_directory, os.path.basename(url)) | |||||
h = MultiHash(length=length) | |||||
with open(filepath, 'wb') as f: | |||||
for chunk in response.iter_content(chunk_size=HASH_BLOCK_SIZE): | |||||
h.update(chunk) | |||||
f.write(chunk) | |||||
actual_length = os.path.getsize(filepath) | |||||
if length != actual_length: | |||||
raise ValueError('Error when checking size: %s != %s' % ( | |||||
length, actual_length)) | |||||
hashes = { | |||||
'length': length, | |||||
**h.hexdigest() | |||||
} | |||||
return filepath, hashes | |||||
''' | |||||
Done Inline ActionsCan you suggest a better name for this class? nahimilega: Can you suggest a better name for this class? | |||||
Done Inline ActionsCan you suggest a better name for this class? nahimilega: Can you suggest a better name for this class?
| |||||
Done Inline ActionsAs mentioned, remove that abstract attribute and just add a plain docstring with triple backquotes. ardumont: As mentioned, remove that abstract attribute and just add a plain docstring with triple… | |||||
Done Inline ActionsUse camel case like everywhere else. ardumont: Use camel case like everywhere else. | |||||
Done Inline ActionsThe docstring is below the variable definition. compare_field = None """Field used to identify if the p...""" ardumont: The docstring is below the variable definition.
```
compare_field = None
"""Field used to… | |||||
Not Done Inline Actionsping ;) ardumont: ping ;) | |||||
Done Inline ActionsChanged known_versions=None to known_versions={} because if known_versions is None then it would create a problem later in code nahimilega: Changed `known_versions=None` to `known_versions={}` because if known_versions is None then it… |
I am not a big fan of using inheritance in derived package loaders implementation (CRAN, GNU, ...) to declare the package tarballs retrieval behavior.
I would rather use a single class, named PackageDownloader for instance, that will internally be used by the PackageLoader class through composition instead.
Derived classes implementing real packages loader (GNU, CRAN, ..) should only inherit from the PackageLoader class and reimplement some hook methods to override the default behavior.
I need to further analyze what is implemented in that file before suggesting improvements.