Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/core/loader.py
# Copyright (C) 2015-2018 The Software Heritage developers | # Copyright (C) 2015-2019 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import datetime | import datetime | ||||
import hashlib | import hashlib | ||||
import logging | import logging | ||||
import os | import os | ||||
▲ Show 20 Lines • Show All 195 Lines • ▼ Show 20 Lines | def save_data(self): | ||||
"""Save the data associated to the current load""" | """Save the data associated to the current load""" | ||||
raise NotImplementedError | raise NotImplementedError | ||||
def get_save_data_path(self): | def get_save_data_path(self): | ||||
"""The path to which we archive the loader's raw data""" | """The path to which we archive the loader's raw data""" | ||||
if not hasattr(self, '__save_data_path'): | if not hasattr(self, '__save_data_path'): | ||||
year = str(self.visit_date.year) | year = str(self.visit_date.year) | ||||
origin_url_hash = hashlib.sha1(self.origin['url']).hexdigest() | url = self.origin['url'].encode('utf-8') | ||||
vlorentz: is the isinstance actually needed? If yes: why is the url sometimes bytes and sometimes str? | |||||
Done | Inline Actions | I'm not sure it is actually needed. I checked the storage's model: the url is indeed a string there, so it should in practice always be a string. ardumont: I'm not sure it is actually needed.
It was to be on the safe side (since only the loader-git… | |||||
origin_url_hash = hashlib.sha1(url).hexdigest() | |||||
path = os.path.join( | path = '%s/sha1:%s/%s/%s' % ( | ||||
self.config['save_data_path'], | self.config['save_data_path'], | ||||
'sha1:' + origin_url_hash[0:2], | origin_url_hash[0:2], | ||||
origin_url_hash, | origin_url_hash, | ||||
year, | year, | ||||
) | ) | ||||
os.makedirs(path, exist_ok=True) | os.makedirs(path, exist_ok=True) | ||||
self.__save_data_path = path | self.__save_data_path = path | ||||
return self.__save_data_path | return self.__save_data_path | ||||
▲ Show 20 Lines • Show All 755 Lines • Show Last 20 Lines |
is the isinstance actually needed? If yes: why is the url sometimes bytes and sometimes str?