Page MenuHomeSoftware Heritage

D1813.id6100.diff
No OneTemporary

D1813.id6100.diff

diff --git a/swh/loader/package/loader.py b/swh/loader/package/loader.py
--- a/swh/loader/package/loader.py
+++ b/swh/loader/package/loader.py
@@ -1,10 +1,10 @@
-
# Copyright (C) 2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import os
+import shutil
from abc import abstractmethod
from tempfile import mkdtemp
@@ -14,6 +14,12 @@
from swh.loader.core.loader import BufferedLoader
from swh.model.hashutil import MultiHash, HASH_BLOCK_SIZE
from swh.storage.algos.snapshot import snapshot_get_all_branches
+from swh.model.from_disk import Directory
+
+from swh.model.identifiers import (
+ identifier_to_bytes, revision_identifier
+)
+
from .build_revision import BuildRevision
@@ -335,3 +341,92 @@
except Exception:
self.tarball_invalid = True
return None
+
+ def fetch_data(self):
+ """Called once per release artifact version (can be many for one
+ release).
+
+ This will for each call:
+ - retrieve a release artifact (associated to a release version)
+ - compute the swh objects
+
+ Returns:
+ True as long as data to fetch exists
+
+ """
+ data = None
+ if self.done:
+ return False
+
+ try:
+ data = next(self.new_versions)
+ self._load_status = 'eventful'
+ except StopIteration:
+ self.done = True
+ return False
+
+ package_source_data, dir_path = data
+
+ # package release tarball was corrupted
+ if self.tarball_invalid:
+ return not self.done
+
+ dir_path = dir_path.encode('utf-8')
+ directory = Directory.from_disk(path=dir_path, data=True)
+ objects = directory.collect()
+
+ objects = self.check_objects(objects)
+
+ self.package_contents = objects['content'].values()
+ self.package_directories = objects['directory'].values()
+
+ revision = self.compute_revision(directory,
+ package_source_data)
+
+ revision['id'] = identifier_to_bytes(
+ revision_identifier(revision))
+ self.package_revisions.append(revision)
+
+ self.update_known_version(package_source_data, revision['id'])
+
+ self.log.debug('Removing unpacked package files at %s', dir_path)
+ shutil.rmtree(dir_path)
+
+ return not self.done
+
+ def check_objects(self, objects):
+ """Check the object for necessary fields and initialise them if not
+ already present.
+
+ """
+ if 'content' not in objects:
+ objects['content'] = {}
+ if 'directory' not in objects:
+ objects['directory'] = {}
+ return objects
+
+ def update_known_version(self, package_source_data, revision_id):
+ """Update the `known_versions` variable with newly discovered versions
+
+ Args:
+ package_source_data (dict): Metadata available for a particular package
+ version
+ revision_id (bytes): Revision id of the revision of focused package
+ version
+
+ """
+ key = self.get_key()
+ package_key = package_source_data[key]
+ self.known_versions[package_key] = revision_id # SEE ME
+
+ def store_data(self):
+ """Store fetched data in the database.
+
+ """
+ self.maybe_load_contents(self.package_contents)
+ self.maybe_load_directories(self.package_directories)
+ self.maybe_load_revisions(self.package_revisions)
+
+ if self.done:
+ self.generate_and_load_snapshot()
+ self.flush()

File Metadata

Mime Type
text/plain
Expires
Sun, Aug 17, 9:07 PM (3 h, 38 m ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3219325

Event Timeline