Page MenuHomeSoftware Heritage

No OneTemporary

diff --git a/PKG-INFO b/PKG-INFO
index 37fe6f6..6e91a52 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,10 +1,10 @@
Metadata-Version: 1.0
Name: swh.loader.tar
-Version: 0.0.20
+Version: 0.0.21
Summary: Software Heritage Tarball Loader
Home-page: https://forge.softwareheritage.org/diffusion/DLDTAR
Author: Software Heritage developers
Author-email: swh-devel@inria.fr
License: UNKNOWN
Description: UNKNOWN
Platform: UNKNOWN
diff --git a/debian/control b/debian/control
index fd13d3b..5fe4dc6 100644
--- a/debian/control
+++ b/debian/control
@@ -1,23 +1,22 @@
Source: swh-loader-tar
Maintainer: Software Heritage developers <swh-devel@inria.fr>
Section: python
Priority: optional
Build-Depends: debhelper (>= 9),
dh-python,
python3-all,
python3-nose,
python3-setuptools,
python3-swh.core (>= 0.0.14~),
python3-swh.scheduler,
python3-swh.storage (>= 0.0.31~),
- python3-swh.loader.dir (>= 0.0.21~),
- python3-swh.loader.core (>= 0.0.10~),
+ python3-swh.loader.dir (>= 0.0.22~),
python3-vcversioner
Standards-Version: 3.9.6
Homepage: https://forge.softwareheritage.org/diffusion/DLDTAR/
Package: python3-swh.loader.tar
Architecture: all
Depends: ${misc:Depends},
${python3:Depends}
Description: Software Heritage Tarball Loader
diff --git a/requirements.txt b/requirements.txt
index f45e2c0..cfc0175 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,10 +1,9 @@
# Add here external Python modules dependencies, one per line. Module names
# should match https://pypi.python.org/pypi names. For the full spec or
# dependency lines, see https://pip.readthedocs.org/en/1.1/requirements.html
vcversioner
swh.core >= 0.0.14
swh.scheduler
swh.storage >= 0.0.31
-swh.loader.dir >= 0.0.21
-swh.loader.core >= 0.0.10
+swh.loader.dir >= 0.0.22
retrying
diff --git a/swh.loader.tar.egg-info/PKG-INFO b/swh.loader.tar.egg-info/PKG-INFO
index 37fe6f6..6e91a52 100644
--- a/swh.loader.tar.egg-info/PKG-INFO
+++ b/swh.loader.tar.egg-info/PKG-INFO
@@ -1,10 +1,10 @@
Metadata-Version: 1.0
Name: swh.loader.tar
-Version: 0.0.20
+Version: 0.0.21
Summary: Software Heritage Tarball Loader
Home-page: https://forge.softwareheritage.org/diffusion/DLDTAR
Author: Software Heritage developers
Author-email: swh-devel@inria.fr
License: UNKNOWN
Description: UNKNOWN
Platform: UNKNOWN
diff --git a/swh.loader.tar.egg-info/requires.txt b/swh.loader.tar.egg-info/requires.txt
index f8992da..c895c5d 100644
--- a/swh.loader.tar.egg-info/requires.txt
+++ b/swh.loader.tar.egg-info/requires.txt
@@ -1,7 +1,6 @@
retrying
swh.core>=0.0.14
-swh.loader.core>=0.0.10
-swh.loader.dir>=0.0.21
+swh.loader.dir>=0.0.22
swh.scheduler
swh.storage>=0.0.31
vcversioner
diff --git a/swh/loader/tar/loader.py b/swh/loader/tar/loader.py
index 002f091..9ef1376 100644
--- a/swh/loader/tar/loader.py
+++ b/swh/loader/tar/loader.py
@@ -1,93 +1,130 @@
# Copyright (C) 2015-2016 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
+import datetime
import os
import tempfile
import shutil
from swh.core import hashutil
from swh.loader.dir import loader
from swh.loader.tar import tarball, utils
class TarLoader(loader.DirLoader):
"""A tarball loader.
"""
CONFIG_BASE_FILENAME = 'loader/tar.ini'
ADDITIONAL_CONFIG = {
'extraction_dir': ('string', '/tmp')
}
- def __init__(self, origin_id):
- super().__init__(origin_id,
- logging_class='swh.loader.tar.TarLoader')
+ def __init__(self):
+ super().__init__(logging_class='swh.loader.tar.TarLoader')
- def process(self, tarpath, origin, revision, release, occurrences):
- """Load a tarball in backend.
+ def load(self, tarpath, origin, visit, revision, release, occurrences):
+ """
+ Load a tarball in backend.
This will:
- - persist the origin if it does not exist.
- - write an entry in fetch_history to mark the loading tarball start
- uncompress locally the tarballs in a temporary location
- process the content of the tarballs to persist on swh storage
- clean up the temporary location
- write an entry in fetch_history to mark the loading tarball end
Args:
- tarpath: path to the tarball to uncompress
- origin: Dictionary origin
- url: url origin we fetched
- type: type of the origin
+ - visit: Numbered visit
- revision: Dictionary of information needed, keys are:
- author_name: revision's author name
- author_email: revision's author email
- author_date: timestamp (e.g. 1444054085)
- author_offset: date offset e.g. -0220, +0100
- committer_name: revision's committer name
- committer_email: revision's committer email
- committer_date: timestamp
- committer_offset: date offset e.g. -0220, +0100
- type: type of revision dir, tar
- message: synthetic message for the revision
- release: Dictionary of information needed, keys are:
- name: release name
- date: release timestamp (e.g. 1444054085)
- offset: release date offset e.g. -0220, +0100
- author_name: release author's name
- author_email: release author's email
- comment: release's comment message
- occurrences: List of occurrence dictionary.
Information needed, keys are:
- branch: occurrence's branch name
- authority_id: authority id (e.g. 1 for swh)
- validity: validity date (e.g. 2015-01-01 00:00:00+00)
"""
# Prepare the extraction path
extraction_dir = self.config['extraction_dir']
os.makedirs(extraction_dir, 0o755, exist_ok=True)
dir_path = tempfile.mkdtemp(prefix='swh.loader.tar-',
dir=extraction_dir)
# add checksums in revision
artifact = utils.convert_to_hex(hashutil.hashfile(tarpath))
artifact['name'] = os.path.basename(tarpath)
try:
self.log.info('Uncompress %s to %s' % (tarpath, dir_path))
nature = tarball.uncompress(tarpath, dir_path)
artifact['archive_type'] = nature
artifact['length'] = os.path.getsize(tarpath)
revision['metadata'] = {
'original_artifact': [artifact],
}
- return super().process(dir_path, origin, revision, release,
- occurrences)
+ return super().load(
+ dir_path, origin, visit, revision, release, occurrences)
finally:
shutil.rmtree(dir_path)
+
+ def prepare_and_load(self,
+ tarpath, origin, revision, release, occurrences):
+ """
+ Prepare origin, fetch_history, origin_visit
+ Then load a tarball 'tarpath'.
+ Then close origin_visit, fetch_history
+
+ First:
+ - creates an origin if it does not exist
+ - creates a fetch_history entry
+ - creates an origin_visit
+ - Then loads the tarball
+
+ """
+ if 'type' not in origin: # let the type flow if present
+ origin['type'] = 'tar'
+
+ self.origin_id = self.storage.origin_add_one(origin)
+ origin['id'] = self.origin_id
+
+ date_visit = datetime.datetime.now(tz=datetime.timezone.utc)
+ origin_visit = self.storage.origin_visit_add(origin['id'], date_visit)
+ visit = origin_visit['visit']
+
+ fetch_history_id = self.open_fetch_history()
+
+ try:
+ self.load(tarpath, origin, visit, revision, release, occurrences)
+ self.close_fetch_history_success(fetch_history_id)
+ self.storage.origin_visit_update(
+ self.origin_id, origin_visit['visit'], status='full')
+ except:
+ self.close_fetch_history_failure(fetch_history_id)
+ self.storage.origin_visit_update(
+ self.origin_id, origin_visit['visit'], status='partial')
+ raise
diff --git a/swh/loader/tar/tasks.py b/swh/loader/tar/tasks.py
index d40cb2a..c6ea825 100644
--- a/swh/loader/tar/tasks.py
+++ b/swh/loader/tar/tasks.py
@@ -1,40 +1,27 @@
# Copyright (C) 2015-2016 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
-from swh.loader.core import tasks
+from swh.scheduler.task import Task
from swh.loader.tar.loader import TarLoader
-class LoadTarRepository(tasks.LoaderCoreTask):
+class LoadTarRepository(Task):
"""Import a directory to Software Heritage
"""
task_queue = 'swh_loader_tar'
- CONFIG_BASE_FILENAME = 'loader/tar.ini'
def run(self, tarpath, origin, revision, release, occurrences):
"""Import a tarball into swh.
Args:
- tarpath: path to a tarball file
- origin, revision, release, occurrences:
cf. swh.loader.dir.loader.run docstring
"""
- if 'type' not in origin: # let the type flow if present
- origin['type'] = 'tar'
-
- origin['id'] = self.storage.origin_add_one(origin)
-
- fetch_history_id = self.open_fetch_history(origin['id'])
-
- result = TarLoader(origin['id']).process(tarpath,
- origin,
- revision,
- release,
- occurrences)
-
- self.close_fetch_history(fetch_history_id, result)
+ TarLoader().prepare_and_load(
+ tarpath, origin, revision, release, occurrences)
diff --git a/version.txt b/version.txt
index 58f39a6..b9937ac 100644
--- a/version.txt
+++ b/version.txt
@@ -1 +1 @@
-v0.0.20-0-ge777e55
\ No newline at end of file
+v0.0.21-0-gf330178
\ No newline at end of file

File Metadata

Mime Type
text/x-diff
Expires
Jul 4 2025, 6:35 PM (5 w, 4 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3310097

Event Timeline