Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F9348518
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
10 KB
Subscribers
None
View Options
diff --git a/PKG-INFO b/PKG-INFO
index 37fe6f6..6e91a52 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,10 +1,10 @@
Metadata-Version: 1.0
Name: swh.loader.tar
-Version: 0.0.20
+Version: 0.0.21
Summary: Software Heritage Tarball Loader
Home-page: https://forge.softwareheritage.org/diffusion/DLDTAR
Author: Software Heritage developers
Author-email: swh-devel@inria.fr
License: UNKNOWN
Description: UNKNOWN
Platform: UNKNOWN
diff --git a/debian/control b/debian/control
index fd13d3b..5fe4dc6 100644
--- a/debian/control
+++ b/debian/control
@@ -1,23 +1,22 @@
Source: swh-loader-tar
Maintainer: Software Heritage developers <swh-devel@inria.fr>
Section: python
Priority: optional
Build-Depends: debhelper (>= 9),
dh-python,
python3-all,
python3-nose,
python3-setuptools,
python3-swh.core (>= 0.0.14~),
python3-swh.scheduler,
python3-swh.storage (>= 0.0.31~),
- python3-swh.loader.dir (>= 0.0.21~),
- python3-swh.loader.core (>= 0.0.10~),
+ python3-swh.loader.dir (>= 0.0.22~),
python3-vcversioner
Standards-Version: 3.9.6
Homepage: https://forge.softwareheritage.org/diffusion/DLDTAR/
Package: python3-swh.loader.tar
Architecture: all
Depends: ${misc:Depends},
${python3:Depends}
Description: Software Heritage Tarball Loader
diff --git a/requirements.txt b/requirements.txt
index f45e2c0..cfc0175 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,10 +1,9 @@
# Add here external Python modules dependencies, one per line. Module names
# should match https://pypi.python.org/pypi names. For the full spec or
# dependency lines, see https://pip.readthedocs.org/en/1.1/requirements.html
vcversioner
swh.core >= 0.0.14
swh.scheduler
swh.storage >= 0.0.31
-swh.loader.dir >= 0.0.21
-swh.loader.core >= 0.0.10
+swh.loader.dir >= 0.0.22
retrying
diff --git a/swh.loader.tar.egg-info/PKG-INFO b/swh.loader.tar.egg-info/PKG-INFO
index 37fe6f6..6e91a52 100644
--- a/swh.loader.tar.egg-info/PKG-INFO
+++ b/swh.loader.tar.egg-info/PKG-INFO
@@ -1,10 +1,10 @@
Metadata-Version: 1.0
Name: swh.loader.tar
-Version: 0.0.20
+Version: 0.0.21
Summary: Software Heritage Tarball Loader
Home-page: https://forge.softwareheritage.org/diffusion/DLDTAR
Author: Software Heritage developers
Author-email: swh-devel@inria.fr
License: UNKNOWN
Description: UNKNOWN
Platform: UNKNOWN
diff --git a/swh.loader.tar.egg-info/requires.txt b/swh.loader.tar.egg-info/requires.txt
index f8992da..c895c5d 100644
--- a/swh.loader.tar.egg-info/requires.txt
+++ b/swh.loader.tar.egg-info/requires.txt
@@ -1,7 +1,6 @@
retrying
swh.core>=0.0.14
-swh.loader.core>=0.0.10
-swh.loader.dir>=0.0.21
+swh.loader.dir>=0.0.22
swh.scheduler
swh.storage>=0.0.31
vcversioner
diff --git a/swh/loader/tar/loader.py b/swh/loader/tar/loader.py
index 002f091..9ef1376 100644
--- a/swh/loader/tar/loader.py
+++ b/swh/loader/tar/loader.py
@@ -1,93 +1,130 @@
# Copyright (C) 2015-2016 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
+import datetime
import os
import tempfile
import shutil
from swh.core import hashutil
from swh.loader.dir import loader
from swh.loader.tar import tarball, utils
class TarLoader(loader.DirLoader):
"""A tarball loader.
"""
CONFIG_BASE_FILENAME = 'loader/tar.ini'
ADDITIONAL_CONFIG = {
'extraction_dir': ('string', '/tmp')
}
- def __init__(self, origin_id):
- super().__init__(origin_id,
- logging_class='swh.loader.tar.TarLoader')
+ def __init__(self):
+ super().__init__(logging_class='swh.loader.tar.TarLoader')
- def process(self, tarpath, origin, revision, release, occurrences):
- """Load a tarball in backend.
+ def load(self, tarpath, origin, visit, revision, release, occurrences):
+ """
+ Load a tarball in backend.
This will:
- - persist the origin if it does not exist.
- - write an entry in fetch_history to mark the loading tarball start
- uncompress locally the tarballs in a temporary location
- process the content of the tarballs to persist on swh storage
- clean up the temporary location
- write an entry in fetch_history to mark the loading tarball end
Args:
- tarpath: path to the tarball to uncompress
- origin: Dictionary origin
- url: url origin we fetched
- type: type of the origin
+ - visit: visit number, as returned by storage.origin_visit_add
- revision: Dictionary of information needed, keys are:
- author_name: revision's author name
- author_email: revision's author email
- author_date: timestamp (e.g. 1444054085)
- author_offset: date offset e.g. -0220, +0100
- committer_name: revision's committer name
- committer_email: revision's committer email
- committer_date: timestamp
- committer_offset: date offset e.g. -0220, +0100
- type: type of revision dir, tar
- message: synthetic message for the revision
- release: Dictionary of information needed, keys are:
- name: release name
- date: release timestamp (e.g. 1444054085)
- offset: release date offset e.g. -0220, +0100
- author_name: release author's name
- author_email: release author's email
- comment: release's comment message
- occurrences: List of occurrence dictionary.
Information needed, keys are:
- branch: occurrence's branch name
- authority_id: authority id (e.g. 1 for swh)
- validity: validity date (e.g. 2015-01-01 00:00:00+00)
"""
# Prepare the extraction path
extraction_dir = self.config['extraction_dir']
os.makedirs(extraction_dir, 0o755, exist_ok=True)
dir_path = tempfile.mkdtemp(prefix='swh.loader.tar-',
dir=extraction_dir)
# add checksums in revision
artifact = utils.convert_to_hex(hashutil.hashfile(tarpath))
artifact['name'] = os.path.basename(tarpath)
try:
self.log.info('Uncompress %s to %s' % (tarpath, dir_path))
nature = tarball.uncompress(tarpath, dir_path)
artifact['archive_type'] = nature
artifact['length'] = os.path.getsize(tarpath)
revision['metadata'] = {
'original_artifact': [artifact],
}
- return super().process(dir_path, origin, revision, release,
- occurrences)
+ return super().load(
+ dir_path, origin, visit, revision, release, occurrences)
finally:
shutil.rmtree(dir_path)
+
+ def prepare_and_load(self,
+ tarpath, origin, revision, release, occurrences):
+ """
+ Prepare origin, fetch_history, origin_visit
+ Then load a tarball 'tarpath'.
+ Then close origin_visit, fetch_history
+
+ First:
+ - creates an origin if it does not exist
+ - creates a fetch_history entry
+ - creates an origin_visit
+ - Then loads the tarball
+
+ """
+ if 'type' not in origin: # let the type flow if present
+ origin['type'] = 'tar'
+
+ self.origin_id = self.storage.origin_add_one(origin)
+ origin['id'] = self.origin_id
+
+ date_visit = datetime.datetime.now(tz=datetime.timezone.utc)
+ origin_visit = self.storage.origin_visit_add(origin['id'], date_visit)
+ visit = origin_visit['visit']
+
+ fetch_history_id = self.open_fetch_history()
+
+ try:
+ self.load(tarpath, origin, visit, revision, release, occurrences)
+ self.close_fetch_history_success(fetch_history_id)
+ self.storage.origin_visit_update(
+ self.origin_id, origin_visit['visit'], status='full')
+ except:
+ self.close_fetch_history_failure(fetch_history_id)
+ self.storage.origin_visit_update(
+ self.origin_id, origin_visit['visit'], status='partial')
+ raise
diff --git a/swh/loader/tar/tasks.py b/swh/loader/tar/tasks.py
index d40cb2a..c6ea825 100644
--- a/swh/loader/tar/tasks.py
+++ b/swh/loader/tar/tasks.py
@@ -1,40 +1,27 @@
# Copyright (C) 2015-2016 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
-from swh.loader.core import tasks
+from swh.scheduler.task import Task
from swh.loader.tar.loader import TarLoader
-class LoadTarRepository(tasks.LoaderCoreTask):
+class LoadTarRepository(Task):
"""Import a directory to Software Heritage
"""
task_queue = 'swh_loader_tar'
- CONFIG_BASE_FILENAME = 'loader/tar.ini'
def run(self, tarpath, origin, revision, release, occurrences):
"""Import a tarball into swh.
Args:
- tarpath: path to a tarball file
- origin, revision, release, occurrences:
cf. swh.loader.dir.loader.run docstring
"""
- if 'type' not in origin: # let the type flow if present
- origin['type'] = 'tar'
-
- origin['id'] = self.storage.origin_add_one(origin)
-
- fetch_history_id = self.open_fetch_history(origin['id'])
-
- result = TarLoader(origin['id']).process(tarpath,
- origin,
- revision,
- release,
- occurrences)
-
- self.close_fetch_history(fetch_history_id, result)
+ TarLoader().prepare_and_load(
+ tarpath, origin, revision, release, occurrences)
diff --git a/version.txt b/version.txt
index 58f39a6..b9937ac 100644
--- a/version.txt
+++ b/version.txt
@@ -1 +1 @@
-v0.0.20-0-ge777e55
\ No newline at end of file
+v0.0.21-0-gf330178
\ No newline at end of file
File Metadata
Details
Attached
Mime Type
text/x-diff
Expires
Jul 4 2025, 6:35 PM (5 w, 4 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3310097
Attached To
rDLDTAR Tarball Loader
Event Timeline
Log In to Comment