Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F9347734
loader.py
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
4 KB
Subscribers
None
loader.py
View Options
# Copyright (C) 2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import datetime
import logging

from os import path
from typing import Any, Dict, Generator, Mapping, Optional, Sequence, Tuple

import iso8601

from swh.loader.package.loader import PackageLoader
from swh.loader.package.utils import release_name, artifact_identity
from swh.model.model import (
    Sha1Git,
    Person,
    TimestampWithTimezone,
    Revision,
    RevisionType,
)
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Identity used as both author and committer of the synthetic revisions
# built by this loader.
SWH_PERSON = Person(
    fullname=b"Software Heritage",
    name=b"Software Heritage",
    email=b"robot@softwareheritage.org",
)

# Fixed message attached to every synthetic revision built by this loader.
REVISION_MESSAGE = b"swh-loader-package: synthetic revision message"
class ArchiveLoader(PackageLoader):
    """Load archive origin's artifact files into swh archive.

    The loader is driven by a list of artifact descriptions (one mapping per
    archive file); each versioned artifact yields one synthetic revision of
    type ``tar``.

    """

    visit_type = "tar"

    def __init__(
        self,
        url: str,
        artifacts: Sequence[Mapping[str, Any]],
        identity_artifact_keys: Optional[Sequence[str]] = None,
    ):
        """Loader constructor.

        For now, this is the lister's task output.

        Args:
            url: Origin url
            artifacts: List of artifact information with keys:

               - **time**: last modification time as either isoformat date
                 string or timestamp
               - **url**: the artifact url to retrieve filename
               - **version**: artifact's version
               - **length**: artifact's length
               - **filename** (optional): name to use for the artifact;
                 when absent or empty, the last component of **url** is used

            identity_artifact_keys: Optional List of keys forming the
                "identity" of an artifact. When empty or None, defaults to
                ``["time", "url", "length", "version"]``.

        """
        super().__init__(url=url)
        # assume order is enforced in the lister
        self.artifacts = artifacts
        if not identity_artifact_keys:
            # default keys for gnu
            identity_artifact_keys = ["time", "url", "length", "version"]
        self.identity_artifact_keys = identity_artifact_keys

    def get_versions(self) -> Sequence[str]:
        """Return the versions of all artifacts, in artifact order.

        Artifacts with a missing or empty ``version`` are skipped.

        """
        candidates = (archive.get("version") for archive in self.artifacts)
        return [version for version in candidates if version]

    def get_default_version(self) -> str:
        """Return the version of the last artifact.

        Raises:
            IndexError: if there is no artifact at all
            KeyError: if the last artifact has no ``version`` key

        """
        # It's the most recent, so for this loader, it's the last one
        return self.artifacts[-1]["version"]

    def get_package_info(
        self, version: str
    ) -> Generator[Tuple[str, Mapping[str, Any]], None, None]:
        """Yield ``(release name, package info)`` for artifacts of `version`.

        The package info is a dict with keys ``url``, ``filename`` (the
        artifact's ``filename`` when set, else the last path component of its
        url) and ``raw`` (the untouched artifact mapping).

        Args:
            version: the version whose artifacts are requested

        """
        for a_metadata in self.artifacts:
            # Filter first, and tolerate artifacts without a version (as
            # get_versions does), so that an unrelated or incomplete entry
            # cannot raise KeyError while looking up another version.
            if "version" not in a_metadata or a_metadata["version"] != version:
                continue
            url = a_metadata["url"]
            filename = a_metadata.get("filename")
            p_info = {
                "url": url,
                "filename": filename if filename else path.basename(url),
                "raw": a_metadata,
            }
            # FIXME: this code assumes we have only 1 artifact per
            # versioned package
            yield release_name(version), p_info

    def resolve_revision_from(
        self, known_artifacts: Dict, artifact_metadata: Dict
    ) -> Optional[bytes]:
        """Look for an already known revision matching `artifact_metadata`.

        Identities are computed with ``artifact_identity`` restricted to
        ``self.identity_artifact_keys``.

        Args:
            known_artifacts: mapping from revision id to revision metadata;
                each value must expose the original artifact under
                ``["extrinsic"]["raw"]``
            artifact_metadata: the artifact to look up

        Returns:
            The id of the first known revision whose artifact identity equals
            the one of `artifact_metadata`, None when there is no match.

        """
        identity = artifact_identity(
            artifact_metadata, id_keys=self.identity_artifact_keys
        )
        for rev_id, known_artifact in known_artifacts.items():
            # Use the module logger rather than the root logger so the
            # message is attributed to (and filterable by) this module.
            logger.debug("known_artifact: %s", known_artifact)
            reference_artifact = known_artifact["extrinsic"]["raw"]
            known_identity = artifact_identity(
                reference_artifact, id_keys=self.identity_artifact_keys
            )
            if identity == known_identity:
                return rev_id
        return None

    def build_revision(
        self, a_metadata: Mapping[str, Any], uncompressed_path: str, directory: Sha1Git
    ) -> Optional[Revision]:
        """Build the synthetic revision describing one loaded artifact.

        Args:
            a_metadata: the artifact description; its ``time`` entry is used
                as both the revision date and the committer date
            uncompressed_path: unused by this loader
            directory: identifier of the directory the revision points to

        Returns:
            A synthetic, parentless revision of type ``tar`` whose extrinsic
            metadata records the origin url, the visit date and the raw
            artifact description.

        """
        time = a_metadata["time"]
        if isinstance(time, str):
            # an isoformat date string
            time = iso8601.parse_date(time)
        elif isinstance(time, (int, float)):
            # A numeric timestamp (documented as accepted by the constructor):
            # turn it into an aware datetime, which is what from_datetime
            # works on. NOTE(review): interpreted as seconds since the epoch,
            # UTC -- confirm against the lister's output.
            time = datetime.datetime.fromtimestamp(time, tz=datetime.timezone.utc)
        normalized_time = TimestampWithTimezone.from_datetime(time)
        return Revision(
            type=RevisionType.TAR,
            message=REVISION_MESSAGE,
            date=normalized_time,
            author=SWH_PERSON,
            committer=SWH_PERSON,
            committer_date=normalized_time,
            parents=[],
            directory=directory,
            synthetic=True,
            metadata={
                "intrinsic": {},
                "extrinsic": {
                    "provider": self.url,
                    "when": self.visit_date.isoformat(),
                    "raw": a_metadata,
                },
            },
        )
File Metadata
Details
Attached
Mime Type
text/x-python
Expires
Jul 4 2025, 5:51 PM (4 w, 5 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3314660
Attached To
rDLDBASE Generic VCS/Package Loader
Event Timeline
Log In to Comment