Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F9340110
converters.py
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
4 KB
Subscribers
None
converters.py
View Options
# Copyright (C) 2015 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
"""Convert pygit2 objects to dictionaries suitable for swh.storage"""
from
pygit2
import
GIT_OBJ_COMMIT
from
swh.core
import
hashutil
from
.utils
import
format_date
# Hash algorithms handed to hashutil.hashdata when blob_to_content hashes
# the data of a blob.
HASH_ALGORITHMS = ['sha1', 'sha256']
def blob_to_content(id, repo, log=None, max_content_size=None,
                    origin_id=None):
    """Build the content dictionary describing a git blob.

    Args:
        id: object id of the blob; its ``raw`` and ``hex`` attributes are read
        repo: repository-like object indexable by ``id`` (its ``path``
            attribute is only read when a skip is logged)
        log: optional logger, informed when the blob is skipped
        max_content_size: optional size limit; a falsy value disables the check
        origin_id: value stored under ``origin`` when the blob is skipped

    Returns:
        A dict holding ``sha1_git``, ``length`` and ``status``. A blob larger
        than ``max_content_size`` keeps the status ``'absent'`` and gains the
        ``reason`` and ``origin`` keys; any other blob gets the hashes computed
        by ``hashutil.hashdata``, its ``data`` and the status ``'visible'``.
    """
    blob = repo[id]
    length = blob.size

    content = {'sha1_git': id.raw, 'length': length, 'status': 'absent'}

    if not (max_content_size and length > max_content_size):
        # Regular case: the blob data is hashed and shipped with the content.
        payload = blob.data
        content.update(hashutil.hashdata(payload, HASH_ALGORITHMS))
        content['data'] = payload
        content['status'] = 'visible'
        return content

    # Oversized blob: only its metadata is kept, along with the skip reason.
    if log:
        message = 'Skipping content %s, too large (%s > %s)' % (
            id.hex, length, max_content_size)
        log.info(message, extra={
            'swh_type': 'loader_git_content_skip',
            'swh_repo': repo.path,
            'swh_id': id.hex,
            'swh_size': length,
        })

    content['reason'] = 'Content too large'
    content['origin'] = origin_id
    return content
def tree_to_directory(id, repo, log=None):
    """Build the directory dictionary describing a git tree.

    Args:
        id: object id of the tree; its ``raw`` attribute is read
        repo: repository-like object indexable by ``id``; the value found
            there is iterated to obtain the tree entries
        log: accepted for signature parity with the other converters; unused

    Returns:
        A dict with the raw tree ``id`` and the list of ``entries``, each one
        carrying the keys ``type``, ``perms``, ``name`` and ``target``.
    """
    # Git object type of an entry -> directory entry type.
    kind_by_git_type = {'tree': 'dir', 'blob': 'file', 'commit': 'rev'}

    return {
        'id': id.raw,
        'entries': [
            {
                'type': kind_by_git_type[item.type],
                'perms': item.filemode,
                'name': item._name,
                'target': item.id.raw,
            }
            for item in repo[id]
        ],
    }
def commit_to_revision(id, repo, log=None):
    """Build the revision dictionary describing a git commit.

    Args:
        id: object id of the commit; its ``raw`` attribute is read
        repo: repository-like object indexable by ``id``
        log: accepted for signature parity with the other converters; unused

    Returns:
        A dict with the raw commit ``id``, the author and committer dates as
        produced by ``format_date``, the raw id of the commit tree, the raw
        message, the raw author/committer names and emails, and the raw ids
        of the parents. ``type`` is always ``'git'``, ``metadata`` is None and
        ``synthetic`` is False.
    """
    commit = repo[id]
    authored_by, committed_by = commit.author, commit.committer

    revision = {
        'id': id.raw,
        'date': format_date(authored_by),
        'committer_date': format_date(committed_by),
        'type': 'git',
        'directory': commit.tree_id.raw,
        'message': commit.raw_message,
        'metadata': None,
    }

    # Author and committer share the same {name, email} layout.
    for role, signature in (('author', authored_by),
                            ('committer', committed_by)):
        revision[role] = {
            'name': signature.raw_name,
            'email': signature.raw_email,
        }

    revision['synthetic'] = False
    revision['parents'] = [parent.raw for parent in commit.parent_ids]
    return revision
def annotated_tag_to_release(id, repo, log=None):
    """Format an annotated tag as a release.

    Args:
        id: object id of the annotated tag; its ``raw`` and ``hex``
            attributes are read
        repo: repository-like object indexable by object id; its ``path``
            attribute is attached to the log records
        log: optional logger; when given, ignored tags and tags without a
            tagger are reported through ``log.warning``

    Returns:
        None when the tag does not point at a commit. Otherwise a dict with
        the raw tag ``id``, the tagger ``date`` as produced by
        ``format_date``, the raw ``target`` id, the ``target_type``
        (always ``'revision'``), the ``message``, the UTF-8 encoded ``name``
        and the ``author`` (raw name and email of the tagger). ``date`` and
        ``author`` are None when the tag has no tagger; ``metadata`` is None
        and ``synthetic`` is False.
    """
    tag = repo[id]
    tag_pointer = repo[tag.target]

    # Only tags pointing at commits are converted; anything else is ignored.
    if tag_pointer.type != GIT_OBJ_COMMIT:
        if log:
            # Logger.warn is a deprecated alias that no longer exists in
            # recent Python versions; Logger.warning is the supported name.
            log.warning("Ignoring tag %s pointing at %s %s" % (
                tag.id.hex, tag_pointer.__class__.__name__,
                tag_pointer.id.hex), extra={
                    'swh_type': 'loader_git_tag_ignore',
                    'swh_repo': repo.path,
                    'swh_tag_id': tag.id.hex,
                    'swh_tag_dest': {
                        'type': tag_pointer.__class__.__name__,
                        'id': tag_pointer.id.hex,
                    },
                })
        return None

    if not tag.tagger:
        # A tag without tagger gets neither an author nor a date.
        if log:
            log.warning("Tag %s has no author, using default values"
                        % id.hex, extra={
                            'swh_type': 'loader_git_tag_author_default',
                            'swh_repo': repo.path,
                            'swh_tag_id': tag.id.hex,
                        })
        author = None
        date = None
    else:
        author = {
            'name': tag.tagger.raw_name,
            'email': tag.tagger.raw_email,
        }
        date = format_date(tag.tagger)

    return {
        'id': id.raw,
        'date': date,
        'target': tag.target.raw,
        'target_type': 'revision',
        'message': tag._message,
        'name': tag.name.encode('utf-8'),
        'author': author,
        'metadata': None,
        'synthetic': False,
    }
def ref_to_occurrence(ref):
    """Return the occurrence for a reference.

    The reference is handed back as is: the very same object is returned,
    without copy or conversion.
    """
    occurrence = ref
    return occurrence
def origin_url_to_origin(origin_url):
    """Wrap an origin URL into an origin dictionary.

    Args:
        origin_url: stored unchanged under the ``url`` key

    Returns:
        A dict with ``type`` set to ``'git'`` and ``url`` set to
        ``origin_url``.
    """
    return dict(type='git', url=origin_url)
File Metadata
Details
Attached
Mime Type
text/x-python
Expires
Jul 4 2025, 10:15 AM (4 w, 6 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3277897
Attached To
rDLDG Git loader
Event Timeline
Log In to Comment