Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7163627
D2714.id9714.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
38 KB
Subscribers
None
D2714.id9714.diff
View Options
diff --git a/conftest.py b/conftest.py
--- a/conftest.py
+++ b/conftest.py
@@ -19,7 +19,6 @@
'storage': {
'cls': 'pipeline',
'steps': [
- {'cls': 'validate'},
{'cls': 'retry'},
{'cls': 'filter'},
{'cls': 'buffer'},
diff --git a/requirements-swh.txt b/requirements-swh.txt
--- a/requirements-swh.txt
+++ b/requirements-swh.txt
@@ -1,4 +1,4 @@
swh.core >= 0.0.75
-swh.model >= 0.0.54
+swh.model >= 0.0.57
swh.scheduler
swh.storage >= 0.0.163
diff --git a/swh/loader/core/tests/test_converters.py b/swh/loader/core/tests/test_converters.py
--- a/swh/loader/core/tests/test_converters.py
+++ b/swh/loader/core/tests/test_converters.py
@@ -31,8 +31,7 @@
data = b'temp file for testing content storage conversion'
tmpfile = tmpfile_with_content(tmpdir, data)
- obj = from_disk.Content.from_file(path=os.fsdecode(tmpfile),
- save_path=True).get_data()
+ obj = from_disk.Content.from_file(path=os.fsdecode(tmpfile)).get_data()
expected_content = obj.copy()
expected_content['data'] = data
diff --git a/swh/loader/package/archive/loader.py b/swh/loader/package/archive/loader.py
--- a/swh/loader/package/archive/loader.py
+++ b/swh/loader/package/archive/loader.py
@@ -11,15 +11,17 @@
from swh.loader.package.loader import PackageLoader
from swh.loader.package.utils import release_name, artifact_identity
-from swh.model.identifiers import normalize_timestamp
+from swh.model.model import (
+ Sha1Git, Person, TimestampWithTimezone, Revision, RevisionType,
+)
logger = logging.getLogger(__name__)
-SWH_PERSON = {
- 'name': b'Software Heritage',
- 'fullname': b'Software Heritage',
- 'email': b'robot@softwareheritage.org'
-}
+SWH_PERSON = Person(
+ name=b'Software Heritage',
+ fullname=b'Software Heritage',
+ email=b'robot@softwareheritage.org'
+)
REVISION_MESSAGE = b'swh-loader-package: synthetic revision message'
@@ -101,21 +103,24 @@
return rev_id
return None
- def build_revision(self, a_metadata: Mapping[str, Any],
- uncompressed_path: str) -> Dict:
+ def build_revision(
+ self, a_metadata: Mapping[str, Any], uncompressed_path: str,
+ directory: Sha1Git) -> Optional[Revision]:
time = a_metadata['time'] # assume it's a timestamp
if isinstance(time, str): # otherwise, assume it's a parsable date
time = iso8601.parse_date(time)
- normalized_time = normalize_timestamp(time)
- return {
- 'type': 'tar',
- 'message': REVISION_MESSAGE,
- 'date': normalized_time,
- 'author': SWH_PERSON,
- 'committer': SWH_PERSON,
- 'committer_date': normalized_time,
- 'parents': [],
- 'metadata': {
+ normalized_time = TimestampWithTimezone.from_datetime(time)
+ return Revision(
+ type=RevisionType.TAR,
+ message=REVISION_MESSAGE,
+ date=normalized_time,
+ author=SWH_PERSON,
+ committer=SWH_PERSON,
+ committer_date=normalized_time,
+ parents=[],
+ directory=directory,
+ synthetic=True,
+ metadata={
'intrinsic': {},
'extrinsic': {
'provider': self.url,
@@ -123,4 +128,4 @@
'raw': a_metadata,
},
},
- }
+ )
diff --git a/swh/loader/package/cran/loader.py b/swh/loader/package/cran/loader.py
--- a/swh/loader/package/cran/loader.py
+++ b/swh/loader/package/cran/loader.py
@@ -19,7 +19,9 @@
from swh.loader.package.utils import (
release_name, parse_author, swh_author, artifact_identity
)
-from swh.model.identifiers import normalize_timestamp
+from swh.model.model import (
+ TimestampWithTimezone, Sha1Git, Revision, RevisionType,
+)
logger = logging.getLogger(__name__)
@@ -85,21 +87,24 @@
def build_revision(
self, a_metadata: Mapping[str, Any],
- uncompressed_path: str) -> Dict[str, Any]:
+ uncompressed_path: str,
+ directory: Sha1Git) -> Optional[Revision]:
# a_metadata is empty
metadata = extract_intrinsic_metadata(uncompressed_path)
- normalized_date = normalize_timestamp(parse_date(metadata.get('Date')))
+ date = parse_date(metadata.get('Date'))
author = swh_author(parse_author(metadata.get('Maintainer', {})))
version = metadata.get('Version', a_metadata['version'])
- return {
- 'message': version.encode('utf-8'),
- 'type': 'tar',
- 'date': normalized_date,
- 'author': author,
- 'committer': author,
- 'committer_date': normalized_date,
- 'parents': [],
- 'metadata': {
+ return Revision(
+ message=version.encode('utf-8'),
+ type=RevisionType.TAR,
+ date=date,
+ author=author,
+ committer=author,
+ committer_date=date,
+ parents=[],
+ directory=directory,
+ synthetic=True,
+ metadata={
'intrinsic': {
'tool': 'DESCRIPTION',
'raw': metadata,
@@ -110,7 +115,7 @@
'raw': a_metadata,
},
},
- }
+ )
def parse_debian_control(filepath: str) -> Dict[str, Any]:
@@ -159,14 +164,14 @@
return parse_debian_control(description_path)
-def parse_date(date: Optional[str]) -> Optional[datetime.datetime]:
+def parse_date(date: Optional[str]) -> Optional[TimestampWithTimezone]:
"""Parse a date into a datetime
"""
assert not date or isinstance(date, str)
dt: Optional[datetime.datetime] = None
if not date:
- return dt
+ return None
try:
specific_date = DATE_PATTERN.match(date)
if specific_date:
@@ -183,4 +188,7 @@
dt = dt.replace(tzinfo=timezone.utc)
except Exception as e:
logger.warning('Fail to parse date %s. Reason: %s', (date, e))
- return dt
+ if dt:
+ return TimestampWithTimezone.from_datetime(dt)
+ else:
+ return None
diff --git a/swh/loader/package/cran/tests/test_cran.py b/swh/loader/package/cran/tests/test_cran.py
--- a/swh/loader/package/cran/tests/test_cran.py
+++ b/swh/loader/package/cran/tests/test_cran.py
@@ -16,6 +16,7 @@
parse_debian_control
)
from swh.core.tarball import uncompress
+from swh.model.model import TimestampWithTimezone
from swh.loader.package.tests.common import (
check_snapshot, get_stats
@@ -102,8 +103,12 @@
("Check NEWS file for changes: news(package='simSummary')", None)
]
for date, expected_date in data:
- actual_date = parse_date(date)
- assert actual_date == expected_date, f'input date to parse {date}'
+ actual_tstz = parse_date(date)
+ if expected_date is None:
+ assert actual_tstz is None, date
+ else:
+ expected_tstz = TimestampWithTimezone.from_datetime(expected_date)
+ assert actual_tstz == expected_tstz, date
@pytest.mark.fs
diff --git a/swh/loader/package/debian/loader.py b/swh/loader/package/debian/loader.py
--- a/swh/loader/package/debian/loader.py
+++ b/swh/loader/package/debian/loader.py
@@ -4,21 +4,23 @@
# See top-level LICENSE file for more information
import email.utils
-import iso8601
import logging
+from os import path
import re
import subprocess
from dateutil.parser import parse as parse_date
from debian.changelog import Changelog
from debian.deb822 import Dsc
-from os import path
from typing import (
- Any, Dict, Generator, List, Mapping, Optional, Sequence, Tuple
+ Any, Generator, List, Mapping, Optional, Sequence, Tuple
)
from swh.loader.package.loader import PackageLoader
from swh.loader.package.utils import download, release_name
+from swh.model.model import (
+ Sha1Git, Person, Revision, RevisionType, TimestampWithTimezone
+)
logger = logging.getLogger(__name__)
@@ -119,8 +121,9 @@
logger.debug('dl_artifacts: %s', dl_artifacts)
return extract_package(dl_artifacts, dest=dest)
- def build_revision(self, a_metadata: Mapping[str, Any],
- uncompressed_path: str) -> Dict:
+ def build_revision(
+ self, a_metadata: Mapping[str, Any], uncompressed_path: str,
+ directory: Sha1Git) -> Optional[Revision]:
dsc_url, dsc_name = dsc_information(a_metadata)
if not dsc_name:
raise ValueError(
@@ -135,19 +138,22 @@
msg = 'Synthetic revision for Debian source package %s version %s' % (
a_metadata['name'], a_metadata['version'])
- date = iso8601.parse_date(i_metadata['changelog']['date'])
+ date = TimestampWithTimezone.from_iso8601(
+ i_metadata['changelog']['date'])
author = prepare_person(i_metadata['changelog']['person'])
# inspired from swh.loader.debian.converters.package_metadata_to_revision # noqa
- return {
- 'type': 'dsc',
- 'message': msg.encode('utf-8'),
- 'author': author,
- 'date': date,
- 'committer': author,
- 'committer_date': date,
- 'parents': [],
- 'metadata': {
+ return Revision(
+ type=RevisionType.DSC,
+ message=msg.encode('utf-8'),
+ author=author,
+ date=date,
+ committer=author,
+ committer_date=date,
+ parents=[],
+ directory=directory,
+ synthetic=True,
+ metadata={
'intrinsic': {
'tool': 'dsc',
'raw': i_metadata,
@@ -157,8 +163,8 @@
'when': self.visit_date.isoformat(),
'raw': a_metadata,
},
- }
- }
+ },
+ )
def resolve_revision_from(known_package_artifacts: Mapping,
@@ -223,20 +229,20 @@
return ret
-def prepare_person(person: Mapping[str, str]) -> Mapping[str, bytes]:
+def prepare_person(person: Mapping[str, str]) -> Person:
"""Prepare person for swh serialization...
Args:
A person dict
Returns:
- A person dict ready for storage
+ A person ready for storage
"""
- ret = {}
- for key, value in person.items():
- ret[key] = value.encode('utf-8')
- return ret
+ return Person.from_dict({
+ key: value.encode('utf-8')
+ for (key, value) in person.items()
+ })
def download_package(
diff --git a/swh/loader/package/debian/tests/test_debian.py b/swh/loader/package/debian/tests/test_debian.py
--- a/swh/loader/package/debian/tests/test_debian.py
+++ b/swh/loader/package/debian/tests/test_debian.py
@@ -17,6 +17,8 @@
from swh.loader.package.tests.common import check_snapshot, get_stats
from swh.loader.package.debian.loader import resolve_revision_from
+from swh.model.model import Person
+
logger = logging.getLogger(__name__)
@@ -224,11 +226,11 @@
'fullname': 'Someone Name <someone@orga.org>',
})
- assert actual_author == {
- 'name': b'Someone Name',
- 'email': b'someone@orga.org',
- 'fullname': b'Someone Name <someone@orga.org>',
- }
+ assert actual_author == Person(
+ name=b'Someone Name',
+ email=b'someone@orga.org',
+ fullname=b'Someone Name <someone@orga.org>',
+ )
def test_download_package(datadir, tmpdir, requests_mock_datadir):
diff --git a/swh/loader/package/deposit/loader.py b/swh/loader/package/deposit/loader.py
--- a/swh/loader/package/deposit/loader.py
+++ b/swh/loader/package/deposit/loader.py
@@ -12,6 +12,9 @@
)
from swh.model.hashutil import hash_to_hex, hash_to_bytes
+from swh.model.model import (
+ Person, Revision, RevisionType, TimestampWithTimezone, Sha1Git,
+)
from swh.loader.package.loader import PackageLoader
from swh.loader.package.utils import download
@@ -69,26 +72,35 @@
self.deposit_id, tmpdir, p_info['filename'])]
def build_revision(
- self, a_metadata: Dict, uncompressed_path: str) -> Dict:
- revision = a_metadata.pop('revision')
- metadata = {
+ self, a_metadata: Dict, uncompressed_path: str,
+ directory: Sha1Git) -> Optional[Revision]:
+ revision_data = a_metadata.pop('revision')
+
+ # FIXME: the deposit no longer needs to build the revision
+
+ date = TimestampWithTimezone.from_dict(revision_data['date'])
+ metadata = revision_data['metadata']
+ metadata.update({
'extrinsic': {
'provider': self.client.metadata_url(self.deposit_id),
'when': self.visit_date.isoformat(),
'raw': a_metadata,
},
- }
-
- # FIXME: the deposit no longer needs to build the revision
- revision['metadata'].update(metadata)
- revision['author'] = parse_author(revision['author'])
- revision['committer'] = parse_author(revision['committer'])
- revision['message'] = revision['message'].encode('utf-8')
- revision['type'] = 'tar'
- parents = revision.get('parents', [])
- revision['parents'] = [hash_to_bytes(p) for p in parents]
-
- return revision
+ })
+
+ return Revision(
+ type=RevisionType.TAR,
+ message=revision_data['message'].encode('utf-8'),
+ author=parse_author(revision_data['author']),
+ date=date,
+ committer=parse_author(revision_data['committer']),
+ committer_date=date,
+ parents=[hash_to_bytes(p)
+ for p in revision_data.get('parents', [])],
+ directory=directory,
+ synthetic=True,
+ metadata=metadata,
+ )
def load(self) -> Dict:
# Usual loading
@@ -153,15 +165,15 @@
return r
-def parse_author(author):
+def parse_author(author) -> Person:
"""See prior fixme
"""
- return {
- 'fullname': author['fullname'].encode('utf-8'),
- 'name': author['name'].encode('utf-8'),
- 'email': author['email'].encode('utf-8'),
- }
+ return Person(
+ fullname=author['fullname'].encode('utf-8'),
+ name=author['name'].encode('utf-8'),
+ email=author['email'].encode('utf-8'),
+ )
class ApiClient:
diff --git a/swh/loader/package/loader.py b/swh/loader/package/loader.py
--- a/swh/loader/package/loader.py
+++ b/swh/loader/package/loader.py
@@ -12,18 +12,22 @@
Any, Dict, Generator, List, Mapping, Optional, Sequence, Tuple
)
+import attr
+
from swh.core.tarball import uncompress
from swh.core.config import SWHConfig
-from swh.model.from_disk import Directory
+from swh.model import from_disk
from swh.model.hashutil import hash_to_hex
-from swh.model.identifiers import (
- revision_identifier, snapshot_identifier, identifier_to_bytes
+from swh.model.model import (
+ BaseModel, Sha1Git,
+ Content, SkippedContent, Directory,
+ Revision,
+ TargetType, Snapshot,
+ Origin
)
-from swh.model.model import Sha1Git
from swh.storage import get_storage
from swh.storage.algos.snapshot import snapshot_get_all_branches
-from swh.loader.core.converters import prepare_contents
from swh.loader.package.utils import download
@@ -96,8 +100,9 @@
yield from {}
def build_revision(
- self, a_metadata: Dict, uncompressed_path: str) -> Dict:
- """Build the revision dict from the archive metadata (extrinsic
+ self, a_metadata: Dict, uncompressed_path: str,
+ directory: Sha1Git) -> Optional[Revision]:
+ """Build the revision from the archive metadata (extrinsic
artifact metadata) and the intrinsic metadata.
Args:
@@ -108,7 +113,7 @@
SWH data dict
"""
- return {}
+ raise NotImplementedError('build_revision')
def get_default_version(self) -> str:
"""Retrieve the latest release version if any.
@@ -119,19 +124,20 @@
"""
return ''
- def last_snapshot(self) -> Optional[Dict]:
+ def last_snapshot(self) -> Optional[Snapshot]:
"""Retrieve the last snapshot
"""
snapshot = None
visit = self.storage.origin_visit_get_latest(
self.url, require_snapshot=True)
- if visit:
- snapshot = snapshot_get_all_branches(
- self.storage, visit['snapshot'])
+ if visit and visit.get('snapshot'):
+ snapshot = Snapshot.from_dict(snapshot_get_all_branches(
+ self.storage, visit['snapshot']))
return snapshot
- def known_artifacts(self, snapshot: Optional[Dict]) -> Dict:
+ def known_artifacts(
+ self, snapshot: Optional[Snapshot]) -> Dict[Sha1Git, BaseModel]:
"""Retrieve the known releases/artifact for the origin.
Args
@@ -141,13 +147,13 @@
Dict of keys revision id (bytes), values a metadata Dict.
"""
- if not snapshot or 'branches' not in snapshot:
+ if not snapshot:
return {}
# retrieve only revisions (e.g the alias we do not want here)
- revs = [rev['target']
- for rev in snapshot['branches'].values()
- if rev and rev['target_type'] == 'revision']
+ revs = [rev.target
+ for rev in snapshot.branches.values()
+ if rev and rev.target_type == TargetType.REVISION]
known_revisions = self.storage.revision_get(revs)
ret = {}
@@ -263,16 +269,15 @@
snapshot = None
# Prepare origin and origin_visit
- origin = {'url': self.url}
+ origin = Origin(url=self.url)
try:
self.storage.origin_add_one(origin)
visit_id = self.storage.origin_visit_add(
origin=self.url,
date=self.visit_date,
type=self.visit_type)['visit']
- except Exception as e:
- logger.error(
- 'Failed to create origin/origin_visit. Reason: %s', e)
+ except Exception:
+ logger.exception('Failed to create origin/origin_visit:')
return {'status': 'failed'}
try:
@@ -327,13 +332,12 @@
'target': target,
}
- snapshot = {
+ snapshot_data = {
'branches': branches
}
- logger.debug('snapshot: %s', snapshot)
+ logger.debug('snapshot: %s', snapshot_data)
- snapshot['id'] = identifier_to_bytes(
- snapshot_identifier(snapshot))
+ snapshot = Snapshot.from_dict(snapshot_data)
logger.debug('snapshot: %s', snapshot)
self.storage.snapshot_add([snapshot])
@@ -346,12 +350,12 @@
finally:
self.storage.origin_visit_update(
origin=self.url, visit_id=visit_id, status=status_visit,
- snapshot=snapshot and snapshot['id'])
+ snapshot=snapshot and snapshot.id)
result = {
'status': status_load,
} # type: Dict[str, Any]
if snapshot:
- result['snapshot_id'] = hash_to_hex(snapshot['id'])
+ result['snapshot_id'] = hash_to_hex(snapshot.id)
return result
def _load_revision(self, p_info, origin) -> Tuple[Optional[Sha1Git], bool]:
@@ -373,51 +377,56 @@
uncompressed_path = self.uncompress(dl_artifacts, dest=tmpdir)
logger.debug('uncompressed_path: %s', uncompressed_path)
- directory = Directory.from_disk(
+ directory = from_disk.Directory.from_disk(
path=uncompressed_path.encode('utf-8'),
- data=True) # noqa
- # FIXME: Try not to load the full raw content in
- # memory
- objects = directory.collect()
-
- contents, skipped_contents = prepare_contents(
- objects.get('content', {}).values(),
- max_content_size=self.max_content_size,
- origin_url=origin['url'])
- self.storage.skipped_content_add(skipped_contents)
+ max_content_length=self.max_content_size)
+
+ contents: List[Content] = []
+ skipped_contents: List[SkippedContent] = []
+ directories: List[Directory] = []
+
+ for obj in directory.iter_tree():
+ obj = obj.to_model()
+ if isinstance(obj, Content):
+ # FIXME: read the data from disk later (when the
+ # storage buffer is flushed).
+ obj = obj.with_data()
+ contents.append(obj)
+ elif isinstance(obj, SkippedContent):
+ skipped_contents.append(obj)
+ elif isinstance(obj, Directory):
+ directories.append(obj)
+ else:
+ raise TypeError(
+ f'Unexpected content type from disk: {obj}')
+
logger.debug('Number of skipped contents: %s',
len(skipped_contents))
- self.storage.content_add(contents)
+ self.storage.skipped_content_add(skipped_contents)
logger.debug('Number of contents: %s', len(contents))
+ self.storage.content_add(contents)
- directories = list(
- objects.get('directory', {}).values())
logger.debug('Number of directories: %s', len(directories))
self.storage.directory_add(directories)
# FIXME: This should be release. cf. D409
- revision = self.build_revision(p_info['raw'], uncompressed_path)
+ revision = self.build_revision(
+ p_info['raw'], uncompressed_path, directory=directory.hash)
if not revision:
# Some artifacts are missing intrinsic metadata
# skipping those
return (None, True)
- revision.update({
- 'synthetic': True,
- 'directory': directory.hash,
- })
-
- revision['metadata'].update({
+ metadata = revision.metadata or {}
+ metadata.update({
'original_artifact': [
hashes for _, hashes in dl_artifacts
],
})
-
- revision['id'] = identifier_to_bytes(
- revision_identifier(revision))
+ revision = attr.evolve(revision, metadata=metadata)
logger.debug('Revision: %s', revision)
self.storage.revision_add([revision])
- return (revision['id'], True)
+ return (revision.id, True)
diff --git a/swh/loader/package/npm/loader.py b/swh/loader/package/npm/loader.py
--- a/swh/loader/package/npm/loader.py
+++ b/swh/loader/package/npm/loader.py
@@ -10,11 +10,14 @@
from codecs import BOM_UTF8
from typing import Any, Dict, Generator, Mapping, Sequence, Tuple, Optional
+import attr
import chardet
-import iso8601
from urllib.parse import quote
-from swh.model.identifiers import normalize_timestamp
+from swh.model.model import (
+ Person, RevisionType, Revision, TimestampWithTimezone, Sha1Git,
+)
+
from swh.loader.package.loader import PackageLoader
from swh.loader.package.utils import (
api_info, release_name, parse_author, swh_author
@@ -75,10 +78,11 @@
return artifact_to_revision_id(known_artifacts, artifact_metadata)
def build_revision(
- self, a_metadata: Dict, uncompressed_path: str) -> Dict:
+ self, a_metadata: Dict, uncompressed_path: str,
+ directory: Sha1Git) -> Optional[Revision]:
i_metadata = extract_intrinsic_metadata(uncompressed_path)
if not i_metadata:
- return {}
+ return None
# from intrinsic metadata
author = extract_npm_package_author(i_metadata)
message = i_metadata['version'].encode('ascii')
@@ -101,18 +105,23 @@
(self.url, artifact_name)
)
- date = iso8601.parse_date(date)
- date = normalize_timestamp(int(date.timestamp()))
-
- return {
- 'type': 'tar',
- 'message': message,
- 'author': author,
- 'date': date,
- 'committer': author,
- 'committer_date': date,
- 'parents': [],
- 'metadata': {
+ date = TimestampWithTimezone.from_iso8601(date)
+
+ # FIXME: this is to remain bug-compatible with earlier versions:
+ date = attr.evolve(date, timestamp=attr.evolve(
+ date.timestamp, microseconds=0))
+
+ r = Revision(
+ type=RevisionType.TAR,
+ message=message,
+ author=author,
+ date=date,
+ committer=author,
+ committer_date=date,
+ parents=[],
+ directory=directory,
+ synthetic=True,
+ metadata={
'intrinsic': {
'tool': 'package.json',
'raw': i_metadata,
@@ -123,7 +132,8 @@
'raw': a_metadata,
},
},
- }
+ )
+ return r
def artifact_to_revision_id(
@@ -170,7 +180,7 @@
return None
-def extract_npm_package_author(package_json):
+def extract_npm_package_author(package_json) -> Person:
"""
Extract package author from a ``package.json`` file content and
return it in swh format.
@@ -180,10 +190,7 @@
``package.json`` file
Returns:
- dict: A dict with the following keys:
- * fullname
- * name
- * email
+ Person
"""
diff --git a/swh/loader/package/npm/tests/test_npm.py b/swh/loader/package/npm/tests/test_npm.py
--- a/swh/loader/package/npm/tests/test_npm.py
+++ b/swh/loader/package/npm/tests/test_npm.py
@@ -8,6 +8,7 @@
import pytest
from swh.model.hashutil import hash_to_bytes
+from swh.model.model import Person
from swh.loader.package.npm.loader import (
NpmLoader, extract_npm_package_author,
@@ -26,19 +27,19 @@
package_metadata = json.load(json_file)
extract_npm_package_author(package_metadata['versions']['0.0.2']) == \
- {
- 'fullname': b'mooz <stillpedant@gmail.com>',
- 'name': b'mooz',
- 'email': b'stillpedant@gmail.com'
- }
+ Person(
+ fullname=b'mooz <stillpedant@gmail.com>',
+ name=b'mooz',
+ email=b'stillpedant@gmail.com'
+ )
assert (
extract_npm_package_author(package_metadata['versions']['0.0.3']) ==
- {
- 'fullname': b'Masafumi Oyamada <stillpedant@gmail.com>',
- 'name': b'Masafumi Oyamada',
- 'email': b'stillpedant@gmail.com'
- }
+ Person(
+ fullname=b'Masafumi Oyamada <stillpedant@gmail.com>',
+ name=b'Masafumi Oyamada',
+ email=b'stillpedant@gmail.com'
+ )
)
package_json = json.loads('''
@@ -67,11 +68,11 @@
}''') # noqa
assert extract_npm_package_author(package_json) == \
- {
- 'fullname': b'Yauheni Pakala <evgeniy.pakalo@gmail.com>',
- 'name': b'Yauheni Pakala',
- 'email': b'evgeniy.pakalo@gmail.com'
- }
+ Person(
+ fullname=b'Yauheni Pakala <evgeniy.pakalo@gmail.com>',
+ name=b'Yauheni Pakala',
+ email=b'evgeniy.pakalo@gmail.com'
+ )
package_json = json.loads('''
{
@@ -106,11 +107,11 @@
}''')
assert extract_npm_package_author(package_json) == \
- {
- 'fullname': b'Shawn Walsh',
- 'name': b'Shawn Walsh',
- 'email': None
- }
+ Person(
+ fullname=b'Shawn Walsh',
+ name=b'Shawn Walsh',
+ email=None
+ )
package_json = json.loads('''
{
@@ -129,11 +130,11 @@
}''')
assert extract_npm_package_author(package_json) == \
- {
- 'fullname': b'fengmk2 <fengmk2@gmail.com>',
- 'name': b'fengmk2',
- 'email': b'fengmk2@gmail.com'
- }
+ Person(
+ fullname=b'fengmk2 <fengmk2@gmail.com>',
+ name=b'fengmk2',
+ email=b'fengmk2@gmail.com'
+ )
package_json = json.loads('''
{
@@ -153,11 +154,11 @@
}''')
assert extract_npm_package_author(package_json) == \
- {
- 'fullname': b'xiaohuoni <448627663@qq.com>',
- 'name': b'xiaohuoni',
- 'email': b'448627663@qq.com'
- }
+ Person(
+ fullname=b'xiaohuoni <448627663@qq.com>',
+ name=b'xiaohuoni',
+ email=b'448627663@qq.com'
+ )
def normalize_hashes(hashes):
diff --git a/swh/loader/package/pypi/loader.py b/swh/loader/package/pypi/loader.py
--- a/swh/loader/package/pypi/loader.py
+++ b/swh/loader/package/pypi/loader.py
@@ -10,11 +10,12 @@
from urllib.parse import urlparse
from pkginfo import UnpackedSDist
-import iso8601
+from swh.model.model import (
+ Person, Sha1Git, TimestampWithTimezone, Revision, RevisionType
+)
-from swh.model.identifiers import normalize_timestamp
from swh.loader.package.loader import PackageLoader
-from swh.loader.package.utils import api_info, release_name
+from swh.loader.package.utils import api_info, release_name, EMPTY_AUTHOR
logger = logging.getLogger(__name__)
@@ -72,10 +73,11 @@
return artifact_to_revision_id(known_artifacts, artifact_metadata)
def build_revision(
- self, a_metadata: Dict, uncompressed_path: str) -> Dict:
+ self, a_metadata: Dict, uncompressed_path: str,
+ directory: Sha1Git) -> Optional[Revision]:
i_metadata = extract_intrinsic_metadata(uncompressed_path)
if not i_metadata:
- return {}
+ return None
# from intrinsic metadata
name = i_metadata['version']
@@ -84,18 +86,19 @@
# from extrinsic metadata
message = a_metadata.get('comment_text', '')
message = '%s: %s' % (name, message) if message else name
- date = normalize_timestamp(
- int(iso8601.parse_date(a_metadata['upload_time']).timestamp()))
-
- return {
- 'type': 'tar',
- 'message': message.encode('utf-8'),
- 'author': _author,
- 'date': date,
- 'committer': _author,
- 'committer_date': date,
- 'parents': [],
- 'metadata': {
+ date = TimestampWithTimezone.from_iso8601(a_metadata['upload_time'])
+
+ return Revision(
+ type=RevisionType.TAR,
+ message=message.encode('utf-8'),
+ author=_author,
+ date=date,
+ committer=_author,
+ committer_date=date,
+ parents=[],
+ directory=directory,
+ synthetic=True,
+ metadata={
'intrinsic': {
'tool': 'PKG-INFO',
'raw': i_metadata,
@@ -106,7 +109,7 @@
'raw': a_metadata,
},
}
- }
+ )
def artifact_to_revision_id(
@@ -210,7 +213,7 @@
return raw
-def author(data: Dict) -> Dict:
+def author(data: Dict) -> Person:
"""Given a dict of project/release artifact information (coming from
PyPI), returns an author subset.
@@ -232,7 +235,7 @@
fullname = name
if not fullname:
- return {'fullname': b'', 'name': None, 'email': None}
+ return EMPTY_AUTHOR
if name is not None:
name = name.encode('utf-8')
@@ -240,8 +243,8 @@
if email is not None:
email = email.encode('utf-8')
- return {
- 'fullname': fullname.encode('utf-8'),
- 'name': name,
- 'email': email
- }
+ return Person(
+ fullname=fullname.encode('utf-8'),
+ name=name,
+ email=email
+ )
diff --git a/swh/loader/package/pypi/tests/test_pypi.py b/swh/loader/package/pypi/tests/test_pypi.py
--- a/swh/loader/package/pypi/tests/test_pypi.py
+++ b/swh/loader/package/pypi/tests/test_pypi.py
@@ -14,6 +14,7 @@
from swh.core.tarball import uncompress
from swh.core.pytest_plugin import requests_mock_datadir_factory
from swh.model.hashutil import hash_to_bytes
+from swh.model.model import Person
from swh.loader.package.pypi.loader import (
PyPILoader, pypi_api_url, author, extract_intrinsic_metadata,
@@ -31,11 +32,11 @@
}
actual_author = author(data)
- expected_author = {
- 'fullname': b'i-am-groot <iam@groot.org>',
- 'name': b'i-am-groot',
- 'email': b'iam@groot.org',
- }
+ expected_author = Person(
+ fullname=b'i-am-groot <iam@groot.org>',
+ name=b'i-am-groot',
+ email=b'iam@groot.org',
+ )
assert actual_author == expected_author
@@ -47,11 +48,11 @@
}
actual_author = author(data)
- expected_author = {
- 'fullname': b'i-am-groot',
- 'name': b'i-am-groot',
- 'email': b'',
- }
+ expected_author = Person(
+ fullname=b'i-am-groot',
+ name=b'i-am-groot',
+ email=b'',
+ )
assert actual_author == expected_author
@@ -63,11 +64,11 @@
}
actual_author = author(data)
- expected_author = {
- 'fullname': b' <iam@groot.org>',
- 'name': b'',
- 'email': b'iam@groot.org',
- }
+ expected_author = Person(
+ fullname=b' <iam@groot.org>',
+ name=b'',
+ email=b'iam@groot.org',
+ )
assert actual_author == expected_author
@@ -80,11 +81,11 @@
actual_author = author(data)
- expected_author = {
- 'fullname': b"['pierre', 'paul', 'jacques']",
- 'name': b"['pierre', 'paul', 'jacques']",
- 'email': None,
- }
+ expected_author = Person(
+ fullname=b"['pierre', 'paul', 'jacques']",
+ name=b"['pierre', 'paul', 'jacques']",
+ email=None,
+ )
assert actual_author == expected_author
@@ -97,11 +98,11 @@
actual_author = author(data)
- expected_author = {
- 'fullname': b'[marie, jeanne] <[marie@some, jeanne@thing]>',
- 'name': b'[marie, jeanne]',
- 'email': b'[marie@some, jeanne@thing]',
- }
+ expected_author = Person(
+ fullname=b'[marie, jeanne] <[marie@some, jeanne@thing]>',
+ name=b'[marie, jeanne]',
+ email=b'[marie@some, jeanne@thing]',
+ )
assert actual_author == expected_author
@@ -114,11 +115,14 @@
actual_author = author(data)
- expected_author = {
- 'fullname': b'[marie, jeanne, pierre] <[marie@somewhere.org, jeanne@somewhere.org]>', # noqa
- 'name': b'[marie, jeanne, pierre]',
- 'email': b'[marie@somewhere.org, jeanne@somewhere.org]',
- }
+ expected_author = Person(
+ fullname=(
+ b'[marie, jeanne, pierre] '
+ b'<[marie@somewhere.org, jeanne@somewhere.org]>'
+ ),
+ name=b'[marie, jeanne, pierre]',
+ email=b'[marie@somewhere.org, jeanne@somewhere.org]',
+ )
actual_author == expected_author
diff --git a/swh/loader/package/tests/test_common.py b/swh/loader/package/tests/test_common.py
--- a/swh/loader/package/tests/test_common.py
+++ b/swh/loader/package/tests/test_common.py
@@ -6,6 +6,7 @@
import pytest
from swh.model.hashutil import hash_to_bytes
+from swh.model.model import Snapshot, SnapshotBranch, TargetType
from swh.loader.package.tests.common import (
decode_target, check_snapshot, check_metadata, check_metadata_paths
)
@@ -17,9 +18,6 @@
storage_config = {
'cls': 'pipeline',
'steps': [
- {
- 'cls': 'validate',
- },
{
'cls': 'memory',
}
@@ -57,15 +55,15 @@
storage = get_storage(**storage_config)
snap_id = '2498dbf535f882bc7f9a18fb16c9ad27fda7bab7'
- snapshot = {
- 'id': hash_to_bytes(snap_id),
- 'branches': {
- b'master': {
- 'target': hash_to_bytes(hash_hex),
- 'target_type': 'revision',
- },
+ snapshot = Snapshot(
+ id=hash_to_bytes(snap_id),
+ branches={
+ b'master': SnapshotBranch(
+ target=hash_to_bytes(hash_hex),
+ target_type=TargetType.REVISION,
+ ),
},
- }
+ )
s = storage.snapshot_add([snapshot])
assert s == {
@@ -87,15 +85,15 @@
def test_check_snapshot_failure():
storage = get_storage(**storage_config)
- snapshot = {
- 'id': hash_to_bytes('2498dbf535f882bc7f9a18fb16c9ad27fda7bab7'),
- 'branches': {
- b'master': {
- 'target': hash_to_bytes(hash_hex),
- 'target_type': 'revision',
- },
+ snapshot = Snapshot(
+ id=hash_to_bytes('2498dbf535f882bc7f9a18fb16c9ad27fda7bab7'),
+ branches={
+ b'master': SnapshotBranch(
+ target=hash_to_bytes(hash_hex),
+ target_type=TargetType.REVISION,
+ ),
},
- }
+ )
s = storage.snapshot_add([snapshot])
assert s == {
diff --git a/swh/loader/package/utils.py b/swh/loader/package/utils.py
--- a/swh/loader/package/utils.py
+++ b/swh/loader/package/utils.py
@@ -12,6 +12,8 @@
from typing import Any, Dict, List, Mapping, Optional, Sequence, Tuple
from swh.model.hashutil import MultiHash, HASH_BLOCK_SIZE
+from swh.model.model import Person
+
from swh.loader.package import DEFAULT_PARAMS
@@ -25,7 +27,11 @@
_author_regexp = r'([^<(]+?)?[ \t]*(?:<([^>(]+?)>)?[ \t]*(?:\(([^)]+?)\)|$)'
-_EMPTY_AUTHOR = {'fullname': b'', 'name': None, 'email': None}
+EMPTY_AUTHOR = Person(
+ fullname=b'',
+ name=None,
+ email=None,
+)
def api_info(url: str) -> Dict:
@@ -171,7 +177,7 @@
return author
-def swh_author(author: Dict[str, str]) -> Dict[str, Optional[bytes]]:
+def swh_author(author: Dict[str, str]) -> Person:
"""Transform an author like dict to an expected swh like dict (values are
bytes)
@@ -187,13 +193,13 @@
fullname = name
if not fullname:
- r = _EMPTY_AUTHOR
+ r = EMPTY_AUTHOR
else:
- r = {
- 'fullname': fullname.encode('utf-8') if fullname else None,
- 'name': name.encode('utf-8') if name else None,
- 'email': email.encode('utf-8') if email else None
- }
+ r = Person(
+ fullname=fullname.encode('utf-8') if fullname else b'',
+ name=name.encode('utf-8') if name else None,
+ email=email.encode('utf-8') if email else None
+ )
return r
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Jan 30 2025, 11:26 AM (6 w, 2 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3217438
Attached To
D2714: Use swh-model objects in package loader.
Event Timeline
Log In to Comment