Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/package/npm/loader.py
# Copyright (C) 2019 The Software Heritage developers | # Copyright (C) 2019 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import json | import json | ||||
import logging | import logging | ||||
import os | import os | ||||
from codecs import BOM_UTF8 | from codecs import BOM_UTF8 | ||||
from typing import Any, Dict, Generator, Mapping, Sequence, Tuple, Optional | from typing import Any, Dict, Generator, Mapping, Sequence, Tuple, Optional | ||||
import attr | |||||
import chardet | import chardet | ||||
import iso8601 | |||||
from urllib.parse import quote | from urllib.parse import quote | ||||
from swh.model.identifiers import normalize_timestamp | from swh.model.model import ( | ||||
Person, RevisionType, Revision, TimestampWithTimezone, Sha1Git, | |||||
) | |||||
from swh.loader.package.loader import PackageLoader | from swh.loader.package.loader import PackageLoader | ||||
from swh.loader.package.utils import ( | from swh.loader.package.utils import ( | ||||
api_info, release_name, parse_author, swh_author | api_info, release_name, parse_author, swh_author | ||||
) | ) | ||||
logger = logging.getLogger(__name__) | logger = logging.getLogger(__name__) | ||||
▲ Show 20 Lines • Show All 44 Lines • ▼ Show 20 Lines | def get_package_info(self, version: str) -> Generator[ | ||||
yield release_name(version), p_info | yield release_name(version), p_info | ||||
def resolve_revision_from( | def resolve_revision_from( | ||||
self, known_artifacts: Dict, artifact_metadata: Dict) \ | self, known_artifacts: Dict, artifact_metadata: Dict) \ | ||||
-> Optional[bytes]: | -> Optional[bytes]: | ||||
return artifact_to_revision_id(known_artifacts, artifact_metadata) | return artifact_to_revision_id(known_artifacts, artifact_metadata) | ||||
def build_revision( | def build_revision( | ||||
self, a_metadata: Dict, uncompressed_path: str) -> Dict: | self, a_metadata: Dict, uncompressed_path: str, | ||||
directory: Sha1Git) -> Optional[Revision]: | |||||
i_metadata = extract_intrinsic_metadata(uncompressed_path) | i_metadata = extract_intrinsic_metadata(uncompressed_path) | ||||
if not i_metadata: | if not i_metadata: | ||||
return {} | return None | ||||
# from intrinsic metadata | # from intrinsic metadata | ||||
author = extract_npm_package_author(i_metadata) | author = extract_npm_package_author(i_metadata) | ||||
message = i_metadata['version'].encode('ascii') | message = i_metadata['version'].encode('ascii') | ||||
# from extrinsic metadata | # from extrinsic metadata | ||||
# No date available in intrinsic metadata: retrieve it from the API | # No date available in intrinsic metadata: retrieve it from the API | ||||
# metadata, using the version number that the API claims this package | # metadata, using the version number that the API claims this package | ||||
# has. | # has. | ||||
extrinsic_version = a_metadata['version'] | extrinsic_version = a_metadata['version'] | ||||
if 'time' in self.info: | if 'time' in self.info: | ||||
date = self.info['time'][extrinsic_version] | date = self.info['time'][extrinsic_version] | ||||
elif 'mtime' in a_metadata: | elif 'mtime' in a_metadata: | ||||
date = a_metadata['mtime'] | date = a_metadata['mtime'] | ||||
else: | else: | ||||
artifact_name = os.path.basename(a_metadata['dist']['tarball']) | artifact_name = os.path.basename(a_metadata['dist']['tarball']) | ||||
raise ValueError( | raise ValueError( | ||||
'Origin %s: Cannot determine upload time for artifact %s.' % | 'Origin %s: Cannot determine upload time for artifact %s.' % | ||||
(self.url, artifact_name) | (self.url, artifact_name) | ||||
) | ) | ||||
date = iso8601.parse_date(date) | date = TimestampWithTimezone.from_iso8601(date) | ||||
date = normalize_timestamp(int(date.timestamp())) | |||||
return { | # FIXME: this is to remain bug-compatible with earlier versions: | ||||
'type': 'tar', | date = attr.evolve(date, timestamp=attr.evolve( | ||||
'message': message, | date.timestamp, microseconds=0)) | ||||
'author': author, | |||||
'date': date, | r = Revision( | ||||
'committer': author, | type=RevisionType.TAR, | ||||
'committer_date': date, | message=message, | ||||
'parents': [], | author=author, | ||||
'metadata': { | date=date, | ||||
committer=author, | |||||
committer_date=date, | |||||
parents=[], | |||||
directory=directory, | |||||
synthetic=True, | |||||
metadata={ | |||||
'intrinsic': { | 'intrinsic': { | ||||
'tool': 'package.json', | 'tool': 'package.json', | ||||
'raw': i_metadata, | 'raw': i_metadata, | ||||
}, | }, | ||||
'extrinsic': { | 'extrinsic': { | ||||
'provider': self.provider_url, | 'provider': self.provider_url, | ||||
'when': self.visit_date.isoformat(), | 'when': self.visit_date.isoformat(), | ||||
'raw': a_metadata, | 'raw': a_metadata, | ||||
}, | }, | ||||
}, | }, | ||||
} | ) | ||||
return r | |||||
ardumont: remove the prints ;) | |||||
def artifact_to_revision_id( | def artifact_to_revision_id( | ||||
known_artifacts: Dict, artifact_metadata: Dict) -> Optional[bytes]: | known_artifacts: Dict, artifact_metadata: Dict) -> Optional[bytes]: | ||||
"""Given an artifact's metadata, resolve the associated revision id. | """Given an artifact's metadata, resolve the associated revision id. | ||||
The following code handles 2 metadata formats: | The following code handles 2 metadata formats: | ||||
- old format sample:: | - old format sample:: | ||||
Show All 28 Lines | for rev_id, known_artifact in known_artifacts.items(): | ||||
else: | else: | ||||
assert isinstance(known_original_artifact, list) | assert isinstance(known_original_artifact, list) | ||||
original_hash = known_original_artifact[0]['checksums']['sha1'] | original_hash = known_original_artifact[0]['checksums']['sha1'] | ||||
if shasum == original_hash: | if shasum == original_hash: | ||||
return rev_id | return rev_id | ||||
return None | return None | ||||
def extract_npm_package_author(package_json): | def extract_npm_package_author(package_json) -> Person: | ||||
""" | """ | ||||
Extract package author from a ``package.json`` file content and | Extract package author from a ``package.json`` file content and | ||||
return it in swh format. | return it in swh format. | ||||
Args: | Args: | ||||
package_json (dict): Dict holding the content of parsed | package_json (dict): Dict holding the content of parsed | ||||
``package.json`` file | ``package.json`` file | ||||
Returns: | Returns: | ||||
dict: A dict with the following keys: | Person | ||||
* fullname | |||||
* name | |||||
""" | """ | ||||
def _author_str(author_data): | def _author_str(author_data): | ||||
if type(author_data) is dict: | if type(author_data) is dict: | ||||
author_str = '' | author_str = '' | ||||
if 'name' in author_data: | if 'name' in author_data: | ||||
author_str += author_data['name'] | author_str += author_data['name'] | ||||
▲ Show 20 Lines • Show All 86 Lines • Show Last 20 Lines |
remove the prints ;)