Page Menu | Home | Software Heritage

D2743.id9906.diff
No One | Temporary

D2743.id9906.diff

diff --git a/requirements-swh.txt b/requirements-swh.txt
--- a/requirements-swh.txt
+++ b/requirements-swh.txt
@@ -1,4 +1,4 @@
swh.core >= 0.0.75
-swh.model >= 0.0.57
+swh.model >= 0.0.60
swh.scheduler
swh.storage >= 0.0.163
diff --git a/swh/loader/package/cran/loader.py b/swh/loader/package/cran/loader.py
--- a/swh/loader/package/cran/loader.py
+++ b/swh/loader/package/cran/loader.py
@@ -17,10 +17,10 @@
from swh.loader.package.loader import PackageLoader
from swh.loader.package.utils import (
- release_name, parse_author, swh_author, artifact_identity
+ release_name, artifact_identity
)
from swh.model.model import (
- TimestampWithTimezone, Sha1Git, Revision, RevisionType,
+ Person, TimestampWithTimezone, Sha1Git, Revision, RevisionType,
)
@@ -92,7 +92,7 @@
# a_metadata is empty
metadata = extract_intrinsic_metadata(uncompressed_path)
date = parse_date(metadata.get('Date'))
- author = swh_author(parse_author(metadata.get('Maintainer', {})))
+ author = Person.from_fullname(metadata.get('Maintainer', '').encode())
version = metadata.get('Version', a_metadata['version'])
return Revision(
message=version.encode('utf-8'),
diff --git a/swh/loader/package/npm/loader.py b/swh/loader/package/npm/loader.py
--- a/swh/loader/package/npm/loader.py
+++ b/swh/loader/package/npm/loader.py
@@ -20,7 +20,7 @@
from swh.loader.package.loader import PackageLoader
from swh.loader.package.utils import (
- api_info, release_name, parse_author, swh_author
+ api_info, release_name
)
@@ -207,13 +207,12 @@
else:
return author_data
- author_data: Dict = {}
for author_key in ('author', 'authors'):
if author_key in package_json:
author_str = _author_str(package_json[author_key])
- author_data = parse_author(author_str)
+ return Person.from_fullname(author_str.encode())
- return swh_author(author_data)
+ return Person(fullname=b'', name=None, email=None)
def _lstrip_bom(s, bom=BOM_UTF8):
diff --git a/swh/loader/package/npm/tests/test_npm.py b/swh/loader/package/npm/tests/test_npm.py
--- a/swh/loader/package/npm/tests/test_npm.py
+++ b/swh/loader/package/npm/tests/test_npm.py
@@ -131,9 +131,9 @@
assert extract_npm_package_author(package_json) == \
Person(
- fullname=b'fengmk2 <fengmk2@gmail.com>',
+ fullname=b'fengmk2 <fengmk2@gmail.com> (https://fengmk2.com)',
name=b'fengmk2',
- email=b'fengmk2@gmail.com'
+ email=b'fengmk2@gmail.com',
)
package_json = json.loads('''
diff --git a/swh/loader/package/tests/test_utils.py b/swh/loader/package/tests/test_utils.py
--- a/swh/loader/package/tests/test_utils.py
+++ b/swh/loader/package/tests/test_utils.py
@@ -10,7 +10,7 @@
import swh.loader.package
from swh.loader.package.utils import (
- download, api_info, release_name, parse_author, artifact_identity
+ download, api_info, release_name, artifact_identity
)
@@ -159,148 +159,6 @@
assert release_name(version, filename) == expected_release
-def _parse_author_string_test(author_str, expected_result):
- assert parse_author(author_str) == expected_result
- assert parse_author(' %s' % author_str) == expected_result
- assert parse_author('%s ' % author_str) == expected_result
-
-
-def test_parse_author():
- _parse_author_string_test(
- 'John Doe',
- {
- 'name': 'John Doe'
- }
- )
-
- _parse_author_string_test(
- '<john.doe@foo.bar>',
- {
- 'email': 'john.doe@foo.bar'
- }
- )
-
- _parse_author_string_test(
- '(https://john.doe)',
- {
- 'url': 'https://john.doe'
- }
- )
-
- _parse_author_string_test(
- 'John Doe <john.doe@foo.bar>',
- {
- 'name': 'John Doe',
- 'email': 'john.doe@foo.bar'
- }
- )
-
- _parse_author_string_test(
- 'John Doe<john.doe@foo.bar>',
- {
- 'name': 'John Doe',
- 'email': 'john.doe@foo.bar'
- }
- )
-
- _parse_author_string_test(
- 'John Doe (https://john.doe)',
- {
- 'name': 'John Doe',
- 'url': 'https://john.doe'
- }
- )
-
- _parse_author_string_test(
- 'John Doe(https://john.doe)',
- {
- 'name': 'John Doe',
- 'url': 'https://john.doe'
- }
- )
-
- _parse_author_string_test(
- '<john.doe@foo.bar> (https://john.doe)',
- {
- 'email': 'john.doe@foo.bar',
- 'url': 'https://john.doe'
- }
- )
-
- _parse_author_string_test(
- '(https://john.doe) <john.doe@foo.bar>',
- {
- 'email': 'john.doe@foo.bar',
- 'url': 'https://john.doe'
- }
- )
-
- _parse_author_string_test(
- 'John Doe <john.doe@foo.bar> (https://john.doe)',
- {
- 'name': 'John Doe',
- 'email': 'john.doe@foo.bar',
- 'url': 'https://john.doe'
- }
- )
-
- _parse_author_string_test(
- 'John Doe (https://john.doe) <john.doe@foo.bar>',
- {
- 'name': 'John Doe',
- 'email': 'john.doe@foo.bar',
- 'url': 'https://john.doe'
- }
- )
-
- _parse_author_string_test(
- 'John Doe<john.doe@foo.bar> (https://john.doe)',
- {
- 'name': 'John Doe',
- 'email': 'john.doe@foo.bar',
- 'url': 'https://john.doe'
- }
- )
-
- _parse_author_string_test(
- 'John Doe<john.doe@foo.bar>(https://john.doe)',
- {
- 'name': 'John Doe',
- 'email': 'john.doe@foo.bar',
- 'url': 'https://john.doe'
- }
- )
-
- _parse_author_string_test('', {})
- _parse_author_string_test('<>', {})
- _parse_author_string_test(' <>', {})
- _parse_author_string_test('<>()', {})
- _parse_author_string_test('<> ()', {})
- _parse_author_string_test('()', {})
- _parse_author_string_test(' ()', {})
-
- _parse_author_string_test(
- 'John Doe <> ()',
- {
- 'name': 'John Doe'
- }
- )
-
- _parse_author_string_test(
- 'John Doe <>',
- {
- 'name': 'John Doe'
- }
- )
-
- _parse_author_string_test(
- 'John Doe ()',
- {
- 'name': 'John Doe'
- }
- )
-
-
def test_artifact_identity():
"""Compute primary key should return the right identity
diff --git a/swh/loader/package/utils.py b/swh/loader/package/utils.py
--- a/swh/loader/package/utils.py
+++ b/swh/loader/package/utils.py
@@ -7,7 +7,6 @@
import logging
import os
import requests
-import re
from typing import Any, Dict, List, Mapping, Optional, Sequence, Tuple
@@ -23,10 +22,6 @@
DOWNLOAD_HASHES = set(['sha1', 'sha256', 'length'])
-# https://github.com/jonschlinkert/author-regex
-_author_regexp = r'([^<(]+?)?[ \t]*(?:<([^>(]+?)>)?[ \t]*(?:\(([^)]+?)\)|$)'
-
-
EMPTY_AUTHOR = Person(
fullname=b'',
name=None,
@@ -127,82 +122,6 @@
return 'releases/%s' % version
-def parse_author(author_str: str) -> Dict[str, str]:
- """
- Parse npm package author string.
-
- It works with a flexible range of formats, as detailed below::
-
- name
- name <email> (url)
- name <email>(url)
- name<email> (url)
- name<email>(url)
- name (url) <email>
- name (url)<email>
- name(url) <email>
- name(url)<email>
- name (url)
- name(url)
- name <email>
- name<email>
- <email> (url)
- <email>(url)
- (url) <email>
- (url)<email>
- <email>
- (url)
-
- Args:
- author_str (str): input author string
-
- Returns:
- dict: A dict that may contain the following keys:
- * name
- * email
- * url
-
- """
- author = {}
- matches = re.findall(_author_regexp,
- author_str.replace('<>', '').replace('()', ''),
- re.M)
- for match in matches:
- if match[0].strip():
- author['name'] = match[0].strip()
- if match[1].strip():
- author['email'] = match[1].strip()
- if match[2].strip():
- author['url'] = match[2].strip()
- return author
-
-
-def swh_author(author: Dict[str, str]) -> Person:
- """Transform an author like dict to an expected swh like dict (values are
- bytes)
-
- """
- name = author.get('name')
- email = author.get('email')
-
- fullname = None
-
- if name and email:
- fullname = '%s <%s>' % (name, email)
- elif name:
- fullname = name
-
- if not fullname:
- r = EMPTY_AUTHOR
- else:
- r = Person(
- fullname=fullname.encode('utf-8') if fullname else b'',
- name=name.encode('utf-8') if name else None,
- email=email.encode('utf-8') if email else None
- )
- return r
-
-
def artifact_identity(d: Mapping[str, Any],
id_keys: Sequence[str]) -> List[Any]:
"""Compute the primary key for a dict using the id_keys as primary key

File Metadata

Mime Type
text/plain
Expires
Thu, Jul 3, 1:05 PM (1 w, 2 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3220918

Event Timeline