Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F9342903
D2743.id9906.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
9 KB
Subscribers
None
D2743.id9906.diff
View Options
diff --git a/requirements-swh.txt b/requirements-swh.txt
--- a/requirements-swh.txt
+++ b/requirements-swh.txt
@@ -1,4 +1,4 @@
swh.core >= 0.0.75
-swh.model >= 0.0.57
+swh.model >= 0.0.60
swh.scheduler
swh.storage >= 0.0.163
diff --git a/swh/loader/package/cran/loader.py b/swh/loader/package/cran/loader.py
--- a/swh/loader/package/cran/loader.py
+++ b/swh/loader/package/cran/loader.py
@@ -17,10 +17,10 @@
from swh.loader.package.loader import PackageLoader
from swh.loader.package.utils import (
- release_name, parse_author, swh_author, artifact_identity
+ release_name, artifact_identity
)
from swh.model.model import (
- TimestampWithTimezone, Sha1Git, Revision, RevisionType,
+ Person, TimestampWithTimezone, Sha1Git, Revision, RevisionType,
)
@@ -92,7 +92,7 @@
# a_metadata is empty
metadata = extract_intrinsic_metadata(uncompressed_path)
date = parse_date(metadata.get('Date'))
- author = swh_author(parse_author(metadata.get('Maintainer', {})))
+ author = Person.from_fullname(metadata.get('Maintainer', '').encode())
version = metadata.get('Version', a_metadata['version'])
return Revision(
message=version.encode('utf-8'),
diff --git a/swh/loader/package/npm/loader.py b/swh/loader/package/npm/loader.py
--- a/swh/loader/package/npm/loader.py
+++ b/swh/loader/package/npm/loader.py
@@ -20,7 +20,7 @@
from swh.loader.package.loader import PackageLoader
from swh.loader.package.utils import (
- api_info, release_name, parse_author, swh_author
+ api_info, release_name
)
@@ -207,13 +207,12 @@
else:
return author_data
- author_data: Dict = {}
for author_key in ('author', 'authors'):
if author_key in package_json:
author_str = _author_str(package_json[author_key])
- author_data = parse_author(author_str)
+ return Person.from_fullname(author_str.encode())
- return swh_author(author_data)
+ return Person(fullname=b'', name=None, email=None)
def _lstrip_bom(s, bom=BOM_UTF8):
diff --git a/swh/loader/package/npm/tests/test_npm.py b/swh/loader/package/npm/tests/test_npm.py
--- a/swh/loader/package/npm/tests/test_npm.py
+++ b/swh/loader/package/npm/tests/test_npm.py
@@ -131,9 +131,9 @@
assert extract_npm_package_author(package_json) == \
Person(
- fullname=b'fengmk2 <fengmk2@gmail.com>',
+ fullname=b'fengmk2 <fengmk2@gmail.com> (https://fengmk2.com)',
name=b'fengmk2',
- email=b'fengmk2@gmail.com'
+ email=b'fengmk2@gmail.com',
)
package_json = json.loads('''
diff --git a/swh/loader/package/tests/test_utils.py b/swh/loader/package/tests/test_utils.py
--- a/swh/loader/package/tests/test_utils.py
+++ b/swh/loader/package/tests/test_utils.py
@@ -10,7 +10,7 @@
import swh.loader.package
from swh.loader.package.utils import (
- download, api_info, release_name, parse_author, artifact_identity
+ download, api_info, release_name, artifact_identity
)
@@ -159,148 +159,6 @@
assert release_name(version, filename) == expected_release
-def _parse_author_string_test(author_str, expected_result):
- assert parse_author(author_str) == expected_result
- assert parse_author(' %s' % author_str) == expected_result
- assert parse_author('%s ' % author_str) == expected_result
-
-
-def test_parse_author():
- _parse_author_string_test(
- 'John Doe',
- {
- 'name': 'John Doe'
- }
- )
-
- _parse_author_string_test(
- '<john.doe@foo.bar>',
- {
- 'email': 'john.doe@foo.bar'
- }
- )
-
- _parse_author_string_test(
- '(https://john.doe)',
- {
- 'url': 'https://john.doe'
- }
- )
-
- _parse_author_string_test(
- 'John Doe <john.doe@foo.bar>',
- {
- 'name': 'John Doe',
- 'email': 'john.doe@foo.bar'
- }
- )
-
- _parse_author_string_test(
- 'John Doe<john.doe@foo.bar>',
- {
- 'name': 'John Doe',
- 'email': 'john.doe@foo.bar'
- }
- )
-
- _parse_author_string_test(
- 'John Doe (https://john.doe)',
- {
- 'name': 'John Doe',
- 'url': 'https://john.doe'
- }
- )
-
- _parse_author_string_test(
- 'John Doe(https://john.doe)',
- {
- 'name': 'John Doe',
- 'url': 'https://john.doe'
- }
- )
-
- _parse_author_string_test(
- '<john.doe@foo.bar> (https://john.doe)',
- {
- 'email': 'john.doe@foo.bar',
- 'url': 'https://john.doe'
- }
- )
-
- _parse_author_string_test(
- '(https://john.doe) <john.doe@foo.bar>',
- {
- 'email': 'john.doe@foo.bar',
- 'url': 'https://john.doe'
- }
- )
-
- _parse_author_string_test(
- 'John Doe <john.doe@foo.bar> (https://john.doe)',
- {
- 'name': 'John Doe',
- 'email': 'john.doe@foo.bar',
- 'url': 'https://john.doe'
- }
- )
-
- _parse_author_string_test(
- 'John Doe (https://john.doe) <john.doe@foo.bar>',
- {
- 'name': 'John Doe',
- 'email': 'john.doe@foo.bar',
- 'url': 'https://john.doe'
- }
- )
-
- _parse_author_string_test(
- 'John Doe<john.doe@foo.bar> (https://john.doe)',
- {
- 'name': 'John Doe',
- 'email': 'john.doe@foo.bar',
- 'url': 'https://john.doe'
- }
- )
-
- _parse_author_string_test(
- 'John Doe<john.doe@foo.bar>(https://john.doe)',
- {
- 'name': 'John Doe',
- 'email': 'john.doe@foo.bar',
- 'url': 'https://john.doe'
- }
- )
-
- _parse_author_string_test('', {})
- _parse_author_string_test('<>', {})
- _parse_author_string_test(' <>', {})
- _parse_author_string_test('<>()', {})
- _parse_author_string_test('<> ()', {})
- _parse_author_string_test('()', {})
- _parse_author_string_test(' ()', {})
-
- _parse_author_string_test(
- 'John Doe <> ()',
- {
- 'name': 'John Doe'
- }
- )
-
- _parse_author_string_test(
- 'John Doe <>',
- {
- 'name': 'John Doe'
- }
- )
-
- _parse_author_string_test(
- 'John Doe ()',
- {
- 'name': 'John Doe'
- }
- )
-
-
def test_artifact_identity():
"""Compute primary key should return the right identity
diff --git a/swh/loader/package/utils.py b/swh/loader/package/utils.py
--- a/swh/loader/package/utils.py
+++ b/swh/loader/package/utils.py
@@ -7,7 +7,6 @@
import logging
import os
import requests
-import re
from typing import Any, Dict, List, Mapping, Optional, Sequence, Tuple
@@ -23,10 +22,6 @@
DOWNLOAD_HASHES = set(['sha1', 'sha256', 'length'])
-# https://github.com/jonschlinkert/author-regex
-_author_regexp = r'([^<(]+?)?[ \t]*(?:<([^>(]+?)>)?[ \t]*(?:\(([^)]+?)\)|$)'
-
-
EMPTY_AUTHOR = Person(
fullname=b'',
name=None,
@@ -127,82 +122,6 @@
return 'releases/%s' % version
-def parse_author(author_str: str) -> Dict[str, str]:
- """
- Parse npm package author string.
-
- It works with a flexible range of formats, as detailed below::
-
- name
- name <email> (url)
- name <email>(url)
- name<email> (url)
- name<email>(url)
- name (url) <email>
- name (url)<email>
- name(url) <email>
- name(url)<email>
- name (url)
- name(url)
- name <email>
- name<email>
- <email> (url)
- <email>(url)
- (url) <email>
- (url)<email>
- <email>
- (url)
-
- Args:
- author_str (str): input author string
-
- Returns:
- dict: A dict that may contain the following keys:
- * name
- * email
- * url
-
- """
- author = {}
- matches = re.findall(_author_regexp,
- author_str.replace('<>', '').replace('()', ''),
- re.M)
- for match in matches:
- if match[0].strip():
- author['name'] = match[0].strip()
- if match[1].strip():
- author['email'] = match[1].strip()
- if match[2].strip():
- author['url'] = match[2].strip()
- return author
-
-
-def swh_author(author: Dict[str, str]) -> Person:
- """Transform an author like dict to an expected swh like dict (values are
- bytes)
-
- """
- name = author.get('name')
- email = author.get('email')
-
- fullname = None
-
- if name and email:
- fullname = '%s <%s>' % (name, email)
- elif name:
- fullname = name
-
- if not fullname:
- r = EMPTY_AUTHOR
- else:
- r = Person(
- fullname=fullname.encode('utf-8') if fullname else b'',
- name=name.encode('utf-8') if name else None,
- email=email.encode('utf-8') if email else None
- )
- return r
-
-
def artifact_identity(d: Mapping[str, Any],
id_keys: Sequence[str]) -> List[Any]:
"""Compute the primary key for a dict using the id_keys as primary key
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Thu, Jul 3, 1:05 PM (1 w, 2 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3220918
Attached To
D2743: Move Person parsing to swh-model.
Event Timeline
Log In to Comment