Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/package/cran/tests/test_cran.py
- This file was added.
# Copyright (C) 2019 The Software Heritage developers | |||||
# See the AUTHORS file at the top-level directory of this distribution | |||||
# License: GNU General Public License version 3, or any later version | |||||
# See top-level LICENSE file for more information | |||||
import os | |||||
import pytest | |||||
from datetime import datetime, timezone | |||||
from dateutil.tz import tzlocal | |||||
from os import path | |||||
from swh.loader.package.cran.loader import ( | |||||
extract_intrinsic_metadata, CRANLoader, parse_date | |||||
) | |||||
from swh.core.tarball import uncompress | |||||
def test_parse_date():
    """Check ``parse_date`` against date strings written in many formats.

    Fully specified inputs must parse to the exact expected datetime and
    unparsable inputs must yield ``None``.

    Month-only inputs ('2011-01', '2014-07') are only checked on their year,
    month, time and timezone: their expected day used to be hard-coded to 17,
    the day of the month on which the test was written, and the test failed
    when run on any other day.
    """
    data = [
        # parsable, some have debatable results though
        ('2001-June-08',
         datetime(2001, 6, 8, 0, 0, tzinfo=timezone.utc)),
        ('Tue Dec 27 15:06:08 PST 2011',
         datetime(2011, 12, 27, 15, 6, 8, tzinfo=timezone.utc)),
        ('8-14-2013',
         datetime(2013, 8, 14, 0, 0, tzinfo=timezone.utc)),
        ('04-12-2014',
         datetime(2014, 4, 12, 0, 0, tzinfo=timezone.utc)),
        ('2018-08-24, 10:40:10',
         datetime(2018, 8, 24, 10, 40, 10, tzinfo=timezone.utc)),
        ('2013-October-16',
         datetime(2013, 10, 16, 0, 0, tzinfo=timezone.utc)),
        ('Aug 23, 2013',
         datetime(2013, 8, 23, 0, 0, tzinfo=timezone.utc)),
        ('27-11-2014',
         datetime(2014, 11, 27, 0, 0, tzinfo=timezone.utc)),
        ('19-02-2013',
         datetime(2013, 2, 19, 0, 0, tzinfo=timezone.utc)),
        ('2019-09-26,',
         datetime(2019, 9, 26, 0, 0, tzinfo=timezone.utc)),
        ('9/25/2014',
         datetime(2014, 9, 25, 0, 0, tzinfo=timezone.utc)),
        ('Fri Jun 27 17:23:53 2014',
         datetime(2014, 6, 27, 17, 23, 53, tzinfo=timezone.utc)),
        ('28-04-2014',
         datetime(2014, 4, 28, 0, 0, tzinfo=timezone.utc)),
        ('04-14-2014',
         datetime(2014, 4, 14, 0, 0, tzinfo=timezone.utc)),
        ('2019-05-08 14:17:31 UTC',
         datetime(2019, 5, 8, 14, 17, 31, tzinfo=timezone.utc)),
        # NOTE(review): the expected value below uses the local timezone, so
        # this case may depend on the TZ of the machine running the test --
        # to be confirmed.
        ('Wed May 21 13:50:39 CEST 2014',
         datetime(2014, 5, 21, 13, 50, 39, tzinfo=tzlocal())),
        ('2018-04-10 00:01:04 KST',
         datetime(2018, 4, 10, 0, 1, 4, tzinfo=timezone.utc)),
        ('2019-08-25 10:45',
         datetime(2019, 8, 25, 10, 45, tzinfo=timezone.utc)),
        ('March 9, 2015',
         datetime(2015, 3, 9, 0, 0, tzinfo=timezone.utc)),
        ('Aug. 18, 2012',
         datetime(2012, 8, 18, 0, 0, tzinfo=timezone.utc)),
        ('2014-Dec-17',
         datetime(2014, 12, 17, 0, 0, tzinfo=timezone.utc)),
        ('March 01, 2013',
         datetime(2013, 3, 1, 0, 0, tzinfo=timezone.utc)),
        ('2017-04-08.',
         datetime(2017, 4, 8, 0, 0, tzinfo=timezone.utc)),
        ('2014-Apr-22',
         datetime(2014, 4, 22, 0, 0, tzinfo=timezone.utc)),
        ('Mon Jan 12 19:54:04 2015',
         datetime(2015, 1, 12, 19, 54, 4, tzinfo=timezone.utc)),
        ('May 22, 2014',
         datetime(2014, 5, 22, 0, 0, tzinfo=timezone.utc)),
        ('2014-08-12 09:55:10 EDT',
         datetime(2014, 8, 12, 9, 55, 10, tzinfo=timezone.utc)),
        # Review note (ardumont, marked done): need to add loading tests
        # scenario here.
        # unparsable
        ('Fabruary 21, 2012', None),
        ('2019-05-28"', None),
        ('2017-03-01 today', None),
        ('2016-11-0110.1093/icesjms/fsw182', None),
        ('2019-07-010', None),
        ('2015-02.23', None),
        ('20013-12-30', None),
        ('2016-08-017', None),
        ('2019-02-07l', None),
        ('2018-05-010', None),
        ('2019-09-27 KST', None),
        ('$Date$', None),
        ('2019-06-22 $Date$', None),
        ('$Date: 2013-01-18 12:49:03 -0600 (Fri, 18 Jan 2013) $', None),
        ('2015-7-013', None),
        ('2018-05-023', None),
        ("Check NEWS file for changes: news(package='simSummary')", None),
    ]
    for raw_date, expected_date in data:
        # The raw input is used as assertion message to pinpoint the
        # offending entry when the test fails.
        assert parse_date(raw_date) == expected_date, raw_date

    # Review note (ardumont): "oh no! This peculiar case is dependent on the
    # day this is computed. It was on the 17th."
    # The inputs below carry no day, so the day of the parsed value is not
    # compared: it is normalised to the first of the month on the actual
    # value before the comparison.
    month_only_data = [
        ('2011-01',
         datetime(2011, 1, 1, 0, 0, tzinfo=timezone.utc)),
        ('2014-07',
         datetime(2014, 7, 1, 0, 0, tzinfo=timezone.utc)),
    ]
    for raw_date, expected_month_start in month_only_data:
        actual_date = parse_date(raw_date)
        assert actual_date is not None, raw_date
        assert actual_date.replace(day=1) == expected_month_start, raw_date
@pytest.mark.fs
def test_extract_intrinsic_metadata(tmp_path, datadir):
    """Intrinsic metadata of an uncompressed sample archive is fully parsed"""
    # The sample archive is looked up in the test data directory; it stands
    # for the url
    # https://cran.r-project.org/src_contrib_1.4.0_Recommended_KernSmooth_2.22-6.tar.gz # noqa
    sample_archive = path.join(
        datadir,
        'https_cran.r-project.org',
        'src_contrib_1.4.0_Recommended_KernSmooth_2.22-6.tar.gz',
    )

    # Unpack the archive into the per-test temporary directory
    extraction_dir = str(tmp_path)
    uncompress(sample_archive, dest=extraction_dir)
    parsed_metadata = extract_intrinsic_metadata(extraction_dir)

    assert parsed_metadata == {
        'Package': 'KernSmooth',
        'Priority': 'recommended',
        'Version': '2.22-6',
        'Date': '2001-June-08',
        'Title': 'Functions for kernel smoothing for Wand & Jones (1995)',
        'Author': 'S original by Matt Wand.\n\tR port by Brian Ripley <ripley@stats.ox.ac.uk>.',  # noqa
        'Maintainer': 'Brian Ripley <ripley@stats.ox.ac.uk>',
        'Description': 'functions for kernel smoothing (and density estimation)\n corresponding to the book: \n Wand, M.P. and Jones, M.C. (1995) "Kernel Smoothing".',  # noqa
        'License': 'Unlimited use and distribution (see LICENCE).',
        'URL': 'http://www.biostat.harvard.edu/~mwand',
    }
@pytest.mark.fs
def test_extract_intrinsic_metadata_failures(tmp_path):
    """Paths holding no metadata to parse yield an empty dict"""
    no_metadata = {}

    # a path which does not exist
    assert extract_intrinsic_metadata('/something-inexistent') == no_metadata

    # the temporary directory itself, before anything is created inside it
    assert extract_intrinsic_metadata(tmp_path) == no_metadata

    # the same directory once it holds a single, empty, sub-directory
    os.mkdir(str(tmp_path / 'something'))
    assert extract_intrinsic_metadata(tmp_path) == no_metadata
def test_loader_cran(swh_config, requests_mock_datadir):
    """Loading the sample archive is eventful and produces a snapshot"""
    archive_url = 'https://cran.r-project.org/src_contrib_1.4.0_Recommended_KernSmooth_2.22-6.tar.gz'  # noqa
    load_status = CRANLoader(archive_url, version='2.22-6').load()

    assert load_status['status'] == 'eventful'
    assert load_status['snapshot_id'] is not None

    # Checks on the resulting revision, currently disabled:
    #
    # expected_revision_id = hash_to_bytes(
    #     'd8a1c7474d2956ac598a19f0f27d52f7015f117e')
    # revision = list(loader.storage.revision_get([expected_revision_id]))[0]
    # assert revision is not None
    # check_metadata_paths(revision['metadata'], paths=[
    #     ('intrinsic.tool', str),
    #     ('intrinsic.raw', dict),
    #     ('extrinsic.provider', str),
    #     ('extrinsic.when', str),
    #     ('extrinsic.raw', dict),
    #     ('original_artifact', list),
    # ])
    # for original_artifact in revision['metadata']['original_artifact']:
    #     check_metadata_paths(original_artifact, paths=[
    #         ('filename', str),
    #         ('length', int),
    #         ('checksums', dict),
    #     ])
Oh no!
This peculiar case depends on the day on which it is computed.
It was computed on the 17th (snippet elided).
Today, the 18th, it fails (resulting value elided).
Adding a corner-case workaround for that (T.T)