Page Menu | Home | Software Heritage

D7052.id25578.diff
No One | Temporary

D7052.id25578.diff

diff --git a/swh/lister/maven/lister.py b/swh/lister/maven/lister.py
--- a/swh/lister/maven/lister.py
+++ b/swh/lister/maven/lister.py
@@ -4,6 +4,7 @@
# See top-level LICENSE file for more information
from dataclasses import asdict, dataclass
+from datetime import datetime, timezone
import logging
import re
from typing import Any, Dict, Iterator, Optional
@@ -274,6 +275,7 @@
"""
assert self.lister_obj.id is not None
+ scm_types_ok = ("git", "svn", "hg", "cvs", "bzr")
if page["type"] == "scm":
# If origin is a scm url: detect scm type and yield.
# Note that the official format is:
@@ -283,11 +285,12 @@
m_scm = re.match(r"^scm:(?P<type>[^:]+):(?P<url>.*)$", page["url"])
if m_scm is not None:
scm_type = m_scm.group("type")
- scm_url = m_scm.group("url")
- origin = ListedOrigin(
- lister_id=self.lister_obj.id, url=scm_url, visit_type=scm_type,
- )
- yield origin
+ if scm_type in scm_types_ok:
+ scm_url = m_scm.group("url")
+ origin = ListedOrigin(
+ lister_id=self.lister_obj.id, url=scm_url, visit_type=scm_type,
+ )
+ yield origin
else:
if page["url"].endswith(".git"):
origin = ListedOrigin(
@@ -296,14 +299,24 @@
yield origin
else:
# Origin is a source archive:
+ last_update_dt = None
+ last_update_iso = ""
+ last_update_seconds = str(page["time"])[:-3]
+ try:
+ last_update_dt = datetime.fromtimestamp(int(last_update_seconds))
+ except OverflowError:
+ logger.warning("- Failed to convert datetime %s.", last_update_seconds)
+ if last_update_dt:
+ last_update_iso = last_update_dt.astimezone(timezone.utc).isoformat()
origin = ListedOrigin(
lister_id=self.lister_obj.id,
url=page["url"],
visit_type=page["type"],
+ last_update=last_update_dt,
extra_loader_arguments={
"artifacts": [
{
- "time": page["time"],
+ "time": last_update_iso,
"gid": page["gid"],
"aid": page["aid"],
"version": page["version"],
diff --git a/swh/lister/maven/tests/data/https_maven.org/sprova4j-0.1.0.malformed.pom b/swh/lister/maven/tests/data/https_maven.org/sprova4j-0.1.0.malformed.pom
new file mode 100644
--- /dev/null
+++ b/swh/lister/maven/tests/data/https_maven.org/sprova4j-0.1.0.malformed.pom
@@ -0,0 +1,86 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd" xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+ <modelVersion>4.0.0</modelVersion>
+ <groupId>al.aldi</groupId>
+ <artifactId>sprova4j</artifactId>
+ <version>0.1.0</version>
+ <name>sprova4j</name>
+ <description>Java client for Sprova Test Management</description>
+ <url>https://github.com/aldialimucaj/sprova4j</url>
+ <inceptionYear>2018</inceptionYear>
+ <licenses>
+ <license>
+ <name>The Apache Software License, Version 2.0</name>
+ <url>http://www.apache.org/licenses/LICENSE-2.0.txt</url>
+ <distribution>repo</distribution>
+ </license>
+ </licenses>
+ <developers>
+ <developer>
+ <id>aldi</id>
+ <name>Aldi Alimucaj</name>
+ <email>aldi.alimucaj@gmail.com</email>
+ </developer>
+ </developers>
+ <scm>
+ <connection>scm:https://github.com/aldialimucaj/sprova4j.git</connection>
+ <developerConnection>scm:ghttps://github.com/aldialimucaj/sprova4j.git</developerConnection>
+ <url>https://github.com/aldialimucaj/sprova4j</url>
+ </scm>
+ <dependencies>
+ <dependency>
+ <groupId>ch.qos.logback</groupId>
+ <artifactId>logback-classic</artifactId>
+ <version>1.2.3</version>
+ <scope>runtime</scope>
+ </dependency>
+ <dependency>
+ <groupId>com.google.code.gson</groupId>
+ <artifactId>gson</artifactId>
+ <version>2.8.3</version>
+ <scope>runtime</scope>
+ </dependency>
+ <dependency>
+ <groupId>com.squareup.okhttp3</groupId>
+ <artifactId>okhttp</artifactId>
+ <version>3.10.0</version>
+ <scope>runtime</scope>
+ </dependency>
+ <dependency>
+ <groupId>com.squareup.okio</groupId>
+ <artifactId>okio</artifactId>
+ <version>1.0.0</version>
+ <scope>runtime</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.glassfish</groupId>
+ <artifactId>javax.json</artifactId>
+ <version>1.1.2</version>
+ <scope>runtime</scope>
+ </dependency>
+ <dependency>
+ <groupId>javax.json</groupId>
+ <artifactId>javax.json-api</artifactId>
+ <version>1.1.2</version>
+ <scope>runtime</scope>
+ </dependency>
+ <dependency>
+ <groupId>javax.validation</groupId>
+ <artifactId>validation-api</artifactId>
+ <version>2.0.1.Final</version>
+ <scope>runtime</scope>
+ </dependency>
+ <dependency>
+ <groupId>junit</groupId>
+ <artifactId>junit</artifactId>
+ <version>4.12</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>com.squareup.okhttp3</groupId>
+ <artifactId>mockwebserver</artifactId>
+ <version>3.10.0</version>
+ <scope>test</scope>
+ </dependency>
+ </dependencies>
+</project>
diff --git a/swh/lister/maven/tests/test_lister.py b/swh/lister/maven/tests/test_lister.py
--- a/swh/lister/maven/tests/test_lister.py
+++ b/swh/lister/maven/tests/test_lister.py
@@ -3,8 +3,10 @@
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
+from datetime import timezone
from pathlib import Path
+import iso8601
import pytest
import requests
@@ -34,7 +36,7 @@
"type": "maven",
"url": "https://repo1.maven.org/maven2/al/aldi/sprova4j"
+ "/0.1.0/sprova4j-0.1.0-sources.jar",
- "time": 1626109619335,
+ "time": "2021-07-12T17:06:59+00:00",
"gid": "al.aldi",
"aid": "sprova4j",
"version": "0.1.0",
@@ -43,7 +45,7 @@
"type": "maven",
"url": "https://repo1.maven.org/maven2/al/aldi/sprova4j"
+ "/0.1.1/sprova4j-0.1.1-sources.jar",
- "time": 1626111425534,
+ "time": "2021-07-12T17:37:05+00:00",
"gid": "al.aldi",
"aid": "sprova4j",
"version": "0.1.1",
@@ -69,6 +71,12 @@
return text
+@pytest.fixture
+def maven_pom_1_malformed(datadir) -> str:
+ text = Path(datadir, "https_maven.org", "sprova4j-0.1.0.malformed.pom").read_text()
+ return text
+
+
@pytest.fixture
def maven_pom_2(datadir) -> str:
text = Path(datadir, "https_maven.org", "sprova4j-0.1.1.pom").read_text()
@@ -113,6 +121,67 @@
origin_urls = [origin.url for origin in scheduler_origins]
assert sorted(origin_urls) == sorted(LIST_GIT + LIST_SRC)
+ for origin in scheduler_origins:
+ if origin.visit_type == "maven":
+ for src in LIST_SRC_DATA:
+ if src.get("url") == origin.url:
+ last_update_src = iso8601.parse_date(src.get("time")).astimezone(
+ tz=timezone.utc
+ )
+ assert last_update_src == origin.last_update
+ artifact = origin.extra_loader_arguments["artifacts"][0]
+ assert src.get("time") == artifact["time"]
+ assert src.get("gid") == artifact["gid"]
+ assert src.get("aid") == artifact["aid"]
+ assert src.get("version") == artifact["version"]
+ assert MVN_URL == artifact["base_url"]
+ break
+ else:
+ raise AssertionError
+ scheduler_state = lister.get_state_from_scheduler()
+ assert scheduler_state is not None
+ assert scheduler_state.last_seen_doc == -1
+ assert scheduler_state.last_seen_pom == -1
+
+
+def test_maven_full_listing_malformed(
+ swh_scheduler,
+ requests_mock,
+ mocker,
+ maven_index,
+ maven_pom_1_malformed,
+ maven_pom_2,
+):
+ """Covers full listing of multiple pages, checking page results with a malformed
+ scm entry in pom."""
+
+ lister = MavenLister(
+ scheduler=swh_scheduler,
+ url=MVN_URL,
+ instance="maven.org",
+ index_url=INDEX_URL,
+ incremental=False,
+ )
+
+ # Set up test.
+ index_text = maven_index
+ requests_mock.get(INDEX_URL, text=index_text)
+ requests_mock.get(URL_POM_1, text=maven_pom_1_malformed)
+ requests_mock.get(URL_POM_2, text=maven_pom_2)
+
+ # Then run the lister.
+ stats = lister.run()
+
+ # Start test checks.
+ assert stats.pages == 4
+ assert stats.origins == 3
+
+ scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results
+
+ origin_urls = [origin.url for origin in scheduler_origins]
+ LIST_SRC_1 = ("https://github.com/aldialimucaj/sprova4j.git",)
+ assert sorted(origin_urls) == sorted(LIST_SRC_1 + LIST_SRC)
+
for origin in scheduler_origins:
if origin.visit_type == "maven":
for src in LIST_SRC_DATA:

File Metadata

Mime Type
text/plain
Expires
Dec 20 2024, 8:19 PM (11 w, 4 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3217514

Event Timeline