Page MenuHomeSoftware Heritage

D7052.diff
No OneTemporary

D7052.diff

diff --git a/swh/lister/maven/lister.py b/swh/lister/maven/lister.py
--- a/swh/lister/maven/lister.py
+++ b/swh/lister/maven/lister.py
@@ -274,6 +274,7 @@
"""
assert self.lister_obj.id is not None
+ scm_types_ok = ("git", "svn", "hg", "cvs", "bzr")
if page["type"] == "scm":
# If origin is a scm url: detect scm type and yield.
# Note that the official format is:
@@ -283,11 +284,12 @@
m_scm = re.match(r"^scm:(?P<type>[^:]+):(?P<url>.*)$", page["url"])
if m_scm is not None:
scm_type = m_scm.group("type")
- scm_url = m_scm.group("url")
- origin = ListedOrigin(
- lister_id=self.lister_obj.id, url=scm_url, visit_type=scm_type,
- )
- yield origin
+ if scm_type in scm_types_ok:
+ scm_url = m_scm.group("url")
+ origin = ListedOrigin(
+ lister_id=self.lister_obj.id, url=scm_url, visit_type=scm_type,
+ )
+ yield origin
else:
if page["url"].endswith(".git"):
origin = ListedOrigin(
diff --git a/swh/lister/maven/tests/data/https_maven.org/sprova4j-0.1.0.malformed.pom b/swh/lister/maven/tests/data/https_maven.org/sprova4j-0.1.0.malformed.pom
new file mode 100644
--- /dev/null
+++ b/swh/lister/maven/tests/data/https_maven.org/sprova4j-0.1.0.malformed.pom
@@ -0,0 +1,86 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd" xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+ <modelVersion>4.0.0</modelVersion>
+ <groupId>al.aldi</groupId>
+ <artifactId>sprova4j</artifactId>
+ <version>0.1.0</version>
+ <name>sprova4j</name>
+ <description>Java client for Sprova Test Management</description>
+ <url>https://github.com/aldialimucaj/sprova4j</url>
+ <inceptionYear>2018</inceptionYear>
+ <licenses>
+ <license>
+ <name>The Apache Software License, Version 2.0</name>
+ <url>http://www.apache.org/licenses/LICENSE-2.0.txt</url>
+ <distribution>repo</distribution>
+ </license>
+ </licenses>
+ <developers>
+ <developer>
+ <id>aldi</id>
+ <name>Aldi Alimucaj</name>
+ <email>aldi.alimucaj@gmail.com</email>
+ </developer>
+ </developers>
+ <scm>
+ <connection>scm:https://github.com/aldialimucaj/sprova4j.git</connection>
+ <developerConnection>scm:ghttps://github.com/aldialimucaj/sprova4j.git</developerConnection>
+ <url>https://github.com/aldialimucaj/sprova4j</url>
+ </scm>
+ <dependencies>
+ <dependency>
+ <groupId>ch.qos.logback</groupId>
+ <artifactId>logback-classic</artifactId>
+ <version>1.2.3</version>
+ <scope>runtime</scope>
+ </dependency>
+ <dependency>
+ <groupId>com.google.code.gson</groupId>
+ <artifactId>gson</artifactId>
+ <version>2.8.3</version>
+ <scope>runtime</scope>
+ </dependency>
+ <dependency>
+ <groupId>com.squareup.okhttp3</groupId>
+ <artifactId>okhttp</artifactId>
+ <version>3.10.0</version>
+ <scope>runtime</scope>
+ </dependency>
+ <dependency>
+ <groupId>com.squareup.okio</groupId>
+ <artifactId>okio</artifactId>
+ <version>1.0.0</version>
+ <scope>runtime</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.glassfish</groupId>
+ <artifactId>javax.json</artifactId>
+ <version>1.1.2</version>
+ <scope>runtime</scope>
+ </dependency>
+ <dependency>
+ <groupId>javax.json</groupId>
+ <artifactId>javax.json-api</artifactId>
+ <version>1.1.2</version>
+ <scope>runtime</scope>
+ </dependency>
+ <dependency>
+ <groupId>javax.validation</groupId>
+ <artifactId>validation-api</artifactId>
+ <version>2.0.1.Final</version>
+ <scope>runtime</scope>
+ </dependency>
+ <dependency>
+ <groupId>junit</groupId>
+ <artifactId>junit</artifactId>
+ <version>4.12</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>com.squareup.okhttp3</groupId>
+ <artifactId>mockwebserver</artifactId>
+ <version>3.10.0</version>
+ <scope>test</scope>
+ </dependency>
+ </dependencies>
+</project>
diff --git a/swh/lister/maven/tests/test_lister.py b/swh/lister/maven/tests/test_lister.py
--- a/swh/lister/maven/tests/test_lister.py
+++ b/swh/lister/maven/tests/test_lister.py
@@ -69,6 +69,12 @@
return text
+@pytest.fixture
+def maven_pom_1_malformed(datadir) -> str:
+ text = Path(datadir, "https_maven.org", "sprova4j-0.1.0.malformed.pom").read_text()
+ return text
+
+
@pytest.fixture
def maven_pom_2(datadir) -> str:
text = Path(datadir, "https_maven.org", "sprova4j-0.1.1.pom").read_text()
@@ -132,6 +138,63 @@
assert scheduler_state.last_seen_pom == -1
+def test_maven_full_listing_malformed(
+ swh_scheduler,
+ requests_mock,
+ mocker,
+ maven_index,
+ maven_pom_1_malformed,
+ maven_pom_2,
+):
+ """Covers full listing of multiple pages, checking page results with a malformed
+ scm entry in pom."""
+
+ lister = MavenLister(
+ scheduler=swh_scheduler,
+ url=MVN_URL,
+ instance="maven.org",
+ index_url=INDEX_URL,
+ incremental=False,
+ )
+
+ # Set up test.
+ index_text = maven_index
+ requests_mock.get(INDEX_URL, text=index_text)
+ requests_mock.get(URL_POM_1, text=maven_pom_1_malformed)
+ requests_mock.get(URL_POM_2, text=maven_pom_2)
+
+ # Then run the lister.
+ stats = lister.run()
+
+ # Start test checks.
+ assert stats.pages == 4
+ assert stats.origins == 3
+
+ scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results
+
+ origin_urls = [origin.url for origin in scheduler_origins]
+ LIST_SRC_1 = ("https://github.com/aldialimucaj/sprova4j.git",)
+ assert sorted(origin_urls) == sorted(LIST_SRC_1 + LIST_SRC)
+
+ for origin in scheduler_origins:
+ if origin.visit_type == "maven":
+ for src in LIST_SRC_DATA:
+ if src.get("url") == origin.url:
+ artifact = origin.extra_loader_arguments["artifacts"][0]
+ assert src.get("time") == artifact["time"]
+ assert src.get("gid") == artifact["gid"]
+ assert src.get("aid") == artifact["aid"]
+ assert src.get("version") == artifact["version"]
+ assert MVN_URL == artifact["base_url"]
+ break
+ else:
+ raise AssertionError
+ scheduler_state = lister.get_state_from_scheduler()
+ assert scheduler_state is not None
+ assert scheduler_state.last_seen_doc == -1
+ assert scheduler_state.last_seen_pom == -1
+
+
def test_maven_incremental_listing(
swh_scheduler,
requests_mock,

File Metadata

Mime Type
text/plain
Expires
Dec 20 2024, 4:39 PM (11 w, 4 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3221119

Event Timeline