Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7124120
D7052.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
6 KB
Subscribers
None
D7052.diff
View Options
diff --git a/swh/lister/maven/lister.py b/swh/lister/maven/lister.py
--- a/swh/lister/maven/lister.py
+++ b/swh/lister/maven/lister.py
@@ -274,6 +274,7 @@
"""
assert self.lister_obj.id is not None
+ scm_types_ok = ("git", "svn", "hg", "cvs", "bzr")
if page["type"] == "scm":
# If origin is a scm url: detect scm type and yield.
# Note that the official format is:
@@ -283,11 +284,12 @@
m_scm = re.match(r"^scm:(?P<type>[^:]+):(?P<url>.*)$", page["url"])
if m_scm is not None:
scm_type = m_scm.group("type")
- scm_url = m_scm.group("url")
- origin = ListedOrigin(
- lister_id=self.lister_obj.id, url=scm_url, visit_type=scm_type,
- )
- yield origin
+ if scm_type in scm_types_ok:
+ scm_url = m_scm.group("url")
+ origin = ListedOrigin(
+ lister_id=self.lister_obj.id, url=scm_url, visit_type=scm_type,
+ )
+ yield origin
else:
if page["url"].endswith(".git"):
origin = ListedOrigin(
diff --git a/swh/lister/maven/tests/data/https_maven.org/sprova4j-0.1.0.malformed.pom b/swh/lister/maven/tests/data/https_maven.org/sprova4j-0.1.0.malformed.pom
new file mode 100644
--- /dev/null
+++ b/swh/lister/maven/tests/data/https_maven.org/sprova4j-0.1.0.malformed.pom
@@ -0,0 +1,86 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd" xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+ <modelVersion>4.0.0</modelVersion>
+ <groupId>al.aldi</groupId>
+ <artifactId>sprova4j</artifactId>
+ <version>0.1.0</version>
+ <name>sprova4j</name>
+ <description>Java client for Sprova Test Management</description>
+ <url>https://github.com/aldialimucaj/sprova4j</url>
+ <inceptionYear>2018</inceptionYear>
+ <licenses>
+ <license>
+ <name>The Apache Software License, Version 2.0</name>
+ <url>http://www.apache.org/licenses/LICENSE-2.0.txt</url>
+ <distribution>repo</distribution>
+ </license>
+ </licenses>
+ <developers>
+ <developer>
+ <id>aldi</id>
+ <name>Aldi Alimucaj</name>
+ <email>aldi.alimucaj@gmail.com</email>
+ </developer>
+ </developers>
+ <scm>
+ <connection>scm:https://github.com/aldialimucaj/sprova4j.git</connection>
+ <developerConnection>scm:ghttps://github.com/aldialimucaj/sprova4j.git</developerConnection>
+ <url>https://github.com/aldialimucaj/sprova4j</url>
+ </scm>
+ <dependencies>
+ <dependency>
+ <groupId>ch.qos.logback</groupId>
+ <artifactId>logback-classic</artifactId>
+ <version>1.2.3</version>
+ <scope>runtime</scope>
+ </dependency>
+ <dependency>
+ <groupId>com.google.code.gson</groupId>
+ <artifactId>gson</artifactId>
+ <version>2.8.3</version>
+ <scope>runtime</scope>
+ </dependency>
+ <dependency>
+ <groupId>com.squareup.okhttp3</groupId>
+ <artifactId>okhttp</artifactId>
+ <version>3.10.0</version>
+ <scope>runtime</scope>
+ </dependency>
+ <dependency>
+ <groupId>com.squareup.okio</groupId>
+ <artifactId>okio</artifactId>
+ <version>1.0.0</version>
+ <scope>runtime</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.glassfish</groupId>
+ <artifactId>javax.json</artifactId>
+ <version>1.1.2</version>
+ <scope>runtime</scope>
+ </dependency>
+ <dependency>
+ <groupId>javax.json</groupId>
+ <artifactId>javax.json-api</artifactId>
+ <version>1.1.2</version>
+ <scope>runtime</scope>
+ </dependency>
+ <dependency>
+ <groupId>javax.validation</groupId>
+ <artifactId>validation-api</artifactId>
+ <version>2.0.1.Final</version>
+ <scope>runtime</scope>
+ </dependency>
+ <dependency>
+ <groupId>junit</groupId>
+ <artifactId>junit</artifactId>
+ <version>4.12</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>com.squareup.okhttp3</groupId>
+ <artifactId>mockwebserver</artifactId>
+ <version>3.10.0</version>
+ <scope>test</scope>
+ </dependency>
+ </dependencies>
+</project>
diff --git a/swh/lister/maven/tests/test_lister.py b/swh/lister/maven/tests/test_lister.py
--- a/swh/lister/maven/tests/test_lister.py
+++ b/swh/lister/maven/tests/test_lister.py
@@ -69,6 +69,12 @@
return text
+@pytest.fixture
+def maven_pom_1_malformed(datadir) -> str:
+ text = Path(datadir, "https_maven.org", "sprova4j-0.1.0.malformed.pom").read_text()
+ return text
+
+
@pytest.fixture
def maven_pom_2(datadir) -> str:
text = Path(datadir, "https_maven.org", "sprova4j-0.1.1.pom").read_text()
@@ -132,6 +138,63 @@
assert scheduler_state.last_seen_pom == -1
+def test_maven_full_listing_malformed(
+ swh_scheduler,
+ requests_mock,
+ mocker,
+ maven_index,
+ maven_pom_1_malformed,
+ maven_pom_2,
+):
+ """Covers a full listing of multiple pages, checking the page results when a pom
+ contains a malformed scm entry."""
+
+ lister = MavenLister(
+ scheduler=swh_scheduler,
+ url=MVN_URL,
+ instance="maven.org",
+ index_url=INDEX_URL,
+ incremental=False,
+ )
+
+ # Set up test.
+ index_text = maven_index
+ requests_mock.get(INDEX_URL, text=index_text)
+ requests_mock.get(URL_POM_1, text=maven_pom_1_malformed)
+ requests_mock.get(URL_POM_2, text=maven_pom_2)
+
+ # Then run the lister.
+ stats = lister.run()
+
+ # Start test checks.
+ assert stats.pages == 4
+ assert stats.origins == 3
+
+ scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results
+
+ origin_urls = [origin.url for origin in scheduler_origins]
+ LIST_SRC_1 = ("https://github.com/aldialimucaj/sprova4j.git",)
+ assert sorted(origin_urls) == sorted(LIST_SRC_1 + LIST_SRC)
+
+ for origin in scheduler_origins:
+ if origin.visit_type == "maven":
+ for src in LIST_SRC_DATA:
+ if src.get("url") == origin.url:
+ artifact = origin.extra_loader_arguments["artifacts"][0]
+ assert src.get("time") == artifact["time"]
+ assert src.get("gid") == artifact["gid"]
+ assert src.get("aid") == artifact["aid"]
+ assert src.get("version") == artifact["version"]
+ assert MVN_URL == artifact["base_url"]
+ break
+ else:
+ raise AssertionError
+ scheduler_state = lister.get_state_from_scheduler()
+ assert scheduler_state is not None
+ assert scheduler_state.last_seen_doc == -1
+ assert scheduler_state.last_seen_pom == -1
+
+
def test_maven_incremental_listing(
swh_scheduler,
requests_mock,
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Dec 20 2024, 4:39 PM (11 w, 4 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3221119
Attached To
D7052: Maven: fix lister after docker-dev review.
Event Timeline
Log In to Comment