Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7124154
D7052.id25581.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
9 KB
Subscribers
None
D7052.id25581.diff
View Options
diff --git a/swh/lister/maven/lister.py b/swh/lister/maven/lister.py
--- a/swh/lister/maven/lister.py
+++ b/swh/lister/maven/lister.py
@@ -4,6 +4,7 @@
# See top-level LICENSE file for more information
from dataclasses import asdict, dataclass
+from datetime import datetime, timezone
import logging
import re
from typing import Any, Dict, Iterator, Optional
@@ -274,6 +275,7 @@
"""
assert self.lister_obj.id is not None
+ scm_types_ok = ("git", "svn", "hg", "cvs", "bzr")
if page["type"] == "scm":
# If origin is a scm url: detect scm type and yield.
# Note that the official format is:
@@ -283,11 +285,12 @@
m_scm = re.match(r"^scm:(?P<type>[^:]+):(?P<url>.*)$", page["url"])
if m_scm is not None:
scm_type = m_scm.group("type")
- scm_url = m_scm.group("url")
- origin = ListedOrigin(
- lister_id=self.lister_obj.id, url=scm_url, visit_type=scm_type,
- )
- yield origin
+ if scm_type in scm_types_ok:
+ scm_url = m_scm.group("url")
+ origin = ListedOrigin(
+ lister_id=self.lister_obj.id, url=scm_url, visit_type=scm_type,
+ )
+ yield origin
else:
if page["url"].endswith(".git"):
origin = ListedOrigin(
@@ -296,14 +299,25 @@
yield origin
else:
# Origin is a source archive:
+ last_update_dt = None
+ last_update_iso = ""
+ last_update_seconds = str(page["time"])[:-3]
+ try:
+ last_update_dt = datetime.fromtimestamp(int(last_update_seconds))
+ last_update_dt_tz = last_update_dt.astimezone(timezone.utc)
+ except OverflowError:
+ logger.warning("- Failed to convert datetime %s.", last_update_seconds)
+ if last_update_dt:
+ last_update_iso = last_update_dt_tz.isoformat()
origin = ListedOrigin(
lister_id=self.lister_obj.id,
url=page["url"],
visit_type=page["type"],
+ last_update=last_update_dt,
extra_loader_arguments={
"artifacts": [
{
- "time": page["time"],
+ "time": last_update_iso,
"gid": page["gid"],
"aid": page["aid"],
"version": page["version"],
diff --git a/swh/lister/maven/tests/data/https_maven.org/sprova4j-0.1.0.malformed.pom b/swh/lister/maven/tests/data/https_maven.org/sprova4j-0.1.0.malformed.pom
new file mode 100644
--- /dev/null
+++ b/swh/lister/maven/tests/data/https_maven.org/sprova4j-0.1.0.malformed.pom
@@ -0,0 +1,86 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd" xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+ <modelVersion>4.0.0</modelVersion>
+ <groupId>al.aldi</groupId>
+ <artifactId>sprova4j</artifactId>
+ <version>0.1.0</version>
+ <name>sprova4j</name>
+ <description>Java client for Sprova Test Management</description>
+ <url>https://github.com/aldialimucaj/sprova4j</url>
+ <inceptionYear>2018</inceptionYear>
+ <licenses>
+ <license>
+ <name>The Apache Software License, Version 2.0</name>
+ <url>http://www.apache.org/licenses/LICENSE-2.0.txt</url>
+ <distribution>repo</distribution>
+ </license>
+ </licenses>
+ <developers>
+ <developer>
+ <id>aldi</id>
+ <name>Aldi Alimucaj</name>
+ <email>aldi.alimucaj@gmail.com</email>
+ </developer>
+ </developers>
+ <scm>
+ <connection>scm:https://github.com/aldialimucaj/sprova4j.git</connection>
+ <developerConnection>scm:ghttps://github.com/aldialimucaj/sprova4j.git</developerConnection>
+ <url>https://github.com/aldialimucaj/sprova4j</url>
+ </scm>
+ <dependencies>
+ <dependency>
+ <groupId>ch.qos.logback</groupId>
+ <artifactId>logback-classic</artifactId>
+ <version>1.2.3</version>
+ <scope>runtime</scope>
+ </dependency>
+ <dependency>
+ <groupId>com.google.code.gson</groupId>
+ <artifactId>gson</artifactId>
+ <version>2.8.3</version>
+ <scope>runtime</scope>
+ </dependency>
+ <dependency>
+ <groupId>com.squareup.okhttp3</groupId>
+ <artifactId>okhttp</artifactId>
+ <version>3.10.0</version>
+ <scope>runtime</scope>
+ </dependency>
+ <dependency>
+ <groupId>com.squareup.okio</groupId>
+ <artifactId>okio</artifactId>
+ <version>1.0.0</version>
+ <scope>runtime</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.glassfish</groupId>
+ <artifactId>javax.json</artifactId>
+ <version>1.1.2</version>
+ <scope>runtime</scope>
+ </dependency>
+ <dependency>
+ <groupId>javax.json</groupId>
+ <artifactId>javax.json-api</artifactId>
+ <version>1.1.2</version>
+ <scope>runtime</scope>
+ </dependency>
+ <dependency>
+ <groupId>javax.validation</groupId>
+ <artifactId>validation-api</artifactId>
+ <version>2.0.1.Final</version>
+ <scope>runtime</scope>
+ </dependency>
+ <dependency>
+ <groupId>junit</groupId>
+ <artifactId>junit</artifactId>
+ <version>4.12</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>com.squareup.okhttp3</groupId>
+ <artifactId>mockwebserver</artifactId>
+ <version>3.10.0</version>
+ <scope>test</scope>
+ </dependency>
+ </dependencies>
+</project>
diff --git a/swh/lister/maven/tests/test_lister.py b/swh/lister/maven/tests/test_lister.py
--- a/swh/lister/maven/tests/test_lister.py
+++ b/swh/lister/maven/tests/test_lister.py
@@ -3,8 +3,10 @@
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
+from datetime import timezone
from pathlib import Path
+import iso8601
import pytest
import requests
@@ -34,7 +36,7 @@
"type": "maven",
"url": "https://repo1.maven.org/maven2/al/aldi/sprova4j"
+ "/0.1.0/sprova4j-0.1.0-sources.jar",
- "time": 1626109619335,
+ "time": "2021-07-12T17:06:59+00:00",
"gid": "al.aldi",
"aid": "sprova4j",
"version": "0.1.0",
@@ -43,7 +45,7 @@
"type": "maven",
"url": "https://repo1.maven.org/maven2/al/aldi/sprova4j"
+ "/0.1.1/sprova4j-0.1.1-sources.jar",
- "time": 1626111425534,
+ "time": "2021-07-12T17:37:05+00:00",
"gid": "al.aldi",
"aid": "sprova4j",
"version": "0.1.1",
@@ -69,6 +71,12 @@
return text
+@pytest.fixture
+def maven_pom_1_malformed(datadir) -> str:
+ text = Path(datadir, "https_maven.org", "sprova4j-0.1.0.malformed.pom").read_text()
+ return text
+
+
@pytest.fixture
def maven_pom_2(datadir) -> str:
text = Path(datadir, "https_maven.org", "sprova4j-0.1.1.pom").read_text()
@@ -113,6 +121,67 @@
origin_urls = [origin.url for origin in scheduler_origins]
assert sorted(origin_urls) == sorted(LIST_GIT + LIST_SRC)
+ for origin in scheduler_origins:
+ if origin.visit_type == "maven":
+ for src in LIST_SRC_DATA:
+ if src.get("url") == origin.url:
+ last_update_src = iso8601.parse_date(src.get("time")).astimezone(
+ tz=timezone.utc
+ )
+ assert last_update_src == origin.last_update
+ artifact = origin.extra_loader_arguments["artifacts"][0]
+ assert src.get("time") == artifact["time"]
+ assert src.get("gid") == artifact["gid"]
+ assert src.get("aid") == artifact["aid"]
+ assert src.get("version") == artifact["version"]
+ assert MVN_URL == artifact["base_url"]
+ break
+ else:
+ raise AssertionError
+ scheduler_state = lister.get_state_from_scheduler()
+ assert scheduler_state is not None
+ assert scheduler_state.last_seen_doc == -1
+ assert scheduler_state.last_seen_pom == -1
+
+
+def test_maven_full_listing_malformed(
+ swh_scheduler,
+ requests_mock,
+ mocker,
+ maven_index,
+ maven_pom_1_malformed,
+ maven_pom_2,
+):
+ """Covers full listing of multiple pages, checking page results with a malformed
+ scm entry in pom."""
+
+ lister = MavenLister(
+ scheduler=swh_scheduler,
+ url=MVN_URL,
+ instance="maven.org",
+ index_url=INDEX_URL,
+ incremental=False,
+ )
+
+ # Set up test.
+ index_text = maven_index
+ requests_mock.get(INDEX_URL, text=index_text)
+ requests_mock.get(URL_POM_1, text=maven_pom_1_malformed)
+ requests_mock.get(URL_POM_2, text=maven_pom_2)
+
+ # Then run the lister.
+ stats = lister.run()
+
+ # Start test checks.
+ assert stats.pages == 4
+ assert stats.origins == 3
+
+ scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results
+
+ origin_urls = [origin.url for origin in scheduler_origins]
+ LIST_SRC_1 = ("https://github.com/aldialimucaj/sprova4j.git",)
+ assert sorted(origin_urls) == sorted(LIST_SRC_1 + LIST_SRC)
+
for origin in scheduler_origins:
if origin.visit_type == "maven":
for src in LIST_SRC_DATA:
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Dec 20 2024, 7:46 PM (11 w, 4 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3228391
Attached To
D7052: Maven: fix lister after docker-dev review.
Event Timeline
Log In to Comment