Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7124140
D7052.id25739.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
11 KB
Subscribers
None
D7052.id25739.diff
View Options
diff --git a/swh/lister/maven/lister.py b/swh/lister/maven/lister.py
--- a/swh/lister/maven/lister.py
+++ b/swh/lister/maven/lister.py
@@ -4,6 +4,7 @@
# See top-level LICENSE file for more information
from dataclasses import asdict, dataclass
+from datetime import datetime, timezone
import logging
import re
from typing import Any, Dict, Iterator, Optional
@@ -274,6 +275,7 @@
"""
assert self.lister_obj.id is not None
+ scm_types_ok = ("git", "svn", "hg", "cvs", "bzr")
if page["type"] == "scm":
# If origin is a scm url: detect scm type and yield.
# Note that the official format is:
@@ -283,11 +285,12 @@
m_scm = re.match(r"^scm:(?P<type>[^:]+):(?P<url>.*)$", page["url"])
if m_scm is not None:
scm_type = m_scm.group("type")
- scm_url = m_scm.group("url")
- origin = ListedOrigin(
- lister_id=self.lister_obj.id, url=scm_url, visit_type=scm_type,
- )
- yield origin
+ if scm_type in scm_types_ok:
+ scm_url = m_scm.group("url")
+ origin = ListedOrigin(
+ lister_id=self.lister_obj.id, url=scm_url, visit_type=scm_type,
+ )
+ yield origin
else:
if page["url"].endswith(".git"):
origin = ListedOrigin(
@@ -296,14 +299,25 @@
yield origin
else:
# Origin is a source archive:
+ last_update_dt = None
+ last_update_iso = ""
+ last_update_seconds = str(page["time"])[:-3]
+ try:
+ last_update_dt = datetime.fromtimestamp(int(last_update_seconds))
+ last_update_dt_tz = last_update_dt.astimezone(timezone.utc)
+ except OverflowError:
+ logger.warning("- Failed to convert datetime %s.", last_update_seconds)
+ if last_update_dt:
+ last_update_iso = last_update_dt_tz.isoformat()
origin = ListedOrigin(
lister_id=self.lister_obj.id,
url=page["url"],
visit_type=page["type"],
+ last_update=last_update_dt,
extra_loader_arguments={
"artifacts": [
{
- "time": page["time"],
+ "time": last_update_iso,
"gid": page["gid"],
"aid": page["aid"],
"version": page["version"],
diff --git a/swh/lister/maven/tests/data/https_maven.org/sprova4j-0.1.0.malformed.pom b/swh/lister/maven/tests/data/https_maven.org/sprova4j-0.1.0.malformed.pom
new file mode 100644
--- /dev/null
+++ b/swh/lister/maven/tests/data/https_maven.org/sprova4j-0.1.0.malformed.pom
@@ -0,0 +1,86 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd" xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+ <modelVersion>4.0.0</modelVersion>
+ <groupId>al.aldi</groupId>
+ <artifactId>sprova4j</artifactId>
+ <version>0.1.0</version>
+ <name>sprova4j</name>
+ <description>Java client for Sprova Test Management</description>
+ <url>https://github.com/aldialimucaj/sprova4j</url>
+ <inceptionYear>2018</inceptionYear>
+ <licenses>
+ <license>
+ <name>The Apache Software License, Version 2.0</name>
+ <url>http://www.apache.org/licenses/LICENSE-2.0.txt</url>
+ <distribution>repo</distribution>
+ </license>
+ </licenses>
+ <developers>
+ <developer>
+ <id>aldi</id>
+ <name>Aldi Alimucaj</name>
+ <email>aldi.alimucaj@gmail.com</email>
+ </developer>
+ </developers>
+ <scm>
+ <connection>scm:https://github.com/aldialimucaj/sprova4j.git</connection>
+ <developerConnection>scm:ghttps://github.com/aldialimucaj/sprova4j.git</developerConnection>
+ <url>https://github.com/aldialimucaj/sprova4j</url>
+ </scm>
+ <dependencies>
+ <dependency>
+ <groupId>ch.qos.logback</groupId>
+ <artifactId>logback-classic</artifactId>
+ <version>1.2.3</version>
+ <scope>runtime</scope>
+ </dependency>
+ <dependency>
+ <groupId>com.google.code.gson</groupId>
+ <artifactId>gson</artifactId>
+ <version>2.8.3</version>
+ <scope>runtime</scope>
+ </dependency>
+ <dependency>
+ <groupId>com.squareup.okhttp3</groupId>
+ <artifactId>okhttp</artifactId>
+ <version>3.10.0</version>
+ <scope>runtime</scope>
+ </dependency>
+ <dependency>
+ <groupId>com.squareup.okio</groupId>
+ <artifactId>okio</artifactId>
+ <version>1.0.0</version>
+ <scope>runtime</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.glassfish</groupId>
+ <artifactId>javax.json</artifactId>
+ <version>1.1.2</version>
+ <scope>runtime</scope>
+ </dependency>
+ <dependency>
+ <groupId>javax.json</groupId>
+ <artifactId>javax.json-api</artifactId>
+ <version>1.1.2</version>
+ <scope>runtime</scope>
+ </dependency>
+ <dependency>
+ <groupId>javax.validation</groupId>
+ <artifactId>validation-api</artifactId>
+ <version>2.0.1.Final</version>
+ <scope>runtime</scope>
+ </dependency>
+ <dependency>
+ <groupId>junit</groupId>
+ <artifactId>junit</artifactId>
+ <version>4.12</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>com.squareup.okhttp3</groupId>
+ <artifactId>mockwebserver</artifactId>
+ <version>3.10.0</version>
+ <scope>test</scope>
+ </dependency>
+ </dependencies>
+</project>
diff --git a/swh/lister/maven/tests/test_lister.py b/swh/lister/maven/tests/test_lister.py
--- a/swh/lister/maven/tests/test_lister.py
+++ b/swh/lister/maven/tests/test_lister.py
@@ -3,8 +3,10 @@
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
+from datetime import timezone
from pathlib import Path
+import iso8601
import pytest
import requests
@@ -34,7 +36,7 @@
"type": "maven",
"url": "https://repo1.maven.org/maven2/al/aldi/sprova4j"
+ "/0.1.0/sprova4j-0.1.0-sources.jar",
- "time": 1626109619335,
+ "time": "2021-07-12T17:06:59+00:00",
"gid": "al.aldi",
"aid": "sprova4j",
"version": "0.1.0",
@@ -43,7 +45,7 @@
"type": "maven",
"url": "https://repo1.maven.org/maven2/al/aldi/sprova4j"
+ "/0.1.1/sprova4j-0.1.1-sources.jar",
- "time": 1626111425534,
+ "time": "2021-07-12T17:37:05+00:00",
"gid": "al.aldi",
"aid": "sprova4j",
"version": "0.1.1",
@@ -53,32 +55,32 @@
@pytest.fixture
def maven_index(datadir) -> str:
- text = Path(datadir, "http_indexes", "export.fld").read_text()
- return text
+ return Path(datadir, "http_indexes", "export.fld").read_text()
@pytest.fixture
def maven_index_incr(datadir) -> str:
- text = Path(datadir, "http_indexes", "export_incr.fld").read_text()
- return text
+ return Path(datadir, "http_indexes", "export_incr.fld").read_text()
@pytest.fixture
def maven_pom_1(datadir) -> str:
- text = Path(datadir, "https_maven.org", "sprova4j-0.1.0.pom").read_text()
- return text
+ return Path(datadir, "https_maven.org", "sprova4j-0.1.0.pom").read_text()
+
+
+@pytest.fixture
+def maven_pom_1_malformed(datadir) -> str:
+ return Path(datadir, "https_maven.org", "sprova4j-0.1.0.malformed.pom").read_text()
@pytest.fixture
def maven_pom_2(datadir) -> str:
- text = Path(datadir, "https_maven.org", "sprova4j-0.1.1.pom").read_text()
- return text
+ return Path(datadir, "https_maven.org", "sprova4j-0.1.1.pom").read_text()
@pytest.fixture
def maven_pom_3(datadir) -> str:
- text = Path(datadir, "https_maven.org", "arangodb-graphql-1.2.pom").read_text()
- return text
+ return Path(datadir, "https_maven.org", "arangodb-graphql-1.2.pom").read_text()
def test_maven_full_listing(
@@ -117,6 +119,10 @@
if origin.visit_type == "maven":
for src in LIST_SRC_DATA:
if src.get("url") == origin.url:
+ last_update_src = iso8601.parse_date(src.get("time")).astimezone(
+ tz=timezone.utc
+ )
+ assert last_update_src == origin.last_update
artifact = origin.extra_loader_arguments["artifacts"][0]
assert src.get("time") == artifact["time"]
assert src.get("gid") == artifact["gid"]
@@ -125,7 +131,68 @@
assert MVN_URL == artifact["base_url"]
break
else:
- raise AssertionError
+ raise AssertionError(
+ "Could not find scheduler origin in referenced origins."
+ )
+ scheduler_state = lister.get_state_from_scheduler()
+ assert scheduler_state is not None
+ assert scheduler_state.last_seen_doc == -1
+ assert scheduler_state.last_seen_pom == -1
+
+
+def test_maven_full_listing_malformed(
+ swh_scheduler,
+ requests_mock,
+ mocker,
+ maven_index,
+ maven_pom_1_malformed,
+ maven_pom_2,
+):
+ """Covers full listing of multiple pages, checking page results with a malformed
+ scm entry in pom."""
+
+ lister = MavenLister(
+ scheduler=swh_scheduler,
+ url=MVN_URL,
+ instance="maven.org",
+ index_url=INDEX_URL,
+ incremental=False,
+ )
+
+ # Set up test.
+ index_text = maven_index
+ requests_mock.get(INDEX_URL, text=index_text)
+ requests_mock.get(URL_POM_1, text=maven_pom_1_malformed)
+ requests_mock.get(URL_POM_2, text=maven_pom_2)
+
+ # Then run the lister.
+ stats = lister.run()
+
+ # Start test checks.
+ assert stats.pages == 4
+ assert stats.origins == 3
+
+ scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results
+
+ origin_urls = [origin.url for origin in scheduler_origins]
+ LIST_SRC_1 = ("https://github.com/aldialimucaj/sprova4j.git",)
+ assert sorted(origin_urls) == sorted(LIST_SRC_1 + LIST_SRC)
+
+ for origin in scheduler_origins:
+ if origin.visit_type == "maven":
+ for src in LIST_SRC_DATA:
+ if src.get("url") == origin.url:
+ artifact = origin.extra_loader_arguments["artifacts"][0]
+ assert src.get("time") == artifact["time"]
+ assert src.get("gid") == artifact["gid"]
+ assert src.get("aid") == artifact["aid"]
+ assert src.get("version") == artifact["version"]
+ assert MVN_URL == artifact["base_url"]
+ break
+ else:
+ raise AssertionError(
+ "Could not find scheduler origin in referenced origins."
+ )
scheduler_state = lister.get_state_from_scheduler()
assert scheduler_state is not None
assert scheduler_state.last_seen_doc == -1
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Dec 20 2024, 6:56 PM (11 w, 4 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3228895
Attached To
D7052: Maven: fix lister after docker-dev review.
Event Timeline
Log In to Comment