Changeset View
Standalone View
swh/lister/maven/lister.py
- This file was added.
# Copyright (C) 2021 The Software Heritage developers | ||||||||||||||||||
# See the AUTHORS file at the top-level directory of this distribution | ||||||||||||||||||
# License: GNU General Public License version 3, or any later version | ||||||||||||||||||
# See top-level LICENSE file for more information | ||||||||||||||||||
from collections import defaultdict | ||||||||||||||||||
import glob | ||||||||||||||||||
import logging | ||||||||||||||||||
from os import chdir, mkdir | ||||||||||||||||||
from os.path import isabs, isdir, isfile, join | ||||||||||||||||||
import re | ||||||||||||||||||
from typing import Any, Dict, Iterator, Optional | ||||||||||||||||||
from urllib.parse import urljoin | ||||||||||||||||||
import docker | ||||||||||||||||||
import requests | ||||||||||||||||||
from tenacity.before_sleep import before_sleep_log | ||||||||||||||||||
from urllib3.util import parse_url | ||||||||||||||||||
from swh.lister.utils import throttling_retry | ||||||||||||||||||
from swh.scheduler.interface import SchedulerInterface | ||||||||||||||||||
from swh.scheduler.model import ListedOrigin | ||||||||||||||||||
from .. import USER_AGENT | ||||||||||||||||||
from ..pattern import CredentialsType, StatelessLister | ||||||||||||||||||
logger = logging.getLogger(__name__)

# A "page" for this lister is a plain dict describing either one source
# archive (keys: type/url/project/version) or one scm entry (type/url/project).
RepoPage = Dict[str, Any]
class MavenLister(StatelessLister[RepoPage]):
    """List origins from a Maven repository.

    Maven Central provides artifacts for Java builds.
    It includes POM files and source archives, which we download to get
    the source code of artifacts and links to their scm repository.

    This lister yields origins of types: git/svn/hg or whatever the Artifacts
    use as repository type, plus maven types for the maven loader (tgz, jar).
    """

    LISTER_NAME = "maven"
    # WITH trailing slash
    WORK_DIR = "/tmp/maven-index-exporter/"
    BASE_URL = ""

    def __init__(
        self,
        scheduler: SchedulerInterface,
        url: str,
        instance: Optional[str] = None,
        credentials: CredentialsType = None,
    ):
        """Instantiate the Maven lister.

        Args:
            scheduler: scheduler instance used to record listed origins.
            url: base URL of the Maven repository to list.
            instance: instance name; defaults to the hostname of ``url``.
            credentials: optional credentials, forwarded to the base lister.

        Raises:
            ValueError: if the per-instance work directory cannot be used.
        """
        self.BASE_URL = url
        if instance is None:
            instance = parse_url(url).host

        super().__init__(
            scheduler=scheduler, credentials=credentials, url=url, instance=instance,
        )

        # Check the per-instance work directory and create it if needed.
        self.WORK_DIR = join(self.WORK_DIR, instance, "")
        logger.debug("WORK_DIR is %s", self.WORK_DIR)
        if isdir(self.WORK_DIR):
            logger.info("WORK_DIR exists. Reusing it.")
        else:
            # makedirs (not mkdir): the base WORK_DIR itself may not exist
            # yet, and the per-instance directory is nested inside it.
            from os import makedirs

            try:
                makedirs(self.WORK_DIR)
            except OSError as error:
                logger.info("Could not create WORK_DIR %s: %s.", self.WORK_DIR, error)
        # Explicit checks instead of `assert`: asserts are stripped under
        # `python -O`, and an unusable work dir must always be fatal.
        if not isabs(self.WORK_DIR) or not isdir(self.WORK_DIR):
            raise ValueError(
                f"WORK_DIR {self.WORK_DIR} is not an absolute, existing directory."
            )

        self.session = requests.Session()
        self.session.headers.update(
            {"Accept": "application/json", "User-Agent": USER_AGENT,}
        )

    @throttling_retry(before_sleep=before_sleep_log(logger, logging.WARNING))
    def page_request(self, url: str, params: Dict[str, Any]) -> requests.Response:
        """GET ``url`` with ``params`` through the shared session.

        Retries with throttling on transient failures; logs and raises
        :class:`requests.HTTPError` on any non-200 response.
        """
        logger.info("Fetching URL %s with params %s", url, params)
        response = self.session.get(url, params=params)
        if response.status_code != 200:
            logger.warning(
                "Unexpected HTTP status code %s on %s: %s",
                response.status_code,
                response.url,
                response.content,
            )
        response.raise_for_status()
        return response

    def _docker_run(self, docker_image: str):
        """Start the container for the maven index export, using the image
        'bbaldassari/maven-index-exporter'. If needed the image is pulled from
        docker hub. If it already exists, simply use the local one.
        """
        client = docker.from_env()
        myimage = None
        # images.list(name=...) filters by name; keep the first match.
        for image in client.images.list(name=docker_image):
            myimage = image
            break
        if myimage is None:
            logger.info("Docker: Could not find %s. Pulling it.", docker_image)
            myimage = client.images.pull(repository=docker_image)
        else:
            logger.info(
                "Docker: Found image %s locally, ID is %s.",
                myimage,
                myimage.attrs["Id"],
            )
        # Mount WORK_DIR as /work so the exporter reads the downloaded
        # indexes and writes its exports back next to them.
        ret = client.containers.run(
            myimage,
            tty=True,
            command=["sh", "/opt/extract_indexes.sh", "/work/"],
            volumes={self.WORK_DIR: {"bind": "/work", "mode": "rw"}},
        )
        logger.info("Docker log:\n%s", ret.decode())

    def get_pages(self) -> Iterator[RepoPage]:
        """Download the Maven index files, then parse the exported text
        index and yield one RepoPage per source artifact found.

        Example of yielded RepoPage's:

        [
            {
                "type": "jar",
                "url": "https://maven.xwiki.org/..-5.4.2-sources.jar",
                "project": "xwiki-platform-wikistream-events-xwiki",
                "version": "5.4.2"
            },
            {
                "type": "scm",
                "url": "scm:git:git://github.com/openengsb/openengsb-framework.git",
                "project": "openengsb-framework",
            },
            ...
        ]
        """
        # Get all indexes files to extract and export.
        logger.info("Downloading all required index files..")
        file_properties = join(self.WORK_DIR, "nexus-maven-repository-index.properties")
        index_url = urljoin(self.BASE_URL, ".index/")
        url = urljoin(index_url, "nexus-maven-repository-index.properties")
        # Retrieve properties file through the retrying session.
        content = self.page_request(url, {}).content.decode()
        with open(file_properties, "w") as f:
            f.write(content)
        # The properties file lists incremental index chunks as lines of
        # the form "nexus.index.incremental-NN=<chunk id>".
        diff_re = re.compile(r"^nexus\.index\.incremental-[0-9]+=(?P<chunk>[0-9]+)")
        for line in content.split("\n"):
            diff_group = diff_re.match(line)
            if diff_group is None:
                continue
            ind_name = f"nexus-maven-repository-index.{diff_group.group('chunk')}.gz"
            ind_path = join(self.WORK_DIR, ind_name)
            ind_url = urljoin(index_url, ind_name)
            if isfile(ind_path):
                logger.info("File %s exists, skipping download.", ind_path)
            else:
                logger.info(
                    "File %s doesn't exist. Downloading file from %s",
                    ind_path,
                    ind_url,
                )
                # Retrieve incremental gz file
                with open(ind_path, "wb") as f:
                    f.write(self.page_request(ind_url, {}).content)
        file_path = join(self.WORK_DIR, "nexus-maven-repository-index.gz")
        file_url = urljoin(index_url, "nexus-maven-repository-index.gz")
        if isfile(file_path):
            logger.info("File %s exists, skipping download.", file_path)
        else:
            logger.info(
                "File %s doesn't exist. Downloading file from %s", file_path, file_url
            )
            with open(file_path, "wb") as f:
                f.write(self.page_request(file_url, {}).content)

        # Execute Docker image for index extract and export.
        # self._docker_run('bbaldassari/maven-index-exporter')

        # Find the exported text index (.fld) without chdir(): chdir
        # mutates process-wide state and would leak to unrelated code.
        fld_files = glob.glob(join(self.WORK_DIR, "export", "*.fld"))
        if not fld_files:
            # Raising (instead of exit()) lets the scheduler record the
            # failure instead of killing the whole worker process.
            raise FileNotFoundError(
                f"Cannot find .fld file in {join(self.WORK_DIR, 'export')}."
            )
        myfile = fld_files[0]
        logger.info("Found fld file: %s", myfile)

        # Each artifact record in the export holds a value line of the form:
        #     value <groupid>|<artifactid>|<version>|<classifier>|<extension>
        r_val = re.compile(
            r"^\s{4}value (?P<gid>[^|]+)\|(?P<aid>[^|]+)\|(?P<version>[^|]+)"
            r"\|(?P<classifier>[^|]+)\|(?P<ext>[^|]+)$"
        )
        # Read the index text export and get URLs and SCMs.
        out_pom: Dict[str, str] = {}
        out_src: Dict[str, Dict[str, str]] = {}
        with open(myfile, "r") as f:
            for line in f:
                m = r_val.match(line)
                if m is None:
                    continue
                gid, aid, version, classifier, ext = m.group(
                    "gid", "aid", "version", "classifier", "ext"
                )
                ext = ext.strip()
                path = gid.replace(".", "/")
                if classifier == "NA" and ext == "pom":
                    url = urljoin(
                        self.BASE_URL, f"{path}/{aid}/{version}/{aid}-{version}.{ext}"
                    )
                    out_pom[url] = aid
                elif "src" in classifier and ext in ("zip", "jar", "tar.gz", "tar.bz2"):
                    url = urljoin(
                        self.BASE_URL,
                        f"{path}/{aid}/{version}/{aid}-{version}-{classifier}.{ext}",
                    )
                    # Dicts keyed by URL de-duplicate entries appearing
                    # several times in the export.
                    out_src[url] = {"project": aid, "version": version}
        logger.info("Found %s poms and %s src items.", len(out_pom), len(out_src))
        # Yield all src archives found.
        for src_url, info in out_src.items():
            yield {
                "type": "jar",
                "url": src_url,
                "project": info["project"],
                "version": info["version"],
            }
        # TODO: fetch the pom files collected in out_pom and scan them for
        # scm info, then yield the corresponding "scm" pages.

    def get_origins_from_page(self, page: RepoPage) -> Iterator[ListedOrigin]:
        """Convert a page of Maven repositories into a list of ListedOrigins.
        """
        assert self.lister_obj.id is not None
        if page["type"] == "scm":
            # If origin is a scm url: detect scm type and yield.
            # scm urls are of the form:
            #   scm:git:git://github.com/openengsb/openengsb-framework.git
            m = re.match(r"^scm:(?P<type>[^:]+):(?P<url>.*)$", page["url"])
            if m is not None:
                yield ListedOrigin(
                    lister_id=self.lister_obj.id,
                    # Bug fix: the url is the second group; the original
                    # code assigned group(1) (the scm type) to both.
                    url=m.group("url"),
                    visit_type=m.group("type"),
                )
        else:
            # Origin is a source archive.
            # NOTE(review): the origin url is the bare project name here,
            # not a full URL — confirm this is what the loader expects.
            yield ListedOrigin(
                lister_id=self.lister_obj.id,
                url=page["project"],
                visit_type=page["type"],
                extra_loader_arguments={
                    "artifacts": [{"url": page["url"], "version": page["version"]}]
                },
            )
Why is docker involved here? I see no other mention of a docker stuff anywhere in this diff.
I don't understand what this "index_url" is and how it is supposed to be used.