Changeset View
Changeset View
Standalone View
Standalone View
swh/indexer/origin_head.py
# Copyright (C) 2018 The Software Heritage developers | # Copyright (C) 2018-2020 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
from typing import List, Tuple, Any, Dict, Union | |||||
import re | import re | ||||
import click | import click | ||||
import logging | import logging | ||||
from swh.indexer.indexer import OriginIndexer | from swh.indexer.indexer import OriginIndexer | ||||
class OriginHeadIndexer(OriginIndexer): | class OriginHeadIndexer(OriginIndexer): | ||||
"""Origin-level indexer. | """Origin-level indexer. | ||||
This indexer is in charge of looking up the revision that acts as the | This indexer is in charge of looking up the revision that acts as the | ||||
"head" of an origin. | "head" of an origin. | ||||
In git, this is usually the commit pointed to by the 'master' branch.""" | In git, this is usually the commit pointed to by the 'master' branch.""" | ||||
USE_TOOLS = False | USE_TOOLS = False | ||||
def persist_index_computations(self, results, policy_update): | def persist_index_computations( | ||||
self, results: Any, policy_update: str | |||||
) -> None: | |||||
"""Do nothing. The indexer's results are not persistent, they | """Do nothing. The indexer's results are not persistent, they | ||||
should only be piped to another indexer.""" | should only be piped to another indexer.""" | ||||
pass | pass | ||||
# Dispatch | # Dispatch | ||||
def index(self, origin_url): | def index(self, origin_url): | ||||
latest_visit = self.storage.origin_visit_get_latest( | latest_visit = self.storage.origin_visit_get_latest( | ||||
Show All 21 Lines | _archive_filename_re = re.compile( | ||||
rb'^' | rb'^' | ||||
rb'(?P<pkgname>.*)[-_]' | rb'(?P<pkgname>.*)[-_]' | ||||
rb'(?P<version>[0-9]+(\.[0-9])*)' | rb'(?P<version>[0-9]+(\.[0-9])*)' | ||||
rb'(?P<preversion>[-+][a-zA-Z0-9.~]+?)?' | rb'(?P<preversion>[-+][a-zA-Z0-9.~]+?)?' | ||||
rb'(?P<extension>(\.[a-zA-Z0-9]+)+)' | rb'(?P<extension>(\.[a-zA-Z0-9]+)+)' | ||||
rb'$') | rb'$') | ||||
@classmethod | @classmethod | ||||
def _parse_version(cls, filename): | def _parse_version( | ||||
cls: Any, filename: str | |||||
[Inline review comment] vlorentz: can be improved: https://docs.python.org/3/library/typing.html#typing.Tuple | |||||
) -> Tuple[Union[float, int], ...]: | |||||
"""Extracts the release version from an archive filename, | """Extracts the release version from an archive filename, | ||||
to get an ordering whose maximum is likely to be the last | to get an ordering whose maximum is likely to be the last | ||||
version of the software | version of the software | ||||
>>> OriginHeadIndexer._parse_version(b'foo') | >>> OriginHeadIndexer._parse_version(b'foo') | ||||
(-inf,) | (-inf,) | ||||
>>> OriginHeadIndexer._parse_version(b'foo.tar.gz') | >>> OriginHeadIndexer._parse_version(b'foo.tar.gz') | ||||
(-inf,) | (-inf,) | ||||
Show All 17 Lines | ) -> Tuple[Union[float, int], ...]: | ||||
version.append(preversion[1:]) | version.append(preversion[1:]) | ||||
elif preversion.startswith('+'): | elif preversion.startswith('+'): | ||||
version.append(1) | version.append(1) | ||||
version.append(preversion[1:]) | version.append(preversion[1:]) | ||||
else: | else: | ||||
assert False, res.group('preversion') | assert False, res.group('preversion') | ||||
return tuple(version) | return tuple(version) | ||||
def _try_get_ftp_head(self, snapshot): | def _try_get_ftp_head(self, snapshot: Dict[str, Any]) -> Any: | ||||
archive_names = list(snapshot['branches']) | archive_names = list(snapshot['branches']) | ||||
max_archive_name = max(archive_names, key=self._parse_version) | max_archive_name = max(archive_names, key=self._parse_version) | ||||
r = self._try_resolve_target(snapshot['branches'], max_archive_name) | r = self._try_resolve_target(snapshot['branches'], max_archive_name) | ||||
return r | return r | ||||
# Generic | # Generic | ||||
def _try_get_head_generic(self, snapshot): | def _try_get_head_generic( | ||||
self, snapshot: Dict[str, Any] | |||||
[Inline review comment, marked Done] vlorentz: not a bool | |||||
) -> Any: | |||||
# Works on 'deposit', 'pypi', and VCSs. | # Works on 'deposit', 'pypi', and VCSs. | ||||
try: | try: | ||||
branches = snapshot['branches'] | branches = snapshot['branches'] | ||||
except KeyError: | except KeyError: | ||||
return None | return None | ||||
else: | else: | ||||
return ( | return ( | ||||
self._try_resolve_target(branches, b'HEAD') or | self._try_resolve_target(branches, b'HEAD') or | ||||
self._try_resolve_target(branches, b'master') | self._try_resolve_target(branches, b'master') | ||||
) | ) | ||||
def _try_resolve_target(self, branches, target_name): | def _try_resolve_target(self, branches: Dict, target_name: bytes) -> Any: | ||||
[Inline review comment, marked Done] vlorentz: You can use specific types | |||||
try: | try: | ||||
target = branches[target_name] | target = branches[target_name] | ||||
if target is None: | if target is None: | ||||
return None | return None | ||||
while target['target_type'] == 'alias': | while target['target_type'] == 'alias': | ||||
target = branches[target['target']] | target = branches[target['target']] | ||||
if target is None: | if target is None: | ||||
return None | return None | ||||
Show All 11 Lines | def _try_resolve_target(self, branches: Dict, target_name: bytes) -> Any: | ||||
except KeyError: | except KeyError: | ||||
return None | return None | ||||
@click.command() | @click.command() | ||||
@click.option('--origins', '-i', | @click.option('--origins', '-i', | ||||
help='Origins to lookup, in the "type+url" format', | help='Origins to lookup, in the "type+url" format', | ||||
multiple=True) | multiple=True) | ||||
def main(origins): | def main(origins: List[str]) -> None: | ||||
rev_metadata_indexer = OriginHeadIndexer() | rev_metadata_indexer = OriginHeadIndexer() | ||||
rev_metadata_indexer.run(origins) | rev_metadata_indexer.run(origins) | ||||
if __name__ == '__main__': | if __name__ == '__main__': | ||||
logging.basicConfig(level=logging.INFO) | logging.basicConfig(level=logging.INFO) | ||||
main() | main() |
can be improved: https://docs.python.org/3/library/typing.html#typing.Tuple