swh/loader/git/loader.py
- This file was copied to swh/loader/git/from_disk.py: the previous content of loader.py (the from-disk GitLoader and GitLoaderFromArchive classes) now lives there, while loader.py itself becomes the remote git loader shown below.
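For orientation, the loader is driven through the load() entry point provided by the core loader (UnbufferedLoader), exactly as the __main__ block at the bottom of the file does. A minimal sketch of an invocation, assuming default configuration and using a purely illustrative repository URL:

    from swh.loader.git.loader import GitLoader

    loader = GitLoader()
    # base_url optionally points at an already-archived origin used to seed the
    # negotiation; ignore_history forces a full fetch.
    result = loader.load('https://example.org/some/repo.git',
                         base_url=None, ignore_history=False)
    print(result)  # expected to carry the 'eventful'/'uneventful' status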
# Copyright (C) 2016-2018  The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information

import datetime
import dulwich.client
import logging
import os
import pickle
import sys

from collections import defaultdict
from io import BytesIO

from dulwich.object_store import ObjectStoreGraphWalker
from dulwich.pack import PackData, PackInflater

from swh.model import hashutil
from swh.loader.core.loader import UnbufferedLoader
from swh.storage.algos.snapshot import snapshot_get_all_branches

from . import converters
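# Module layout (descriptive note): RepoRepresentation models what the archive
# already knows about an origin (its known heads and object types) and drives
# dulwich's fetch-pack negotiation through determine_wants() and graph_walker();
# GitLoader performs the visit itself: it fetches a pack from the origin, lists
# the objects it contains by type, converts them with swh.loader.git.converters,
# and builds a snapshot from the advertised refs.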
class RepoRepresentation:
    """Repository representation for a Software Heritage origin."""
    def __init__(self, storage, origin_id, base_snapshot=None,
                 ignore_history=False):
        self.storage = storage

        self._parents_cache = {}
        self._type_cache = {}

        self.ignore_history = ignore_history

        if origin_id and not ignore_history:
            self.heads = set(self._cache_heads(origin_id, base_snapshot))
        else:
            self.heads = set()

    def _fill_parents_cache(self, commits):
        """When querying for a commit's parents, we fill the cache to a depth of 1000
        commits."""
        root_revs = self._encode_for_storage(commits)
        for rev, parents in self.storage.revision_shortlog(root_revs, 1000):
            rev_id = hashutil.hash_to_bytehex(rev)
            if rev_id not in self._parents_cache:
                self._parents_cache[rev_id] = [
                    hashutil.hash_to_bytehex(parent) for parent in parents
                ]

        for rev in commits:
            if rev not in self._parents_cache:
                self._parents_cache[rev] = []

    def _cache_heads(self, origin_id, base_snapshot):
        """Return all the known head commits for `origin_id`"""
        _git_types = ['content', 'directory', 'revision', 'release']

        if not base_snapshot:
            return []

        snapshot_targets = set()
        for target in base_snapshot['branches'].values():
            if target and target['target_type'] in _git_types:
                snapshot_targets.add(target['target'])

        decoded_targets = self._decode_from_storage(snapshot_targets)

        for id, objs in self.get_stored_objects(decoded_targets).items():
            if not objs:
                logging.warn('Missing head: %s' % hashutil.hash_to_hex(id))
                return []

        return decoded_targets

    def get_parents(self, commit):
        """Bogus method to prevent expensive recursion, at the expense of less
        efficient downloading"""
        return []

    def get_heads(self):
        return self.heads

    @staticmethod
    def _encode_for_storage(objects):
        return [hashutil.bytehex_to_hash(object) for object in objects]

    @staticmethod
    def _decode_from_storage(objects):
        return set(hashutil.hash_to_bytehex(object) for object in objects)

    def graph_walker(self):
        return ObjectStoreGraphWalker(self.get_heads(), self.get_parents)

    @staticmethod
    def filter_unwanted_refs(refs):
        """Filter the unwanted references from refs"""
        ret = {}
        for ref, val in refs.items():
            if ref.endswith(b'^{}'):
                # Peeled refs make the git protocol explode
                continue
            elif ref.startswith(b'refs/pull/') and ref.endswith(b'/merge'):
                # We filter-out auto-merged GitHub pull requests
                continue
            else:
                ret[ref] = val

        return ret

    def determine_wants(self, refs):
        """Filter the remote references to figure out which ones
        Software Heritage needs.
        """
        if not refs:
            return []
        # Find what objects Software Heritage has
        refs = self.find_remote_ref_types_in_swh(refs)

        # Cache the objects found in swh as existing heads
        for target in refs.values():
            if target['target_type'] is not None:
                self.heads.add(target['target'])

        ret = set()
        for target in self.filter_unwanted_refs(refs).values():
            if target['target_type'] is None:
                # The target doesn't exist in Software Heritage, let's retrieve
                # it.
                ret.add(target['target'])

        return list(ret)

    def get_stored_objects(self, objects):
        """Find which of these objects were stored in the archive.

        Do the request in packets to avoid a server timeout.
        """
        if self.ignore_history:
            return {}

        packet_size = 1000

        ret = {}
        query = []
        for object in objects:
            query.append(object)
            if len(query) >= packet_size:
                ret.update(
                    self.storage.object_find_by_sha1_git(
                        self._encode_for_storage(query)
                    )
                )
                query = []
        if query:
            ret.update(
                self.storage.object_find_by_sha1_git(
                    self._encode_for_storage(query)
                )
            )
        return ret

    def find_remote_ref_types_in_swh(self, remote_refs):
        """Parse the remote refs information and list the objects that exist in
        Software Heritage.
        """
        all_objs = set(remote_refs.values()) - set(self._type_cache)
        type_by_id = {}

        for id, objs in self.get_stored_objects(all_objs).items():
            id = hashutil.hash_to_bytehex(id)
            if objs:
                type_by_id[id] = objs[0]['type']

        self._type_cache.update(type_by_id)

        ret = {}
        for ref, id in remote_refs.items():
            ret[ref] = {
                'target': id,
                'target_type': self._type_cache.get(id),
            }
        return ret
class GitLoader(UnbufferedLoader):
    """A bulk loader for a git repository"""
    CONFIG_BASE_FILENAME = 'loader/git'

    ADDITIONAL_CONFIG = {
        'pack_size_bytes': ('int', 4 * 1024 * 1024 * 1024),
    }

    def __init__(self, repo_representation=RepoRepresentation, config=None):
        """Initialize the bulk updater.

        Args:
            repo_representation: swh's repository representation
                which is in charge of filtering between known and remote
                data.

        """
        super().__init__(logging_class='swh.loader.git.BulkLoader',
                         config=config)
        self.repo_representation = repo_representation

    def fetch_pack_from_origin(self, origin_url, base_origin_id,
                               base_snapshot, do_activity):
        """Fetch a pack from the origin"""
        pack_buffer = BytesIO()

        base_repo = self.repo_representation(
            storage=self.storage,
            origin_id=base_origin_id,
            base_snapshot=base_snapshot,
            ignore_history=self.ignore_history,
        )

        client, path = dulwich.client.get_transport_and_path(origin_url,
                                                             thin_packs=False)

        size_limit = self.config['pack_size_bytes']

        def do_pack(data,
                    pack_buffer=pack_buffer,
                    limit=size_limit,
                    origin_url=origin_url):
            cur_size = pack_buffer.tell()
            would_write = len(data)
            if cur_size + would_write > limit:
                raise IOError('Pack file too big for repository %s, '
                              'limit is %d bytes, current size is %d, '
                              'would write %d' %
                              (origin_url, limit, cur_size, would_write))

            pack_buffer.write(data)

        remote_refs = client.fetch_pack(path,
                                        base_repo.determine_wants,
                                        base_repo.graph_walker(),
                                        do_pack,
                                        progress=do_activity).refs

        if remote_refs:
            local_refs = base_repo.find_remote_ref_types_in_swh(remote_refs)
        else:
            local_refs = remote_refs = {}

        pack_buffer.flush()
        pack_size = pack_buffer.tell()
        pack_buffer.seek(0)

        return {
            'remote_refs': base_repo.filter_unwanted_refs(remote_refs),
            'local_refs': local_refs,
            'pack_buffer': pack_buffer,
            'pack_size': pack_size,
        }
    def list_pack(self, pack_data, pack_size):
        id_to_type = {}
        type_to_ids = defaultdict(set)

        inflater = self.get_inflater()

        for obj in inflater:
            type, id = obj.type_name, obj.id
            id_to_type[id] = type
            type_to_ids[type].add(id)

        return id_to_type, type_to_ids

    def prepare_origin_visit(self, origin_url, **kwargs):
        self.visit_date = datetime.datetime.now(tz=datetime.timezone.utc)
        self.origin = converters.origin_url_to_origin(origin_url)

    def get_full_snapshot(self, origin_id):
        prev_snapshot = self.storage.snapshot_get_latest(origin_id)
        if prev_snapshot and prev_snapshot.pop('next_branch', None):
            return snapshot_get_all_branches(self.storage,
                                             prev_snapshot['id'])

        return prev_snapshot

    def prepare(self, origin_url, base_url=None, ignore_history=False):
        base_origin_id = origin_id = self.origin_id

        prev_snapshot = None

        if not ignore_history:
            prev_snapshot = self.get_full_snapshot(origin_id)

        if base_url and not prev_snapshot:
            base_origin = converters.origin_url_to_origin(base_url)
            base_origin = self.storage.origin_get(base_origin)
            if base_origin:
                base_origin_id = base_origin['id']
                prev_snapshot = self.get_full_snapshot(base_origin_id)

        self.base_snapshot = prev_snapshot
        self.base_origin_id = base_origin_id
        self.ignore_history = ignore_history
    def fetch_data(self):
        def do_progress(msg):
            sys.stderr.buffer.write(msg)
            sys.stderr.flush()

        fetch_info = self.fetch_pack_from_origin(
            self.origin['url'], self.base_origin_id, self.base_snapshot,
            do_progress)

        self.pack_buffer = fetch_info['pack_buffer']
        self.pack_size = fetch_info['pack_size']

        self.remote_refs = fetch_info['remote_refs']
        self.local_refs = fetch_info['local_refs']

        origin_url = self.origin['url']

        self.log.info('Listed %d refs for repo %s' % (
            len(self.remote_refs), origin_url), extra={
                'swh_type': 'git_repo_list_refs',
                'swh_repo': origin_url,
                'swh_num_refs': len(self.remote_refs),
            })

        # We want to load the repository, walk all the objects
        id_to_type, type_to_ids = self.list_pack(self.pack_buffer,
                                                 self.pack_size)

        self.id_to_type = id_to_type
        self.type_to_ids = type_to_ids
    def save_data(self):
        """Store a pack for archival"""
        write_size = 8192
        pack_dir = self.get_save_data_path()

        pack_name = "%s.pack" % self.visit_date.isoformat()
        refs_name = "%s.refs" % self.visit_date.isoformat()

        with open(os.path.join(pack_dir, pack_name), 'xb') as f:
            self.pack_buffer.seek(0)
            while True:
                r = self.pack_buffer.read(write_size)
                if not r:
                    break
                f.write(r)

        self.pack_buffer.seek(0)

        with open(os.path.join(pack_dir, refs_name), 'xb') as f:
            pickle.dump(self.remote_refs, f)

    def get_inflater(self):
        """Reset the pack buffer and get an object inflater from it"""
        self.pack_buffer.seek(0)
        return PackInflater.for_pack_data(
            PackData.from_file(self.pack_buffer, self.pack_size))
    def has_contents(self):
        return bool(self.type_to_ids[b'blob'])

    def get_content_ids(self):
        """Get the content identifiers from the git repository"""
        for raw_obj in self.get_inflater():
            if raw_obj.type_name != b'blob':
                continue

            yield converters.dulwich_blob_to_content_id(raw_obj)

    def get_contents(self):
        """Format the blobs from the git repository as swh contents"""
        max_content_size = self.config['content_size_limit']

        missing_contents = set(self.storage.content_missing(
            self.get_content_ids(), 'sha1_git'))

        for raw_obj in self.get_inflater():
            if raw_obj.type_name != b'blob':
                continue

            if raw_obj.sha().digest() not in missing_contents:
                continue

            yield converters.dulwich_blob_to_content(
                raw_obj, log=self.log, max_content_size=max_content_size,
                origin_id=self.origin_id)
    def has_directories(self):
        return bool(self.type_to_ids[b'tree'])

    def get_directory_ids(self):
        """Get the directory identifiers from the git repository"""
        return (hashutil.hash_to_bytes(id.decode())
                for id in self.type_to_ids[b'tree'])

    def get_directories(self):
        """Format the trees as swh directories"""
        missing_dirs = set(self.storage.directory_missing(
            sorted(self.get_directory_ids())))

        for raw_obj in self.get_inflater():
            if raw_obj.type_name != b'tree':
                continue

            if raw_obj.sha().digest() not in missing_dirs:
                continue

            yield converters.dulwich_tree_to_directory(raw_obj, log=self.log)
    def has_revisions(self):
        return bool(self.type_to_ids[b'commit'])

    def get_revision_ids(self):
        """Get the revision identifiers from the git repository"""
        return (hashutil.hash_to_bytes(id.decode())
                for id in self.type_to_ids[b'commit'])

    def get_revisions(self):
        """Format commits as swh revisions"""
        missing_revs = set(self.storage.revision_missing(
            sorted(self.get_revision_ids())))

        for raw_obj in self.get_inflater():
            if raw_obj.type_name != b'commit':
                continue

            if raw_obj.sha().digest() not in missing_revs:
                continue

            yield converters.dulwich_commit_to_revision(raw_obj, log=self.log)
    def has_releases(self):
        return bool(self.type_to_ids[b'tag'])

    def get_release_ids(self):
        """Get the release identifiers from the git repository"""
        return (hashutil.hash_to_bytes(id.decode())
                for id in self.type_to_ids[b'tag'])

    def get_releases(self):
        """Retrieve all the release objects from the git repository"""
        missing_rels = set(self.storage.release_missing(
            sorted(self.get_release_ids())))

        for raw_obj in self.get_inflater():
            if raw_obj.type_name != b'tag':
                continue

            if raw_obj.sha().digest() not in missing_rels:
                continue

            yield converters.dulwich_tag_to_release(raw_obj, log=self.log)
    def get_snapshot(self):
        branches = {}

        for ref in self.remote_refs:
            ret_ref = self.local_refs[ref].copy()
            if not ret_ref['target_type']:
                target_type = self.id_to_type[ret_ref['target']]
                ret_ref['target_type'] = converters.DULWICH_TYPES[target_type]

            ret_ref['target'] = hashutil.bytehex_to_hash(ret_ref['target'])

            branches[ref] = ret_ref

        self.snapshot = converters.branches_to_snapshot(branches)
        return self.snapshot
    def get_fetch_history_result(self):
        return {
            'contents': len(self.type_to_ids[b'blob']),
            'directories': len(self.type_to_ids[b'tree']),
            'revisions': len(self.type_to_ids[b'commit']),
            'releases': len(self.type_to_ids[b'tag']),
        }
    def load_status(self):
        """The load was eventful if the current snapshot is different to
        the one we retrieved at the beginning of the run"""
        eventful = False

        if self.base_snapshot:
            eventful = self.snapshot['id'] != self.base_snapshot['id']
        else:
            eventful = bool(self.snapshot['branches'])

        return {'status': ('eventful' if eventful else 'uneventful')}
if __name__ == '__main__':
    import click

    logging.basicConfig(
        level=logging.DEBUG,
        format='%(asctime)s %(process)d %(message)s'
    )

    @click.command()
    @click.option('--origin-url', help='Origin url', required=True)
    @click.option('--base-url', default=None, help='Optional Base url')
    @click.option('--ignore-history/--no-ignore-history',
                  help='Ignore the repository history', default=False)
    def main(origin_url, base_url, ignore_history):
        return GitLoader().load(
            origin_url,
            base_url=base_url,
            ignore_history=ignore_history,
        )

    main()
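As a point of reference, get_snapshot() hands converters.branches_to_snapshot a mapping from each advertised ref to its resolved target. A sketch of the expected shape, with illustrative ref names and placeholder hashes:

    branches = {
        b'refs/heads/master': {
            'target': b'\x01' * 20,   # sha1_git of the commit (placeholder)
            'target_type': 'revision',
        },
        b'refs/tags/v1.0.0': {
            'target': b'\x02' * 20,   # sha1_git of the annotated tag (placeholder)
            'target_type': 'release',
        },
    }

The resulting snapshot exposes 'branches' and an 'id', which is what load_status() compares against the base snapshot to decide whether the visit was eventful.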