Changeset View
Changeset View
Standalone View
Standalone View
swh/indexer/fossology_license.py
Show First 20 Lines • Show All 65 Lines • ▼ Show 20 Lines | ADDITIONAL_CONFIG = { | ||||
'write_batch_size': ('int', 1000), | 'write_batch_size': ('int', 1000), | ||||
} | } | ||||
CONFIG_BASE_FILENAME = 'indexer/fossology_license' | CONFIG_BASE_FILENAME = 'indexer/fossology_license' | ||||
def prepare(self): | def prepare(self): | ||||
super().prepare() | super().prepare() | ||||
self.working_directory = self.config['workdir'] | self.working_directory = self.config['workdir'] | ||||
self.tool = self.tools[0] | |||||
def compute_license(self, path, log=None): | |||||
"""Determine license from file at path. | |||||
Args: | |||||
path: filepath to determine the license | |||||
Returns: | |||||
dict: A dict with the following keys: | |||||
- licenses ([str]): associated detected licenses to path | |||||
- path (bytes): content filepath | |||||
""" | |||||
return compute_license(path, log=log) | |||||
def index(self, id, data): | def index(self, id, data): | ||||
"""Index sha1s' content and store result. | """Index sha1s' content and store result. | ||||
Args: | Args: | ||||
id (bytes): content's identifier | id (bytes): content's identifier | ||||
raw_content (bytes): associated raw content to content id | raw_content (bytes): associated raw content to content id | ||||
Returns: | Returns: | ||||
dict: A dict, representing a content_license, with keys: | dict: A dict, representing a content_license, with keys: | ||||
- id (bytes): content's identifier (sha1) | - id (bytes): content's identifier (sha1) | ||||
- license (bytes): license in bytes | - license (bytes): license in bytes | ||||
- path (bytes): path | - path (bytes): path | ||||
- indexer_configuration_id (int): tool used to compute the output | - indexer_configuration_id (int): tool used to compute the output | ||||
""" | """ | ||||
assert isinstance(id, bytes) | assert isinstance(id, bytes) | ||||
content_path = self.write_to_temp( | content_path = self.write_to_temp( | ||||
filename=hashutil.hash_to_hex(id), # use the id as pathname | filename=hashutil.hash_to_hex(id), # use the id as pathname | ||||
data=data) | data=data) | ||||
try: | try: | ||||
properties = self.compute_license(path=content_path, log=self.log) | properties = compute_license(path=content_path, log=self.log) | ||||
properties.update({ | properties.update({ | ||||
'id': id, | 'id': id, | ||||
'indexer_configuration_id': self.tool['id'], | 'indexer_configuration_id': self.tool['id'], | ||||
}) | }) | ||||
finally: | finally: | ||||
self.cleanup(content_path) | self.cleanup(content_path) | ||||
return properties | return properties | ||||
▲ Show 20 Lines • Show All 71 Lines • Show Last 20 Lines |