Page MenuHomeSoftware Heritage

fossology_license.py
No OneTemporary

fossology_license.py

# Copyright (C) 2016-2017 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import click
import subprocess
from swh.model import hashutil
from .indexer import ContentIndexer, DiskIndexer
def compute_license(path, log=None):
    """Determine the license(s) of the file at path using ``nomossa``.

    Runs ``nomossa <path>`` and parses its textual output, which is expected
    to contain the marker `` contains license(s) `` followed by a
    comma-separated list of license names.

    Args:
        path: filepath to determine the license
        log: optional logger; when given, a failing ``nomossa`` run is
            reported through ``log.exception``

    Returns:
        A dict with the following keys:
        - licenses ([str]): licenses detected for path; empty when the
          tool exits with an error, prints nothing, or prints output
          that does not contain the expected marker
        - path: the content filepath, exactly as passed in

    """
    marker = ' contains license(s) '
    licenses = []
    try:
        output = subprocess.check_output(['nomossa', path],
                                         universal_newlines=True)
    except subprocess.CalledProcessError:
        if log is not None:
            from os.path import basename
            # The temporary file is named after the content's sha1, so the
            # basename is what identifies the content in the logs.
            log.exception('Problem during license detection for sha1 %s',
                          basename(path))
    else:
        parts = output.rstrip().split(marker)
        # Empty or unexpected output yields a single chunk: report "no
        # license found" instead of returning None or raising IndexError.
        if len(parts) > 1:
            licenses = parts[1].split(',')
    return {
        'licenses': licenses,
        'path': path,
    }
class ContentFossologyLicenseIndexer(ContentIndexer, DiskIndexer):
    """Content indexer detecting licenses with the nomos tool.

    For each content, it is in charge of:
    - filtering out contents already indexed with the configured tool
    - writing the raw content to disk through ``write_to_temp``
    - computing the licenses of that on-disk file
    - storing the result in storage

    """
    ADDITIONAL_CONFIG = {
        'workdir': ('str', '/tmp/swh/indexer.fossology.license'),
        'tools': ('dict', {
            'name': 'nomos',
            'version': '3.1.0rc2-31-ga2cbb8c',
            'configuration': {
                'command_line': 'nomossa <filepath>',
            },
        }),
    }

    CONFIG_BASE_FILENAME = 'indexer/fossology_license'

    def prepare(self):
        """Run the parent preparation, then set the working directory
        from the 'workdir' configuration entry.

        """
        super().prepare()
        self.working_directory = self.config['workdir']

    def filter(self, sha1s):
        """Yield only the contents the storage reports as missing.

        Args:
            sha1s: iterable of content identifiers

        Yields:
            whatever ``storage.content_fossology_license_missing`` yields
            for the (id, indexer_configuration_id) pairs built from sha1s

        """
        find_missing = self.storage.content_fossology_license_missing
        # Kept lazy: the pairs are only built as the storage consumes them.
        candidates = (
            {
                'id': content_id,
                'indexer_configuration_id': self.tools['id'],
            }
            for content_id in sha1s
        )
        yield from find_missing(candidates)

    def index(self, sha1, raw_content):
        """Compute the licenses of one content.

        Args:
            sha1 (bytes): content's identifier
            raw_content (bytes): raw content in bytes

        Returns:
            The dict returned by compute_license (keys 'licenses' and
            'path'), extended with the keys:
            - id (bytes): content's identifier (sha1)
            - indexer_configuration_id: identifier of the tool used

        """
        hex_name = hashutil.hash_to_hex(sha1)
        tmp_path = self.write_to_temp(filename=hex_name, data=raw_content)
        try:
            result = compute_license(path=tmp_path, log=self.log)
            result.update(
                id=sha1,
                indexer_configuration_id=self.tools['id'],
            )
            return result
        finally:
            # Always clean up the on-disk copy, even when detection fails.
            self.cleanup(tmp_path)

    def persist_index_computations(self, results, policy_update):
        """Persist the results in storage.

        Args:
            results ([dict]): list of dicts as returned by index()
            policy_update ([str]): either 'update-dups' or 'ignore-dups' to
                respectively update duplicates or ignore them

        """
        update_on_conflict = policy_update == 'update-dups'
        self.storage.content_fossology_license_add(
            results, conflict_update=update_on_conflict)
@click.command(help='Compute license for path using tool')
@click.option('--tool', default='nomossa', help="Path to tool")
@click.option('--path', required=True, help="Path to execute index on")
def main(tool, path):
    """Command-line entry point: print the license information of path.

    Args:
        tool: value of ``--tool``. Accepted for command-line
            compatibility, but compute_license() always runs ``nomossa``,
            so any other value is reported on stderr and ignored.
        path: value of ``--path``, the file to analyse

    """
    if tool != 'nomossa':
        click.echo("--tool=%s is ignored: only 'nomossa' is supported"
                   % tool, err=True)
    # compute_license takes (path, log=None): passing the tool first would
    # analyse the tool name as a file and use the path as the logger.
    print(compute_license(path))
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()

File Metadata

Mime Type
text/x-python
Expires
Fri, Jul 4, 5:11 PM (3 w, 2 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3295013

Event Timeline