diff --git a/Makefile.local b/Makefile.local index 21982b2cc..b94c4cbab 100644 --- a/Makefile.local +++ b/Makefile.local @@ -1,31 +1,41 @@ .PHONY: build-webpack-dev build-webpack-dev: npm run build-dev +.PHONY: build-webpack-dev-no-verbose +build-webpack-dev-no-verbose: + npm run build-dev >/dev/null + .PHONY: build-webpack-prod build-webpack-prod: npm run build .PHONY: run-migrations run-migrations: - cd swh/web && python3 manage.py migrate 2>/dev/null + python3 swh/web/manage.py migrate 2>/dev/null + +.PHONY: run-tests-migrations +run-tests-migrations: + rm -f swh/web/settings/testdb.sqlite3 2>/dev/null + django-admin migrate --settings=swh.web.settings.tests 2>/dev/null run-django-webpack-devserver: run-migrations bash -c "trap 'trap - SIGINT SIGTERM ERR; kill %1' SIGINT SIGTERM ERR; npm run start-dev & cd swh/web && python3 manage.py runserver" run-django-webpack-dev: build-webpack-dev run-migrations - cd swh/web && python3 manage.py runserver --nostatic + python3 swh/web/manage.py runserver --nostatic run-django-webpack-prod: build-webpack-prod run-migrations - cd swh/web && python3 manage.py runserver --nostatic --settings=swh.web.settings.production + python3 swh/web/manage.py runserver --nostatic --settings=swh.web.settings.production run-django-server-dev: run-migrations - cd swh/web && python3 manage.py runserver --nostatic + python3 swh/web/manage.py runserver --nostatic run-django-server-prod: run-migrations - cd swh/web && python3 manage.py runserver --nostatic --settings=swh.web.settings.production + python3 swh/web/manage.py runserver --nostatic --settings=swh.web.settings.production run-gunicorn-server: run-migrations gunicorn3 -b 127.0.0.1:5004 swh.web.wsgi -test: build-webpack-dev +test: build-webpack-dev-no-verbose run-tests-migrations + diff --git a/debian/control b/debian/control index 8c90b8a2b..8699ef901 100644 --- a/debian/control +++ b/debian/control @@ -1,42 +1,44 @@ Source: swh-web Maintainer: Software Heritage developers Section: python 
Priority: optional Build-Depends: curl, debhelper (>= 9), dh-python (>= 2), python3-all, python3-bs4, python3-django (>= 1.10.7~), python3-djangorestframework (>= 3.4.0~), python3-django-webpack-loader, python3-django-js-reverse, python3-docutils, python3-htmlmin, python3-magic (>= 0.3.0~), python3-lxml, python3-nose, python3-pygments, python3-pypandoc, python3-setuptools, python3-sphinx, python3-sphinxcontrib.httpdomain, python3-yaml, python3-swh.core (>= 0.0.40~), python3-swh.model (>= 0.0.25~), python3-swh.storage (>= 0.0.104~), python3-swh.indexer.storage (>= 0.0.52~), - python3-swh.vault (>= 0.0.20~) + python3-swh.vault (>= 0.0.20~), + python3-swh.scheduler (>= 0.0.31~) Standards-Version: 3.9.6 Homepage: https://forge.softwareheritage.org/diffusion/DWUI/ Package: python3-swh.web Architecture: all Depends: python3-swh.core (>= 0.0.40~), python3-swh.model (>= 0.0.25~), python3-swh.storage (>= 0.0.104~), python3-swh.indexer.storage (>= 0.0.52~), python3-swh.vault (>= 0.0.20~), + python3-swh.scheduler (>= 0.0.31~), ${misc:Depends}, ${python3:Depends} Description: Software Heritage Web Applications diff --git a/debian/postinst b/debian/postinst new file mode 100755 index 000000000..db179c5db --- /dev/null +++ b/debian/postinst @@ -0,0 +1,3 @@ +#!/bin/bash + +python3 /usr/lib/python3/dist-packages/swh/web/manage.py migrate diff --git a/debian/rules b/debian/rules index ca3fac068..a9ea3b16d 100755 --- a/debian/rules +++ b/debian/rules @@ -1,12 +1,14 @@ #!/usr/bin/make -f export PYBUILD_NAME=swh.web export PYBUILD_TEST_ARGS=--with-doctest -sv ./swh/web/tests +export PYBUILD_BEFORE_TEST=cd {build_dir}; python3 swh/web/manage.py migrate +export PYBUILD_AFTER_TEST=cd {build_dir}; rm -f swh/web/settings/testdb.sqlite3 export DJANGO_SETTINGS_MODULE=swh.web.settings.tests %: dh $@ --with python3 --buildsystem=pybuild override_dh_install: dh_install rm -v $(CURDIR)/debian/python3-*/usr/lib/python*/dist-packages/swh/__init__.py diff --git a/docs/uri-scheme-api-origin.rst 
b/docs/uri-scheme-api-origin.rst index 0fa7d8648..90a28a59e 100644 --- a/docs/uri-scheme-api-origin.rst +++ b/docs/uri-scheme-api-origin.rst @@ -1,10 +1,12 @@ Origin ------ .. autosimple:: swh.web.api.views.origin.api_origin .. autosimple:: swh.web.api.views.origin.api_origin_search .. autosimple:: swh.web.api.views.origin.api_origin_visits .. autosimple:: swh.web.api.views.origin.api_origin_visit + +.. autosimple:: swh.web.api.views.origin_save.api_save_origin diff --git a/requirements-swh.txt b/requirements-swh.txt index 6a9cafe3a..9c2a5e54c 100644 --- a/requirements-swh.txt +++ b/requirements-swh.txt @@ -1,5 +1,6 @@ swh.core >= 0.0.40 swh.model >= 0.0.25 swh.storage >= 0.0.104 swh.vault >= 0.0.20 swh.indexer >= 0.0.52 +swh.scheduler >= 0.0.31 diff --git a/swh/web/api/apidoc.py b/swh/web/api/apidoc.py index 679fe6358..3ec6ff8ca 100644 --- a/swh/web/api/apidoc.py +++ b/swh/web/api/apidoc.py @@ -1,346 +1,356 @@ # Copyright (C) 2015-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information import docutils.nodes import docutils.parsers.rst import docutils.utils import functools import os import re from functools import wraps from rest_framework.decorators import api_view from swh.web.common.utils import parse_rst from swh.web.api.apiurls import APIUrls from swh.web.api.apiresponse import make_api_response, error_response class _HTTPDomainDocVisitor(docutils.nodes.NodeVisitor): """ docutils visitor for walking on a parsed rst document containing sphinx httpdomain roles. Its purpose is to extract relevant info regarding swh api endpoints (for instance url arguments) from their docstring written using sphinx httpdomain. 
""" # httpdomain roles we want to parse (based on sphinxcontrib.httpdomain 1.6) parameter_roles = ('param', 'parameter', 'arg', 'argument') response_json_object_roles = ('resjsonobj', 'resjson', '>jsonobj', '>json') response_json_array_roles = ('resjsonarr', '>jsonarr') query_parameter_roles = ('queryparameter', 'queryparam', 'qparam', 'query') request_header_roles = ('header', 'resheader', 'responseheader') status_code_roles = ('statuscode', 'status', 'code') def __init__(self, document, urls, data): super().__init__(document) self.urls = urls self.url_idx = 0 self.data = data self.args_set = set() self.params_set = set() self.returns_set = set() self.status_codes_set = set() self.reqheaders_set = set() self.resheaders_set = set() self.field_list_visited = False def process_paragraph(self, par): """ Process extracted paragraph text before display. Cleanup document model markups and transform the paragraph into a valid raw rst string (as the apidoc documentation transform rst to html when rendering). """ par = par.replace('\n', ' ') # keep empahasized and strong text par = par.replace('', '*') par = par.replace('', '*') par = par.replace('', '**') par = par.replace('', '**') # remove parsed document markups par = re.sub('<[^<]+?>', '', par) # api urls cleanup to generate valid links afterwards par = re.sub('\(\w+\)', '', par) par = re.sub('\[.*\]', '', par) par = par.replace('//', '/') # transform references to api endpoints into valid rst links par = re.sub(':http:get:`(.*)`', r'`<\1>`_', par) # transform references to some elements into bold text par = re.sub(':http:header:`(.*)`', r'**\1**', par) par = re.sub(':func:`(.*)`', r'**\1**', par) return par def visit_field_list(self, node): """ Visit parsed rst field lists to extract relevant info regarding api endpoint. 
""" self.field_list_visited = True for child in node.traverse(): # get the parsed field name if isinstance(child, docutils.nodes.field_name): field_name = child.astext() # parse field text elif isinstance(child, docutils.nodes.paragraph): text = self.process_paragraph(str(child)) field_data = field_name.split(' ') # Parameters if field_data[0] in self.parameter_roles: if field_data[2] not in self.args_set: self.data['args'].append({'name': field_data[2], 'type': field_data[1], 'doc': text}) self.args_set.add(field_data[2]) # Query Parameters if field_data[0] in self.query_parameter_roles: if field_data[2] not in self.params_set: self.data['params'].append({'name': field_data[2], 'type': field_data[1], 'doc': text}) self.params_set.add(field_data[2]) # Response type if field_data[0] in self.response_json_array_roles or \ field_data[0] in self.response_json_object_roles: # array if field_data[0] in self.response_json_array_roles: self.data['return_type'] = 'array' # object else: self.data['return_type'] = 'object' # returned object field if field_data[2] not in self.returns_set: self.data['returns'].append({'name': field_data[2], 'type': field_data[1], 'doc': text}) self.returns_set.add(field_data[2]) # Status Codes if field_data[0] in self.status_code_roles: if field_data[1] not in self.status_codes_set: self.data['status_codes'].append({'code': field_data[1], # noqa 'doc': text}) self.status_codes_set.add(field_data[1]) # Request Headers if field_data[0] in self.request_header_roles: if field_data[1] not in self.reqheaders_set: self.data['reqheaders'].append({'name': field_data[1], 'doc': text}) self.reqheaders_set.add(field_data[1]) # Response Headers if field_data[0] in self.response_header_roles: if field_data[1] not in self.resheaders_set: resheader = {'name': field_data[1], 'doc': text} self.data['resheaders'].append(resheader) self.resheaders_set.add(field_data[1]) if resheader['name'] == 'Content-Type' and \ resheader['doc'] == 'application/octet-stream': 
self.data['return_type'] = 'octet stream' def visit_paragraph(self, node): """ Visit relevant paragraphs to parse """ # only parsed top level paragraphs if isinstance(node.parent, docutils.nodes.block_quote): text = self.process_paragraph(str(node)) # endpoint description if not text.startswith('**') and self.data['description'] != text: self.data['description'] += '\n\n' if self.data['description'] else '' # noqa self.data['description'] += text # http methods elif text.startswith('**Allowed HTTP Methods:**'): text = text.replace('**Allowed HTTP Methods:**', '') http_methods = text.strip().split(',') http_methods = [m[m.find('`')+1:-1].upper() for m in http_methods] self.data['urls'].append({'rule': self.urls[self.url_idx], 'methods': http_methods}) self.url_idx += 1 def visit_literal_block(self, node): """ Visit litteral blocks """ text = node.astext() # litteral block in endpoint description if not self.field_list_visited: self.data['description'] += ':\n\n\t%s' % text # extract example url if ':swh_web_api:' in text: self.data['examples'].append( '/api/1/' + re.sub('.*`(.*)`.*', r'\1', text)) + def visit_bullet_list(self, node): + # bullet list in endpoint description + if not self.field_list_visited: + self.data['description'] += '\n\n' + for child in node.traverse(): + # process list item + if isinstance(child, docutils.nodes.paragraph): + line_text = self.process_paragraph(str(child)) + self.data['description'] += '\t* %s\n' % line_text + def unknown_visit(self, node): pass def depart_document(self, node): """ End of parsing extra processing """ default_methods = ['GET', 'HEAD', 'OPTIONS'] # ensure urls info is present and set default http methods if not self.data['urls']: for url in self.urls: self.data['urls'].append({'rule': url, 'methods': default_methods}) def unknown_departure(self, node): pass def _parse_httpdomain_doc(doc, data): doc_lines = doc.split('\n') doc_lines_filtered = [] urls = [] # httpdomain is a sphinx extension that is unknown to 
def _parse_httpdomain_doc(doc, data):
    """
    Parse a docstring written with sphinx httpdomain markup and fill
    the ``data`` dict in place with the extracted endpoint info.
    """
    kept_lines = []
    urls = []
    # httpdomain is a sphinx extension that is unknown to docutils but
    # fortunately we can still parse its directives' content, so remove
    # lines with httpdomain directives before running the docutils rst
    # parser; those lines carry the endpoint urls, which we keep aside
    for line in doc.split('\n'):
        if '.. http' not in line:
            kept_lines.append(line)
        else:
            url = line[line.find('/'):]
            # emphasize url arguments for html rendering
            urls.append(re.sub(r'\((\w+)\)', r' **\(\1\)** ', url))

    # parse the rst docstring; report_level=5 silences system messages
    # about unknown httpdomain roles
    document = parse_rst('\n'.join(kept_lines), report_level=5)
    # drop the system_message nodes left in the parsed document
    for node in document.traverse(docutils.nodes.system_message):
        node.parent.remove(node)
    # walk the document nodes to extract relevant endpoint info
    visitor = _HTTPDomainDocVisitor(document, urls, data)
    document.walkabout(visitor)


class APIDocException(Exception):
    """
    Custom exception to signal errors in the use of the APIDoc decorators
    """
class api_doc(object):  # noqa: N801
    """
    Decorate an API function to register it in the API doc route index
    and create the corresponding DRF route.

    Args:
        route (str): documentation page's route
        noargs (boolean): set to True if the route has no arguments, and
            its result should be displayed anytime its documentation
            is requested. Default to False
        tags (list): Further information on api endpoints. Two values are
            possibly expected:

            * hidden: remove the entry points from the listing
            * upcoming: display the entry point but it is not followable

        handle_response (boolean): indicate if the decorated function takes
            care of creating the HTTP response or delegates that task to the
            apiresponse module
        api_version (str): api version string
    """
    # NOTE: ``tags`` default was a mutable ``[]``; an immutable tuple avoids
    # the shared-mutable-default pitfall and is equivalent (only read
    # through ``set(tags)``).
    def __init__(self, route, noargs=False, tags=(), handle_response=False,
                 api_version='1'):
        super().__init__()
        self.route = route
        self.urlpattern = '^' + api_version + route + '$'
        self.noargs = noargs
        self.tags = set(tags)
        self.handle_response = handle_response

    # @api_doc() Decorator call
    def __call__(self, f):
        # If the route is not hidden, add it to the index
        if 'hidden' not in self.tags:
            doc_data = self.get_doc_data(f)
            doc_desc = doc_data['description']
            # index entry shows only the first sentence of the description
            first_dot_pos = doc_desc.find('.')
            APIUrls.add_route(self.route, doc_desc[:first_dot_pos+1],
                              tags=self.tags)

        # If the decorated route has arguments, we create a specific
        # documentation view
        if not self.noargs:
            @api_view(['GET', 'HEAD'])
            def doc_view(request):
                doc_data = self.get_doc_data(f)
                return make_api_response(request, None, doc_data)

            view_name = self.route[1:-1].replace('/', '-')
            APIUrls.add_url_pattern(self.urlpattern, doc_view, view_name)

        @wraps(f)
        def documented_view(request, **kwargs):
            doc_data = self.get_doc_data(f)

            try:
                response = f(request, **kwargs)
            except Exception as exc:
                return error_response(request, exc, doc_data)

            if self.handle_response:
                return response
            else:
                return make_api_response(request, response, doc_data)

        return documented_view

    @functools.lru_cache(maxsize=32)
    def get_doc_data(self, f):
        """
        Build documentation data for the decorated api endpoint function.

        Raises:
            APIDocException: if the endpoint function has no docstring
        """
        data = {
            'description': '',
            'response_data': None,
            'urls': [],
            'args': [],
            'params': [],
            'resheaders': [],
            'reqheaders': [],
            'return_type': '',
            'returns': [],
            'status_codes': [],
            'examples': [],
            'route': self.route,
            'noargs': self.noargs
        }

        if not f.__doc__:
            raise APIDocException('apidoc %s: expected a docstring'
                                  ' for function %s'
                                  % (self.__class__.__name__, f.__name__))

        # use raw docstring as endpoint documentation if sphinx
        # httpdomain is not used
        if '.. http' not in f.__doc__:
            data['description'] = f.__doc__
        # else parse the sphinx httpdomain docstring with docutils
        # (except when building the swh-web documentation through autodoc
        # sphinx extension, not needed and raise errors with sphinx >= 1.7)
        elif 'SWH_WEB_DOC_BUILD' not in os.environ:
            _parse_httpdomain_doc(f.__doc__, data)

        # process returned object info for nicer html display
        returns_list = ''
        for ret in data['returns']:
            returns_list += '\t* **%s (%s)**: %s\n' % \
                (ret['name'], ret['type'], ret['doc'])
        data['returns_list'] = returns_list

        return data
@api_route(r'/origin/save/(?P<origin_type>.+)/url/(?P<origin_url>.+)/',
           'save-origin', methods=['GET', 'POST'],
           throttle_scope='swh_save_origin')
@never_cache
@api_doc('/origin/save/')
def api_save_origin(request, origin_type, origin_url):
    """
    .. http:get:: /api/1/origin/save/(origin_type)/url/(origin_url)/
    .. http:post:: /api/1/origin/save/(origin_type)/url/(origin_url)/

        Request the saving of a software origin into the archive
        or check the status of previously created save requests.

        That endpoint enables to create a saving task for a software origin
        through a POST request.

        Depending of the provided origin url, the save request can either be:

            * immediately **accepted**, for well known code hosting providers
              like for instance GitHub or GitLab
            * **rejected**, in case the url is blacklisted by Software Heritage
            * **put in pending state** until a manual check is done in order to
              determine if it can be loaded or not

        Once a saving request has been accepted, its associated saving task
        status can then be checked through a GET request on the same url.
        Returned status can either be:

            * **not created**: no saving task has been created
            * **not yet scheduled**: saving task has been created but its
              execution has not yet been scheduled
            * **scheduled**: the task execution has been scheduled
            * **succeed**: the saving task has been successfully executed
            * **failed**: the saving task has been executed but it failed

        When issuing a POST request an object will be returned while a GET
        request will return an array of objects (as multiple save requests
        might have been submitted for the same origin).

        :param string origin_type: the type of origin to save
            (currently only *git* but *hg* and *svn* will soon be available)
        :param string origin_url: the url of the origin to save

        :reqheader Accept: the requested response content type,
            either *application/json* (default) or *application/yaml*
        :resheader Content-Type: this depends on :http:header:`Accept`
            header of request

        :>json string origin_url: the url of the origin to save
        :>json string origin_type: the type of the origin to save
        :>json string save_request_date: the date (in iso format) the save
            request was issued
        :>json string save_request_status: the status of the save request,
            either *accepted*, *rejected* or *pending*
        :>json string save_task_status: the status of the origin saving task,
            either *not created*, *not yet scheduled*, *scheduled*,
            *succeed* or *failed*

        **Allowed HTTP Methods:** :http:method:`get`, :http:method:`post`,
        :http:method:`head`, :http:method:`options`

        :statuscode 200: no error
        :statuscode 400: an invalid origin type or url has been provided
        :statuscode 403: the provided origin url is blacklisted
    """  # noqa

    # NOTE(review): the route regex had lost its named groups in the source
    # ((?P.+) is invalid); they are restored here to match the origin_type
    # and origin_url keyword parameters of this view.
    if request.method == 'POST':
        # create a new save request (or refresh an existing one)
        return create_save_origin_request(origin_type, origin_url)
    else:
        # report on all save requests submitted for this origin
        return get_save_origin_requests(origin_type, origin_url)
# Copyright (C) 2018  The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information

# flake8: noqa

from __future__ import unicode_literals

from django.db import migrations, models

# Origin url prefixes trusted out of the box: save requests matching one
# of these prefixes are accepted without manual review.
_authorized_origins = [
    'https://github.com/',
    'https://gitlab.com/',
    'https://bitbucket.org/',
    'https://git.code.sf.net/',
    'http://git.code.sf.net/',
    'https://hg.code.sf.net/',
    'http://hg.code.sf.net/',
    'https://svn.code.sf.net/',
    'http://svn.code.sf.net/'
]


def _populate_save_authorized_origins(apps, schema_editor):
    # seed the save_authorized_origin table with the default whitelist;
    # use the historical model, as required in data migrations
    SaveAuthorizedOrigin = apps.get_model('swh.web.common', 'SaveAuthorizedOrigin')
    for origin_url in _authorized_origins:
        SaveAuthorizedOrigin.objects.create(url=origin_url)


class Migration(migrations.Migration):
    """
    Initial migration: create the three origin-save tables and populate
    the authorized-origin whitelist.
    """

    initial = True

    dependencies = [
    ]

    operations = [
        migrations.CreateModel(
            name='SaveAuthorizedOrigin',
            fields=[
                ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('url', models.CharField(max_length=200)),
            ],
            options={
                'db_table': 'save_authorized_origin',
            },
        ),
        migrations.CreateModel(
            name='SaveOriginRequest',
            fields=[
                ('id', models.BigAutoField(primary_key=True, serialize=False)),
                ('request_date', models.DateTimeField(auto_now_add=True)),
                ('origin_type', models.CharField(max_length=200)),
                ('origin_url', models.CharField(max_length=200)),
                ('status', models.TextField(choices=[('accepted', 'accepted'), ('rejected', 'rejected'), ('pending', 'pending')], default='pending')),
                ('loading_task_id', models.IntegerField(default=-1)),
            ],
            options={
                'db_table': 'save_origin_request',
                'ordering': ['-id'],
            },
        ),
        migrations.CreateModel(
            name='SaveUnauthorizedOrigin',
            fields=[
                ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('url', models.CharField(max_length=200)),
            ],
            options={
                'db_table': 'save_unauthorized_origin',
            },
        ),
        migrations.RunPython(_populate_save_authorized_origins)
    ]
from django.db import models


class SaveAuthorizedOrigin(models.Model):
    """
    Model table holding origin urls authorized to be loaded into the archive.
    """
    # origin url prefix of a whitelisted code hosting provider
    url = models.CharField(max_length=200, null=False)

    class Meta:
        db_table = 'save_authorized_origin'

    def __str__(self):
        return self.url


class SaveUnauthorizedOrigin(models.Model):
    """
    Model table holding origin urls not authorized to be loaded into the
    archive.
    """
    # origin url prefix of a blacklisted provider
    url = models.CharField(max_length=200, null=False)

    class Meta:
        db_table = 'save_unauthorized_origin'

    def __str__(self):
        return self.url


# possible statuses of a save origin request
SAVE_REQUEST_ACCEPTED = 'accepted'
SAVE_REQUEST_REJECTED = 'rejected'
SAVE_REQUEST_PENDING = 'pending'

# (value, human readable value) pairs for the status field choices
SAVE_REQUEST_STATUS = [
    (SAVE_REQUEST_ACCEPTED, SAVE_REQUEST_ACCEPTED),
    (SAVE_REQUEST_REJECTED, SAVE_REQUEST_REJECTED),
    (SAVE_REQUEST_PENDING, SAVE_REQUEST_PENDING)
]


class SaveOriginRequest(models.Model):
    """
    Model table holding all the save origin requests issued by users.
    """
    id = models.BigAutoField(primary_key=True)
    # automatically set to the request creation date
    request_date = models.DateTimeField(auto_now_add=True)
    origin_type = models.CharField(max_length=200, null=False)
    origin_url = models.CharField(max_length=200, null=False)
    status = models.TextField(choices=SAVE_REQUEST_STATUS,
                              default=SAVE_REQUEST_PENDING)
    # id of the scheduler task loading the origin, -1 when no task
    # has been created yet
    loading_task_id = models.IntegerField(default=-1)

    class Meta:
        db_table = 'save_origin_request'
        # newest requests first
        ordering = ['-id']

    def __str__(self):
        return str({'id': self.id,
                    'request_date': self.request_date,
                    'origin_type': self.origin_type,
                    'origin_url': self.origin_url,
                    'status': self.status,
                    'loading_task_id': self.loading_task_id})
def get_origin_save_authorized_urls():
    """
    Get the list of origin url prefixes authorized to be
    immediately loaded into the archive (whitelist).

    Returns:
        list: The list of authorized origin url prefix
    """
    return [origin.url
            for origin in SaveAuthorizedOrigin.objects.all()]


def get_origin_save_unauthorized_urls():
    """
    Get the list of origin url prefixes forbidden to be
    loaded into the archive (blacklist).

    Returns:
        list: the list of unauthorized origin url prefix
    """
    return [origin.url
            for origin in SaveUnauthorizedOrigin.objects.all()]


def can_save_origin(origin_url):
    """
    Check if a software origin can be saved into the archive.

    Based on the origin url, the save request will be either:

    * immediately accepted if the url is whitelisted
    * rejected if the url is blacklisted
    * put in pending state for manual review otherwise

    Args:
        origin_url (str): the software origin url to check

    Returns:
        str: the origin save request status, either *accepted*,
        *rejected* or *pending*
    """
    # the blacklist takes precedence: a blacklisted url is rejected
    for unauthorized_prefix in get_origin_save_unauthorized_urls():
        if origin_url.startswith(unauthorized_prefix):
            return SAVE_REQUEST_REJECTED

    # a whitelisted url can be saved right away
    for authorized_prefix in get_origin_save_authorized_urls():
        if origin_url.startswith(authorized_prefix):
            return SAVE_REQUEST_ACCEPTED

    # anything else needs a manual review
    return SAVE_REQUEST_PENDING
'next_run_scheduled': SAVE_TASK_SCHEDULED, + 'completed': SAVE_TASK_SUCCEED, + 'disabled': SAVE_TASK_FAILED +} + + +def get_savable_origin_types(): + """ + Get the list of software origin types that can be loaded + through a save request. + + Returns: + list: the list of savable origin types + """ + return list(_origin_type_task.keys()) + + +def _check_origin_type_savable(origin_type): + allowed_origin_types = ', '.join(get_savable_origin_types()) + if origin_type not in _origin_type_task: + raise BadInputExc('Origin of type %s can not be saved! ' + 'Allowed types are the following: %s' % + (origin_type, allowed_origin_types)) + + +_validate_url = URLValidator(schemes=['http', 'https']) + + +def _check_origin_url_valid(origin_url): + try: + _validate_url(origin_url) + except ValidationError: + raise BadInputExc('The provided origin url (%s) is not valid!' % + origin_url) + + +def _save_request_dict(save_request, task=None): + save_task_status = SAVE_TASK_NOT_CREATED + if task: + save_task_status = _save_task_status[task['status']] + return {'origin_type': save_request.origin_type, + 'origin_url': save_request.origin_url, + 'save_request_date': save_request.request_date.isoformat(), + 'save_request_status': save_request.status, + 'save_task_status': save_task_status} + + +def create_save_origin_request(origin_type, origin_url): + """ + Create a loading task to save a software origin into the archive. + + This function aims to create a software origin loading task + trough the use of the swh-scheduler component. + + First, some checks are performed to see if the origin type and + url are valid but also if the the save request can be accepted. + If those checks passed, the loading task is then created. + Otherwise, the save request is put in pending or rejected state. + + All the submitted save requests are logged into the swh-web + database to keep track of them. + + Args: + origin_type (str): the type of origin to save (*git*, *hg*, *svn*, ...) 
+ origin_url (str): the url of the origin to save + + Raises: + BadInputExc: the origin type or url is invalid + ForbiddenExc: the provided origin url is blacklisted + + Returns: + dict: A dict describing the save request with the following keys: + + * **origin_type**: the type of the origin to save + * **origin_url**: the url of the origin + * **save_request_date**: the date the request was submitted + * **save_request_status**: the request status, either *accepted*, + *rejected* or *pending* + * **save_task_status**: the origin loading task status, either + *not created*, *not yet scheduled*, *scheduled*, *succeed* or + *failed* + + + """ + _check_origin_type_savable(origin_type) + _check_origin_url_valid(origin_url) + save_request_status = can_save_origin(origin_url) + task = None + + # if the origin save request is accepted, create a scheduler + # task to load it into the archive + if save_request_status == SAVE_REQUEST_ACCEPTED: + # create a task with high priority + kwargs = {'priority': 'high'} + # set task parameters according to the origin type + if origin_type == 'git': + kwargs['repo_url'] = origin_url + elif origin_type == 'hg': + kwargs['origin_url'] = origin_url + elif origin_type == 'svn': + kwargs['svn_url'] = origin_url + + sor = None + # get list of previously sumitted save requests + current_sors = \ + list(SaveOriginRequest.objects.filter(origin_type=origin_type, + origin_url=origin_url)) + + can_create_task = False + # if no save requests previously submitted, create the scheduler task + if not current_sors: + can_create_task = True + else: + # get the latest submitted save request + sor = current_sors[0] + # if it was in pending state, we need to create the scheduler task + # and update the save request info in the database + if sor.status == SAVE_REQUEST_PENDING: + can_create_task = True + # a task has already been created to load the origin + elif sor.loading_task_id != -1: + # get the scheduler task and its status + task = 
scheduler.get_tasks([sor.loading_task_id])[0] + save_task_status = _save_task_status[task['status']] + # create a new scheduler task only if the previous one has been + # already executed + if save_task_status == SAVE_TASK_FAILED or \ + save_task_status == SAVE_TASK_SUCCEED: + can_create_task = True + sor = None + else: + can_create_task = False + + if can_create_task: + # effectively create the scheduler task + task_dict = create_oneshot_task_dict( + _origin_type_task[origin_type], **kwargs) + task = scheduler.create_tasks([task_dict])[0] + + # pending save request has been accepted + if sor: + sor.status = SAVE_REQUEST_ACCEPTED + sor.loading_task_id = task['id'] + sor.save() + else: + sor = SaveOriginRequest.objects.create(origin_type=origin_type, + origin_url=origin_url, + status=save_request_status, # noqa + loading_task_id=task['id']) # noqa + + # save request must be manually reviewed for acceptance + elif save_request_status == SAVE_REQUEST_PENDING: + # check if such a save request has already been submitted, + # no need to add it to the database in that case + try: + sor = SaveOriginRequest.objects.get(origin_type=origin_type, + origin_url=origin_url, + status=save_request_status) + # if not add it to the database + except ObjectDoesNotExist: + sor = SaveOriginRequest.objects.create(origin_type=origin_type, + origin_url=origin_url, + status=save_request_status) + # origin can not be saved as its url is blacklisted, + # log the request to the database anyway + else: + sor = SaveOriginRequest.objects.create(origin_type=origin_type, + origin_url=origin_url, + status=save_request_status) + + if save_request_status == SAVE_REQUEST_REJECTED: + raise ForbiddenExc('The origin url is blacklisted and will not be ' + 'loaded into the archive.') + + return _save_request_dict(sor, task) + + +def get_save_origin_requests_from_queryset(requests_queryset): + """ + Get all save requests from a SaveOriginRequest queryset. 
+ + Args: + requests_queryset (django.db.models.QuerySet): input + SaveOriginRequest queryset + + Returns: + list: A list of save origin requests dict as described in + :func:`swh.web.common.origin_save.create_save_origin_request` + """ + requests = [] + for sor in requests_queryset: + # rejected saving task or pending for acceptance + if sor.loading_task_id == -1: + requests.append(_save_request_dict(sor)) + continue + task = scheduler.get_tasks([sor.loading_task_id]) + # loading task may have been archived, do not return + # save request info in that case + if task: + requests.append(_save_request_dict(sor, task[0])) + return requests + + +def get_save_origin_requests(origin_type, origin_url): + """ + Get all save requests for a given software origin. + + Args: + origin_type (str): the type of the origin + origin_url (str): the url of the origin + + Raises: + BadInputExc: the origin type or url is invalid + + Returns: + list: A list of save origin requests dict as described in + :func:`swh.web.common.origin_save.create_save_origin_request` + """ + _check_origin_type_savable(origin_type) + _check_origin_url_valid(origin_url) + sors = SaveOriginRequest.objects.filter(origin_type=origin_type, + origin_url=origin_url) + return get_save_origin_requests_from_queryset(sors) diff --git a/swh/web/config.py b/swh/web/config.py index dcf5a06d2..8c45b1f60 100644 --- a/swh/web/config.py +++ b/swh/web/config.py @@ -1,95 +1,117 @@ # Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from swh.core import config from swh.storage import get_storage from swh.indexer.storage import get_indexer_storage from swh.vault.api.client import RemoteVaultClient +from swh.scheduler import get_scheduler DEFAULT_CONFIG = { 'allowed_hosts': ('list', []), 'storage': ('dict', { 'cls': 'remote', 
'args': { 'url': 'http://127.0.0.1:5002/', 'timeout': 10, }, }), 'indexer_storage': ('dict', { 'cls': 'remote', 'args': { 'url': 'http://127.0.0.1:5007/', 'timeout': 1, } }), 'vault': ('string', 'http://127.0.0.1:5005/'), 'log_dir': ('string', '/tmp/swh/log'), 'debug': ('bool', False), 'host': ('string', '127.0.0.1'), 'port': ('int', 5004), 'secret_key': ('string', 'development key'), # do not display code highlighting for content > 1MB 'content_display_max_size': ('int', 1024 * 1024), 'throttling': ('dict', { 'cache_uri': None, # production: memcached as cache (127.0.0.1:11211) # development: in-memory cache so None 'scopes': { 'swh_api': { 'limiter_rate': { 'default': '120/h' }, 'exempted_networks': ['127.0.0.0/8'] }, 'swh_vault_cooking': { 'limiter_rate': { 'default': '120/h', 'GET': '60/m' }, 'exempted_networks': ['127.0.0.0/8'] + }, + 'swh_save_origin': { + 'limiter_rate': { + 'default': '120/h', + 'POST': '10/h' + }, + 'exempted_networks': ['127.0.0.0/8'] } } + }), + 'scheduler': ('dict', { + 'cls': 'remote', + 'args': { + 'url': 'http://localhost:5008/' + } }) } swhweb_config = {} def get_config(config_file='webapp/webapp'): """Read the configuration file `config_file`, update the app with parameters (secret_key, conf) and return the parsed configuration as a dict. 
If no configuration file is provided, return a default configuration.""" if not swhweb_config: cfg = config.load_named_config(config_file, DEFAULT_CONFIG) swhweb_config.update(cfg) config.prepare_folders(swhweb_config, 'log_dir') swhweb_config['storage'] = get_storage(**swhweb_config['storage']) swhweb_config['vault'] = RemoteVaultClient(swhweb_config['vault']) - swhweb_config['indexer_storage'] = get_indexer_storage( - **swhweb_config['indexer_storage']) + swhweb_config['indexer_storage'] = \ + get_indexer_storage(**swhweb_config['indexer_storage']) + swhweb_config['scheduler'] = get_scheduler(**swhweb_config['scheduler']) # noqa return swhweb_config def storage(): """Return the current application's SWH storage. """ return get_config()['storage'] def vault(): """Return the current application's SWH vault. """ return get_config()['vault'] def indexer_storage(): """Return the current application's SWH indexer storage. """ return get_config()['indexer_storage'] + + +def scheduler(): + """Return the current application's SWH scheduler. + + """ + return get_config()['scheduler'] diff --git a/swh/web/settings/common.py b/swh/web/settings/common.py index d0fbf47f0..79cb86837 100644 --- a/swh/web/settings/common.py +++ b/swh/web/settings/common.py @@ -1,225 +1,223 @@ # Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information """ Django common settings for swh-web. """ import os from swh.web.config import get_config swh_web_config = get_config() # Build paths inside the project like this: os.path.join(BASE_DIR, ...) PROJECT_DIR = os.path.dirname(os.path.abspath(__file__)) # Quick-start development settings - unsuitable for production # See https://docs.djangoproject.com/en/1.11/howto/deployment/checklist/ # SECURITY WARNING: keep the secret key used in production secret! 
SECRET_KEY = swh_web_config['secret_key'] # SECURITY WARNING: don't run with debug turned on in production! DEBUG = swh_web_config['debug'] DEBUG_PROPAGATE_EXCEPTIONS = swh_web_config['debug'] ALLOWED_HOSTS = ['127.0.0.1', 'localhost'] + swh_web_config['allowed_hosts'] # Application definition INSTALLED_APPS = [ 'django.contrib.admin', 'django.contrib.auth', 'django.contrib.contenttypes', 'django.contrib.sessions', 'django.contrib.messages', 'django.contrib.staticfiles', 'rest_framework', + 'swh.web.common', 'swh.web.api', 'swh.web.browse', 'webpack_loader', 'django_js_reverse' ] MIDDLEWARE = [ 'django.middleware.security.SecurityMiddleware', 'django.contrib.sessions.middleware.SessionMiddleware', 'django.middleware.common.CommonMiddleware', 'django.middleware.csrf.CsrfViewMiddleware', 'django.contrib.auth.middleware.AuthenticationMiddleware', 'django.contrib.messages.middleware.MessageMiddleware', 'django.middleware.clickjacking.XFrameOptionsMiddleware', 'swh.web.common.middlewares.ThrottlingHeadersMiddleware' ] # Compress all assets (static ones and dynamically generated html) # served by django in a local development environment context. # In a production environment, assets compression will be directly # handled by web servers like apache or nginx. 
if swh_web_config['debug']: MIDDLEWARE.insert(0, 'django.middleware.gzip.GZipMiddleware') ROOT_URLCONF = 'swh.web.urls' TEMPLATES = [ { 'BACKEND': 'django.template.backends.django.DjangoTemplates', 'DIRS': [os.path.join(PROJECT_DIR, "../templates")], 'APP_DIRS': True, 'OPTIONS': { 'context_processors': [ 'django.template.context_processors.debug', 'django.template.context_processors.request', 'django.contrib.auth.context_processors.auth', 'django.contrib.messages.context_processors.messages', ], 'libraries': { 'swh_templatetags': 'swh.web.common.swh_templatetags', }, }, }, ] WSGI_APPLICATION = 'swh.web.wsgi.application' -# Database -# https://docs.djangoproject.com/en/1.11/ref/settings/#databases - DATABASES = { 'default': { 'ENGINE': 'django.db.backends.sqlite3', 'NAME': os.path.join(PROJECT_DIR, 'db.sqlite3'), } } # Password validation # https://docs.djangoproject.com/en/1.11/ref/settings/#auth-password-validators AUTH_PASSWORD_VALIDATORS = [ { 'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator', # noqa }, { 'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator', # noqa }, { 'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator', # noqa }, { 'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator', # noqa }, ] # Internationalization # https://docs.djangoproject.com/en/1.11/topics/i18n/ LANGUAGE_CODE = 'en-us' TIME_ZONE = 'UTC' USE_I18N = True USE_L10N = True USE_TZ = True # Static files (CSS, JavaScript, Images) # https://docs.djangoproject.com/en/1.11/howto/static-files/ STATIC_URL = '/static/' STATICFILES_DIRS = [ os.path.join(PROJECT_DIR, "../static") ] INTERNAL_IPS = ['127.0.0.1'] throttle_rates = {} http_requests = ['GET', 'HEAD', 'POST', 'PUT', 'DELETE', 'OPTIONS', 'PATCH'] throttling = swh_web_config['throttling'] for limiter_scope, limiter_conf in throttling['scopes'].items(): if 'default' in limiter_conf['limiter_rate']: throttle_rates[limiter_scope] = 
limiter_conf['limiter_rate']['default'] # for backward compatibility else: throttle_rates[limiter_scope] = limiter_conf['limiter_rate'] # register sub scopes specific for HTTP request types for http_request in http_requests: if http_request in limiter_conf['limiter_rate']: throttle_rates[limiter_scope + '_' + http_request.lower()] = \ limiter_conf['limiter_rate'][http_request] REST_FRAMEWORK = { 'DEFAULT_RENDERER_CLASSES': ( 'rest_framework.renderers.JSONRenderer', 'swh.web.api.renderers.YAMLRenderer', 'rest_framework.renderers.TemplateHTMLRenderer' ), 'DEFAULT_THROTTLE_CLASSES': ( 'swh.web.common.throttling.SwhWebRateThrottle', ), 'DEFAULT_THROTTLE_RATES': throttle_rates } LOGGING = { 'version': 1, 'disable_existing_loggers': False, 'filters': { 'require_debug_false': { '()': 'django.utils.log.RequireDebugFalse', }, 'require_debug_true': { '()': 'django.utils.log.RequireDebugTrue', }, }, 'formatters': { 'verbose': { 'format': '[%(asctime)s] [%(levelname)s] %(request)s %(status_code)s', # noqa 'datefmt': "%d/%b/%Y %H:%M:%S" }, }, 'handlers': { 'console': { 'level': 'DEBUG', 'filters': ['require_debug_true'], 'class': 'logging.StreamHandler', }, 'file': { 'level': 'INFO', 'filters': ['require_debug_false'], 'class': 'logging.FileHandler', 'filename': os.path.join(swh_web_config['log_dir'], 'swh-web.log'), 'formatter': 'verbose' }, }, 'loggers': { 'django': { 'handlers': ['console', 'file'], 'level': 'DEBUG' if DEBUG else 'INFO', 'propagate': True, }, 'django.request': { 'handlers': ['file'], 'level': 'DEBUG' if DEBUG else 'INFO', 'propagate': False, } }, } WEBPACK_LOADER = { 'DEFAULT': { 'CACHE': not DEBUG, 'BUNDLE_DIR_NAME': './', 'STATS_FILE': os.path.join(PROJECT_DIR, '../static/webpack-stats.json'), # noqa 'POLL_INTERVAL': 0.1, 'TIMEOUT': None, 'IGNORE': ['.+\.hot-update.js', '.+\.map'] } } diff --git a/swh/web/settings/tests.py b/swh/web/settings/tests.py index ad81700ee..6a9c71014 100644 --- a/swh/web/settings/tests.py +++ b/swh/web/settings/tests.py @@ -1,69 
+1,82 @@ # Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information # flake8: noqa """ Django tests settings for swh-web. """ import os from swh.web.config import get_config scope1_limiter_rate = 3 scope1_limiter_rate_post = 1 scope2_limiter_rate = 5 scope2_limiter_rate_post = 2 scope3_limiter_rate = 1 scope3_limiter_rate_post = 1 swh_web_config = get_config() swh_web_config.update({ 'debug': True, 'secret_key': 'test', 'throttling': { 'cache_uri': None, 'scopes': { 'swh_api': { 'limiter_rate': { 'default': '60/min' }, 'exempted_networks': ['127.0.0.0/8'] }, 'swh_vault_cooking': { 'limiter_rate': { 'default': '120/h', 'GET': '60/m' }, 'exempted_networks': ['127.0.0.0/8'] }, + 'swh_save_origin': { + 'limiter_rate': { + 'default': '120/h', + 'POST': '10/h' + }, + 'exempted_networks': ['127.0.0.0/8'] + }, 'scope1': { 'limiter_rate': { 'default': '%s/min' % scope1_limiter_rate, 'POST': '%s/min' % scope1_limiter_rate_post, } }, 'scope2': { 'limiter_rate': { 'default': '%s/min' % scope2_limiter_rate, 'POST': '%s/min' % scope2_limiter_rate_post } }, 'scope3': { 'limiter_rate': { 'default': '%s/min' % scope3_limiter_rate, 'POST': '%s/min' % scope3_limiter_rate_post }, 'exempted_networks': ['127.0.0.0/8'] } } } }) from .common import * -ALLOWED_HOSTS += ['testserver'] \ No newline at end of file +ALLOWED_HOSTS += ['testserver'] + +# As nose is used as a test runner, we cannot benefit from the in-memory +# django test database used when running tests through '$ python3 manage.py test'. +# So instead use a different database file that will be created on the fly +# when running the tests. 
+DATABASES['default']['NAME'] = os.path.join(PROJECT_DIR, 'testdb.sqlite3') diff --git a/swh/web/tests/api/views/test_origin_save.py b/swh/web/tests/api/views/test_origin_save.py new file mode 100644 index 000000000..add3e3bc1 --- /dev/null +++ b/swh/web/tests/api/views/test_origin_save.py @@ -0,0 +1,232 @@ +# Copyright (C) 2018 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU Affero General Public License version 3, or any later version +# See top-level LICENSE file for more information + +from nose.tools import istest, nottest +from rest_framework.test import APITestCase +from unittest.mock import patch + +from swh.web.common.utils import reverse +from swh.web.common.models import ( + SaveUnauthorizedOrigin, SaveOriginRequest, + SAVE_REQUEST_ACCEPTED, SAVE_REQUEST_REJECTED, + SAVE_REQUEST_PENDING +) +from swh.web.common.origin_save import ( + SAVE_TASK_NOT_CREATED, SAVE_TASK_NOT_YET_SCHEDULED, + SAVE_TASK_SCHEDULED, SAVE_TASK_FAILED, SAVE_TASK_SUCCEED +) +from swh.web.tests.testcase import SWHWebTestCase + + +class SaveApiTestCase(SWHWebTestCase, APITestCase): + + @classmethod + def setUpTestData(cls): + SaveUnauthorizedOrigin.objects.create( + url='https://github.com/user/illegal_repo') + SaveUnauthorizedOrigin.objects.create( + url='https://gitlab.com/user_to_exclude') + + @istest + def invalid_origin_type(self): + url = reverse('save-origin', + kwargs={'origin_type': 'foo', + 'origin_url': 'https://github.com/torvalds/linux'}) # noqa + + response = self.client.post(url) + self.assertEquals(response.status_code, 400) + + @istest + def invalid_origin_url(self): + url = reverse('save-origin', + kwargs={'origin_type': 'git', + 'origin_url': 'bar'}) + + response = self.client.post(url) + self.assertEquals(response.status_code, 400) + + @nottest + def check_created_save_request_status(self, mock_scheduler, origin_url, + scheduler_task_status, + expected_request_status, + 
expected_task_status=None): + + if not scheduler_task_status: + mock_scheduler.get_tasks.return_value = [] + else: + mock_scheduler.get_tasks.return_value = \ + [{ + 'priority': 'high', + 'policy': 'oneshot', + 'type': 'origin-update-git', + 'arguments': { + 'kwargs': { + 'repo_url': origin_url + }, + 'args': [] + }, + 'status': scheduler_task_status, + 'id': 1, + }] + + mock_scheduler.create_tasks.return_value = \ + [{ + 'priority': 'high', + 'policy': 'oneshot', + 'type': 'origin-update-git', + 'arguments': { + 'kwargs': { + 'repo_url': origin_url + }, + 'args': [] + }, + 'status': 'next_run_not_scheduled', + 'id': 1, + }] + + url = reverse('save-origin', + kwargs={'origin_type': 'git', + 'origin_url': origin_url}) + + response = self.client.post(url) + + if expected_request_status != SAVE_REQUEST_REJECTED: + self.assertEqual(response.status_code, 200) + self.assertEqual(response.data['save_request_status'], + expected_request_status) + self.assertEqual(response.data['save_task_status'], + expected_task_status) + + else: + self.assertEqual(response.status_code, 403) + + @nottest + def check_save_request_status(self, mock_scheduler, origin_url, + expected_request_status, + expected_task_status, + scheduler_task_status='next_run_not_scheduled'): # noqa + + mock_scheduler.get_tasks.return_value = \ + [{ + 'priority': 'high', + 'policy': 'oneshot', + 'type': 'origin-update-git', + 'arguments': { + 'kwargs': { + 'repo_url': origin_url + }, + 'args': [] + }, + 'status': scheduler_task_status, + 'id': 1, + }] + + url = reverse('save-origin', + kwargs={'origin_type': 'git', + 'origin_url': origin_url}) + + response = self.client.get(url) + self.assertEqual(response.status_code, 200) + + save_request_data = response.data[0] + + self.assertEqual(save_request_data['save_request_status'], + expected_request_status) + self.assertEqual(save_request_data['save_task_status'], + expected_task_status) + + @istest + @patch('swh.web.common.origin_save.scheduler') + def 
save_request_rejected(self, mock_scheduler): + origin_url = 'https://github.com/user/illegal_repo' + self.check_created_save_request_status(mock_scheduler, origin_url, + None, SAVE_REQUEST_REJECTED) + self.check_save_request_status(mock_scheduler, origin_url, + SAVE_REQUEST_REJECTED, + SAVE_TASK_NOT_CREATED) + + @istest + @patch('swh.web.common.origin_save.scheduler') + def save_request_pending(self, mock_scheduler): + origin_url = 'https://unkwownforge.com/user/repo' + self.check_created_save_request_status(mock_scheduler, origin_url, + None, SAVE_REQUEST_PENDING, + SAVE_TASK_NOT_CREATED) + self.check_save_request_status(mock_scheduler, origin_url, + SAVE_REQUEST_PENDING, + SAVE_TASK_NOT_CREATED) + + @istest + @patch('swh.web.common.origin_save.scheduler') + def save_request_succeed(self, mock_scheduler): + origin_url = 'https://github.com/Kitware/CMake' + self.check_created_save_request_status(mock_scheduler, origin_url, + None, SAVE_REQUEST_ACCEPTED, + SAVE_TASK_NOT_YET_SCHEDULED) + self.check_save_request_status(mock_scheduler, origin_url, + SAVE_REQUEST_ACCEPTED, + SAVE_TASK_SCHEDULED, + scheduler_task_status='next_run_scheduled') # noqa + self.check_save_request_status(mock_scheduler, origin_url, + SAVE_REQUEST_ACCEPTED, + SAVE_TASK_SUCCEED, + scheduler_task_status='completed') # noqa + + @istest + @patch('swh.web.common.origin_save.scheduler') + def save_request_failed(self, mock_scheduler): + origin_url = 'https://gitlab.com/inkscape/inkscape' + self.check_created_save_request_status(mock_scheduler, origin_url, + None, SAVE_REQUEST_ACCEPTED, + SAVE_TASK_NOT_YET_SCHEDULED) + self.check_save_request_status(mock_scheduler, origin_url, + SAVE_REQUEST_ACCEPTED, + SAVE_TASK_SCHEDULED, + scheduler_task_status='next_run_scheduled') # noqa + self.check_save_request_status(mock_scheduler, origin_url, + SAVE_REQUEST_ACCEPTED, + SAVE_TASK_FAILED, + scheduler_task_status='disabled') # noqa + + @istest + @patch('swh.web.common.origin_save.scheduler') + def 
create_save_request_only_when_needed(self, mock_scheduler): + origin_url = 'https://gitlab.com/webpack/webpack' + SaveOriginRequest.objects.create(origin_type='git', + origin_url=origin_url, + status=SAVE_REQUEST_ACCEPTED, # noqa + loading_task_id=56) + + self.check_created_save_request_status(mock_scheduler, origin_url, + 'next_run_not_scheduled', + SAVE_REQUEST_ACCEPTED, + SAVE_TASK_NOT_YET_SCHEDULED) + sors = list(SaveOriginRequest.objects.filter(origin_type='git', + origin_url=origin_url)) + self.assertEquals(len(sors), 1) + + self.check_created_save_request_status(mock_scheduler, origin_url, + 'next_run_scheduled', + SAVE_REQUEST_ACCEPTED, + SAVE_TASK_SCHEDULED) + sors = list(SaveOriginRequest.objects.filter(origin_type='git', + origin_url=origin_url)) + self.assertEquals(len(sors), 1) + + self.check_created_save_request_status(mock_scheduler, origin_url, + 'completed', + SAVE_REQUEST_ACCEPTED, + SAVE_TASK_NOT_YET_SCHEDULED) + sors = list(SaveOriginRequest.objects.filter(origin_type='git', + origin_url=origin_url)) + self.assertEquals(len(sors), 2) + + self.check_created_save_request_status(mock_scheduler, origin_url, + 'disabled', + SAVE_REQUEST_ACCEPTED, + SAVE_TASK_NOT_YET_SCHEDULED) + sors = list(SaveOriginRequest.objects.filter(origin_type='git', + origin_url=origin_url)) + self.assertEquals(len(sors), 3) diff --git a/swh/web/urls.py b/swh/web/urls.py index 6f7423adb..1ca965235 100644 --- a/swh/web/urls.py +++ b/swh/web/urls.py @@ -1,51 +1,53 @@ # Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from django.conf import settings from django.conf.urls import ( url, include, handler400, handler403, handler404, handler500 ) +from django.contrib import admin from django.contrib.staticfiles.urls import staticfiles_urlpatterns from 
django.contrib.staticfiles.views import serve from django.shortcuts import render from django.views.generic.base import RedirectView from django_js_reverse.views import urls_js from swh.web.common.exc import ( swh_handle400, swh_handle403, swh_handle404, swh_handle500 ) from swh.web.browse.identifiers import swh_id_browse favicon_view = RedirectView.as_view(url='/static/img/icons/swh-logo-32x32.png', permanent=True) def default_view(request): return render(request, "homepage.html") urlpatterns = [ + url(r'^admin/', admin.site.urls), url(r'^favicon\.ico$', favicon_view), url(r'^api/', include('swh.web.api.urls')), url(r'^browse/', include('swh.web.browse.urls')), url(r'^$', default_view, name='swh-web-homepage'), url(r'^jsreverse/$', urls_js, name='js_reverse'), url(r'^(?Pswh:[0-9]+:[a-z]+:[0-9a-f]+.*)/$', swh_id_browse, name='browse-swh-id') ] # enable to serve compressed assets through django development server if settings.DEBUG: static_pattern = r'^%s(?P.*)$' % settings.STATIC_URL[1:] urlpatterns.append(url(static_pattern, serve)) else: urlpatterns += staticfiles_urlpatterns() handler400 = swh_handle400 # noqa handler403 = swh_handle403 # noqa handler404 = swh_handle404 # noqa handler500 = swh_handle500 # noqa