#!/usr/bin/python
"""Manual check tool for the source-code language detector.

Module ``swh/langdetect/checker.py``.  It opens a small pyforms window in
which a reviewer picks a directory, opens the gzip-compressed files found
in it one by one, sees the language predicted by the CNN model and records
whether that prediction is correct.

The change that introduced this module also added the data files
``swh/langdetect/static_data/languages_mini.json`` and
``swh/langdetect/static_data/model.h5``.
"""

import gzip
import os

import pyforms
from PyQt5 import QtGui, QtCore
from pyforms import BaseWidget
from pyforms.controls import ControlTextArea
from pyforms.controls import ControlDir
from pyforms.controls import ControlList
from pyforms.controls import ControlLabel
from pyforms.controls import ControlCombo

from .cnn import CNN

# Background colours used to mark the reviewer's verdict on a list cell.
RED = QtGui.QColor(255, 0, 0)
WHITE = QtGui.QColor(255, 255, 255)
GREEN = QtGui.QColor(0, 255, 0)
BLACK = QtGui.QColor(0, 0, 0)

# Maximum number of characters of a file shown in the content pane.
_PREVIEW_CHARS = 10240


class Check(BaseWidget):
    """Window used to review the classifier's predictions file by file.

    State kept on the instance:

    * ``_dict`` maps a file path to the reviewer's verdict (``True`` for a
      correct prediction, ``False`` for a wrong one).  Files without a
      verdict are absent from the mapping.
    * ``_curr_row`` / ``_curr_column`` hold the list cell of the file that
      is currently displayed, or ``None`` when no file has been opened.
    """

    def __init__(self):
        super(Check, self).__init__(
            'Software Heritage Source Code Language Manual Check Tool')

        self._control = ControlDir('Choose a directory: ')
        self._list = ControlList('Files in the directory')
        self._text = ControlTextArea('Content')
        self._label = ControlLabel('Language: \nValue: ')
        self._label_rest = ControlLabel('')
        self._combo = ControlCombo('Is that correct ?')

        self.formset = [
            '_control',
            ('_list', ['_text', ('_label', '_combo')]),
            '_label_rest',
        ]
        self._control.changed_event = self.__get_files
        self._list.readonly = True
        self._list.cell_double_clicked_event = self.__show_text
        self._text.readonly = True
        self._cnn = CNN(None, 2048, None)
        self._dict = {}
        # The order of the items matters: __show_text selects them by index
        # (0 = Unknown, 1 = No, 2 = Yes).
        self._combo += ('Unknown', None)
        self._combo += ('No', False)
        self._combo += ('Yes', True)
        self._combo.activated_event = self.__checked

        self._curr_row = None
        self._curr_column = None

    def __get_files(self):
        """Fill the list with the files of the chosen directory.

        Only directories without sub-directories contribute files, and
        files whose name starts with a dot are skipped.  All verdicts
        recorded so far are discarded, because they belong to the previous
        listing.
        """
        self._dict = {}
        # The cell position and the summary refer to the old listing.
        self._curr_row = None
        self._curr_column = None
        self._label_rest.value = ''

        rows = []
        for root, subdirs, filenames in os.walk(self._control.value):
            if subdirs:
                continue
            for filename in filenames:
                if not filename.startswith('.'):
                    # One-element tuple: each row of the list has one column.
                    rows.append((os.path.join(root, filename),))
        self._list.value = rows

    def __checked(self, index):
        """Record the verdict chosen in the combo box for the current file.

        Args:
            index: position of the activated combo item, as passed by the
                ``activated_event`` callback.  It is not used; the verdict
                is read from the combo's value instead.
        """
        if self._curr_row is None or self._curr_column is None:
            # No file has been opened yet, so there is nothing to judge.
            return

        path = self._list.get_value(self._curr_column, self._curr_row)
        verdict = self._combo.value
        # The 'Unknown' item carries the value None.  The label comparison
        # is kept from the original code as a fallback.
        if verdict is None or verdict == 'Unknown':
            # An unknown verdict must not count as reviewed.
            self._dict.pop(path, None)
        else:
            self._dict[path] = verdict

        self._update_color(
            verdict, self._list.get_cell(self._curr_column, self._curr_row))

        correct = sum(1 for value in self._dict.values() if value is True)
        wrong = len(self._dict) - correct
        remaining = len(self._list.value) - len(self._dict)
        self._label_rest.value = (
            'Correct:\t{}\tWrong:\t{}\tRemaining:\t{}'.format(
                correct, wrong, remaining))

    def _update_color(self, x, cell):
        """Colour ``cell`` according to the verdict ``x``.

        ``False`` gives a red background, ``True`` a green one and any
        other value a white one.
        """
        if x is False:
            cell.setBackground(RED)
        elif x is True:
            cell.setBackground(GREEN)
        else:
            cell.setBackground(WHITE)

    def __show_text(self, row, column):
        """Display the file of the double-clicked cell and its prediction.

        The file is read through ``gzip``.  Its first ``_PREVIEW_CHARS``
        characters are shown, the classifier result is written to the
        label and the combo box is set to the verdict already recorded for
        this file, if any.

        Args:
            row: row of the double-clicked cell.
            column: column of the double-clicked cell.
        """
        self._curr_row = row
        self._curr_column = column
        path = self._list.get_value(column, row)

        with gzip.open(path, 'rb') as f:
            raw = f.read()
        # Undecodable bytes are replaced so that the text area always
        # receives a str, never raw bytes.
        self._text.value = raw.decode(
            'utf-8', errors='replace')[:_PREVIEW_CHARS]

        # NOTE(review): classify() presumably returns a (language, score)
        # pair; only its first two elements are used here.
        res = self._cnn.classify(path)
        self._label.value = 'Language: {}\nValue: {}'.format(res[0], res[1])

        previous = self._dict.get(path)
        if previous is True:
            self._combo.current_index = 2
        elif previous is False:
            self._combo.current_index = 1
        else:
            self._combo.current_index = 0


# Execute the application
if __name__ == "__main__":
    pyforms.start_app(Check)