-- Per-MIME-type row counts for content_language rows with lang = 'unknown',
-- joined to content_mimetype on the shared "id" column.
--
-- Recorded from an interactive psql session (prompt: "softwareheritage=>").
--
-- Name resolution (PostgreSQL):
--   * GROUP BY mimetype binds to the input column "mimetype" (the stored
--     value handed to convert_from), not to the select-list alias of the same
--     name; on ambiguity GROUP BY prefers the input column.
--   * ORDER BY count binds to the output alias "count".
--   * Rows with equal counts come back in no guaranteed relative order.
SELECT
    convert_from(mimetype, 'utf-8') AS mimetype,  -- decode stored bytes as UTF-8 text
    count(*) AS count
FROM content_language
INNER JOIN content_mimetype USING (id)
WHERE lang = 'unknown'
GROUP BY mimetype
ORDER BY count DESC;

-- Output recorded in the session:
--
--            mimetype            | count
-- -------------------------------+--------
--  text/plain                    | 491510
--  text/x-ruby                   |  93716
--  text/x-c                      |  33233
--  text/x-c++                    |  19323
--  text/x-python                 |  13690
--  text/x-diff                   |   3827
--  text/x-lisp                   |   1847
--  text/html                     |   1843
--  text/x-msdos-batch            |   1629
--  text/x-fortran                |    935
--  text/x-makefile               |    803
--  text/troff                    |    754
--  application/zlib              |    443
--  text/x-asm                    |    354
--  application/javascript        |    346
--  text/x-pascal                 |    291
--  text/x-tex                    |    209
--  text/x-po                     |    154
--  text/x-php                    |    141
--  text/x-m4                     |    139
--  application/xml               |     71
--  text/rtf                      |     65
--  text/x-shellscript            |     61
--  application/pgp-keys          |     44
--  text/x-lua                    |     42
--  message/news                  |     32
--  image/x-portable-pixmap       |     31
--  application/pgp-signature     |     29
--  message/rfc822                |     27
--  text/x-awk                    |     19
--  application/vnd.ms-fontobject |     11
--  application/x-elc             |      6
--  image/x-portable-greymap      |      5
--  image/x-xpmi                  |      4
--  application/pdf               |      4
--  application/x-bzip2           |      3
--  application/x-freemind        |      3
--  image/x-portable-bitmap       |      2
--  text/x-perl                   |      2
--  image/gif                     |      2
--  model/vrml                    |      1
--  text/x-vcard                  |      1
--  application/pgp               |      1
--  audio/mpeg                    |      1
--  application/vnd.ms-opentype   |      1
--  text/x-nawk                   |      1
--  application/x-archive         |      1
-- (47 rows)