softwareheritage=> select mimetype, count, percent from swh_content_mimetype_text_repartition();
mimetype | count | percent
------------------------------------+---------+-----------
text/plain | 2723484 | 37.3825
text/x-c | 1578897 | 21.6719
text/x-po | 583554 | 8.0098
text/html | 457466 | 6.2792
text/x-c++ | 392656 | 5.3896
application/xml | 375774 | 5.1579
text/x-php | 312741 | 4.2927
text/x-ruby | 275558 | 3.7823
text/x-python | 220031 | 3.0201
text/x-shellscript | 32886 | .4514
text/x-makefile | 20387 | .2798
text/x-asm | 11645 | .1598
image/svg+xml | 9916 | .1361
text/x-diff | 8405 | .1154
text/x-pascal | 7720 | .1060
text/troff | 7219 | .0991
text/x-lisp | 5607 | .0770
text/x-m4 | 4711 | .0647
text/x-tex | 4315 | .0592
application/javascript | 2746 | .0377
text/x-msdos-batch | 2336 | .0321
text/x-fortran | 2099 | .0288
text/x-perl | 1965 | .0270
application/x-elc | 525 | .0072
application/zlib | 507 | .0070
message/rfc822 | 254 | .0035
application/postscript | 227 | .0031
image/x-xpmi | 141 | .0019
text/rtf | 126 | .0017
application/vnd.ms-fontobject | 93 | .0013
text/x-lua | 93 | .0013
text/x-awk | 88 | .0012
application/pdf | 81 | .0011
application/x-setupscript | 56 | .0008
application/pgp-keys | 43 | .0006
message/news | 41 | .0006
text/x-tcl | 34 | .0005
image/x-portable-pixmap | 30 | .0004
application/pgp-signature | 29 | .0004
text/x-vcard | 21 | .0003
video/quicktime | 10 | .0001
application/mac-binhex40 | 5 | .0001
image/x-portable-greymap | 5 | .0001
application/vnd.font-fontforge-sfd | 4 | .0001
image/gif | 3 | .0000
application/x-wine-extension-ini | 3 | .0000
application/x-bzip2 | 3 | .0000
application/x-freemind | 3 | .0000
image/x-portable-bitmap | 2 | .0000
model/vrml | 2 | .0000
application/vnd.fdf | 1 | .0000
application/x-archive | 1 | .0000
application/vnd.ms-opentype | 1 | .0000
application/pgp | 1 | .0000
application/x-kdelnk | 1 | .0000
audio/mpeg | 1 | .0000
text/x-nawk | 1 | .0000
(57 rows)
softwareheritage=> select count(*) from content_mimetype;
count
---------
7285456
(1 row)
# sql
-- Row shape returned by swh_content_mimetype_text_repartition():
-- one row per mimetype group.
create type swh_content_mimetype_text_repartition_signature as (
    mimetype text,    -- mimetype, decoded from its stored form to text
    count    bigint,  -- number of content_mimetype rows counted for the group
    percent  text     -- share of the table total, already formatted as text
);
-- Per-mimetype breakdown of the non-binary rows of content_mimetype.
--
-- Returns one row per mimetype among the rows whose encoding is not 'binary',
-- ordered by descending count:
--   mimetype  the mimetype column decoded as UTF-8 text
--   count     count(id) over the non-binary rows of that mimetype
--   percent   100 * count / count(id) over the WHOLE table (binary rows
--             included), rendered with to_char(..., '999.9999')
--
-- Because the denominator covers every row, the percentages sum to less than
-- 100 whenever the table contains binary rows.
--
-- Declared stable: the function only reads, and a stable function runs both
-- of its queries against the calling statement's snapshot, so the total and
-- the per-mimetype counts always describe the same table state.
create or replace function swh_content_mimetype_text_repartition()
    returns setof swh_content_mimetype_text_repartition_signature
    language plpgsql
    stable
as $$
declare
    -- bigint rather than real: single precision only represents integers
    -- exactly up to 2^24 (16,777,216), so a larger row count would be rounded
    -- before the division. With bigint the percentage is computed in exact
    -- numeric arithmetic.
    total bigint;
begin
    -- Denominator: every row of the table, whatever its encoding.
    select count(id) into total
    from content_mimetype;

    -- No division-by-zero guard needed: an empty table yields no groups,
    -- so the division below is never evaluated in that case.
    return query
        select
            convert_from(mimetype, 'utf-8'),
            count(id) as count,
            to_char(100.0 * count(id) / total, '999.9999') as percent
        from content_mimetype
        where encoding <> 'binary'
        group by mimetype
        order by count desc;
end
$$;