Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Paste
P117
swh mimetype recognized repartition - updated
Active
Public
Actions
Authored by
ardumont
on Oct 21 2016, 11:36 AM.
Edit Paste
Archive Paste
View Raw File
Subscribe
Mute Notifications
Award Token
Flag For Later
Tags
Indexer
Subscribers
None
softwareheritage
=>
select
mimetype
,
count
,
percent
from
swh_content_mimetype_text_repartition
();
mimetype
|
count
|
percent
------------------------------------+---------+-----------
text
/
plain
|
2764656
|
37
.
3915
text
/
x
-
c
|
1603002
|
21
.
6803
text
/
x
-
po
|
593128
|
8
.
0220
text
/
html
|
464516
|
6
.
2825
text
/
x
-
c
++
|
398550
|
5
.
3903
application
/
xml
|
379255
|
5
.
1294
text
/
x
-
php
|
317574
|
4
.
2951
text
/
x
-
ruby
|
279675
|
3
.
7826
text
/
x
-
python
|
223713
|
3
.
0257
text
/
x
-
shellscript
|
33402
|
.
4518
text
/
x
-
makefile
|
20722
|
.
2803
text
/
x
-
asm
|
11816
|
.
1598
image
/
svg
+
xml
|
9961
|
.
1347
text
/
x
-
diff
|
8486
|
.
1148
text
/
x
-
pascal
|
7876
|
.
1065
text
/
troff
|
7285
|
.
0985
text
/
x
-
lisp
|
5694
|
.
0770
text
/
x
-
m4
|
4804
|
.
0650
text
/
x
-
tex
|
4388
|
.
0593
application
/
javascript
|
2811
|
.
0380
text
/
x
-
msdos
-
batch
|
2380
|
.
0322
text
/
x
-
fortran
|
2125
|
.
0287
text
/
x
-
perl
|
2001
|
.
0271
application
/
x
-
elc
|
531
|
.
0072
application
/
zlib
|
518
|
.
0070
message
/
rfc822
|
259
|
.
0035
application
/
postscript
|
227
|
.
0031
image
/
x
-
xpmi
|
142
|
.
0019
text
/
rtf
|
128
|
.
0017
application
/
vnd
.
ms
-
fontobject
|
95
|
.
0013
text
/
x
-
lua
|
94
|
.
0013
text
/
x
-
awk
|
89
|
.
0012
application
/
pdf
|
81
|
.
0011
application
/
x
-
setupscript
|
57
|
.
0008
application
/
pgp
-
keys
|
44
|
.
0006
message
/
news
|
42
|
.
0006
text
/
x
-
tcl
|
35
|
.
0005
image
/
x
-
portable
-
pixmap
|
31
|
.
0004
application
/
pgp
-
signature
|
29
|
.
0004
text
/
x
-
vcard
|
21
|
.
0003
video
/
quicktime
|
10
|
.
0001
application
/
mac
-
binhex40
|
5
|
.
0001
image
/
x
-
portable
-
greymap
|
5
|
.
0001
application
/
vnd
.
font
-
fontforge
-
sfd
|
4
|
.
0001
image
/
gif
|
3
|
.
0000
application
/
x
-
wine
-
extension
-
ini
|
3
|
.
0000
application
/
x
-
bzip2
|
3
|
.
0000
application
/
x
-
freemind
|
3
|
.
0000
image
/
x
-
portable
-
bitmap
|
2
|
.
0000
model
/
vrml
|
2
|
.
0000
application
/
vnd
.
fdf
|
1
|
.
0000
application
/
x
-
archive
|
1
|
.
0000
application
/
vnd
.
ms
-
opentype
|
1
|
.
0000
application
/
pgp
|
1
|
.
0000
application
/
x
-
kdelnk
|
1
|
.
0000
audio
/
mpeg
|
1
|
.
0000
text
/
x
-
nawk
|
1
|
.
0000
(
57
rows
)
softwareheritage
=>
select
(
100
*
(
select
count
(
*
)
from
content_mimetype
where
encoding
<>
'binary'
))
/
(
select
count
(
*
)
from
content_mimetype
)::
real
as
percent
;
percent
------------------
96
.
7064603246392
(
1
row
)
softwareheritage
=>
select
count
(
*
)
from
content_mimetype
;
count
---------
7393808
(
1
row
)
softwareheritage
=>
select
count
(
*
)
from
content_mimetype
where
encoding
<>
'binary'
;
count
---------
7150290
(
1
row
)
softwareheritage
=>
select
count
(
*
)
from
content_language
;
count
---------
7017866
(
1
row
)
#
sql
create
type
swh_content_mimetype_text_repartition_signature
as
(
mimetype
text
,
count
bigint
,
percent
text
);
create
or
replace
function
swh_content_mimetype_text_repartition
()
returns
setof
swh_content_mimetype_text_repartition_signature
language
plpgsql
as
$$
declare
total
real
;
begin
select
count
(
id
)
from
content_mimetype
into
total
;
return
query
select
convert_from
(
mimetype
,
'utf-8'
),
count
(
id
)
as
count
,
to_char
((
100
.
0
*
count
(
id
)
/
total
),
'999.9999'
)
as
percent
from
content_mimetype
where
encoding
<>
'binary'
group
by
mimetype
order
by
count
desc
;
end
$$
;
Event Timeline
ardumont
created this paste.
Oct 21 2016, 11:36 AM
2016-10-21 11:36:50 (UTC+2)
ardumont
updated the paste's language from
autodetect
to
sql
.
Oct 21 2016, 11:38 AM
2016-10-21 11:38:58 (UTC+2)
ardumont
changed the title of this paste from
swh mimetype recognized repartition
to
swh mimetype recognized repartition - updated
.
Oct 24 2016, 11:36 AM
2016-10-24 11:36:06 (UTC+2)
ardumont
edited the content of this paste.
(Show Details)
Log In to Comment