Languages detected in PDFBox_1_8_9_20150316_single_thread by content type then language

DETECTED_CONTENT_TYPE_A LANG_ID1_A COUNT
application/pdf
en
217476
application/pdf
de
1427
application/pdf
es
959
application/pdf
fr
705
application/pdf
bn
560
application/pdf
vi
172
application/pdf
it
171
application/pdf
pt
161
application/pdf
th
82
application/pdf
sl
67
application/pdf
so
66
application/pdf
lt
62
application/pdf
tl
60
application/pdf
id
59
application/pdf
pl
54
application/pdf
hu
47
application/pdf
sq
46
application/pdf
tr
46
application/pdf
nl
46
application/pdf
hr
38
application/pdf
sv
35
application/pdf
ro
32
application/pdf
da
26
application/pdf
sk
24
application/pdf
no
24
application/pdf
et
20
application/pdf
el
18
application/pdf
sw
18
application/pdf
ru
17
application/pdf
cs
14
application/pdf
hi
13
application/pdf
af
10
application/pdf
ko
8
application/pdf
lv
6
application/pdf
ar
5
application/pdf
ja
4
application/pdf
fi
4
application/pdf
uk
2
application/pdf
ur
2
application/pdf
zh-tw
2
application/pdf
zh-cn
2
application/pdf
bg
2
application/pdf
he
1
application/pdf
mk
1
application/pdf
fa
1
text/html; charset=ISO-8859-1
en
1
text/plain; charset=ISO-8859-8
en
1
text/plain; charset=windows-1252
en
6
text/plain; charset=windows-1255
en
3




-- Count the rows of `comparisons` that have a non-null LANG_ID1_A, grouped by
-- detected content type and language; within each content type the most
-- frequent language is listed first.
SELECT
    DETECTED_CONTENT_TYPE_A,
    LANG_ID1_A,
    COUNT(1) AS COUNT
FROM comparisons
WHERE LANG_ID1_A IS NOT NULL
GROUP BY
    DETECTED_CONTENT_TYPE_A,
    LANG_ID1_A
ORDER BY
    DETECTED_CONTENT_TYPE_A,
    COUNT DESC