Languages detected in tika_1_8_SNAPSHOT by content type then language

DETECTED_CONTENT_TYPE_B LANG_ID1_B COUNT
 end-functional polystyrene, interdiffusion, neutron reflectometry, surface, thin film , Diffusion, Reflectometry, Thin Films; c
en
1
Public Affairs Officer, USAID/WBG
en
1
This USAID/Timor-Leste page describes the programmatic activities of USAID in Timor-Leste.
en
1
application/gzip
en
293
application/gzip
sv
5
application/gzip
fr
4
application/gzip
vi
4
application/gzip
af
2
application/gzip
de
2
application/gzip
it
2
application/gzip
tl
2
application/gzip
pt
1
application/msword
en
15836
application/msword
es
118
application/msword
fr
17
application/msword
pt
9
application/msword
zh-cn
9
application/msword
de
8
application/msword
id
5
application/msword
ru
5
application/msword
vi
5
application/msword
hr
4
application/msword
it
4
application/msword
da
3
application/msword
pl
2
application/msword
sl
2
application/msword
tl
2
application/msword
ne
1
application/msword
ro
1
application/msword
so
1
application/msword
sw
1
application/msword
tr
1
application/pdf
en
50508
application/pdf
es
400
application/pdf
fr
145
application/pdf
de
119
application/pdf
bn
111
application/pdf
it
52
application/pdf
vi
39
application/pdf
pt
36
application/pdf
tl
16
application/pdf
id
14
application/pdf
lt
10
application/pdf
tr
10
application/pdf
ro
9
application/pdf
hr
7
application/pdf
pl
7
application/pdf
sq
7
application/pdf
nl
6
application/pdf
ru
6
application/pdf
so
6
application/pdf
da
5
application/pdf
hu
5
application/pdf
et
4
application/pdf
sk
4
application/pdf
th
4
application/pdf
af
3
application/pdf
cs
2
application/pdf
hi
2
application/pdf
ko
2
application/pdf
sl
2
application/pdf
sv
2
application/pdf
bg
1
application/pdf
el
1
application/pdf
he
1
application/pdf
mk
1
application/pdf
no
1
application/rdf+xml
en
20
application/rss+xml
en
12
application/rtf
en
243
application/rtf
es
2
application/vnd.google-earth.kml+xml
en
37
application/vnd.ms-excel
en
7388
application/vnd.ms-excel
de
98
application/vnd.ms-excel
es
73
application/vnd.ms-excel
nl
40
application/vnd.ms-excel
so
26
application/vnd.ms-excel
fr
13
application/vnd.ms-excel
ro
13
application/vnd.ms-excel
pt
10
application/vnd.ms-excel
hu
9
application/vnd.ms-excel
it
7
application/vnd.ms-excel
no
6
application/vnd.ms-excel
id
5
application/vnd.ms-excel
pl
5
application/vnd.ms-excel
da
4
application/vnd.ms-excel
vi
4
application/vnd.ms-excel
af
3
application/vnd.ms-excel
tl
3
application/vnd.ms-excel
sk
2
application/vnd.ms-excel
et
1
application/vnd.ms-excel
lv
1
application/vnd.ms-excel
sv
1
application/vnd.ms-excel.sheet.3
en
1
application/vnd.ms-excel.sheet.4
en
92
application/vnd.ms-excel.sheet.4
de
2
application/vnd.ms-excel.sheet.4
fr
1
application/vnd.ms-powerpoint
en
11667
application/vnd.ms-powerpoint
es
49
application/vnd.ms-powerpoint
fr
6
application/vnd.ms-powerpoint
de
4
application/vnd.ms-powerpoint
it
2
application/vnd.ms-powerpoint
ro
2
application/vnd.ms-powerpoint
pt
1
application/vnd.ms-powerpoint
th
1
application/vnd.openxmlformats-officedocument.presentationml.presentation
en
33
application/vnd.openxmlformats-officedocument.presentationml.slideshow
en
1
application/vnd.openxmlformats-officedocument.spreadsheetml.sheet
en
2
application/vnd.openxmlformats-officedocument.wordprocessingml.document
en
18
application/x-tika-msworks-spreadsheet
en
2
application/xhtml+xml
en
1420
application/xhtml+xml
es
16
application/xhtml+xml
de
5
application/xhtml+xml
it
1
application/xhtml+xml; charset=UTF-8
en
1
application/xhtml+xml; charset=iso-8859-1
en
11
application/xhtml+xml; charset=utf-8
en
24
application/xhtml+xml; charset=utf-8
es
1
application/xml
en
1641
application/xml
de
4
application/xml
es
4
application/xml
vi
3
application/xml
it
2
application/xml
sq
2
application/xml
sv
2
application/xml
tl
2
application/xml
af
1
application/xml
da
1
application/xml
fr
1
application/xml
pl
1
application/xml
pt
1
application/xml
ro
1
application/xml
sl
1
application/xml; charset=UTF-8
en
353
application/zip
en
48
image/jpeg
en
6
image/jpeg
de
1
image/jpeg
vi
1
message/rfc822
en
446
message/rfc822
es
3
message/rfc822
pt
1
noindex
en
1
text-html; charset=Windows-1252
en
5
text/css
en
3
text/css; charset=ISO-8859-1
en
10
text/css; charset=iso-8859-1
en
4
text/html
en
304
text/html
es
10
text/html
et
2
text/html
it
2
text/html
de
1
text/html
tl
1
text/html
vi
1
text/html   charset=iso-8859-1
en
1
text/html charset=ISO-8859-1
en
6
text/html' charset=iso-8859-1
en
9
text/html+xml; charset=UTF-8
en
1
text/html/ charset=iso-8859-1
en
1
text/html; chaobjrset=windows-1252
en
2
text/html; charset=
en
24
text/html; charset="iso-8859\<sup\>-1\<\/sup\>"
en
1
text/html; charset=10646
en
45
text/html; charset=8859-1
en
6
text/html; charset=EUC-JP
en
3
text/html; charset=GB18030
en
23
text/html; charset=IBM437
en
9
text/html; charset=IBM500
cs
1
text/html; charset=IBM855
en
1
text/html; charset=IBM866
en
2
text/html; charset=ISO-2022-JP
en
1
text/html; charset=ISO-8859-1
en
10930
text/html; charset=ISO-8859-1
de
74
text/html; charset=ISO-8859-1
es
69
text/html; charset=ISO-8859-1
sw
55
text/html; charset=ISO-8859-1
fr
51
text/html; charset=ISO-8859-1
da
46
text/html; charset=ISO-8859-1
id
19
text/html; charset=ISO-8859-1
sl
16
text/html; charset=ISO-8859-1
ro
14
text/html; charset=ISO-8859-1
it
12
text/html; charset=ISO-8859-1
sq
10
text/html; charset=ISO-8859-1
tl
10
text/html; charset=ISO-8859-1
sv
9
text/html; charset=ISO-8859-1
nl
8
text/html; charset=ISO-8859-1
pt
7
text/html; charset=ISO-8859-1
so
5
text/html; charset=ISO-8859-1
hr
3
text/html; charset=ISO-8859-1
hu
2
text/html; charset=ISO-8859-1
pl
2
text/html; charset=ISO-8859-1
af
1
text/html; charset=ISO-8859-1
cs
1
text/html; charset=ISO-8859-1
vi
1
text/html; charset=ISO-8859-15
en
2
text/html; charset=ISO-8859-9
en
1
text/html; charset=KOI8-R
en
1
text/html; charset=Shift_JIS
en
2
text/html; charset=US-ASCII
en
22
text/html; charset=UTF-16
zh-cn
17
text/html; charset=UTF-16LE
en
1
text/html; charset=UTF-32LE
bn
1
text/html; charset=UTF-8
en
8195
text/html; charset=UTF-8
es
125
text/html; charset=UTF-8
fr
30
text/html; charset=UTF-8
vi
11
text/html; charset=UTF-8
de
8
text/html; charset=UTF-8
tr
6
text/html; charset=UTF-8
pt
4
text/html; charset=UTF-8
ro
3
text/html; charset=UTF-8
hr
2
text/html; charset=UTF-8
lt
2
text/html; charset=UTF-8
pl
2
text/html; charset=UTF-8
cs
1
text/html; charset=UTF-8
hu
1
text/html; charset=UTF-8
it
1
text/html; charset=UTF-8
nl
1
text/html; charset=UTF-8
ru
1
text/html; charset=UTF-8
so
1
text/html; charset=UTF-8
sq
1
text/html; charset=UTF-8
th
1
text/html; charset=WINDOWS-1251
en
2
text/html; charset=WINDOWS-1252
en
3
text/html; charset=Windows-1252
en
74
text/html; charset=Windows-1252
es
1
text/html; charset=big5
en
2
text/html; charset=csVISCII
vi
1
text/html; charset=euc-kr
en
1
text/html; charset=gb2312
en
5
text/html; charset=iso-10646
en
1
text/html; charset=iso-2022-jp
ja
1
text/html; charset=iso-8859-1
en
12861
text/html; charset=iso-8859-1
es
157
text/html; charset=iso-8859-1
it
34
text/html; charset=iso-8859-1
fr
24
text/html; charset=iso-8859-1
da
22
text/html; charset=iso-8859-1
pt
12
text/html; charset=iso-8859-1
de
11
text/html; charset=iso-8859-1
vi
6
text/html; charset=iso-8859-1
sv
4
text/html; charset=iso-8859-1
et
3
text/html; charset=iso-8859-1
nl
3
text/html; charset=iso-8859-1
ro
2
text/html; charset=iso-8859-1
af
1
text/html; charset=iso-8859-1
hu
1
text/html; charset=iso-8859-1
id
1
text/html; charset=iso-8859-15
en
2
text/html; charset=iso-8859-1; macromedia dreamweaver 4.0=
en
1
text/html; charset=iso-8859-2
ro
3
text/html; charset=iso-8859-2
en
1
text/html; charset=iso8859-1
en
6
text/html; charset=iso_8859_1
en
3
text/html; charset=ks_c_5601-1987
en
1
text/html; charset=macintosh
en
18
text/html; charset=macintosh
sv
1
text/html; charset=shift_jis
en
2
text/html; charset=unicode
en
2
text/html; charset=us-ascii
en
222
text/html; charset=us-ascii
de
1
text/html; charset=us-ascii
es
1
text/html; charset=utf-8
en
3187
text/html; charset=utf-8
es
44
text/html; charset=utf-8
it
8
text/html; charset=utf-8
de
4
text/html; charset=utf-8
tl
3
text/html; charset=utf-8
vi
3
text/html; charset=utf-8
fr
2
text/html; charset=utf-8
hu
2
text/html; charset=utf-8
id
2
text/html; charset=utf-8
pt
2
text/html; charset=utf-8
sv
2
text/html; charset=utf-8
da
1
text/html; charset=utf-8
hr
1
text/html; charset=utf-8
nl
1
text/html; charset=utf-8
ro
1
text/html; charset=utf-8
zh-cn
1
text/html; charset=windows-1250
en
8
text/html; charset=windows-1250
hr
3
text/html; charset=windows-1251
en
7
text/html; charset=windows-1251
bg
1
text/html; charset=windows-1251
ro
1
text/html; charset=windows-1251
ru
1
text/html; charset=windows-1252
en
12982
text/html; charset=windows-1252
es
108
text/html; charset=windows-1252
de
61
text/html; charset=windows-1252
fr
42
text/html; charset=windows-1252
it
25
text/html; charset=windows-1252
id
14
text/html; charset=windows-1252
pt
9
text/html; charset=windows-1252
tl
5
text/html; charset=windows-1252
sq
4
text/html; charset=windows-1252
sw
4
text/html; charset=windows-1252
hu
3
text/html; charset=windows-1252
nl
3
text/html; charset=windows-1252
vi
3
text/html; charset=windows-1252
lt
2
text/html; charset=windows-1252
ro
2
text/html; charset=windows-1252
sl
2
text/html; charset=windows-1252
no
1
text/html; charset=windows-1252
pl
1
text/html; charset=windows-1252
sk
1
text/html; charset=windows-1252
so
1
text/html; charset=windows-1252
sv
1
text/html; charset=windows-1252
zh-cn
1
text/html; charset=windows-1254
en
2
text/html; charset=windows-1256
en
5
text/html; charset=x-mac-roman
en
1
text/html; iso-8859-1=
en
3
text/html; set=iso-8859-1
en
2
text/plain; charset=EUC-KR
en
7
text/plain; charset=GB18030
en
69
text/plain; charset=GB18030
de
4
text/plain; charset=IBM500
pt
1
text/plain; charset=IBM855
en
3
text/plain; charset=IBM866
en
1
text/plain; charset=ISO-2022-JP
en
2
text/plain; charset=ISO-8859-1
en
14402
text/plain; charset=ISO-8859-1
de
750
text/plain; charset=ISO-8859-1
sq
489
text/plain; charset=ISO-8859-1
fr
261
text/plain; charset=ISO-8859-1
vi
175
text/plain; charset=ISO-8859-1
es
169
text/plain; charset=ISO-8859-1
so
107
text/plain; charset=ISO-8859-1
hu
64
text/plain; charset=ISO-8859-1
ro
61
text/plain; charset=ISO-8859-1
pl
59
text/plain; charset=ISO-8859-1
pt
52
text/plain; charset=ISO-8859-1
it
51
text/plain; charset=ISO-8859-1
da
34
text/plain; charset=ISO-8859-1
id
28
text/plain; charset=ISO-8859-1
af
24
text/plain; charset=ISO-8859-1
tl
24
text/plain; charset=ISO-8859-1
sk
20
text/plain; charset=ISO-8859-1
sv
19
text/plain; charset=ISO-8859-1
sw
17
text/plain; charset=ISO-8859-1
et
16
text/plain; charset=ISO-8859-1
nl
13
text/plain; charset=ISO-8859-1
no
12
text/plain; charset=ISO-8859-1
sl
7
text/plain; charset=ISO-8859-1
tr
5
text/plain; charset=ISO-8859-1
hr
4
text/plain; charset=ISO-8859-1
cs
2
text/plain; charset=ISO-8859-15
en
14
text/plain; charset=ISO-8859-15
es
3
text/plain; charset=ISO-8859-5
en
5
text/plain; charset=KOI8-R
en
5
text/plain; charset=KOI8-R
bg
1
text/plain; charset=Shift_JIS
en
5
text/plain; charset=UTF-8
en
73
text/plain; charset=UTF-8
es
3
text/plain; charset=UTF-8
ko
1
text/plain; charset=UTF-8
nl
1
text/plain; charset=UTF-8
vi
1
text/plain; charset=windows-1250
bn
5
text/plain; charset=windows-1250
pl
1
text/plain; charset=windows-1250
sk
1
text/plain; charset=windows-1251
en
1
text/plain; charset=windows-1252
en
11982
text/plain; charset=windows-1252
de
407
text/plain; charset=windows-1252
fr
155
text/plain; charset=windows-1252
pl
91
text/plain; charset=windows-1252
es
90
text/plain; charset=windows-1252
hu
48
text/plain; charset=windows-1252
it
33
text/plain; charset=windows-1252
pt
32
text/plain; charset=windows-1252
sq
23
text/plain; charset=windows-1252
so
22
text/plain; charset=windows-1252
vi
22
text/plain; charset=windows-1252
id
15
text/plain; charset=windows-1252
da
12
text/plain; charset=windows-1252
ro
11
text/plain; charset=windows-1252
tl
10
text/plain; charset=windows-1252
sk
7
text/plain; charset=windows-1252
et
6
text/plain; charset=windows-1252
nl
6
text/plain; charset=windows-1252
sl
6
text/plain; charset=windows-1252
sw
6
text/plain; charset=windows-1252
fi
3
text/plain; charset=windows-1252
hr
3
text/plain; charset=windows-1252
sv
3
text/plain; charset=windows-1252
af
2
text/plain; charset=windows-1252
no
2
text/plain; charset=windows-1252
bn
1
text/plain; charset=windows-1252
hi
1
text/plain; charset=windows-1252
lt
1
text/plain; charset=windows-1253
bn
1
text/plain; charset=windows-1255
bn
1
text/plain; charset=windows-1255
en
1
text/plain; charset=windows-1255
th
1
text/x-java-source
en
53
text; charset=ISO-8859-1
en
9
text; charset=ISO-8859-1
es
1
texthtml; charset=is0-8859-1
en
5




select DETECTED_CONTENT_TYPE_B, LANG_ID1_B, count(1) as COUNT from comparisons where LANG_ID1_B is not null group by DETECTED_CONTENT_TYPE_B, LANG_ID1_B order by DETECTED_CONTENT_TYPE_B, COUNT desc