Mime types in "tika_1_6"

DETECTED_CONTENT_TYPE_A COUNT
application/pdf
52024
text/plain; charset=ISO-8859-1
18086
application/msword
16265
image/jpeg
14908
text/html; charset=windows-1252
13654
text/html; charset=iso-8859-1
13233
text/plain; charset=windows-1252
13151
application/vnd.ms-powerpoint
12063
text/html; charset=ISO-8859-1
11552
text/html; charset=UTF-8
8404
application/vnd.ms-excel
7875
image/gif
4103
text/html; charset=utf-8
3268
application/postscript
2484
application/xml
1993
application/gzip
1596
application/xhtml+xml
1454
image/png
728
message/rfc822
463
application/octet-stream
391
application/xml; charset=UTF-8
353
text/html
321
application/rtf
246
text/html; charset=us-ascii
227
application/fits
86
text/plain; charset=UTF-8
79
application/x-shockwave-flash
78
text/html; charset=Windows-1252
76
text/plain; charset=GB18030
73
text/x-java-source
53
application/zip
49
text/html; charset=10646
45
application/vnd.google-earth.kml+xml
42
application/vnd.openxmlformats-officedocument.presentationml.presentation
35
application/x-tex
26
application/rdf+xml
25
application/xhtml+xml; charset=utf-8
25
text/html; charset=
24
text/html; charset=GB18030
23
text/html; charset=US-ASCII
22
text/html; charset=macintosh
19
application/msword2
18
application/vnd.openxmlformats-officedocument.wordprocessingml.document
18
text/html; charset=UTF-16
17
text/plain; charset=ISO-8859-15
17
image/x-ms-bmp
16
model/vnd.dwf
16
application/rss+xml
14
application/xhtml+xml; charset=iso-8859-1
11
text/html; charset=windows-1250
11
text/css; charset=ISO-8859-1
10
text/html; charset=windows-1251
10
text; charset=ISO-8859-1
10
text/html' charset=iso-8859-1
9
text/html; charset=IBM437
9
text/html; charset=iso8859-1
7
text/plain; charset=windows-1250
7
application/x-bibtex-text-file
6
message/x-emlx
6
text/html charset=ISO-8859-1
6
text/html; charset=8859-1
6
text/plain; charset=EUC-KR
6
text/plain; charset=KOI8-R
6
image/tiff
5
text-html; charset=Windows-1252
5
text/html; charset=gb2312
5
text/html; charset=windows-1256
5
text/plain; charset=ISO-8859-5
5
text/plain; charset=Shift_JIS
5
texthtml; charset=is0-8859-1
5
text/css; charset=iso-8859-1
4
text/html; charset=iso-8859-2
4
application/vnd.framemaker
3
application/x-123
3
text/css
3
text/html; charset=EUC-JP
3
text/html; charset=WINDOWS-1251
3
text/html; charset=WINDOWS-1252
3
text/html; charset=iso_8859_1
3
text/html; iso-8859-1=
3
text/plain; charset=IBM855
3
text/plain; charset=windows-1255
3
application/vnd.openxmlformats-officedocument.spreadsheetml.sheet
2
application/x-tika-msworks-spreadsheet
2
image/x-portable-bitmap
2
text/html; chaobjrset=windows-1252
2
text/html; charset=IBM866
2
text/html; charset=ISO-8859-15
2
text/html; charset=Shift_JIS
2
text/html; charset=big5
2
text/html; charset=iso-8859-15
2
text/html; charset=shift_jis
2
text/html; charset=unicode
2
text/html; charset=windows-1254
2
text/html; set=iso-8859-1
2
text/plain; charset=ISO-2022-JP
2
video/x-ms-wmv
2
 end-functional polystyrene, interdiffusion, neutron reflectometry, surface, thin film , Diffusion, Reflectometry, Thin Films; c
1
Public Affairs Officer, USAID/WBG
1
This USAID/Timor-Leste page describes the programmatic activities of USAID in Timor-Leste.
1
application/dita+xml; format=topic
1
application/vnd.ms-xpsdocument
1
application/vnd.openxmlformats-officedocument.presentationml.slideshow
1
application/vnd.sun.xml.impress
1
application/x-elc
1
application/x-executable
1
application/x-iso9660-image
1
application/x-stuffit
1
application/x-tika-msoffice
1
application/xhtml+xml; charset=UTF-8
1
image/g3fax
1
image/x-portable-pixmap
1
noindex
1
text/html   charset=iso-8859-1
1
text/html+xml; charset=UTF-8
1
text/html/ charset=iso-8859-1
1
text/html; charset="iso-8859\<sup\>-1\<\/sup\>"
1
text/html; charset=0
1
text/html; charset=IBM500
1
text/html; charset=IBM855
1
text/html; charset=ISO-2022-JP
1
text/html; charset=ISO-8859-9
1
text/html; charset=KOI8-R
1
text/html; charset=UTF-16LE
1
text/html; charset=UTF-32LE
1
text/html; charset=csVISCII
1
text/html; charset=euc-kr
1
text/html; charset=iso-10646
1
text/html; charset=iso-2022-jp
1
text/html; charset=iso-8859-1; macromedia dreamweaver 4.0=
1
text/html; charset=ks_c_5601-1987
1
text/html; charset=x-mac-roman
1
text/plain; charset=IBM500
1
text/plain; charset=IBM866
1
text/plain; charset=windows-1251
1
text/plain; charset=windows-1253
1




select DETECTED_CONTENT_TYPE_A, count(1) as COUNT from comparisons group by DETECTED_CONTENT_TYPE_A order by COUNT desc