Number of metadata values per content type in "tika_1_6"

DETECTED_CONTENT_TYPE_A NUM_METADATA_VALUES_TOTAL TOTAL_FILES Average number of metadata values per file
application/vnd.ms-powerpoint
19404807
12063
1608.62 
application/msword
4351266
16265
267.52 
application/pdf
1521309
52024
29.24 
image/jpeg
565962
14908
37.96 
application/vnd.ms-excel
405240
7875
51.46 
image/png
207619
728
285.19 
text/html; charset=iso-8859-1
192558
13233
14.55 
text/html; charset=windows-1252
177258
13654
12.98 
text/plain; charset=ISO-8859-1
162771
18086
9.00 
text/html; charset=UTF-8
152595
8404
18.16 
text/html; charset=ISO-8859-1
142203
11552
12.31 
text/plain; charset=windows-1252
118359
13151
9.00 
image/gif
113633
4103
27.70 
application/vnd.openxmlformats-officedocument.presentationml.presentation
87877
35
2510.77 
text/html; charset=utf-8
48427
3268
14.82 
message/rfc822
47787
463
103.21 
application/xhtml+xml
30066
1454
20.68 
application/xml
19306
1993
9.69 
application/gzip
18583
1596
11.64 
application/postscript
17388
2484
7.00 
application/rtf
15818
246
64.30 
application/vnd.openxmlformats-officedocument.wordprocessingml.document
5446
18
302.56 
text/html
4834
321
15.06 
application/xml; charset=UTF-8
3883
353
11.00 
text/html; charset=us-ascii
2815
227
12.40 
application/octet-stream
2737
391
7.00 
text/html; charset=Windows-1252
1498
76
19.71 
application/zip
1199
49
24.47 
image/x-ms-bmp
1042
16
65.12 
text/plain; charset=UTF-8
711
79
9.00 
text/html; charset=10646
689
45
15.31 
text/x-java-source
686
53
12.94 
text/plain; charset=GB18030
657
73
9.00 
application/fits
602
86
7.00 
application/x-shockwave-flash
546
78
7.00 
application/xhtml+xml; charset=utf-8
372
25
14.88 
application/vnd.google-earth.kml+xml
332
42
7.90 
text/html; charset=
287
24
11.96 
text/html; charset=macintosh
282
19
14.84 
text/html; charset=GB18030
260
23
11.30 
text/html; charset=US-ASCII
253
22
11.50 
image/tiff
237
5
47.40 
text/html' charset=iso-8859-1
202
9
22.44 
application/rdf+xml
198
25
7.92 
application/vnd.openxmlformats-officedocument.presentationml.slideshow
195
1
195.00 
application/x-tex
182
26
7.00 
application/rss+xml
168
14
12.00 
text; charset=ISO-8859-1
158
10
15.80 
text/html; charset=UTF-16
153
17
9.00 
text/plain; charset=ISO-8859-15
153
17
9.00 
text/html; charset=windows-1251
143
10
14.30 
text/css; charset=ISO-8859-1
140
10
14.00 
application/xhtml+xml; charset=iso-8859-1
137
11
12.45 
text/html; charset=windows-1250
135
11
12.27 
application/msword2
126
18
7.00 
model/vnd.dwf
112
16
7.00 
text/html; charset=IBM437
109
9
12.11 
text/html; charset=iso8859-1
102
7
14.57 
texthtml; charset=is0-8859-1
70
5
14.00 
text/html charset=ISO-8859-1
66
6
11.00 
text/html; charset=8859-1
66
6
11.00 
text/plain; charset=windows-1250
63
7
9.00 
application/vnd.openxmlformats-officedocument.spreadsheetml.sheet
62
2
31.00 
text-html; charset=Windows-1252
61
5
12.20 
text/html; charset=gb2312
60
5
12.00 
text/html; charset=iso-8859-2
60
4
15.00 
text/html; charset=windows-1256
59
5
11.80 
text/css; charset=iso-8859-1
56
4
14.00 
text/plain; charset=EUC-KR
54
6
9.00 
text/plain; charset=KOI8-R
54
6
9.00 
application/x-tika-msworks-spreadsheet
48
2
24.00 
text/html; charset=iso_8859_1
45
3
15.00 
text/plain; charset=ISO-8859-5
45
5
9.00 
text/plain; charset=Shift_JIS
45
5
9.00 
application/x-bibtex-text-file
42
6
7.00 
message/x-emlx
42
6
7.00 
text/css
42
3
14.00 
text/html; charset=WINDOWS-1251
36
3
12.00 
text/html; iso-8859-1=
35
3
11.67 
text/html; charset=WINDOWS-1252
34
3
11.33 
text/html; charset=EUC-JP
33
3
11.00 
text/html; chaobjrset=windows-1252
28
2
14.00 
text/html; charset=unicode
28
2
14.00 
text/plain; charset=IBM855
27
3
9.00 
text/plain; charset=windows-1255
27
3
9.00 
text/html; charset=shift_jis
25
2
12.50 
text/html; charset=ISO-8859-15
24
2
12.00 
text/html; charset=windows-1254
24
2
12.00 
text/html; set=iso-8859-1
24
2
12.00 
text/html; charset=IBM866
23
2
11.50 
text/html; charset=iso-8859-15
23
2
11.50 
text/html; charset=Shift_JIS
22
2
11.00 
text/html; charset=big5
22
2
11.00 
application/vnd.framemaker
21
3
7.00 
application/x-123
21
3
7.00 
text/plain; charset=ISO-2022-JP
18
2
9.00 
Public Affairs Officer, USAID/WBG
17
1
17.00 
text/html; charset=iso-10646
16
1
16.00 
text/html; charset=0
15
1
15.00 
text/html; charset=iso-8859-1; macromedia dreamweaver 4.0=
15
1
15.00 
image/x-portable-bitmap
14
2
7.00 
video/x-ms-wmv
14
2
7.00 
 end-functional polystyrene, interdiffusion, neutron reflectometry, surface, thin film , Diffusion, Reflectometry, Thin Films; c
13
1
13.00 
application/xhtml+xml; charset=UTF-8
13
1
13.00 
text/html+xml; charset=UTF-8
13
1
13.00 
This USAID/Timor-Leste page describes the programmatic activities of USAID in Timor-Leste.
12
1
12.00 
text/html; charset=IBM855
12
1
12.00 
text/html; charset=csVISCII
12
1
12.00 
text/html; charset=euc-kr
12
1
12.00 
text/html; charset=ks_c_5601-1987
12
1
12.00 
application/x-executable
11
1
11.00 
noindex
11
1
11.00 
text/html   charset=iso-8859-1
11
1
11.00 
text/html/ charset=iso-8859-1
11
1
11.00 
text/html; charset="iso-8859\<sup\>-1\<\/sup\>"
11
1
11.00 
text/html; charset=ISO-2022-JP
11
1
11.00 
text/html; charset=KOI8-R
11
1
11.00 
text/html; charset=iso-2022-jp
11
1
11.00 
text/html; charset=x-mac-roman
10
1
10.00 
text/html; charset=IBM500
9
1
9.00 
text/html; charset=ISO-8859-9
9
1
9.00 
text/html; charset=UTF-16LE
9
1
9.00 
text/html; charset=UTF-32LE
9
1
9.00 
text/plain; charset=IBM500
9
1
9.00 
text/plain; charset=IBM866
9
1
9.00 
text/plain; charset=windows-1251
9
1
9.00 
text/plain; charset=windows-1253
9
1
9.00 
application/dita+xml; format=topic
7
1
7.00 
application/vnd.ms-xpsdocument
7
1
7.00 
application/vnd.sun.xml.impress
7
1
7.00 
application/x-elc
7
1
7.00 
application/x-iso9660-image
7
1
7.00 
application/x-stuffit
7
1
7.00 
application/x-tika-msoffice
7
1
7.00 
image/g3fax
7
1
7.00 
image/x-portable-pixmap
7
1
7.00 




-- Report: number of metadata values per detected content type (extract "A").
-- For every content type present in comparisons, sum the per-row metadata
-- value counts and divide by the file count stored for that type in
-- detected_types_A. Rows are listed from the largest total to the smallest.
--
-- Written for SQLite: relies on printf() and on selecting the bare
-- (non-aggregated) NUM_FILES column in a GROUP BY query.
SELECT
    comparisons.detected_content_type_A AS DETECTED_CONTENT_TYPE_A,
    -- A NULL count contributes 0 to the total.
    SUM(COALESCE(NUM_METADATA_VALUES_A, 0)) AS NUM_METADATA_VALUES_TOTAL,
    -- NOTE(review): assumes detected_types_A holds exactly one row per
    -- content type; duplicates would multiply the joined rows -- confirm.
    detected_types_A.NUM_FILES AS TOTAL_FILES,
    -- 1.0 * forces floating-point division. The format string is a proper
    -- single-quoted literal and keeps its trailing space so the rendered
    -- values are unchanged.
    printf(
        '%.2f ',
        1.0 * SUM(COALESCE(NUM_METADATA_VALUES_A, 0)) / detected_types_A.NUM_FILES
    ) AS "Average number of metadata values per file"
FROM comparisons
LEFT JOIN detected_types_A
    ON comparisons.detected_content_type_A = detected_types_A.DETECTED_CONTENT_TYPE_A
GROUP BY comparisons.detected_content_type_A
ORDER BY
    NUM_METADATA_VALUES_TOTAL DESC,
    -- Tie-breaker: many content types share the same total, so pin their
    -- relative order instead of leaving it to the query plan.
    comparisons.detected_content_type_A ASC;