Exceptions in "tika_1_6"

Detected content type Number of files Number of files with exceptions Percentage of files with exceptions
application/dita+xml; format=topic
1
1
100.00% 
application/x-tika-msoffice
1
1
100.00% 
application/xml
1993
260
13.05% 
application/vnd.google-earth.kml+xml
42
4
9.52% 
application/rdf+xml
25
2
8.00% 
application/vnd.openxmlformats-officedocument.presentationml.presentation
35
2
5.71% 
application/vnd.ms-powerpoint
12063
462
3.83% 
application/zip
49
1
2.04% 
application/vnd.ms-excel
7875
154
1.96% 
application/msword
16265
217
1.33% 
application/rtf
246
1
0.41% 
application/pdf
52024
56
0.11% 
text/html; charset=utf-8
3268
2
0.06% 
text/plain; charset=ISO-8859-1
18086
4
0.02% 
image/jpeg
14908
2
0.01% 
text/html; charset=windows-1252
13654
1
0.01% 




-- Exception rate per detected content type.
--
-- For each DETECTED_CONTENT_TYPE_A in comparisons, reports:
--   * the total number of files of that type (detected_types_A.NUM_FILES),
--   * the number of comparisons rows whose SORT_STACK_TRACE_A is not NULL,
--   * that count as a percentage of the total, formatted with two decimals.
-- Rows are listed from the highest exception rate to the lowest.
--
-- NOTE(review): assumes detected_types_A holds exactly one row per
-- DETECTED_CONTENT_TYPE_A; if it can hold duplicates the join multiplies the
-- counts. Verify against whatever builds that table.
SELECT
    comparisons.DETECTED_CONTENT_TYPE_A AS "Detected content type",
    detected_types_A.NUM_FILES AS "Number of files",
    -- COUNT never yields NULL, so no IFNULL/COALESCE wrapper is needed.
    COUNT(*) AS "Number of files with exceptions",
    -- The format string must be a single-quoted literal: a double-quoted one
    -- is only accepted through SQLite's legacy quirk and is rejected when that
    -- quirk is disabled. 100.0 forces floating-point division. The trailing
    -- space inside the format is intentional and kept from the original output.
    printf('%.2f%% ', 100.0 * COUNT(*) / detected_types_A.NUM_FILES)
        AS "Percentage of files with exceptions"
FROM comparisons
-- LEFT JOIN keeps content types that have no row in detected_types_A; their
-- NUM_FILES (and therefore the ratio) is NULL.
LEFT JOIN detected_types_A
    ON comparisons.DETECTED_CONTENT_TYPE_A = detected_types_A.DETECTED_CONTENT_TYPE_A
WHERE SORT_STACK_TRACE_A IS NOT NULL
GROUP BY
    comparisons.DETECTED_CONTENT_TYPE_A,
    -- Grouped explicitly so NUM_FILES is not a bare column in an aggregate query.
    detected_types_A.NUM_FILES
ORDER BY
    1.0 * COUNT(*) / detected_types_A.NUM_FILES DESC,
    -- Tie-breaker so types with equal rates come out in a stable order.
    comparisons.DETECTED_CONTENT_TYPE_A;