Total attachments in "tika_1_8_SNAPSHOT" by detected content type

DETECTED_CONTENT_TYPE_B NUM_ATTACHMENTS_TOTAL TOTAL_FILES Average number of attachments per file
application/vnd.ms-powerpoint
216053
12063
17.91 
application/msword
71368
16265
4.39 
application/vnd.ms-excel
3398
7759
0.44 
application/gzip
1597
1596
1.00 
message/rfc822
1334
463
2.88 
application/vnd.openxmlformats-officedocument.presentationml.presentation
1239
35
35.40 
application/pdf
1016
52024
0.02 
application/rtf
418
246
1.70 
application/zip
70
49
1.43 
application/vnd.openxmlformats-officedocument.wordprocessingml.document
64
18
3.56 
application/vnd.openxmlformats-officedocument.presentationml.slideshow
7
1
7.00 




-- Total attachments per detected content type, alongside the file count for
-- that type and the mean number of attachments per file.
-- Only content types with at least one attachment are listed, largest total first.
select
    comparisons.DETECTED_CONTENT_TYPE_B,
    -- a NULL attachment count contributes 0 to the total
    sum(coalesce(NUM_ATTACHMENTS_B, 0)) as NUM_ATTACHMENTS_TOTAL,
    -- aggregated so the value is well-defined within each group;
    -- assumes detected_types_B holds one row per content type (TODO confirm)
    max(detected_types_B.NUM_FILES) as TOTAL_FILES,
    -- 1.0 * forces floating-point division; the format string keeps its trailing space
    printf(
        '%.2f ',
        1.0 * sum(coalesce(NUM_ATTACHMENTS_B, 0)) / max(detected_types_B.NUM_FILES)
    ) as "Average number of attachments per file"
from comparisons
left join detected_types_B
    on comparisons.DETECTED_CONTENT_TYPE_B = detected_types_B.DETECTED_CONTENT_TYPE_B
group by comparisons.DETECTED_CONTENT_TYPE_B
-- repeat the aggregate rather than the select alias: alias lookup in HAVING is non-standard
having sum(coalesce(NUM_ATTACHMENTS_B, 0)) > 0
order by NUM_ATTACHMENTS_TOTAL desc;