Total attachments in "tika_1_6" by file extension
FILE_EXTENSION | NUM_ATTACHMENTS_TOTAL | TOTAL_FILES | Average number of attachments per file |
ppt
| 206290
| 11884
| 17.36
|
doc
| 71040
| 16156
| 4.40
|
pps
| 3674
| 160
| 22.96
|
xls
| 3403
| 7868
| 0.43
|
gz
| 1568
| 1567
| 1.00
|
txt
| 1328
| 27671
| 0.05
|
pptx
| 1238
| 35
| 35.37
|
text
| 1006
| 321
| 3.13
|
pdf
| 975
| 52020
| 0.02
|
rtf
| 418
| 246
| 1.70
|
docx
| 64
| 18
| 3.56
|
kmz
| 55
| 47
| 1.17
|
ps
| 29
| 2396
| 0.01
|
html
| 25
| 52983
| 0.00
|
zip
| 15
| 1
| 15.00
|
tmp
| 2
| 10
| 0.20
|
unk
| 1
| 879
| 0.00
|
-- Attachment totals per file extension (SQLite dialect).
-- For each extension in `comparisons`: sums NUM_ATTACHMENTS_A (NULL counted as 0),
-- reports the file count from `extensions_total`, and the attachments-per-file
-- ratio formatted to two decimals. Extensions whose total is 0 are omitted;
-- rows are ordered by total attachments, largest first.
SELECT
    comparisons.FILE_EXTENSION,
    SUM(COALESCE(NUM_ATTACHMENTS_A, 0)) AS NUM_ATTACHMENTS_TOTAL,
    -- Bare (non-aggregated) column in a grouped query: accepted by SQLite.
    -- NOTE(review): presumably extensions_total holds one row per extension - confirm.
    extensions_total.NUM_FILES AS TOTAL_FILES,
    -- The 1.0 factor forces real (non-integer) division.
    -- The format string is a single-quoted literal: double quotes denote identifiers
    -- and only work as strings through SQLite's legacy fallback. The trailing space
    -- in the format is kept so the output is unchanged.
    printf(
        '%.2f ',
        1.0 * SUM(COALESCE(NUM_ATTACHMENTS_A, 0)) / extensions_total.NUM_FILES
    ) AS "Average number of attachments per file"
FROM comparisons
-- LEFT JOIN keeps extensions that have no row in extensions_total.
LEFT JOIN extensions_total
    ON comparisons.FILE_EXTENSION = extensions_total.FILE_EXTENSION
GROUP BY comparisons.FILE_EXTENSION
-- Referencing the SELECT alias here is a SQLite extension to standard SQL.
HAVING NUM_ATTACHMENTS_TOTAL > 0
ORDER BY NUM_ATTACHMENTS_TOTAL DESC;