Number of files, by file extension, whose content differs between PDFBox_1_8_9_20150316_single_thread and pdfbox_1_8_9_20150316 (overlap below 0.90, or token counts differing by more than 100)

FILE_EXTENSION COUNT
pdf
3




-- Per-extension count of rows in comparisons that satisfy all of:
--   * JSON_EX_A, JSON_EX_B, SORT_STACK_TRACE_A and SORT_STACK_TRACE_B are NULL
--   * TOKEN_COUNT_A or TOKEN_COUNT_B is greater than 30
--   * overlap is below 0.90, or the two token counts differ by more than 100
-- Extensions with the most matching rows are listed first.
SELECT
    FILE_EXTENSION,
    COUNT(1) AS COUNT
FROM comparisons
WHERE JSON_EX_A IS NULL
    AND JSON_EX_B IS NULL
    AND SORT_STACK_TRACE_A IS NULL
    AND SORT_STACK_TRACE_B IS NULL
    AND (TOKEN_COUNT_A > 30 OR TOKEN_COUNT_B > 30)
    AND (overlap < 0.90 OR ABS(TOKEN_COUNT_A - TOKEN_COUNT_B) > 100)
GROUP BY FILE_EXTENSION
ORDER BY COUNT DESC