cross_fuzz v3

# HG changeset patch # Parent e3e69b7d5845116a157e9289e83677c31ff69a97 # User Bob Clary Add the ability to replay the testcase in Firefox and to reduce it using Lithium. diff --git a/.hgignore b/.hgignore new file mode 100644 --- /dev/null +++ b/.hgignore @@ -0,0 +1,15 @@ +# glob syntax for shell glob file patterns +syntax: glob + +*.bak +*.pyc +*~ +1.*.sh +*.log +*.rej + +# perl regexp syntax for file patterns +syntax: regexp +\#.*# +^\.# +test.html diff --git a/Makefile b/Makefile new file mode 100644 --- /dev/null +++ b/Makefile @@ -0,0 +1,19 @@ +all: test.html + +temp.js: output.log + grep 'cross_fuzz_replay:' output.log |sed 's|.*cross_fuzz_replay:||' > temp.js + +temp-encoded.js: temp.js + python fixup-test-js.py temp.js > temp-fixup.js + python encode-test-js.py temp-fixup.js > temp-encoded.js + +test.html: testhead.html testtail.html temp-encoded.js + cat testhead.html temp-encoded.js testtail.html > test.html + +reduce: test.html + ./run-lithium.sh $(LITHIUM) $(FIREFOX) + +clean: FORCE + rm -f test.js test.html temp*.js output.log + +FORCE: diff --git a/cross_fuzz_randomized_20110105_seed.html b/cross_fuzz_randomized_20110105_seed.html --- a/cross_fuzz_randomized_20110105_seed.html +++ b/cross_fuzz_randomized_20110105_seed.html @@ -1,27 +1,36 @@ cross_fuzz v3 + +

cross_fuzz v3 2011/01/05	IMPORTANT: Please read comments in this file and run me from `file:///` if possible.
	Logging disabled (edit source to re-enable) Reference count: + + + +

diff --git a/cross_fuzz_v3.js b/cross_fuzz_v3.js --- a/cross_fuzz_v3.js +++ b/cross_fuzz_v3.js @@ -57,24 +57,102 @@ var INTER_ODDS = 2; /* Odds of usi var TRASH_ODDS = 8; /* Target window trashing probability */ var RESET_ODDS = 2; /* Odds of respawning target windows */ var PARAMS = 4; /* Number of params to make up for methods */ var MAX_REFS = 200; /* Maximum number of refs to keep */ var KEEP_REFS = 100; /* Number of refs to move across cycles */ +var Dump = (typeof dump == 'undefined') ? (function () {}) : dump; + +/* Script replay stuff. Send string to stdout which can be extracted + to create the replay script. The second parameter controls the + indentation prior to the output and after. + means increase + indentation, - means decrease indentation. The first character + controls the prior indentation while the second controls the after. + Interesting replay scripts cause crashes and to help recover the + recursive nature of the replay script, we annotate the open and closing + blocks to help identify the missing closing function calls. 
+ */ + +var replay_indent = ''; +var replay_block_stack = []; +var replay_block_id = 0; + +function replay(string, action) { + if (!string) + return; + + action = action || ""; + + var spaces = " "; + + if (action) { + switch(action.substring(0,1)) { + case "+": + replay_indent += spaces; + Dump("cross_fuzz_replay:" + replay_indent + "/* open block " + replay_block_id + " */\n"); + replay_block_stack.push(replay_block_id++); + break + case "-": + Dump("cross_fuzz_replay:" + replay_indent + "/* close block " + replay_block_stack.pop() + " */\n"); + replay_indent = replay_indent.slice(0, -spaces.length); + break; + } + } + + Dump("cross_fuzz_replay:" + replay_indent + string + '\n'); + + if (action) { + switch(action.substring(1,2)) { + case "+": + replay_indent += spaces; + Dump("cross_fuzz_replay:" + replay_indent + "/* open block " + replay_block_id + " */\n"); + replay_block_stack.push(replay_block_id++); + break + case "-": + Dump("cross_fuzz_replay:" + replay_indent + "/* close block " + replay_block_stack.pop() + " */\n"); + replay_indent = replay_indent.slice(0, -spaces.length); + break; + } + } + +} /************************** * Various crawl settings * **************************/ +replay("var interesting_vals = [", " +"); +replay("0, 1, 1e6, -1e6, 1e-6, 1e100, null, undefined, 'pink', screen, Infinity, false, true, eval, [], {},"); +replay("4500000000, 2200000000, -2200000000, -4500000000"); +replay("];", "- "); + var interesting_vals = [ - 0, 1, 1e6, -1e6, 1e-6, 1e100, null, undefined, 'pink', screen, Infinity, false, true, eval, [], {}, - 4500000000, 2200000000, -2200000000, -4500000000 + 0, // 0 + 1, // 1 + 1e6, // 2 + -1e6, // 3 + 1e-6, // 4 + 1e100, // 5 + null, // 6 + undefined, // 7 + 'pink', // 8 + screen, // 9 + Infinity, // 10 + false, // 11 + true, // 12 + eval, // 13 + [], // 14 + {}, // 15 + 4500000000, // 16 + 2200000000, // 17 + -2200000000, // 18 + -4500000000 // 19 ]; var object_blacklist = { /* Properties */ 'fuzz1_visited' : 0, 
'fuzz2_visited' : 0, @@ -137,16 +215,23 @@ function Random() { * Log stuff * *************/ var log_box; var message_data = ''; function LOG(message) { + try { + Dump('cross_fuzz: ' + message + '\n'); + } + catch(e) { + Dump('cross_fuzz: exception printing message.\n'); + } + /* TODO: Find a way to log stuff. */ return; message_data += '* ' + message + '\n'; log_box.value = message_data.substr(message_data.length - 2048); log_box.scrollTop = log_box.scrollHeight; @@ -188,128 +273,163 @@ function shuffle_array(arr) { } /************************* * Reopen target windows * *************************/ +replay("var t1, t2;"); var t1, t2; function new_targets() { LOG('Respawning targets...'); - try { - t1.close(); - t2.close(); - } catch (e) { } + replay("(function () { // args: none", " +"); + replay("try {", " +"); + replay("t1.close();"); + replay("t2.close();"); + replay("} catch (e) { }", "- "); + replay("try {", " +"); + replay("t1.close();"); + replay("t2.close();"); + replay("} catch (e) { }", "- "); /* TODO: Add more interesting document types. 
*/ switch (R(4)) { case 0: if ('v' != '\v') { + replay("t1 = window.open('targets/target.svg', 't1');"); t1 = window.open('targets/target.svg', 't1'); LOG('Target 1: SVG'); } else { + replay("t1 = window.open('targets/target_strict2.html', 't1');"); t1 = window.open('targets/target_strict2.html', 't1'); LOG('Target 1: HTML strict'); } break; default: + replay("t1 = window.open('targets/target2.html', 't1');"); t1 = window.open('targets/target2.html', 't1'); LOG('Target 1: HTML'); } switch (R(4)) { case 0: if ('v' != '\v') { + replay("t2 = window.open('targets/target.svg', 't2');"); t2 = window.open('targets/target.svg', 't2'); LOG('Target 2: SVG'); } else { + dump_relay("t2 = window.open('targets/target_strict2.html', 't2');"); t2 = window.open('targets/target_strict2.html', 't2'); LOG('Target 2: HTML strict'); } break; default: + replay("t2 = window.open('targets/target2.html', 't2');"); t2 = window.open('targets/target2.html', 't2'); LOG('Target 2: HTML'); } + replay("try { t1.opener = null; } catch (e) { }"); + replay("try { t2.opener = null; } catch (e) { }"); + replay("})();", "- "); + try { t1.opener = null; } catch (e) { } try { t2.opener = null; } catch (e) { } } /******************************************************* * Trash target window in one of several possible ways * *******************************************************/ function trash_target(target) { + replay("(function () { // args: none", " +"); + replay("try {", " +"); + try { switch (R(3)) { case 0: LOG('Clobbering target with document.write()'); + replay(target.name + ".document.write('Hi mom!');"); target.document.write('Hi mom!'); break; case 1: LOG('Clobbering target with document.body.innerHTML'); + replay(target.name + ".document.body.innerHTML = 'Hi mom';"); target.document.body.innerHTML = 'Hi mom'; break; case 2: LOG('Clobbering target with window.close()'); + replay(target.name + ".close();"); target.close(); break; } } catch (e) { } + replay("} catch(e) { }", "- "); + 
replay("})();", "- "); } /***************************** * Maybe add a new reference * *****************************/ function maybe_add_ref(obj, add_set) { if (R(REF_ODDS) != 0) return; /* Be more conservative about adding non-objects. */ if (typeof obj != 'object' && R(NONOBJ_ODDS) != 0) return; + replay("try {", " +"); + try { try { - if (obj.ref_visited) return; + if (obj.ref_visited) { + replay("} catch(e) { }", "- "); + return; + } obj.ref_visited = 1; } catch (e) { } LOG('+++ Adding reference ' + obj + ' (' + add_set.length + ') +++'); - if (add_set.length > MAX_REFS) - add_set[R(MAX_REFS)] = obj; - else + var r = R(MAX_REFS); + + if (add_set.length > MAX_REFS) { + replay("add_set[" + r + "] = obj;"); + add_set[r] = obj; + } else { + replay("add_set.push(obj);"); add_set.push(obj); + } } catch (e) { } + replay("} catch(e) { }", "- "); } /***************************** * Crawl, collect properties * *****************************/ var crawl_history = []; var cur_id; @@ -344,48 +464,64 @@ function crawl_properties(path, target, LOG('*** Already crawled (1) ***'); return; } target.fuzz1_visited = cur_id; } catch (e) { } shuffle_array(members); + replay("var cur_value;"); + replay("var name;"); + for (var num in members) { var name = members[num]; var cur_value = null; if (name == '0' || (object_blacklist[name] != undefined && level >= object_blacklist[name])) { -// LOG('Skipping: ' + path + '.' + name); + LOG('Skipping: ' + path + '.' + name); continue; } -// LOG('Trying: ' + path + '.' + name); + LOG('Trying: ' + path + '.' + name); + + // must emit replay code first to make sure it is emitted. + replay("cur_value = null;"); + replay("name = '" + name + "';"); + replay("try {", " +"); + replay("cur_value = eval('target.' + name);"); + replay("} catch (e) {", "-+"); + //replay("Dump('...received exception (' + e + ')\\n');"); + replay("}", "- "); try { cur_value = eval('target.' 
+ name); -// LOG('...result is "' + cur_value + '" (' + typeof cur_value + ')'); + //LOG('...result is "' + cur_value + '" (' + typeof cur_value + ')'); } catch (e) { -// LOG('...received exception (' + e + ')'); + //LOG('...received exception (' + e + ')'); } if (cur_value != null) { - if (typeof cur_value != 'function') + if (typeof cur_value != 'function') { + replay("(function (obj, add_set) { // args: cur_value, add_set", " +"); maybe_add_ref(cur_value, add_set); + replay("})(cur_value, add_set);", "- "); + } /* Recurse into objects */ if (typeof cur_value == 'object' && cur_fan < FAN_LIMIT) { cur_fan++; + replay("(function (path, target, add_set) { // args: path + '.' + name, cur_value, add_set", " +"); crawl_properties(path + '.' + name, cur_value, level + 1, add_set); - -// LOG('-- BACK TO PROPERTY CRAWL (' + level + '): ' + path + ' --'); + replay("})(path + '.' + name, cur_value, add_set);", "- "); + LOG('-- BACK TO PROPERTY CRAWL (' + level + '): ' + path + ' --'); } } } init_genrand(ret_seed); @@ -426,57 +562,84 @@ function tweak_properties(path, target, LOG('*** Already crawled (1) ***'); return; } target.fuzz2_visited = cur_id; } catch (e) { } shuffle_array(members); + replay("var orig_value;"); + replay("var name;"); + for (var num in members) { var name = members[num]; var orig_value = null; if (name == '0' || (object_blacklist[name] != undefined && level >= object_blacklist[name])) { -// LOG('Skipping: ' + path + '.' + name); + LOG('Skipping: ' + path + '.' + name); continue; } + // must emit replay code first to make sure it is emitted. + replay("orig_value = null;"); + replay("name = '" + name + "';"); + replay("try {", " +"); + replay("orig_value = eval('target.' + name);"); + replay("} catch (e) {", "-+"); + //replay("Dump('...received exception (' + e + ')\\n');"); + replay("}", "- "); + try { orig_value = eval('target.' + name); } catch (e) { + //LOG('...received exception (' + e + ')'); continue; } /* Leave functions alone!!! 
*/ if (typeof orig_value == 'function') continue; -// LOG('Trying: ' + path + '.' + name); + LOG('Trying: ' + path + '.' + name); + + replay("try {", " +"); if (R(TWEAK_ODDS) == 0) try { - if (use_set.length != 0 && R(INTER_ODDS) != 0) - eval('target.' + name + ' = use_set[' + R(use_set.length) + ']'); - else + var r; + + if (use_set.length != 0 && R(INTER_ODDS) != 0) { + r = R(use_set.length) + replay("eval('target.' + name + ' = use_set[" + r + "]');"); + eval('target.' + name + ' = use_set[' + r + ']'); + } else { + r = R(interesting_vals.length); + replay("eval('target.' + name + ' = interesting_vals[" + r + "]');"); eval('target.' + name + ' = interesting_vals[' + R(interesting_vals.length) + ']'); + } } catch (e) { -// LOG('...received exception (' + e + ')'); + //LOG('...received exception (' + e + ')'); } + replay("} catch(e) {", "-+"); + //replay("Dump('...received exception (' + e + ')\\n');"); + replay("}", "- "); + /* Recurse into objects. */ if (orig_value != null && typeof orig_value == 'object' && cur_fan < FAN_LIMIT) { cur_fan++; + replay("(function (path, target, use_set) { // args: path + '.' + name, orig_value, use_set", " +"); tweak_properties(path + '.' + name, orig_value, level + 1, use_set); - -// LOG('-- BACK TO PROPERTY TWEAK (' + level + '): ' + path + ' --'); + replay("})(path + '.' + name, orig_value, use_set);", "- ") + LOG('-- BACK TO PROPERTY TWEAK (' + level + '): ' + path + ' --'); } } init_genrand(ret_seed); } @@ -521,30 +684,43 @@ function call_methods(path, target, leve LOG('*** Already crawled (1) ***'); return; } target.fuzz3_visited = cur_id; } catch (e) { } shuffle_array(members); + replay("var cur_value;"); + replay("var name;"); + for (var num in members) { var name = members[num]; var cur_value = null; if (name == '0' || (object_blacklist[name] != undefined && level >= object_blacklist[name])) { -// LOG('Skipping: ' + path + '.' + name); + LOG('Skipping: ' + path + '.' 
+ name); continue; } + // must emit replay code first to make sure it is emitted. + replay("cur_value = null;"); + replay("name = '" + name + "';"); + replay("try {", " +"); + replay("cur_value = eval('target.' + name);"); + replay("} catch (e) {", "-+"); + //replay("Dump('...received exception (' + e + ')\\n');"); + replay("}", "- "); + try { cur_value = eval('target.' + name); } catch (e) { + //LOG('...received exception (' + e + ')'); continue; } if (cur_value == null) continue; if (R(CALL_ODDS) == 0 && typeof cur_value == 'function') { var ret_value = null; @@ -558,182 +734,259 @@ function call_methods(path, target, leve par_str += 'use_set[' + R(use_set.length) + ']'; else par_str += 'interesting_vals[' + R(interesting_vals.length) + ']'; if (i + 1 != PARAMS) par_str += ", "; } -// LOG('Trying: ' + path + '.' + name); + LOG('Trying: ' + path + '.' + name); + + // must emit replay code first to make sure it is emitted. + // XXX: ? + replay("ret_value = null;"); + replay("name = '" + name + "';"); + replay("try {", " +"); + replay("ret_value = eval('target.' + name + '(" + par_str + ")');") + replay("} catch (e) {", "-+"); + //replay("Dump('...received exception (' + e + ')\\n');"); + replay("}", "- "); try { ret_value = eval('target.' + name + '(' + par_str + ')'); -// LOG('...result is "' + ret_value + '" (' + typeof ret_value + ')'); + //LOG('...result is "' + ret_value + '" (' + typeof ret_value + ')'); } catch (e) { -// LOG('...received exception (' + e + ')'); + //LOG('...received exception (' + e + ')'); } if (ret_value != null) { - if (typeof ret_value != 'function') maybe_add_ref(ret_value, add_set); + if (typeof ret_value != 'function') { + replay("(function (obj, add_set) { // args: ret_value, add_set", " +"); + maybe_add_ref(ret_value, add_set); + replay("})(ret_value, add_set);", "- "); + } /* Recurse into returned objects. 
*/ if (typeof ret_value == 'object' && cur_fan < FAN_LIMIT) { cur_fan++; + replay("(function (path, target, use_set, add_set) { // args: path + '.[ret:' + name + ']' , ret_value, use_set, add_set", " +"); call_methods(path + '.[ret:' + name + ']' , ret_value, level, ret_level + 1, use_set, add_set); + replay("})(path + '.[ret:' + name + ']' , ret_value, use_set, add_set);", "- "); + replay("(function (path, target, use_set) { // args: path + '.[ret:' + name + ']' , ret_value, use_set", " +"); tweak_properties(path + '.[ret:' + name + ']' , ret_value, level + ret_level + 1, use_set); - -// LOG('-- BACK TO METHOD CRAWL (' + level + '): ' + path + ' --'); + replay("})(path + '.[ret:' + name + ']' , ret_value, use_set);", "- "); + LOG('-- BACK TO METHOD CRAWL (' + level + '): ' + path + ' --'); } } } /* Recurse into crawled objects. */ if (typeof cur_value == 'object' && cur_fan < FAN_LIMIT) { cur_fan++; + replay("(function (path, target, use_set, add_set) { // args: path + '.' + name, cur_value, use_set, add_set", " +"); call_methods(path + '.' + name, cur_value, level + 1, ret_level, use_set, add_set); - -// LOG('-- BACK TO METHOD CRAWL (' + level + '): ' + path + ' --'); + replay("})(path + '.' 
+ name, cur_value, use_set, add_set);", "- "); + LOG('-- BACK TO METHOD CRAWL (' + level + '): ' + path + ' --'); } } init_genrand(ret_seed); } /******************* * Main event loop * *******************/ var state = 0; +replay("var cur_set = [];"); +replay("var new_set = [];"); + var cur_set = []; var new_set = []; var wait_cycles = 0; function event_loop() { var ret_seed = genrand_int32(); if (t1 == null) { - alert('Disable pop-up blocking.'); + LOG('Disable pop-up blocking.'); + setTimeout("quit()", 10); return; } if (wait_cycles > 0) { wait_cycles--; - setTimeout('event_loop()', 10); + setTimeout('try { event_loop() } catch (e) { setTimeout("quit()", 10); LOG("FATAL_ERROR: " + e);}', 10); return; } + replay("(function () { // args: none", " +"); + switch (state) { case 0: + replay("(function (path, target, add_set) { // args: '[target1]', t1, cur_set", " +"); crawl_properties('[target1]', t1, 0, cur_set); + replay("})('[target1]', t1, cur_set);", "- "); break; case 1: + replay("(function (path, target, use_set, add_set) { // args: '[target1]', t1, cur_set, cur_set", " +"); call_methods('[target1]', t1, 0, 0, cur_set, cur_set); + replay("})('[target1]', t1, cur_set, cur_set);", "- "); break; case 2: + replay("(function (path, target, use_set) { // args: '[target1]', t1, cur_set", " +"); tweak_properties('[target1]', t1, 0, cur_set); + replay("})('[target1]', t1, cur_set);", "- "); break; case 3: + replay("(function (path, target, use_set, add_set) { // args: '[target1]', t1, cur_set, cur_set", " +"); call_methods('[target1]', t1, 0, 0, cur_set, cur_set); + replay("})('[target1]', t1, cur_set, cur_set);", "- "); break; case 4: if (R(TRASH_ODDS) == 0) { trash_target(t1); } + replay("toggle_gc();"); toggle_gc(); break; case 5: + replay("(function (path, target, add_set) { // args: '[target2]', t2, new_set", " +"); crawl_properties('[target2]', t2, 0, new_set); + replay("})('[target2]', t2, new_set);", "- "); break; case 6: + replay("(function (path, target, 
use_set, add_set) { // args: '[target2]', t2, cur_set, new_set", " +"); call_methods('[target2]', t2, 0, 0, cur_set, new_set); + replay("})('[target2]', t2, cur_set, new_set);", "- "); break; case 7: + replay("(function (path, target, use_set) { // args: '[target2]', t2, new_set", " +"); tweak_properties('[target2]', t2, 0, new_set); + replay("})('[target2]', t2, new_set);", "- "); break; case 8: + replay("(function (path, target, use_set, add_set) { // args: '[target2]', t2, cur_set, new_set", " +"); call_methods('[target2]', t2, 0, 0, cur_set, new_set); + replay("})('[target2]', t2, cur_set, new_set);", "- "); break; case 9: stat_box.innerHTML = cur_set.length; + replay("cur_set = new_set.slice(-KEEP_REFS);"); + replay("new_set = [];"); + cur_set = new_set.slice(-KEEP_REFS); new_set = []; LOG('Kept ' + cur_set.length + ' references.'); if (R(RESET_ODDS) == 0) { new_targets(); + replay("toggle_gc();"); toggle_gc(); wait_cycles = 50; } break; } state = (state + 1) % 10; + replay("//### State " + state + " ###"); LOG('### State ' + state + ' ###'); - setTimeout('try { event_loop() } catch (e) { alert(e); }', 10); + setTimeout('try { event_loop() } catch (e) { setTimeout("quit()", 10); LOG("FATAL_ERROR: " + e);}', 10); + + replay("})();", "- "); init_genrand(ret_seed); } /*********************** * Initialization code * ***********************/ function run_tests(run) { + // always start with a standard window size + if ('sizeToContent' in window) + window.sizeToContent(); + if (!run && location.search.indexOf('run') == -1) return; - if (location.hash) { + if (typeof seed != "undefined") + seed = seed; + else if(location.hash) { seed = location.hash.substr(1); } else { seed = (new Date()).getTime() & 0xFFFFFFFF; } + replay("var seed = " + seed + ';'); + + LOG('seed = ' + seed); + init_genrand(seed); location.hash = '#' + seed; + replay("init_genrand(" + seed + ");"); + replay("location.hash = '#' + " + seed + ";"); + log_box = document.getElementById('results'); 
stat_box = document.getElementById('stats'); new_targets(); - setTimeout('event_loop()', 1000); + setTimeout('try { event_loop() } catch (e) { setTimeout("quit()", 10); LOG("FATAL_ERROR: " + e);}', 1000); + + // Terminate after 1 minutes. Any longer and the replay scripts + // are too long. + // XXX: set via url? + setTimeout("quit()", 1000*60); } +/**************************************** + * quit() uses Jesse Ruderman's Quitter * + * extension to terminate the browser * + * cleanly. * + ****************************************/ +function quit() +{ + var evt = document.createEvent('Events'); + evt.initEvent('please-quit', true, false); + document.dispatchEvent(evt); +} diff --git a/encode-test-js.py b/encode-test-js.py new file mode 100644 --- /dev/null +++ b/encode-test-js.py @@ -0,0 +1,86 @@ +import sys +import re + +reBF = re.compile(r'$function \(([^)]*)$*\)\s+{') # => /**/(function (\1) { /**/ +reOB = re.compile(r'(.*){$') # => \1/**/{/**/ +reCB = re.compile(r'(.*)}$') # => \1/**/}/**/ +reEF = re.compile(r'}\)$(.*?)$;?$') # => /**/})(\1);/**/ +reL = re.compile(r'(.*)') # => /**/\1/**/ +reIC = re.compile(r'//(.*)') + +def jsencode(program, level, inputfh): + + for line in inputfh: + + subprogramtext = "" # set to null string to free memory + + line = line.strip() + + if not line: + continue + + if line[-1] == '/': # append a space to prevent inadvertent // + line = line + ' ' + + + match = reBF.match(line) + if match: + if match.group(1): + args = match.group(1) + else: + args = '' + subprogram = ["/**/(function (%s) {/**/" % args] + match = reIC.search(line) + if match: + subprogram.append("/* %s */" % match.group(1)) + + jsencode(subprogram, level + 1, inputfh) + subprogramtext = ' '.join(subprogram) + if level == 0: + print subprogramtext + else: + program.append(subprogramtext) + continue + + match = reEF.match(line) + if match: + program.append("/**/})(%s);/**/" % match.group(1)) + return + + match = reOB.match(line) + if match: + subprogramtext = "/**/ 
%s /**/{/**/ /**/" % match.group(1) + if level == 0: + print subprogramtext + else: + program.append(subprogramtext) + continue + + match = reCB.match(line) + if match: + subprogramtext = "/**/ %s /**/}/**/ /**/" % match.group(1) + if level == 0: + print subprogramtext + else: + program.append(subprogramtext) + continue + + # handle inline comments specially + line = reIC.sub(r'/*\1*/', line) + + subprogramtext = "/**/%s/**/" % line + if level == 0: + print subprogramtext + else: + program.append(subprogramtext) + +if __name__ == "__main__": + + program = [] + inputfile = sys.argv[1] + + inputfh = open(inputfile, 'r') + + jsencode(program, 0, inputfh) + + inputfh.close() diff --git a/expand-test-js.py b/expand-test-js.py new file mode 100644 --- /dev/null +++ b/expand-test-js.py @@ -0,0 +1,150 @@ +import sys +import re + +reBF = re.compile(r'/\*\*/$function \(([^)]*)$*\)\s+{/\*\*/') # => /**/(function (\1) { /**/ +#reOB = re.compile(r'(.*)/\*\*/{/\*\*/') # => \1/**/{/**/ +#reCB = re.compile(r'(.*)/\*\*/}/*\*/') # => \1/**/}/**/ +reEF = re.compile(r'/\*\*/}\)$(.*?)$;?/\*\*/') # => /**/})(\1);/**/ +reL = re.compile(r'/\*\*/(.*?)/\*\*/') # => /**/\1/**/ +reC = re.compile(r'/\* [^\*]+ \*/') +reOBEOL = re.compile(r'/\*\*/{/\*\*/\s+/\*\*/\s*$') +reCBBOL = re.compile(r'^(\s*/\*\*/\s*}|/\*\*/\s+/\*\*/}/\*\*/)') + +def jsexpand(program, level, expandlevel, inputfh, line): + + global output_line_count + + if not line: + line = readline(inputfh) + + while line: + + #sys.stderr.write("================\n") + #sys.stderr.write("program=%s\n" % program) + #sys.stderr.write("level=%s\n" % level) + #sys.stderr.write("line=%s\n" % line) + + subprogramtext = "" # set to null string to free memory + + line = line.strip() + + if not line: + line = readline(inputfh) + continue + + match = reBF.match(line) + if match: + #sys.stderr.write("match BF %s\n" % match.group(0)) + subprogramtext = match.group(0) + subprogram = [subprogramtext] + line = line[len(subprogramtext):] + line = 
jsexpand(subprogram, level + 1, expandlevel, inputfh, line) + if level < expandlevel: + subprogramtext = '\n'.join(subprogram) + output_line_count += len(subprogram) - 1 + else: + subprogramtext = ' '.join(subprogram) + + if level == 0: + print subprogramtext + output_line_count += 1 + else: + program.append(subprogramtext) + + if not line: + line = readline(inputfh) + continue + + match = reEF.match(line) + if match: + #sys.stderr.write("match EF %s\n" % match.group(0)) + subprogramtext = match.group(0) + line = line[len(subprogramtext):] + program.append(subprogramtext) + return line + + #match = reOB.match(line) + #if match: + # #sys.stderr.write("match OB %s\n" % match.group(0)) + # subprogramtext = match.group(0) + # line = line[len(subprogramtext):] + # if level == 0: + # print subprogramtext + # output_line_count += 1 + # else: + # program.append(subprogramtext) + # if not line: + # line = readline(inputfh) + # continue + + #match = reCB.match(line) + #if match: + # #sys.stderr.write("match CB %s\n" % match.group(0)) + # subprogramtext = match.group(0) + # line = line[len(subprogramtext):] + # if level == 0: + # print subprogramtext + # continue + + match = reL.match(line) + if match: + #sys.stderr.write("match L %s\n" % match.group(0)) + subprogramtext = match.group(0) + line = line[len(subprogramtext):] + if level == 0: + print subprogramtext + output_line_count += 1 + else: + # hack empty blocks to same line if possible + program.append(subprogramtext) + if len(program) > 2: + openmatch = reOBEOL.search(program[-2]) + closematch = reCBBOL.search(program[-1]) + if openmatch and closematch: + program[-2] += ' ' + program[-1] + program.pop() + if not line: + line = readline(inputfh) + continue + + match = reC.match(line) + if match: + #sys.stderr.write("match C %s\n" % match.group(0)) + subprogramtext = match.group(0) + line = line[len(subprogramtext):] + if level == 0: + print subprogramtext + output_line_count += 1 + else: + program.append(subprogramtext) 
+ if not line: + line = readline(inputfh) + + return line + +def readline(fh): + global input_line_count + + line = fh.readline() + if line: + input_line_count += 1 + + return line + +if __name__ == "__main__": + + input_line_count = 0 + output_line_count = 0 + + program = [] + inputfile = sys.argv[1] + expandlevel = int(sys.argv[2]) + + inputfh = open(inputfile, 'r') + + jsexpand(program, 0, expandlevel, inputfh, None) + + inputfh.close() + + if input_line_count == output_line_count: + sys.exit(1) diff --git a/extract-test-js.py b/extract-test-js.py new file mode 100644 --- /dev/null +++ b/extract-test-js.py @@ -0,0 +1,21 @@ +import sys +import re + +inputfile = sys.argv[1] +inputfh = open(inputfile, 'r') +skip = True + +for line in inputfh: + + if line.find('DDBEGIN') != -1: + skip = False + continue + + if line.find('DDEND') != -1: + break; + + if not skip: + line = line.strip() + print line + +inputfh.close() diff --git a/firefox_replay_readme.txt b/firefox_replay_readme.txt new file mode 100644 --- /dev/null +++ b/firefox_replay_readme.txt @@ -0,0 +1,140 @@ +Running Cross Fuzz with Replay and Lithium + +Prerequisites +============= + +I assume you have a good posix shell such as bash available. On Linux +and Mac, just open a terminal window. On Windows, I recommend you +install cygwin from cygwin.com. Be sure to install python. + +Installation +============ + +Unzip the cross_fuzz.zip file to your local disk. + +Create a new Firefox profile, I call mine lithium, to automatically +start. + +Copy the user.js from the cross_fuzz directory to the profile to set +the necessary prefs. 
+ +Install Jesse's Quitter extension from +https://www.squarefree.com/extensions/quitter.xpi + +Install Jesse's lithium testcase reducer from: +http://www.squarefree.com/2007/09/15/introducing-lithium-a-testcase-reduction-tool/ + +For more information about using lithium, see: + +http://www.squarefree.com/lithium/using.html +http://www.squarefree.com/lithium/using-for-firefox.html +http://www.squarefree.com/2009/01/11/reducing-real-world-scripts/ + +Open a command shell and cd to the cross_fuzz directory + +Manual Steps +============ + +Execute the contained setmoz.sh from a command shell to set the +necessary environment variables (or set them by hand). + +source setmoz.sh + +Run the following command to continually run cross fuzz until a +non-zero exit code is returned. + +let t=0; \ +while pathtofirefoxbinary \ + ./cross_fuzz_randomized_20110105_seed.html?run > output.log 2>&1 ; \ +do \ + let t=t+1; \ + echo $t; \ +done + +Make sure you include ?run in the cross fuzz url, otherwise it will +not run. + +Execute make to create the test.html file from the output.log + +make + +You can quickly check that the replay script will reproduce the crash +by loading the test.html page in the browser. + +pathtofirefoxbinary ./cross_fuzz_randomized_20110105_seed.html?run ./test.html + +If you were able to reproduce the crash, you will be able to use +lithium to reduce the test case. Before spending the time reducing the +testcase, first attempt to crash using a nightly build (from a +different shell since we have disabled crash reporting in this one) +and submit the crash report. This will allow you to discover the crash +signature and search crash-stats.mozilla.com and bugzilla to see if +there is already a bug report for the crash. If you crashed using a +debug build and obtained an Abort or Assertion failure message, you +can also search bugzilla for the message to see if any bug reports +have already been filed. 
+ +To run lithium repeatedly until the replay script test.html can not be +reduced further, execute: + + ./run-lithium.sh pathtolithiumdirectory pathtofirefoxbinary + +The final test will be in the test.html file. It will typically +contain relatively few nested function calls. It is fairly easy to +reduce this further. See the enclosed test cases for bugs 622165, +622593, 624493. Contact me or Jesse for tips on how to further reduce +the test.html to a minimal test case. Note it should not be necessary +to include any of the cross fuzz application in the reduced test case. + +Since the cross fuzz application performs not only DOM manipulations +but also executes itself, it will occasionally cause a crash which is +reproducible by running cross fuzz with the seed and not +reproducible when running the replay script. + +If you do not crash when loading the replay script test.html, you can +extract the seed value used during the test via: + +grep seed output.log + +and then attempt to reproduce the crash by re-running cross fuzz with +that seed, as in: + +pathtofirefoxbinary ./cross_fuzz_randomized_20110105_seed.html?run#seedvalue + +If you can reproduce the crash by running with the seed value, you +will not be able to use lithium to reduce the test but can still file +a bug with the appropriate seed value which is enough information for +someone else to reproduce the crash. + +It is also possible that you will not be able to reproduce a crash +even when re-running cross fuzz with the seed value. You can attempt +to run the test under valgrind or with gczeal to see if you can +discover an underlying reason for the randomness of the crash. 
+ +Automated Steps +=============== + +To run cross fuzz until it crashes and then automatically execute +lithium to reduce the test case, execute: + +./run-test.sh pathtolithium pathtofirefox + +Black Listing Known Crashes +=========================== + +If you find that you are continually hitting the same crashes, you can +modify the cross_fuzz_v3.js script and add an entry to the +object_blacklist to skip over the offending properties or methods. + + +Known Problems +============== + +Not all crashes are reproducible. + +File Upload dialogs on Linux block Firefox and cause cross fuzz to stop. I don't know +which properties to black list to prevent this. + +javascript.options.gczeal 2 causes crashes on start up. See bug 625191. + + diff --git a/fixup-test-js.py b/fixup-test-js.py new file mode 100644 --- /dev/null +++ b/fixup-test-js.py @@ -0,0 +1,74 @@ +import sys +import re + +filename = sys.argv[1] + +stack = [] +reArgs = re.compile(r'// args: (.*)') +reOpenBlock = re.compile(r'/\* open block ([0-9]+) \*/') +reCloseBlock = re.compile(r'/\* close block ([0-9]+) \*/') + +line_number = 0 +fh = open(filename, 'r') + +# keep the function declaration line and don't immediately output it +# so we can output the block id with the function declaration to aid +# in debugging. + +function_decl_line = "" + +for line in fh: + + line = line.rstrip() + + line_number += 1 + + match = reArgs.search(line) + if match: + stack.append({ 'args' : match.group(1)}) + function_decl_line = line + # Delay outputing the function declaration line + # until we have a block id. + # The next line *must* be the open block comment. 
+ else: + match = reOpenBlock.search(line) + if match: + + id = match.group(1) + if len(stack) > 0 and 'args' in stack[-1] and 'id' not in stack[-1]: + stack[-1]['id'] = id + print "%s block %s" % (function_decl_line, id) + function_decl_line = "" + else: + if function_decl_line: + raise Exception("error: function declaration line %s missing args block line: %s at line %d" % (function_decl_line, line, line_number)) + stack.append({'id' : id}) + else: + if function_decl_line: + raise Exception("error: function declaration line %s not followed by open block line: %s at line %d" % (function_decl_line, line, line_number)) + + match = reCloseBlock.search(line) + if match: + id = match.group(1) + opened_block = stack.pop() + if opened_block['id'] != id: + raise Exception("error: opened block %s does not match closed block %s at line %d" % (opened_block['id'], id, line_number)) + else: + print line + +fh.close() + +if len(stack) > 0: + print "%s// CRASHED" % (len(stack)*" ") + +while len(stack) > 0: + block = stack.pop() + if 'args' in block: + args = block['args'] + if args == 'none': + args = '' + print "%s})(%s);" % (len(stack)*" ", args) + else: + print "%s}" % (len(stack)*" ") + + diff --git a/lithium-reduction-ideas.txt b/lithium-reduction-ideas.txt new file mode 100644 --- /dev/null +++ b/lithium-reduction-ideas.txt @@ -0,0 +1,280 @@ +Cross Fuzz Replay Script format +=============================== + +The standard output of the replay cross fuzz tests currently is a +series of statements followed by a nested sequence of anonymous +function calls of the form: + +statement +statement +statement +(function (arglist) { + statement + (function (arglist) { + statement + (function (arglist) { + statement + })(vallist); + })(vallist); +})(vallist); +(function (arglist) { + statement + (function (arglist) { + statement + (function (arglist) { + statement + })(vallist); + })(vallist); +})(vallist); +(function (arglist) { + statement + (function (arglist) { + statement + 
(function (arglist) { + statement + })(vallist); + })(vallist); +})(vallist); +(function (arglist) { + statement + (function (arglist) { + statement + (function (arglist) { + statement + })(vallist); + })(vallist); +})(vallist); + +Actual test files can contain over 100,000 lines and can take +lithium days to reduce. One cause of difficulty for lithium is the +splitting of the function expression statements over many lines which +results in syntax errors for many of lithium's trial cases. + +If we can reformat the program so that the top level function +expression calls are contained on a single line as in: + +split level 0 + +statement +statement +statement +(function (arglist) { statement (function (arglist) { statement (function (arglist) { statement })(vallist); })(vallist);})(vallist); +(function (arglist) { statement (function (arglist) { statement (function (arglist) { statement })(vallist); })(vallist);})(vallist); +(function (arglist) { statement (function (arglist) { statement (function (arglist) { statement })(vallist); })(vallist);})(vallist); +(function (arglist) { statement (function (arglist) { statement (function (arglist) { statement })(vallist); })(vallist);})(vallist); + +Lithium will perform much better on this as the reductions of the +function expressions will all be syntactically valid and each +eliminated line could be an arbitrary number of lines of the original +format. + +Once this has been completely reduced, it can be split again +into the next level where the top level expression statements begin +and end on separate lines while all contained statements are also on +separate lines. 
+ +split level 1 + +(function (arglist) { + statement + (function (arglist) { statement (function (arglist) { statement })(vallist); })(vallist); +})(vallist); +(function (arglist) { + statement + (function (arglist) { statement (function (arglist) { statement })(vallist); })(vallist); +})(vallist); +(function (arglist) { + statement + (function (arglist) { statement (function (arglist) { statement })(vallist); })(vallist); +})(vallist); +(function (arglist) { + statement + (function (arglist) { statement (function (arglist) { statement })(vallist); })(vallist); +})(vallist); + +and again + +split level 2 + +(function (arglist) { + statement + (function (arglist) { + statement + (function (arglist) { + statement + })(vallist); + })(vallist); +})(vallist); +(function (arglist) { + statement + (function (arglist) { + statement + (function (arglist) { + statement + })(vallist); + })(vallist); +})(vallist); + +At split levels higher than 0, lithium induced syntax errors will +increase but hopefully the reductions of the lower levels reduced the +size of the script appreciably since for every interior line +eliminated we potentially have removed an unlimited number of lines of +the original format. + +Needed tools: + +0. an internal data structure to represent the programs. + +1. tool to read the original cross fuzz replay script and convert it into +an internal data structure. + +2. tool to read an intermediate script and convert it into an internal +data structure. + +3. tool to output a script from the internal data structure with +specific formatting options. + +If we had a real JavaScript parser and a tool to reformat JavaScript +we might use that, but it is a lot more work than I want to do now. In +addition, the size of the cross fuzz replay scripts may be so large as +to prevent using any normal parser. 
+ +Parsing the initial cross fuzz replay script +============================================ + +The limited program styles produced by cross fuzz replay supports an +initial approach for reading the cross fuzz replay script triggered on +the patterns for the function expressions, lines and opening and +closing brackets {}. + +Pattern BF (Begin Function) - start of function expression statement (arglist is a comma +separated list of identifiers) + +$function \(([\w],?)*$ {(\s*//.*)?$ + +In the initial format produced by replay, the beginning of the +function expression can have an inline comment following the opening +brace. + +Pattern OB (Open Block) - start of new block statement + +.*{$ + +Pattern CB (Close Block) - end of existing block + +.*}$ + +Pattern EF (End Function) - end of function expression (this is problematic since +vallist can contain arbitrary characters). + +}\)$.*?);$ + +If we are joining lines together in the intermediate scripts we must +either remove the inline comments or transform them into block +comments. + +Intermediate script format +========================== + +Processing the intermediate scripts can not use the same approach +since we will be removing the line boundaries implicit in the original +replay script. To enable processing of the intermediate scripts +without the use of a full fledged JavaScript parser, we can mark the +code fragments identified by the initial patterns using block comments +that contain marker strings. We must take care that when joining +lines, adjacent markers do not form an inline comment. 
+ +Replay to Intermediate Code transformations +=========================================== + +^\s+ => +\(function \(([\w],?)*$\s+{\s*(//.*)?$ => /*BF>*/(function (\1)/*/BF>*/ +(.*){$ => \1/*OB>*/{/*/OB>*/ +(.*)}$ => \1/*CB>*/}/*/CB>*/ +}\)$.*?);$ => /**/})\(\1$;/**/ +(.*) => /**/\1/**/ + +where + +BF - begin function statement +EF - end function statement +OB - open block +CB - close block +L - line + +For example, + +(function (arg1, arg2) { // comment + statement1 + statement2 + (function (arg3) { + statement3 + function (arg4) { + statement4 + })(val4); + })(val3); +})(val1, val2); + +transforms to + +/**/(function (arg1, arg2) {/**/ +/**/statement1/**/ +/**/statement2/**/ +/**/(function (arg3) {/**/ +/**/statement3/**/ +/**/function (arg4) {/**/ +/**/statement4/**/ +/**/})(val4);/**/ +/**/})(val3);/**/ +/**/})(val1, val2);/**/ + +level 0 form - entire function expression statement on one line + +/**/(function (arg1, arg2) {/**/ /**/statement1/**/ /**/statement2/**/ /**/(function (arg3) {/**/ /**/statement3/**/ /**/function (arg4) {/**/ /**/statement4/**/ /**/})(val4);/**/ /**/})(val3);/**/ /**/})(val1, val2);/**/ + +Note that lines were joined with a space separator to prevent adjacent +markers from forming an inline comment. + +level 1 form - beginning and ending parts of the outer function expression and its interior statements are on separate lines. 
+ +/**/(function (arg1, arg2) {/**/ +/**/statement1/**/ +/**/statement2/**/ +/**/(function (arg3) {/**/ /**/statement3/**/ /**/function (arg4) {/**/ /**/statement4/**/ /**/})(val4);/**/ /**/})(val3);/**/ +/**/})(val1, val2);/**/ + +level 2 form + +/**/(function (arg1, arg2) {/**/ +/**/statement1/**/ +/**/statement2/**/ +/**/(function (arg3) {/**/ +/**/statement3/**/ +/**/function (arg4) {/**/ /**/statement4/**/ /**/})(val4);/**/ +/**/})(val3);/**/ +/**/})(val1, val2);/**/ + +level 3 form + +/**/(function (arg1, arg2) {/**/ +/**/statement1/**/ +/**/statement2/**/ +/**/(function (arg3) {/**/ +/**/statement3/**/ +/**/function (arg4) {/**/ +/**/statement4/**/ +/**/})(val4);/**/ +/**/})(val3);/**/ +/**/})(val1, val2);/**/ + +Data Structure +============== + +As we read the test script file, we maintain the program structure as +a list of objects representing the lines. Each object itself may +contain a list of lines which contain the source for the corresponding +piece of code. The logic is identical for processing the initial cross +fuzz replay script and for processing the "marked" scripts. We only +need to "mark" the input if it hasn't been already and to use the +appropriate regular expressions when performing matches. + diff --git a/run-lithium.sh b/run-lithium.sh new file mode 100755 --- /dev/null +++ b/run-lithium.sh @@ -0,0 +1,74 @@ +#!/bin/bash + +lithium=$1 +firefox=$2 + +echo $lithium +echo $firefox + +if [[ -z "$lithium" || -z "$firefox" ]]; then + echo "Usage: run-lithium.sh pathtolithium pathtofirefox" + exit 2 +fi + +cross_fuzz_dir=`pwd` + +echo $cross_fuzz_dir + +let level=0 + +trap 'cd $cross_fuzz_dir; exit 2' TERM INT + +while true; do + + pushd $lithium + + if [[ ! -e lithium.py ]]; then + echo "Lithium not found" + exit 2 + fi + + if [[ -e binarysearch.py ]]; then # new version of lithium + if ! ./lithium.py ./crashes.py 300 $firefox $cross_fuzz_dir/test.html; then + popd + exit + fi + else + if ! 
./lithium.py ./crashes.py $cross_fuzz_dir/test.html $firefox; then + popd + exit + fi + fi + + popd + + let level=level+1 + + echo "Extracting test script from test.html..." + + python extract-test-js.py test.html > temp-$level.js + + if [[ ! -s temp-$level.js ]]; then + echo "Extracted script is empty" + exit + fi + + echo "Expanding test script to level $level..." + + if ! python expand-test-js.py temp-$level.js $level > test.js; then + break + fi + + cat testhead.html test.js testtail.html > test.html + +done + +# use extended regexp in sed +if [[ `uname` == "Darwin" ]]; then + E='-E' +else + E='-r' +fi + +sed $E 's|/\*]*>\*/||g' test.js > temp.js +cat testhead.html temp.js testtail.html > test.html diff --git a/run-test.sh b/run-test.sh new file mode 100755 --- /dev/null +++ b/run-test.sh @@ -0,0 +1,34 @@ +#!/bin/bash + +lithium=$1 +firefox=$2 + +echo $lithium +echo $firefox + +if [[ -z "$lithium" || -z "$firefox" ]]; then + echo "Usage: run-test.sh pathtolithium pathtofirefox" + exit 2 +fi + +if [[ ! -d "$lithium" ]]; then + echo "Lithium directory $lithium does not exist." + exit 2 +fi + +if [[ ! -x "$firefox" ]]; then + echo "Firefox $firefox does not exist or is not executable." + exit 2 +fi + +source ./setmoz.sh + +let t=0; +while $firefox -P lithium ./cross_fuzz_randomized_20110105_seed.html?run > output.log 2>&1; do + let t=t+1; + echo $t `ls -lah output.log`; +done; + +echo $t exit code $? `ls -lah output.log`; + +LITHIUM=$lithium FIREFOX=$firefox make reduce diff --git a/setmoz.sh b/setmoz.sh new file mode 100755 --- /dev/null +++ b/setmoz.sh @@ -0,0 +1,8 @@ +export MOZ_NO_REMOTE=1 +export NO_EM_RESTART=1 +export MALLOC_CHECK_=2 +export XPCOM_DEBUG_BREAK=warn +export MOZ_CRASHREPORTER_NO_REPORT=1 +export MOZ_CRASHREPORTER_DISABLE=1 +export MOZ_GDB_SLEEP=1 + diff --git a/testhead.html b/testhead.html new file mode 100644 --- /dev/null +++ b/testhead.html @@ -0,0 +1,13 @@ + + +cross_fuzz v3 + + + + + + + + + + +

cross_fuzz v3 2011/01/05	+IMPORTANT: Please read comments in this file and run me from `file:///` if possible. +
	+ ++Logging disabled (edit source to re-enable) + + + + +Reference count: +

+ + + + diff --git a/user.js b/user.js new file mode 100644 --- /dev/null +++ b/user.js @@ -0,0 +1,13 @@ +user_pref("browser.dom.window.dump.enabled", true); +user_pref("browser.privatebrowsing.autostart", true); +user_pref("browser.rights.3.shown", true); +user_pref("browser.sessionstore.max_resumed_crashes", 0); +user_pref("browser.sessionstore.resume_from_crash", false); +user_pref("browser.sessionstore.resume_session_once", true); +user_pref("browser.shell.checkDefaultBrowser", false); +user_pref("browser.startup.page", 0); +user_pref("browser.tabs.warnOnClose", false); +user_pref("browser.warnOnRestart", false); +user_pref("dom.disable_open_during_load", false); +user_pref("dom.max_script_run_time", 0); +user_pref("dom.max_chrome_script_run_time", 0);

cross_fuzz v3 2011/01/05

cross_fuzz v3 2011/01/05