File: platform/mac/avmshell/../../../nanojit/Nativei386.cpp
Location: line 2696, column 17
Description: Value stored to 'condop' is never read
1 | /* -*- Mode: C++; c-basic-offset: 4; indent-tabs-mode: nil; tab-width: 4 -*- */ |
2 | /* vi: set ts=4 sw=4 expandtab: (add to ~/.vimrc: set modeline modelines=5) */ |
3 | /* ***** BEGIN LICENSE BLOCK ***** |
4 | * Version: MPL 1.1/GPL 2.0/LGPL 2.1 |
5 | * |
6 | * The contents of this file are subject to the Mozilla Public License Version |
7 | * 1.1 (the "License"); you may not use this file except in compliance with |
8 | * the License. You may obtain a copy of the License at |
9 | * http://www.mozilla.org/MPL/ |
10 | * |
11 | * Software distributed under the License is distributed on an "AS IS" basis, |
12 | * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License |
13 | * for the specific language governing rights and limitations under the |
14 | * License. |
15 | * |
16 | * The Original Code is [Open Source Virtual Machine]. |
17 | * |
18 | * The Initial Developer of the Original Code is |
19 | * Adobe System Incorporated. |
20 | * Portions created by the Initial Developer are Copyright (C) 2004-2007 |
21 | * the Initial Developer. All Rights Reserved. |
22 | * |
23 | * Contributor(s): |
24 | * Adobe AS3 Team |
25 | * Mozilla TraceMonkey Team |
26 | * Asko Tontti <atontti@cc.hut.fi> |
27 | * |
28 | * Alternatively, the contents of this file may be used under the terms of |
29 | * either the GNU General Public License Version 2 or later (the "GPL"), or |
30 | * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), |
31 | * in which case the provisions of the GPL or the LGPL are applicable instead |
32 | * of those above. If you wish to allow use of your version of this file only |
33 | * under the terms of either the GPL or the LGPL, and not to allow others to |
34 | * use your version of this file under the terms of the MPL, indicate your |
35 | * decision by deleting the provisions above and replace them with the notice |
36 | * and other provisions required by the GPL or the LGPL. If you do not delete |
37 | * the provisions above, a recipient may use your version of this file under |
38 | * the terms of any one of the MPL, the GPL or the LGPL. |
39 | * |
40 | * ***** END LICENSE BLOCK ***** */ |
41 | #include "nanojit.h" |
42 | |
43 | #ifdef _MSC_VER |
44 | // disable some specific warnings which are normally useful, but pervasive in the code-gen macros |
45 | #pragma warning(disable:4310) // cast truncates constant value |
46 | #endif |
47 | |
48 | namespace nanojit |
49 | { |
50 | #if defined FEATURE_NANOJIT && defined NANOJIT_IA32 |
51 | |
52 | #ifdef NJ_VERBOSE |
53 | const char *regNames[] = { |
54 | "eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi", |
55 | "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", |
56 | "f0" |
57 | }; |
58 | |
59 | const char *gpRegNames8lo[] = { "al", "cl", "dl", "bl" }; |
60 | const char *gpRegNames8hi[] = { "ah", "ch", "dh", "bh" }; |
61 | #endif |
62 | |
63 | #define TODO(x) do{ verbose_only(outputf(#x);) NanoAssertMsgf(false, "%s", #x); } while(0)
64 | |
65 | const Register Assembler::argRegs[] = { rECX, rEDX }; |
66 | const Register Assembler::retRegs[] = { rEAX, rEDX }; |
67 | const Register Assembler::savedRegs[] = { rEBX, rESI, rEDI }; |
68 | |
69 | const static uint8_t max_abi_regs[] = { |
70 | 2, /* ABI_FASTCALL */ |
71 | 1, /* ABI_THISCALL */ |
72 | 0, /* ABI_STDCALL */ |
73 | 0 /* ABI_CDECL */ |
74 | }; |
75 | |
76 | #define RB(r)    gpRegNames8lo[REGNUM(r)]
77 | #define RBhi(r)  gpRegNames8hi[REGNUM(r)]
78 | |
79 | typedef Register R; |
80 | typedef int32_t I32; |
81 | |
82 | // Length: 2--6 bytes. |
83 | inline void Assembler::MODRMsib(I32 r, R b, R i, I32 s, I32 d) { |
84 | if (d == 0 && b != rEBP) { |
85 | SIB(s, REGNUM(i), REGNUM(b)); |
86 | MODRM(0, r, 4); // amode == (b + i<<s) |
87 |         } else if (isS8(d)) {
88 | IMM8(d); |
89 | SIB(s, REGNUM(i), REGNUM(b)); |
90 | MODRM(1, r, 4); // amode == d8(b + i<<s) |
91 | } else { |
92 | IMM32(d); |
93 | SIB(s, REGNUM(i), REGNUM(b)); |
94 | MODRM(2, r, 4); // amode == d32(b + i<<s) |
95 | } |
96 | } |
97 | |
98 | // Length: 1--6 bytes. |
99 | inline void Assembler::MODRMm(I32 r, I32 d, R b) { |
100 | if (b == UnspecifiedReg) { |
101 | IMM32(d); |
102 | MODRM(0, r, 5); // amode == (d32) |
103 | } else if (b == rESP) { |
104 | MODRMsib(r, b, rESP, 0, d); // amode == d(b) |
105 | } else if (d == 0 && b != rEBP) { |
106 | MODRM(0, r, REGNUM(b)); // amode == (r) |
107 |         } else if (isS8(d)) {
108 | IMM8(d); |
109 | MODRM(1, r, REGNUM(b)); // amode == d8(b) |
110 | } else { |
111 | IMM32(d); |
112 | MODRM(2, r, REGNUM(b)); // amode == d32(b) |
113 | } |
114 | } |
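     |         // Worked example (a sketch; assumes MODRM(mod, reg, rm) packs the
     |         // usual (mod<<6)|(reg<<3)|rm byte): MODRMm(0, 8, rEBX) takes the
     |         // d8 branch and emits IMM8(0x08) then MODRM(1, 0, 3) == 0x43.
     |         // Since this assembler emits backwards, the bytes land in memory
     |         // as 43 08, so a preceding OPCODE(0x8b) yields 8b 43 08, i.e.
     |         // mov eax, 8(%ebx).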
115 | |
116 | // Length: 5 bytes. |
117 | inline void Assembler::MODRMdm(I32 r, I32 addr) { |
118 | IMM32(addr); |
119 | MODRM(0, r, 5); // amode == d32(r) |
120 | } |
121 | |
122 | inline void Assembler::ALU0(I32 opc) { |
123 | underrunProtect(1); |
124 | OPCODE(opc); |
125 | } |
126 | |
127 | inline void Assembler::ALUm(I32 opc, I32 r, I32 d, R b) { |
128 | underrunProtect(8); |
129 | MODRMm(r, d, b); |
130 | OPCODE(opc); |
131 | } |
132 | |
133 | inline void Assembler::ALUdm(I32 opc, R r, I32 addr) { |
134 | underrunProtect(6); |
135 | MODRMdm(REGNUM(r), addr); |
136 | OPCODE(opc); |
137 | } |
138 | |
139 | inline void Assembler::ALUsib(I32 opc, R r, R base, R index, I32 scale, I32 disp) { |
140 | underrunProtect(7); |
141 | MODRMsib(REGNUM(r), base, index, scale, disp); |
142 | OPCODE(opc); |
143 | } |
144 | |
145 | inline void Assembler::ALUsib16(I32 opc, R r, R base, R index, I32 scale, I32 disp) { |
146 | underrunProtect(8); |
147 | MODRMsib(REGNUM(r), base, index, scale, disp); |
148 | OPCODE(opc); |
149 | OPCODE(0x66); |
150 | } |
151 | |
152 | inline void Assembler::ALUm16(I32 opc, I32 r, I32 d, R b) { |
153 | underrunProtect(9); |
154 | MODRMm(r, d, b); |
155 | OPCODE(opc); |
156 | OPCODE(0x66); |
157 | } |
158 | |
159 | inline void Assembler::ALU2dm(I32 opc2, R r, I32 addr) { |
160 | underrunProtect(7); |
161 | MODRMdm(REGNUM(r), addr); |
162 | OPCODE2(opc2); |
163 | } |
164 | |
165 | inline void Assembler::ALU2m(I32 opc2, R r, I32 d, R b) { |
166 | underrunProtect(9); |
167 | MODRMm(REGNUM(r), d, b); |
168 | OPCODE2(opc2); |
169 | } |
170 | |
171 | inline void Assembler::ALU2sib(I32 opc2, Register r, R base, R index, I32 scale, I32 disp) { |
172 | underrunProtect(8); |
173 | MODRMsib(REGNUM(r), base, index, scale, disp); |
174 | OPCODE2(opc2); |
175 | } |
176 | |
177 | inline void Assembler::ALUi(I32 opc, R r, I32 i) { |
178 | underrunProtect(6); |
179 |             NanoAssert(REGNUM(r) < 8);
180 |             if (isS8(i)) {
181 | IMM8(i); |
182 | MODRMr(opc >> 3, REGNUM(r)); |
183 | OPCODE(0x83); |
184 | } else { |
185 | IMM32(i); |
186 | if ( r == rEAX) { |
187 | OPCODE(opc); |
188 | } else { |
189 | MODRMr(opc >> 3, REGNUM(r)); |
190 | OPCODE(0x81); |
191 | } |
192 | } |
193 | } |
194 | |
195 | inline void Assembler::ALUmi(I32 opc, I32 d, Register b, I32 i) { |
196 | underrunProtect(10); |
197 |             NanoAssert(REGNUM(b) < 8);
198 |             if (isS8(i)) {
199 | IMM8(i); |
200 | MODRMm(opc >> 3, d, b); |
201 | OPCODE(0x83); |
202 | } else { |
203 | IMM32(i); |
204 | MODRMm(opc >> 3, d, b); |
205 | OPCODE(0x81); |
206 | } |
207 | } |
208 | |
209 | inline void Assembler::ALU2(I32 opc2, R d, R s) { |
210 | underrunProtect(3); |
211 | MODRMr(REGNUM(d), REGNUM(s)); |
212 | OPCODE2(opc2); |
213 | } |
214 | |
215 | inline Register Assembler::AL2AHReg(R r) { |
216 |             NanoAssert(REGNUM(r) < 4); // one of: al, cl, dl, bl
217 | Register r2 = { REGNUM(r) | 4 }; // convert to one of: ah, ch, dh, bh |
218 | return r2; |
219 | } |
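     |         // E.g. AL2AHReg(rEAX) maps register number 0 ("al") to number 4,
     |         // which a ModRM reg/rm field addresses as "ah" on x86-32.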
220 | |
221 | inline void Assembler::OR(R l, R r) { count_alu(); ALU(0x0b, REGNUM(l), r); asm_output("or %s,%s", gpn(l), gpn(r)); } |
222 | inline void Assembler::AND(R l, R r) { count_alu(); ALU(0x23, REGNUM(l), r); asm_output("and %s,%s", gpn(l), gpn(r)); } |
223 | inline void Assembler::XOR(R l, R r) { count_alu(); ALU(0x33, REGNUM(l), r); asm_output("xor %s,%s", gpn(l), gpn(r)); } |
224 | inline void Assembler::ADD(R l, R r) { count_alu(); ALU(0x03, REGNUM(l), r); asm_output("add %s,%s", gpn(l), gpn(r)); } |
225 | inline void Assembler::SUB(R l, R r) { count_alu(); ALU(0x2b, REGNUM(l), r); asm_output("sub %s,%s", gpn(l), gpn(r)); } |
226 | inline void Assembler::IMUL(R l, R r){ count_alu(); ALU2(0x0faf, l, r); asm_output("imul %s,%s", gpn(l), gpn(r)); } |
227 | inline void Assembler::DIV(R r) { count_alu(); ALU(0xf7, 7, r); asm_output("idiv edx:eax, %s", gpn(r)); } |
228 | inline void Assembler::NOT(R r) { count_alu(); ALU(0xf7, 2, r); asm_output("not %s", gpn(r)); } |
229 | inline void Assembler::NEG(R r) { count_alu(); ALU(0xf7, 3, r); asm_output("neg %s", gpn(r)); } |
230 | inline void Assembler::AND8R(R r) { count_alu(); ALU(0x22, REGNUM(r), AL2AHReg(r)); asm_output("andb %s, %s", RB(r), RBhi(r)); } |
231 | |
232 | inline void Assembler::SHR(R r, R s) { |
233 | count_alu(); |
234 |             NanoAssert(s == rECX); (void)s;
235 | ALU(0xd3, 5, r); |
236 | asm_output("shr %s,%s", gpn(r), gpn(s)); |
237 | } |
238 | |
239 | inline void Assembler::SAR(R r, R s) { |
240 | count_alu(); |
241 |             NanoAssert(s == rECX); (void)s;
242 | ALU(0xd3, 7, r); |
243 | asm_output("sar %s,%s", gpn(r), gpn(s)); |
244 | } |
245 | |
246 | inline void Assembler::SHL(R r, R s) { |
247 | count_alu(); |
248 |             NanoAssert(s == rECX); (void)s;
249 | ALU(0xd3, 4, r); |
250 | asm_output("shl %s,%s", gpn(r), gpn(s)); |
251 | } |
252 | |
253 | inline void Assembler::SHIFTi(I32 c, R r, I32 i) { |
254 | underrunProtect(3); |
255 | IMM8(i); |
256 | MODRMr(c, REGNUM(r)); |
257 | OPCODE(0xc1); |
258 | } |
259 | |
260 | inline void Assembler::SHLi(R r, I32 i) { count_alu(); SHIFTi(4, r, i); asm_output("shl %s,%d", gpn(r), i); } |
261 | inline void Assembler::SHRi(R r, I32 i) { count_alu(); SHIFTi(5, r, i); asm_output("shr %s,%d", gpn(r), i); } |
262 | inline void Assembler::SARi(R r, I32 i) { count_alu(); SHIFTi(7, r, i); asm_output("sar %s,%d", gpn(r), i); } |
263 | |
264 | inline void Assembler::MOVZX8(R d, R s) { count_alu(); ALU2(0x0fb6, d, s); asm_output("movzx %s,%s", gpn(d), gpn(s)); } |
265 | |
266 | inline void Assembler::SUBi(R r, I32 i) { count_alu(); ALUi(0x2d, r, i); asm_output("sub %s,%d", gpn(r), i); } |
267 | inline void Assembler::ADDi(R r, I32 i) { count_alu(); ALUi(0x05, r, i); asm_output("add %s,%d", gpn(r), i); } |
268 | inline void Assembler::ANDi(R r, I32 i) { count_alu(); ALUi(0x25, r, i); asm_output("and %s,%d", gpn(r), i); } |
269 | inline void Assembler::ORi(R r, I32 i) { count_alu(); ALUi(0x0d, r, i); asm_output("or %s,%d", gpn(r), i); } |
270 | inline void Assembler::XORi(R r, I32 i) { count_alu(); ALUi(0x35, r, i); asm_output("xor %s,%d", gpn(r), i); } |
271 | |
272 | inline void Assembler::ADDmi(I32 d, R b, I32 i) { count_alust(); ALUmi(0x05, d, b, i); asm_output("add %d(%s), %d", d, gpn(b), i); } |
273 | |
274 | inline void Assembler::TEST(R d, R s) { count_alu(); ALU(0x85, REGNUM(d), s); asm_output("test %s,%s", gpn(d), gpn(s)); } |
275 | inline void Assembler::CMP(R l, R r) { count_alu(); ALU(0x3b, REGNUM(l), r); asm_output("cmp %s,%s", gpn(l), gpn(r)); } |
276 | inline void Assembler::CMPi(R r, I32 i) { count_alu(); ALUi(0x3d, r, i); asm_output("cmp %s,%d", gpn(r), i); } |
277 | |
278 | inline void Assembler::LEA(R r, I32 d, R b) { count_alu(); ALUm(0x8d, REGNUM(r), d, b); asm_output("lea %s,%d(%s)", gpn(r), d, gpn(b)); } |
279 | |
280 | inline void Assembler::CDQ() { SARi(rEDX, 31); MR(rEDX, rEAX); } |
281 | |
282 | inline void Assembler::INCLi(I32 p) { |
283 | count_alu(); |
284 | underrunProtect(6); |
285 | IMM32((uint32_t)(ptrdiff_t)p); |
286 | OPCODE(0x05); |
287 | OPCODE(0xFF); |
288 | asm_output("incl (%p)", (void*)p); |
289 | } |
290 | |
291 | inline void Assembler::SETE( R r) { count_alu(); ALU2(0x0f94, r, r); asm_output("sete %s", gpn(r)); } |
292 | inline void Assembler::SETNP(R r) { count_alu(); ALU2(0x0f9B, r, r); asm_output("setnp %s", gpn(r)); } |
293 | inline void Assembler::SETNPH(R r) { count_alu(); ALU2(0x0f9B, AL2AHReg(r), AL2AHReg(r)); asm_output("setnp %s", RBhi(r)); } |
294 | inline void Assembler::SETL( R r) { count_alu(); ALU2(0x0f9C, r, r); asm_output("setl %s", gpn(r)); } |
295 | inline void Assembler::SETLE(R r) { count_alu(); ALU2(0x0f9E, r, r); asm_output("setle %s", gpn(r)); } |
296 | inline void Assembler::SETG( R r) { count_alu(); ALU2(0x0f9F, r, r); asm_output("setg %s", gpn(r)); } |
297 | inline void Assembler::SETGE(R r) { count_alu(); ALU2(0x0f9D, r, r); asm_output("setge %s", gpn(r)); } |
298 | inline void Assembler::SETB( R r) { count_alu(); ALU2(0x0f92, r, r); asm_output("setb %s", gpn(r)); } |
299 | inline void Assembler::SETBE(R r) { count_alu(); ALU2(0x0f96, r, r); asm_output("setbe %s", gpn(r)); } |
300 | inline void Assembler::SETA( R r) { count_alu(); ALU2(0x0f97, r, r); asm_output("seta %s", gpn(r)); } |
301 | inline void Assembler::SETAE(R r) { count_alu(); ALU2(0x0f93, r, r); asm_output("setae %s", gpn(r)); } |
302 |         inline void Assembler::SETO( R r) { count_alu(); ALU2(0x0f90, r, r); asm_output("seto %s", gpn(r)); }
303 | |
304 | inline void Assembler::MREQ(R d, R s) { count_alu(); ALU2(0x0f44, d, s); asm_output("cmove %s,%s", gpn(d), gpn(s)); } |
305 | inline void Assembler::MRNE(R d, R s) { count_alu(); ALU2(0x0f45, d, s); asm_output("cmovne %s,%s", gpn(d), gpn(s)); } |
306 | inline void Assembler::MRL( R d, R s) { count_alu(); ALU2(0x0f4C, d, s); asm_output("cmovl %s,%s", gpn(d), gpn(s)); } |
307 | inline void Assembler::MRLE(R d, R s) { count_alu(); ALU2(0x0f4E, d, s); asm_output("cmovle %s,%s", gpn(d), gpn(s)); } |
308 | inline void Assembler::MRG( R d, R s) { count_alu(); ALU2(0x0f4F, d, s); asm_output("cmovg %s,%s", gpn(d), gpn(s)); } |
309 | inline void Assembler::MRGE(R d, R s) { count_alu(); ALU2(0x0f4D, d, s); asm_output("cmovge %s,%s", gpn(d), gpn(s)); } |
310 | inline void Assembler::MRB( R d, R s) { count_alu(); ALU2(0x0f42, d, s); asm_output("cmovb %s,%s", gpn(d), gpn(s)); } |
311 | inline void Assembler::MRBE(R d, R s) { count_alu(); ALU2(0x0f46, d, s); asm_output("cmovbe %s,%s", gpn(d), gpn(s)); } |
312 | inline void Assembler::MRA( R d, R s) { count_alu(); ALU2(0x0f47, d, s); asm_output("cmova %s,%s", gpn(d), gpn(s)); } |
313 | inline void Assembler::MRAE(R d, R s) { count_alu(); ALU2(0x0f43, d, s); asm_output("cmovae %s,%s", gpn(d), gpn(s)); } |
314 | inline void Assembler::MRNO(R d, R s) { count_alu(); ALU2(0x0f41, d, s); asm_output("cmovno %s,%s", gpn(d), gpn(s)); } |
315 | |
316 | // these aren't currently used but left in for reference |
317 | //#define LDEQ(r,d,b) do { ALU2m(0x0f44,r,d,b); asm_output("cmove %s,%d(%s)", gpn(r),d,gpn(b)); } while(0) |
318 | //#define LDNEQ(r,d,b) do { ALU2m(0x0f45,r,d,b); asm_output("cmovne %s,%d(%s)", gpn(r),d,gpn(b)); } while(0) |
319 | |
320 | inline void Assembler::LD(R reg, I32 disp, R base) { |
321 | count_ld(); |
322 | ALUm(0x8b, REGNUM(reg), disp, base); |
323 | asm_output("mov %s,%d(%s)", gpn(reg), disp, gpn(base)); |
324 | } |
325 | |
326 | inline void Assembler::LDdm(R reg, I32 addr) { |
327 | count_ld(); |
328 | ALUdm(0x8b, reg, addr); |
329 | asm_output("mov %s,0(%p)", gpn(reg), (void*)addr); |
330 | } |
331 | |
332 | #define SIBIDX(n) "1248"[n]
333 | |
334 | inline void Assembler::LDsib(R reg, I32 disp, R base, R index, I32 scale) { |
335 | count_ld(); |
336 | ALUsib(0x8b, reg, base, index, scale, disp); |
337 | asm_output("mov %s,%d(%s+%s*%c)", gpn(reg), disp, gpn(base), gpn(index), SIBIDX(scale)); |
338 | } |
339 | |
340 | // note: movzx/movsx are being output with an 8/16 suffix to indicate the |
341 | // size being loaded. This doesn't really match standard intel format |
342 | // (though is arguably terser and more obvious in this case) and would |
343 | // probably be nice to fix. (Likewise, the 8/16 bit stores being output |
344 | // as "mov8" and "mov16" respectively.) |
345 | |
346 | // Load 16-bit, sign extend. |
347 | inline void Assembler::LD16S(R r, I32 d, R b) { |
348 | count_ld(); |
349 | ALU2m(0x0fbf, r, d, b); |
350 | asm_output("movsx16 %s,%d(%s)", gpn(r), d, gpn(b)); |
351 | } |
352 | |
353 | inline void Assembler::LD16Sdm(R r, I32 addr) { |
354 | count_ld(); |
355 | ALU2dm(0x0fbf, r, addr); |
356 | asm_output("movsx16 %s,0(%lx)", gpn(r), (unsigned long)addr); |
357 | } |
358 | |
359 | inline void Assembler::LD16Ssib(R r, I32 disp, R base, R index, I32 scale) { |
360 | count_ld(); |
361 | ALU2sib(0x0fbf, r, base, index, scale, disp); |
362 | asm_output("movsx16 %s,%d(%s+%s*%c)", gpn(r), disp, gpn(base), gpn(index), SIBIDX(scale)); |
363 | } |
364 | |
365 | // Load 16-bit, zero extend. |
366 | inline void Assembler::LD16Z(R r, I32 d, R b) { |
367 | count_ld(); |
368 | ALU2m(0x0fb7, r, d, b); |
369 | asm_output("movzx16 %s,%d(%s)", gpn(r), d, gpn(b)); |
370 | } |
371 | |
372 | inline void Assembler::LD16Zdm(R r, I32 addr) { |
373 | count_ld(); |
374 | ALU2dm(0x0fb7, r, addr); |
375 | asm_output("movzx16 %s,0(%lx)", gpn(r), (unsigned long)addr); |
376 | } |
377 | |
378 | inline void Assembler::LD16Zsib(R r, I32 disp, R base, R index, I32 scale) { |
379 | count_ld(); |
380 | ALU2sib(0x0fb7, r, base, index, scale, disp); |
381 | asm_output("movzx16 %s,%d(%s+%s*%c)", gpn(r), disp, gpn(base), gpn(index), SIBIDX(scale)); |
382 | } |
383 | |
384 | // Load 8-bit, zero extend. |
385 | inline void Assembler::LD8Z(R r, I32 d, R b) { |
386 | count_ld(); |
387 | ALU2m(0x0fb6, r, d, b); |
388 | asm_output("movzx8 %s,%d(%s)", gpn(r), d, gpn(b)); |
389 | } |
390 | |
391 | inline void Assembler::LD8Zdm(R r, I32 addr) { |
392 | count_ld(); |
393 | ALU2dm(0x0fb6, r, addr); |
394 | asm_output("movzx8 %s,0(%lx)", gpn(r), (long unsigned)addr); |
395 | } |
396 | |
397 | inline void Assembler::LD8Zsib(R r, I32 disp, R base, R index, I32 scale) { |
398 | count_ld(); |
399 | ALU2sib(0x0fb6, r, base, index, scale, disp); |
400 | asm_output("movzx8 %s,%d(%s+%s*%c)", gpn(r), disp, gpn(base), gpn(index), SIBIDX(scale)); |
401 | } |
402 | |
403 | // Load 8-bit, sign extend. |
404 | inline void Assembler::LD8S(R r, I32 d, R b) { |
405 | count_ld(); |
406 | ALU2m(0x0fbe, r, d, b); |
407 | asm_output("movsx8 %s,%d(%s)", gpn(r), d, gpn(b)); |
408 | } |
409 | |
410 | inline void Assembler::LD8Sdm(R r, I32 addr) { |
411 | count_ld(); |
412 | ALU2dm(0x0fbe, r, addr); |
413 | asm_output("movsx8 %s,0(%lx)", gpn(r), (long unsigned)addr); |
414 | } |
415 | |
416 | inline void Assembler::LD8Ssib(R r, I32 disp, R base, R index, I32 scale) { |
417 | count_ld(); |
418 | ALU2sib(0x0fbe, r, base, index, scale, disp); |
419 | asm_output("movsx8 %s,%d(%s+%s*%c)", gpn(r), disp, gpn(base), gpn(index), SIBIDX(scale)); |
420 | } |
421 | |
422 | inline void Assembler::LDi(R r, I32 i) { |
423 | count_ld(); |
424 | underrunProtect(5); |
425 | IMM32(i); |
426 |             NanoAssert(REGNUM(r) < 8);
427 | OPCODE(0xb8 | REGNUM(r)); |
428 | asm_output("mov %s,%d", gpn(r), i); |
429 | } |
430 | |
431 | // Quirk of x86-32: reg must be a/b/c/d for byte stores here. |
432 | inline void Assembler::ST8(R base, I32 disp, R reg) { |
433 | count_st(); |
434 |             NanoAssert(REGNUM(reg) < 4);
435 | ALUm(0x88, REGNUM(reg), disp, base); |
436 | asm_output("mov8 %d(%s),%s", disp, base==UnspecifiedReg ? "0" : gpn(base), gpn(reg)); |
437 | } |
438 | |
439 | // Quirk of x86-32: reg must be a/b/c/d for byte stores here. |
440 | inline void Assembler::ST8sib(I32 disp, R base, R index, I32 scale, R reg) { |
441 | count_st(); |
442 |             NanoAssert(REGNUM(reg) < 4);
443 | ALUsib(0x88, reg, base, index, scale, disp); |
444 | asm_output("mov8 %d(%s+%s*%c),%s", disp, base==UnspecifiedReg ? "0" : gpn(base), |
445 | gpn(index), SIBIDX(scale), gpn(reg)); |
446 | } |
447 | |
448 | inline void Assembler::ST16(R base, I32 disp, R reg) { |
449 | count_st(); |
450 | ALUm16(0x89, REGNUM(reg), disp, base); |
451 | asm_output("mov16 %d(%s),%s", disp, base==UnspecifiedReg ? "0" : gpn(base), gpn(reg)); |
452 | } |
453 | |
454 | inline void Assembler::ST16sib(I32 disp, R base, R index, I32 scale, R reg) { |
455 | count_st(); |
456 | ALUsib16(0x89, reg, base, index, scale, disp); |
457 | asm_output("mov16 %d(%s+%s*%c),%s", disp, base==UnspecifiedReg ? "0" : gpn(base), |
458 | gpn(index), SIBIDX(scale), gpn(reg)); |
459 | } |
460 | |
461 | inline void Assembler::ST(R base, I32 disp, R reg) { |
462 | count_st(); |
463 | ALUm(0x89, REGNUM(reg), disp, base); |
464 | asm_output("mov %d(%s),%s", disp, base==UnspecifiedReg ? "0" : gpn(base), gpn(reg)); |
465 | } |
466 | |
467 | inline void Assembler::STsib(I32 disp, R base, R index, I32 scale, R reg) { |
468 | count_st(); |
469 | ALUsib(0x89, reg, base, index, scale, disp); |
470 | asm_output("mov %d(%s+%s*%c),%s", disp, base==UnspecifiedReg ? "0" : gpn(base), |
471 | gpn(index), SIBIDX(scale), gpn(reg)); |
472 | } |
473 | |
474 | inline void Assembler::ST8i(R base, I32 disp, I32 imm) { |
475 | count_st(); |
476 | underrunProtect(8); |
477 | IMM8(imm); |
478 | MODRMm(0, disp, base); |
479 | OPCODE(0xc6); |
480 | asm_output("mov8 %d(%s),%d", disp, gpn(base), imm); |
481 | } |
482 | |
483 | inline void Assembler::ST8isib(I32 disp, R base, R index, I32 scale, I32 imm) { |
484 | count_st(); |
485 | underrunProtect(8); |
486 | IMM8(imm); |
487 | MODRMsib(0, base, index, scale, disp); |
488 | OPCODE(0xc6); |
489 | asm_output("mov8 %d(%s+%s*%c),%d", disp, gpn(base), gpn(index), SIBIDX(scale), imm); |
490 | } |
491 | |
492 | inline void Assembler::ST16i(R base, I32 disp, I32 imm) { |
493 | count_st(); |
494 | underrunProtect(10); |
495 | IMM16(imm); |
496 | MODRMm(0, disp, base); |
497 | OPCODE(0xc7); |
498 | OPCODE(0x66); |
499 | asm_output("mov16 %d(%s),%d", disp, gpn(base), imm); |
500 | } |
501 | |
502 | inline void Assembler::ST16isib(I32 disp, R base, R index, I32 scale, I32 imm) { |
503 | count_st(); |
504 | underrunProtect(10); |
505 | IMM16(imm); |
506 | MODRMsib(0, base, index, scale, disp); |
507 | OPCODE(0xc7); |
508 | OPCODE(0x66); |
509 | asm_output("mov16 %d(%s+%s*%c),%d", disp, gpn(base), gpn(index), SIBIDX(scale), imm); |
510 | } |
511 | |
512 | inline void Assembler::STi(R base, I32 disp, I32 imm) { |
513 | count_st(); |
514 | underrunProtect(11); |
515 | IMM32(imm); |
516 | MODRMm(0, disp, base); |
517 | OPCODE(0xc7); |
518 | asm_output("mov %d(%s),%d", disp, gpn(base), imm); |
519 | } |
520 | |
521 | inline void Assembler::STisib(I32 disp, R base, R index, I32 scale, I32 imm) { |
522 | count_st(); |
523 | underrunProtect(11); |
524 | IMM32(imm); |
525 | MODRMsib(0, base, index, scale, disp); |
526 | OPCODE(0xc7); |
527 | asm_output("mov %d(%s+%s*%c),%d", disp, gpn(base), gpn(index), SIBIDX(scale), imm); |
528 | } |
529 | |
530 | const uint8_t INT3_OP = 0xcc; |
531 | |
532 | inline void Assembler::RET() { count_ret(); ALU0(0xc3); asm_output("ret"); } |
533 | inline void Assembler::NOP() { count_alu(); ALU0(0x90); asm_output("nop"); } |
534 | inline void Assembler::INT3() { ALU0(INT3_OP); asm_output("int3"); } |
535 | |
536 | inline void Assembler::PUSHi(I32 i) { |
537 | count_push(); |
538 |             if (isS8(i)) {
539 | underrunProtect(2); |
540 | IMM8(i); |
541 | OPCODE(0x6a); |
542 | asm_output("push %d", i); |
543 | } else { |
544 | underrunProtect(5); |
545 | IMM32(i); |
546 | OPCODE(0x68); |
547 | asm_output("push %d", i); |
548 | } |
549 | } |
550 | |
551 | inline void Assembler::PUSHr(R r) { |
552 | count_push(); |
553 | underrunProtect(1); |
554 |             NanoAssert(REGNUM(r) < 8);
555 | OPCODE(0x50 | REGNUM(r)); |
556 | asm_output("push %s", gpn(r)); |
557 | } |
558 | |
559 | inline void Assembler::PUSHm(I32 d, R b) { |
560 | count_pushld(); |
561 | ALUm(0xff, 6, d, b); |
562 | asm_output("push %d(%s)", d, gpn(b)); |
563 | } |
564 | |
565 | inline void Assembler::POPr(R r) { |
566 | count_pop(); |
567 | underrunProtect(1); |
568 |             NanoAssert(REGNUM(r) < 8);
569 | OPCODE(0x58 | REGNUM(r)); |
570 | asm_output("pop %s", gpn(r)); |
571 | } |
572 | |
573 | inline void Assembler::JCC(I32 o, NIns* t, const char* n) { |
574 | count_jcc(); |
575 | underrunProtect(6); |
576 | intptr_t tt = (intptr_t)t - (intptr_t)_nIns; |
577 |         if (t && isS8(tt)) {
578 | IMM8(tt); |
579 | OPCODE(0x70 | o); |
580 | } else { |
581 | IMM32(tt); |
582 | OPCODE(0x80 | o); |
583 |             OPCODE(JCC32);
584 | } |
585 | asm_output("%-5s %p", n, t); |
586 | (void) n; |
587 | } |
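     |         // Note: tt is measured from _nIns which, because code is emitted
     |         // backwards, is the address just past this branch -- exactly the
     |         // anchor that x86 rel8/rel32 displacements require.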
588 | |
589 | inline void Assembler::JMP_long(NIns* t) { |
590 | count_jmp(); |
591 | underrunProtect(5); |
592 |         NanoAssert(t);
593 | intptr_t tt = (intptr_t)t - (intptr_t)_nIns; |
594 | IMM32(tt); |
595 |         OPCODE(JMP32);
596 | asm_output("jmp %p", t); |
597 | verbose_only( verbose_outputf("%p:", (void*)_nIns); ) |
598 | } |
599 | |
600 | inline void Assembler::JMP_indexed(Register x, I32 ss, NIns** addr) { |
601 | underrunProtect(7); |
602 | IMM32(int32_t(addr)); |
603 | SIB(ss, REGNUM(x), 5); |
604 | MODRM(0, 4, 4); // amode == addr(table + x<<ss) |
605 | OPCODE(0xff); // jmp |
606 | asm_output("jmp *(%s*%d+%p)", gpn(x), 1 << ss, (void*)addr); |
607 | } |
608 | |
609 | inline void Assembler::JE(NIns* t) { JCC(0x04, t, "je"); } |
610 | inline void Assembler::JNE(NIns* t) { JCC(0x05, t, "jne"); } |
611 | inline void Assembler::JP(NIns* t) { JCC(0x0A, t, "jp"); } |
612 | inline void Assembler::JNP(NIns* t) { JCC(0x0B, t, "jnp"); } |
613 | |
614 | inline void Assembler::JB(NIns* t) { JCC(0x02, t, "jb"); } |
615 | inline void Assembler::JNB(NIns* t) { JCC(0x03, t, "jnb"); } |
616 | inline void Assembler::JBE(NIns* t) { JCC(0x06, t, "jbe"); } |
617 | inline void Assembler::JNBE(NIns* t) { JCC(0x07, t, "jnbe"); } |
618 | |
619 | inline void Assembler::JA(NIns* t) { JCC(0x07, t, "ja"); } |
620 | inline void Assembler::JNA(NIns* t) { JCC(0x06, t, "jna"); } |
621 | inline void Assembler::JAE(NIns* t) { JCC(0x03, t, "jae"); } |
622 | inline void Assembler::JNAE(NIns* t) { JCC(0x02, t, "jnae"); } |
623 | |
624 | inline void Assembler::JL(NIns* t) { JCC(0x0C, t, "jl"); } |
625 | inline void Assembler::JNL(NIns* t) { JCC(0x0D, t, "jnl"); } |
626 | inline void Assembler::JLE(NIns* t) { JCC(0x0E, t, "jle"); } |
627 | inline void Assembler::JNLE(NIns* t) { JCC(0x0F, t, "jnle"); } |
628 | |
629 | inline void Assembler::JG(NIns* t) { JCC(0x0F, t, "jg"); } |
630 | inline void Assembler::JNG(NIns* t) { JCC(0x0E, t, "jng"); } |
631 | inline void Assembler::JGE(NIns* t) { JCC(0x0D, t, "jge"); } |
632 | inline void Assembler::JNGE(NIns* t) { JCC(0x0C, t, "jnge"); } |
633 | |
634 | inline void Assembler::JO(NIns* t) { JCC(0x00, t, "jo"); } |
635 | inline void Assembler::JNO(NIns* t) { JCC(0x01, t, "jno"); } |
636 | |
637 | // sse instructions |
638 | inline void Assembler::SSE(I32 opc3, R d, R s) { |
639 | underrunProtect(9); |
640 | MODRMr(REGNUM(d)&7, REGNUM(s)&7); |
641 | OPCODE3(opc3); |
642 | } |
643 | |
644 | inline void Assembler::SSEm(I32 opc3, R r, I32 d, R b) { |
645 | underrunProtect(9); |
646 | MODRMm(REGNUM(r)&7, d, b); |
647 | OPCODE3(opc3); |
648 | } |
649 | |
650 | inline void Assembler::SSEsib(I32 opc3, R rr, I32 d, R rb, R ri, I32 scale) { |
651 | underrunProtect(9); |
652 | MODRMsib(REGNUM(rr)&7, rb, ri, scale, d); |
653 | OPCODE3(opc3); |
654 | } |
655 | |
656 | inline void Assembler::LDSDm(R r, const double* addr) { |
657 | count_ldq(); |
658 | underrunProtect(8); |
659 | IMM32(int32_t(addr)); |
660 | MODRM(0, REGNUM(r) & 7, 5); // amode == addr(r) |
661 | OPCODE(0x10); |
662 | OPCODE(0x0f); |
663 | OPCODE(0xf2); |
664 | // *addr is a constant, so we can print it here. |
665 | asm_output("movsd %s,(%p) // =%f", gpn(r), (void*)addr, *addr); |
666 | } |
667 | |
668 | inline void Assembler::SSE_LDQ( R r, I32 d, R b) { count_ldq(); SSEm(0xf30f7e, r, d, b); asm_output("movq %s,%d(%s)", gpn(r), d, gpn(b)); } |
669 | inline void Assembler::SSE_LDSS(R r, I32 d, R b) { count_ld(); SSEm(0xf30f10, r, d, b); asm_output("movss %s,%d(%s)", gpn(r), d, gpn(b)); } |
670 | |
671 | inline void Assembler::SSE_LDQsib(R rr, I32 d, R rb, R ri, I32 scale) |
672 | { |
673 | count_ldq(); |
674 | SSEsib(0xf30f7e, rr, d, rb, ri, scale); |
675 | asm_output("movq %s,%d(%s+%s*%c)", gpn(rr), d, gpn(rb), gpn(ri), SIBIDX(scale)); |
676 | } |
677 | |
678 | inline void Assembler::SSE_LDSSsib(R rr, I32 d, R rb, R ri, I32 scale) |
679 | { |
680 | count_ld(); |
681 | SSEsib(0xf30f10, rr, d, rb, ri, scale); |
682 | asm_output("movss %s,%d(%s+%s*%c)", gpn(rr), d, gpn(rb), gpn(ri), SIBIDX(scale)); |
683 | } |
684 | |
685 | inline void Assembler::SSE_STSD(I32 d, R b, R r) { count_stq(); SSEm(0xf20f11, r, d, b); asm_output("movsd %d(%s),%s", d, gpn(b), gpn(r)); } |
686 | inline void Assembler::SSE_STQ( I32 d, R b, R r) { count_stq(); SSEm(0x660fd6, r, d, b); asm_output("movq %d(%s),%s", d, gpn(b), gpn(r)); } |
687 | inline void Assembler::SSE_STSS(I32 d, R b, R r) { count_st(); SSEm(0xf30f11, r, d, b); asm_output("movss %d(%s),%s", d, gpn(b), gpn(r)); } |
688 | |
689 | inline void Assembler::SSE_STQsib(I32 d, R rb, R ri, I32 scale, R rv) { |
690 | count_stq(); |
691 | SSEsib(0x660fd6, rv, d, rb, ri, scale); |
692 | asm_output("movq %d(%s+%s*%c),%s", d, gpn(rb), gpn(ri), SIBIDX(scale), gpn(rv)); |
693 | } |
694 | |
695 | inline void Assembler::SSE_CVTSI2SD(R xr, R gr) { count_fpu(); SSE(0xf20f2a, xr, gr); asm_output("cvtsi2sd %s,%s", gpn(xr), gpn(gr)); } |
696 | inline void Assembler::SSE_CVTSD2SI(R gr, R xr) { count_fpu(); SSE(0xf20f2d, gr, xr); asm_output("cvtsd2si %s,%s", gpn(gr), gpn(xr)); } |
697 | inline void Assembler::SSE_CVTTSD2SI(R gr, R xr) { count_fpu(); SSE(0xf20f2c, gr, xr); asm_output("cvttsd2si %s,%s",gpn(gr), gpn(xr)); } |
698 | inline void Assembler::SSE_CVTSD2SS(R xr, R gr) { count_fpu(); SSE(0xf20f5a, xr, gr); asm_output("cvtsd2ss %s,%s", gpn(xr), gpn(gr)); } |
699 | inline void Assembler::SSE_CVTSS2SD(R xr, R gr) { count_fpu(); SSE(0xf30f5a, xr, gr); asm_output("cvtss2sd %s,%s", gpn(xr), gpn(gr)); } |
700 | inline void Assembler::SSE_CVTDQ2PD(R d, R r) { count_fpu(); SSE(0xf30fe6, d, r); asm_output("cvtdq2pd %s,%s", gpn(d), gpn(r)); } |
701 | |
702 | // Move and zero-extend GP reg to XMM reg. |
703 | inline void Assembler::SSE_MOVD(R d, R s) { |
704 | count_mov(); |
705 | if (IsXmmReg(s)) { |
706 |                 NanoAssert(IsGpReg(d));
707 | SSE(0x660f7e, s, d); |
708 | } else { |
709 |                 NanoAssert(IsGpReg(s));
710 |                 NanoAssert(IsXmmReg(d));
711 | SSE(0x660f6e, d, s); |
712 | } |
713 | asm_output("movd %s,%s", gpn(d), gpn(s)); |
714 | } |
715 | |
716 | inline void Assembler::SSE_MOVSD(R rd, R rs) { |
717 | count_mov(); |
718 |             NanoAssert(IsXmmReg(rd) && IsXmmReg(rs));
719 | SSE(0xf20f10, rd, rs); |
720 | asm_output("movsd %s,%s", gpn(rd), gpn(rs)); |
721 | } |
722 | |
723 | inline void Assembler::SSE_ADDSD(R rd, R rs) { |
724 | count_fpu(); |
725 |             NanoAssert(IsXmmReg(rd) && IsXmmReg(rs));
726 | SSE(0xf20f58, rd, rs); |
727 | asm_output("addsd %s,%s", gpn(rd), gpn(rs)); |
728 | } |
729 | |
730 | inline void Assembler::SSE_ADDSDm(R r, const double* addr) { |
731 | count_fpuld(); |
732 | underrunProtect(8); |
733 |             NanoAssert(IsXmmReg(r));
734 | const double* daddr = addr; |
735 | IMM32(int32_t(daddr)); |
736 | MODRM(0, REGNUM(r) & 7, 5); // amode == daddr(r) |
737 | OPCODE(0x58); |
738 | OPCODE(0x0f); |
739 | OPCODE(0xf2); |
740 | // *daddr is a constant, so we can print it here. |
741 | asm_output("addsd %s,(%p) // =%f", gpn(r), (void*)daddr, *daddr); |
742 | } |
743 | |
744 | inline void Assembler::SSE_SUBSD(R rd, R rs) { |
745 | count_fpu(); |
746 |             NanoAssert(IsXmmReg(rd) && IsXmmReg(rs));
747 | SSE(0xf20f5c, rd, rs); |
748 | asm_output("subsd %s,%s", gpn(rd), gpn(rs)); |
749 | } |
750 | |
751 | inline void Assembler::SSE_MULSD(R rd, R rs) { |
752 | count_fpu(); |
753 |             NanoAssert(IsXmmReg(rd) && IsXmmReg(rs));
754 | SSE(0xf20f59, rd, rs); |
755 | asm_output("mulsd %s,%s", gpn(rd), gpn(rs)); |
756 | } |
757 | |
758 | inline void Assembler::SSE_DIVSD(R rd, R rs) { |
759 | count_fpu(); |
760 |             NanoAssert(IsXmmReg(rd) && IsXmmReg(rs));
761 | SSE(0xf20f5e, rd, rs); |
762 | asm_output("divsd %s,%s", gpn(rd), gpn(rs)); |
763 | } |
764 | |
765 | inline void Assembler::SSE_UCOMISD(R rl, R rr) { |
766 | count_fpu(); |
767 |             NanoAssert(IsXmmReg(rl) && IsXmmReg(rr));
768 | SSE(0x660f2e, rl, rr); |
769 | asm_output("ucomisd %s,%s", gpn(rl), gpn(rr)); |
770 | } |
771 | |
772 | inline void Assembler::SSE_XORPD(R r, const uint32_t* maskaddr) { |
773 | count_fpuld(); |
774 | underrunProtect(8); |
775 | IMM32(int32_t(maskaddr)); |
776 | MODRM(0, REGNUM(r) & 7, 5); // amode == maskaddr(r) |
777 | OPCODE(0x57); |
778 | OPCODE(0x0f); |
779 | OPCODE(0x66); |
780 | asm_output("xorpd %s,(%p)", gpn(r), (void*)maskaddr); |
781 | } |
782 | |
783 | inline void Assembler::SSE_XORPDr(R rd, R rs) { |
784 | count_fpu(); |
785 | SSE(0x660f57, rd, rs); |
786 | asm_output("xorpd %s,%s", gpn(rd), gpn(rs)); |
787 | } |
788 | |
789 | // floating point unit |
790 | inline void Assembler::FPUc(I32 opc2) { |
791 | underrunProtect(2); |
792 | OPCODE2(opc2); |
793 | } |
794 | |
795 | inline void Assembler::FPUm(I32 o, I32 d, R b) { |
796 | underrunProtect(7); |
797 | MODRMm(uint8_t(o), d, b); |
798 | OPCODE(o >> 8); |
799 | } |
800 | |
801 | inline void Assembler::FPUdm(I32 o, const double* const m) { |
802 | underrunProtect(6); |
803 | MODRMdm(uint8_t(o), int32_t(m)); |
804 | OPCODE(o >> 8); |
805 | } |
806 | |
807 | inline void Assembler::TEST_AH(I32 i) { |
808 | count_alu(); |
809 | underrunProtect(3); |
810 | OPCODE(i); |
811 | OPCODE(0xc4); |
812 | OPCODE(0xf6); |
813 | asm_output("test ah, %d", i); |
814 | } |
815 | |
816 | // The FPU stack depth is the number of pushes in excess of the number of pops. |
817 | // Since we generate backwards, we track the FPU stack depth as a negative number. |
818 | // We use the top of the x87 stack as the single allocatable FP register, FST0. |
819 | // Thus, between LIR instructions, the depth of the FPU stack must be either 0 or -1, |
820 | // depending on whether FST0 is in use. Within the expansion of a single LIR |
821 | // instruction, however, deeper levels of the stack may be used as unmanaged |
822 | // temporaries. Hence, we allow for all eight levels in the assertions below. |
823 | void Assembler::fpu_push() { |
824 | debug_only( ++_fpuStkDepth; NanoAssert(_fpuStkDepth <= 0); ) |
825 | } |
826 | |
827 | void Assembler::fpu_pop() { |
828 | debug_only( --_fpuStkDepth; NanoAssert(_fpuStkDepth >= -7); ) |
829 | } |
830 | |
831 | inline void Assembler::FNSTSW_AX() { count_fpu(); FPUc(0xdfe0); asm_output("fnstsw_ax"); } |
832 | inline void Assembler::FCHS() { count_fpu(); FPUc(0xd9e0); asm_output("fchs"); } |
833 | inline void Assembler::FLD1() { count_fpu(); FPUc(0xd9e8); asm_output("fld1"); fpu_push(); } |
834 | inline void Assembler::FLDZ() { count_fpu(); FPUc(0xd9ee); asm_output("fldz"); fpu_push(); } |
835 | |
836 |         inline void Assembler::FST32(bool p, I32 d, R b){ count_stq(); FPUm(0xd902|(p?1:0), d, b); asm_output("fst%s32 %d(%s)", (p?"p":""), d, gpn(b)); if (p) fpu_pop(); }
837 |         inline void Assembler::FSTQ(bool p, I32 d, R b) { count_stq(); FPUm(0xdd02|(p?1:0), d, b); asm_output("fst%sq %d(%s)", (p?"p":""), d, gpn(b)); if (p) fpu_pop(); }
838 | |
839 | inline void Assembler::FSTPQ(I32 d, R b) { FSTQ(1, d, b); } |
840 | |
841 |         inline void Assembler::FCOM(bool p, I32 d, R b) { count_fpuld(); FPUm(0xdc02|(p?1:0), d, b); asm_output("fcom%s %d(%s)", (p?"p":""), d, gpn(b)); if (p) fpu_pop(); }
842 |         inline void Assembler::FCOMdm(bool p, const double* dm) {
843 | count_fpuld(); |
844 | FPUdm(0xdc02|(p?1:0), dm); |
845 | asm_output("fcom%s (%p)", (p?"p":""), (void*)dm); |
846 | if (p) fpu_pop(); |
847 | } |
848 | |
849 | inline void Assembler::FLD32(I32 d, R b) { count_ldq(); FPUm(0xd900, d, b); asm_output("fld32 %d(%s)", d, gpn(b)); fpu_push();} |
850 | inline void Assembler::FLDQ(I32 d, R b) { count_ldq(); FPUm(0xdd00, d, b); asm_output("fldq %d(%s)", d, gpn(b)); fpu_push();} |
851 | inline void Assembler::FLDQdm(const double* dm) { count_ldq(); FPUdm(0xdd00, dm); asm_output("fldq (%p)", (void*)dm); fpu_push();} |
852 | inline void Assembler::FILDQ(I32 d, R b) { count_fpuld(); FPUm(0xdf05, d, b); asm_output("fildq %d(%s)", d, gpn(b)); fpu_push(); } |
853 | inline void Assembler::FILD(I32 d, R b) { count_fpuld(); FPUm(0xdb00, d, b); asm_output("fild %d(%s)", d, gpn(b)); fpu_push(); } |
854 | |
855 |         inline void Assembler::FIST(bool p, I32 d, R b) {
856 | count_fpu(); |
857 | FPUm(0xdb02 | (p?1:0), d, b); |
858 | asm_output("fist%s %d(%s)", (p?"p":""), d, gpn(b)); |
859 | if (p) fpu_pop(); |
860 | } |
861 | |
862 | inline void Assembler::FADD( I32 d, R b) { count_fpu(); FPUm(0xdc00, d, b); asm_output("fadd %d(%s)", d, gpn(b)); } |
863 | inline void Assembler::FSUB( I32 d, R b) { count_fpu(); FPUm(0xdc04, d, b); asm_output("fsub %d(%s)", d, gpn(b)); } |
864 | inline void Assembler::FSUBR(I32 d, R b) { count_fpu(); FPUm(0xdc05, d, b); asm_output("fsubr %d(%s)", d, gpn(b)); } |
865 | inline void Assembler::FMUL( I32 d, R b) { count_fpu(); FPUm(0xdc01, d, b); asm_output("fmul %d(%s)", d, gpn(b)); } |
866 | inline void Assembler::FDIV( I32 d, R b) { count_fpu(); FPUm(0xdc06, d, b); asm_output("fdiv %d(%s)", d, gpn(b)); } |
867 | inline void Assembler::FDIVR(I32 d, R b) { count_fpu(); FPUm(0xdc07, d, b); asm_output("fdivr %d(%s)", d, gpn(b)); } |
868 | |
869 | inline void Assembler::FADDdm( const double *dm) { count_ldq(); FPUdm(0xdc00, dm); asm_output("fadd (%p)", (void*)dm); } |
870 | inline void Assembler::FSUBRdm(const double* dm) { count_ldq(); FPUdm(0xdc05, dm); asm_output("fsubr (%p)", (void*)dm); } |
871 | inline void Assembler::FMULdm( const double* dm) { count_ldq(); FPUdm(0xdc01, dm); asm_output("fmul (%p)", (void*)dm); } |
872 | inline void Assembler::FDIVRdm(const double* dm) { count_ldq(); FPUdm(0xdc07, dm); asm_output("fdivr (%p)", (void*)dm); } |
873 | |
874 | inline void Assembler::FCOMP() { count_fpu(); FPUc(0xD8D9); asm_output("fcomp"); fpu_pop();} |
875 | inline void Assembler::FCOMPP() { count_fpu(); FPUc(0xDED9); asm_output("fcompp"); fpu_pop();fpu_pop();} |
876 | inline void Assembler::FLDr(R r) { count_ldq(); FPU(0xd9c0, r); asm_output("fld %s", gpn(r)); fpu_push(); } |
877 | inline void Assembler::EMMS() { count_fpu(); FPUc(0x0f77); asm_output("emms"); } |
878 | |
879 | // standard direct call |
880 | inline void Assembler::CALL(const CallInfo* ci) { |
881 | count_call(); |
882 | underrunProtect(5); |
883 | int offset = (ci->_address) - ((int)_nIns); |
884 | IMM32((uint32_t)offset); |
885 | OPCODE(0xE8); |
886 | verbose_only(asm_output("call %s", (ci->_name));) |
887 | debug_only(if (ci->returnType()==ARGTYPE_D) fpu_push();) |
888 | } |
889 | |
890 | // indirect call thru register |
891 | inline void Assembler::CALLr(const CallInfo* ci, Register r) { |
892 | count_calli(); |
893 | underrunProtect(2); |
894 | ALU(0xff, 2, r); |
895 | verbose_only(asm_output("call %s", gpn(r));) |
896 | debug_only(if (ci->returnType()==ARGTYPE_D) fpu_push();) (void)ci; |
897 | } |
898 | |
899 | void Assembler::nInit() |
900 | { |
901 | nHints[LIR_calli] = rmask(retRegs[0]); |
902 | nHints[LIR_calld] = rmask(FST0); |
903 | nHints[LIR_paramp] = PREFER_SPECIAL; |
904 | nHints[LIR_immi] = ScratchRegs; |
905 |             // Nb: Doing this with a loop future-proofs against the possibility of
906 | // new comparison operations being added. |
907 | for (LOpcode op = LOpcode(0); op < LIR_sentinel; op = LOpcode(op+1)) |
908 | if (isCmpOpcode(op)) |
909 | nHints[op] = AllowableByteRegs; |
910 | } |
911 | |
912 | void Assembler::nBeginAssembly() { |
913 | max_stk_args = 0; |
914 | } |
915 | |
916 | NIns* Assembler::genPrologue() |
917 | { |
918 | // Prologue |
919 |             uint32_t stackNeeded = max_stk_args + STACK_GRANULARITY * _activation.stackSlotsNeeded();
920 | |
921 | uint32_t stackPushed = |
922 |                 STACK_GRANULARITY + // returnaddr
923 |                 STACK_GRANULARITY; // ebp
924 | |
925 |             uint32_t aligned = alignUp(stackNeeded + stackPushed, NJ_ALIGN_STACK);
926 | uint32_t amt = aligned - stackPushed; |
927 | |
928 | #ifdef _WIN32 |
929 | // Windows uses a single guard page for extending the stack, so |
930 | // new stack pages must be first touched in stack-growth order. |
931 | // We touch each whole page that will be allocated to the frame |
932 | // (following the saved FP) to cause the OS to commit the page if |
933 | // necessary. Since we don't calculate page boundaries, but just |
934 | // probe at intervals of the pagesize, it is possible that the |
935 | // last page of the frame will be touched unnecessarily. Note that |
936 | // we must generate the probes in the reverse order of their execution. |
937 | // We require that the page size be a power of 2. |
938 | size_t pageSize = VMPI_getVMPageSize(); |
939 |             NanoAssert((pageSize & (pageSize-1)) == 0);
940 | size_t pageRounded = amt & ~(pageSize-1); |
941 | for (int32_t d = pageRounded; d > 0; d -= pageSize) { |
942 | STi(rEBP, -d, 0); |
943 | } |
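     |             // Worked example (a sketch): with 4 KiB pages and amt == 0x2800,
     |             // pageRounded == 0x2000, so probes are generated for -0x2000(%ebp)
     |             // and then -0x1000(%ebp); emitted backwards, they execute at
     |             // -0x1000(%ebp) first, i.e. in stack-growth order.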
944 | #endif |
945 | |
946 | // Reserve stackNeeded bytes, padded |
947 | // to preserve NJ_ALIGN_STACK-byte alignment. |
948 | if (amt) { |
949 | SUBi(SP, amt); |
950 | } |
951 | |
952 | verbose_only( asm_output("[frag entry]"); ) |
953 | NIns *fragEntry = _nIns; |
954 | MR(FP, SP); // Establish our own FP. |
955 | PUSHr(FP); // Save caller's FP. |
956 | |
957 | return fragEntry; |
958 | } |
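     |         // In program order the prologue executes: push ebp; mov ebp, esp;
     |         // sub esp, amt (when amt != 0); then the Windows stack probes.
     |         // fragEntry deliberately points past the push/mov pair: a
     |         // fragment-to-fragment jump arrives with the frame already built.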
959 | |
960 | void Assembler::nFragExit(LIns* guard) |
961 | { |
962 | SideExit *exit = guard->record()->exit; |
963 | Fragment *frag = exit->target; |
964 | GuardRecord *lr = 0; |
965 |             bool destKnown = (frag && frag->fragEntry);
966 | |
967 | // Generate jump to epilog and initialize lr. |
968 | // If the guard already exists, use a simple jump. |
969 | if (destKnown) { |
970 | JMP(frag->fragEntry); |
971 | lr = 0; |
972 | } else { // Target doesn't exist. Jump to an epilogue for now. This can be patched later. |
973 | if (!_epilogue) |
974 | _epilogue = genEpilogue(); |
975 | lr = guard->record(); |
976 | JMP_long(_epilogue); |
977 | lr->jmp = _nIns; |
978 | } |
979 | |
980 | // profiling for the exit |
981 | verbose_only( |
982 | if (_logc->lcbits & LC_FragProfile) { |
983 | INCLi(int32_t(&guard->record()->profCount)); |
984 | } |
985 | ) |
986 | |
987 | // Restore rESP from rEBP, undoing SUBi(SP,amt) in the prologue |
988 | MR(SP,FP); |
989 | |
990 | // return value is GuardRecord* |
991 |             asm_immi(rEAX, int(lr), /*canClobberCCs*/true);
992 | } |
993 | |
994 | NIns *Assembler::genEpilogue() |
995 | { |
996 | RET(); |
997 | POPr(FP); // Restore caller's FP. |
998 | |
999 | return _nIns; |
1000 | } |
1001 | |
1002 | void Assembler::asm_call(LIns* ins) |
1003 | { |
1004 | if (!ins->isop(LIR_callv)) { |
1005 | Register rr = ( ins->isop(LIR_calld) ? FST0 : retRegs[0] ); |
1006 | prepareResultReg(ins, rmask(rr)); |
1007 | evictScratchRegsExcept(rmask(rr)); |
1008 | } else { |
1009 | evictScratchRegsExcept(0); |
1010 | } |
1011 | const CallInfo* call = ins->callInfo(); |
1012 | // must be signed, not unsigned |
1013 | uint32_t iargs = call->count_int32_args(); |
1014 | int32_t fargs = call->count_args() - iargs; |
1015 | |
1016 |             bool indirect = call->isIndirect();
1017 | if (indirect) { |
1018 |                 // target arg isn't pushed; it's consumed in the call
1019 | iargs --; |
1020 | } |
1021 | |
1022 | AbiKind abi = call->_abi; |
1023 | uint32_t max_regs = max_abi_regs[abi]; |
1024 | if (max_regs > iargs) |
1025 | max_regs = iargs; |
1026 | |
1027 | int32_t istack = iargs-max_regs; // first 2 4B args are in registers |
1028 | int32_t extra = 0; |
1029 | const int32_t pushsize = 4*istack + 8*fargs; // actual stack space used |
1030 | |
1031 | #if _MSC_VER |
1032 | // msc only provides 4-byte alignment but we have 8 byte stack adjustment |
1033 | // logic so maintain our 8 byte alignment. |
1034 | uint32_t align = 8; |
1035 | #else |
1036 | uint32_t align = NJ_ALIGN_STACK; |
1037 | #endif |
1038 | |
1039 | if (pushsize) { |
1040 | if (_config.i386_fixed_esp) { |
1041 | // In case of fastcall, stdcall and thiscall the callee cleans up the stack, |
1042 | // and since we reserve max_stk_args words in the prolog to call functions |
1043 | // and don't adjust the stack pointer individually for each call we have |
1044 | // to undo here any changes the callee just did to the stack. |
1045 | if (abi != ABI_CDECL) |
1046 | SUBi(SP, pushsize); |
1047 | } else { |
1048 | // stack re-alignment |
1049 | // only pop our adjustment amount since callee pops args in FASTCALL mode |
1050 |                 extra = alignUp(pushsize, align) - pushsize;
1051 | if (call->_abi == ABI_CDECL) { |
1052 | // with CDECL only, caller pops args |
1053 | ADDi(SP, extra+pushsize); |
1054 | } else if (extra > 0) { |
1055 | ADDi(SP, extra); |
1056 | } |
1057 | } |
1058 | } |
1059 | |
1060 |             NanoAssert(ins->isop(LIR_callv) || ins->isop(LIR_callp) || ins->isop(LIR_calld));
1061 | if (!indirect) { |
1062 | CALL(call); |
1063 | } |
1064 | else { |
1065 | // Indirect call. x86 Calling conventions don't use rEAX as an |
1066 | // argument, and do use rEAX as a return value. We need a register |
1067 | // for the address to call, so we use rEAX since it will always be |
1068 | // available. |
1069 | CALLr(call, rEAX); |
1070 | } |
1071 | |
1072 | // Call this now so that the arg setup can involve 'rr'. |
1073 | freeResourcesOf(ins); |
1074 | |
1075 | // Make sure fpu stack is empty before call. |
1076 |             NanoAssert(_allocator.isFree(FST0));
1077 | |
1078 | // Pre-assign registers to the first N 4B args based on the calling convention. |
1079 | uint32_t n = 0; |
1080 | |
1081 | ArgType argTypes[MAXARGS]; |
1082 | uint32_t argc = call->getArgTypes(argTypes); |
1083 | int32_t stkd = 0; |
1084 | |
1085 | if (indirect) { |
1086 | argc--; |
1087 | asm_arg(ARGTYPE_P, ins->arg(argc), rEAX, stkd); |
1088 | if (!_config.i386_fixed_esp) |
1089 | stkd = 0; |
1090 | } |
1091 | |
1092 | for (uint32_t i = 0; i < argc; i++) |
1093 | { |
1094 | uint32_t j = argc-i-1; |
1095 | ArgType ty = argTypes[j]; |
1096 | Register r = UnspecifiedReg; |
1097 | if (n < max_regs && ty != ARGTYPE_D) { |
1098 | r = argRegs[n++]; // tell asm_arg what reg to use |
1099 | } |
1100 | asm_arg(ty, ins->arg(j), r, stkd); |
1101 | if (!_config.i386_fixed_esp) |
1102 | stkd = 0; |
1103 | } |
1104 | |
1105 | if (_config.i386_fixed_esp) { |
1106 | if (pushsize > max_stk_args) |
1107 | max_stk_args = pushsize; |
1108 | } else if (extra > 0) { |
1109 | SUBi(SP, extra); |
1110 | } |
1111 | } |
1112 | |
1113 | Register Assembler::nRegisterAllocFromSet(RegisterMask set) |
1114 | { |
1115 | Register r; |
1116 | RegAlloc ®s = _allocator; |
1117 | #ifdef _MSC_VER |
1118 | _asm |
1119 | { |
1120 | mov ecx, regs |
1121 | bsf eax, set // i = first bit set |
1122 | btr RegAlloc::free[ecx], eax // free &= ~rmask(i) |
1123 | mov r, eax |
1124 | } |
1125 | #else |
1126 | asm( |
1127 | "bsf %1, %%eax\n\t" |
1128 | "btr %%eax, %2\n\t" |
1129 | "movl %%eax, %0\n\t" |
1130 | : "=m"(r) : "m"(set), "m"(regs.free) : "%eax", "memory" ); |
1131 | #endif /* _MSC_VER */ |
1132 | return r; |
1133 | } |
1134 | |
1135 | void Assembler::nRegisterResetAll(RegAlloc& a) |
1136 | { |
1137 | // add scratch registers to our free list for the allocator |
1138 | a.clear(); |
1139 | a.free = SavedRegs | ScratchRegs; |
1140 | if (!_config.i386_sse2) |
1141 | a.free &= ~XmmRegs; |
1142 | } |
1143 | |
1144 | void Assembler::nPatchBranch(NIns* branch, NIns* targ) |
1145 | { |
1146 | intptr_t offset = intptr_t(targ) - intptr_t(branch); |
1147 |             if (branch[0] == JMP32) {
1148 |                 *(int32_t*)&branch[1] = offset - 5;
1149 |             } else if (branch[0] == JCC32) {
1150 |                 *(int32_t*)&branch[2] = offset - 6;
1151 |             } else
1152 |                 NanoAssertMsg(0, "Unknown branch type in nPatchBranch");
1153 | } |
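     |         // The -5/-6 terms convert the branch-start-relative offset into the
     |         // end-of-instruction-relative rel32 that x86 expects: jmp rel32 is
     |         // 5 bytes (e9 + imm32) and jcc rel32 is 6 bytes (0f 8x + imm32).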
1154 | |
1155 | RegisterMask Assembler::nHint(LIns* ins) |
1156 | { |
1157 |             NanoAssert(ins->isop(LIR_paramp));
1158 | RegisterMask prefer = 0; |
1159 | uint8_t arg = ins->paramArg(); |
1160 | if (ins->paramKind() == 0) { |
1161 | uint32_t max_regs = max_abi_regs[_thisfrag->lirbuf->abi]; |
1162 | if (arg < max_regs) |
1163 | prefer = rmask(argRegs[arg]); |
1164 | } else { |
1165 | if (arg < NumSavedRegs) |
1166 | prefer = rmask(savedRegs[arg]); |
1167 | } |
1168 | return prefer; |
1169 | } |
1170 | |
1171 | // Return true if we can generate code for this instruction that neither |
1172 | // sets CCs nor clobbers any input register. |
1173 | // LEA is the only native instruction that fits those requirements. |
1174 |         bool canRematLEA(LIns* ins)
1175 | { |
1176 | if (ins->isop(LIR_addi)) |
1177 | return ins->oprnd1()->isInReg() && ins->oprnd2()->isImmI(); |
1178 | // Subtract and some left-shifts could be rematerialized using LEA, |
1179 | // but it hasn't shown to help in real code yet. Noting them anyway: |
1180 | // maybe sub? R = subl rL, const => leal R, [rL + -const] |
1181 | // maybe lsh? R = lshl rL, 1/2/3 => leal R, [rL * 2/4/8] |
1182 |             return false;
1183 | } |
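     |         // E.g. for ins = addi(r1, 16) with r1 still live in %esi,
     |         // asm_restore() below re-creates the value with "lea r, 16(%esi)"
     |         // instead of reloading a spill slot; LEA neither sets flags nor
     |         // clobbers its source.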
1184 | |
1185 |         bool Assembler::canRemat(LIns* ins)
1186 | { |
1187 | return ins->isImmAny() || ins->isop(LIR_allocp) || canRematLEA(ins); |
1188 | } |
1189 | |
1190 | // WARNING: the code generated by this function must not affect the |
1191 | // condition codes. See asm_cmpi(). |
1192 | void Assembler::asm_restore(LIns* ins, Register r) |
1193 | { |
1194 |             NanoAssert(ins->getReg() == r);
1195 | |
1196 | uint32_t arg; |
1197 | uint32_t abi_regcount; |
1198 | if (ins->isop(LIR_allocp)) { |
1199 | // The value of a LIR_allocp instruction is the address of the |
1200 | // stack allocation. We can rematerialize that from the record we |
1201 | // have of where the allocation lies in the stack. |
1202 |                 NanoAssert(ins->isInAr()); // must have stack slots allocated
1203 | LEA(r, arDisp(ins), FP); |
1204 | |
1205 | } else if (ins->isImmI()) { |
1206 |                 asm_immi(r, ins->immI(), /*canClobberCCs*/false);
1207 | |
1208 | } else if (ins->isImmD()) { |
1209 |                 asm_immd(r, ins->immDasQ(), ins->immD(), /*canClobberCCs*/false);
1210 | |
1211 | } else if (ins->isop(LIR_paramp) && ins->paramKind() == 0 && |
1212 | (arg = ins->paramArg()) >= (abi_regcount = max_abi_regs[_thisfrag->lirbuf->abi])) { |
1213 | // Incoming arg is on stack, can restore it from there instead of spilling. |
1214 | |
1215 | // this case is intentionally not detected in canRemat(), because we still |
1216 | // emit a load instead of a fast ALU operation. We don't want parameter |
1217 | // spills to have precedence over immediates & ALU ops, but if one does |
1218 | // spill, we want to load it directly from its stack area, saving a store |
1219 | // in the prolog. |
1220 | |
1221 | // Compute position of argument relative to ebp. Higher argument |
1222 | // numbers are at higher positive offsets. The first abi_regcount |
1223 |             // arguments are in registers, rest on stack. +8 accommodates the
1224 | // return address and saved ebp value. Assuming abi_regcount == 0: |
1225 | // |
1226 | // low-addr ebp |
1227 | // [frame...][saved-ebp][return-addr][arg0][arg1]... |
1228 | // |
1229 | int d = (arg - abi_regcount) * sizeof(intptr_t) + 8; |
1230 | LD(r, d, FP); |
1231 | |
1232 | } else if (canRematLEA(ins)) { |
1233 | LEA(r, ins->oprnd2()->immI(), ins->oprnd1()->getReg()); |
1234 | |
1235 | } else { |
1236 | int d = findMemFor(ins); |
1237 | if (ins->isI()) { |
1238 |                 NanoAssert(rmask(r) & GpRegs);
1239 | LD(r, d, FP); |
1240 | } else { |
1241 |                 NanoAssert(ins->isD());
1242 | if (rmask(r) & XmmRegs) { |
1243 | SSE_LDQ(r, d, FP); |
1244 | } else { |
1245 |                     NanoAssert(r == FST0);
1246 | FLDQ(d, FP); |
1247 | } |
1248 | } |
1249 | } |
1250 | } |
1251 | |
1252 | void Assembler::asm_store32(LOpcode op, LIns* value, int dr, LIns* base) |
1253 | { |
1254 | if (value->isImmI()) { |
1255 | if (base->opcode() == LIR_addp) { |
1256 | LIns* index; |
1257 | int scale; |
1258 | getBaseIndexScale(base, &base, &index, &scale); |
1259 | |
1260 | Register rb, ri; |
1261 | getBaseReg2(GpRegs, index, ri, GpRegs, base, rb, dr); |
1262 | |
1263 | int c = value->immI(); |
1264 | switch (op) { |
1265 | case LIR_sti2c: ST8isib( dr, rb, ri, scale, c); break; |
1266 | case LIR_sti2s: ST16isib(dr, rb, ri, scale, c); break; |
1267 | case LIR_sti: STisib( dr, rb, ri, scale, c); break; |
1268 |                     default: NanoAssert(0); break;
1269 | } |
1270 | } else { |
1271 | Register rb = getBaseReg(base, dr, GpRegs); |
1272 | int c = value->immI(); |
1273 | switch (op) { |
1274 | case LIR_sti2c: ST8i( rb, dr, c); break; |
1275 | case LIR_sti2s: ST16i(rb, dr, c); break; |
1276 | case LIR_sti: STi( rb, dr, c); break; |
1277 |                     default: NanoAssert(0); break;
1278 | } |
1279 | } |
1280 | |
1281 | } else { |
1282 | // Quirk of x86-32: reg must be a/b/c/d for single-byte stores. |
1283 | const RegisterMask SrcRegs = (op == LIR_sti2c) ? AllowableByteRegs : GpRegs; |
1284 | |
1285 | Register rv, rb; |
1286 | if (base->opcode() == LIR_addp) { |
1287 | LIns* index; |
1288 | int scale; |
1289 | getBaseIndexScale(base, &base, &index, &scale); |
1290 | |
1291 | Register rb, ri, rv; |
1292 | getBaseReg2(SrcRegs, value, rv, GpRegs, base, rb, dr); |
1293 | ri = (index == value) ? rv |
1294 | : (index == base) ? rb |
1295 | : findRegFor(index, GpRegs & ~(rmask(rb)|rmask(rv))); |
1296 | |
1297 | switch (op) { |
1298 | case LIR_sti2c: ST8sib( dr, rb, ri, scale, rv); break; |
1299 | case LIR_sti2s: ST16sib(dr, rb, ri, scale, rv); break; |
1300 | case LIR_sti: STsib( dr, rb, ri, scale, rv); break; |
1301 |                     default: NanoAssert(0); break;
1302 | } |
1303 | |
1304 | } else { |
1305 | if (base->isImmI()) { |
1306 | // absolute address |
1307 | rb = UnspecifiedReg; |
1308 | dr += base->immI(); |
1309 | rv = findRegFor(value, SrcRegs); |
1310 | } else { |
1311 | getBaseReg2(SrcRegs, value, rv, GpRegs, base, rb, dr); |
1312 | } |
1313 | switch (op) { |
1314 | case LIR_sti2c: ST8( rb, dr, rv); break; |
1315 | case LIR_sti2s: ST16(rb, dr, rv); break; |
1316 | case LIR_sti: ST( rb, dr, rv); break; |
1317 |                     default: NanoAssert(0); break;
1318 | } |
1319 | } |
1320 | } |
1321 | } |
1322 | |
1323 |         void Assembler::asm_spill(Register rr, int d, bool pop)
1324 | { |
1325 |             NanoAssert(d);
1326 | if (rmask(rr) & GpRegs) { |
1327 | ST(FP, d, rr); |
1328 | } else if (rmask(rr) & XmmRegs) { |
1329 | SSE_STQ(d, FP, rr); |
1330 | } else { |
1331 |                 NanoAssert(rr == FST0);
1332 | FSTQ(pop, d, FP); |
1333 | } |
1334 | } |
1335 | |
1336 | void Assembler::asm_load64(LIns* ins) |
1337 | { |
1338 | LIns* base = ins->oprnd1(); |
1339 | int d = ins->disp(); |
1340 | |
1341 | // There are two cases: |
1342 | // - 'ins' is in FpRegs: load it. |
1343 | // - otherwise: there's no point loading the value into a register |
1344 | // because its only use will be to immediately spill it. Instead we |
1345 | // do a memory-to-memory move from the load address directly to the |
1346 | // spill slot. (There must be a spill slot assigned.) This is why |
1347 | // we don't use prepareResultReg() here unlike most other places -- |
1348 | // because it mandates bringing the value into a register. |
1349 | // |
1350 | if (ins->isInReg()) { |
1351 | Register rr = prepareResultReg(ins, rmask(ins->getReg())); |
1352 | |
1353 | if (base->opcode() == LIR_addp && rmask(rr) & XmmRegs) { |
1354 | LIns* index; |
1355 | int scale; |
1356 | getBaseIndexScale(base, &base, &index, &scale); |
1357 | |
1358 | // (**) We don't have the usual opportunity to clobber 'base' |
1359 | // or 'ins' with the result because it has a different type. |
1360 | Register rb, ri; |
1361 | RegisterMask allow = GpRegs & ~rmask(rr); |
1362 | getBaseReg2(allow, index, ri, allow, base, rb, d); |
1363 | |
1364 | switch (ins->opcode()) { |
1365 | case LIR_ldd: SSE_LDQsib(rr, d, rb, ri, scale); break; |
1366 | case LIR_ldf2d: SSE_CVTSS2SD(rr, rr); |
1367 | SSE_LDSSsib(rr, d, rb, ri, scale); |
1368 | SSE_XORPDr(rr, rr); break; |
1369 |                     default:        NanoAssert(0);                  break;
1370 | } |
1371 | |
1372 | } else { |
1373 | // (**) We don't have the usual opportunity to clobber 'base' |
1374 | // or 'ins' with the result because it has a different type. |
1375 | Register rb = getBaseReg(base, d, GpRegs); |
1376 | if (rmask(rr) & XmmRegs) { |
1377 | switch (ins->opcode()) { |
1378 | case LIR_ldd: SSE_LDQ(rr, d, rb); break; |
1379 | case LIR_ldf2d: SSE_CVTSS2SD(rr, rr); |
1380 | SSE_LDSS(rr, d, rb); |
1381 | SSE_XORPDr(rr, rr); break; |
1382 |                         default:        NanoAssert(0);              break;
1383 | } |
1384 | } else { |
1385 |                 NanoAssert(rr == FST0);
1386 | switch (ins->opcode()) { |
1387 | case LIR_ldd: FLDQ(d, rb); break; |
1388 | case LIR_ldf2d: FLD32(d, rb); break; |
1389 |                     default:        NanoAssert(0);      break;
1390 | } |
1391 | } |
1392 | } |
1393 | |
1394 | } else { |
1395 | Register rb = getBaseReg(base, d, GpRegs); |
1396 | |
1397 |             NanoAssert(ins->isInAr());
1398 | int dr = arDisp(ins); |
1399 | |
1400 | switch (ins->opcode()) { |
1401 | case LIR_ldd: |
1402 | // Don't use an fpu reg to simply load & store the value. |
1403 | asm_mmq(FP, dr, rb, d); |
1404 | break; |
1405 | |
1406 | case LIR_ldf2d: |
1407 | // Need to use fpu to expand 32->64. |
1408 | FSTPQ(dr, FP); |
1409 | FLD32(d, rb); |
1410 | break; |
1411 | |
1412 | default: |
1413 |                     NanoAssert(0);
1414 | break; |
1415 | } |
1416 | } |
1417 | |
1418 | freeResourcesOf(ins); |
1419 | // Nb: no need for a possible findSpecificRegForUnallocated() call |
1420 | // here because of (**) above. |
1421 | } |
1422 | |
1423 | void Assembler::asm_store64(LOpcode op, LIns* value, int d, LIns* base) |
1424 | { |
1425 | if (op == LIR_std2f) { |
1426 | Register rb = getBaseReg(base, d, GpRegs); |
1427 |             bool pop = !value->isInReg();
1428 | Register rv = ( pop |
1429 | ? findRegFor(value, _config.i386_sse2 ? XmmRegs : FpRegs) |
1430 | : value->getReg() ); |
1431 | |
1432 | if (rmask(rv) & XmmRegs) { |
1433 | // need a scratch reg |
1434 | Register rt = registerAllocTmp(XmmRegs); |
1435 | |
1436 | // cvt to single-precision and store |
1437 | SSE_STSS(d, rb, rt); |
1438 | SSE_CVTSD2SS(rt, rv); |
1439 | SSE_XORPDr(rt, rt); // zero dest to ensure no dependency stalls |
1440 | |
1441 | } else { |
1442 | FST32(pop, d, rb); |
1443 | } |
1444 | |
1445 | } else if (value->isImmD()) { |
1446 | Register rb = getBaseReg(base, d, GpRegs); |
1447 | STi(rb, d+4, value->immDhi()); |
1448 | STi(rb, d, value->immDlo()); |
1449 | |
1450 | } else if (base->opcode() == LIR_addp && _config.i386_sse2) { |
1451 | LIns* index; |
1452 | int scale; |
1453 | getBaseIndexScale(base, &base, &index, &scale); |
1454 | |
1455 | Register rb, ri; |
1456 | getBaseReg2(GpRegs, index, ri, GpRegs, base, rb, d); |
1457 | |
1458 | Register rv = value->isInReg() ? value->getReg() : findRegFor(value, XmmRegs); |
1459 |             NanoAssert(rmask(rv) & XmmRegs);
1460 | SSE_STQsib(d, rb, ri, scale, rv); |
1461 | |
1462 | } else if (value->isop(LIR_ldd) && !_config.i386_sse2) { |
1463 | // 'value' may be live in an FPU reg. Either way, don't put it on |
1464 | // the FPU stack just to load & store it. |
1465 | Register rb = getBaseReg(base, d, GpRegs); |
1466 | int da = findMemFor(value); |
1467 | asm_mmq(rb, d, FP, da); |
1468 | |
1469 | } else { |
1470 | Register rb = getBaseReg(base, d, GpRegs); |
1471 |             bool pop = !value->isInReg();
1472 | Register rv = ( pop |
1473 | ? findRegFor(value, _config.i386_sse2 ? XmmRegs : FpRegs) |
1474 | : value->getReg() ); |
1475 | if (rmask(rv) & XmmRegs) |
1476 | SSE_STQ(d, rb, rv); |
1477 | else |
1478 | FSTQ(pop, d, rb); |
1479 | } |
1480 | } |
1481 | |
1482 | // Copy 64 bits: (rd+dd) <- (rs+ds). |
1483 | // |
1484 | void Assembler::asm_mmq(Register rd, int dd, Register rs, int ds) |
1485 | { |
1486 | // Value is either a 64-bit struct or maybe a float that isn't live in |
1487 | // an FPU reg. Either way, avoid allocating an FPU reg just to load |
1488 | // and store it. |
1489 | if (_config.i386_sse2) { |
1490 | Register t = registerAllocTmp(XmmRegs); |
1491 | SSE_STQ(dd, rd, t); |
1492 | SSE_LDQ(t, ds, rs); |
1493 | } else { |
1494 | // We avoid copying via the FP stack because it's slow and likely |
1495 | // to cause spills. |
1496 | Register t = registerAllocTmp(GpRegs & ~(rmask(rd)|rmask(rs))); |
1497 | ST(rd, dd+4, t); |
1498 | LD(t, ds+4, rs); |
1499 | ST(rd, dd, t); |
1500 | LD(t, ds, rs); |
1501 | } |
1502 | } |
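     | 
     |     // In execution order (remember that instructions are emitted
     |     // backwards), the non-SSE2 path of asm_mmq() above comes out
     |     // roughly as follows -- a sketch only, with 't' standing for
     |     // whatever GP temp the allocator returns:
     |     //
     |     //    mov t, [rs+ds]       # low 32 bits
     |     //    mov [rd+dd], t
     |     //    mov t, [rs+ds+4]     # high 32 bits
     |     //    mov [rd+dd+4], t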
1503 | |
1504 |     Branches Assembler::asm_branch_helper(bool branchOnFalse, LIns* cond, NIns* targ)
1505 | { |
1506 | return isCmpDOpcode(cond->opcode()) |
1507 | ? asm_branchd_helper(branchOnFalse, cond, targ) |
1508 | : asm_branchi_helper(branchOnFalse, cond, targ); |
1509 | } |
1510 | |
1511 |     Branches Assembler::asm_branchi_helper(bool branchOnFalse, LIns* cond, NIns* targ)
1512 | { |
1513 | if (branchOnFalse) { |
1514 | // op == LIR_xf/LIR_jf |
1515 | switch (cond->opcode()) { |
1516 | case LIR_eqi: JNE(targ); break; |
1517 | case LIR_lti: JNL(targ); break; |
1518 | case LIR_lei: JNLE(targ); break; |
1519 | case LIR_gti: JNG(targ); break; |
1520 | case LIR_gei: JNGE(targ); break; |
1521 | case LIR_ltui: JNB(targ); break; |
1522 | case LIR_leui: JNBE(targ); break; |
1523 | case LIR_gtui: JNA(targ); break; |
1524 | case LIR_geui: JNAE(targ); break; |
1525 |             default:        NanoAssert(0);  break;
1526 | } |
1527 | } else { |
1528 | // op == LIR_xt/LIR_jt |
1529 | switch (cond->opcode()) { |
1530 | case LIR_eqi: JE(targ); break; |
1531 | case LIR_lti: JL(targ); break; |
1532 | case LIR_lei: JLE(targ); break; |
1533 | case LIR_gti: JG(targ); break; |
1534 | case LIR_gei: JGE(targ); break; |
1535 | case LIR_ltui: JB(targ); break; |
1536 | case LIR_leui: JBE(targ); break; |
1537 | case LIR_gtui: JA(targ); break; |
1538 | case LIR_geui: JAE(targ); break; |
1539 |             default:        NanoAssert(0);  break;
1540 | } |
1541 | } |
1542 | return Branches(_nIns); |
1543 | } |
1544 | |
1545 |     Branches Assembler::asm_branch(bool branchOnFalse, LIns* cond, NIns* targ)
1546 | { |
1547 | Branches branches = asm_branch_helper(branchOnFalse, cond, targ); |
1548 | asm_cmp(cond); |
1549 | return branches; |
1550 | } |
1551 | |
1552 | NIns* Assembler::asm_branch_ov(LOpcode, NIns* target) |
1553 | { |
1554 | JO(target); |
1555 | return _nIns; |
1556 | } |
1557 | |
1558 | void Assembler::asm_jtbl(LIns* ins, NIns** table) |
1559 | { |
1560 | Register indexreg = findRegFor(ins->oprnd1(), GpRegs); |
1561 | JMP_indexed(indexreg, 2, table); |
1562 | } |
1563 | |
1564 | void Assembler::asm_cmp(LIns *cond) |
1565 | { |
1566 | isCmpDOpcode(cond->opcode()) ? asm_cmpd(cond) : asm_cmpi(cond); |
1567 | } |
1568 | |
1569 | // This generates a 'test' or 'cmp' instruction for a condition, which |
1570 | // causes the condition codes to be set appropriately. It's used with |
1571 | // conditional branches, conditional moves, and when generating |
1572 | // conditional values. For example: |
1573 | // |
1574 | // LIR: eq1 = eq a, 0 |
1575 | // LIR: xf1: xf eq1 -> ... |
1576 | // asm: test edx, edx # generated by this function |
1577 | // asm: je ... |
1578 | // |
1579 | // If this is the only use of eq1, then on entry 'cond' is *not* marked as |
1580 | // used, and we do not allocate a register for it. That's because its |
1581 | // result ends up in the condition codes rather than a normal register. |
1582 | // This doesn't get recorded in the regstate and so the asm code that |
1583 | // consumes the result (eg. a conditional branch like 'je') must follow |
1584 | // shortly after. |
1585 | // |
1586 | // If eq1 is instead used again later, we will also generate code |
1587 | // (eg. in asm_cond()) to compute it into a normal register, something |
1588 | // like this: |
1589 | // |
1590 | // LIR: eq1 = eq a, 0 |
1591 |     //   asm:   test edx, edx
1592 | // asm: sete ebx |
1593 | // asm: movzx ebx, ebx |
1594 | // |
1595 | // In this case we end up computing the condition twice, but that's ok, as |
1596 | // it's just as short as testing eq1's value in the code generated for the |
1597 | // guard. |
1598 | // |
1599 | // WARNING: Because the condition code update is not recorded in the |
1600 | // regstate, this function cannot generate any code that will affect the |
1601 | // condition codes prior to the generation of the test/cmp, because any |
1602 | // such code will be run after the test/cmp but before the instruction |
1603 | // that consumes the condition code. And because this function calls |
1604 | // findRegFor() before the test/cmp is generated, and findRegFor() calls |
1605 | // asm_restore(), that means that asm_restore() cannot generate code which |
1606 | // affects the condition codes. |
1607 | // |
1608 | void Assembler::asm_cmpi(LIns *cond) |
1609 | { |
1610 | LIns* lhs = cond->oprnd1(); |
1611 | LIns* rhs = cond->oprnd2(); |
1612 | |
1613 |         NanoAssert(lhs->isI() && rhs->isI());
1614 | |
1615 | // Ready to issue the compare. |
1616 | if (rhs->isImmI()) { |
1617 | int c = rhs->immI(); |
1618 | // findRegFor() can call asm_restore() -- asm_restore() better not |
1619 | // disturb the CCs! |
1620 | Register r = findRegFor(lhs, GpRegs); |
1621 | if (c == 0 && cond->isop(LIR_eqi)) { |
1622 |                 bool canSkipTest = lhs->isop(LIR_andi) || lhs->isop(LIR_ori);
1623 | if (canSkipTest) { |
1624 | // Setup a short-lived reader to do lookahead; does no |
1625 | // optimisations but that should be good enough for this |
1626 | // simple case, something like this: |
1627 | // |
1628 | // a = andi x, y # lhs |
1629 | // eq1 = eq a, 0 # cond |
1630 | // xt eq1 # currIns |
1631 | // |
1632 | // Note that we don't have to worry about lookahead |
1633 | // hitting the start of the buffer, because read() will |
1634 | // just return LIR_start repeatedly in that case. |
1635 | // |
1636 | LirReader lookahead(currIns); |
1637 | canSkipTest = currIns == lookahead.read() && |
1638 | cond == lookahead.read() && |
1639 | lhs == lookahead.read(); |
1640 | } |
1641 | if (canSkipTest) { |
1642 |                     // Do nothing.  At run-time, 'lhs' will have just been
1643 |                     // computed by an i386 instruction that sets ZF for us
1644 |                     // ('and' or 'or'), so we don't have to do it ourselves.
1645 | } else { |
1646 | TEST(r, r); // sets ZF according to the value of 'lhs' |
1647 | } |
1648 | } else { |
1649 | CMPi(r, c); |
1650 | } |
1651 | } else { |
1652 | Register ra, rb; |
1653 | findRegFor2(GpRegs, lhs, ra, GpRegs, rhs, rb); |
1654 | CMP(ra, rb); |
1655 | } |
1656 | } |
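     | 
     |     // When the test is skipped in the LIR_andi/LIR_ori case above, the
     |     // generated code reduces to something like this (a sketch; actual
     |     // registers depend on the allocator):
     |     //
     |     //    and edx, ecx         # computes 'a' and sets ZF as a side effect
     |     //    je  <target>         # consumes ZF directly; no extra 'test'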
1657 | |
1658 | void Assembler::asm_condd(LIns* ins) |
1659 | { |
1660 | LOpcode opcode = ins->opcode(); |
1661 | Register r = prepareResultReg(ins, AllowableByteRegs); |
1662 | |
1663 | // SETcc only sets low 8 bits, so extend |
1664 | MOVZX8(r,r); |
1665 | |
1666 | if (_config.i386_sse2) { |
1667 | // LIR_ltd and LIR_gtd are handled by the same case because |
1668 | // asm_cmpd() converts LIR_ltd(a,b) to LIR_gtd(b,a). Likewise |
1669 | // for LIR_led/LIR_ged. |
1670 | switch (opcode) { |
1671 | case LIR_eqd: |
1672 | if (ins->oprnd1() == ins->oprnd2()) { |
1673 | SETNP(r); |
1674 | } else { |
1675 | // result = ZF & !PF, must do logic on flags |
1676 | AND8R(r); // and rl,rh rl &= rh |
1677 | SETNPH(r); // setnp rh rh = !PF |
1678 | SETE(r); // sete rl rl = ZF |
1679 | } |
1680 | break; |
1681 | case LIR_ltd: |
1682 | case LIR_gtd: SETA(r); break; |
1683 | case LIR_led: |
1684 | case LIR_ged: SETAE(r); break; |
1685 |             default: NanoAssert(0); break;
1686 | } |
1687 | } else { |
1688 | SETNP(r); |
1689 | } |
1690 | |
1691 | freeResourcesOf(ins); |
1692 | |
1693 | asm_cmpd(ins); |
1694 | } |
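     | 
     |     // For LIR_eqd with distinct operands, the SSE2 case above executes
     |     // roughly as follows (a sketch; asm_cmpd() supplies the ucomisd,
     |     // and we emit backwards, so execution order is the reverse of the
     |     // order written in the code):
     |     //
     |     //    ucomisd xmm0, xmm1   # sets ZF and PF
     |     //    sete    al           # al = ZF
     |     //    setnp   ah           # ah = !PF
     |     //    and     al, ah       # al = ZF & !PF, i.e. equal and ordered
     |     //    movzx   eax, al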
1695 | |
1696 | void Assembler::asm_cond(LIns* ins) |
1697 | { |
1698 | LOpcode op = ins->opcode(); |
1699 | |
1700 | Register r = prepareResultReg(ins, AllowableByteRegs); |
1701 | |
1702 | // SETcc only sets low 8 bits, so extend |
1703 | MOVZX8(r,r); |
1704 | switch (op) { |
1705 | case LIR_eqi: SETE(r); break; |
1706 | case LIR_lti: SETL(r); break; |
1707 | case LIR_lei: SETLE(r); break; |
1708 | case LIR_gti: SETG(r); break; |
1709 | case LIR_gei: SETGE(r); break; |
1710 | case LIR_ltui: SETB(r); break; |
1711 | case LIR_leui: SETBE(r); break; |
1712 | case LIR_gtui: SETA(r); break; |
1713 | case LIR_geui: SETAE(r); break; |
1714 |         default:        NanoAssert(0);  break;
1715 | } |
1716 | |
1717 | freeResourcesOf(ins); |
1718 | |
1719 | asm_cmpi(ins); |
1720 | } |
1721 | |
1722 | // Two example cases for "ins = add lhs, rhs". '*' lines are those |
1723 | // generated in this function. |
1724 | // |
1725 | // asm: define lhs into rr |
1726 | // asm: define rhs into rb |
1727 | // ... |
1728 | // * asm: add rr, rb |
1729 | // * asm: spill rr if necessary |
1730 | // ... no more uses of lhs in rr... |
1731 | // |
1732 | // asm: define lhs into ra |
1733 | // asm: define rhs into rb |
1734 | // ... |
1735 | // * asm: mov rr, ra |
1736 | // * asm: add rr, rb |
1737 | // * asm: spill rr if necessary |
1738 | // ... some uses of lhs in ra... |
1739 | // |
1740 | void Assembler::asm_arith(LIns* ins) |
1741 | { |
1742 | LOpcode op = ins->opcode(); |
1743 | |
1744 | // First special case. |
1745 | if (op == LIR_modi) { |
1746 | asm_div_mod(ins); |
1747 | return; |
1748 | } |
1749 | |
1750 | LIns* lhs = ins->oprnd1(); |
1751 | LIns* rhs = ins->oprnd2(); |
1752 | |
1753 | // Second special case. |
1754 | // XXX: bug 547125: don't need this once LEA is used for LIR_addi in all cases below |
1755 | if (op == LIR_addi && lhs->isop(LIR_allocp) && rhs->isImmI()) { |
1756 | // LIR_addi(LIR_allocp, LIR_immi) -- use lea. |
1757 | Register rr = prepareResultReg(ins, GpRegs); |
1758 | int d = findMemFor(lhs) + rhs->immI(); |
1759 | |
1760 | LEA(rr, d, FP); |
1761 | |
1762 | freeResourcesOf(ins); |
1763 | |
1764 | return; |
1765 | } |
1766 | |
1767 |         bool isConstRhs;
1768 | RegisterMask allow = GpRegs; |
1769 | Register rb = UnspecifiedReg; |
1770 | |
1771 | switch (op) { |
1772 | case LIR_divi: |
1773 | // Nb: if the div feeds into a mod it will be handled by |
1774 | // asm_div_mod() rather than here. |
1775 |             isConstRhs = false;
1776 | rb = findRegFor(rhs, (GpRegs & ~(rmask(rEAX)|rmask(rEDX)))); |
1777 | allow = rmask(rEAX); |
1778 | evictIfActive(rEDX); |
1779 | break; |
1780 | case LIR_muli: |
1781 | case LIR_muljovi: |
1782 | case LIR_mulxovi: |
1783 |             isConstRhs = false;
1784 | if (lhs != rhs) { |
1785 | rb = findRegFor(rhs, allow); |
1786 | allow &= ~rmask(rb); |
1787 | } |
1788 | break; |
1789 | case LIR_lshi: |
1790 | case LIR_rshi: |
1791 | case LIR_rshui: |
1792 | isConstRhs = rhs->isImmI(); |
1793 | if (!isConstRhs) { |
1794 | rb = findSpecificRegFor(rhs, rECX); |
1795 | allow &= ~rmask(rb); |
1796 | } |
1797 | break; |
1798 | default: |
1799 | isConstRhs = rhs->isImmI(); |
1800 | if (!isConstRhs && lhs != rhs) { |
1801 | rb = findRegFor(rhs, allow); |
1802 | allow &= ~rmask(rb); |
1803 | } |
1804 | break; |
1805 | } |
1806 | |
1807 | // Somewhere for the result of 'ins'. |
1808 | Register rr = prepareResultReg(ins, allow); |
1809 | |
1810 | // If 'lhs' isn't in a register, it can be clobbered by 'ins'. |
1811 | Register ra = lhs->isInReg() ? lhs->getReg() : rr; |
1812 | |
1813 | if (!isConstRhs) { |
1814 | if (lhs == rhs) |
1815 | rb = ra; |
1816 | |
1817 | switch (op) { |
1818 | case LIR_addi: |
1819 | case LIR_addjovi: |
1820 | case LIR_addxovi: ADD(rr, rb); break; // XXX: bug 547125: could use LEA for LIR_addi |
1821 | case LIR_subi: |
1822 | case LIR_subjovi: |
1823 | case LIR_subxovi: SUB(rr, rb); break; |
1824 | case LIR_muli: |
1825 | case LIR_muljovi: |
1826 | case LIR_mulxovi: IMUL(rr, rb); break; |
1827 | case LIR_andi: AND(rr, rb); break; |
1828 | case LIR_ori: OR( rr, rb); break; |
1829 | case LIR_xori: XOR(rr, rb); break; |
1830 | case LIR_lshi: SHL(rr, rb); break; |
1831 | case LIR_rshi: SAR(rr, rb); break; |
1832 | case LIR_rshui: SHR(rr, rb); break; |
1833 | case LIR_divi: |
1834 | DIV(rb); |
1835 | CDQ(); // sign-extend rEAX into rEDX:rEAX |
1836 | break; |
1837 |             default:        NanoAssert(0);  break;
1838 | } |
1839 | |
1840 | } else { |
1841 | int c = rhs->immI(); |
1842 | switch (op) { |
1843 | case LIR_addi: |
1844 | // this doesn't set cc's, only use it when cc's not required. |
1845 | LEA(rr, c, ra); |
1846 | ra = rr; // suppress mov |
1847 | break; |
1848 | case LIR_addjovi: |
1849 | case LIR_addxovi: ADDi(rr, c); break; |
1850 | case LIR_subi: |
1851 | case LIR_subjovi: |
1852 | case LIR_subxovi: SUBi(rr, c); break; |
1853 | case LIR_andi: ANDi(rr, c); break; |
1854 | case LIR_ori: ORi( rr, c); break; |
1855 | case LIR_xori: XORi(rr, c); break; |
1856 | case LIR_lshi: SHLi(rr, c); break; |
1857 | case LIR_rshi: SARi(rr, c); break; |
1858 | case LIR_rshui: SHRi(rr, c); break; |
1859 |             default:        NanoAssert(0);  break;
1860 | } |
1861 | } |
1862 | |
1863 | if (rr != ra) |
1864 | MR(rr, ra); |
1865 | |
1866 | freeResourcesOf(ins); |
1867 | if (!lhs->isInReg()) { |
1868 |             NanoAssert(ra == rr);
1869 | findSpecificRegForUnallocated(lhs, ra); |
1870 | } |
1871 | } |
1872 | |
1873 | // Generates code for a LIR_modi(LIR_divi(divL, divR)) sequence. |
1874 | void Assembler::asm_div_mod(LIns* mod) |
1875 | { |
1876 | LIns* div = mod->oprnd1(); |
1877 | |
1878 | // LIR_modi expects the LIR_divi to be near (no interference from the register allocator). |
1879 |         NanoAssert(mod->isop(LIR_modi));
1880 |         NanoAssert(div->isop(LIR_divi));
1881 | |
1882 | LIns* divL = div->oprnd1(); |
1883 | LIns* divR = div->oprnd2(); |
1884 | |
1885 | prepareResultReg(mod, rmask(rEDX)); |
1886 | prepareResultReg(div, rmask(rEAX)); |
1887 | |
1888 | Register rDivR = findRegFor(divR, (GpRegs & ~(rmask(rEAX)|rmask(rEDX)))); |
1889 | Register rDivL = divL->isInReg() ? divL->getReg() : rEAX; |
1890 | |
1891 | DIV(rDivR); |
1892 | CDQ(); // sign-extend rEAX into rEDX:rEAX |
1893 | if (rEAX != rDivL) |
1894 | MR(rEAX, rDivL); |
1895 | |
1896 | freeResourcesOf(mod); |
1897 | freeResourcesOf(div); |
1898 | if (!divL->isInReg()) { |
1899 |             NanoAssert(rDivL == rEAX);
1900 | findSpecificRegForUnallocated(divL, rEAX); |
1901 | } |
1902 | } |
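     | 
     |     // In execution order, asm_div_mod() produces roughly the following
     |     // (a sketch): 'idiv' (signed divide) leaves the quotient -- the
     |     // LIR_divi -- in eax and the remainder -- the LIR_modi -- in edx.
     |     //
     |     //    mov  eax, <divL>     # only if divL wasn't already in eax
     |     //    cdq                  # sign-extend eax into edx:eax
     |     //    idiv <divR>          # quotient -> eax, remainder -> edx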
1903 | |
1904 | // Two example cases for "ins = neg lhs". Lines marked with '*' are |
1905 | // generated in this function. |
1906 | // |
1907 | // asm: define lhs into rr |
1908 | // ... |
1909 | // * asm: neg rr |
1910 | // * asm: spill rr if necessary |
1911 | // ... no more uses of lhs in rr... |
1912 | // |
1913 | // |
1914 | // asm: define lhs into ra |
1915 | // ... |
1916 | // * asm: mov rr, ra |
1917 | // * asm: neg rr |
1918 | // * asm: spill rr if necessary |
1919 | // ... more uses of lhs in ra... |
1920 | // |
1921 | void Assembler::asm_neg_not(LIns* ins) |
1922 | { |
1923 | LIns* lhs = ins->oprnd1(); |
1924 | |
1925 | Register rr = prepareResultReg(ins, GpRegs); |
1926 | |
1927 | // If 'lhs' isn't in a register, it can be clobbered by 'ins'. |
1928 | Register ra = lhs->isInReg() ? lhs->getReg() : rr; |
1929 | |
1930 | if (ins->isop(LIR_noti)) { |
1931 | NOT(rr); |
1932 | } else { |
1933 |             NanoAssert(ins->isop(LIR_negi));
1934 | NEG(rr); |
1935 | } |
1936 | if (rr != ra) |
1937 | MR(rr, ra); |
1938 | |
1939 | freeResourcesOf(ins); |
1940 | if (!lhs->isInReg()) { |
1941 |             NanoAssert(ra == rr);
1942 | findSpecificRegForUnallocated(lhs, ra); |
1943 | } |
1944 | } |
1945 | |
1946 | void Assembler::asm_load32(LIns* ins) |
1947 | { |
1948 | LOpcode op = ins->opcode(); |
1949 | LIns* base = ins->oprnd1(); |
1950 | int32_t d = ins->disp(); |
1951 | |
1952 | Register rr = prepareResultReg(ins, GpRegs); |
1953 | |
1954 | if (base->isImmI()) { |
1955 | intptr_t addr = base->immI(); |
1956 | addr += d; |
1957 | switch (op) { |
1958 | case LIR_lduc2ui: LD8Zdm( rr, addr); break; |
1959 | case LIR_ldc2i: LD8Sdm( rr, addr); break; |
1960 | case LIR_ldus2ui: LD16Zdm(rr, addr); break; |
1961 | case LIR_lds2i: LD16Sdm(rr, addr); break; |
1962 | case LIR_ldi: LDdm( rr, addr); break; |
1963 |                 default:            NanoAssert(0);      break;
1964 | } |
1965 | |
1966 | freeResourcesOf(ins); |
1967 | |
1968 | } else if (base->opcode() == LIR_addp) { |
1969 | LIns* index; |
1970 | int scale; |
1971 | getBaseIndexScale(base, &base, &index, &scale); |
1972 | |
1973 | // If 'base' isn't in a register, it can be clobbered by 'ins'. |
1974 | // Likewise for 'rhs', but we try it with 'base' first. |
1975 | Register rb, ri; |
1976 | // @todo -- If base and/or index is const, we could eliminate a register use. |
1977 | if (!base->isInReg()) { |
1978 | rb = rr; |
1979 | ri = findRegFor(index, GpRegs & ~(rmask(rb))); |
1980 | |
1981 | } else { |
1982 | rb = base->getReg(); |
1983 |                 NanoAssert(rb != rr);
1984 | ri = index->isInReg() ? findRegFor(index, GpRegs & ~(rmask(rb))) : rr; |
1985 | } |
1986 | |
1987 | switch (op) { |
1988 | case LIR_lduc2ui: LD8Zsib( rr, d, rb, ri, scale); break; |
1989 | case LIR_ldc2i: LD8Ssib( rr, d, rb, ri, scale); break; |
1990 | case LIR_ldus2ui: LD16Zsib(rr, d, rb, ri, scale); break; |
1991 | case LIR_lds2i: LD16Ssib(rr, d, rb, ri, scale); break; |
1992 | case LIR_ldi: LDsib( rr, d, rb, ri, scale); break; |
1993 |                 default:            NanoAssert(0);                      break;
1994 | } |
1995 | |
1996 | freeResourcesOf(ins); |
1997 | if (!base->isInReg()) { |
1998 |                 NanoAssert(rb == rr);
1999 | findSpecificRegForUnallocated(base, rb); |
2000 | } else if (!index->isInReg()) { |
2001 |                 NanoAssert(ri == rr);
2002 | findSpecificRegForUnallocated(index, ri); |
2003 | } |
2004 | |
2005 | } else { |
2006 | Register ra = getBaseReg(base, d, GpRegs); |
2007 | |
2008 | switch (op) { |
2009 | case LIR_lduc2ui: LD8Z( rr, d, ra); break; |
2010 | case LIR_ldc2i: LD8S( rr, d, ra); break; |
2011 | case LIR_ldus2ui: LD16Z(rr, d, ra); break; |
2012 | case LIR_lds2i: LD16S(rr, d, ra); break; |
2013 | case LIR_ldi: LD( rr, d, ra); break; |
2014 |                 default:            NanoAssert(0);          break;
2015 | } |
2016 | |
2017 | freeResourcesOf(ins); |
2018 | if (!base->isop(LIR_allocp) && !base->isInReg()) { |
2019 |                 NanoAssert(ra == rr);
2020 | findSpecificRegForUnallocated(base, ra); |
2021 | } |
2022 | } |
2023 | } |
2024 | |
2025 | void Assembler::asm_cmov(LIns* ins) |
2026 | { |
2027 | LIns* condval = ins->oprnd1(); |
2028 | LIns* iftrue = ins->oprnd2(); |
2029 | LIns* iffalse = ins->oprnd3(); |
2030 | |
2031 |         NanoAssert(condval->isCmp());
2032 |         NanoAssert((ins->isop(LIR_cmovi) && iftrue->isI() && iffalse->isI()) ||
2033 |                    (ins->isop(LIR_cmovd) && iftrue->isD() && iffalse->isD()));
2034 | |
2035 | if (!_config.i386_sse2 && ins->isop(LIR_cmovd)) { |
2036 | // See the SSE2 case below for an explanation of the subtleties here. |
2037 | debug_only( Register rr = ) prepareResultReg(ins, x87Regs); |
2038 |             NanoAssert(FST0 == rr);
2039 |             NanoAssert(!iftrue->isInReg() && !iffalse->isInReg());
2040 | |
2041 | NIns* target = _nIns; |
2042 | |
2043 | if (iffalse->isImmD()) { |
2044 |                 asm_immd(FST0, iffalse->immDasQ(), iffalse->immD(), /*canClobberCCs*/false);
2045 | } else { |
2046 | int df = findMemFor(iffalse); |
2047 | FLDQ(df, FP); |
2048 | } |
2049 | FSTP(FST0); // pop the stack |
2050 |             asm_branch_helper(false, condval, target);
2051 | |
2052 |             NanoAssert(ins->getReg() == rr);
2053 | freeResourcesOf(ins); |
2054 | if (!iftrue->isInReg()) |
2055 | findSpecificRegForUnallocated(iftrue, FST0); |
2056 | |
2057 | asm_cmp(condval); |
2058 | |
2059 | return; |
2060 | } |
2061 | |
2062 | RegisterMask allow = ins->isD() ? XmmRegs : GpRegs; |
2063 | Register rr = prepareResultReg(ins, allow); |
2064 | Register rf = findRegFor(iffalse, allow & ~rmask(rr)); |
2065 | |
2066 | if (ins->isop(LIR_cmovd)) { |
2067 | // The obvious way to handle this is as follows: |
2068 | // |
2069 | // mov rr, rt # only needed if rt is live afterwards |
2070 | // do comparison |
2071 | // jt end |
2072 | // mov rr, rf |
2073 | // end: |
2074 | // |
2075 | // The problem with this is that doing the comparison can cause |
2076 | // registers to be evicted, possibly including 'rr', which holds |
2077 | // 'ins'. And that screws things up. So instead we do this: |
2078 | // |
2079 | // do comparison |
2080 | // mov rr, rt # only needed if rt is live afterwards |
2081 | // jt end |
2082 | // mov rr, rf |
2083 | // end: |
2084 | // |
2085 | // Putting the 'mov' between the comparison and the jump is ok |
2086 | // because move instructions don't modify the condition codes. |
2087 | // |
2088 | NIns* target = _nIns; |
2089 | asm_nongp_copy(rr, rf); |
2090 |             asm_branch_helper(false, condval, target);
2091 | |
2092 | // If 'iftrue' isn't in a register, it can be clobbered by 'ins'. |
2093 | Register rt = iftrue->isInReg() ? iftrue->getReg() : rr; |
2094 | |
2095 | if (rr != rt) |
2096 | asm_nongp_copy(rr, rt); |
2097 | |
2098 |             NanoAssert(ins->getReg() == rr);
2099 | freeResourcesOf(ins); |
2100 | if (!iftrue->isInReg()) { |
2101 |                 NanoAssert(rt == rr);
2102 | findSpecificRegForUnallocated(iftrue, rr); |
2103 | } |
2104 | |
2105 | asm_cmp(condval); |
2106 | return; |
2107 | } |
2108 | |
2109 | // If 'iftrue' isn't in a register, it can be clobbered by 'ins'. |
2110 | Register rt = iftrue->isInReg() ? iftrue->getReg() : rr; |
2111 |         NanoAssert(ins->isop(LIR_cmovi));
2112 | |
2113 | // WARNING: We cannot generate any code that affects the condition |
2114 | // codes between the MRcc generation here and the asm_cmpi() call |
2115 | // below. See asm_cmpi() for more details. |
2116 | switch (condval->opcode()) { |
2117 | // Note that these are all opposites... |
2118 | case LIR_eqi: MRNE(rr, rf); break; |
2119 | case LIR_lti: MRGE(rr, rf); break; |
2120 | case LIR_lei: MRG( rr, rf); break; |
2121 | case LIR_gti: MRLE(rr, rf); break; |
2122 | case LIR_gei: MRL( rr, rf); break; |
2123 | case LIR_ltui: MRAE(rr, rf); break; |
2124 | case LIR_leui: MRA( rr, rf); break; |
2125 | case LIR_gtui: MRBE(rr, rf); break; |
2126 | case LIR_geui: MRB( rr, rf); break; |
2127 |             default: NanoAssert(0); break;
2128 | } |
2129 | |
2130 | if (rr != rt) |
2131 | MR(rr, rt); |
2132 | |
2133 |         NanoAssert(ins->getReg() == rr);
2134 | freeResourcesOf(ins); |
2135 | if (!iftrue->isInReg()) { |
2136 |             NanoAssert(rt == rr);
2137 | findSpecificRegForUnallocated(iftrue, rr); |
2138 | } |
2139 | |
2140 | asm_cmp(condval); |
2141 | } |
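     | 
     |     // For the integer case, e.g. 'ins = cmovi (lti a, b), T, F', the
     |     // code above executes roughly as (a sketch):
     |     //
     |     //    cmp    ra, rb        # from asm_cmpi()
     |     //    mov    rr, rt        # rr = T
     |     //    cmovge rr, rf        # opposite sense: if !(a < b), rr = F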
2142 | |
2143 | void Assembler::asm_param(LIns* ins) |
2144 | { |
2145 | uint32_t arg = ins->paramArg(); |
2146 | uint32_t kind = ins->paramKind(); |
2147 | if (kind == 0) { |
2148 | // ordinary param |
2149 | AbiKind abi = _thisfrag->lirbuf->abi; |
2150 | uint32_t abi_regcount = max_abi_regs[abi]; |
2151 | // argRegs must have as many elements as the largest argument register |
2152 | // requirement of an abi. Currently, this is 2, for ABI_FASTCALL. See |
2153 | // the definition of max_abi_regs earlier in this file. The following |
2154 | // assertion reflects this invariant: |
2155 |             NanoAssert(abi_regcount <= sizeof(argRegs)/sizeof(argRegs[0]));
2156 | if (arg < abi_regcount) { |
2157 | // Incoming arg in register. |
2158 | prepareResultReg(ins, rmask(argRegs[arg])); |
2159 | // No code to generate. |
2160 | } else { |
2161 | // Incoming arg is on stack, and rEBP points nearby (see genPrologue()). |
2162 | Register r = prepareResultReg(ins, GpRegs); |
2163 | int d = (arg - abi_regcount) * sizeof(intptr_t) + 8; |
2164 | LD(r, d, FP); |
2165 | } |
2166 | } else { |
2167 | // Saved param. |
2168 | prepareResultReg(ins, rmask(savedRegs[arg])); |
2169 | // No code to generate. |
2170 | } |
2171 | freeResourcesOf(ins); |
2172 | } |
2173 | |
2174 | void Assembler::asm_immi(LIns* ins) |
2175 | { |
2176 | Register rr = prepareResultReg(ins, GpRegs); |
2177 | |
2178 |         asm_immi(rr, ins->immI(), /*canClobberCCs*/true);
2179 | |
2180 | freeResourcesOf(ins); |
2181 | } |
2182 | |
2183 |     void Assembler::asm_immi(Register r, int32_t val, bool canClobberCCs)
2184 | { |
2185 | if (val == 0 && canClobberCCs) |
2186 | XOR(r, r); |
2187 | else |
2188 | LDi(r, val); |
2189 | } |
2190 | |
2191 |     void Assembler::asm_immd(Register r, uint64_t q, double d, bool canClobberCCs)
2192 | { |
2193 | // Floats require non-standard handling. There is no load-64-bit-immediate |
2194 | // instruction on i386, so in the general case, we must load it from memory. |
2195 | // This is unlike most other LIR operations which can be computed directly |
2196 | // in a register. We can special-case 0.0 and various other small ints |
2197 | // (1.0 on x87, any int32_t value on SSE2), but for all other values, we |
2198 | // allocate an 8-byte chunk via dataAlloc and load from there. Note that |
2199 | // this implies that floats never require spill area, since they will always |
2200 | // be rematerialized from const data (or inline instructions in the special cases). |
2201 | |
2202 | if (rmask(r) & XmmRegs) { |
2203 | if (q == 0) { |
2204 |                 // test the bit pattern (q == 0) rather than d == 0.0, since -0.0 == 0.0
2205 | SSE_XORPDr(r, r); |
2206 | } else if (d && d == (int)d && canClobberCCs) { |
2207 | // can fit in 32bits? then use cvt which is faster |
2208 | Register tr = registerAllocTmp(GpRegs); |
2209 | SSE_CVTSI2SD(r, tr); |
2210 | SSE_XORPDr(r, r); // zero r to ensure no dependency stalls |
2211 | asm_immi(tr, (int)d, canClobberCCs); |
2212 | } else { |
2213 | const uint64_t* p = findImmDFromPool(q); |
2214 | LDSDm(r, (const double*)p); |
2215 | } |
2216 | } else { |
2217 |             NanoAssert(r == FST0);
2218 | if (q == 0) { |
2219 |                 // test the bit pattern (q == 0) rather than d == 0.0, since -0.0 == 0.0
2220 | FLDZ(); |
2221 | } else if (d == 1.0) { |
2222 | FLD1(); |
2223 | } else { |
2224 | const uint64_t* p = findImmDFromPool(q); |
2225 | FLDQdm((const double*)p); |
2226 | } |
2227 | } |
2228 | } |
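     | 
     |     // The int-valued SSE2 fast path above, materialising e.g. 3.0,
     |     // executes as (a sketch; 'tr' is a GP temp):
     |     //
     |     //    mov      tr, 3
     |     //    xorpd    r, r        # break the dependency on r's old value
     |     //    cvtsi2sd r, tr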
2229 | |
2230 | void Assembler::asm_immd(LIns* ins) |
2231 | { |
2232 |         NanoAssert(ins->isImmD());
2233 | if (ins->isInReg()) { |
2234 | Register rr = ins->getReg(); |
2235 |             NanoAssert(rmask(rr) & FpRegs);
2236 |             asm_immd(rr, ins->immDasQ(), ins->immD(), /*canClobberCCs*/true);
2237 | } else { |
2238 | // Do nothing, will be rematerialized when necessary. |
2239 | } |
2240 | |
2241 | freeResourcesOf(ins); |
2242 | } |
2243 | |
2244 |     // negateMask is used by asm_fneg: XORPD with it flips bit 63, the sign bit of the low double.
2245 | #if defined __SUNPRO_CC |
2246 | // From Sun Studio C++ Readme: #pragma align inside namespace requires mangled names. |
2247 | // Initialize here to avoid multithreading contention issues during initialization. |
2248 | static uint32_t negateMask_temp[] = {0, 0, 0, 0, 0, 0, 0}; |
2249 | |
2250 | static uint32_t* negateMaskInit() |
2251 | { |
2252 |         uint32_t* negateMask = (uint32_t*)alignUp(negateMask_temp, 16);
2253 | negateMask[1] = 0x80000000; |
2254 | return negateMask; |
2255 | } |
2256 | |
2257 | static uint32_t *negateMask = negateMaskInit(); |
2258 | #else |
2259 |     static const AVMPLUS_ALIGN16(uint32_t) negateMask[] = {0,0x80000000,0,0};
2260 | #endif |
2261 | |
2262 | void Assembler::asm_fneg(LIns* ins) |
2263 | { |
2264 | LIns *lhs = ins->oprnd1(); |
2265 | |
2266 | if (_config.i386_sse2) { |
2267 | Register rr = prepareResultReg(ins, XmmRegs); |
2268 | |
2269 | // If 'lhs' isn't in a register, it can be clobbered by 'ins'. |
2270 | Register ra; |
2271 | if (!lhs->isInReg()) { |
2272 | ra = rr; |
2273 | } else if (!(rmask(lhs->getReg()) & XmmRegs)) { |
2274 | // We need to evict lhs from x87Regs, which then puts us in |
2275 | // the same situation as the !isInReg() case. |
2276 | evict(lhs); |
2277 | ra = rr; |
2278 | } else { |
2279 | ra = lhs->getReg(); |
2280 | } |
2281 | |
2282 | SSE_XORPD(rr, negateMask); |
2283 | |
2284 | if (rr != ra) |
2285 | SSE_MOVSD(rr, ra); |
2286 | |
2287 | freeResourcesOf(ins); |
2288 | if (!lhs->isInReg()) { |
2289 |                 NanoAssert(ra == rr);
2290 | findSpecificRegForUnallocated(lhs, ra); |
2291 | } |
2292 | |
2293 | } else { |
2294 | debug_only( Register rr = ) prepareResultReg(ins, x87Regs); |
2295 |             NanoAssert(FST0 == rr);
2296 | |
2297 |             NanoAssert(!lhs->isInReg() || FST0 == lhs->getReg());
2298 | |
2299 | FCHS(); |
2300 | |
2301 | freeResourcesOf(ins); |
2302 | if (!lhs->isInReg()) |
2303 | findSpecificRegForUnallocated(lhs, FST0); |
2304 | } |
2305 | } |
2306 | |
2307 | void Assembler::asm_arg(ArgType ty, LIns* ins, Register r, int32_t& stkd) |
2308 | { |
2309 | // If 'r' is known, then that's the register we have to put 'ins' |
2310 | // into. |
2311 | |
2312 | if (ty == ARGTYPE_I || ty == ARGTYPE_UI) { |
2313 | if (r != UnspecifiedReg) { |
2314 | if (ins->isImmI()) { |
2315 | // Rematerialize the constant. |
2316 |                     asm_immi(r, ins->immI(), /*canClobberCCs*/true);
2317 | } else if (ins->isInReg()) { |
2318 | if (r != ins->getReg()) |
2319 | MR(r, ins->getReg()); |
2320 | } else if (ins->isInAr()) { |
2321 | int d = arDisp(ins); |
2322 |                     NanoAssert(d != 0);
2323 | if (ins->isop(LIR_allocp)) { |
2324 | LEA(r, d, FP); |
2325 | } else { |
2326 | LD(r, d, FP); |
2327 | } |
2328 | |
2329 | } else { |
2330 | // This is the last use, so fine to assign it |
2331 | // to the scratch reg, it's dead after this point. |
2332 | findSpecificRegForUnallocated(ins, r); |
2333 | } |
2334 | } |
2335 | else { |
2336 | if (_config.i386_fixed_esp) |
2337 | asm_stkarg(ins, stkd); |
2338 | else |
2339 | asm_pusharg(ins); |
2340 | } |
2341 | |
2342 | } else { |
2343 |             NanoAssert(ty == ARGTYPE_D);
2344 | asm_farg(ins, stkd); |
2345 | } |
2346 | } |
2347 | |
2348 | void Assembler::asm_pusharg(LIns* ins) |
2349 | { |
2350 | // arg goes on stack |
2351 | if (!ins->isExtant() && ins->isImmI()) { |
2352 | PUSHi(ins->immI()); // small const we push directly |
2353 | } else if (!ins->isExtant() || ins->isop(LIR_allocp)) { |
2354 | Register ra = findRegFor(ins, GpRegs); |
2355 | PUSHr(ra); |
2356 | } else if (ins->isInReg()) { |
2357 | PUSHr(ins->getReg()); |
2358 | } else { |
2359 |             NanoAssert(ins->isInAr());
2360 | PUSHm(arDisp(ins), FP); |
2361 | } |
2362 | } |
2363 | |
2364 | void Assembler::asm_stkarg(LIns* ins, int32_t& stkd) |
2365 | { |
2366 | // arg goes on stack |
2367 | if (!ins->isExtant() && ins->isImmI()) |
2368 | { |
2369 | // small const we push directly |
2370 | STi(SP, stkd, ins->immI()); |
2371 | } |
2372 | else { |
2373 | Register ra; |
2374 | if (!ins->isInReg() || ins->isop(LIR_allocp)) |
2375 | ra = findRegFor(ins, GpRegs & (~SavedRegs)); |
2376 | else |
2377 | ra = ins->getReg(); |
2378 | ST(SP, stkd, ra); |
2379 | } |
2380 | |
2381 | stkd += sizeof(int32_t); |
2382 | } |
2383 | |
2384 | void Assembler::asm_farg(LIns* ins, int32_t& stkd) |
2385 | { |
2386 |         NanoAssert(ins->isD());
2387 | Register r = findRegFor(ins, FpRegs); |
2388 | if (rmask(r) & XmmRegs) { |
2389 | SSE_STQ(stkd, SP, r); |
2390 | } else { |
2391 | FSTPQ(stkd, SP); |
2392 | |
2393 | // 22Jul09 rickr - Enabling the evict causes a 10% slowdown on primes |
2394 | // |
2395 | // evict() triggers a very expensive fstpq/fldq pair around the store. |
2396 | // We need to resolve the bug some other way. |
2397 | // |
2398 | // see https://bugzilla.mozilla.org/show_bug.cgi?id=491084 |
2399 | |
2400 | // It's possible that the same LIns* with r=FST0 will appear in the argument list more |
2401 | // than once. In this case FST0 will not have been evicted and the multiple pop |
2402 | // actions will unbalance the FPU stack. A quick fix is to always evict FST0 manually. |
2403 |             NanoAssert(r == FST0);
2404 |             NanoAssert(ins == _allocator.getActive(r));
2405 | evict(ins); |
2406 | } |
2407 | if (!_config.i386_fixed_esp) |
2408 | SUBi(rESP, 8); |
2409 | |
2410 | stkd += sizeof(double); |
2411 | } |
2412 | |
2413 | void Assembler::asm_fop(LIns* ins) |
2414 | { |
2415 | LOpcode op = ins->opcode(); |
2416 | if (_config.i386_sse2) |
2417 | { |
2418 | LIns *lhs = ins->oprnd1(); |
2419 | LIns *rhs = ins->oprnd2(); |
2420 | |
2421 | RegisterMask allow = XmmRegs; |
2422 | Register rb = UnspecifiedReg; |
2423 | if (lhs != rhs) { |
2424 | rb = findRegFor(rhs, allow); |
2425 | allow &= ~rmask(rb); |
2426 | } |
2427 | |
2428 | Register rr = prepareResultReg(ins, allow); |
2429 | |
2430 | // If 'lhs' isn't in a register, it can be clobbered by 'ins'. |
2431 | Register ra; |
2432 | if (!lhs->isInReg()) { |
2433 | ra = rr; |
2434 | |
2435 | } else if (!(rmask(lhs->getReg()) & XmmRegs)) { |
2436 |                 NanoAssert(lhs->getReg() == FST0);
2437 | |
2438 | // We need to evict lhs from x87Regs, which then puts us in |
2439 | // the same situation as the !isInReg() case. |
2440 | evict(lhs); |
2441 | ra = rr; |
2442 | |
2443 | } else { |
2444 | ra = lhs->getReg(); |
2445 |                 NanoAssert(rmask(ra) & XmmRegs);
2446 | } |
2447 | |
2448 | if (lhs == rhs) |
2449 | rb = ra; |
2450 | |
2451 | switch (op) { |
2452 | case LIR_addd: SSE_ADDSD(rr, rb); break; |
2453 | case LIR_subd: SSE_SUBSD(rr, rb); break; |
2454 | case LIR_muld: SSE_MULSD(rr, rb); break; |
2455 | case LIR_divd: SSE_DIVSD(rr, rb); break; |
2456 |                 default:       NanoAssert(0);
2457 | } |
2458 | |
2459 | if (rr != ra) |
2460 | SSE_MOVSD(rr, ra); |
2461 | |
2462 | freeResourcesOf(ins); |
2463 | if (!lhs->isInReg()) { |
2464 |                 NanoAssert(ra == rr);
2465 | findSpecificRegForUnallocated(lhs, ra); |
2466 | } |
2467 | } |
2468 | else |
2469 | { |
2470 | // We swap lhs/rhs on purpose here, it works out better with |
2471 | // only one fpu reg -- we can use divr/subr. |
2472 | LIns* rhs = ins->oprnd1(); |
2473 | LIns* lhs = ins->oprnd2(); |
2474 | debug_only( Register rr = ) prepareResultReg(ins, rmask(FST0)); |
2475 |             NanoAssert(FST0 == rr);
2476 |             NanoAssert(!lhs->isInReg() || FST0 == lhs->getReg());
2477 | |
2478 | if (rhs->isImmD()) { |
2479 | const uint64_t* p = findImmDFromPool(rhs->immDasQ()); |
2480 | |
2481 | switch (op) { |
2482 | case LIR_addd: FADDdm( (const double*)p); break; |
2483 | case LIR_subd: FSUBRdm((const double*)p); break; |
2484 | case LIR_muld: FMULdm( (const double*)p); break; |
2485 | case LIR_divd: FDIVRdm((const double*)p); break; |
2486 |                     default:       NanoAssert(0);
2487 | } |
2488 | |
2489 | } else { |
2490 | int db = findMemFor(rhs); |
2491 | |
2492 | switch (op) { |
2493 | case LIR_addd: FADD( db, FP); break; |
2494 | case LIR_subd: FSUBR(db, FP); break; |
2495 | case LIR_muld: FMUL( db, FP); break; |
2496 | case LIR_divd: FDIVR(db, FP); break; |
2497 |                     default:       NanoAssert(0);
2498 | } |
2499 | } |
2500 | freeResourcesOf(ins); |
2501 | if (!lhs->isInReg()) { |
2502 | findSpecificRegForUnallocated(lhs, FST0); |
2503 | } |
2504 | } |
2505 | } |
2506 | |
2507 | void Assembler::asm_i2d(LIns* ins) |
2508 | { |
2509 | LIns* lhs = ins->oprnd1(); |
2510 | |
2511 | Register rr = prepareResultReg(ins, FpRegs); |
2512 | if (rmask(rr) & XmmRegs) { |
2513 | // todo support int value in memory |
2514 | Register ra = findRegFor(lhs, GpRegs); |
2515 | SSE_CVTSI2SD(rr, ra); |
2516 | SSE_XORPDr(rr, rr); // zero rr to ensure no dependency stalls |
2517 | } else { |
2518 | int d = findMemFor(lhs); |
2519 | FILD(d, FP); |
2520 | } |
2521 | |
2522 | freeResourcesOf(ins); |
2523 | } |
2524 | |
2525 | void Assembler::asm_ui2d(LIns* ins) |
2526 | { |
2527 | LIns* lhs = ins->oprnd1(); |
2528 | |
2529 | Register rr = prepareResultReg(ins, FpRegs); |
2530 | if (rmask(rr) & XmmRegs) { |
2531 | Register rt = registerAllocTmp(GpRegs); |
2532 | |
2533 | // Technique inspired by gcc disassembly. Edwin explains it: |
2534 | // |
2535 | // rt is 0..2^32-1 |
2536 | // |
2537 | // sub rt,0x80000000 |
2538 | // |
2539 | // Now rt is -2^31..2^31-1, i.e. the range of int, but not the same value |
2540 | // as before. |
2541 | // |
2542 | // cvtsi2sd rr,rt |
2543 | // |
2544 | // rr is now a double with the int value range. |
2545 | // |
2546 | // addsd rr, 2147483648.0 |
2547 | // |
2548 | // Adding back double(0x80000000) makes the range 0..2^32-1. |
2549 | |
2550 |             static const double k_NEGONE = 2147483648.0;  // 2^31 (0x80000000), despite the name
2551 | SSE_ADDSDm(rr, &k_NEGONE); |
2552 | |
2553 | SSE_CVTSI2SD(rr, rt); |
2554 | SSE_XORPDr(rr, rr); // zero rr to ensure no dependency stalls |
2555 | |
2556 | if (lhs->isInRegMask(GpRegs)) { |
2557 | Register ra = lhs->getReg(); |
2558 | LEA(rt, 0x80000000, ra); |
2559 | |
2560 | } else { |
2561 | const int d = findMemFor(lhs); |
2562 | SUBi(rt, 0x80000000); |
2563 | LD(rt, d, FP); |
2564 | } |
2565 | |
2566 | } else { |
2567 | // Use space just below rESP and use PUSH to avoid writing |
2568 | // past the end of the stack, see bug 590553. |
2569 | Register ra = findRegFor(lhs, GpRegs); |
2570 |             NanoAssert(rr == FST0);
2571 | ADDi(SP, 8); // fix up the stack |
2572 | FILDQ(0, SP); // convert int64 to double |
2573 | PUSHr(ra); // low 32 bits = unsigned value |
2574 | PUSHi(0); // high 32 bits = 0 |
2575 | } |
2576 | |
2577 | freeResourcesOf(ins); |
2578 | } |
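     | 
     |     // The x87 path above comes out, in execution order, roughly as:
     |     //
     |     //    push 0               # high 32 bits of an int64
     |     //    push ra              # low 32 bits = the unsigned value
     |     //    fild qword [esp]     # exact int64 -> double conversion
     |     //    add  esp, 8          # restore the stack pointer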
2579 | |
2580 | void Assembler::asm_d2i(LIns* ins) |
2581 | { |
2582 | LIns *lhs = ins->oprnd1(); |
2583 | |
2584 | if (_config.i386_sse2) { |
2585 | Register rr = prepareResultReg(ins, GpRegs); |
2586 | Register ra = findRegFor(lhs, XmmRegs); |
2587 | SSE_CVTTSD2SI(rr, ra); |
2588 | } else { |
2589 |             bool pop = !lhs->isInReg();
2590 | findSpecificRegFor(lhs, FST0); |
2591 | if (ins->isInReg()) |
2592 | evict(ins); |
2593 | int d = findMemFor(ins); |
2594 | FIST(pop, d, FP); |
2595 | } |
2596 | |
2597 | freeResourcesOf(ins); |
2598 | } |
2599 | |
2600 | void Assembler::asm_nongp_copy(Register rd, Register rs) |
2601 | { |
2602 | if ((rmask(rd) & XmmRegs) && (rmask(rs) & XmmRegs)) { |
2603 | // xmm -> xmm |
2604 | SSE_MOVSD(rd, rs); |
2605 | } else if ((rmask(rd) & GpRegs) && (rmask(rs) & XmmRegs)) { |
2606 | // xmm -> gp |
2607 | SSE_MOVD(rd, rs); |
2608 | } else { |
2609 |             NanoAssertMsgf(false, "bad asm_nongp_copy(%s, %s)", gpn(rd), gpn(rs));
2610 | } |
2611 | } |
2612 | |
2613 |     Branches Assembler::asm_branchd_helper(bool branchOnFalse, LIns* cond, NIns *targ)
2614 | { |
2615 |         NIns* patch1 = NULL;
2616 |         NIns* patch2 = NULL;
2617 | LOpcode opcode = cond->opcode(); |
2618 | |
2619 | if (_config.i386_sse2) { |
2620 | // LIR_ltd and LIR_gtd are handled by the same case because |
2621 | // asm_cmpd() converts LIR_ltd(a,b) to LIR_gtd(b,a). Likewise |
2622 | // for LIR_led/LIR_ged. |
2623 | if (branchOnFalse) { |
2624 | // op == LIR_xf |
2625 | switch (opcode) { |
2626 | case LIR_eqd: |
2627 | if (cond->oprnd1() == cond->oprnd2()) { |
2628 | JP(targ); |
2629 | } else { |
2630 | JP(targ); // unordered |
2631 | patch1 = _nIns; |
2632 | JNE(targ); |
2633 | patch2 = _nIns; |
2634 | } |
2635 | break; |
2636 | case LIR_ltd: |
2637 | case LIR_gtd: JNA(targ); break; |
2638 | case LIR_led: |
2639 | case LIR_ged: JNAE(targ); break; |
2640 |                 default:        NanoAssert(0);  break;
2641 | } |
2642 | } else { |
2643 | // op == LIR_xt |
2644 | switch (opcode) { |
2645 | case LIR_eqd: |
2646 | if (cond->oprnd1() == cond->oprnd2()) { |
2647 | JNP(targ); |
2648 | } else { |
2649 | // jp skip (2byte) |
2650 | // je target |
2651 | // skip: ... |
2652 |                         underrunProtect(16); // underrun of 7 bytes needed, but we write 2 instructions --> reserve 16
2653 | NIns *skip = _nIns; |
2654 | JE(targ); |
2655 | patch1 = _nIns; |
2656 | JP(skip); // unordered |
2657 | } |
2658 | break; |
2659 | case LIR_ltd: |
2660 | case LIR_gtd: JA(targ); break; |
2661 | case LIR_led: |
2662 | case LIR_ged: JAE(targ); break; |
2663 |                 default:        NanoAssert(0);  break;
2664 | } |
2665 | } |
2666 | } else { |
2667 | if (branchOnFalse) |
2668 | JP(targ); |
2669 | else |
2670 | JNP(targ); |
2671 | } |
2672 | |
2673 | if (!patch1) |
2674 | patch1 = _nIns; |
2675 | |
2676 | return Branches(patch1, patch2); |
2677 | } |
2678 | |
2679 | // WARNING: This function cannot generate any code that will affect the |
2680 | // condition codes prior to the generation of the |
2681 |     // ucomisd/fcompp/fcomp/fcom.  See asm_cmpi() for more details.
2682 | void Assembler::asm_cmpd(LIns *cond) |
2683 | { |
2684 | LOpcode condop = cond->opcode(); |
2685 |         NanoAssert(isCmpDOpcode(condop));
2686 | LIns* lhs = cond->oprnd1(); |
2687 | LIns* rhs = cond->oprnd2(); |
2688 |         NanoAssert(lhs->isD() && rhs->isD());
2689 | |
2690 | if (_config.i386_sse2) { |
2691 | // First, we convert (a < b) into (b > a), and (a <= b) into (b >= a). |
2692 | if (condop == LIR_ltd) { |
2693 | condop = LIR_gtd; |
2694 | LIns* t = lhs; lhs = rhs; rhs = t; |
2695 | } else if (condop == LIR_led) { |
2696 | condop = LIR_ged; |
Value stored to 'condop' is never read | |
2697 | LIns* t = lhs; lhs = rhs; rhs = t; |
2698 | } |
2699 | |
2700 | // LIR_eqd, if lhs == rhs: |
2701 | // ucomisd ZPC outcome (SETNP/JNP succeeds if P==0) |
2702 | // ------- --- ------- |
2703 | // UNORDERED 111 SETNP/JNP fails |
2704 | // EQUAL 100 SETNP/JNP succeeds |
2705 | // |
2706 | // LIR_eqd, if lhs != rhs; |
2707 | // ucomisd ZPC outcome (SETP/JP succeeds if P==0, |
2708 | // SETE/JE succeeds if Z==0) |
2709 | // ------- --- ------- |
2710 | // UNORDERED 111 SETP/JP succeeds (and skips to fail target) |
2711 | // EQUAL 100 SETP/JP fails, SETE/JE succeeds |
2712 | // GREATER_THAN 000 SETP/JP fails, SETE/JE fails |
2713 | // LESS_THAN 001 SETP/JP fails, SETE/JE fails |
2714 | // |
2715 | // LIR_gtd: |
2716 | // ucomisd ZPC outcome (SETA/JA succeeds if CZ==00) |
2717 | // ------- --- ------- |
2718 | // UNORDERED 111 SETA/JA fails |
2719 | // EQUAL 100 SETA/JA fails |
2720 | // GREATER_THAN 000 SETA/JA succeeds |
2721 | // LESS_THAN 001 SETA/JA fails |
2722 | // |
2723 | // LIR_ged: |
2724 | // ucomisd ZPC outcome (SETAE/JAE succeeds if C==0) |
2725 | // ------- --- ------- |
2726 | // UNORDERED 111 SETAE/JAE fails |
2727 | // EQUAL 100 SETAE/JAE succeeds |
2728 | // GREATER_THAN 000 SETAE/JAE succeeds |
2729 | // LESS_THAN 001 SETAE/JAE fails |
2730 | |
2731 | Register ra, rb; |
2732 | findRegFor2(XmmRegs, lhs, ra, XmmRegs, rhs, rb); |
2733 | SSE_UCOMISD(ra, rb); |
2734 | |
2735 | } else { |
2736 | // First, we convert (a > b) into (b < a), and (a >= b) into (b <= a). |
2737 | // Note that this is the opposite of the sse2 conversion above. |
2738 | if (condop == LIR_gtd) { |
2739 | condop = LIR_ltd; |
2740 | LIns* t = lhs; lhs = rhs; rhs = t; |
2741 | } else if (condop == LIR_ged) { |
2742 | condop = LIR_led; |
2743 | LIns* t = lhs; lhs = rhs; rhs = t; |
2744 | } |
2745 | |
2746 | // FNSTSW_AX puts the flags into AH like so: B:C3:TOP3:TOP2:TOP1:C2:C1:C0. |
2747 | // Furthermore, fcom/fcomp/fcompp sets C3:C2:C0 the same values |
2748 | // that Z:P:C are set by ucomisd, and the relative positions in AH |
2749 | // line up. (Someone at Intel has a sense of humour.) Therefore |
2750 | // we can use the same lahf/test(mask) technique as used in the |
2751 | // sse2 case above. We could use fcomi/fcomip/fcomipp which set |
2752 | // ZPC directly and then use LAHF instead of FNSTSW_AX and make |
2753 | // this code generally more like the sse2 code, but we don't |
2754 | // because fcomi/fcomip/fcomipp/lahf aren't available on earlier |
2755 | // x86 machines. |
2756 | // |
2757 | // The masks are as follows: |
2758 | // - LIR_eqd: mask == 0x44 == 0100_0100b, which extracts 0Z00_0P00 from AH. |
2759 | // - LIR_ltd: mask == 0x05 == 0000_0101b, which extracts 0000_0P0C from AH. |
2760 | // - LIR_led: mask == 0x41 == 0100_0001b, which extracts 0Z00_000C from AH. |
2761 | // |
2762 | // LIR_eqd: |
2763 | // ucomisd C3:C2:C0 lahf/test(0x44) SZP outcome |
2764 | // ------- -------- --------- --- ------- |
2765 | // UNORDERED 111 0100_0100 001 SETNP fails |
2766 | // EQUAL 100 0100_0000 000 SETNP succeeds |
2767 | // GREATER_THAN 000 0000_0000 011 SETNP fails |
2768 | // LESS_THAN 001 0000_0000 011 SETNP fails |
2769 | // |
2770 | // LIR_ltd: |
2771 | // fcom C3:C2:C0 lahf/test(0x05) SZP outcome |
2772 | // ------- -------- --------- --- ------- |
2773 | // UNORDERED 111 0000_0101 001 SETNP fails |
2774 | // EQUAL 100 0000_0000 011 SETNP fails |
2775 | // GREATER_THAN 000 0000_0000 011 SETNP fails |
2776 | // LESS_THAN 001 0000_0001 000 SETNP succeeds |
2777 | // |
2778 | // LIR_led: |
2779 | // fcom C3:C2:C0 lahf/test(0x41) SZP outcome |
2780 | // ------- --- --------- --- ------- |
2781 | // UNORDERED 111 0100_0001 001 SETNP fails |
2782 | // EQUAL 100 0100_0000 000 SETNP succeeds |
2783 | // GREATER_THAN 000 0000_0000 011 SETNP fails |
2784 | // LESS_THAN 001 0000_0001 010 SETNP succeeds |
2785 | |
2786 | int mask = 0; // init to avoid MSVC compile warnings |
2787 | switch (condop) { |
2788 | case LIR_eqd: mask = 0x44; break; |
2789 | case LIR_ltd: mask = 0x05; break; |
2790 | case LIR_led: mask = 0x41; break; |
2791 |             default: NanoAssert(0); break;
2792 | } |
2793 | |
2794 | evictIfActive(rEAX); |
2795 |             bool pop = !lhs->isInReg();
2796 | findSpecificRegFor(lhs, FST0); |
2797 | |
2798 | if (lhs == rhs) { |
2799 | // NaN test. |
2800 | TEST_AH(mask); |
2801 | FNSTSW_AX(); // requires rEAX to be free |
2802 | if (pop) |
2803 | FCOMPP(); |
2804 | else |
2805 | FCOMP(); |
2806 | FLDr(FST0); // DUP |
2807 | } else { |
2808 | TEST_AH(mask); |
2809 | FNSTSW_AX(); // requires rEAX to be free |
2810 | if (rhs->isImmD()) { |
2811 | const uint64_t* p = findImmDFromPool(rhs->immDasQ()); |
2812 | FCOMdm(pop, (const double*)p); |
2813 | } else { |
2814 | int d = findMemFor(rhs); |
2815 | FCOM(pop, d, FP); |
2816 | } |
2817 | } |
2818 | } |
2819 | } |
2820 | |
2821 | // Increment the 32-bit profiling counter at pCtr, without |
2822 | // changing any registers. |
2823 | verbose_only( |
2824 | void Assembler::asm_inc_m32(uint32_t* pCtr) |
2825 | { |
2826 | INCLi(int32_t(pCtr)); |
2827 | } |
2828 | ) |
2829 | |
2830 | void Assembler::nativePageReset() |
2831 | {} |
2832 | |
2833 | void Assembler::nativePageSetup() |
2834 | { |
2835 |         NanoAssert(!_inExit);
2836 | if (!_nIns) |
2837 | codeAlloc(codeStart, codeEnd, _nIns verbose_only(, codeBytes)); |
2838 | |
2839 | // add some random padding, so functions aren't predictably placed. |
2840 | if (_config.harden_function_alignment) |
2841 | { |
2842 | int32_t pad = _noise->getValue(LARGEST_UNDERRUN_PROT); |
2843 | underrunProtect(pad); |
2844 | _nIns -= pad; |
2845 |             VMPI_memset(_nIns, INT3_OP, pad);
2846 | PERFM_NVPROF("hardening:func-align", pad); |
2847 | } |
2848 | } |
2849 | |
2850 |     // Ensure there is room to emit at least n more bytes of code.
2851 | void Assembler::underrunProtect(int n) |
2852 | { |
2853 | NIns *eip = _nIns; |
2854 |         NanoAssertMsg(n<=LARGEST_UNDERRUN_PROT, "constant LARGEST_UNDERRUN_PROT is too small");
2855 | // This may be in a normal code chunk or an exit code chunk. |
2856 | if (eip - n < codeStart) { |
2857 | codeAlloc(codeStart, codeEnd, _nIns verbose_only(, codeBytes)); |
2858 | JMP(eip); |
2859 | } |
2860 | } |
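     | 
     |     // Typical use, as a sketch: a code-gen macro about to emit a 5-byte
     |     // instruction calls underrunProtect(5) first.  If fewer than 5 bytes
     |     // remain in the current chunk, a fresh chunk is allocated and a JMP
     |     // to the old position is emitted, so the code subsequently generated
     |     // in the new chunk flows into the code that already exists.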
2861 | |
2862 | void Assembler::asm_insert_random_nop() |
2863 | { |
2864 |         // emit one of several harmless nop-like instructions, chosen at random
2865 | uint32_t r = _noise->getValue(5); |
2866 | switch(r) |
2867 | { |
2868 | case 0: MR(rEAX,rEAX); break; |
2869 | case 1: MR(rEDI,rEDI); break; |
2870 | case 2: MR(rECX,rECX); break; |
2871 | case 3: LEA(rECX,0,rECX); break; |
2872 | case 4: LEA(rESP,0,rESP); break; |
2873 | } |
2874 | } |
2875 | |
2876 | void Assembler::asm_ret(LIns* ins) |
2877 | { |
2878 | // Unreachable, so assume correct stack depth. |
2879 | debug_only( _fpuStkDepth = 0; ) |
2880 | |
2881 | genEpilogue(); |
2882 | |
2883 | // Restore rESP from rEBP, undoing SUBi(SP,amt) in the prologue |
2884 | MR(SP,FP); |
2885 | |
2886 | releaseRegisters(); |
2887 | assignSavedRegs(); |
2888 | |
2889 | LIns *val = ins->oprnd1(); |
2890 | if (ins->isop(LIR_reti)) { |
2891 | findSpecificRegFor(val, retRegs[0]); |
2892 | } else { |
2893 | NanoAssert(ins->isop(LIR_retd))do { } while (0); |
2894 | findSpecificRegFor(val, FST0); |
2895 | fpu_pop(); |
2896 | } |
2897 | } |
2898 | |
2899 | void Assembler::swapCodeChunks() { |
2900 | if (!_nExitIns) |
2901 | codeAlloc(exitStart, exitEnd, _nExitIns verbose_only(, exitBytes)); |
2902 |         SWAP(NIns*, _nIns, _nExitIns);
2903 |         SWAP(NIns*, codeStart, exitStart);
2904 |         SWAP(NIns*, codeEnd, exitEnd);
2905 | verbose_only( SWAP(size_t, codeBytes, exitBytes); ) |
2906 | } |
2907 | |
2908 | #endif /* FEATURE_NANOJIT */ |
2909 | } |