Bug Summary

File: platform/mac/avmshell/../../../nanojit/Nativei386.cpp
Location: line 2696, column 17
Description: Value stored to 'condop' is never read
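
The diagnostic is a dead-store finding: a value is assigned to the local variable 'condop', but that stored value is never read before it is overwritten or the variable goes out of scope. The following is a hypothetical, minimal C++ reduction of the pattern the analyzer reports; the function and variable names here are illustrative and are not the actual code at line 2696.

    #include <cstdio>

    // Dead store: the first assignment to 'condop' is never read;
    // it is unconditionally overwritten before any use.
    int pickCondOp(int opcode) {
        int condop = opcode & 0x0f;   // "Value stored to 'condop' is never read"
        condop = opcode >> 4;         // overwrites the previous store
        return condop;
    }

    int main() {
        std::printf("%d\n", pickCondOp(0x94));   // prints 9
        return 0;
    }

A dead store like this is harmless at run time, but it usually points at leftover or unfinished logic; the typical fix is to delete the redundant assignment or fold it into the later one.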

Annotated Source Code

1/* -*- Mode: C++; c-basic-offset: 4; indent-tabs-mode: nil; tab-width: 4 -*- */
2/* vi: set ts=4 sw=4 expandtab: (add to ~/.vimrc: set modeline modelines=5) */
3/* ***** BEGIN LICENSE BLOCK *****
4 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
5 *
6 * The contents of this file are subject to the Mozilla Public License Version
7 * 1.1 (the "License"); you may not use this file except in compliance with
8 * the License. You may obtain a copy of the License at
9 * http://www.mozilla.org/MPL/
10 *
11 * Software distributed under the License is distributed on an "AS IS" basis,
12 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
13 * for the specific language governing rights and limitations under the
14 * License.
15 *
16 * The Original Code is [Open Source Virtual Machine].
17 *
18 * The Initial Developer of the Original Code is
19 * Adobe System Incorporated.
20 * Portions created by the Initial Developer are Copyright (C) 2004-2007
21 * the Initial Developer. All Rights Reserved.
22 *
23 * Contributor(s):
24 * Adobe AS3 Team
25 * Mozilla TraceMonkey Team
26 * Asko Tontti <atontti@cc.hut.fi>
27 *
28 * Alternatively, the contents of this file may be used under the terms of
29 * either the GNU General Public License Version 2 or later (the "GPL"), or
30 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
31 * in which case the provisions of the GPL or the LGPL are applicable instead
32 * of those above. If you wish to allow use of your version of this file only
33 * under the terms of either the GPL or the LGPL, and not to allow others to
34 * use your version of this file under the terms of the MPL, indicate your
35 * decision by deleting the provisions above and replace them with the notice
36 * and other provisions required by the GPL or the LGPL. If you do not delete
37 * the provisions above, a recipient may use your version of this file under
38 * the terms of any one of the MPL, the GPL or the LGPL.
39 *
40 * ***** END LICENSE BLOCK ***** */
41#include "nanojit.h"
42
43#ifdef _MSC_VER
44 // disable some specific warnings which are normally useful, but pervasive in the code-gen macros
45 #pragma warning(disable:4310) // cast truncates constant value
46#endif
47
48namespace nanojit
49{
50 #if defined FEATURE_NANOJIT && defined NANOJIT_IA32
51
52 #ifdef NJ_VERBOSE
53 const char *regNames[] = {
54 "eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi",
55 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",
56 "f0"
57 };
58
59 const char *gpRegNames8lo[] = { "al", "cl", "dl", "bl" };
60 const char *gpRegNames8hi[] = { "ah", "ch", "dh", "bh" };
61 #endif
62
63 #define TODO(x) do{ verbose_only(outputf(#x);) NanoAssertMsgf(false, "%s", #x); } while(0)
64
65 const Register Assembler::argRegs[] = { rECX, rEDX };
66 const Register Assembler::retRegs[] = { rEAX, rEDX };
67 const Register Assembler::savedRegs[] = { rEBX, rESI, rEDI };
68
69 const static uint8_t max_abi_regs[] = {
70 2, /* ABI_FASTCALL */
71 1, /* ABI_THISCALL */
72 0, /* ABI_STDCALL */
73 0 /* ABI_CDECL */
74 };
75
76 #define RB(r) gpRegNames8lo[REGNUM(r)]
77 #define RBhi(r) gpRegNames8hi[REGNUM(r)]
78
79 typedef Register R;
80 typedef int32_t I32;
81
82 // Length: 2--6 bytes.
83 inline void Assembler::MODRMsib(I32 r, R b, R i, I32 s, I32 d) {
84 if (d == 0 && b != rEBP) {
85 SIB(s, REGNUM(i), REGNUM(b));
86 MODRM(0, r, 4); // amode == (b + i<<s)
87 } else if (isS8(d)) {
88 IMM8(d);
89 SIB(s, REGNUM(i), REGNUM(b));
90 MODRM(1, r, 4); // amode == d8(b + i<<s)
91 } else {
92 IMM32(d);
93 SIB(s, REGNUM(i), REGNUM(b));
94 MODRM(2, r, 4); // amode == d32(b + i<<s)
95 }
96 }
97
98 // Length: 1--6 bytes.
99 inline void Assembler::MODRMm(I32 r, I32 d, R b) {
100 if (b == UnspecifiedReg) {
101 IMM32(d);
102 MODRM(0, r, 5); // amode == (d32)
103 } else if (b == rESP) {
104 MODRMsib(r, b, rESP, 0, d); // amode == d(b)
105 } else if (d == 0 && b != rEBP) {
106 MODRM(0, r, REGNUM(b)); // amode == (r)
107 } else if (isS8(d)) {
108 IMM8(d);
109 MODRM(1, r, REGNUM(b)); // amode == d8(b)
110 } else {
111 IMM32(d);
112 MODRM(2, r, REGNUM(b)); // amode == d32(b)
113 }
114 }
115
116 // Length: 5 bytes.
117 inline void Assembler::MODRMdm(I32 r, I32 addr) {
118 IMM32(addr);
119 MODRM(0, r, 5); // amode == d32(r)
120 }
121
122 inline void Assembler::ALU0(I32 opc) {
123 underrunProtect(1);
124 OPCODE(opc);
125 }
126
127 inline void Assembler::ALUm(I32 opc, I32 r, I32 d, R b) {
128 underrunProtect(8);
129 MODRMm(r, d, b);
130 OPCODE(opc);
131 }
132
133 inline void Assembler::ALUdm(I32 opc, R r, I32 addr) {
134 underrunProtect(6);
135 MODRMdm(REGNUM(r), addr);
136 OPCODE(opc);
137 }
138
139 inline void Assembler::ALUsib(I32 opc, R r, R base, R index, I32 scale, I32 disp) {
140 underrunProtect(7);
141 MODRMsib(REGNUM(r), base, index, scale, disp);
142 OPCODE(opc);
143 }
144
145 inline void Assembler::ALUsib16(I32 opc, R r, R base, R index, I32 scale, I32 disp) {
146 underrunProtect(8);
147 MODRMsib(REGNUM(r), base, index, scale, disp);
148 OPCODE(opc);
149 OPCODE(0x66);
150 }
151
152 inline void Assembler::ALUm16(I32 opc, I32 r, I32 d, R b) {
153 underrunProtect(9);
154 MODRMm(r, d, b);
155 OPCODE(opc);
156 OPCODE(0x66);
157 }
158
159 inline void Assembler::ALU2dm(I32 opc2, R r, I32 addr) {
160 underrunProtect(7);
161 MODRMdm(REGNUM(r), addr);
162 OPCODE2(opc2);
163 }
164
165 inline void Assembler::ALU2m(I32 opc2, R r, I32 d, R b) {
166 underrunProtect(9);
167 MODRMm(REGNUM(r), d, b);
168 OPCODE2(opc2);
169 }
170
171 inline void Assembler::ALU2sib(I32 opc2, Register r, R base, R index, I32 scale, I32 disp) {
172 underrunProtect(8);
173 MODRMsib(REGNUM(r), base, index, scale, disp);
174 OPCODE2(opc2);
175 }
176
177 inline void Assembler::ALUi(I32 opc, R r, I32 i) {
178 underrunProtect(6);
179 NanoAssert(REGNUM(r) < 8);
180 if (isS8(i)) {
181 IMM8(i);
182 MODRMr(opc >> 3, REGNUM(r));
183 OPCODE(0x83);
184 } else {
185 IMM32(i);
186 if ( r == rEAX) {
187 OPCODE(opc);
188 } else {
189 MODRMr(opc >> 3, REGNUM(r));
190 OPCODE(0x81);
191 }
192 }
193 }
194
195 inline void Assembler::ALUmi(I32 opc, I32 d, Register b, I32 i) {
196 underrunProtect(10);
197 NanoAssert(REGNUM(b) < 8);
198 if (isS8(i)) {
199 IMM8(i);
200 MODRMm(opc >> 3, d, b);
201 OPCODE(0x83);
202 } else {
203 IMM32(i);
204 MODRMm(opc >> 3, d, b);
205 OPCODE(0x81);
206 }
207 }
208
209 inline void Assembler::ALU2(I32 opc2, R d, R s) {
210 underrunProtect(3);
211 MODRMr(REGNUM(d), REGNUM(s));
212 OPCODE2(opc2);
213 }
214
215 inline Register Assembler::AL2AHReg(R r) {
216 NanoAssert(REGNUM(r) < 4); // one of: al, cl, dl, bl
217 Register r2 = { REGNUM(r) | 4 }; // convert to one of: ah, ch, dh, bh
218 return r2;
219 }
220
221 inline void Assembler::OR(R l, R r) { count_alu(); ALU(0x0b, REGNUM(l), r); asm_output("or %s,%s", gpn(l), gpn(r)); }
222 inline void Assembler::AND(R l, R r) { count_alu(); ALU(0x23, REGNUM(l), r); asm_output("and %s,%s", gpn(l), gpn(r)); }
223 inline void Assembler::XOR(R l, R r) { count_alu(); ALU(0x33, REGNUM(l), r); asm_output("xor %s,%s", gpn(l), gpn(r)); }
224 inline void Assembler::ADD(R l, R r) { count_alu(); ALU(0x03, REGNUM(l), r); asm_output("add %s,%s", gpn(l), gpn(r)); }
225 inline void Assembler::SUB(R l, R r) { count_alu(); ALU(0x2b, REGNUM(l), r); asm_output("sub %s,%s", gpn(l), gpn(r)); }
226 inline void Assembler::IMUL(R l, R r){ count_alu(); ALU2(0x0faf, l, r); asm_output("imul %s,%s", gpn(l), gpn(r)); }
227 inline void Assembler::DIV(R r) { count_alu(); ALU(0xf7, 7, r); asm_output("idiv edx:eax, %s", gpn(r)); }
228 inline void Assembler::NOT(R r) { count_alu(); ALU(0xf7, 2, r); asm_output("not %s", gpn(r)); }
229 inline void Assembler::NEG(R r) { count_alu(); ALU(0xf7, 3, r); asm_output("neg %s", gpn(r)); }
230 inline void Assembler::AND8R(R r) { count_alu(); ALU(0x22, REGNUM(r), AL2AHReg(r)); asm_output("andb %s, %s", RB(r), RBhi(r)); }
231
232 inline void Assembler::SHR(R r, R s) {
233 count_alu();
234 NanoAssert(s == rECX); (void)s;
235 ALU(0xd3, 5, r);
236 asm_output("shr %s,%s", gpn(r), gpn(s));
237 }
238
239 inline void Assembler::SAR(R r, R s) {
240 count_alu();
241 NanoAssert(s == rECX); (void)s;
242 ALU(0xd3, 7, r);
243 asm_output("sar %s,%s", gpn(r), gpn(s));
244 }
245
246 inline void Assembler::SHL(R r, R s) {
247 count_alu();
248 NanoAssert(s == rECX); (void)s;
249 ALU(0xd3, 4, r);
250 asm_output("shl %s,%s", gpn(r), gpn(s));
251 }
252
253 inline void Assembler::SHIFTi(I32 c, R r, I32 i) {
254 underrunProtect(3);
255 IMM8(i);
256 MODRMr(c, REGNUM(r));
257 OPCODE(0xc1);
258 }
259
260 inline void Assembler::SHLi(R r, I32 i) { count_alu(); SHIFTi(4, r, i); asm_output("shl %s,%d", gpn(r), i); }
261 inline void Assembler::SHRi(R r, I32 i) { count_alu(); SHIFTi(5, r, i); asm_output("shr %s,%d", gpn(r), i); }
262 inline void Assembler::SARi(R r, I32 i) { count_alu(); SHIFTi(7, r, i); asm_output("sar %s,%d", gpn(r), i); }
263
264 inline void Assembler::MOVZX8(R d, R s) { count_alu(); ALU2(0x0fb6, d, s); asm_output("movzx %s,%s", gpn(d), gpn(s)); }
265
266 inline void Assembler::SUBi(R r, I32 i) { count_alu(); ALUi(0x2d, r, i); asm_output("sub %s,%d", gpn(r), i); }
267 inline void Assembler::ADDi(R r, I32 i) { count_alu(); ALUi(0x05, r, i); asm_output("add %s,%d", gpn(r), i); }
268 inline void Assembler::ANDi(R r, I32 i) { count_alu(); ALUi(0x25, r, i); asm_output("and %s,%d", gpn(r), i); }
269 inline void Assembler::ORi(R r, I32 i) { count_alu(); ALUi(0x0d, r, i); asm_output("or %s,%d", gpn(r), i); }
270 inline void Assembler::XORi(R r, I32 i) { count_alu(); ALUi(0x35, r, i); asm_output("xor %s,%d", gpn(r), i); }
271
272 inline void Assembler::ADDmi(I32 d, R b, I32 i) { count_alust(); ALUmi(0x05, d, b, i); asm_output("add %d(%s), %d", d, gpn(b), i); }
273
274 inline void Assembler::TEST(R d, R s) { count_alu(); ALU(0x85, REGNUM(d), s); asm_output("test %s,%s", gpn(d), gpn(s)); }
275 inline void Assembler::CMP(R l, R r) { count_alu(); ALU(0x3b, REGNUM(l), r); asm_output("cmp %s,%s", gpn(l), gpn(r)); }
276 inline void Assembler::CMPi(R r, I32 i) { count_alu(); ALUi(0x3d, r, i); asm_output("cmp %s,%d", gpn(r), i); }
277
278 inline void Assembler::LEA(R r, I32 d, R b) { count_alu(); ALUm(0x8d, REGNUM(r), d, b); asm_output("lea %s,%d(%s)", gpn(r), d, gpn(b)); }
279
280 inline void Assembler::CDQ() { SARi(rEDX, 31); MR(rEDX, rEAX); }
281
282 inline void Assembler::INCLi(I32 p) {
283 count_alu();
284 underrunProtect(6);
285 IMM32((uint32_t)(ptrdiff_t)p);
286 OPCODE(0x05);
287 OPCODE(0xFF);
288 asm_output("incl (%p)", (void*)p);
289 }
290
291 inline void Assembler::SETE( R r) { count_alu(); ALU2(0x0f94, r, r); asm_output("sete %s", gpn(r)); }
292 inline void Assembler::SETNP(R r) { count_alu(); ALU2(0x0f9B, r, r); asm_output("setnp %s", gpn(r)); }
293 inline void Assembler::SETNPH(R r) { count_alu(); ALU2(0x0f9B, AL2AHReg(r), AL2AHReg(r)); asm_output("setnp %s", RBhi(r)); }
294 inline void Assembler::SETL( R r) { count_alu(); ALU2(0x0f9C, r, r); asm_output("setl %s", gpn(r)); }
295 inline void Assembler::SETLE(R r) { count_alu(); ALU2(0x0f9E, r, r); asm_output("setle %s", gpn(r)); }
296 inline void Assembler::SETG( R r) { count_alu(); ALU2(0x0f9F, r, r); asm_output("setg %s", gpn(r)); }
297 inline void Assembler::SETGE(R r) { count_alu(); ALU2(0x0f9D, r, r); asm_output("setge %s", gpn(r)); }
298 inline void Assembler::SETB( R r) { count_alu(); ALU2(0x0f92, r, r); asm_output("setb %s", gpn(r)); }
299 inline void Assembler::SETBE(R r) { count_alu(); ALU2(0x0f96, r, r); asm_output("setbe %s", gpn(r)); }
300 inline void Assembler::SETA( R r) { count_alu(); ALU2(0x0f97, r, r); asm_output("seta %s", gpn(r)); }
301 inline void Assembler::SETAE(R r) { count_alu(); ALU2(0x0f93, r, r); asm_output("setae %s", gpn(r)); }
302 inline void Assembler::SETO( R r) { count_alu(); ALU2(0x0f92, r, r); asm_output("seto %s", gpn(r)); }
303
304 inline void Assembler::MREQ(R d, R s) { count_alu(); ALU2(0x0f44, d, s); asm_output("cmove %s,%s", gpn(d), gpn(s)); }
305 inline void Assembler::MRNE(R d, R s) { count_alu(); ALU2(0x0f45, d, s); asm_output("cmovne %s,%s", gpn(d), gpn(s)); }
306 inline void Assembler::MRL( R d, R s) { count_alu(); ALU2(0x0f4C, d, s); asm_output("cmovl %s,%s", gpn(d), gpn(s)); }
307 inline void Assembler::MRLE(R d, R s) { count_alu(); ALU2(0x0f4E, d, s); asm_output("cmovle %s,%s", gpn(d), gpn(s)); }
308 inline void Assembler::MRG( R d, R s) { count_alu(); ALU2(0x0f4F, d, s); asm_output("cmovg %s,%s", gpn(d), gpn(s)); }
309 inline void Assembler::MRGE(R d, R s) { count_alu(); ALU2(0x0f4D, d, s); asm_output("cmovge %s,%s", gpn(d), gpn(s)); }
310 inline void Assembler::MRB( R d, R s) { count_alu(); ALU2(0x0f42, d, s); asm_output("cmovb %s,%s", gpn(d), gpn(s)); }
311 inline void Assembler::MRBE(R d, R s) { count_alu(); ALU2(0x0f46, d, s); asm_output("cmovbe %s,%s", gpn(d), gpn(s)); }
312 inline void Assembler::MRA( R d, R s) { count_alu(); ALU2(0x0f47, d, s); asm_output("cmova %s,%s", gpn(d), gpn(s)); }
313 inline void Assembler::MRAE(R d, R s) { count_alu(); ALU2(0x0f43, d, s); asm_output("cmovae %s,%s", gpn(d), gpn(s)); }
314 inline void Assembler::MRNO(R d, R s) { count_alu(); ALU2(0x0f41, d, s); asm_output("cmovno %s,%s", gpn(d), gpn(s)); }
315
316 // these aren't currently used but left in for reference
317 //#define LDEQ(r,d,b) do { ALU2m(0x0f44,r,d,b); asm_output("cmove %s,%d(%s)", gpn(r),d,gpn(b)); } while(0)
318 //#define LDNEQ(r,d,b) do { ALU2m(0x0f45,r,d,b); asm_output("cmovne %s,%d(%s)", gpn(r),d,gpn(b)); } while(0)
319
320 inline void Assembler::LD(R reg, I32 disp, R base) {
321 count_ld();
322 ALUm(0x8b, REGNUM(reg), disp, base);
323 asm_output("mov %s,%d(%s)", gpn(reg), disp, gpn(base));
324 }
325
326 inline void Assembler::LDdm(R reg, I32 addr) {
327 count_ld();
328 ALUdm(0x8b, reg, addr);
329 asm_output("mov %s,0(%p)", gpn(reg), (void*)addr);
330 }
331
332#define SIBIDX(n) "1248"[n]
333
334 inline void Assembler::LDsib(R reg, I32 disp, R base, R index, I32 scale) {
335 count_ld();
336 ALUsib(0x8b, reg, base, index, scale, disp);
337 asm_output("mov %s,%d(%s+%s*%c)", gpn(reg), disp, gpn(base), gpn(index), SIBIDX(scale));
338 }
339
340 // note: movzx/movsx are being output with an 8/16 suffix to indicate the
341 // size being loaded. This doesn't really match standard intel format
342 // (though is arguably terser and more obvious in this case) and would
343 // probably be nice to fix. (Likewise, the 8/16 bit stores being output
344 // as "mov8" and "mov16" respectively.)
345
346 // Load 16-bit, sign extend.
347 inline void Assembler::LD16S(R r, I32 d, R b) {
348 count_ld();
349 ALU2m(0x0fbf, r, d, b);
350 asm_output("movsx16 %s,%d(%s)", gpn(r), d, gpn(b));
351 }
352
353 inline void Assembler::LD16Sdm(R r, I32 addr) {
354 count_ld();
355 ALU2dm(0x0fbf, r, addr);
356 asm_output("movsx16 %s,0(%lx)", gpn(r), (unsigned long)addr);
357 }
358
359 inline void Assembler::LD16Ssib(R r, I32 disp, R base, R index, I32 scale) {
360 count_ld();
361 ALU2sib(0x0fbf, r, base, index, scale, disp);
362 asm_output("movsx16 %s,%d(%s+%s*%c)", gpn(r), disp, gpn(base), gpn(index), SIBIDX(scale));
363 }
364
365 // Load 16-bit, zero extend.
366 inline void Assembler::LD16Z(R r, I32 d, R b) {
367 count_ld();
368 ALU2m(0x0fb7, r, d, b);
369 asm_output("movzx16 %s,%d(%s)", gpn(r), d, gpn(b));
370 }
371
372 inline void Assembler::LD16Zdm(R r, I32 addr) {
373 count_ld();
374 ALU2dm(0x0fb7, r, addr);
375 asm_output("movzx16 %s,0(%lx)", gpn(r), (unsigned long)addr);
376 }
377
378 inline void Assembler::LD16Zsib(R r, I32 disp, R base, R index, I32 scale) {
379 count_ld();
380 ALU2sib(0x0fb7, r, base, index, scale, disp);
381 asm_output("movzx16 %s,%d(%s+%s*%c)", gpn(r), disp, gpn(base), gpn(index), SIBIDX(scale));
382 }
383
384 // Load 8-bit, zero extend.
385 inline void Assembler::LD8Z(R r, I32 d, R b) {
386 count_ld();
387 ALU2m(0x0fb6, r, d, b);
388 asm_output("movzx8 %s,%d(%s)", gpn(r), d, gpn(b));
389 }
390
391 inline void Assembler::LD8Zdm(R r, I32 addr) {
392 count_ld();
393 ALU2dm(0x0fb6, r, addr);
394 asm_output("movzx8 %s,0(%lx)", gpn(r), (long unsigned)addr);
395 }
396
397 inline void Assembler::LD8Zsib(R r, I32 disp, R base, R index, I32 scale) {
398 count_ld();
399 ALU2sib(0x0fb6, r, base, index, scale, disp);
400 asm_output("movzx8 %s,%d(%s+%s*%c)", gpn(r), disp, gpn(base), gpn(index), SIBIDX(scale));
401 }
402
403 // Load 8-bit, sign extend.
404 inline void Assembler::LD8S(R r, I32 d, R b) {
405 count_ld();
406 ALU2m(0x0fbe, r, d, b);
407 asm_output("movsx8 %s,%d(%s)", gpn(r), d, gpn(b));
408 }
409
410 inline void Assembler::LD8Sdm(R r, I32 addr) {
411 count_ld();
412 ALU2dm(0x0fbe, r, addr);
413 asm_output("movsx8 %s,0(%lx)", gpn(r), (long unsigned)addr);
414 }
415
416 inline void Assembler::LD8Ssib(R r, I32 disp, R base, R index, I32 scale) {
417 count_ld();
418 ALU2sib(0x0fbe, r, base, index, scale, disp);
419 asm_output("movsx8 %s,%d(%s+%s*%c)", gpn(r), disp, gpn(base), gpn(index), SIBIDX(scale));
420 }
421
422 inline void Assembler::LDi(R r, I32 i) {
423 count_ld();
424 underrunProtect(5);
425 IMM32(i);
426 NanoAssert(REGNUM(r) < 8);
427 OPCODE(0xb8 | REGNUM(r));
428 asm_output("mov %s,%d", gpn(r), i);
429 }
430
431 // Quirk of x86-32: reg must be a/b/c/d for byte stores here.
432 inline void Assembler::ST8(R base, I32 disp, R reg) {
433 count_st();
434 NanoAssert(REGNUM(reg) < 4);
435 ALUm(0x88, REGNUM(reg), disp, base);
436 asm_output("mov8 %d(%s),%s", disp, base==UnspecifiedReg ? "0" : gpn(base), gpn(reg));
437 }
438
439 // Quirk of x86-32: reg must be a/b/c/d for byte stores here.
440 inline void Assembler::ST8sib(I32 disp, R base, R index, I32 scale, R reg) {
441 count_st();
442 NanoAssert(REGNUM(reg) < 4);
443 ALUsib(0x88, reg, base, index, scale, disp);
444 asm_output("mov8 %d(%s+%s*%c),%s", disp, base==UnspecifiedReg ? "0" : gpn(base),
445 gpn(index), SIBIDX(scale), gpn(reg));
446 }
447
448 inline void Assembler::ST16(R base, I32 disp, R reg) {
449 count_st();
450 ALUm16(0x89, REGNUM(reg), disp, base);
451 asm_output("mov16 %d(%s),%s", disp, base==UnspecifiedReg ? "0" : gpn(base), gpn(reg));
452 }
453
454 inline void Assembler::ST16sib(I32 disp, R base, R index, I32 scale, R reg) {
455 count_st();
456 ALUsib16(0x89, reg, base, index, scale, disp);
457 asm_output("mov16 %d(%s+%s*%c),%s", disp, base==UnspecifiedReg ? "0" : gpn(base),
458 gpn(index), SIBIDX(scale), gpn(reg));
459 }
460
461 inline void Assembler::ST(R base, I32 disp, R reg) {
462 count_st();
463 ALUm(0x89, REGNUM(reg), disp, base);
464 asm_output("mov %d(%s),%s", disp, base==UnspecifiedReg ? "0" : gpn(base), gpn(reg));
465 }
466
467 inline void Assembler::STsib(I32 disp, R base, R index, I32 scale, R reg) {
468 count_st();
469 ALUsib(0x89, reg, base, index, scale, disp);
470 asm_output("mov %d(%s+%s*%c),%s", disp, base==UnspecifiedReg ? "0" : gpn(base),
471 gpn(index), SIBIDX(scale), gpn(reg));
472 }
473
474 inline void Assembler::ST8i(R base, I32 disp, I32 imm) {
475 count_st();
476 underrunProtect(8);
477 IMM8(imm);
478 MODRMm(0, disp, base);
479 OPCODE(0xc6);
480 asm_output("mov8 %d(%s),%d", disp, gpn(base), imm);
481 }
482
483 inline void Assembler::ST8isib(I32 disp, R base, R index, I32 scale, I32 imm) {
484 count_st();
485 underrunProtect(8);
486 IMM8(imm);
487 MODRMsib(0, base, index, scale, disp);
488 OPCODE(0xc6);
489 asm_output("mov8 %d(%s+%s*%c),%d", disp, gpn(base), gpn(index), SIBIDX(scale), imm);
490 }
491
492 inline void Assembler::ST16i(R base, I32 disp, I32 imm) {
493 count_st();
494 underrunProtect(10);
495 IMM16(imm);
496 MODRMm(0, disp, base);
497 OPCODE(0xc7);
498 OPCODE(0x66);
499 asm_output("mov16 %d(%s),%d", disp, gpn(base), imm);
500 }
501
502 inline void Assembler::ST16isib(I32 disp, R base, R index, I32 scale, I32 imm) {
503 count_st();
504 underrunProtect(10);
505 IMM16(imm);
506 MODRMsib(0, base, index, scale, disp);
507 OPCODE(0xc7);
508 OPCODE(0x66);
509 asm_output("mov16 %d(%s+%s*%c),%d", disp, gpn(base), gpn(index), SIBIDX(scale), imm);
510 }
511
512 inline void Assembler::STi(R base, I32 disp, I32 imm) {
513 count_st();
514 underrunProtect(11);
515 IMM32(imm);
516 MODRMm(0, disp, base);
517 OPCODE(0xc7);
518 asm_output("mov %d(%s),%d", disp, gpn(base), imm);
519 }
520
521 inline void Assembler::STisib(I32 disp, R base, R index, I32 scale, I32 imm) {
522 count_st();
523 underrunProtect(11);
524 IMM32(imm);
525 MODRMsib(0, base, index, scale, disp);
526 OPCODE(0xc7);
527 asm_output("mov %d(%s+%s*%c),%d", disp, gpn(base), gpn(index), SIBIDX(scale), imm);
528 }
529
530 const uint8_t INT3_OP = 0xcc;
531
532 inline void Assembler::RET() { count_ret(); ALU0(0xc3); asm_output("ret"); }
533 inline void Assembler::NOP() { count_alu(); ALU0(0x90); asm_output("nop"); }
534 inline void Assembler::INT3() { ALU0(INT3_OP); asm_output("int3"); }
535
536 inline void Assembler::PUSHi(I32 i) {
537 count_push();
538 if (isS8(i)) {
539 underrunProtect(2);
540 IMM8(i);
541 OPCODE(0x6a);
542 asm_output("push %d", i);
543 } else {
544 underrunProtect(5);
545 IMM32(i);
546 OPCODE(0x68);
547 asm_output("push %d", i);
548 }
549 }
550
551 inline void Assembler::PUSHr(R r) {
552 count_push();
553 underrunProtect(1);
554 NanoAssert(REGNUM(r) < 8);
555 OPCODE(0x50 | REGNUM(r));
556 asm_output("push %s", gpn(r));
557 }
558
559 inline void Assembler::PUSHm(I32 d, R b) {
560 count_pushld();
561 ALUm(0xff, 6, d, b);
562 asm_output("push %d(%s)", d, gpn(b));
563 }
564
565 inline void Assembler::POPr(R r) {
566 count_pop();
567 underrunProtect(1);
568 NanoAssert(REGNUM(r) < 8);
569 OPCODE(0x58 | REGNUM(r));
570 asm_output("pop %s", gpn(r));
571 }
572
573 inline void Assembler::JCC(I32 o, NIns* t, const char* n) {
574 count_jcc();
575 underrunProtect(6);
576 intptr_t tt = (intptr_t)t - (intptr_t)_nIns;
577 if (t && isS8(tt)) {
578 IMM8(tt);
579 OPCODE(0x70 | o);
580 } else {
581 IMM32(tt);
582 OPCODE(0x80 | o);
583 OPCODE(JCC32);
584 }
585 asm_output("%-5s %p", n, t);
586 (void) n;
587 }
588
589 inline void Assembler::JMP_long(NIns* t) {
590 count_jmp();
591 underrunProtect(5);
592 NanoAssert(t);
593 intptr_t tt = (intptr_t)t - (intptr_t)_nIns;
594 IMM32(tt);
595 OPCODE(JMP32);
596 asm_output("jmp %p", t);
597 verbose_only( verbose_outputf("%p:", (void*)_nIns); )
598 }
599
600 inline void Assembler::JMP_indexed(Register x, I32 ss, NIns** addr) {
601 underrunProtect(7);
602 IMM32(int32_t(addr));
603 SIB(ss, REGNUM(x), 5);
604 MODRM(0, 4, 4); // amode == addr(table + x<<ss)
605 OPCODE(0xff); // jmp
606 asm_output("jmp *(%s*%d+%p)", gpn(x), 1 << ss, (void*)addr);
607 }
608
609 inline void Assembler::JE(NIns* t) { JCC(0x04, t, "je"); }
610 inline void Assembler::JNE(NIns* t) { JCC(0x05, t, "jne"); }
611 inline void Assembler::JP(NIns* t) { JCC(0x0A, t, "jp"); }
612 inline void Assembler::JNP(NIns* t) { JCC(0x0B, t, "jnp"); }
613
614 inline void Assembler::JB(NIns* t) { JCC(0x02, t, "jb"); }
615 inline void Assembler::JNB(NIns* t) { JCC(0x03, t, "jnb"); }
616 inline void Assembler::JBE(NIns* t) { JCC(0x06, t, "jbe"); }
617 inline void Assembler::JNBE(NIns* t) { JCC(0x07, t, "jnbe"); }
618
619 inline void Assembler::JA(NIns* t) { JCC(0x07, t, "ja"); }
620 inline void Assembler::JNA(NIns* t) { JCC(0x06, t, "jna"); }
621 inline void Assembler::JAE(NIns* t) { JCC(0x03, t, "jae"); }
622 inline void Assembler::JNAE(NIns* t) { JCC(0x02, t, "jnae"); }
623
624 inline void Assembler::JL(NIns* t) { JCC(0x0C, t, "jl"); }
625 inline void Assembler::JNL(NIns* t) { JCC(0x0D, t, "jnl"); }
626 inline void Assembler::JLE(NIns* t) { JCC(0x0E, t, "jle"); }
627 inline void Assembler::JNLE(NIns* t) { JCC(0x0F, t, "jnle"); }
628
629 inline void Assembler::JG(NIns* t) { JCC(0x0F, t, "jg"); }
630 inline void Assembler::JNG(NIns* t) { JCC(0x0E, t, "jng"); }
631 inline void Assembler::JGE(NIns* t) { JCC(0x0D, t, "jge"); }
632 inline void Assembler::JNGE(NIns* t) { JCC(0x0C, t, "jnge"); }
633
634 inline void Assembler::JO(NIns* t) { JCC(0x00, t, "jo"); }
635 inline void Assembler::JNO(NIns* t) { JCC(0x01, t, "jno"); }
636
637 // sse instructions
638 inline void Assembler::SSE(I32 opc3, R d, R s) {
639 underrunProtect(9);
640 MODRMr(REGNUM(d)&7, REGNUM(s)&7);
641 OPCODE3(opc3);
642 }
643
644 inline void Assembler::SSEm(I32 opc3, R r, I32 d, R b) {
645 underrunProtect(9);
646 MODRMm(REGNUM(r)&7, d, b);
647 OPCODE3(opc3);
648 }
649
650 inline void Assembler::SSEsib(I32 opc3, R rr, I32 d, R rb, R ri, I32 scale) {
651 underrunProtect(9);
652 MODRMsib(REGNUM(rr)&7, rb, ri, scale, d);
653 OPCODE3(opc3);
654 }
655
656 inline void Assembler::LDSDm(R r, const double* addr) {
657 count_ldq();
658 underrunProtect(8);
659 IMM32(int32_t(addr));
660 MODRM(0, REGNUM(r) & 7, 5); // amode == addr(r)
661 OPCODE(0x10);
662 OPCODE(0x0f);
663 OPCODE(0xf2);
664 // *addr is a constant, so we can print it here.
665 asm_output("movsd %s,(%p) // =%f", gpn(r), (void*)addr, *addr);
666 }
667
668 inline void Assembler::SSE_LDQ( R r, I32 d, R b) { count_ldq(); SSEm(0xf30f7e, r, d, b); asm_output("movq %s,%d(%s)", gpn(r), d, gpn(b)); }
669 inline void Assembler::SSE_LDSS(R r, I32 d, R b) { count_ld(); SSEm(0xf30f10, r, d, b); asm_output("movss %s,%d(%s)", gpn(r), d, gpn(b)); }
670
671 inline void Assembler::SSE_LDQsib(R rr, I32 d, R rb, R ri, I32 scale)
672 {
673 count_ldq();
674 SSEsib(0xf30f7e, rr, d, rb, ri, scale);
675 asm_output("movq %s,%d(%s+%s*%c)", gpn(rr), d, gpn(rb), gpn(ri), SIBIDX(scale));
676 }
677
678 inline void Assembler::SSE_LDSSsib(R rr, I32 d, R rb, R ri, I32 scale)
679 {
680 count_ld();
681 SSEsib(0xf30f10, rr, d, rb, ri, scale);
682 asm_output("movss %s,%d(%s+%s*%c)", gpn(rr), d, gpn(rb), gpn(ri), SIBIDX(scale));
683 }
684
685 inline void Assembler::SSE_STSD(I32 d, R b, R r) { count_stq(); SSEm(0xf20f11, r, d, b); asm_output("movsd %d(%s),%s", d, gpn(b), gpn(r)); }
686 inline void Assembler::SSE_STQ( I32 d, R b, R r) { count_stq(); SSEm(0x660fd6, r, d, b); asm_output("movq %d(%s),%s", d, gpn(b), gpn(r)); }
687 inline void Assembler::SSE_STSS(I32 d, R b, R r) { count_st(); SSEm(0xf30f11, r, d, b); asm_output("movss %d(%s),%s", d, gpn(b), gpn(r)); }
688
689 inline void Assembler::SSE_STQsib(I32 d, R rb, R ri, I32 scale, R rv) {
690 count_stq();
691 SSEsib(0x660fd6, rv, d, rb, ri, scale);
692 asm_output("movq %d(%s+%s*%c),%s", d, gpn(rb), gpn(ri), SIBIDX(scale), gpn(rv));
693 }
694
695 inline void Assembler::SSE_CVTSI2SD(R xr, R gr) { count_fpu(); SSE(0xf20f2a, xr, gr); asm_output("cvtsi2sd %s,%s", gpn(xr), gpn(gr)); }
696 inline void Assembler::SSE_CVTSD2SI(R gr, R xr) { count_fpu(); SSE(0xf20f2d, gr, xr); asm_output("cvtsd2si %s,%s", gpn(gr), gpn(xr)); }
697 inline void Assembler::SSE_CVTTSD2SI(R gr, R xr) { count_fpu(); SSE(0xf20f2c, gr, xr); asm_output("cvttsd2si %s,%s",gpn(gr), gpn(xr)); }
698 inline void Assembler::SSE_CVTSD2SS(R xr, R gr) { count_fpu(); SSE(0xf20f5a, xr, gr); asm_output("cvtsd2ss %s,%s", gpn(xr), gpn(gr)); }
699 inline void Assembler::SSE_CVTSS2SD(R xr, R gr) { count_fpu(); SSE(0xf30f5a, xr, gr); asm_output("cvtss2sd %s,%s", gpn(xr), gpn(gr)); }
700 inline void Assembler::SSE_CVTDQ2PD(R d, R r) { count_fpu(); SSE(0xf30fe6, d, r); asm_output("cvtdq2pd %s,%s", gpn(d), gpn(r)); }
701
702 // Move and zero-extend GP reg to XMM reg.
703 inline void Assembler::SSE_MOVD(R d, R s) {
704 count_mov();
705 if (IsXmmReg(s)) {
706 NanoAssert(IsGpReg(d));
707 SSE(0x660f7e, s, d);
708 } else {
709 NanoAssert(IsGpReg(s));
710 NanoAssert(IsXmmReg(d));
711 SSE(0x660f6e, d, s);
712 }
713 asm_output("movd %s,%s", gpn(d), gpn(s));
714 }
715
716 inline void Assembler::SSE_MOVSD(R rd, R rs) {
717 count_mov();
718 NanoAssert(IsXmmReg(rd) && IsXmmReg(rs));
719 SSE(0xf20f10, rd, rs);
720 asm_output("movsd %s,%s", gpn(rd), gpn(rs));
721 }
722
723 inline void Assembler::SSE_ADDSD(R rd, R rs) {
724 count_fpu();
725 NanoAssert(IsXmmReg(rd) && IsXmmReg(rs));
726 SSE(0xf20f58, rd, rs);
727 asm_output("addsd %s,%s", gpn(rd), gpn(rs));
728 }
729
730 inline void Assembler::SSE_ADDSDm(R r, const double* addr) {
731 count_fpuld();
732 underrunProtect(8);
733 NanoAssert(IsXmmReg(r));
734 const double* daddr = addr;
735 IMM32(int32_t(daddr));
736 MODRM(0, REGNUM(r) & 7, 5); // amode == daddr(r)
737 OPCODE(0x58);
738 OPCODE(0x0f);
739 OPCODE(0xf2);
740 // *daddr is a constant, so we can print it here.
741 asm_output("addsd %s,(%p) // =%f", gpn(r), (void*)daddr, *daddr);
742 }
743
744 inline void Assembler::SSE_SUBSD(R rd, R rs) {
745 count_fpu();
746 NanoAssert(IsXmmReg(rd) && IsXmmReg(rs));
747 SSE(0xf20f5c, rd, rs);
748 asm_output("subsd %s,%s", gpn(rd), gpn(rs));
749 }
750
751 inline void Assembler::SSE_MULSD(R rd, R rs) {
752 count_fpu();
753 NanoAssert(IsXmmReg(rd) && IsXmmReg(rs));
754 SSE(0xf20f59, rd, rs);
755 asm_output("mulsd %s,%s", gpn(rd), gpn(rs));
756 }
757
758 inline void Assembler::SSE_DIVSD(R rd, R rs) {
759 count_fpu();
760 NanoAssert(IsXmmReg(rd) && IsXmmReg(rs));
761 SSE(0xf20f5e, rd, rs);
762 asm_output("divsd %s,%s", gpn(rd), gpn(rs));
763 }
764
765 inline void Assembler::SSE_UCOMISD(R rl, R rr) {
766 count_fpu();
767 NanoAssert(IsXmmReg(rl) && IsXmmReg(rr));
768 SSE(0x660f2e, rl, rr);
769 asm_output("ucomisd %s,%s", gpn(rl), gpn(rr));
770 }
771
772 inline void Assembler::SSE_XORPD(R r, const uint32_t* maskaddr) {
773 count_fpuld();
774 underrunProtect(8);
775 IMM32(int32_t(maskaddr));
776 MODRM(0, REGNUM(r) & 7, 5); // amode == maskaddr(r)
777 OPCODE(0x57);
778 OPCODE(0x0f);
779 OPCODE(0x66);
780 asm_output("xorpd %s,(%p)", gpn(r), (void*)maskaddr);
781 }
782
783 inline void Assembler::SSE_XORPDr(R rd, R rs) {
784 count_fpu();
785 SSE(0x660f57, rd, rs);
786 asm_output("xorpd %s,%s", gpn(rd), gpn(rs));
787 }
788
789 // floating point unit
790 inline void Assembler::FPUc(I32 opc2) {
791 underrunProtect(2);
792 OPCODE2(opc2);
793 }
794
795 inline void Assembler::FPUm(I32 o, I32 d, R b) {
796 underrunProtect(7);
797 MODRMm(uint8_t(o), d, b);
798 OPCODE(o >> 8);
799 }
800
801 inline void Assembler::FPUdm(I32 o, const double* const m) {
802 underrunProtect(6);
803 MODRMdm(uint8_t(o), int32_t(m));
804 OPCODE(o >> 8);
805 }
806
807 inline void Assembler::TEST_AH(I32 i) {
808 count_alu();
809 underrunProtect(3);
810 OPCODE(i);
811 OPCODE(0xc4);
812 OPCODE(0xf6);
813 asm_output("test ah, %d", i);
814 }
815
816 // The FPU stack depth is the number of pushes in excess of the number of pops.
817 // Since we generate backwards, we track the FPU stack depth as a negative number.
818 // We use the top of the x87 stack as the single allocatable FP register, FST0.
819 // Thus, between LIR instructions, the depth of the FPU stack must be either 0 or -1,
820 // depending on whether FST0 is in use. Within the expansion of a single LIR
821 // instruction, however, deeper levels of the stack may be used as unmanaged
822 // temporaries. Hence, we allow for all eight levels in the assertions below.
823 void Assembler::fpu_push() {
824 debug_only( ++_fpuStkDepth; NanoAssert(_fpuStkDepth <= 0); )
825 }
826
827 void Assembler::fpu_pop() {
828 debug_only( --_fpuStkDepth; NanoAssert(_fpuStkDepth >= -7); )
829 }
830
831 inline void Assembler::FNSTSW_AX() { count_fpu(); FPUc(0xdfe0); asm_output("fnstsw_ax"); }
832 inline void Assembler::FCHS() { count_fpu(); FPUc(0xd9e0); asm_output("fchs"); }
833 inline void Assembler::FLD1() { count_fpu(); FPUc(0xd9e8); asm_output("fld1"); fpu_push(); }
834 inline void Assembler::FLDZ() { count_fpu(); FPUc(0xd9ee); asm_output("fldz"); fpu_push(); }
835
836 inline void Assembler::FST32(bool p, I32 d, R b){ count_stq(); FPUm(0xd902|(p?1:0), d, b); asm_output("fst%s32 %d(%s)", (p?"p":""), d, gpn(b)); if (p) fpu_pop(); }
837 inline void Assembler::FSTQ(bool p, I32 d, R b) { count_stq(); FPUm(0xdd02|(p?1:0), d, b); asm_output("fst%sq %d(%s)", (p?"p":""), d, gpn(b)); if (p) fpu_pop(); }
838
839 inline void Assembler::FSTPQ(I32 d, R b) { FSTQ(1, d, b); }
840
841 inline void Assembler::FCOM(bool p, I32 d, R b) { count_fpuld(); FPUm(0xdc02|(p?1:0), d, b); asm_output("fcom%s %d(%s)", (p?"p":""), d, gpn(b)); if (p) fpu_pop(); }
842 inline void Assembler::FCOMdm(bool p, const double* dm) {
843 count_fpuld();
844 FPUdm(0xdc02|(p?1:0), dm);
845 asm_output("fcom%s (%p)", (p?"p":""), (void*)dm);
846 if (p) fpu_pop();
847 }
848
849 inline void Assembler::FLD32(I32 d, R b) { count_ldq(); FPUm(0xd900, d, b); asm_output("fld32 %d(%s)", d, gpn(b)); fpu_push();}
850 inline void Assembler::FLDQ(I32 d, R b) { count_ldq(); FPUm(0xdd00, d, b); asm_output("fldq %d(%s)", d, gpn(b)); fpu_push();}
851 inline void Assembler::FLDQdm(const double* dm) { count_ldq(); FPUdm(0xdd00, dm); asm_output("fldq (%p)", (void*)dm); fpu_push();}
852 inline void Assembler::FILDQ(I32 d, R b) { count_fpuld(); FPUm(0xdf05, d, b); asm_output("fildq %d(%s)", d, gpn(b)); fpu_push(); }
853 inline void Assembler::FILD(I32 d, R b) { count_fpuld(); FPUm(0xdb00, d, b); asm_output("fild %d(%s)", d, gpn(b)); fpu_push(); }
854
855 inline void Assembler::FIST(bool p, I32 d, R b) {
856 count_fpu();
857 FPUm(0xdb02 | (p?1:0), d, b);
858 asm_output("fist%s %d(%s)", (p?"p":""), d, gpn(b));
859 if (p) fpu_pop();
860 }
861
862 inline void Assembler::FADD( I32 d, R b) { count_fpu(); FPUm(0xdc00, d, b); asm_output("fadd %d(%s)", d, gpn(b)); }
863 inline void Assembler::FSUB( I32 d, R b) { count_fpu(); FPUm(0xdc04, d, b); asm_output("fsub %d(%s)", d, gpn(b)); }
864 inline void Assembler::FSUBR(I32 d, R b) { count_fpu(); FPUm(0xdc05, d, b); asm_output("fsubr %d(%s)", d, gpn(b)); }
865 inline void Assembler::FMUL( I32 d, R b) { count_fpu(); FPUm(0xdc01, d, b); asm_output("fmul %d(%s)", d, gpn(b)); }
866 inline void Assembler::FDIV( I32 d, R b) { count_fpu(); FPUm(0xdc06, d, b); asm_output("fdiv %d(%s)", d, gpn(b)); }
867 inline void Assembler::FDIVR(I32 d, R b) { count_fpu(); FPUm(0xdc07, d, b); asm_output("fdivr %d(%s)", d, gpn(b)); }
868
869 inline void Assembler::FADDdm( const double *dm) { count_ldq(); FPUdm(0xdc00, dm); asm_output("fadd (%p)", (void*)dm); }
870 inline void Assembler::FSUBRdm(const double* dm) { count_ldq(); FPUdm(0xdc05, dm); asm_output("fsubr (%p)", (void*)dm); }
871 inline void Assembler::FMULdm( const double* dm) { count_ldq(); FPUdm(0xdc01, dm); asm_output("fmul (%p)", (void*)dm); }
872 inline void Assembler::FDIVRdm(const double* dm) { count_ldq(); FPUdm(0xdc07, dm); asm_output("fdivr (%p)", (void*)dm); }
873
874 inline void Assembler::FCOMP() { count_fpu(); FPUc(0xD8D9); asm_output("fcomp"); fpu_pop();}
875 inline void Assembler::FCOMPP() { count_fpu(); FPUc(0xDED9); asm_output("fcompp"); fpu_pop();fpu_pop();}
876 inline void Assembler::FLDr(R r) { count_ldq(); FPU(0xd9c0, r); asm_output("fld %s", gpn(r)); fpu_push(); }
877 inline void Assembler::EMMS() { count_fpu(); FPUc(0x0f77); asm_output("emms"); }
878
879 // standard direct call
880 inline void Assembler::CALL(const CallInfo* ci) {
881 count_call();
882 underrunProtect(5);
883 int offset = (ci->_address) - ((int)_nIns);
884 IMM32((uint32_t)offset);
885 OPCODE(0xE8);
886 verbose_only(asm_output("call %s", (ci->_name));)
887 debug_only(if (ci->returnType()==ARGTYPE_D) fpu_push();)
888 }
889
890 // indirect call thru register
891 inline void Assembler::CALLr(const CallInfo* ci, Register r) {
892 count_calli();
893 underrunProtect(2);
894 ALU(0xff, 2, r);
895 verbose_only(asm_output("call %s", gpn(r));)
896 debug_only(if (ci->returnType()==ARGTYPE_D) fpu_push();) (void)ci;
897 }
898
899 void Assembler::nInit()
900 {
901 nHints[LIR_calli] = rmask(retRegs[0]);
902 nHints[LIR_calld] = rmask(FST0);
903 nHints[LIR_paramp] = PREFER_SPECIAL;
904 nHints[LIR_immi] = ScratchRegs;
905 // Nb: Doing this with a loop future-proofs against the possibility of
906 // new comparison operations being added.
907 for (LOpcode op = LOpcode(0); op < LIR_sentinel; op = LOpcode(op+1))
908 if (isCmpOpcode(op))
909 nHints[op] = AllowableByteRegs;
910 }
911
912 void Assembler::nBeginAssembly() {
913 max_stk_args = 0;
914 }
915
916 NIns* Assembler::genPrologue()
917 {
918 // Prologue
919 uint32_t stackNeeded = max_stk_args + STACK_GRANULARITY * _activation.stackSlotsNeeded();
920
921 uint32_t stackPushed =
922 STACK_GRANULARITY + // returnaddr
923 STACK_GRANULARITY; // ebp
924
925 uint32_t aligned = alignUp(stackNeeded + stackPushed, NJ_ALIGN_STACK);
926 uint32_t amt = aligned - stackPushed;
927
928#ifdef _WIN32
929 // Windows uses a single guard page for extending the stack, so
930 // new stack pages must be first touched in stack-growth order.
931 // We touch each whole page that will be allocated to the frame
932 // (following the saved FP) to cause the OS to commit the page if
933 // necessary. Since we don't calculate page boundaries, but just
934 // probe at intervals of the pagesize, it is possible that the
935 // last page of the frame will be touched unnecessarily. Note that
936 // we must generate the probes in the reverse order of their execution.
937 // We require that the page size be a power of 2.
938 size_t pageSize = VMPI_getVMPageSize();
939 NanoAssert((pageSize & (pageSize-1)) == 0);
940 size_t pageRounded = amt & ~(pageSize-1);
941 for (int32_t d = pageRounded; d > 0; d -= pageSize) {
942 STi(rEBP, -d, 0);
943 }
944#endif
945
946 // Reserve stackNeeded bytes, padded
947 // to preserve NJ_ALIGN_STACK-byte alignment.
948 if (amt) {
949 SUBi(SP, amt);
950 }
951
952 verbose_only( asm_output("[frag entry]"); )
953 NIns *fragEntry = _nIns;
954 MR(FP, SP); // Establish our own FP.
955 PUSHr(FP); // Save caller's FP.
956
957 return fragEntry;
958 }
959
960 void Assembler::nFragExit(LIns* guard)
961 {
962 SideExit *exit = guard->record()->exit;
963 Fragment *frag = exit->target;
964 GuardRecord *lr = 0;
965 bool destKnown = (frag && frag->fragEntry);
966
967 // Generate jump to epilog and initialize lr.
968 // If the guard already exists, use a simple jump.
969 if (destKnown) {
970 JMP(frag->fragEntry);
971 lr = 0;
972 } else { // Target doesn't exist. Jump to an epilogue for now. This can be patched later.
973 if (!_epilogue)
974 _epilogue = genEpilogue();
975 lr = guard->record();
976 JMP_long(_epilogue);
977 lr->jmp = _nIns;
978 }
979
980 // profiling for the exit
981 verbose_only(
982 if (_logc->lcbits & LC_FragProfile) {
983 INCLi(int32_t(&guard->record()->profCount));
984 }
985 )
986
987 // Restore rESP from rEBP, undoing SUBi(SP,amt) in the prologue
988 MR(SP,FP);
989
990 // return value is GuardRecord*
991 asm_immi(rEAX, int(lr), /*canClobberCCs*/true);
992 }
993
994 NIns *Assembler::genEpilogue()
995 {
996 RET();
997 POPr(FP); // Restore caller's FP.
998
999 return _nIns;
1000 }
1001
1002 void Assembler::asm_call(LIns* ins)
1003 {
1004 if (!ins->isop(LIR_callv)) {
1005 Register rr = ( ins->isop(LIR_calld) ? FST0 : retRegs[0] );
1006 prepareResultReg(ins, rmask(rr));
1007 evictScratchRegsExcept(rmask(rr));
1008 } else {
1009 evictScratchRegsExcept(0);
1010 }
1011 const CallInfo* call = ins->callInfo();
1012 // must be signed, not unsigned
1013 uint32_t iargs = call->count_int32_args();
1014 int32_t fargs = call->count_args() - iargs;
1015
1016 bool indirect = call->isIndirect();
1017 if (indirect) {
1018 // target arg isn't pushed; it's consumed in the call
1019 iargs --;
1020 }
1021
1022 AbiKind abi = call->_abi;
1023 uint32_t max_regs = max_abi_regs[abi];
1024 if (max_regs > iargs)
1025 max_regs = iargs;
1026
1027 int32_t istack = iargs-max_regs; // first 2 4B args are in registers
1028 int32_t extra = 0;
1029 const int32_t pushsize = 4*istack + 8*fargs; // actual stack space used
1030
1031#if _MSC_VER
1032 // msc only provides 4-byte alignment but we have 8 byte stack adjustment
1033 // logic so maintain our 8 byte alignment.
1034 uint32_t align = 8;
1035#else
1036 uint32_t align = NJ_ALIGN_STACK;
1037#endif
1038
1039 if (pushsize) {
1040 if (_config.i386_fixed_esp) {
1041 // In case of fastcall, stdcall and thiscall the callee cleans up the stack,
1042 // and since we reserve max_stk_args words in the prolog to call functions
1043 // and don't adjust the stack pointer individually for each call we have
1044 // to undo here any changes the callee just did to the stack.
1045 if (abi != ABI_CDECL)
1046 SUBi(SP, pushsize);
1047 } else {
1048 // stack re-alignment
1049 // only pop our adjustment amount since callee pops args in FASTCALL mode
1050 extra = alignUp(pushsize, align) - pushsize;
1051 if (call->_abi == ABI_CDECL) {
1052 // with CDECL only, caller pops args
1053 ADDi(SP, extra+pushsize);
1054 } else if (extra > 0) {
1055 ADDi(SP, extra);
1056 }
1057 }
1058 }
1059
1060 NanoAssert(ins->isop(LIR_callv) || ins->isop(LIR_callp) || ins->isop(LIR_calld));
1061 if (!indirect) {
1062 CALL(call);
1063 }
1064 else {
1065 // Indirect call. x86 Calling conventions don't use rEAX as an
1066 // argument, and do use rEAX as a return value. We need a register
1067 // for the address to call, so we use rEAX since it will always be
1068 // available.
1069 CALLr(call, rEAX);
1070 }
1071
1072 // Call this now so that the arg setup can involve 'rr'.
1073 freeResourcesOf(ins);
1074
1075 // Make sure fpu stack is empty before call.
1076 NanoAssert(_allocator.isFree(FST0));
1077
1078 // Pre-assign registers to the first N 4B args based on the calling convention.
1079 uint32_t n = 0;
1080
1081 ArgType argTypes[MAXARGS];
1082 uint32_t argc = call->getArgTypes(argTypes);
1083 int32_t stkd = 0;
1084
1085 if (indirect) {
1086 argc--;
1087 asm_arg(ARGTYPE_P, ins->arg(argc), rEAX, stkd);
1088 if (!_config.i386_fixed_esp)
1089 stkd = 0;
1090 }
1091
1092 for (uint32_t i = 0; i < argc; i++)
1093 {
1094 uint32_t j = argc-i-1;
1095 ArgType ty = argTypes[j];
1096 Register r = UnspecifiedReg;
1097 if (n < max_regs && ty != ARGTYPE_D) {
1098 r = argRegs[n++]; // tell asm_arg what reg to use
1099 }
1100 asm_arg(ty, ins->arg(j), r, stkd);
1101 if (!_config.i386_fixed_esp)
1102 stkd = 0;
1103 }
1104
1105 if (_config.i386_fixed_esp) {
1106 if (pushsize > max_stk_args)
1107 max_stk_args = pushsize;
1108 } else if (extra > 0) {
1109 SUBi(SP, extra);
1110 }
1111 }
1112
1113 Register Assembler::nRegisterAllocFromSet(RegisterMask set)
1114 {
1115 Register r;
1116 RegAlloc &regs = _allocator;
1117 #ifdef _MSC_VER
1118 _asm
1119 {
1120 mov ecx, regs
1121 bsf eax, set // i = first bit set
1122 btr RegAlloc::free[ecx], eax // free &= ~rmask(i)
1123 mov r, eax
1124 }
1125 #else
1126 asm(
1127 "bsf %1, %%eax\n\t"
1128 "btr %%eax, %2\n\t"
1129 "movl %%eax, %0\n\t"
1130 : "=m"(r) : "m"(set), "m"(regs.free) : "%eax", "memory" );
1131 #endif /* _MSC_VER */
1132 return r;
1133 }
1134
1135 void Assembler::nRegisterResetAll(RegAlloc& a)
1136 {
1137 // add scratch registers to our free list for the allocator
1138 a.clear();
1139 a.free = SavedRegs | ScratchRegs;
1140 if (!_config.i386_sse2)
1141 a.free &= ~XmmRegs;
1142 }
1143
1144 void Assembler::nPatchBranch(NIns* branch, NIns* targ)
1145 {
1146 intptr_t offset = intptr_t(targ) - intptr_t(branch);
1147 if (branch[0] == JMP32) {
1148 *(int32_t*)&branch[1] = offset - 5;
1149 } else if (branch[0] == JCC32) {
1150 *(int32_t*)&branch[2] = offset - 6;
1151 } else
1152 NanoAssertMsg(0, "Unknown branch type in nPatchBranch");
1153 }
1154
1155 RegisterMask Assembler::nHint(LIns* ins)
1156 {
1157 NanoAssert(ins->isop(LIR_paramp));
1158 RegisterMask prefer = 0;
1159 uint8_t arg = ins->paramArg();
1160 if (ins->paramKind() == 0) {
1161 uint32_t max_regs = max_abi_regs[_thisfrag->lirbuf->abi];
1162 if (arg < max_regs)
1163 prefer = rmask(argRegs[arg]);
1164 } else {
1165 if (arg < NumSavedRegs)
1166 prefer = rmask(savedRegs[arg]);
1167 }
1168 return prefer;
1169 }
1170
1171 // Return true if we can generate code for this instruction that neither
1172 // sets CCs nor clobbers any input register.
1173 // LEA is the only native instruction that fits those requirements.
1174 bool canRematLEA(LIns* ins)
1175 {
1176 if (ins->isop(LIR_addi))
1177 return ins->oprnd1()->isInReg() && ins->oprnd2()->isImmI();
1178 // Subtract and some left-shifts could be rematerialized using LEA,
1179 // but it hasn't shown to help in real code yet. Noting them anyway:
1180 // maybe sub? R = subl rL, const => leal R, [rL + -const]
1181 // maybe lsh? R = lshl rL, 1/2/3 => leal R, [rL * 2/4/8]
1182 return false;
1183 }
1184
1185 bool Assembler::canRemat(LIns* ins)
1186 {
1187 return ins->isImmAny() || ins->isop(LIR_allocp) || canRematLEA(ins);
1188 }
1189
1190 // WARNING: the code generated by this function must not affect the
1191 // condition codes. See asm_cmpi().
1192 void Assembler::asm_restore(LIns* ins, Register r)
1193 {
1194 NanoAssert(ins->getReg() == r);
1195
1196 uint32_t arg;
1197 uint32_t abi_regcount;
1198 if (ins->isop(LIR_allocp)) {
1199 // The value of a LIR_allocp instruction is the address of the
1200 // stack allocation. We can rematerialize that from the record we
1201 // have of where the allocation lies in the stack.
1202 NanoAssert(ins->isInAr()); // must have stack slots allocated
1203 LEA(r, arDisp(ins), FP);
1204
1205 } else if (ins->isImmI()) {
1206 asm_immi(r, ins->immI(), /*canClobberCCs*/false);
1207
1208 } else if (ins->isImmD()) {
1209 asm_immd(r, ins->immDasQ(), ins->immD(), /*canClobberCCs*/false);
1210
1211 } else if (ins->isop(LIR_paramp) && ins->paramKind() == 0 &&
1212 (arg = ins->paramArg()) >= (abi_regcount = max_abi_regs[_thisfrag->lirbuf->abi])) {
1213 // Incoming arg is on stack, can restore it from there instead of spilling.
1214
1215 // this case is intentionally not detected in canRemat(), because we still
1216 // emit a load instead of a fast ALU operation. We don't want parameter
1217 // spills to have precedence over immediates & ALU ops, but if one does
1218 // spill, we want to load it directly from its stack area, saving a store
1219 // in the prolog.
1220
1221 // Compute position of argument relative to ebp. Higher argument
1222 // numbers are at higher positive offsets. The first abi_regcount
1223 // arguments are in registers, rest on stack. +8 accomodates the
1224 // return address and saved ebp value. Assuming abi_regcount == 0:
1225 //
1226 // low-addr ebp
1227 // [frame...][saved-ebp][return-addr][arg0][arg1]...
1228 //
1229 int d = (arg - abi_regcount) * sizeof(intptr_t) + 8;
1230 LD(r, d, FP);
1231
1232 } else if (canRematLEA(ins)) {
1233 LEA(r, ins->oprnd2()->immI(), ins->oprnd1()->getReg());
1234
1235 } else {
1236 int d = findMemFor(ins);
1237 if (ins->isI()) {
1238 NanoAssert(rmask(r) & GpRegs);
1239 LD(r, d, FP);
1240 } else {
1241 NanoAssert(ins->isD());
1242 if (rmask(r) & XmmRegs) {
1243 SSE_LDQ(r, d, FP);
1244 } else {
1245 NanoAssert(r == FST0);
1246 FLDQ(d, FP);
1247 }
1248 }
1249 }
1250 }
1251
1252 void Assembler::asm_store32(LOpcode op, LIns* value, int dr, LIns* base)
1253 {
1254 if (value->isImmI()) {
1255 if (base->opcode() == LIR_addp) {
1256 LIns* index;
1257 int scale;
1258 getBaseIndexScale(base, &base, &index, &scale);
1259
1260 Register rb, ri;
1261 getBaseReg2(GpRegs, index, ri, GpRegs, base, rb, dr);
1262
1263 int c = value->immI();
1264 switch (op) {
1265 case LIR_sti2c: ST8isib( dr, rb, ri, scale, c); break;
1266 case LIR_sti2s: ST16isib(dr, rb, ri, scale, c); break;
1267 case LIR_sti: STisib( dr, rb, ri, scale, c); break;
1268 default: NanoAssert(0); break;
1269 }
1270 } else {
1271 Register rb = getBaseReg(base, dr, GpRegs);
1272 int c = value->immI();
1273 switch (op) {
1274 case LIR_sti2c: ST8i( rb, dr, c); break;
1275 case LIR_sti2s: ST16i(rb, dr, c); break;
1276 case LIR_sti: STi( rb, dr, c); break;
1277 default: NanoAssert(0); break;
1278 }
1279 }
1280
1281 } else {
1282 // Quirk of x86-32: reg must be a/b/c/d for single-byte stores.
1283 const RegisterMask SrcRegs = (op == LIR_sti2c) ? AllowableByteRegs : GpRegs;
1284
1285 Register rv, rb;
1286 if (base->opcode() == LIR_addp) {
1287 LIns* index;
1288 int scale;
1289 getBaseIndexScale(base, &base, &index, &scale);
1290
1291 Register rb, ri, rv;
1292 getBaseReg2(SrcRegs, value, rv, GpRegs, base, rb, dr);
1293 ri = (index == value) ? rv
1294 : (index == base) ? rb
1295 : findRegFor(index, GpRegs & ~(rmask(rb)|rmask(rv)));
1296
1297 switch (op) {
1298 case LIR_sti2c: ST8sib( dr, rb, ri, scale, rv); break;
1299 case LIR_sti2s: ST16sib(dr, rb, ri, scale, rv); break;
1300 case LIR_sti: STsib( dr, rb, ri, scale, rv); break;
1301 default: NanoAssert(0); break;
1302 }
1303
1304 } else {
1305 if (base->isImmI()) {
1306 // absolute address
1307 rb = UnspecifiedReg;
1308 dr += base->immI();
1309 rv = findRegFor(value, SrcRegs);
1310 } else {
1311 getBaseReg2(SrcRegs, value, rv, GpRegs, base, rb, dr);
1312 }
1313 switch (op) {
1314 case LIR_sti2c: ST8( rb, dr, rv); break;
1315 case LIR_sti2s: ST16(rb, dr, rv); break;
1316 case LIR_sti: ST( rb, dr, rv); break;
1317 default: NanoAssert(0); break;
1318 }
1319 }
1320 }
1321 }
1322
1323 void Assembler::asm_spill(Register rr, int d, bool pop)
1324 {
1325 NanoAssert(d);
1326 if (rmask(rr) & GpRegs) {
1327 ST(FP, d, rr);
1328 } else if (rmask(rr) & XmmRegs) {
1329 SSE_STQ(d, FP, rr);
1330 } else {
1331 NanoAssert(rr == FST0);
1332 FSTQ(pop, d, FP);
1333 }
1334 }
1335
1336 void Assembler::asm_load64(LIns* ins)
1337 {
1338 LIns* base = ins->oprnd1();
1339 int d = ins->disp();
1340
1341 // There are two cases:
1342 // - 'ins' is in FpRegs: load it.
1343 // - otherwise: there's no point loading the value into a register
1344 // because its only use will be to immediately spill it. Instead we
1345 // do a memory-to-memory move from the load address directly to the
1346 // spill slot. (There must be a spill slot assigned.) This is why
1347 // we don't use prepareResultReg() here unlike most other places --
1348 // because it mandates bringing the value into a register.
1349 //
1350 if (ins->isInReg()) {
1351 Register rr = prepareResultReg(ins, rmask(ins->getReg()));
1352
1353 if (base->opcode() == LIR_addp && rmask(rr) & XmmRegs) {
1354 LIns* index;
1355 int scale;
1356 getBaseIndexScale(base, &base, &index, &scale);
1357
1358 // (**) We don't have the usual opportunity to clobber 'base'
1359 // or 'ins' with the result because it has a different type.
1360 Register rb, ri;
1361 RegisterMask allow = GpRegs & ~rmask(rr);
1362 getBaseReg2(allow, index, ri, allow, base, rb, d);
1363
1364 switch (ins->opcode()) {
1365 case LIR_ldd: SSE_LDQsib(rr, d, rb, ri, scale); break;
1366 case LIR_ldf2d: SSE_CVTSS2SD(rr, rr);
1367 SSE_LDSSsib(rr, d, rb, ri, scale);
1368 SSE_XORPDr(rr, rr); break;
1369 default: NanoAssert(0); break;
1370 }
1371
1372 } else {
1373 // (**) We don't have the usual opportunity to clobber 'base'
1374 // or 'ins' with the result because it has a different type.
1375 Register rb = getBaseReg(base, d, GpRegs);
1376 if (rmask(rr) & XmmRegs) {
1377 switch (ins->opcode()) {
1378 case LIR_ldd: SSE_LDQ(rr, d, rb); break;
1379 case LIR_ldf2d: SSE_CVTSS2SD(rr, rr);
1380 SSE_LDSS(rr, d, rb);
1381 SSE_XORPDr(rr, rr); break;
1382 default: NanoAssert(0); break;
1383 }
1384 } else {
1385 NanoAssert(rr == FST0);
1386 switch (ins->opcode()) {
1387 case LIR_ldd: FLDQ(d, rb); break;
1388 case LIR_ldf2d: FLD32(d, rb); break;
1389 default: NanoAssert(0); break;
1390 }
1391 }
1392 }
1393
1394 } else {
1395 Register rb = getBaseReg(base, d, GpRegs);
1396
1397 NanoAssert(ins->isInAr());
1398 int dr = arDisp(ins);
1399
1400 switch (ins->opcode()) {
1401 case LIR_ldd:
1402 // Don't use an fpu reg to simply load & store the value.
1403 asm_mmq(FP, dr, rb, d);
1404 break;
1405
1406 case LIR_ldf2d:
1407 // Need to use fpu to expand 32->64.
1408 FSTPQ(dr, FP);
1409 FLD32(d, rb);
1410 break;
1411
1412 default:
1413 NanoAssert(0);
1414 break;
1415 }
1416 }
1417
1418 freeResourcesOf(ins);
1419 // Nb: no need for a possible findSpecificRegForUnallocated() call
1420 // here because of (**) above.
1421 }
1422
1423 void Assembler::asm_store64(LOpcode op, LIns* value, int d, LIns* base)
1424 {
1425 if (op == LIR_std2f) {
1426 Register rb = getBaseReg(base, d, GpRegs);
1427 bool pop = !value->isInReg();
1428 Register rv = ( pop
1429 ? findRegFor(value, _config.i386_sse2 ? XmmRegs : FpRegs)
1430 : value->getReg() );
1431
1432 if (rmask(rv) & XmmRegs) {
1433 // need a scratch reg
1434 Register rt = registerAllocTmp(XmmRegs);
1435
1436 // cvt to single-precision and store
1437 SSE_STSS(d, rb, rt);
1438 SSE_CVTSD2SS(rt, rv);
1439 SSE_XORPDr(rt, rt); // zero dest to ensure no dependency stalls
1440
1441 } else {
1442 FST32(pop, d, rb);
1443 }
1444
1445 } else if (value->isImmD()) {
1446 Register rb = getBaseReg(base, d, GpRegs);
1447 STi(rb, d+4, value->immDhi());
1448 STi(rb, d, value->immDlo());
1449
1450 } else if (base->opcode() == LIR_addp && _config.i386_sse2) {
1451 LIns* index;
1452 int scale;
1453 getBaseIndexScale(base, &base, &index, &scale);
1454
1455 Register rb, ri;
1456 getBaseReg2(GpRegs, index, ri, GpRegs, base, rb, d);
1457
1458 Register rv = value->isInReg() ? value->getReg() : findRegFor(value, XmmRegs);
1459 NanoAssert(rmask(rv) & XmmRegs);
1460 SSE_STQsib(d, rb, ri, scale, rv);
1461
1462 } else if (value->isop(LIR_ldd) && !_config.i386_sse2) {
1463 // 'value' may be live in an FPU reg. Either way, don't put it on
1464 // the FPU stack just to load & store it.
1465 Register rb = getBaseReg(base, d, GpRegs);
1466 int da = findMemFor(value);
1467 asm_mmq(rb, d, FP, da);
1468
1469 } else {
1470 Register rb = getBaseReg(base, d, GpRegs);
1471 bool pop = !value->isInReg();
1472 Register rv = ( pop
1473 ? findRegFor(value, _config.i386_sse2 ? XmmRegs : FpRegs)
1474 : value->getReg() );
1475 if (rmask(rv) & XmmRegs)
1476 SSE_STQ(d, rb, rv);
1477 else
1478 FSTQ(pop, d, rb);
1479 }
1480 }
1481
1482 // Copy 64 bits: (rd+dd) <- (rs+ds).
1483 //
1484 void Assembler::asm_mmq(Register rd, int dd, Register rs, int ds)
1485 {
1486 // Value is either a 64-bit struct or maybe a float that isn't live in
1487 // an FPU reg. Either way, avoid allocating an FPU reg just to load
1488 // and store it.
1489 if (_config.i386_sse2) {
1490 Register t = registerAllocTmp(XmmRegs);
1491 SSE_STQ(dd, rd, t);
1492 SSE_LDQ(t, ds, rs);
1493 } else {
1494 // We avoid copying via the FP stack because it's slow and likely
1495 // to cause spills.
1496 Register t = registerAllocTmp(GpRegs & ~(rmask(rd)|rmask(rs)));
1497 ST(rd, dd+4, t);
1498 LD(t, ds+4, rs);
1499 ST(rd, dd, t);
1500 LD(t, ds, rs);
1501 }
1502 }
1503
1504 Branches Assembler::asm_branch_helper(bool branchOnFalse, LIns* cond, NIns* targ)
1505 {
1506 return isCmpDOpcode(cond->opcode())
1507 ? asm_branchd_helper(branchOnFalse, cond, targ)
1508 : asm_branchi_helper(branchOnFalse, cond, targ);
1509 }
1510
1511 Branches Assembler::asm_branchi_helper(bool branchOnFalse, LIns* cond, NIns* targ)
1512 {
1513 if (branchOnFalse) {
1514 // op == LIR_xf/LIR_jf
1515 switch (cond->opcode()) {
1516 case LIR_eqi: JNE(targ); break;
1517 case LIR_lti: JNL(targ); break;
1518 case LIR_lei: JNLE(targ); break;
1519 case LIR_gti: JNG(targ); break;
1520 case LIR_gei: JNGE(targ); break;
1521 case LIR_ltui: JNB(targ); break;
1522 case LIR_leui: JNBE(targ); break;
1523 case LIR_gtui: JNA(targ); break;
1524 case LIR_geui: JNAE(targ); break;
1525 default: NanoAssert(0); break;
1526 }
1527 } else {
1528 // op == LIR_xt/LIR_jt
1529 switch (cond->opcode()) {
1530 case LIR_eqi: JE(targ); break;
1531 case LIR_lti: JL(targ); break;
1532 case LIR_lei: JLE(targ); break;
1533 case LIR_gti: JG(targ); break;
1534 case LIR_gei: JGE(targ); break;
1535 case LIR_ltui: JB(targ); break;
1536 case LIR_leui: JBE(targ); break;
1537 case LIR_gtui: JA(targ); break;
1538 case LIR_geui: JAE(targ); break;
1539 default: NanoAssert(0); break;
1540 }
1541 }
1542 return Branches(_nIns);
1543 }
1544
1545 Branches Assembler::asm_branch(bool branchOnFalse, LIns* cond, NIns* targ)
1546 {
1547 Branches branches = asm_branch_helper(branchOnFalse, cond, targ);
1548 asm_cmp(cond);
1549 return branches;
1550 }
1551
1552 NIns* Assembler::asm_branch_ov(LOpcode, NIns* target)
1553 {
1554 JO(target);
1555 return _nIns;
1556 }
1557
1558 void Assembler::asm_jtbl(LIns* ins, NIns** table)
1559 {
1560 Register indexreg = findRegFor(ins->oprnd1(), GpRegs);
1561 JMP_indexed(indexreg, 2, table);
1562 }
1563
1564 void Assembler::asm_cmp(LIns *cond)
1565 {
1566 isCmpDOpcode(cond->opcode()) ? asm_cmpd(cond) : asm_cmpi(cond);
1567 }
1568
1569 // This generates a 'test' or 'cmp' instruction for a condition, which
1570 // causes the condition codes to be set appropriately. It's used with
1571 // conditional branches, conditional moves, and when generating
1572 // conditional values. For example:
1573 //
1574 // LIR: eq1 = eq a, 0
1575 // LIR: xf1: xf eq1 -> ...
1576 // asm: test edx, edx # generated by this function
1577 // asm: je ...
1578 //
1579 // If this is the only use of eq1, then on entry 'cond' is *not* marked as
1580 // used, and we do not allocate a register for it. That's because its
1581 // result ends up in the condition codes rather than a normal register.
1582 // This doesn't get recorded in the regstate and so the asm code that
1583 // consumes the result (eg. a conditional branch like 'je') must follow
1584 // shortly after.
1585 //
1586 // If eq1 is instead used again later, we will also generate code
1587 // (eg. in asm_cond()) to compute it into a normal register, something
1588 // like this:
1589 //
1590 // LIR: eq1 = eq a, 0
1591 // asm: test edx, edx
1592 // asm: sete ebx
1593 // asm: movzx ebx, ebx
1594 //
1595 // In this case we end up computing the condition twice, but that's ok, as
1596 // it's just as short as testing eq1's value in the code generated for the
1597 // guard.
1598 //
1599 // WARNING: Because the condition code update is not recorded in the
1600 // regstate, this function cannot generate any code that will affect the
1601 // condition codes prior to the generation of the test/cmp, because any
1602 // such code will be run after the test/cmp but before the instruction
1603 // that consumes the condition code. And because this function calls
1604 // findRegFor() before the test/cmp is generated, and findRegFor() calls
1605 // asm_restore(), that means that asm_restore() cannot generate code which
1606 // affects the condition codes.
1607 //
1608 void Assembler::asm_cmpi(LIns *cond)
1609 {
1610 LIns* lhs = cond->oprnd1();
1611 LIns* rhs = cond->oprnd2();
1612
1613 NanoAssert(lhs->isI() && rhs->isI());
1614
1615 // Ready to issue the compare.
1616 if (rhs->isImmI()) {
1617 int c = rhs->immI();
1618 // findRegFor() can call asm_restore() -- asm_restore() better not
1619 // disturb the CCs!
1620 Register r = findRegFor(lhs, GpRegs);
1621 if (c == 0 && cond->isop(LIR_eqi)) {
1622 bool canSkipTest = lhs->isop(LIR_andi) || lhs->isop(LIR_ori);
1623 if (canSkipTest) {
1624 // Set up a short-lived reader to do lookahead; it does no
1625 // optimisations, but that should be good enough for this
1626 // simple case, which looks something like this:
1627 //
1628 // a = andi x, y # lhs
1629 // eq1 = eq a, 0 # cond
1630 // xt eq1 # currIns
1631 //
1632 // Note that we don't have to worry about lookahead
1633 // hitting the start of the buffer, because read() will
1634 // just return LIR_start repeatedly in that case.
1635 //
1636 LirReader lookahead(currIns);
1637 canSkipTest = currIns == lookahead.read() &&
1638 cond == lookahead.read() &&
1639 lhs == lookahead.read();
1640 }
1641 if (canSkipTest) {
1642 // Do nothing. At run-time, 'lhs' will have just been computed
1643 // by an i386 instruction that sets ZF for us ('and' or
1644 // 'or'), so we don't have to do it ourselves.
1645 } else {
1646 TEST(r, r); // sets ZF according to the value of 'lhs'
1647 }
1648 } else {
1649 CMPi(r, c);
1650 }
1651 } else {
1652 Register ra, rb;
1653 findRegFor2(GpRegs, lhs, ra, GpRegs, rhs, rb);
1654 CMP(ra, rb);
1655 }
1656 }
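        // [Editor's note -- illustrative sketch, not part of the original source]
        // The TEST-skipping pattern recognised above: when 'lhs' is an andi/ori that
        // was just computed, the 'and'/'or' instruction has already set ZF:
        //     and edx, ecx        ; computes lhs, sets ZF = (lhs == 0)
        //     je  target          ; consumes ZF directly -- no 'test edx, edx' needed
        // The guard's meaning is simply:
        static inline bool andi_eq0_guard_model(int32_t x, int32_t y) {
            return (x & y) == 0;                // ZF after 'and' encodes exactly this
        }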
1657
1658 void Assembler::asm_condd(LIns* ins)
1659 {
1660 LOpcode opcode = ins->opcode();
1661 Register r = prepareResultReg(ins, AllowableByteRegs);
1662
1663 // SETcc only sets low 8 bits, so extend
1664 MOVZX8(r,r);
1665
1666 if (_config.i386_sse2) {
1667 // LIR_ltd and LIR_gtd are handled by the same case because
1668 // asm_cmpd() converts LIR_ltd(a,b) to LIR_gtd(b,a). Likewise
1669 // for LIR_led/LIR_ged.
1670 switch (opcode) {
1671 case LIR_eqd:
1672 if (ins->oprnd1() == ins->oprnd2()) {
1673 SETNP(r);
1674 } else {
1675 // result = ZF & !PF, must do logic on flags
1676 AND8R(r); // and rl,rh rl &= rh
1677 SETNPH(r); // setnp rh rh = !PF
1678 SETE(r); // sete rl rl = ZF
1679 }
1680 break;
1681 case LIR_ltd:
1682 case LIR_gtd: SETA(r); break;
1683 case LIR_led:
1684 case LIR_ged: SETAE(r); break;
1685 default: NanoAssert(0); break;
1686 }
1687 } else {
1688 SETNP(r);
1689 }
1690
1691 freeResourcesOf(ins);
1692
1693 asm_cmpd(ins);
1694 }
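        // [Editor's note -- illustrative sketch, not part of the original source]
        // The LIR_eqd flag logic above: after ucomisd, ZF is set for "equal" *and*
        // for "unordered" (NaN), while PF is set only for "unordered", so the result
        // is ZF && !PF -- which is what the SETE / SETNPH / AND8R sequence computes.
        static inline bool eqd_result_model(double a, double b) {
            bool zf = !(a < b) && !(a > b);     // equal or unordered
            bool pf = (a != a) || (b != b);     // unordered only
            return zf && !pf;                   // same as (a == b), NaN-correct
        }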
1695
1696 void Assembler::asm_cond(LIns* ins)
1697 {
1698 LOpcode op = ins->opcode();
1699
1700 Register r = prepareResultReg(ins, AllowableByteRegs);
1701
1702 // SETcc only sets low 8 bits, so extend
1703 MOVZX8(r,r);
1704 switch (op) {
1705 case LIR_eqi: SETE(r); break;
1706 case LIR_lti: SETL(r); break;
1707 case LIR_lei: SETLE(r); break;
1708 case LIR_gti: SETG(r); break;
1709 case LIR_gei: SETGE(r); break;
1710 case LIR_ltui: SETB(r); break;
1711 case LIR_leui: SETBE(r); break;
1712 case LIR_gtui: SETA(r); break;
1713 case LIR_geui: SETAE(r); break;
1714 default: NanoAssert(0); break;
1715 }
1716
1717 freeResourcesOf(ins);
1718
1719 asm_cmpi(ins);
1720 }
1721
1722 // Two example cases for "ins = add lhs, rhs". '*' lines are those
1723 // generated in this function.
1724 //
1725 // asm: define lhs into rr
1726 // asm: define rhs into rb
1727 // ...
1728 // * asm: add rr, rb
1729 // * asm: spill rr if necessary
1730 // ... no more uses of lhs in rr...
1731 //
1732 // asm: define lhs into ra
1733 // asm: define rhs into rb
1734 // ...
1735 // * asm: mov rr, ra
1736 // * asm: add rr, rb
1737 // * asm: spill rr if necessary
1738 // ... some uses of lhs in ra...
1739 //
1740 void Assembler::asm_arith(LIns* ins)
1741 {
1742 LOpcode op = ins->opcode();
1743
1744 // First special case.
1745 if (op == LIR_modi) {
1746 asm_div_mod(ins);
1747 return;
1748 }
1749
1750 LIns* lhs = ins->oprnd1();
1751 LIns* rhs = ins->oprnd2();
1752
1753 // Second special case.
1754 // XXX: bug 547125: don't need this once LEA is used for LIR_addi in all cases below
1755 if (op == LIR_addi && lhs->isop(LIR_allocp) && rhs->isImmI()) {
1756 // LIR_addi(LIR_allocp, LIR_immi) -- use lea.
1757 Register rr = prepareResultReg(ins, GpRegs);
1758 int d = findMemFor(lhs) + rhs->immI();
1759
1760 LEA(rr, d, FP);
1761
1762 freeResourcesOf(ins);
1763
1764 return;
1765 }
1766
1767 bool isConstRhs;
1768 RegisterMask allow = GpRegs;
1769 Register rb = UnspecifiedReg;
1770
1771 switch (op) {
1772 case LIR_divi:
1773 // Nb: if the div feeds into a mod it will be handled by
1774 // asm_div_mod() rather than here.
1775 isConstRhs = false;
1776 rb = findRegFor(rhs, (GpRegs & ~(rmask(rEAX)|rmask(rEDX))));
1777 allow = rmask(rEAX);
1778 evictIfActive(rEDX);
1779 break;
1780 case LIR_muli:
1781 case LIR_muljovi:
1782 case LIR_mulxovi:
1783 isConstRhs = false;
1784 if (lhs != rhs) {
1785 rb = findRegFor(rhs, allow);
1786 allow &= ~rmask(rb);
1787 }
1788 break;
1789 case LIR_lshi:
1790 case LIR_rshi:
1791 case LIR_rshui:
1792 isConstRhs = rhs->isImmI();
1793 if (!isConstRhs) {
1794 rb = findSpecificRegFor(rhs, rECX);
1795 allow &= ~rmask(rb);
1796 }
1797 break;
1798 default:
1799 isConstRhs = rhs->isImmI();
1800 if (!isConstRhs && lhs != rhs) {
1801 rb = findRegFor(rhs, allow);
1802 allow &= ~rmask(rb);
1803 }
1804 break;
1805 }
1806
1807 // Somewhere for the result of 'ins'.
1808 Register rr = prepareResultReg(ins, allow);
1809
1810 // If 'lhs' isn't in a register, it can be clobbered by 'ins'.
1811 Register ra = lhs->isInReg() ? lhs->getReg() : rr;
1812
1813 if (!isConstRhs) {
1814 if (lhs == rhs)
1815 rb = ra;
1816
1817 switch (op) {
1818 case LIR_addi:
1819 case LIR_addjovi:
1820 case LIR_addxovi: ADD(rr, rb); break; // XXX: bug 547125: could use LEA for LIR_addi
1821 case LIR_subi:
1822 case LIR_subjovi:
1823 case LIR_subxovi: SUB(rr, rb); break;
1824 case LIR_muli:
1825 case LIR_muljovi:
1826 case LIR_mulxovi: IMUL(rr, rb); break;
1827 case LIR_andi: AND(rr, rb); break;
1828 case LIR_ori: OR( rr, rb); break;
1829 case LIR_xori: XOR(rr, rb); break;
1830 case LIR_lshi: SHL(rr, rb); break;
1831 case LIR_rshi: SAR(rr, rb); break;
1832 case LIR_rshui: SHR(rr, rb); break;
1833 case LIR_divi:
1834 DIV(rb);
1835 CDQ(); // sign-extend rEAX into rEDX:rEAX
1836 break;
1837 default: NanoAssert(0); break;
1838 }
1839
1840 } else {
1841 int c = rhs->immI();
1842 switch (op) {
1843 case LIR_addi:
1844 // this doesn't set cc's, only use it when cc's not required.
1845 LEA(rr, c, ra);
1846 ra = rr; // suppress mov
1847 break;
1848 case LIR_addjovi:
1849 case LIR_addxovi: ADDi(rr, c); break;
1850 case LIR_subi:
1851 case LIR_subjovi:
1852 case LIR_subxovi: SUBi(rr, c); break;
1853 case LIR_andi: ANDi(rr, c); break;
1854 case LIR_ori: ORi( rr, c); break;
1855 case LIR_xori: XORi(rr, c); break;
1856 case LIR_lshi: SHLi(rr, c); break;
1857 case LIR_rshi: SARi(rr, c); break;
1858 case LIR_rshui: SHRi(rr, c); break;
1859 default: NanoAssert(0); break;
1860 }
1861 }
1862
1863 if (rr != ra)
1864 MR(rr, ra);
1865
1866 freeResourcesOf(ins);
1867 if (!lhs->isInReg()) {
1868 NanoAssert(ra == rr);
1869 findSpecificRegForUnallocated(lhs, ra);
1870 }
1871 }
1872
1873 // Generates code for a LIR_modi(LIR_divi(divL, divR)) sequence.
1874 void Assembler::asm_div_mod(LIns* mod)
1875 {
1876 LIns* div = mod->oprnd1();
1877
1878 // LIR_modi expects the LIR_divi to be near (no interference from the register allocator).
1879 NanoAssert(mod->isop(LIR_modi));
1880 NanoAssert(div->isop(LIR_divi));
1881
1882 LIns* divL = div->oprnd1();
1883 LIns* divR = div->oprnd2();
1884
1885 prepareResultReg(mod, rmask(rEDX));
1886 prepareResultReg(div, rmask(rEAX));
1887
1888 Register rDivR = findRegFor(divR, (GpRegs & ~(rmask(rEAX)|rmask(rEDX))));
1889 Register rDivL = divL->isInReg() ? divL->getReg() : rEAX;
1890
1891 DIV(rDivR);
1892 CDQ(); // sign-extend rEAX into rEDX:rEAX
1893 if (rEAX != rDivL)
1894 MR(rEAX, rDivL);
1895
1896 freeResourcesOf(mod);
1897 freeResourcesOf(div);
1898 if (!divL->isInReg()) {
1899 NanoAssert(rDivL == rEAX);
1900 findSpecificRegForUnallocated(divL, rEAX);
1901 }
1902 }
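        // [Editor's note -- illustrative sketch, not part of the original source]
        // CDQ sign-extends rEAX into rEDX:rEAX and 'idiv r' then leaves the quotient
        // in rEAX and the remainder in rEDX, which is why the div result is pinned to
        // rEAX and the mod result to rEDX above. (divR assumed non-zero at run time.)
        static inline void div_mod_model(int32_t divL, int32_t divR, int32_t* q, int32_t* r) {
            *q = divL / divR;                   // rEAX after idiv
            *r = divL % divR;                   // rEDX after idiv
        }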
1903
1904 // Two example cases for "ins = neg lhs". Lines marked with '*' are
1905 // generated in this function.
1906 //
1907 // asm: define lhs into rr
1908 // ...
1909 // * asm: neg rr
1910 // * asm: spill rr if necessary
1911 // ... no more uses of lhs in rr...
1912 //
1913 //
1914 // asm: define lhs into ra
1915 // ...
1916 // * asm: mov rr, ra
1917 // * asm: neg rr
1918 // * asm: spill rr if necessary
1919 // ... more uses of lhs in ra...
1920 //
1921 void Assembler::asm_neg_not(LIns* ins)
1922 {
1923 LIns* lhs = ins->oprnd1();
1924
1925 Register rr = prepareResultReg(ins, GpRegs);
1926
1927 // If 'lhs' isn't in a register, it can be clobbered by 'ins'.
1928 Register ra = lhs->isInReg() ? lhs->getReg() : rr;
1929
1930 if (ins->isop(LIR_noti)) {
1931 NOT(rr);
1932 } else {
1933 NanoAssert(ins->isop(LIR_negi));
1934 NEG(rr);
1935 }
1936 if (rr != ra)
1937 MR(rr, ra);
1938
1939 freeResourcesOf(ins);
1940 if (!lhs->isInReg()) {
1941 NanoAssert(ra == rr);
1942 findSpecificRegForUnallocated(lhs, ra);
1943 }
1944 }
1945
1946 void Assembler::asm_load32(LIns* ins)
1947 {
1948 LOpcode op = ins->opcode();
1949 LIns* base = ins->oprnd1();
1950 int32_t d = ins->disp();
1951
1952 Register rr = prepareResultReg(ins, GpRegs);
1953
1954 if (base->isImmI()) {
1955 intptr_t addr = base->immI();
1956 addr += d;
1957 switch (op) {
1958 case LIR_lduc2ui: LD8Zdm( rr, addr); break;
1959 case LIR_ldc2i: LD8Sdm( rr, addr); break;
1960 case LIR_ldus2ui: LD16Zdm(rr, addr); break;
1961 case LIR_lds2i: LD16Sdm(rr, addr); break;
1962 case LIR_ldi: LDdm( rr, addr); break;
1963 default: NanoAssert(0); break;
1964 }
1965
1966 freeResourcesOf(ins);
1967
1968 } else if (base->opcode() == LIR_addp) {
1969 LIns* index;
1970 int scale;
1971 getBaseIndexScale(base, &base, &index, &scale);
1972
1973 // If 'base' isn't in a register, it can be clobbered by 'ins'.
1974 // Likewise for 'rhs', but we try it with 'base' first.
1975 Register rb, ri;
1976 // @todo -- If base and/or index is const, we could eliminate a register use.
1977 if (!base->isInReg()) {
1978 rb = rr;
1979 ri = findRegFor(index, GpRegs & ~(rmask(rb)));
1980
1981 } else {
1982 rb = base->getReg();
1983 NanoAssert(rb != rr);
1984 ri = index->isInReg() ? findRegFor(index, GpRegs & ~(rmask(rb))) : rr;
1985 }
1986
1987 switch (op) {
1988 case LIR_lduc2ui: LD8Zsib( rr, d, rb, ri, scale); break;
1989 case LIR_ldc2i: LD8Ssib( rr, d, rb, ri, scale); break;
1990 case LIR_ldus2ui: LD16Zsib(rr, d, rb, ri, scale); break;
1991 case LIR_lds2i: LD16Ssib(rr, d, rb, ri, scale); break;
1992 case LIR_ldi: LDsib( rr, d, rb, ri, scale); break;
1993 default: NanoAssert(0); break;
1994 }
1995
1996 freeResourcesOf(ins);
1997 if (!base->isInReg()) {
1998 NanoAssert(rb == rr);
1999 findSpecificRegForUnallocated(base, rb);
2000 } else if (!index->isInReg()) {
2001 NanoAssert(ri == rr);
2002 findSpecificRegForUnallocated(index, ri);
2003 }
2004
2005 } else {
2006 Register ra = getBaseReg(base, d, GpRegs);
2007
2008 switch (op) {
2009 case LIR_lduc2ui: LD8Z( rr, d, ra); break;
2010 case LIR_ldc2i: LD8S( rr, d, ra); break;
2011 case LIR_ldus2ui: LD16Z(rr, d, ra); break;
2012 case LIR_lds2i: LD16S(rr, d, ra); break;
2013 case LIR_ldi: LD( rr, d, ra); break;
2014 default: NanoAssert(0); break;
2015 }
2016
2017 freeResourcesOf(ins);
2018 if (!base->isop(LIR_allocp) && !base->isInReg()) {
2019 NanoAssert(ra == rr);
2020 findSpecificRegForUnallocated(base, ra);
2021 }
2022 }
2023 }
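        // [Editor's note -- illustrative sketch, not part of the original source]
        // The *sib load forms above address d + rb + ri*(1<<scale); 'scale' is
        // assumed here to be the log2 SIB scale factor that getBaseIndexScale()
        // extracts from the LIR_addp chain.
        static inline const uint8_t* sib_addr_model(const uint8_t* rb, intptr_t ri,
                                                    int scale, int32_t d) {
            return rb + d + (ri << scale);      // base + displacement + scaled index
        }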
2024
2025 void Assembler::asm_cmov(LIns* ins)
2026 {
2027 LIns* condval = ins->oprnd1();
2028 LIns* iftrue = ins->oprnd2();
2029 LIns* iffalse = ins->oprnd3();
2030
2031 NanoAssert(condval->isCmp());
2032 NanoAssert((ins->isop(LIR_cmovi) && iftrue->isI() && iffalse->isI()) ||
2033 (ins->isop(LIR_cmovd) && iftrue->isD() && iffalse->isD()));
2034
2035 if (!_config.i386_sse2 && ins->isop(LIR_cmovd)) {
2036 // See the SSE2 case below for an explanation of the subtleties here.
2037 debug_only( Register rr = ) prepareResultReg(ins, x87Regs);
2038 NanoAssert(FST0 == rr);
2039 NanoAssert(!iftrue->isInReg() && !iffalse->isInReg());
2040
2041 NIns* target = _nIns;
2042
2043 if (iffalse->isImmD()) {
2044 asm_immd(FST0, iffalse->immDasQ(), iffalse->immD(), /*canClobberCCs*/false);
2045 } else {
2046 int df = findMemFor(iffalse);
2047 FLDQ(df, FP);
2048 }
2049 FSTP(FST0); // pop the stack
2050 asm_branch_helper(false, condval, target);
2051
2052 NanoAssert(ins->getReg() == rr);
2053 freeResourcesOf(ins);
2054 if (!iftrue->isInReg())
2055 findSpecificRegForUnallocated(iftrue, FST0);
2056
2057 asm_cmp(condval);
2058
2059 return;
2060 }
2061
2062 RegisterMask allow = ins->isD() ? XmmRegs : GpRegs;
2063 Register rr = prepareResultReg(ins, allow);
2064 Register rf = findRegFor(iffalse, allow & ~rmask(rr));
2065
2066 if (ins->isop(LIR_cmovd)) {
2067 // The obvious way to handle this is as follows:
2068 //
2069 // mov rr, rt # only needed if rt is live afterwards
2070 // do comparison
2071 // jt end
2072 // mov rr, rf
2073 // end:
2074 //
2075 // The problem with this is that doing the comparison can cause
2076 // registers to be evicted, possibly including 'rr', which holds
2077 // 'ins'. And that screws things up. So instead we do this:
2078 //
2079 // do comparison
2080 // mov rr, rt # only needed if rt is live afterwards
2081 // jt end
2082 // mov rr, rf
2083 // end:
2084 //
2085 // Putting the 'mov' between the comparison and the jump is ok
2086 // because move instructions don't modify the condition codes.
2087 //
2088 NIns* target = _nIns;
2089 asm_nongp_copy(rr, rf);
2090 asm_branch_helper(false, condval, target);
2091
2092 // If 'iftrue' isn't in a register, it can be clobbered by 'ins'.
2093 Register rt = iftrue->isInReg() ? iftrue->getReg() : rr;
2094
2095 if (rr != rt)
2096 asm_nongp_copy(rr, rt);
2097
2098 NanoAssert(ins->getReg() == rr);
2099 freeResourcesOf(ins);
2100 if (!iftrue->isInReg()) {
2101 NanoAssert(rt == rr);
2102 findSpecificRegForUnallocated(iftrue, rr);
2103 }
2104
2105 asm_cmp(condval);
2106 return;
2107 }
2108
2109 // If 'iftrue' isn't in a register, it can be clobbered by 'ins'.
2110 Register rt = iftrue->isInReg() ? iftrue->getReg() : rr;
2111 NanoAssert(ins->isop(LIR_cmovi));
2112
2113 // WARNING: We cannot generate any code that affects the condition
2114 // codes between the MRcc generation here and the asm_cmpi() call
2115 // below. See asm_cmpi() for more details.
2116 switch (condval->opcode()) {
2117 // Note that these are all opposites...
2118 case LIR_eqi: MRNE(rr, rf); break;
2119 case LIR_lti: MRGE(rr, rf); break;
2120 case LIR_lei: MRG( rr, rf); break;
2121 case LIR_gti: MRLE(rr, rf); break;
2122 case LIR_gei: MRL( rr, rf); break;
2123 case LIR_ltui: MRAE(rr, rf); break;
2124 case LIR_leui: MRA( rr, rf); break;
2125 case LIR_gtui: MRBE(rr, rf); break;
2126 case LIR_geui: MRB( rr, rf); break;
2127 default: NanoAssert(0); break;
2128 }
2129
2130 if (rr != rt)
2131 MR(rr, rt);
2132
2133 NanoAssert(ins->getReg() == rr);
2134 freeResourcesOf(ins);
2135 if (!iftrue->isInReg()) {
2136 NanoAssert(rt == rr);
2137 findSpecificRegForUnallocated(iftrue, rr);
2138 }
2139
2140 asm_cmp(condval);
2141 }
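        // [Editor's note -- illustrative sketch, not part of the original source]
        // Why the MRcc opcodes above use the *opposite* condition: the result
        // register starts out holding the 'iftrue' value, and the conditional move
        // overwrites it with 'iffalse' only when the condition fails.
        static inline int32_t cmovi_model(bool cond, int32_t iftrue, int32_t iffalse) {
            int32_t rr = iftrue;                // MR(rr, rt)
            if (!cond)
                rr = iffalse;                   // MRcc(rr, rf) with the inverted cc
            return rr;
        }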
2142
2143 void Assembler::asm_param(LIns* ins)
2144 {
2145 uint32_t arg = ins->paramArg();
2146 uint32_t kind = ins->paramKind();
2147 if (kind == 0) {
2148 // ordinary param
2149 AbiKind abi = _thisfrag->lirbuf->abi;
2150 uint32_t abi_regcount = max_abi_regs[abi];
2151 // argRegs must have as many elements as the largest argument register
2152 // requirement of an abi. Currently, this is 2, for ABI_FASTCALL. See
2153 // the definition of max_abi_regs earlier in this file. The following
2154 // assertion reflects this invariant:
2155 NanoAssert(abi_regcount <= sizeof(argRegs)/sizeof(argRegs[0]));
2156 if (arg < abi_regcount) {
2157 // Incoming arg in register.
2158 prepareResultReg(ins, rmask(argRegs[arg]));
2159 // No code to generate.
2160 } else {
2161 // Incoming arg is on stack, and rEBP points nearby (see genPrologue()).
2162 Register r = prepareResultReg(ins, GpRegs);
2163 int d = (arg - abi_regcount) * sizeof(intptr_t) + 8;
2164 LD(r, d, FP);
2165 }
2166 } else {
2167 // Saved param.
2168 prepareResultReg(ins, rmask(savedRegs[arg]));
2169 // No code to generate.
2170 }
2171 freeResourcesOf(ins);
2172 }
2173
2174 void Assembler::asm_immi(LIns* ins)
2175 {
2176 Register rr = prepareResultReg(ins, GpRegs);
2177
2178 asm_immi(rr, ins->immI(), /*canClobberCCs*/true);
2179
2180 freeResourcesOf(ins);
2181 }
2182
2183 void Assembler::asm_immi(Register r, int32_t val, bool canClobberCCs)
2184 {
2185 if (val == 0 && canClobberCCs)
2186 XOR(r, r);
2187 else
2188 LDi(r, val);
2189 }
2190
2191 void Assembler::asm_immd(Register r, uint64_t q, double d, bool canClobberCCs)
2192 {
2193 // Floats require non-standard handling. There is no load-64-bit-immediate
2194 // instruction on i386, so in the general case, we must load it from memory.
2195 // This is unlike most other LIR operations which can be computed directly
2196 // in a register. We can special-case 0.0 and various other small ints
2197 // (1.0 on x87, any int32_t value on SSE2), but for all other values, we
2198 // allocate an 8-byte chunk via dataAlloc and load from there. Note that
2199 // this implies that floats never require spill area, since they will always
2200 // be rematerialized from const data (or inline instructions in the special cases).
2201
2202 if (rmask(r) & XmmRegs) {
2203 if (q == 0) {
2204 // test (int64)0 since -0.0 == 0.0
2205 SSE_XORPDr(r, r);
2206 } else if (d && d == (int)d && canClobberCCs) {
2207 // can fit in 32bits? then use cvt which is faster
2208 Register tr = registerAllocTmp(GpRegs);
2209 SSE_CVTSI2SD(r, tr);
2210 SSE_XORPDr(r, r); // zero r to ensure no dependency stalls
2211 asm_immi(tr, (int)d, canClobberCCs);
2212 } else {
2213 const uint64_t* p = findImmDFromPool(q);
2214 LDSDm(r, (const double*)p);
2215 }
2216 } else {
2217 NanoAssert(r == FST0);
2218 if (q == 0) {
2219 // test (int64)0 since -0.0 == 0.0
2220 FLDZ();
2221 } else if (d == 1.0) {
2222 FLD1();
2223 } else {
2224 const uint64_t* p = findImmDFromPool(q);
2225 FLDQdm((const double*)p);
2226 }
2227 }
2228 }
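        // [Editor's note -- illustrative sketch, not part of the original source]
        // The zero test above is done on the 64-bit pattern 'q' rather than on 'd'
        // because -0.0 == 0.0 as doubles, yet only +0.0 has the all-zero bit
        // pattern that 'xorpd r, r' (or FLDZ) actually produces.
        static inline bool xorpd_ok_model(uint64_t q) {
            return q == 0;                      // true for +0.0 only, not for -0.0
        }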
2229
2230 void Assembler::asm_immd(LIns* ins)
2231 {
2232 NanoAssert(ins->isImmD());
2233 if (ins->isInReg()) {
2234 Register rr = ins->getReg();
2235 NanoAssert(rmask(rr) & FpRegs);
2236 asm_immd(rr, ins->immDasQ(), ins->immD(), /*canClobberCCs*/true);
2237 } else {
2238 // Do nothing, will be rematerialized when necessary.
2239 }
2240
2241 freeResourcesOf(ins);
2242 }
2243
2244 // negateMask is used by asm_fneg.
2245#if defined __SUNPRO_CC
2246 // From Sun Studio C++ Readme: #pragma align inside namespace requires mangled names.
2247 // Initialize here to avoid multithreading contention issues during initialization.
2248 static uint32_t negateMask_temp[] = {0, 0, 0, 0, 0, 0, 0};
2249
2250 static uint32_t* negateMaskInit()
2251 {
2252 uint32_t* negateMask = (uint32_t*)alignUp(negateMask_temp, 16);
2253 negateMask[1] = 0x80000000;
2254 return negateMask;
2255 }
2256
2257 static uint32_t *negateMask = negateMaskInit();
2258#else
2259 static const AVMPLUS_ALIGN16(uint32_t) negateMask[] = {0,0x80000000,0,0};
2260#endif
2261
2262 void Assembler::asm_fneg(LIns* ins)
2263 {
2264 LIns *lhs = ins->oprnd1();
2265
2266 if (_config.i386_sse2) {
2267 Register rr = prepareResultReg(ins, XmmRegs);
2268
2269 // If 'lhs' isn't in a register, it can be clobbered by 'ins'.
2270 Register ra;
2271 if (!lhs->isInReg()) {
2272 ra = rr;
2273 } else if (!(rmask(lhs->getReg()) & XmmRegs)) {
2274 // We need to evict lhs from x87Regs, which then puts us in
2275 // the same situation as the !isInReg() case.
2276 evict(lhs);
2277 ra = rr;
2278 } else {
2279 ra = lhs->getReg();
2280 }
2281
2282 SSE_XORPD(rr, negateMask);
2283
2284 if (rr != ra)
2285 SSE_MOVSD(rr, ra);
2286
2287 freeResourcesOf(ins);
2288 if (!lhs->isInReg()) {
2289 NanoAssert(ra == rr);
2290 findSpecificRegForUnallocated(lhs, ra);
2291 }
2292
2293 } else {
2294 debug_only( Register rr = ) prepareResultReg(ins, x87Regs);
2295 NanoAssert(FST0 == rr);
2296
2297 NanoAssert(!lhs->isInReg() || FST0 == lhs->getReg());
2298
2299 FCHS();
2300
2301 freeResourcesOf(ins);
2302 if (!lhs->isInReg())
2303 findSpecificRegForUnallocated(lhs, FST0);
2304 }
2305 }
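        // [Editor's note -- illustrative sketch, not part of the original source]
        // XORing with negateMask flips bit 63 of the low xmm lane, i.e. the sign
        // bit of the double, which is all the negation needs. A scalar model,
        // assuming memcpy from <string.h> is available:
        static inline double negate_via_xor_model(double x) {
            uint64_t bits;
            memcpy(&bits, &x, sizeof(bits));
            bits ^= 0x8000000000000000ULL;      // the same bit negateMask targets
            memcpy(&x, &bits, sizeof(bits));
            return x;
        }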
2306
2307 void Assembler::asm_arg(ArgType ty, LIns* ins, Register r, int32_t& stkd)
2308 {
2309 // If 'r' is known, then that's the register we have to put 'ins'
2310 // into.
2311
2312 if (ty == ARGTYPE_I || ty == ARGTYPE_UI) {
2313 if (r != UnspecifiedReg) {
2314 if (ins->isImmI()) {
2315 // Rematerialize the constant.
2316 asm_immi(r, ins->immI(), /*canClobberCCs*/true);
2317 } else if (ins->isInReg()) {
2318 if (r != ins->getReg())
2319 MR(r, ins->getReg());
2320 } else if (ins->isInAr()) {
2321 int d = arDisp(ins);
2322 NanoAssert(d != 0);
2323 if (ins->isop(LIR_allocp)) {
2324 LEA(r, d, FP);
2325 } else {
2326 LD(r, d, FP);
2327 }
2328
2329 } else {
2330 // This is the last use, so fine to assign it
2331 // to the scratch reg, it's dead after this point.
2332 findSpecificRegForUnallocated(ins, r);
2333 }
2334 }
2335 else {
2336 if (_config.i386_fixed_esp)
2337 asm_stkarg(ins, stkd);
2338 else
2339 asm_pusharg(ins);
2340 }
2341
2342 } else {
2343 NanoAssert(ty == ARGTYPE_D);
2344 asm_farg(ins, stkd);
2345 }
2346 }
2347
2348 void Assembler::asm_pusharg(LIns* ins)
2349 {
2350 // arg goes on stack
2351 if (!ins->isExtant() && ins->isImmI()) {
2352 PUSHi(ins->immI()); // small const we push directly
2353 } else if (!ins->isExtant() || ins->isop(LIR_allocp)) {
2354 Register ra = findRegFor(ins, GpRegs);
2355 PUSHr(ra);
2356 } else if (ins->isInReg()) {
2357 PUSHr(ins->getReg());
2358 } else {
2359 NanoAssert(ins->isInAr());
2360 PUSHm(arDisp(ins), FP);
2361 }
2362 }
2363
2364 void Assembler::asm_stkarg(LIns* ins, int32_t& stkd)
2365 {
2366 // arg goes on stack
2367 if (!ins->isExtant() && ins->isImmI())
2368 {
2369 // small const we push directly
2370 STi(SP, stkd, ins->immI());
2371 }
2372 else {
2373 Register ra;
2374 if (!ins->isInReg() || ins->isop(LIR_allocp))
2375 ra = findRegFor(ins, GpRegs & (~SavedRegs));
2376 else
2377 ra = ins->getReg();
2378 ST(SP, stkd, ra);
2379 }
2380
2381 stkd += sizeof(int32_t);
2382 }
2383
2384 void Assembler::asm_farg(LIns* ins, int32_t& stkd)
2385 {
2386 NanoAssert(ins->isD());
2387 Register r = findRegFor(ins, FpRegs);
2388 if (rmask(r) & XmmRegs) {
2389 SSE_STQ(stkd, SP, r);
2390 } else {
2391 FSTPQ(stkd, SP);
2392
2393 // 22Jul09 rickr - Enabling the evict causes a 10% slowdown on primes
2394 //
2395 // evict() triggers a very expensive fstpq/fldq pair around the store.
2396 // We need to resolve the bug some other way.
2397 //
2398 // see https://bugzilla.mozilla.org/show_bug.cgi?id=491084
2399
2400 // It's possible that the same LIns* with r=FST0 will appear in the argument list more
2401 // than once. In this case FST0 will not have been evicted and the multiple pop
2402 // actions will unbalance the FPU stack. A quick fix is to always evict FST0 manually.
2403 NanoAssert(r == FST0);
2404 NanoAssert(ins == _allocator.getActive(r));
2405 evict(ins);
2406 }
2407 if (!_config.i386_fixed_esp)
2408 SUBi(rESP, 8);
2409
2410 stkd += sizeof(double);
2411 }
2412
2413 void Assembler::asm_fop(LIns* ins)
2414 {
2415 LOpcode op = ins->opcode();
2416 if (_config.i386_sse2)
2417 {
2418 LIns *lhs = ins->oprnd1();
2419 LIns *rhs = ins->oprnd2();
2420
2421 RegisterMask allow = XmmRegs;
2422 Register rb = UnspecifiedReg;
2423 if (lhs != rhs) {
2424 rb = findRegFor(rhs, allow);
2425 allow &= ~rmask(rb);
2426 }
2427
2428 Register rr = prepareResultReg(ins, allow);
2429
2430 // If 'lhs' isn't in a register, it can be clobbered by 'ins'.
2431 Register ra;
2432 if (!lhs->isInReg()) {
2433 ra = rr;
2434
2435 } else if (!(rmask(lhs->getReg()) & XmmRegs)) {
2436 NanoAssert(lhs->getReg() == FST0);
2437
2438 // We need to evict lhs from x87Regs, which then puts us in
2439 // the same situation as the !isInReg() case.
2440 evict(lhs);
2441 ra = rr;
2442
2443 } else {
2444 ra = lhs->getReg();
2445 NanoAssert(rmask(ra) & XmmRegs);
2446 }
2447
2448 if (lhs == rhs)
2449 rb = ra;
2450
2451 switch (op) {
2452 case LIR_addd: SSE_ADDSD(rr, rb); break;
2453 case LIR_subd: SSE_SUBSD(rr, rb); break;
2454 case LIR_muld: SSE_MULSD(rr, rb); break;
2455 case LIR_divd: SSE_DIVSD(rr, rb); break;
2456 default: NanoAssert(0);
2457 }
2458
2459 if (rr != ra)
2460 SSE_MOVSD(rr, ra);
2461
2462 freeResourcesOf(ins);
2463 if (!lhs->isInReg()) {
2464 NanoAssert(ra == rr);
2465 findSpecificRegForUnallocated(lhs, ra);
2466 }
2467 }
2468 else
2469 {
2470 // We swap lhs/rhs on purpose here, it works out better with
2471 // only one fpu reg -- we can use divr/subr.
2472 LIns* rhs = ins->oprnd1();
2473 LIns* lhs = ins->oprnd2();
2474 debug_only( Register rr = ) prepareResultReg(ins, rmask(FST0));
2475 NanoAssert(FST0 == rr);
2476 NanoAssert(!lhs->isInReg() || FST0 == lhs->getReg());
2477
2478 if (rhs->isImmD()) {
2479 const uint64_t* p = findImmDFromPool(rhs->immDasQ());
2480
2481 switch (op) {
2482 case LIR_addd: FADDdm( (const double*)p); break;
2483 case LIR_subd: FSUBRdm((const double*)p); break;
2484 case LIR_muld: FMULdm( (const double*)p); break;
2485 case LIR_divd: FDIVRdm((const double*)p); break;
2486 default: NanoAssert(0);
2487 }
2488
2489 } else {
2490 int db = findMemFor(rhs);
2491
2492 switch (op) {
2493 case LIR_addd: FADD( db, FP); break;
2494 case LIR_subd: FSUBR(db, FP); break;
2495 case LIR_muld: FMUL( db, FP); break;
2496 case LIR_divd: FDIVR(db, FP); break;
2497 default: NanoAssert(0);
2498 }
2499 }
2500 freeResourcesOf(ins);
2501 if (!lhs->isInReg()) {
2502 findSpecificRegForUnallocated(lhs, FST0);
2503 }
2504 }
2505 }
2506
2507 void Assembler::asm_i2d(LIns* ins)
2508 {
2509 LIns* lhs = ins->oprnd1();
2510
2511 Register rr = prepareResultReg(ins, FpRegs);
2512 if (rmask(rr) & XmmRegs) {
2513 // todo support int value in memory
2514 Register ra = findRegFor(lhs, GpRegs);
2515 SSE_CVTSI2SD(rr, ra);
2516 SSE_XORPDr(rr, rr); // zero rr to ensure no dependency stalls
2517 } else {
2518 int d = findMemFor(lhs);
2519 FILD(d, FP);
2520 }
2521
2522 freeResourcesOf(ins);
2523 }
2524
2525 void Assembler::asm_ui2d(LIns* ins)
2526 {
2527 LIns* lhs = ins->oprnd1();
2528
2529 Register rr = prepareResultReg(ins, FpRegs);
2530 if (rmask(rr) & XmmRegs) {
2531 Register rt = registerAllocTmp(GpRegs);
2532
2533 // Technique inspired by gcc disassembly. Edwin explains it:
2534 //
2535 // rt is 0..2^32-1
2536 //
2537 // sub rt,0x80000000
2538 //
2539 // Now rt is -2^31..2^31-1, i.e. the range of int, but not the same value
2540 // as before.
2541 //
2542 // cvtsi2sd rr,rt
2543 //
2544 // rr is now a double with the int value range.
2545 //
2546 // addsd rr, 2147483648.0
2547 //
2548 // Adding back double(0x80000000) makes the range 0..2^32-1.
2549
2550 static const double k_NEGONE = 2147483648.0;
2551 SSE_ADDSDm(rr, &k_NEGONE);
2552
2553 SSE_CVTSI2SD(rr, rt);
2554 SSE_XORPDr(rr, rr); // zero rr to ensure no dependency stalls
2555
2556 if (lhs->isInRegMask(GpRegs)) {
2557 Register ra = lhs->getReg();
2558 LEA(rt, 0x80000000, ra);
2559
2560 } else {
2561 const int d = findMemFor(lhs);
2562 SUBi(rt, 0x80000000);
2563 LD(rt, d, FP);
2564 }
2565
2566 } else {
2567 // Use space just below rESP and use PUSH to avoid writing
2568 // past the end of the stack, see bug 590553.
2569 Register ra = findRegFor(lhs, GpRegs);
2570 NanoAssert(rr == FST0);
2571 ADDi(SP, 8); // fix up the stack
2572 FILDQ(0, SP); // convert int64 to double
2573 PUSHr(ra); // low 32 bits = unsigned value
2574 PUSHi(0); // high 32 bits = 0
2575 }
2576
2577 freeResourcesOf(ins);
2578 }
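        // [Editor's note -- illustrative sketch, not part of the original source]
        // The SSE2 bias trick above, written as plain arithmetic: subtract 2^31 to
        // bring the value into signed-int range, convert, then add 2^31.0 back.
        static inline double ui2d_model(uint32_t u) {
            int64_t biased = (int64_t)u - 2147483648LL;   // in [-2^31, 2^31): what cvtsi2sd sees
            return (double)biased + 2147483648.0;         // add the bias back; exactly (double)u
        }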
2579
2580 void Assembler::asm_d2i(LIns* ins)
2581 {
2582 LIns *lhs = ins->oprnd1();
2583
2584 if (_config.i386_sse2) {
2585 Register rr = prepareResultReg(ins, GpRegs);
2586 Register ra = findRegFor(lhs, XmmRegs);
2587 SSE_CVTTSD2SI(rr, ra);
2588 } else {
2589 bool pop = !lhs->isInReg();
2590 findSpecificRegFor(lhs, FST0);
2591 if (ins->isInReg())
2592 evict(ins);
2593 int d = findMemFor(ins);
2594 FIST(pop, d, FP);
2595 }
2596
2597 freeResourcesOf(ins);
2598 }
2599
2600 void Assembler::asm_nongp_copy(Register rd, Register rs)
2601 {
2602 if ((rmask(rd) & XmmRegs) && (rmask(rs) & XmmRegs)) {
2603 // xmm -> xmm
2604 SSE_MOVSD(rd, rs);
2605 } else if ((rmask(rd) & GpRegs) && (rmask(rs) & XmmRegs)) {
2606 // xmm -> gp
2607 SSE_MOVD(rd, rs);
2608 } else {
2609 NanoAssertMsgf(false, "bad asm_nongp_copy(%s, %s)", gpn(rd), gpn(rs));
2610 }
2611 }
2612
2613 Branches Assembler::asm_branchd_helper(bool branchOnFalse, LIns* cond, NIns *targ)
2614 {
2615 NIns* patch1 = NULL;
2616 NIns* patch2 = NULL;
2617 LOpcode opcode = cond->opcode();
2618
2619 if (_config.i386_sse2) {
2620 // LIR_ltd and LIR_gtd are handled by the same case because
2621 // asm_cmpd() converts LIR_ltd(a,b) to LIR_gtd(b,a). Likewise
2622 // for LIR_led/LIR_ged.
2623 if (branchOnFalse) {
2624 // op == LIR_xf
2625 switch (opcode) {
2626 case LIR_eqd:
2627 if (cond->oprnd1() == cond->oprnd2()) {
2628 JP(targ);
2629 } else {
2630 JP(targ); // unordered
2631 patch1 = _nIns;
2632 JNE(targ);
2633 patch2 = _nIns;
2634 }
2635 break;
2636 case LIR_ltd:
2637 case LIR_gtd: JNA(targ); break;
2638 case LIR_led:
2639 case LIR_ged: JNAE(targ); break;
2640 default: NanoAssert(0); break;
2641 }
2642 } else {
2643 // op == LIR_xt
2644 switch (opcode) {
2645 case LIR_eqd:
2646 if (cond->oprnd1() == cond->oprnd2()) {
2647 JNP(targ);
2648 } else {
2649 // jp skip (2byte)
2650 // je target
2651 // skip: ...
2652 underrunProtect(16); // underrun of 7 needed but we write 2 instr --> 16
2653 NIns *skip = _nIns;
2654 JE(targ);
2655 patch1 = _nIns;
2656 JP(skip); // unordered
2657 }
2658 break;
2659 case LIR_ltd:
2660 case LIR_gtd: JA(targ); break;
2661 case LIR_led:
2662 case LIR_ged: JAE(targ); break;
2663 default: NanoAssert(0); break;
2664 }
2665 }
2666 } else {
2667 if (branchOnFalse)
2668 JP(targ);
2669 else
2670 JNP(targ);
2671 }
2672
2673 if (!patch1)
2674 patch1 = _nIns;
2675
2676 return Branches(patch1, patch2);
2677 }
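        // [Editor's note -- illustrative sketch, not part of the original source]
        // The two-jump LIR_eqd pattern above for the branch-on-true case:
        //     ucomisd a, b
        //     jp   skip          ; unordered (NaN) -> the equality branch must not be taken
        //     je   target        ; ordered and equal -> take the branch
        //   skip:
        // which matches ordinary C semantics for doubles:
        static inline bool eqd_branch_taken_model(double a, double b) {
            return a == b;                      // false whenever either operand is NaN
        }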
2678
2679 // WARNING: This function cannot generate any code that will affect the
2680 // condition codes prior to the generation of the
2681 // ucomisd/fcompp/fcomp/fcom. See asm_cmpi() for more details.
2682 void Assembler::asm_cmpd(LIns *cond)
2683 {
2684 LOpcode condop = cond->opcode();
2685 NanoAssert(isCmpDOpcode(condop));
2686 LIns* lhs = cond->oprnd1();
2687 LIns* rhs = cond->oprnd2();
2688 NanoAssert(lhs->isD() && rhs->isD());
2689
2690 if (_config.i386_sse2) {
2691 // First, we convert (a < b) into (b > a), and (a <= b) into (b >= a).
2692 if (condop == LIR_ltd) {
2693 condop = LIR_gtd;
2694 LIns* t = lhs; lhs = rhs; rhs = t;
2695 } else if (condop == LIR_led) {
2696 condop = LIR_ged;
Value stored to 'condop' is never read
2697 LIns* t = lhs; lhs = rhs; rhs = t;
2698 }
2699
2700 // LIR_eqd, if lhs == rhs:
2701 // ucomisd ZPC outcome (SETNP/JNP succeeds if P==0)
2702 // ------- --- -------
2703 // UNORDERED 111 SETNP/JNP fails
2704 // EQUAL 100 SETNP/JNP succeeds
2705 //
2706 // LIR_eqd, if lhs != rhs;
2707 // ucomisd ZPC outcome (SETP/JP succeeds if P==0,
2708 // SETE/JE succeeds if Z==0)
2709 // ------- --- -------
2710 // UNORDERED 111 SETP/JP succeeds (and skips to fail target)
2711 // EQUAL 100 SETP/JP fails, SETE/JE succeeds
2712 // GREATER_THAN 000 SETP/JP fails, SETE/JE fails
2713 // LESS_THAN 001 SETP/JP fails, SETE/JE fails
2714 //
2715 // LIR_gtd:
2716 // ucomisd ZPC outcome (SETA/JA succeeds if CZ==00)
2717 // ------- --- -------
2718 // UNORDERED 111 SETA/JA fails
2719 // EQUAL 100 SETA/JA fails
2720 // GREATER_THAN 000 SETA/JA succeeds
2721 // LESS_THAN 001 SETA/JA fails
2722 //
2723 // LIR_ged:
2724 // ucomisd ZPC outcome (SETAE/JAE succeeds if C==0)
2725 // ------- --- -------
2726 // UNORDERED 111 SETAE/JAE fails
2727 // EQUAL 100 SETAE/JAE succeeds
2728 // GREATER_THAN 000 SETAE/JAE succeeds
2729 // LESS_THAN 001 SETAE/JAE fails
2730
2731 Register ra, rb;
2732 findRegFor2(XmmRegs, lhs, ra, XmmRegs, rhs, rb);
2733 SSE_UCOMISD(ra, rb);
2734
2735 } else {
2736 // First, we convert (a > b) into (b < a), and (a >= b) into (b <= a).
2737 // Note that this is the opposite of the sse2 conversion above.
2738 if (condop == LIR_gtd) {
2739 condop = LIR_ltd;
2740 LIns* t = lhs; lhs = rhs; rhs = t;
2741 } else if (condop == LIR_ged) {
2742 condop = LIR_led;
2743 LIns* t = lhs; lhs = rhs; rhs = t;
2744 }
2745
2746 // FNSTSW_AX puts the flags into AH like so: B:C3:TOP3:TOP2:TOP1:C2:C1:C0.
2747 // Furthermore, fcom/fcomp/fcompp sets C3:C2:C0 the same values
2748 // that Z:P:C are set by ucomisd, and the relative positions in AH
2749 // line up. (Someone at Intel has a sense of humour.) Therefore
2750 // we can use the same lahf/test(mask) technique as used in the
2751 // sse2 case above. We could use fcomi/fcomip/fcomipp which set
2752 // ZPC directly and then use LAHF instead of FNSTSW_AX and make
2753 // this code generally more like the sse2 code, but we don't
2754 // because fcomi/fcomip/fcomipp/lahf aren't available on earlier
2755 // x86 machines.
2756 //
2757 // The masks are as follows:
2758 // - LIR_eqd: mask == 0x44 == 0100_0100b, which extracts 0Z00_0P00 from AH.
2759 // - LIR_ltd: mask == 0x05 == 0000_0101b, which extracts 0000_0P0C from AH.
2760 // - LIR_led: mask == 0x41 == 0100_0001b, which extracts 0Z00_000C from AH.
2761 //
2762 // LIR_eqd:
2763 // ucomisd C3:C2:C0 lahf/test(0x44) SZP outcome
2764 // ------- -------- --------- --- -------
2765 // UNORDERED 111 0100_0100 001 SETNP fails
2766 // EQUAL 100 0100_0000 000 SETNP succeeds
2767 // GREATER_THAN 000 0000_0000 011 SETNP fails
2768 // LESS_THAN 001 0000_0000 011 SETNP fails
2769 //
2770 // LIR_ltd:
2771 // fcom C3:C2:C0 lahf/test(0x05) SZP outcome
2772 // ------- -------- --------- --- -------
2773 // UNORDERED 111 0000_0101 001 SETNP fails
2774 // EQUAL 100 0000_0000 011 SETNP fails
2775 // GREATER_THAN 000 0000_0000 011 SETNP fails
2776 // LESS_THAN 001 0000_0001 000 SETNP succeeds
2777 //
2778 // LIR_led:
2779 // fcom C3:C2:C0 lahf/test(0x41) SZP outcome
2780 // ------- --- --------- --- -------
2781 // UNORDERED 111 0100_0001 001 SETNP fails
2782 // EQUAL 100 0100_0000 000 SETNP succeeds
2783 // GREATER_THAN 000 0000_0000 011 SETNP fails
2784 // LESS_THAN 001 0000_0001 010 SETNP succeeds
2785
2786 int mask = 0; // init to avoid MSVC compile warnings
2787 switch (condop) {
2788 case LIR_eqd: mask = 0x44; break;
2789 case LIR_ltd: mask = 0x05; break;
2790 case LIR_led: mask = 0x41; break;
2791 default: NanoAssert(0); break;
2792 }
2793
2794 evictIfActive(rEAX);
2795 bool pop = !lhs->isInReg();
2796 findSpecificRegFor(lhs, FST0);
2797
2798 if (lhs == rhs) {
2799 // NaN test.
2800 TEST_AH(mask);
2801 FNSTSW_AX(); // requires rEAX to be free
2802 if (pop)
2803 FCOMPP();
2804 else
2805 FCOMP();
2806 FLDr(FST0); // DUP
2807 } else {
2808 TEST_AH(mask);
2809 FNSTSW_AX(); // requires rEAX to be free
2810 if (rhs->isImmD()) {
2811 const uint64_t* p = findImmDFromPool(rhs->immDasQ());
2812 FCOMdm(pop, (const double*)p);
2813 } else {
2814 int d = findMemFor(rhs);
2815 FCOM(pop, d, FP);
2816 }
2817 }
2818 }
2819 }
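        // [Editor's note -- illustrative sketch, not part of the original source]
        // The x87 TEST_AH(mask) trick above: FNSTSW AX puts C0/C2/C3 into AH bits
        // 0, 2 and 6, and 'test ah, mask' sets PF to the parity of (AH & mask).
        // The masks are chosen so the accepted outcomes leave an odd number of bits
        // set, making SETNP/JNP succeed exactly when the condition holds.
        static inline bool setnp_after_test_model(uint8_t ah, uint8_t mask) {
            uint8_t m = ah & mask;
            int bits = 0;
            for (int i = 0; i < 8; ++i)
                bits += (m >> i) & 1;
            return (bits & 1) != 0;             // PF clear (odd parity) => SETNP succeeds
        }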
2820
2821 // Increment the 32-bit profiling counter at pCtr, without
2822 // changing any registers.
2823 verbose_only(
2824 void Assembler::asm_inc_m32(uint32_t* pCtr)
2825 {
2826 INCLi(int32_t(pCtr));
2827 }
2828 )
2829
2830 void Assembler::nativePageReset()
2831 {}
2832
2833 void Assembler::nativePageSetup()
2834 {
2835 NanoAssert(!_inExit);
2836 if (!_nIns)
2837 codeAlloc(codeStart, codeEnd, _nIns verbose_only(, codeBytes));
2838
2839 // add some random padding, so functions aren't predictably placed.
2840 if (_config.harden_function_alignment)
2841 {
2842 int32_t pad = _noise->getValue(LARGEST_UNDERRUN_PROT);
2843 underrunProtect(pad);
2844 _nIns -= pad;
2845 VMPI_memset(_nIns, INT3_OP, pad);
2846 PERFM_NVPROF("hardening:func-align", pad);
2847 }
2848 }
2849
2850 // enough room for n bytes
2851 void Assembler::underrunProtect(int n)
2852 {
2853 NIns *eip = _nIns;
2854 NanoAssertMsg(n<=LARGEST_UNDERRUN_PROT, "constant LARGEST_UNDERRUN_PROT is too small");
2855 // This may be in a normal code chunk or an exit code chunk.
2856 if (eip - n < codeStart) {
2857 codeAlloc(codeStart, codeEnd, _nIns verbose_only(, codeBytes));
2858 JMP(eip);
2859 }
2860 }
2861
2862 void Assembler::asm_insert_random_nop()
2863 {
2864 // emit one of several harmless nop-like instructions, chosen at random
2865 uint32_t r = _noise->getValue(5);
2866 switch(r)
2867 {
2868 case 0: MR(rEAX,rEAX); break;
2869 case 1: MR(rEDI,rEDI); break;
2870 case 2: MR(rECX,rECX); break;
2871 case 3: LEA(rECX,0,rECX); break;
2872 case 4: LEA(rESP,0,rESP); break;
2873 }
2874 }
2875
2876 void Assembler::asm_ret(LIns* ins)
2877 {
2878 // Unreachable, so assume correct stack depth.
2879 debug_only( _fpuStkDepth = 0; )
2880
2881 genEpilogue();
2882
2883 // Restore rESP from rEBP, undoing SUBi(SP,amt) in the prologue
2884 MR(SP,FP);
2885
2886 releaseRegisters();
2887 assignSavedRegs();
2888
2889 LIns *val = ins->oprnd1();
2890 if (ins->isop(LIR_reti)) {
2891 findSpecificRegFor(val, retRegs[0]);
2892 } else {
2893 NanoAssert(ins->isop(LIR_retd));
2894 findSpecificRegFor(val, FST0);
2895 fpu_pop();
2896 }
2897 }
2898
2899 void Assembler::swapCodeChunks() {
2900 if (!_nExitIns)
2901 codeAlloc(exitStart, exitEnd, _nExitIns verbose_only(, exitBytes));
2902 SWAP(NIns*, _nIns, _nExitIns);
2903 SWAP(NIns*, codeStart, exitStart);
2904 SWAP(NIns*, codeEnd, exitEnd);
2905 verbose_only( SWAP(size_t, codeBytes, exitBytes); )
2906 }
2907
2908 #endif /* FEATURE_NANOJIT */
2909}