File: platform/mac/avmshell/../../../nanojit/Nativei386.cpp
Location: line 2696, column 17
Description: Value stored to 'condop' is never read
1 | /* -*- Mode: C++; c-basic-offset: 4; indent-tabs-mode: nil; tab-width: 4 -*- */ |
2 | /* vi: set ts=4 sw=4 expandtab: (add to ~/.vimrc: set modeline modelines=5) */ |
3 | /* ***** BEGIN LICENSE BLOCK ***** |
4 | * Version: MPL 1.1/GPL 2.0/LGPL 2.1 |
5 | * |
6 | * The contents of this file are subject to the Mozilla Public License Version |
7 | * 1.1 (the "License"); you may not use this file except in compliance with |
8 | * the License. You may obtain a copy of the License at |
9 | * http://www.mozilla.org/MPL/ |
10 | * |
11 | * Software distributed under the License is distributed on an "AS IS" basis, |
12 | * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License |
13 | * for the specific language governing rights and limitations under the |
14 | * License. |
15 | * |
16 | * The Original Code is [Open Source Virtual Machine]. |
17 | * |
18 | * The Initial Developer of the Original Code is |
19 | * Adobe System Incorporated. |
20 | * Portions created by the Initial Developer are Copyright (C) 2004-2007 |
21 | * the Initial Developer. All Rights Reserved. |
22 | * |
23 | * Contributor(s): |
24 | * Adobe AS3 Team |
25 | * Mozilla TraceMonkey Team |
26 | * Asko Tontti <atontti@cc.hut.fi> |
27 | * |
28 | * Alternatively, the contents of this file may be used under the terms of |
29 | * either the GNU General Public License Version 2 or later (the "GPL"), or |
30 | * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), |
31 | * in which case the provisions of the GPL or the LGPL are applicable instead |
32 | * of those above. If you wish to allow use of your version of this file only |
33 | * under the terms of either the GPL or the LGPL, and not to allow others to |
34 | * use your version of this file under the terms of the MPL, indicate your |
35 | * decision by deleting the provisions above and replace them with the notice |
36 | * and other provisions required by the GPL or the LGPL. If you do not delete |
37 | * the provisions above, a recipient may use your version of this file under |
38 | * the terms of any one of the MPL, the GPL or the LGPL. |
39 | * |
40 | * ***** END LICENSE BLOCK ***** */ |
41 | #include "nanojit.h" |
42 | |
43 | #ifdef _MSC_VER |
44 | // disable some specific warnings which are normally useful, but pervasive in the code-gen macros |
45 | #pragma warning(disable:4310) // cast truncates constant value |
46 | #endif |
47 | |
48 | namespace nanojit |
49 | { |
50 | #if defined FEATURE_NANOJIT && defined NANOJIT_IA32 |
51 | |
52 | #ifdef NJ_VERBOSE |
53 | const char *regNames[] = { |
54 | "eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi", |
55 | "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", |
56 | "f0" |
57 | }; |
58 | |
59 | const char *gpRegNames8lo[] = { "al", "cl", "dl", "bl" }; |
60 | const char *gpRegNames8hi[] = { "ah", "ch", "dh", "bh" }; |
61 | #endif |
62 | |
63 | #define TODO(x) do{ verbose_only(outputf(#x);) NanoAssertMsgf(false, "%s", #x); } while(0)
64 | |
65 | const Register Assembler::argRegs[] = { rECX, rEDX }; |
66 | const Register Assembler::retRegs[] = { rEAX, rEDX }; |
67 | const Register Assembler::savedRegs[] = { rEBX, rESI, rEDI }; |
68 | |
69 | const static uint8_t max_abi_regs[] = { |
70 | 2, /* ABI_FASTCALL */ |
71 | 1, /* ABI_THISCALL */ |
72 | 0, /* ABI_STDCALL */ |
73 | 0 /* ABI_CDECL */ |
74 | }; |
75 | |
76 | #define RB(r)    gpRegNames8lo[REGNUM(r)]
77 | #define RBhi(r)  gpRegNames8hi[REGNUM(r)]
78 | |
79 | typedef Register R; |
80 | typedef int32_t I32; |
81 | |
82 | // Length: 2--6 bytes. |
83 | inline void Assembler::MODRMsib(I32 r, R b, R i, I32 s, I32 d) { |
84 | if (d == 0 && b != rEBP) { |
85 | SIB(s, REGNUM(i), REGNUM(b)); |
86 | MODRM(0, r, 4); // amode == (b + i<<s) |
87 |         } else if (isS8(d)) {
88 | IMM8(d); |
89 | SIB(s, REGNUM(i), REGNUM(b)); |
90 | MODRM(1, r, 4); // amode == d8(b + i<<s) |
91 | } else { |
92 | IMM32(d); |
93 | SIB(s, REGNUM(i), REGNUM(b)); |
94 | MODRM(2, r, 4); // amode == d32(b + i<<s) |
95 | } |
96 | } |
97 | |
98 | // Length: 1--6 bytes. |
99 | inline void Assembler::MODRMm(I32 r, I32 d, R b) { |
100 | if (b == UnspecifiedReg) { |
101 | IMM32(d); |
102 | MODRM(0, r, 5); // amode == (d32) |
103 | } else if (b == rESP) { |
104 | MODRMsib(r, b, rESP, 0, d); // amode == d(b) |
105 | } else if (d == 0 && b != rEBP) { |
106 | MODRM(0, r, REGNUM(b)); // amode == (r) |
107 |         } else if (isS8(d)) {
108 | IMM8(d); |
109 | MODRM(1, r, REGNUM(b)); // amode == d8(b) |
110 | } else { |
111 | IMM32(d); |
112 | MODRM(2, r, REGNUM(b)); // amode == d32(b) |
113 | } |
114 | } |
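     |         // Worked example (a sketch; assumes MODRM(mod, reg, rm) packs the
     |         // usual (mod<<6)|(reg<<3)|rm byte): MODRMm(0, 8, rEBX) takes the
     |         // d8 branch and emits IMM8(0x08) then MODRM(1, 0, 3) == 0x43.
     |         // Since this assembler emits backwards, the bytes land in memory
     |         // as 43 08, so a preceding OPCODE(0x8b) yields 8b 43 08, i.e.
     |         // mov eax, 8(%ebx).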
115 | |
116 | // Length: 5 bytes. |
117 | inline void Assembler::MODRMdm(I32 r, I32 addr) { |
118 | IMM32(addr); |
119 | MODRM(0, r, 5); // amode == d32(r) |
120 | } |
121 | |
122 | inline void Assembler::ALU0(I32 opc) { |
123 | underrunProtect(1); |
124 | OPCODE(opc); |
125 | } |
126 | |
127 | inline void Assembler::ALUm(I32 opc, I32 r, I32 d, R b) { |
128 | underrunProtect(8); |
129 | MODRMm(r, d, b); |
130 | OPCODE(opc); |
131 | } |
132 | |
133 | inline void Assembler::ALUdm(I32 opc, R r, I32 addr) { |
134 | underrunProtect(6); |
135 | MODRMdm(REGNUM(r), addr); |
136 | OPCODE(opc); |
137 | } |
138 | |
139 | inline void Assembler::ALUsib(I32 opc, R r, R base, R index, I32 scale, I32 disp) { |
140 | underrunProtect(7); |
141 | MODRMsib(REGNUM(r), base, index, scale, disp); |
142 | OPCODE(opc); |
143 | } |
144 | |
145 | inline void Assembler::ALUsib16(I32 opc, R r, R base, R index, I32 scale, I32 disp) { |
146 | underrunProtect(8); |
147 | MODRMsib(REGNUM(r), base, index, scale, disp); |
148 | OPCODE(opc); |
149 | OPCODE(0x66); |
150 | } |
151 | |
152 | inline void Assembler::ALUm16(I32 opc, I32 r, I32 d, R b) { |
153 | underrunProtect(9); |
154 | MODRMm(r, d, b); |
155 | OPCODE(opc); |
156 | OPCODE(0x66); |
157 | } |
158 | |
159 | inline void Assembler::ALU2dm(I32 opc2, R r, I32 addr) { |
160 | underrunProtect(7); |
161 | MODRMdm(REGNUM(r), addr); |
162 | OPCODE2(opc2); |
163 | } |
164 | |
165 | inline void Assembler::ALU2m(I32 opc2, R r, I32 d, R b) { |
166 | underrunProtect(9); |
167 | MODRMm(REGNUM(r), d, b); |
168 | OPCODE2(opc2); |
169 | } |
170 | |
171 | inline void Assembler::ALU2sib(I32 opc2, Register r, R base, R index, I32 scale, I32 disp) { |
172 | underrunProtect(8); |
173 | MODRMsib(REGNUM(r), base, index, scale, disp); |
174 | OPCODE2(opc2); |
175 | } |
176 | |
177 | inline void Assembler::ALUi(I32 opc, R r, I32 i) { |
178 | underrunProtect(6); |
179 |             NanoAssert(REGNUM(r) < 8);
180 |             if (isS8(i)) {
181 | IMM8(i); |
182 | MODRMr(opc >> 3, REGNUM(r)); |
183 | OPCODE(0x83); |
184 | } else { |
185 | IMM32(i); |
186 | if ( r == rEAX) { |
187 | OPCODE(opc); |
188 | } else { |
189 | MODRMr(opc >> 3, REGNUM(r)); |
190 | OPCODE(0x81); |
191 | } |
192 | } |
193 | } |
194 | |
195 | inline void Assembler::ALUmi(I32 opc, I32 d, Register b, I32 i) { |
196 | underrunProtect(10); |
197 |             NanoAssert(REGNUM(b) < 8);
198 |             if (isS8(i)) {
199 | IMM8(i); |
200 | MODRMm(opc >> 3, d, b); |
201 | OPCODE(0x83); |
202 | } else { |
203 | IMM32(i); |
204 | MODRMm(opc >> 3, d, b); |
205 | OPCODE(0x81); |
206 | } |
207 | } |
208 | |
209 | inline void Assembler::ALU2(I32 opc2, R d, R s) { |
210 | underrunProtect(3); |
211 | MODRMr(REGNUM(d), REGNUM(s)); |
212 | OPCODE2(opc2); |
213 | } |
214 | |
215 | inline Register Assembler::AL2AHReg(R r) { |
216 |             NanoAssert(REGNUM(r) < 4); // one of: al, cl, dl, bl
217 | Register r2 = { REGNUM(r) | 4 }; // convert to one of: ah, ch, dh, bh |
218 | return r2; |
219 | } |
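     |         // E.g. AL2AHReg(rEAX) maps register number 0 ("al") to number 4,
     |         // which a ModRM reg/rm field addresses as "ah" on x86-32.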
220 | |
221 | inline void Assembler::OR(R l, R r) { count_alu(); ALU(0x0b, REGNUM(l), r); asm_output("or %s,%s", gpn(l), gpn(r)); } |
222 | inline void Assembler::AND(R l, R r) { count_alu(); ALU(0x23, REGNUM(l), r); asm_output("and %s,%s", gpn(l), gpn(r)); } |
223 | inline void Assembler::XOR(R l, R r) { count_alu(); ALU(0x33, REGNUM(l), r); asm_output("xor %s,%s", gpn(l), gpn(r)); } |
224 | inline void Assembler::ADD(R l, R r) { count_alu(); ALU(0x03, REGNUM(l), r); asm_output("add %s,%s", gpn(l), gpn(r)); } |
225 | inline void Assembler::SUB(R l, R r) { count_alu(); ALU(0x2b, REGNUM(l), r); asm_output("sub %s,%s", gpn(l), gpn(r)); } |
226 | inline void Assembler::IMUL(R l, R r){ count_alu(); ALU2(0x0faf, l, r); asm_output("imul %s,%s", gpn(l), gpn(r)); } |
227 | inline void Assembler::DIV(R r) { count_alu(); ALU(0xf7, 7, r); asm_output("idiv edx:eax, %s", gpn(r)); } |
228 | inline void Assembler::NOT(R r) { count_alu(); ALU(0xf7, 2, r); asm_output("not %s", gpn(r)); } |
229 | inline void Assembler::NEG(R r) { count_alu(); ALU(0xf7, 3, r); asm_output("neg %s", gpn(r)); } |
230 | inline void Assembler::AND8R(R r) { count_alu(); ALU(0x22, REGNUM(r), AL2AHReg(r)); asm_output("andb %s, %s", RB(r), RBhi(r)); } |
231 | |
232 | inline void Assembler::SHR(R r, R s) { |
233 | count_alu(); |
234 |             NanoAssert(s == rECX); (void)s;
235 | ALU(0xd3, 5, r); |
236 | asm_output("shr %s,%s", gpn(r), gpn(s)); |
237 | } |
238 | |
239 | inline void Assembler::SAR(R r, R s) { |
240 | count_alu(); |
241 |             NanoAssert(s == rECX); (void)s;
242 | ALU(0xd3, 7, r); |
243 | asm_output("sar %s,%s", gpn(r), gpn(s)); |
244 | } |
245 | |
246 | inline void Assembler::SHL(R r, R s) { |
247 | count_alu(); |
248 |             NanoAssert(s == rECX); (void)s;
249 | ALU(0xd3, 4, r); |
250 | asm_output("shl %s,%s", gpn(r), gpn(s)); |
251 | } |
252 | |
253 | inline void Assembler::SHIFTi(I32 c, R r, I32 i) { |
254 | underrunProtect(3); |
255 | IMM8(i); |
256 | MODRMr(c, REGNUM(r)); |
257 | OPCODE(0xc1); |
258 | } |
259 | |
260 | inline void Assembler::SHLi(R r, I32 i) { count_alu(); SHIFTi(4, r, i); asm_output("shl %s,%d", gpn(r), i); } |
261 | inline void Assembler::SHRi(R r, I32 i) { count_alu(); SHIFTi(5, r, i); asm_output("shr %s,%d", gpn(r), i); } |
262 | inline void Assembler::SARi(R r, I32 i) { count_alu(); SHIFTi(7, r, i); asm_output("sar %s,%d", gpn(r), i); } |
263 | |
264 | inline void Assembler::MOVZX8(R d, R s) { count_alu(); ALU2(0x0fb6, d, s); asm_output("movzx %s,%s", gpn(d), gpn(s)); } |
265 | |
266 | inline void Assembler::SUBi(R r, I32 i) { count_alu(); ALUi(0x2d, r, i); asm_output("sub %s,%d", gpn(r), i); } |
267 | inline void Assembler::ADDi(R r, I32 i) { count_alu(); ALUi(0x05, r, i); asm_output("add %s,%d", gpn(r), i); } |
268 | inline void Assembler::ANDi(R r, I32 i) { count_alu(); ALUi(0x25, r, i); asm_output("and %s,%d", gpn(r), i); } |
269 | inline void Assembler::ORi(R r, I32 i) { count_alu(); ALUi(0x0d, r, i); asm_output("or %s,%d", gpn(r), i); } |
270 | inline void Assembler::XORi(R r, I32 i) { count_alu(); ALUi(0x35, r, i); asm_output("xor %s,%d", gpn(r), i); } |
271 | |
272 | inline void Assembler::ADDmi(I32 d, R b, I32 i) { count_alust(); ALUmi(0x05, d, b, i); asm_output("add %d(%s), %d", d, gpn(b), i); } |
273 | |
274 | inline void Assembler::TEST(R d, R s) { count_alu(); ALU(0x85, REGNUM(d), s); asm_output("test %s,%s", gpn(d), gpn(s)); } |
275 | inline void Assembler::CMP(R l, R r) { count_alu(); ALU(0x3b, REGNUM(l), r); asm_output("cmp %s,%s", gpn(l), gpn(r)); } |
276 | inline void Assembler::CMPi(R r, I32 i) { count_alu(); ALUi(0x3d, r, i); asm_output("cmp %s,%d", gpn(r), i); } |
277 | |
278 | inline void Assembler::LEA(R r, I32 d, R b) { count_alu(); ALUm(0x8d, REGNUM(r), d, b); asm_output("lea %s,%d(%s)", gpn(r), d, gpn(b)); } |
279 | |
280 | inline void Assembler::CDQ() { SARi(rEDX, 31); MR(rEDX, rEAX); } |
281 | |
282 | inline void Assembler::INCLi(I32 p) { |
283 | count_alu(); |
284 | underrunProtect(6); |
285 | IMM32((uint32_t)(ptrdiff_t)p); |
286 | OPCODE(0x05); |
287 | OPCODE(0xFF); |
288 | asm_output("incl (%p)", (void*)p); |
289 | } |
290 | |
291 | inline void Assembler::SETE( R r) { count_alu(); ALU2(0x0f94, r, r); asm_output("sete %s", gpn(r)); } |
292 | inline void Assembler::SETNP(R r) { count_alu(); ALU2(0x0f9B, r, r); asm_output("setnp %s", gpn(r)); } |
293 | inline void Assembler::SETNPH(R r) { count_alu(); ALU2(0x0f9B, AL2AHReg(r), AL2AHReg(r)); asm_output("setnp %s", RBhi(r)); } |
294 | inline void Assembler::SETL( R r) { count_alu(); ALU2(0x0f9C, r, r); asm_output("setl %s", gpn(r)); } |
295 | inline void Assembler::SETLE(R r) { count_alu(); ALU2(0x0f9E, r, r); asm_output("setle %s", gpn(r)); } |
296 | inline void Assembler::SETG( R r) { count_alu(); ALU2(0x0f9F, r, r); asm_output("setg %s", gpn(r)); } |
297 | inline void Assembler::SETGE(R r) { count_alu(); ALU2(0x0f9D, r, r); asm_output("setge %s", gpn(r)); } |
298 | inline void Assembler::SETB( R r) { count_alu(); ALU2(0x0f92, r, r); asm_output("setb %s", gpn(r)); } |
299 | inline void Assembler::SETBE(R r) { count_alu(); ALU2(0x0f96, r, r); asm_output("setbe %s", gpn(r)); } |
300 | inline void Assembler::SETA( R r) { count_alu(); ALU2(0x0f97, r, r); asm_output("seta %s", gpn(r)); } |
301 | inline void Assembler::SETAE(R r) { count_alu(); ALU2(0x0f93, r, r); asm_output("setae %s", gpn(r)); } |
302 |         inline void Assembler::SETO( R r) { count_alu(); ALU2(0x0f90, r, r); asm_output("seto %s", gpn(r)); }
303 | |
304 | inline void Assembler::MREQ(R d, R s) { count_alu(); ALU2(0x0f44, d, s); asm_output("cmove %s,%s", gpn(d), gpn(s)); } |
305 | inline void Assembler::MRNE(R d, R s) { count_alu(); ALU2(0x0f45, d, s); asm_output("cmovne %s,%s", gpn(d), gpn(s)); } |
306 | inline void Assembler::MRL( R d, R s) { count_alu(); ALU2(0x0f4C, d, s); asm_output("cmovl %s,%s", gpn(d), gpn(s)); } |
307 | inline void Assembler::MRLE(R d, R s) { count_alu(); ALU2(0x0f4E, d, s); asm_output("cmovle %s,%s", gpn(d), gpn(s)); } |
308 | inline void Assembler::MRG( R d, R s) { count_alu(); ALU2(0x0f4F, d, s); asm_output("cmovg %s,%s", gpn(d), gpn(s)); } |
309 | inline void Assembler::MRGE(R d, R s) { count_alu(); ALU2(0x0f4D, d, s); asm_output("cmovge %s,%s", gpn(d), gpn(s)); } |
310 | inline void Assembler::MRB( R d, R s) { count_alu(); ALU2(0x0f42, d, s); asm_output("cmovb %s,%s", gpn(d), gpn(s)); } |
311 | inline void Assembler::MRBE(R d, R s) { count_alu(); ALU2(0x0f46, d, s); asm_output("cmovbe %s,%s", gpn(d), gpn(s)); } |
312 | inline void Assembler::MRA( R d, R s) { count_alu(); ALU2(0x0f47, d, s); asm_output("cmova %s,%s", gpn(d), gpn(s)); } |
313 | inline void Assembler::MRAE(R d, R s) { count_alu(); ALU2(0x0f43, d, s); asm_output("cmovae %s,%s", gpn(d), gpn(s)); } |
314 | inline void Assembler::MRNO(R d, R s) { count_alu(); ALU2(0x0f41, d, s); asm_output("cmovno %s,%s", gpn(d), gpn(s)); } |
315 | |
316 | // these aren't currently used but left in for reference |
317 | //#define LDEQ(r,d,b) do { ALU2m(0x0f44,r,d,b); asm_output("cmove %s,%d(%s)", gpn(r),d,gpn(b)); } while(0) |
318 | //#define LDNEQ(r,d,b) do { ALU2m(0x0f45,r,d,b); asm_output("cmovne %s,%d(%s)", gpn(r),d,gpn(b)); } while(0) |
319 | |
320 | inline void Assembler::LD(R reg, I32 disp, R base) { |
321 | count_ld(); |
322 | ALUm(0x8b, REGNUM(reg), disp, base); |
323 | asm_output("mov %s,%d(%s)", gpn(reg), disp, gpn(base)); |
324 | } |
325 | |
326 | inline void Assembler::LDdm(R reg, I32 addr) { |
327 | count_ld(); |
328 | ALUdm(0x8b, reg, addr); |
329 | asm_output("mov %s,0(%p)", gpn(reg), (void*)addr); |
330 | } |
331 | |
332 | #define SIBIDX(n) "1248"[n]
333 | |
334 | inline void Assembler::LDsib(R reg, I32 disp, R base, R index, I32 scale) { |
335 | count_ld(); |
336 | ALUsib(0x8b, reg, base, index, scale, disp); |
337 | asm_output("mov %s,%d(%s+%s*%c)", gpn(reg), disp, gpn(base), gpn(index), SIBIDX(scale)); |
338 | } |
339 | |
340 | // note: movzx/movsx are being output with an 8/16 suffix to indicate the |
341 | // size being loaded. This doesn't really match standard intel format |
342 | // (though is arguably terser and more obvious in this case) and would |
343 | // probably be nice to fix. (Likewise, the 8/16 bit stores being output |
344 | // as "mov8" and "mov16" respectively.) |
345 | |
346 | // Load 16-bit, sign extend. |
347 | inline void Assembler::LD16S(R r, I32 d, R b) { |
348 | count_ld(); |
349 | ALU2m(0x0fbf, r, d, b); |
350 | asm_output("movsx16 %s,%d(%s)", gpn(r), d, gpn(b)); |
351 | } |
352 | |
353 | inline void Assembler::LD16Sdm(R r, I32 addr) { |
354 | count_ld(); |
355 | ALU2dm(0x0fbf, r, addr); |
356 | asm_output("movsx16 %s,0(%lx)", gpn(r), (unsigned long)addr); |
357 | } |
358 | |
359 | inline void Assembler::LD16Ssib(R r, I32 disp, R base, R index, I32 scale) { |
360 | count_ld(); |
361 | ALU2sib(0x0fbf, r, base, index, scale, disp); |
362 | asm_output("movsx16 %s,%d(%s+%s*%c)", gpn(r), disp, gpn(base), gpn(index), SIBIDX(scale)); |
363 | } |
364 | |
365 | // Load 16-bit, zero extend. |
366 | inline void Assembler::LD16Z(R r, I32 d, R b) { |
367 | count_ld(); |
368 | ALU2m(0x0fb7, r, d, b); |
369 | asm_output("movzx16 %s,%d(%s)", gpn(r), d, gpn(b)); |
370 | } |
371 | |
372 | inline void Assembler::LD16Zdm(R r, I32 addr) { |
373 | count_ld(); |
374 | ALU2dm(0x0fb7, r, addr); |
375 | asm_output("movzx16 %s,0(%lx)", gpn(r), (unsigned long)addr); |
376 | } |
377 | |
378 | inline void Assembler::LD16Zsib(R r, I32 disp, R base, R index, I32 scale) { |
379 | count_ld(); |
380 | ALU2sib(0x0fb7, r, base, index, scale, disp); |
381 | asm_output("movzx16 %s,%d(%s+%s*%c)", gpn(r), disp, gpn(base), gpn(index), SIBIDX(scale)); |
382 | } |
383 | |
384 | // Load 8-bit, zero extend. |
385 | inline void Assembler::LD8Z(R r, I32 d, R b) { |
386 | count_ld(); |
387 | ALU2m(0x0fb6, r, d, b); |
388 | asm_output("movzx8 %s,%d(%s)", gpn(r), d, gpn(b)); |
389 | } |
390 | |
391 | inline void Assembler::LD8Zdm(R r, I32 addr) { |
392 | count_ld(); |
393 | ALU2dm(0x0fb6, r, addr); |
394 | asm_output("movzx8 %s,0(%lx)", gpn(r), (long unsigned)addr); |
395 | } |
396 | |
397 | inline void Assembler::LD8Zsib(R r, I32 disp, R base, R index, I32 scale) { |
398 | count_ld(); |
399 | ALU2sib(0x0fb6, r, base, index, scale, disp); |
400 | asm_output("movzx8 %s,%d(%s+%s*%c)", gpn(r), disp, gpn(base), gpn(index), SIBIDX(scale)); |
401 | } |
402 | |
403 | // Load 8-bit, sign extend. |
404 | inline void Assembler::LD8S(R r, I32 d, R b) { |
405 | count_ld(); |
406 | ALU2m(0x0fbe, r, d, b); |
407 | asm_output("movsx8 %s,%d(%s)", gpn(r), d, gpn(b)); |
408 | } |
409 | |
410 | inline void Assembler::LD8Sdm(R r, I32 addr) { |
411 | count_ld(); |
412 | ALU2dm(0x0fbe, r, addr); |
413 | asm_output("movsx8 %s,0(%lx)", gpn(r), (long unsigned)addr); |
414 | } |
415 | |
416 | inline void Assembler::LD8Ssib(R r, I32 disp, R base, R index, I32 scale) { |
417 | count_ld(); |
418 | ALU2sib(0x0fbe, r, base, index, scale, disp); |
419 | asm_output("movsx8 %s,%d(%s+%s*%c)", gpn(r), disp, gpn(base), gpn(index), SIBIDX(scale)); |
420 | } |
421 | |
422 | inline void Assembler::LDi(R r, I32 i) { |
423 | count_ld(); |
424 | underrunProtect(5); |
425 | IMM32(i); |
426 |             NanoAssert(REGNUM(r) < 8);
427 | OPCODE(0xb8 | REGNUM(r)); |
428 | asm_output("mov %s,%d", gpn(r), i); |
429 | } |
430 | |
431 | // Quirk of x86-32: reg must be a/b/c/d for byte stores here. |
432 | inline void Assembler::ST8(R base, I32 disp, R reg) { |
433 | count_st(); |
434 |             NanoAssert(REGNUM(reg) < 4);
435 | ALUm(0x88, REGNUM(reg), disp, base); |
436 | asm_output("mov8 %d(%s),%s", disp, base==UnspecifiedReg ? "0" : gpn(base), gpn(reg)); |
437 | } |
438 | |
439 | // Quirk of x86-32: reg must be a/b/c/d for byte stores here. |
440 | inline void Assembler::ST8sib(I32 disp, R base, R index, I32 scale, R reg) { |
441 | count_st(); |
442 |             NanoAssert(REGNUM(reg) < 4);
443 | ALUsib(0x88, reg, base, index, scale, disp); |
444 | asm_output("mov8 %d(%s+%s*%c),%s", disp, base==UnspecifiedReg ? "0" : gpn(base), |
445 | gpn(index), SIBIDX(scale), gpn(reg)); |
446 | } |
447 | |
448 | inline void Assembler::ST16(R base, I32 disp, R reg) { |
449 | count_st(); |
450 | ALUm16(0x89, REGNUM(reg), disp, base); |
451 | asm_output("mov16 %d(%s),%s", disp, base==UnspecifiedReg ? "0" : gpn(base), gpn(reg)); |
452 | } |
453 | |
454 | inline void Assembler::ST16sib(I32 disp, R base, R index, I32 scale, R reg) { |
455 | count_st(); |
456 | ALUsib16(0x89, reg, base, index, scale, disp); |
457 | asm_output("mov16 %d(%s+%s*%c),%s", disp, base==UnspecifiedReg ? "0" : gpn(base), |
458 | gpn(index), SIBIDX(scale), gpn(reg)); |
459 | } |
460 | |
461 | inline void Assembler::ST(R base, I32 disp, R reg) { |
462 | count_st(); |
463 | ALUm(0x89, REGNUM(reg), disp, base); |
464 | asm_output("mov %d(%s),%s", disp, base==UnspecifiedReg ? "0" : gpn(base), gpn(reg)); |
465 | } |
466 | |
467 | inline void Assembler::STsib(I32 disp, R base, R index, I32 scale, R reg) { |
468 | count_st(); |
469 | ALUsib(0x89, reg, base, index, scale, disp); |
470 | asm_output("mov %d(%s+%s*%c),%s", disp, base==UnspecifiedReg ? "0" : gpn(base), |
471 | gpn(index), SIBIDX(scale), gpn(reg)); |
472 | } |
473 | |
474 | inline void Assembler::ST8i(R base, I32 disp, I32 imm) { |
475 | count_st(); |
476 | underrunProtect(8); |
477 | IMM8(imm); |
478 | MODRMm(0, disp, base); |
479 | OPCODE(0xc6); |
480 | asm_output("mov8 %d(%s),%d", disp, gpn(base), imm); |
481 | } |
482 | |
483 | inline void Assembler::ST8isib(I32 disp, R base, R index, I32 scale, I32 imm) { |
484 | count_st(); |
485 | underrunProtect(8); |
486 | IMM8(imm); |
487 | MODRMsib(0, base, index, scale, disp); |
488 | OPCODE(0xc6); |
489 | asm_output("mov8 %d(%s+%s*%c),%d", disp, gpn(base), gpn(index), SIBIDX(scale), imm); |
490 | } |
491 | |
492 | inline void Assembler::ST16i(R base, I32 disp, I32 imm) { |
493 | count_st(); |
494 | underrunProtect(10); |
495 | IMM16(imm); |
496 | MODRMm(0, disp, base); |
497 | OPCODE(0xc7); |
498 | OPCODE(0x66); |
499 | asm_output("mov16 %d(%s),%d", disp, gpn(base), imm); |
500 | } |
501 | |
502 | inline void Assembler::ST16isib(I32 disp, R base, R index, I32 scale, I32 imm) { |
503 | count_st(); |
504 | underrunProtect(10); |
505 | IMM16(imm); |
506 | MODRMsib(0, base, index, scale, disp); |
507 | OPCODE(0xc7); |
508 | OPCODE(0x66); |
509 | asm_output("mov16 %d(%s+%s*%c),%d", disp, gpn(base), gpn(index), SIBIDX(scale), imm); |
510 | } |
511 | |
512 | inline void Assembler::STi(R base, I32 disp, I32 imm) { |
513 | count_st(); |
514 | underrunProtect(11); |
515 | IMM32(imm); |
516 | MODRMm(0, disp, base); |
517 | OPCODE(0xc7); |
518 | asm_output("mov %d(%s),%d", disp, gpn(base), imm); |
519 | } |
520 | |
521 | inline void Assembler::STisib(I32 disp, R base, R index, I32 scale, I32 imm) { |
522 | count_st(); |
523 | underrunProtect(11); |
524 | IMM32(imm); |
525 | MODRMsib(0, base, index, scale, disp); |
526 | OPCODE(0xc7); |
527 | asm_output("mov %d(%s+%s*%c),%d", disp, gpn(base), gpn(index), SIBIDX(scale), imm); |
528 | } |
529 | |
530 | const uint8_t INT3_OP = 0xcc; |
531 | |
532 | inline void Assembler::RET() { count_ret(); ALU0(0xc3); asm_output("ret"); } |
533 | inline void Assembler::NOP() { count_alu(); ALU0(0x90); asm_output("nop"); } |
534 | inline void Assembler::INT3() { ALU0(INT3_OP); asm_output("int3"); } |
535 | |
536 | inline void Assembler::PUSHi(I32 i) { |
537 | count_push(); |
538 |             if (isS8(i)) {
539 | underrunProtect(2); |
540 | IMM8(i); |
541 | OPCODE(0x6a); |
542 | asm_output("push %d", i); |
543 | } else { |
544 | underrunProtect(5); |
545 | IMM32(i); |
546 | OPCODE(0x68); |
547 | asm_output("push %d", i); |
548 | } |
549 | } |
550 | |
551 | inline void Assembler::PUSHr(R r) { |
552 | count_push(); |
553 | underrunProtect(1); |
554 |             NanoAssert(REGNUM(r) < 8);
555 | OPCODE(0x50 | REGNUM(r)); |
556 | asm_output("push %s", gpn(r)); |
557 | } |
558 | |
559 | inline void Assembler::PUSHm(I32 d, R b) { |
560 | count_pushld(); |
561 | ALUm(0xff, 6, d, b); |
562 | asm_output("push %d(%s)", d, gpn(b)); |
563 | } |
564 | |
565 | inline void Assembler::POPr(R r) { |
566 | count_pop(); |
567 | underrunProtect(1); |
568 |             NanoAssert(REGNUM(r) < 8);
569 | OPCODE(0x58 | REGNUM(r)); |
570 | asm_output("pop %s", gpn(r)); |
571 | } |
572 | |
573 | inline void Assembler::JCC(I32 o, NIns* t, const char* n) { |
574 | count_jcc(); |
575 | underrunProtect(6); |
576 | intptr_t tt = (intptr_t)t - (intptr_t)_nIns; |
577 |         if (t && isS8(tt)) {
578 | IMM8(tt); |
579 | OPCODE(0x70 | o); |
580 | } else { |
581 | IMM32(tt); |
582 | OPCODE(0x80 | o); |
583 |             OPCODE(JCC32);
584 | } |
585 | asm_output("%-5s %p", n, t); |
586 | (void) n; |
587 | } |
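     |         // Note: tt is measured from _nIns which, because code is emitted
     |         // backwards, is the address just past this branch -- exactly the
     |         // anchor that x86 rel8/rel32 displacements require.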
588 | |
589 | inline void Assembler::JMP_long(NIns* t) { |
590 | count_jmp(); |
591 | underrunProtect(5); |
592 |         NanoAssert(t);
593 | intptr_t tt = (intptr_t)t - (intptr_t)_nIns; |
594 | IMM32(tt); |
595 |         OPCODE(JMP32);
596 | asm_output("jmp %p", t); |
597 | verbose_only( verbose_outputf("%p:", (void*)_nIns); ) |
598 | } |
599 | |
600 | inline void Assembler::JMP_indexed(Register x, I32 ss, NIns** addr) { |
601 | underrunProtect(7); |
602 | IMM32(int32_t(addr)); |
603 | SIB(ss, REGNUM(x), 5); |
604 | MODRM(0, 4, 4); // amode == addr(table + x<<ss) |
605 | OPCODE(0xff); // jmp |
606 | asm_output("jmp *(%s*%d+%p)", gpn(x), 1 << ss, (void*)addr); |
607 | } |
608 | |
609 | inline void Assembler::JE(NIns* t) { JCC(0x04, t, "je"); } |
610 | inline void Assembler::JNE(NIns* t) { JCC(0x05, t, "jne"); } |
611 | inline void Assembler::JP(NIns* t) { JCC(0x0A, t, "jp"); } |
612 | inline void Assembler::JNP(NIns* t) { JCC(0x0B, t, "jnp"); } |
613 | |
614 | inline void Assembler::JB(NIns* t) { JCC(0x02, t, "jb"); } |
615 | inline void Assembler::JNB(NIns* t) { JCC(0x03, t, "jnb"); } |
616 | inline void Assembler::JBE(NIns* t) { JCC(0x06, t, "jbe"); } |
617 | inline void Assembler::JNBE(NIns* t) { JCC(0x07, t, "jnbe"); } |
618 | |
619 | inline void Assembler::JA(NIns* t) { JCC(0x07, t, "ja"); } |
620 | inline void Assembler::JNA(NIns* t) { JCC(0x06, t, "jna"); } |
621 | inline void Assembler::JAE(NIns* t) { JCC(0x03, t, "jae"); } |
622 | inline void Assembler::JNAE(NIns* t) { JCC(0x02, t, "jnae"); } |
623 | |
624 | inline void Assembler::JL(NIns* t) { JCC(0x0C, t, "jl"); } |
625 | inline void Assembler::JNL(NIns* t) { JCC(0x0D, t, "jnl"); } |
626 | inline void Assembler::JLE(NIns* t) { JCC(0x0E, t, "jle"); } |
627 | inline void Assembler::JNLE(NIns* t) { JCC(0x0F, t, "jnle"); } |
628 | |
629 | inline void Assembler::JG(NIns* t) { JCC(0x0F, t, "jg"); } |
630 | inline void Assembler::JNG(NIns* t) { JCC(0x0E, t, "jng"); } |
631 | inline void Assembler::JGE(NIns* t) { JCC(0x0D, t, "jge"); } |
632 | inline void Assembler::JNGE(NIns* t) { JCC(0x0C, t, "jnge"); } |
633 | |
634 | inline void Assembler::JO(NIns* t) { JCC(0x00, t, "jo"); } |
635 | inline void Assembler::JNO(NIns* t) { JCC(0x01, t, "jno"); } |
636 | |
637 | // sse instructions |
638 | inline void Assembler::SSE(I32 opc3, R d, R s) { |
639 | underrunProtect(9); |
640 | MODRMr(REGNUM(d)&7, REGNUM(s)&7); |
641 | OPCODE3(opc3); |
642 | } |
643 | |
644 | inline void Assembler::SSEm(I32 opc3, R r, I32 d, R b) { |
645 | underrunProtect(9); |
646 | MODRMm(REGNUM(r)&7, d, b); |
647 | OPCODE3(opc3); |
648 | } |
649 | |
650 | inline void Assembler::SSEsib(I32 opc3, R rr, I32 d, R rb, R ri, I32 scale) { |
651 | underrunProtect(9); |
652 | MODRMsib(REGNUM(rr)&7, rb, ri, scale, d); |
653 | OPCODE3(opc3); |
654 | } |
655 | |
656 | inline void Assembler::LDSDm(R r, const double* addr) { |
657 | count_ldq(); |
658 | underrunProtect(8); |
659 | IMM32(int32_t(addr)); |
660 | MODRM(0, REGNUM(r) & 7, 5); // amode == addr(r) |
661 | OPCODE(0x10); |
662 | OPCODE(0x0f); |
663 | OPCODE(0xf2); |
664 | // *addr is a constant, so we can print it here. |
665 | asm_output("movsd %s,(%p) // =%f", gpn(r), (void*)addr, *addr); |
666 | } |
667 | |
668 | inline void Assembler::SSE_LDQ( R r, I32 d, R b) { count_ldq(); SSEm(0xf30f7e, r, d, b); asm_output("movq %s,%d(%s)", gpn(r), d, gpn(b)); } |
669 | inline void Assembler::SSE_LDSS(R r, I32 d, R b) { count_ld(); SSEm(0xf30f10, r, d, b); asm_output("movss %s,%d(%s)", gpn(r), d, gpn(b)); } |
670 | |
671 | inline void Assembler::SSE_LDQsib(R rr, I32 d, R rb, R ri, I32 scale) |
672 | { |
673 | count_ldq(); |
674 | SSEsib(0xf30f7e, rr, d, rb, ri, scale); |
675 | asm_output("movq %s,%d(%s+%s*%c)", gpn(rr), d, gpn(rb), gpn(ri), SIBIDX(scale)); |
676 | } |
677 | |
678 | inline void Assembler::SSE_LDSSsib(R rr, I32 d, R rb, R ri, I32 scale) |
679 | { |
680 | count_ld(); |
681 | SSEsib(0xf30f10, rr, d, rb, ri, scale); |
682 | asm_output("movss %s,%d(%s+%s*%c)", gpn(rr), d, gpn(rb), gpn(ri), SIBIDX(scale)); |
683 | } |
684 | |
685 | inline void Assembler::SSE_STSD(I32 d, R b, R r) { count_stq(); SSEm(0xf20f11, r, d, b); asm_output("movsd %d(%s),%s", d, gpn(b), gpn(r)); } |
686 | inline void Assembler::SSE_STQ( I32 d, R b, R r) { count_stq(); SSEm(0x660fd6, r, d, b); asm_output("movq %d(%s),%s", d, gpn(b), gpn(r)); } |
687 | inline void Assembler::SSE_STSS(I32 d, R b, R r) { count_st(); SSEm(0xf30f11, r, d, b); asm_output("movss %d(%s),%s", d, gpn(b), gpn(r)); } |
688 | |
689 | inline void Assembler::SSE_STQsib(I32 d, R rb, R ri, I32 scale, R rv) { |
690 | count_stq(); |
691 | SSEsib(0x660fd6, rv, d, rb, ri, scale); |
692 | asm_output("movq %d(%s+%s*%c),%s", d, gpn(rb), gpn(ri), SIBIDX(scale), gpn(rv)); |
693 | } |
694 | |
695 | inline void Assembler::SSE_CVTSI2SD(R xr, R gr) { count_fpu(); SSE(0xf20f2a, xr, gr); asm_output("cvtsi2sd %s,%s", gpn(xr), gpn(gr)); } |
696 | inline void Assembler::SSE_CVTSD2SI(R gr, R xr) { count_fpu(); SSE(0xf20f2d, gr, xr); asm_output("cvtsd2si %s,%s", gpn(gr), gpn(xr)); } |
697 | inline void Assembler::SSE_CVTTSD2SI(R gr, R xr) { count_fpu(); SSE(0xf20f2c, gr, xr); asm_output("cvttsd2si %s,%s",gpn(gr), gpn(xr)); } |
698 | inline void Assembler::SSE_CVTSD2SS(R xr, R gr) { count_fpu(); SSE(0xf20f5a, xr, gr); asm_output("cvtsd2ss %s,%s", gpn(xr), gpn(gr)); } |
699 | inline void Assembler::SSE_CVTSS2SD(R xr, R gr) { count_fpu(); SSE(0xf30f5a, xr, gr); asm_output("cvtss2sd %s,%s", gpn(xr), gpn(gr)); } |
700 | inline void Assembler::SSE_CVTDQ2PD(R d, R r) { count_fpu(); SSE(0xf30fe6, d, r); asm_output("cvtdq2pd %s,%s", gpn(d), gpn(r)); } |
701 | |
702 | // Move and zero-extend GP reg to XMM reg. |
703 | inline void Assembler::SSE_MOVD(R d, R s) { |
704 | count_mov(); |
705 | if (IsXmmReg(s)) { |
706 |                 NanoAssert(IsGpReg(d));
707 | SSE(0x660f7e, s, d); |
708 | } else { |
709 |                 NanoAssert(IsGpReg(s));
710 |                 NanoAssert(IsXmmReg(d));
711 | SSE(0x660f6e, d, s); |
712 | } |
713 | asm_output("movd %s,%s", gpn(d), gpn(s)); |
714 | } |
715 | |
716 | inline void Assembler::SSE_MOVSD(R rd, R rs) { |
717 | count_mov(); |
718 |             NanoAssert(IsXmmReg(rd) && IsXmmReg(rs));
719 | SSE(0xf20f10, rd, rs); |
720 | asm_output("movsd %s,%s", gpn(rd), gpn(rs)); |
721 | } |
722 | |
723 | inline void Assembler::SSE_ADDSD(R rd, R rs) { |
724 | count_fpu(); |
725 |             NanoAssert(IsXmmReg(rd) && IsXmmReg(rs));
726 | SSE(0xf20f58, rd, rs); |
727 | asm_output("addsd %s,%s", gpn(rd), gpn(rs)); |
728 | } |
729 | |
730 | inline void Assembler::SSE_ADDSDm(R r, const double* addr) { |
731 | count_fpuld(); |
732 | underrunProtect(8); |
733 |             NanoAssert(IsXmmReg(r));
734 | const double* daddr = addr; |
735 | IMM32(int32_t(daddr)); |
736 | MODRM(0, REGNUM(r) & 7, 5); // amode == daddr(r) |
737 | OPCODE(0x58); |
738 | OPCODE(0x0f); |
739 | OPCODE(0xf2); |
740 | // *daddr is a constant, so we can print it here. |
741 | asm_output("addsd %s,(%p) // =%f", gpn(r), (void*)daddr, *daddr); |
742 | } |
743 | |
744 | inline void Assembler::SSE_SUBSD(R rd, R rs) { |
745 | count_fpu(); |
746 |             NanoAssert(IsXmmReg(rd) && IsXmmReg(rs));
747 | SSE(0xf20f5c, rd, rs); |
748 | asm_output("subsd %s,%s", gpn(rd), gpn(rs)); |
749 | } |
750 | |
751 | inline void Assembler::SSE_MULSD(R rd, R rs) { |
752 | count_fpu(); |
753 |             NanoAssert(IsXmmReg(rd) && IsXmmReg(rs));
754 | SSE(0xf20f59, rd, rs); |
755 | asm_output("mulsd %s,%s", gpn(rd), gpn(rs)); |
756 | } |
757 | |
758 | inline void Assembler::SSE_DIVSD(R rd, R rs) { |
759 | count_fpu(); |
760 |             NanoAssert(IsXmmReg(rd) && IsXmmReg(rs));
761 | SSE(0xf20f5e, rd, rs); |
762 | asm_output("divsd %s,%s", gpn(rd), gpn(rs)); |
763 | } |
764 | |
765 | inline void Assembler::SSE_UCOMISD(R rl, R rr) { |
766 | count_fpu(); |
767 |             NanoAssert(IsXmmReg(rl) && IsXmmReg(rr));
768 | SSE(0x660f2e, rl, rr); |
769 | asm_output("ucomisd %s,%s", gpn(rl), gpn(rr)); |
770 | } |
771 | |
772 | inline void Assembler::SSE_XORPD(R r, const uint32_t* maskaddr) { |
773 | count_fpuld(); |
774 | underrunProtect(8); |
775 | IMM32(int32_t(maskaddr)); |
776 | MODRM(0, REGNUM(r) & 7, 5); // amode == maskaddr(r) |
777 | OPCODE(0x57); |
778 | OPCODE(0x0f); |
779 | OPCODE(0x66); |
780 | asm_output("xorpd %s,(%p)", gpn(r), (void*)maskaddr); |
781 | } |
782 | |
783 | inline void Assembler::SSE_XORPDr(R rd, R rs) { |
784 | count_fpu(); |
785 | SSE(0x660f57, rd, rs); |
786 | asm_output("xorpd %s,%s", gpn(rd), gpn(rs)); |
787 | } |
788 | |
789 | // floating point unit |
790 | inline void Assembler::FPUc(I32 opc2) { |
791 | underrunProtect(2); |
792 | OPCODE2(opc2); |
793 | } |
794 | |
795 | inline void Assembler::FPUm(I32 o, I32 d, R b) { |
796 | underrunProtect(7); |
797 | MODRMm(uint8_t(o), d, b); |
798 | OPCODE(o >> 8); |
799 | } |
800 | |
801 | inline void Assembler::FPUdm(I32 o, const double* const m) { |
802 | underrunProtect(6); |
803 | MODRMdm(uint8_t(o), int32_t(m)); |
804 | OPCODE(o >> 8); |
805 | } |
806 | |
807 | inline void Assembler::TEST_AH(I32 i) { |
808 | count_alu(); |
809 | underrunProtect(3); |
810 | OPCODE(i); |
811 | OPCODE(0xc4); |
812 | OPCODE(0xf6); |
813 | asm_output("test ah, %d", i); |
814 | } |
815 | |
816 | // The FPU stack depth is the number of pushes in excess of the number of pops. |
817 | // Since we generate backwards, we track the FPU stack depth as a negative number. |
818 | // We use the top of the x87 stack as the single allocatable FP register, FST0. |
819 | // Thus, between LIR instructions, the depth of the FPU stack must be either 0 or -1, |
820 | // depending on whether FST0 is in use. Within the expansion of a single LIR |
821 | // instruction, however, deeper levels of the stack may be used as unmanaged |
822 | // temporaries. Hence, we allow for all eight levels in the assertions below. |
823 | void Assembler::fpu_push() { |
824 | debug_only( ++_fpuStkDepth; NanoAssert(_fpuStkDepth <= 0); ) |
825 | } |
826 | |
827 | void Assembler::fpu_pop() { |
828 | debug_only( --_fpuStkDepth; NanoAssert(_fpuStkDepth >= -7); ) |
829 | } |
830 | |
831 | inline void Assembler::FNSTSW_AX() { count_fpu(); FPUc(0xdfe0); asm_output("fnstsw_ax"); } |
832 | inline void Assembler::FCHS() { count_fpu(); FPUc(0xd9e0); asm_output("fchs"); } |
833 | inline void Assembler::FLD1() { count_fpu(); FPUc(0xd9e8); asm_output("fld1"); fpu_push(); } |
834 | inline void Assembler::FLDZ() { count_fpu(); FPUc(0xd9ee); asm_output("fldz"); fpu_push(); } |
835 | |
836 |         inline void Assembler::FST32(bool p, I32 d, R b){ count_stq(); FPUm(0xd902|(p?1:0), d, b); asm_output("fst%s32 %d(%s)", (p?"p":""), d, gpn(b)); if (p) fpu_pop(); }
837 |         inline void Assembler::FSTQ(bool p, I32 d, R b) { count_stq(); FPUm(0xdd02|(p?1:0), d, b); asm_output("fst%sq %d(%s)", (p?"p":""), d, gpn(b)); if (p) fpu_pop(); }
838 | |
839 | inline void Assembler::FSTPQ(I32 d, R b) { FSTQ(1, d, b); } |
840 | |
841 |         inline void Assembler::FCOM(bool p, I32 d, R b) { count_fpuld(); FPUm(0xdc02|(p?1:0), d, b); asm_output("fcom%s %d(%s)", (p?"p":""), d, gpn(b)); if (p) fpu_pop(); }
842 |         inline void Assembler::FCOMdm(bool p, const double* dm) {
843 | count_fpuld(); |
844 | FPUdm(0xdc02|(p?1:0), dm); |
845 | asm_output("fcom%s (%p)", (p?"p":""), (void*)dm); |
846 | if (p) fpu_pop(); |
847 | } |
848 | |
849 | inline void Assembler::FLD32(I32 d, R b) { count_ldq(); FPUm(0xd900, d, b); asm_output("fld32 %d(%s)", d, gpn(b)); fpu_push();} |
850 | inline void Assembler::FLDQ(I32 d, R b) { count_ldq(); FPUm(0xdd00, d, b); asm_output("fldq %d(%s)", d, gpn(b)); fpu_push();} |
851 | inline void Assembler::FLDQdm(const double* dm) { count_ldq(); FPUdm(0xdd00, dm); asm_output("fldq (%p)", (void*)dm); fpu_push();} |
852 | inline void Assembler::FILDQ(I32 d, R b) { count_fpuld(); FPUm(0xdf05, d, b); asm_output("fildq %d(%s)", d, gpn(b)); fpu_push(); } |
853 | inline void Assembler::FILD(I32 d, R b) { count_fpuld(); FPUm(0xdb00, d, b); asm_output("fild %d(%s)", d, gpn(b)); fpu_push(); } |
854 | |
855 |         inline void Assembler::FIST(bool p, I32 d, R b) {
856 | count_fpu(); |
857 | FPUm(0xdb02 | (p?1:0), d, b); |
858 | asm_output("fist%s %d(%s)", (p?"p":""), d, gpn(b)); |
859 | if (p) fpu_pop(); |
860 | } |
861 | |
862 | inline void Assembler::FADD( I32 d, R b) { count_fpu(); FPUm(0xdc00, d, b); asm_output("fadd %d(%s)", d, gpn(b)); } |
863 | inline void Assembler::FSUB( I32 d, R b) { count_fpu(); FPUm(0xdc04, d, b); asm_output("fsub %d(%s)", d, gpn(b)); } |
864 | inline void Assembler::FSUBR(I32 d, R b) { count_fpu(); FPUm(0xdc05, d, b); asm_output("fsubr %d(%s)", d, gpn(b)); } |
865 | inline void Assembler::FMUL( I32 d, R b) { count_fpu(); FPUm(0xdc01, d, b); asm_output("fmul %d(%s)", d, gpn(b)); } |
866 | inline void Assembler::FDIV( I32 d, R b) { count_fpu(); FPUm(0xdc06, d, b); asm_output("fdiv %d(%s)", d, gpn(b)); } |
867 | inline void Assembler::FDIVR(I32 d, R b) { count_fpu(); FPUm(0xdc07, d, b); asm_output("fdivr %d(%s)", d, gpn(b)); } |
868 | |
869 | inline void Assembler::FADDdm( const double *dm) { count_ldq(); FPUdm(0xdc00, dm); asm_output("fadd (%p)", (void*)dm); } |
870 | inline void Assembler::FSUBRdm(const double* dm) { count_ldq(); FPUdm(0xdc05, dm); asm_output("fsubr (%p)", (void*)dm); } |
871 | inline void Assembler::FMULdm( const double* dm) { count_ldq(); FPUdm(0xdc01, dm); asm_output("fmul (%p)", (void*)dm); } |
872 | inline void Assembler::FDIVRdm(const double* dm) { count_ldq(); FPUdm(0xdc07, dm); asm_output("fdivr (%p)", (void*)dm); } |
873 | |
874 | inline void Assembler::FCOMP() { count_fpu(); FPUc(0xD8D9); asm_output("fcomp"); fpu_pop();} |
875 | inline void Assembler::FCOMPP() { count_fpu(); FPUc(0xDED9); asm_output("fcompp"); fpu_pop();fpu_pop();} |
876 | inline void Assembler::FLDr(R r) { count_ldq(); FPU(0xd9c0, r); asm_output("fld %s", gpn(r)); fpu_push(); } |
877 | inline void Assembler::EMMS() { count_fpu(); FPUc(0x0f77); asm_output("emms"); } |
878 | |
879 | // standard direct call |
880 | inline void Assembler::CALL(const CallInfo* ci) { |
881 | count_call(); |
882 | underrunProtect(5); |
883 | int offset = (ci->_address) - ((int)_nIns); |
884 | IMM32((uint32_t)offset); |
885 | OPCODE(0xE8); |
886 | verbose_only(asm_output("call %s", (ci->_name));) |
887 | debug_only(if (ci->returnType()==ARGTYPE_D) fpu_push();) |
888 | } |
889 | |
890 | // indirect call thru register |
891 | inline void Assembler::CALLr(const CallInfo* ci, Register r) { |
892 | count_calli(); |
893 | underrunProtect(2); |
894 | ALU(0xff, 2, r); |
895 | verbose_only(asm_output("call %s", gpn(r));) |
896 | debug_only(if (ci->returnType()==ARGTYPE_D) fpu_push();) (void)ci; |
897 | } |
898 | |
899 | void Assembler::nInit() |
900 | { |
901 | nHints[LIR_calli] = rmask(retRegs[0]); |
902 | nHints[LIR_calld] = rmask(FST0); |
903 | nHints[LIR_paramp] = PREFER_SPECIAL; |
904 | nHints[LIR_immi] = ScratchRegs; |
905 |             // Nb: Doing this with a loop future-proofs against the possibility of
906 | // new comparison operations being added. |
907 | for (LOpcode op = LOpcode(0); op < LIR_sentinel; op = LOpcode(op+1)) |
908 | if (isCmpOpcode(op)) |
909 | nHints[op] = AllowableByteRegs; |
910 | } |
911 | |
912 | void Assembler::nBeginAssembly() { |
913 | max_stk_args = 0; |
914 | } |
915 | |
916 | NIns* Assembler::genPrologue() |
917 | { |
918 | // Prologue |
919 |             uint32_t stackNeeded = max_stk_args + STACK_GRANULARITY * _activation.stackSlotsNeeded();
920 | |
921 | uint32_t stackPushed = |
922 |                 STACK_GRANULARITY + // returnaddr
923 |                 STACK_GRANULARITY; // ebp
924 | |
925 |             uint32_t aligned = alignUp(stackNeeded + stackPushed, NJ_ALIGN_STACK);
926 | uint32_t amt = aligned - stackPushed; |
927 | |
928 | #ifdef _WIN32 |
929 | // Windows uses a single guard page for extending the stack, so |
930 | // new stack pages must be first touched in stack-growth order. |
931 | // We touch each whole page that will be allocated to the frame |
932 | // (following the saved FP) to cause the OS to commit the page if |
933 | // necessary. Since we don't calculate page boundaries, but just |
934 | // probe at intervals of the pagesize, it is possible that the |
935 | // last page of the frame will be touched unnecessarily. Note that |
936 | // we must generate the probes in the reverse order of their execution. |
937 | // We require that the page size be a power of 2. |
938 | size_t pageSize = VMPI_getVMPageSize(); |
939 |             NanoAssert((pageSize & (pageSize-1)) == 0);
940 | size_t pageRounded = amt & ~(pageSize-1); |
941 | for (int32_t d = pageRounded; d > 0; d -= pageSize) { |
942 | STi(rEBP, -d, 0); |
943 | } |
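     |             // Worked example (a sketch): with 4 KiB pages and amt == 0x2800,
     |             // pageRounded == 0x2000, so probes are generated for -0x2000(%ebp)
     |             // and then -0x1000(%ebp); emitted backwards, they execute at
     |             // -0x1000(%ebp) first, i.e. in stack-growth order.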
944 | #endif |
945 | |
946 | // Reserve stackNeeded bytes, padded |
947 | // to preserve NJ_ALIGN_STACK-byte alignment. |
948 | if (amt) { |
949 | SUBi(SP, amt); |
950 | } |
951 | |
952 | verbose_only( asm_output("[frag entry]"); ) |
953 | NIns *fragEntry = _nIns; |
954 | MR(FP, SP); // Establish our own FP. |
955 | PUSHr(FP); // Save caller's FP. |
956 | |
957 | return fragEntry; |
958 | } |
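     |         // In program order the prologue executes: push ebp; mov ebp, esp;
     |         // sub esp, amt (when amt != 0); then the Windows stack probes.
     |         // fragEntry deliberately points past the push/mov pair: a
     |         // fragment-to-fragment jump arrives with the frame already built.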
959 | |
960 | void Assembler::nFragExit(LIns* guard) |
961 | { |
962 | SideExit *exit = guard->record()->exit; |
963 | Fragment *frag = exit->target; |
964 | GuardRecord *lr = 0; |
965 |             bool destKnown = (frag && frag->fragEntry);
966 | |
967 | // Generate jump to epilog and initialize lr. |
968 | // If the guard already exists, use a simple jump. |
969 | if (destKnown) { |
970 | JMP(frag->fragEntry); |
971 | lr = 0; |
972 | } else { // Target doesn't exist. Jump to an epilogue for now. This can be patched later. |
973 | if (!_epilogue) |
974 | _epilogue = genEpilogue(); |
975 | lr = guard->record(); |
976 | JMP_long(_epilogue); |
977 | lr->jmp = _nIns; |
978 | } |
979 | |
980 | // profiling for the exit |
981 | verbose_only( |
982 | if (_logc->lcbits & LC_FragProfile) { |
983 | INCLi(int32_t(&guard->record()->profCount)); |
984 | } |
985 | ) |
986 | |
987 | // Restore rESP from rEBP, undoing SUBi(SP,amt) in the prologue |
988 | MR(SP,FP); |
989 | |
990 | // return value is GuardRecord* |
991 |             asm_immi(rEAX, int(lr), /*canClobberCCs*/true);
992 | } |
993 | |
994 | NIns *Assembler::genEpilogue() |
995 | { |
996 | RET(); |
997 | POPr(FP); // Restore caller's FP. |
998 | |
999 | return _nIns; |
1000 | } |
1001 | |
1002 | void Assembler::asm_call(LIns* ins) |
1003 | { |
1004 | if (!ins->isop(LIR_callv)) { |
1005 | Register rr = ( ins->isop(LIR_calld) ? FST0 : retRegs[0] ); |
1006 | prepareResultReg(ins, rmask(rr)); |
1007 | evictScratchRegsExcept(rmask(rr)); |
1008 | } else { |
1009 | evictScratchRegsExcept(0); |
1010 | } |
1011 | const CallInfo* call = ins->callInfo(); |
1012 | // must be signed, not unsigned |
1013 | uint32_t iargs = call->count_int32_args(); |
1014 | int32_t fargs = call->count_args() - iargs; |
1015 | |
1016 |             bool indirect = call->isIndirect();
1017 | if (indirect) { |
1018 |                 // target arg isn't pushed; it's consumed in the call
1019 | iargs --; |
1020 | } |
1021 | |
1022 | AbiKind abi = call->_abi; |
1023 | uint32_t max_regs = max_abi_regs[abi]; |
1024 | if (max_regs > iargs) |
1025 | max_regs = iargs; |
1026 | |
1027 | int32_t istack = iargs-max_regs; // first 2 4B args are in registers |
1028 | int32_t extra = 0; |
1029 | const int32_t pushsize = 4*istack + 8*fargs; // actual stack space used |
1030 | |
1031 | #if _MSC_VER |
1032 | // msc only provides 4-byte alignment but we have 8 byte stack adjustment |
1033 | // logic so maintain our 8 byte alignment. |
1034 | uint32_t align = 8; |
1035 | #else |
1036 | uint32_t align = NJ_ALIGN_STACK; |
1037 | #endif |
1038 | |
1039 | if (pushsize) { |
1040 | if (_config.i386_fixed_esp) { |
1041 | // In case of fastcall, stdcall and thiscall the callee cleans up the stack, |
1042 | // and since we reserve max_stk_args words in the prolog to call functions |
1043 | // and don't adjust the stack pointer individually for each call we have |
1044 | // to undo here any changes the callee just did to the stack. |
1045 | if (abi != ABI_CDECL) |
1046 | SUBi(SP, pushsize); |
1047 | } else { |
1048 | // stack re-alignment |
1049 | // only pop our adjustment amount since callee pops args in FASTCALL mode |
1050 |                 extra = alignUp(pushsize, align) - pushsize;
1051 | if (call->_abi == ABI_CDECL) { |
1052 | // with CDECL only, caller pops args |
1053 | ADDi(SP, extra+pushsize); |
1054 | } else if (extra > 0) { |
1055 | ADDi(SP, extra); |
1056 | } |
1057 | } |
1058 | } |
1059 | |
1060 |             NanoAssert(ins->isop(LIR_callv) || ins->isop(LIR_callp) || ins->isop(LIR_calld));
1061 | if (!indirect) { |
1062 | CALL(call); |
1063 | } |
1064 | else { |
1065 | // Indirect call. x86 Calling conventions don't use rEAX as an |
1066 | // argument, and do use rEAX as a return value. We need a register |
1067 | // for the address to call, so we use rEAX since it will always be |
1068 | // available. |
1069 | CALLr(call, rEAX); |
1070 | } |
1071 | |
1072 | // Call this now so that the arg setup can involve 'rr'. |
1073 | freeResourcesOf(ins); |
1074 | |
1075 | // Make sure fpu stack is empty before call. |
1076 |             NanoAssert(_allocator.isFree(FST0));
1077 | |
1078 | // Pre-assign registers to the first N 4B args based on the calling convention. |
1079 | uint32_t n = 0; |
1080 | |
1081 | ArgType argTypes[MAXARGS]; |
1082 | uint32_t argc = call->getArgTypes(argTypes); |
1083 | int32_t stkd = 0; |
1084 | |
1085 | if (indirect) { |
1086 | argc--; |
1087 | asm_arg(ARGTYPE_P, ins->arg(argc), rEAX, stkd); |
1088 | if (!_config.i386_fixed_esp) |
1089 | stkd = 0; |
1090 | } |
1091 | |
1092 | for (uint32_t i = 0; i < argc; i++) |
1093 | { |
1094 | uint32_t j = argc-i-1; |
1095 | ArgType ty = argTypes[j]; |
1096 | Register r = UnspecifiedReg; |
1097 | if (n < max_regs && ty != ARGTYPE_D) { |
1098 | r = argRegs[n++]; // tell asm_arg what reg to use |
1099 | } |
1100 | asm_arg(ty, ins->arg(j), r, stkd); |
1101 | if (!_config.i386_fixed_esp) |
1102 | stkd = 0; |
1103 | } |
1104 | |
1105 | if (_config.i386_fixed_esp) { |
1106 | if (pushsize > max_stk_args) |
1107 | max_stk_args = pushsize; |
1108 | } else if (extra > 0) { |
1109 | SUBi(SP, extra); |
1110 | } |
1111 | } |
1112 | |
1113 | Register Assembler::nRegisterAllocFromSet(RegisterMask set) |
1114 | { |
1115 | Register r; |
1116 | RegAlloc ®s = _allocator; |
1117 | #ifdef _MSC_VER |
1118 | _asm |
1119 | { |
1120 | mov ecx, regs |
1121 | bsf eax, set // i = first bit set |
1122 | btr RegAlloc::free[ecx], eax // free &= ~rmask(i) |
1123 | mov r, eax |
1124 | } |
1125 | #else |
1126 | asm( |
1127 | "bsf %1, %%eax\n\t" |
1128 | "btr %%eax, %2\n\t" |
1129 | "movl %%eax, %0\n\t" |
1130 | : "=m"(r) : "m"(set), "m"(regs.free) : "%eax", "memory" ); |
1131 | #endif /* _MSC_VER */ |
1132 | return r; |
1133 | } |
1134 | |
1135 | void Assembler::nRegisterResetAll(RegAlloc& a) |
1136 | { |
1137 | // add scratch registers to our free list for the allocator |
1138 | a.clear(); |
1139 | a.free = SavedRegs | ScratchRegs; |
1140 | if (!_config.i386_sse2) |
1141 | a.free &= ~XmmRegs; |
1142 | } |
1143 | |
1144 | void Assembler::nPatchBranch(NIns* branch, NIns* targ) |
1145 | { |
1146 | intptr_t offset = intptr_t(targ) - intptr_t(branch); |
1147 |             if (branch[0] == JMP32) {
1148 |                 *(int32_t*)&branch[1] = offset - 5;
1149 |             } else if (branch[0] == JCC32) {
1150 |                 *(int32_t*)&branch[2] = offset - 6;
1151 |             } else
1152 |                 NanoAssertMsg(0, "Unknown branch type in nPatchBranch");
1153 | } |
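     |         // The -5/-6 terms convert the branch-start-relative offset into the
     |         // end-of-instruction-relative rel32 that x86 expects: jmp rel32 is
     |         // 5 bytes (e9 + imm32) and jcc rel32 is 6 bytes (0f 8x + imm32).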
1154 | |
1155 | RegisterMask Assembler::nHint(LIns* ins) |
1156 | { |
1157 |             NanoAssert(ins->isop(LIR_paramp));
1158 | RegisterMask prefer = 0; |
1159 | uint8_t arg = ins->paramArg(); |
1160 | if (ins->paramKind() == 0) { |
1161 | uint32_t max_regs = max_abi_regs[_thisfrag->lirbuf->abi]; |
1162 | if (arg < max_regs) |
1163 | prefer = rmask(argRegs[arg]); |
1164 | } else { |
1165 | if (arg < NumSavedRegs) |
1166 | prefer = rmask(savedRegs[arg]); |
1167 | } |
1168 | return prefer; |
1169 | } |
1170 | |
1171 | // Return true if we can generate code for this instruction that neither |
1172 | // sets CCs nor clobbers any input register. |
1173 | // LEA is the only native instruction that fits those requirements. |
1174 |         bool canRematLEA(LIns* ins)
1175 | { |
1176 | if (ins->isop(LIR_addi)) |
1177 | return ins->oprnd1()->isInReg() && ins->oprnd2()->isImmI(); |
1178 | // Subtract and some left-shifts could be rematerialized using LEA, |
1179 | // but it hasn't shown to help in real code yet. Noting them anyway: |
1180 | // maybe sub? R = subl rL, const => leal R, [rL + -const] |
1181 | // maybe lsh? R = lshl rL, 1/2/3 => leal R, [rL * 2/4/8] |
1182 |             return false;
1183 | } |
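     |         // E.g. for ins = addi(r1, 16) with r1 still live in %esi,
     |         // asm_restore() below re-creates the value with "lea r, 16(%esi)"
     |         // instead of reloading a spill slot; LEA neither sets flags nor
     |         // clobbers its source.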
1184 | |
1185 |         bool Assembler::canRemat(LIns* ins)
1186 | { |
1187 | return ins->isImmAny() || ins->isop(LIR_allocp) || canRematLEA(ins); |
1188 | } |
1189 | |
1190 | // WARNING: the code generated by this function must not affect the |
1191 | // condition codes. See asm_cmpi(). |
1192 | void Assembler::asm_restore(LIns* ins, Register r) |
1193 | { |
1194 |             NanoAssert(ins->getReg() == r);
1195 | |
1196 | uint32_t arg; |
1197 | uint32_t abi_regcount; |
1198 | if (ins->isop(LIR_allocp)) { |
1199 | // The value of a LIR_allocp instruction is the address of the |
1200 | // stack allocation. We can rematerialize that from the record we |
1201 | // have of where the allocation lies in the stack. |
1202 |                 NanoAssert(ins->isInAr()); // must have stack slots allocated
1203 | LEA(r, arDisp(ins), FP); |
1204 | |
1205 | } else if (ins->isImmI()) { |
1206 |                 asm_immi(r, ins->immI(), /*canClobberCCs*/false);
1207 | |
1208 | } else if (ins->isImmD()) { |
1209 |                 asm_immd(r, ins->immDasQ(), ins->immD(), /*canClobberCCs*/false);
1210 | |
1211 | } else if (ins->isop(LIR_paramp) && ins->paramKind() == 0 && |
1212 | (arg = ins->paramArg()) >= (abi_regcount = max_abi_regs[_thisfrag->lirbuf->abi])) { |
1213 | // Incoming arg is on stack, can restore it from there instead of spilling. |
1214 | |
1215 | // this case is intentionally not detected in canRemat(), because we still |
1216 | // emit a load instead of a fast ALU operation. We don't want parameter |
1217 | // spills to have precedence over immediates & ALU ops, but if one does |
1218 | // spill, we want to load it directly from its stack area, saving a store |
1219 | // in the prolog. |
1220 | |
1221 | // Compute position of argument relative to ebp. Higher argument |
1222 | // numbers are at higher positive offsets. The first abi_regcount |
1223 |             // arguments are in registers, rest on stack. +8 accommodates the
1224 | // return address and saved ebp value. Assuming abi_regcount == 0: |
1225 | // |
1226 | // low-addr ebp |
1227 | // [frame...][saved-ebp][return-addr][arg0][arg1]... |
1228 | // |
1229 | int d = (arg - abi_regcount) * sizeof(intptr_t) + 8; |
1230 | LD(r, d, FP); |
1231 | |
1232 | } else if (canRematLEA(ins)) { |
1233 | LEA(r, ins->oprnd2()->immI(), ins->oprnd1()->getReg()); |
1234 | |
1235 | } else { |
1236 | int d = findMemFor(ins); |
1237 | if (ins->isI()) { |
1238 |                 NanoAssert(rmask(r) & GpRegs);
1239 | LD(r, d, FP); |
1240 | } else { |
1241 |                 NanoAssert(ins->isD());
1242 | if (rmask(r) & XmmRegs) { |
1243 | SSE_LDQ(r, d, FP); |
1244 | } else { |
1245 |                     NanoAssert(r == FST0);
1246 | FLDQ(d, FP); |
1247 | } |
1248 | } |
1249 | } |
1250 | } |
1251 | |
1252 | void Assembler::asm_store32(LOpcode op, LIns* value, int dr, LIns* base) |
1253 | { |
1254 | if (value->isImmI()) { |
1255 | if (base->opcode() == LIR_addp) { |
1256 | LIns* index; |
1257 | int scale; |
1258 | getBaseIndexScale(base, &base, &index, &scale); |
1259 | |
1260 | Register rb, ri; |
1261 | getBaseReg2(GpRegs, index, ri, GpRegs, base, rb, dr); |
1262 | |
1263 | int c = value->immI(); |
1264 | switch (op) { |
1265 | case LIR_sti2c: ST8isib( dr, rb, ri, scale, c); break; |
1266 | case LIR_sti2s: ST16isib(dr, rb, ri, scale, c); break; |
1267 | case LIR_sti: STisib( dr, rb, ri, scale, c); break; |
1268 |                     default: NanoAssert(0); break;
1269 | } |
1270 | } else { |
1271 | Register rb = getBaseReg(base, dr, GpRegs); |
1272 | int c = value->immI(); |
1273 | switch (op) { |
1274 | case LIR_sti2c: ST8i( rb, dr, c); break; |
1275 | case LIR_sti2s: ST16i(rb, dr, c); break; |
1276 | case LIR_sti: STi( rb, dr, c); break; |
1277 |                     default: NanoAssert(0); break;
1278 | } |
1279 | } |
1280 | |
1281 | } else { |
1282 | // Quirk of x86-32: reg must be a/b/c/d for single-byte stores. |
1283 | const RegisterMask SrcRegs = (op == LIR_sti2c) ? AllowableByteRegs : GpRegs; |
1284 | |
1285 | Register rv, rb; |
1286 | if (base->opcode() == LIR_addp) { |
1287 | LIns* index; |
1288 | int scale; |
1289 | getBaseIndexScale(base, &base, &index, &scale); |
1290 | |
1291 | Register rb, ri, rv; |
1292 | getBaseReg2(SrcRegs, value, rv, GpRegs, base, rb, dr); |
1293 | ri = (index == value) ? rv |
1294 | : (index == base) ? rb |
1295 | : findRegFor(index, GpRegs & ~(rmask(rb)|rmask(rv))); |
1296 | |
1297 | switch (op) { |
1298 | case LIR_sti2c: ST8sib( dr, rb, ri, scale, rv); break; |
1299 | case LIR_sti2s: ST16sib(dr, rb, ri, scale, rv); break; |
1300 | case LIR_sti: STsib( dr, rb, ri, scale, rv); break; |
1301 |                     default: NanoAssert(0); break;
1302 | } |
1303 | |
1304 | } else { |
1305 | if (base->isImmI()) { |
1306 | // absolute address |
1307 | rb = UnspecifiedReg; |
1308 | dr += base->immI(); |
1309 | rv = findRegFor(value, SrcRegs); |
1310 | } else { |
1311 | getBaseReg2(SrcRegs, value, rv, GpRegs, base, rb, dr); |
1312 | } |
1313 | switch (op) { |
1314 | case LIR_sti2c: ST8( rb, dr, rv); break; |
1315 | case LIR_sti2s: ST16(rb, dr, rv); break; |
1316 | case LIR_sti: ST( rb, dr, rv); break; |
1317 |                     default: NanoAssert(0); break;
1318 | } |
1319 | } |
1320 | } |
1321 | } |
1322 | |
1323 |         void Assembler::asm_spill(Register rr, int d, bool pop)
1324 | { |
1325 |             NanoAssert(d);
1326 | if (rmask(rr) & GpRegs) { |
1327 | ST(FP, d, rr); |
1328 | } else if (rmask(rr) & XmmRegs) { |
1329 | SSE_STQ(d, FP, rr); |
1330 | } else { |
1331 |                 NanoAssert(rr == FST0);
1332 | FSTQ(pop, d, FP); |
1333 | } |
1334 | } |
1335 | |
1336 | void Assembler::asm_load64(LIns* ins) |
1337 | { |
1338 | LIns* base = ins->oprnd1(); |
1339 | int d = ins->disp(); |
1340 | |
1341 | // There are two cases: |
1342 | // - 'ins' is in FpRegs: load it. |
1343 | // - otherwise: there's no point loading the value into a register |
1344 | // because its only use will be to immediately spill it. Instead we |
1345 | // do a memory-to-memory move from the load address directly to the |
1346 | // spill slot. (There must be a spill slot assigned.) This is why |
1347 | // we don't use prepareResultReg() here unlike most other places -- |
1348 | // because it mandates bringing the value into a register. |
1349 | // |
1350 | if (ins->isInReg()) { |
1351 | Register rr = prepareResultReg(ins, rmask(ins->getReg())); |
1352 | |
1353 | if (base->opcode() == LIR_addp && rmask(rr) & XmmRegs) { |
1354 | LIns* index; |
1355 | int scale; |
1356 | getBaseIndexScale(base, &base, &index, &scale); |
1357 | |
1358 | // (**) We don't have the usual opportunity to clobber 'base' |
1359 | // or 'ins' with the result because it has a different type. |
1360 | Register rb, ri; |
1361 | RegisterMask allow = GpRegs & ~rmask(rr); |
1362 | getBaseReg2(allow, index, ri, allow, base, rb, d); |
1363 | |
1364 | switch (ins->opcode()) { |
1365 | case LIR_ldd: SSE_LDQsib(rr, d, rb, ri, scale); break; |
1366 | case LIR_ldf2d: SSE_CVTSS2SD(rr, rr); |
1367 | SSE_LDSSsib(rr, d, rb, ri, scale); |
1368 | SSE_XORPDr(rr, rr); break; |
1369 |                     default:        NanoAssert(0);                  break;
1370 | } |
1371 | |
1372 | } else { |
1373 | // (**) We don't have the usual opportunity to clobber 'base' |
1374 | // or 'ins' with the result because it has a different type. |
1375 | Register rb = getBaseReg(base, d, GpRegs); |
1376 | if (rmask(rr) & XmmRegs) { |
1377 | switch (ins->opcode()) { |
1378 | case LIR_ldd: SSE_LDQ(rr, d, rb); break; |
1379 | case LIR_ldf2d: SSE_CVTSS2SD(rr, rr); |
1380 | SSE_LDSS(rr, d, rb); |
1381 | SSE_XORPDr(rr, rr); break; |
1382 |                         default:        NanoAssert(0);              break;
1383 | } |
1384 | } else { |
1385 |                 NanoAssert(rr == FST0);
1386 | switch (ins->opcode()) { |
1387 | case LIR_ldd: FLDQ(d, rb); break; |
1388 | case LIR_ldf2d: FLD32(d, rb); break; |
1389 |                     default:        NanoAssert(0);      break;
1390 | } |
1391 | } |
1392 | } |
1393 | |
1394 | } else { |
1395 | Register rb = getBaseReg(base, d, GpRegs); |
1396 | |
1397 |             NanoAssert(ins->isInAr());
1398 | int dr = arDisp(ins); |
1399 | |
1400 | switch (ins->opcode()) { |
1401 | case LIR_ldd: |
1402 | // Don't use an fpu reg to simply load & store the value. |
1403 | asm_mmq(FP, dr, rb, d); |
1404 | break; |
1405 | |
1406 | case LIR_ldf2d: |
1407 | // Need to use fpu to expand 32->64. |
1408 | FSTPQ(dr, FP); |
1409 | FLD32(d, rb); |
1410 | break; |
1411 | |
1412 | default: |
1413 |                     NanoAssert(0);
1414 | break; |
1415 | } |
1416 | } |
1417 | |
1418 | freeResourcesOf(ins); |
1419 | // Nb: no need for a possible findSpecificRegForUnallocated() call |
1420 | // here because of (**) above. |
1421 | } |
1422 | |
1423 | void Assembler::asm_store64(LOpcode op, LIns* value, int d, LIns* base) |
1424 | { |
1425 | if (op == LIR_std2f) { |
1426 | Register rb = getBaseReg(base, d, GpRegs); |
1427 |             bool pop = !value->isInReg();
1428 | Register rv = ( pop |
1429 | ? findRegFor(value, _config.i386_sse2 ? XmmRegs : FpRegs) |
1430 | : value->getReg() ); |
1431 | |
1432 | if (rmask(rv) & XmmRegs) { |
1433 | // need a scratch reg |
1434 | Register rt = registerAllocTmp(XmmRegs); |
1435 | |
1436 | // cvt to single-precision and store |
1437 | SSE_STSS(d, rb, rt); |
1438 | SSE_CVTSD2SS(rt, rv); |
1439 | SSE_XORPDr(rt, rt); // zero dest to ensure no dependency stalls |
1440 | |
1441 | } else { |
1442 | FST32(pop, d, rb); |
1443 | } |
1444 | |
1445 | } else if (value->isImmD()) { |
1446 | Register rb = getBaseReg(base, d, GpRegs); |
1447 | STi(rb, d+4, value->immDhi()); |
1448 | STi(rb, d, value->immDlo()); |
1449 | |
1450 | } else if (base->opcode() == LIR_addp && _config.i386_sse2) { |
1451 | LIns* index; |
1452 | int scale; |
1453 | getBaseIndexScale(base, &base, &index, &scale); |
1454 | |
1455 | Register rb, ri; |
1456 | getBaseReg2(GpRegs, index, ri, GpRegs, base, rb, d); |
1457 | |
1458 | Register rv = value->isInReg() ? value->getReg() : findRegFor(value, XmmRegs); |
1459 |             NanoAssert(rmask(rv) & XmmRegs);
1460 | SSE_STQsib(d, rb, ri, scale, rv); |
1461 | |
1462 | } else if (value->isop(LIR_ldd) && !_config.i386_sse2) { |
1463 | // 'value' may be live in an FPU reg. Either way, don't put it on |
1464 | // the FPU stack just to load & store it. |
1465 | Register rb = getBaseReg(base, d, GpRegs); |
1466 | int da = findMemFor(value); |
1467 | asm_mmq(rb, d, FP, da); |
1468 | |
1469 | } else { |
1470 | Register rb = getBaseReg(base, d, GpRegs); |
1471 |             bool pop = !value->isInReg();
1472 | Register rv = ( pop |
1473 | ? findRegFor(value, _config.i386_sse2 ? XmmRegs : FpRegs) |
1474 | : value->getReg() ); |
1475 | if (rmask(rv) & XmmRegs) |
1476 | SSE_STQ(d, rb, rv); |
1477 | else |
1478 | FSTQ(pop, d, rb); |
1479 | } |
1480 | } |
1481 | |
1482 | // Copy 64 bits: (rd+dd) <- (rs+ds). |
1483 | // |
1484 | void Assembler::asm_mmq(Register rd, int dd, Register rs, int ds) |
1485 | { |
1486 | // Value is either a 64-bit struct or maybe a float that isn't live in |
1487 | // an FPU reg. Either way, avoid allocating an FPU reg just to load |
1488 | // and store it. |
1489 | if (_config.i386_sse2) { |
1490 | Register t = registerAllocTmp(XmmRegs); |
1491 | SSE_STQ(dd, rd, t); |
1492 | SSE_LDQ(t, ds, rs); |
1493 | } else { |
1494 | // We avoid copying via the FP stack because it's slow and likely |
1495 | // to cause spills. |
1496 | Register t = registerAllocTmp(GpRegs & ~(rmask(rd)|rmask(rs))); |
1497 | ST(rd, dd+4, t); |
1498 | LD(t, ds+4, rs); |
1499 | ST(rd, dd, t); |
1500 | LD(t, ds, rs); |
1501 | } |
1502 | } |
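     | 
     |     // In execution order (remember that instructions are emitted
     |     // backwards), the non-SSE2 path of asm_mmq() above comes out
     |     // roughly as follows -- a sketch only, with 't' standing for
     |     // whatever GP temp the allocator returns:
     |     //
     |     //    mov t, [rs+ds]       # low 32 bits
     |     //    mov [rd+dd], t
     |     //    mov t, [rs+ds+4]     # high 32 bits
     |     //    mov [rd+dd+4], t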
1503 | |
1504 |     Branches Assembler::asm_branch_helper(bool branchOnFalse, LIns* cond, NIns* targ)
1505 | { |
1506 | return isCmpDOpcode(cond->opcode()) |
1507 | ? asm_branchd_helper(branchOnFalse, cond, targ) |
1508 | : asm_branchi_helper(branchOnFalse, cond, targ); |
1509 | } |
1510 | |
1511 |     Branches Assembler::asm_branchi_helper(bool branchOnFalse, LIns* cond, NIns* targ)
1512 | { |
1513 | if (branchOnFalse) { |
1514 | // op == LIR_xf/LIR_jf |
1515 | switch (cond->opcode()) { |
1516 | case LIR_eqi: JNE(targ); break; |
1517 | case LIR_lti: JNL(targ); break; |
1518 | case LIR_lei: JNLE(targ); break; |
1519 | case LIR_gti: JNG(targ); break; |
1520 | case LIR_gei: JNGE(targ); break; |
1521 | case LIR_ltui: JNB(targ); break; |
1522 | case LIR_leui: JNBE(targ); break; |
1523 | case LIR_gtui: JNA(targ); break; |
1524 | case LIR_geui: JNAE(targ); break; |
1525 |             default:        NanoAssert(0);  break;
1526 | } |
1527 | } else { |
1528 | // op == LIR_xt/LIR_jt |
1529 | switch (cond->opcode()) { |
1530 | case LIR_eqi: JE(targ); break; |
1531 | case LIR_lti: JL(targ); break; |
1532 | case LIR_lei: JLE(targ); break; |
1533 | case LIR_gti: JG(targ); break; |
1534 | case LIR_gei: JGE(targ); break; |
1535 | case LIR_ltui: JB(targ); break; |
1536 | case LIR_leui: JBE(targ); break; |
1537 | case LIR_gtui: JA(targ); break; |
1538 | case LIR_geui: JAE(targ); break; |
1539 |             default:        NanoAssert(0);  break;
1540 | } |
1541 | } |
1542 | return Branches(_nIns); |
1543 | } |
1544 | |
1545 |     Branches Assembler::asm_branch(bool branchOnFalse, LIns* cond, NIns* targ)
1546 | { |
1547 | Branches branches = asm_branch_helper(branchOnFalse, cond, targ); |
1548 | asm_cmp(cond); |
1549 | return branches; |
1550 | } |
1551 | |
1552 | NIns* Assembler::asm_branch_ov(LOpcode, NIns* target) |
1553 | { |
1554 | JO(target); |
1555 | return _nIns; |
1556 | } |
1557 | |
1558 | void Assembler::asm_jtbl(LIns* ins, NIns** table) |
1559 | { |
1560 | Register indexreg = findRegFor(ins->oprnd1(), GpRegs); |
1561 | JMP_indexed(indexreg, 2, table); |
1562 | } |
1563 | |
1564 | void Assembler::asm_cmp(LIns *cond) |
1565 | { |
1566 | isCmpDOpcode(cond->opcode()) ? asm_cmpd(cond) : asm_cmpi(cond); |
1567 | } |
1568 | |
1569 | // This generates a 'test' or 'cmp' instruction for a condition, which |
1570 | // causes the condition codes to be set appropriately. It's used with |
1571 | // conditional branches, conditional moves, and when generating |
1572 | // conditional values. For example: |
1573 | // |
1574 | // LIR: eq1 = eq a, 0 |
1575 | // LIR: xf1: xf eq1 -> ... |
1576 | // asm: test edx, edx # generated by this function |
1577 | // asm: je ... |
1578 | // |
1579 | // If this is the only use of eq1, then on entry 'cond' is *not* marked as |
1580 | // used, and we do not allocate a register for it. That's because its |
1581 | // result ends up in the condition codes rather than a normal register. |
1582 | // This doesn't get recorded in the regstate and so the asm code that |
1583 | // consumes the result (eg. a conditional branch like 'je') must follow |
1584 | // shortly after. |
1585 | // |
1586 | // If eq1 is instead used again later, we will also generate code |
1587 | // (eg. in asm_cond()) to compute it into a normal register, something |
1588 | // like this: |
1589 | // |
1590 | // LIR: eq1 = eq a, 0 |
1591 |     //   asm:   test edx, edx
1592 | // asm: sete ebx |
1593 | // asm: movzx ebx, ebx |
1594 | // |
1595 | // In this case we end up computing the condition twice, but that's ok, as |
1596 | // it's just as short as testing eq1's value in the code generated for the |
1597 | // guard. |
1598 | // |
1599 | // WARNING: Because the condition code update is not recorded in the |
1600 | // regstate, this function cannot generate any code that will affect the |
1601 | // condition codes prior to the generation of the test/cmp, because any |
1602 | // such code will be run after the test/cmp but before the instruction |
1603 | // that consumes the condition code. And because this function calls |
1604 | // findRegFor() before the test/cmp is generated, and findRegFor() calls |
1605 | // asm_restore(), that means that asm_restore() cannot generate code which |
1606 | // affects the condition codes. |
1607 | // |
1608 | void Assembler::asm_cmpi(LIns *cond) |
1609 | { |
1610 | LIns* lhs = cond->oprnd1(); |
1611 | LIns* rhs = cond->oprnd2(); |
1612 | |
1613 |         NanoAssert(lhs->isI() && rhs->isI());
1614 | |
1615 | // Ready to issue the compare. |
1616 | if (rhs->isImmI()) { |
1617 | int c = rhs->immI(); |
1618 | // findRegFor() can call asm_restore() -- asm_restore() better not |
1619 | // disturb the CCs! |
1620 | Register r = findRegFor(lhs, GpRegs); |
1621 | if (c == 0 && cond->isop(LIR_eqi)) { |
1622 |                 bool canSkipTest = lhs->isop(LIR_andi) || lhs->isop(LIR_ori);
1623 | if (canSkipTest) { |
1624 | // Setup a short-lived reader to do lookahead; does no |
1625 | // optimisations but that should be good enough for this |
1626 | // simple case, something like this: |
1627 | // |
1628 | // a = andi x, y # lhs |
1629 | // eq1 = eq a, 0 # cond |
1630 | // xt eq1 # currIns |
1631 | // |
1632 | // Note that we don't have to worry about lookahead |
1633 | // hitting the start of the buffer, because read() will |
1634 | // just return LIR_start repeatedly in that case. |
1635 | // |
1636 | LirReader lookahead(currIns); |
1637 | canSkipTest = currIns == lookahead.read() && |
1638 | cond == lookahead.read() && |
1639 | lhs == lookahead.read(); |
1640 | } |
1641 | if (canSkipTest) { |
1642 |                     // Do nothing.  At run-time, 'lhs' will have just been
1643 |                     // computed by an i386 instruction that sets ZF for us
1644 |                     // ('and' or 'or'), so we don't have to do it ourselves.
1645 | } else { |
1646 | TEST(r, r); // sets ZF according to the value of 'lhs' |
1647 | } |
1648 | } else { |
1649 | CMPi(r, c); |
1650 | } |
1651 | } else { |
1652 | Register ra, rb; |
1653 | findRegFor2(GpRegs, lhs, ra, GpRegs, rhs, rb); |
1654 | CMP(ra, rb); |
1655 | } |
1656 | } |
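     | 
     |     // When the test is skipped in the LIR_andi/LIR_ori case above, the
     |     // generated code reduces to something like this (a sketch; actual
     |     // registers depend on the allocator):
     |     //
     |     //    and edx, ecx         # computes 'a' and sets ZF as a side effect
     |     //    je  <target>         # consumes ZF directly; no extra 'test'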
1657 | |
1658 | void Assembler::asm_condd(LIns* ins) |
1659 | { |
1660 | LOpcode opcode = ins->opcode(); |
1661 | Register r = prepareResultReg(ins, AllowableByteRegs); |
1662 | |
1663 | // SETcc only sets low 8 bits, so extend |
1664 | MOVZX8(r,r); |
1665 | |
1666 | if (_config.i386_sse2) { |
1667 | // LIR_ltd and LIR_gtd are handled by the same case because |
1668 | // asm_cmpd() converts LIR_ltd(a,b) to LIR_gtd(b,a). Likewise |
1669 | // for LIR_led/LIR_ged. |
1670 | switch (opcode) { |
1671 | case LIR_eqd: |
1672 | if (ins->oprnd1() == ins->oprnd2()) { |
1673 | SETNP(r); |
1674 | } else { |
1675 | // result = ZF & !PF, must do logic on flags |
1676 | AND8R(r); // and rl,rh rl &= rh |
1677 | SETNPH(r); // setnp rh rh = !PF |
1678 | SETE(r); // sete rl rl = ZF |
1679 | } |
1680 | break; |
1681 | case LIR_ltd: |
1682 | case LIR_gtd: SETA(r); break; |
1683 | case LIR_led: |
1684 | case LIR_ged: SETAE(r); break; |
1685 |             default: NanoAssert(0); break;
1686 | } |
1687 | } else { |
1688 | SETNP(r); |
1689 | } |
1690 | |
1691 | freeResourcesOf(ins); |
1692 | |
1693 | asm_cmpd(ins); |
1694 | } |
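     | 
     |     // For LIR_eqd with distinct operands, the SSE2 case above executes
     |     // roughly as follows (a sketch; asm_cmpd() supplies the ucomisd,
     |     // and we emit backwards, so execution order is the reverse of the
     |     // order written in the code):
     |     //
     |     //    ucomisd xmm0, xmm1   # sets ZF and PF
     |     //    sete    al           # al = ZF
     |     //    setnp   ah           # ah = !PF
     |     //    and     al, ah       # al = ZF & !PF, i.e. equal and ordered
     |     //    movzx   eax, al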
1695 | |
1696 | void Assembler::asm_cond(LIns* ins) |
1697 | { |
1698 | LOpcode op = ins->opcode(); |
1699 | |
1700 | Register r = prepareResultReg(ins, AllowableByteRegs); |
1701 | |
1702 | // SETcc only sets low 8 bits, so extend |
1703 | MOVZX8(r,r); |
1704 | switch (op) { |
1705 | case LIR_eqi: SETE(r); break; |
1706 | case LIR_lti: SETL(r); break; |
1707 | case LIR_lei: SETLE(r); break; |
1708 | case LIR_gti: SETG(r); break; |
1709 | case LIR_gei: SETGE(r); break; |
1710 | case LIR_ltui: SETB(r); break; |
1711 | case LIR_leui: SETBE(r); break; |
1712 | case LIR_gtui: SETA(r); break; |
1713 | case LIR_geui: SETAE(r); break; |
1714 |         default:        NanoAssert(0);  break;
1715 | } |
1716 | |
1717 | freeResourcesOf(ins); |
1718 | |
1719 | asm_cmpi(ins); |
1720 | } |
1721 | |
1722 | // Two example cases for "ins = add lhs, rhs". '*' lines are those |
1723 | // generated in this function. |
1724 | // |
1725 | // asm: define lhs into rr |
1726 | // asm: define rhs into rb |
1727 | // ... |
1728 | // * asm: add rr, rb |
1729 | // * asm: spill rr if necessary |
1730 | // ... no more uses of lhs in rr... |
1731 | // |
1732 | // asm: define lhs into ra |
1733 | // asm: define rhs into rb |
1734 | // ... |
1735 | // * asm: mov rr, ra |
1736 | // * asm: add rr, rb |
1737 | // * asm: spill rr if necessary |
1738 | // ... some uses of lhs in ra... |
1739 | // |
1740 | void Assembler::asm_arith(LIns* ins) |
1741 | { |
1742 | LOpcode op = ins->opcode(); |
1743 | |
1744 | // First special case. |
1745 | if (op == LIR_modi) { |
1746 | asm_div_mod(ins); |
1747 | return; |
1748 | } |
1749 | |
1750 | LIns* lhs = ins->oprnd1(); |
1751 | LIns* rhs = ins->oprnd2(); |
1752 | |
1753 | // Second special case. |
1754 | // XXX: bug 547125: don't need this once LEA is used for LIR_addi in all cases below |
1755 | if (op == LIR_addi && lhs->isop(LIR_allocp) && rhs->isImmI()) { |
1756 | // LIR_addi(LIR_allocp, LIR_immi) -- use lea. |
1757 | Register rr = prepareResultReg(ins, GpRegs); |
1758 | int d = findMemFor(lhs) + rhs->immI(); |
1759 | |
1760 | LEA(rr, d, FP); |
1761 | |
1762 | freeResourcesOf(ins); |
1763 | |
1764 | return; |
1765 | } |
1766 | |
1767 |         bool isConstRhs;
1768 | RegisterMask allow = GpRegs; |
1769 | Register rb = UnspecifiedReg; |
1770 | |
1771 | switch (op) { |
1772 | case LIR_divi: |
1773 | // Nb: if the div feeds into a mod it will be handled by |
1774 | // asm_div_mod() rather than here. |
1775 |             isConstRhs = false;
1776 | rb = findRegFor(rhs, (GpRegs & ~(rmask(rEAX)|rmask(rEDX)))); |
1777 | allow = rmask(rEAX); |
1778 | evictIfActive(rEDX); |
1779 | break; |
1780 | case LIR_muli: |
1781 | case LIR_muljovi: |
1782 | case LIR_mulxovi: |
1783 |             isConstRhs = false;
1784 | if (lhs != rhs) { |
1785 | rb = findRegFor(rhs, allow); |
1786 | allow &= ~rmask(rb); |
1787 | } |
1788 | break; |
1789 | case LIR_lshi: |
1790 | case LIR_rshi: |
1791 | case LIR_rshui: |
1792 | isConstRhs = rhs->isImmI(); |
1793 | if (!isConstRhs) { |
1794 | rb = findSpecificRegFor(rhs, rECX); |
1795 | allow &= ~rmask(rb); |
1796 | } |
1797 | break; |
1798 | default: |
1799 | isConstRhs = rhs->isImmI(); |
1800 | if (!isConstRhs && lhs != rhs) { |
1801 | rb = findRegFor(rhs, allow); |
1802 | allow &= ~rmask(rb); |
1803 | } |
1804 | break; |
1805 | } |
1806 | |
1807 | // Somewhere for the result of 'ins'. |
1808 | Register rr = prepareResultReg(ins, allow); |
1809 | |
1810 | // If 'lhs' isn't in a register, it can be clobbered by 'ins'. |
1811 | Register ra = lhs->isInReg() ? lhs->getReg() : rr; |
1812 | |
1813 | if (!isConstRhs) { |
1814 | if (lhs == rhs) |
1815 | rb = ra; |
1816 | |
1817 | switch (op) { |
1818 | case LIR_addi: |
1819 | case LIR_addjovi: |
1820 | case LIR_addxovi: ADD(rr, rb); break; // XXX: bug 547125: could use LEA for LIR_addi |
1821 | case LIR_subi: |
1822 | case LIR_subjovi: |
1823 | case LIR_subxovi: SUB(rr, rb); break; |
1824 | case LIR_muli: |
1825 | case LIR_muljovi: |
1826 | case LIR_mulxovi: IMUL(rr, rb); break; |
1827 | case LIR_andi: AND(rr, rb); break; |
1828 | case LIR_ori: OR( rr, rb); break; |
1829 | case LIR_xori: XOR(rr, rb); break; |
1830 | case LIR_lshi: SHL(rr, rb); break; |
1831 | case LIR_rshi: SAR(rr, rb); break; |
1832 | case LIR_rshui: SHR(rr, rb); break; |
1833 | case LIR_divi: |
1834 | DIV(rb); |
1835 | CDQ(); // sign-extend rEAX into rEDX:rEAX |
1836 | break; |
1837 |             default:        NanoAssert(0);  break;
1838 | } |
1839 | |
1840 | } else { |
1841 | int c = rhs->immI(); |
1842 | switch (op) { |
1843 | case LIR_addi: |
1844 | // this doesn't set cc's, only use it when cc's not required. |
1845 | LEA(rr, c, ra); |
1846 | ra = rr; // suppress mov |
1847 | break; |
1848 | case LIR_addjovi: |
1849 | case LIR_addxovi: ADDi(rr, c); break; |
1850 | case LIR_subi: |
1851 | case LIR_subjovi: |
1852 | case LIR_subxovi: SUBi(rr, c); break; |
1853 | case LIR_andi: ANDi(rr, c); break; |
1854 | case LIR_ori: ORi( rr, c); break; |
1855 | case LIR_xori: XORi(rr, c); break; |
1856 | case LIR_lshi: SHLi(rr, c); break; |
1857 | case LIR_rshi: SARi(rr, c); break; |
1858 | case LIR_rshui: SHRi(rr, c); break; |
1859 |             default:        NanoAssert(0);  break;
1860 | } |
1861 | } |
1862 | |
1863 | if (rr != ra) |
1864 | MR(rr, ra); |
1865 | |
1866 | freeResourcesOf(ins); |
1867 | if (!lhs->isInReg()) { |
1868 |             NanoAssert(ra == rr);
1869 | findSpecificRegForUnallocated(lhs, ra); |
1870 | } |
1871 | } |
1872 | |
1873 | // Generates code for a LIR_modi(LIR_divi(divL, divR)) sequence. |
1874 | void Assembler::asm_div_mod(LIns* mod) |
1875 | { |
1876 | LIns* div = mod->oprnd1(); |
1877 | |
1878 | // LIR_modi expects the LIR_divi to be near (no interference from the register allocator). |
1879 |         NanoAssert(mod->isop(LIR_modi));
1880 |         NanoAssert(div->isop(LIR_divi));
1881 | |
1882 | LIns* divL = div->oprnd1(); |
1883 | LIns* divR = div->oprnd2(); |
1884 | |
1885 | prepareResultReg(mod, rmask(rEDX)); |
1886 | prepareResultReg(div, rmask(rEAX)); |
1887 | |
1888 | Register rDivR = findRegFor(divR, (GpRegs & ~(rmask(rEAX)|rmask(rEDX)))); |
1889 | Register rDivL = divL->isInReg() ? divL->getReg() : rEAX; |
1890 | |
1891 | DIV(rDivR); |
1892 | CDQ(); // sign-extend rEAX into rEDX:rEAX |
1893 | if (rEAX != rDivL) |
1894 | MR(rEAX, rDivL); |
1895 | |
1896 | freeResourcesOf(mod); |
1897 | freeResourcesOf(div); |
1898 | if (!divL->isInReg()) { |
1899 |             NanoAssert(rDivL == rEAX);
1900 | findSpecificRegForUnallocated(divL, rEAX); |
1901 | } |
1902 | } |
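     | 
     |     // In execution order, asm_div_mod() produces roughly the following
     |     // (a sketch): 'idiv' (signed divide) leaves the quotient -- the
     |     // LIR_divi -- in eax and the remainder -- the LIR_modi -- in edx.
     |     //
     |     //    mov  eax, <divL>     # only if divL wasn't already in eax
     |     //    cdq                  # sign-extend eax into edx:eax
     |     //    idiv <divR>          # quotient -> eax, remainder -> edx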
1903 | |
1904 | // Two example cases for "ins = neg lhs". Lines marked with '*' are |
1905 | // generated in this function. |
1906 | // |
1907 | // asm: define lhs into rr |
1908 | // ... |
1909 | // * asm: neg rr |
1910 | // * asm: spill rr if necessary |
1911 | // ... no more uses of lhs in rr... |
1912 | // |
1913 | // |
1914 | // asm: define lhs into ra |
1915 | // ... |
1916 | // * asm: mov rr, ra |
1917 | // * asm: neg rr |
1918 | // * asm: spill rr if necessary |
1919 | // ... more uses of lhs in ra... |
1920 | // |
1921 | void Assembler::asm_neg_not(LIns* ins) |
1922 | { |
1923 | LIns* lhs = ins->oprnd1(); |
1924 | |
1925 | Register rr = prepareResultReg(ins, GpRegs); |
1926 | |
1927 | // If 'lhs' isn't in a register, it can be clobbered by 'ins'. |
1928 | Register ra = lhs->isInReg() ? lhs->getReg() : rr; |
1929 | |
1930 | if (ins->isop(LIR_noti)) { |
1931 | NOT(rr); |
1932 | } else { |
1933 |             NanoAssert(ins->isop(LIR_negi));
1934 | NEG(rr); |
1935 | } |
1936 | if (rr != ra) |
1937 | MR(rr, ra); |
1938 | |
1939 | freeResourcesOf(ins); |
1940 | if (!lhs->isInReg()) { |
1941 |             NanoAssert(ra == rr);
1942 | findSpecificRegForUnallocated(lhs, ra); |
1943 | } |
1944 | } |
1945 | |
1946 | void Assembler::asm_load32(LIns* ins) |
1947 | { |
1948 | LOpcode op = ins->opcode(); |
1949 | LIns* base = ins->oprnd1(); |
1950 | int32_t d = ins->disp(); |
1951 | |
1952 | Register rr = prepareResultReg(ins, GpRegs); |
1953 | |
1954 | if (base->isImmI()) { |
1955 | intptr_t addr = base->immI(); |
1956 | addr += d; |
1957 | switch (op) { |
1958 | case LIR_lduc2ui: LD8Zdm( rr, addr); break; |
1959 | case LIR_ldc2i: LD8Sdm( rr, addr); break; |
1960 | case LIR_ldus2ui: LD16Zdm(rr, addr); break; |
1961 | case LIR_lds2i: LD16Sdm(rr, addr); break; |
1962 | case LIR_ldi: LDdm( rr, addr); break; |
1963 |                 default:            NanoAssert(0);      break;
1964 | } |
1965 | |
1966 | freeResourcesOf(ins); |
1967 | |
1968 | } else if (base->opcode() == LIR_addp) { |
1969 | LIns* index; |
1970 | int scale; |
1971 | getBaseIndexScale(base, &base, &index, &scale); |
1972 | |
1973 | // If 'base' isn't in a register, it can be clobbered by 'ins'. |
1974 | // Likewise for 'rhs', but we try it with 'base' first. |
1975 | Register rb, ri; |
1976 | // @todo -- If base and/or index is const, we could eliminate a register use. |
1977 | if (!base->isInReg()) { |
1978 | rb = rr; |
1979 | ri = findRegFor(index, GpRegs & ~(rmask(rb))); |
1980 | |
1981 | } else { |
1982 | rb = base->getReg(); |
1983 |                 NanoAssert(rb != rr);
1984 | ri = index->isInReg() ? findRegFor(index, GpRegs & ~(rmask(rb))) : rr; |
1985 | } |
1986 | |
1987 | switch (op) { |
1988 | case LIR_lduc2ui: LD8Zsib( rr, d, rb, ri, scale); break; |
1989 | case LIR_ldc2i: LD8Ssib( rr, d, rb, ri, scale); break; |
1990 | case LIR_ldus2ui: LD16Zsib(rr, d, rb, ri, scale); break; |
1991 | case LIR_lds2i: LD16Ssib(rr, d, rb, ri, scale); break; |
1992 | case LIR_ldi: LDsib( rr, d, rb, ri, scale); break; |
1993 |                 default:            NanoAssert(0);                      break;
1994 | } |
1995 | |
1996 | freeResourcesOf(ins); |
1997 | if (!base->isInReg()) { |
1998 |                 NanoAssert(rb == rr);
1999 | findSpecificRegForUnallocated(base, rb); |
2000 | } else if (!index->isInReg()) { |
2001 |                 NanoAssert(ri == rr);
2002 | findSpecificRegForUnallocated(index, ri); |
2003 | } |
2004 | |
2005 | } else { |
2006 | Register ra = getBaseReg(base, d, GpRegs); |
2007 | |
2008 | switch (op) { |
2009 | case LIR_lduc2ui: LD8Z( rr, d, ra); break; |
2010 | case LIR_ldc2i: LD8S( rr, d, ra); break; |
2011 | case LIR_ldus2ui: LD16Z(rr, d, ra); break; |
2012 | case LIR_lds2i: LD16S(rr, d, ra); break; |
2013 | case LIR_ldi: LD( rr, d, ra); break; |
2014 |                 default:            NanoAssert(0);          break;
2015 | } |
2016 | |
2017 | freeResourcesOf(ins); |
2018 | if (!base->isop(LIR_allocp) && !base->isInReg()) { |
2019 |                 NanoAssert(ra == rr);
2020 | findSpecificRegForUnallocated(base, ra); |
2021 | } |
2022 | } |
2023 | } |
2024 | |
2025 | void Assembler::asm_cmov(LIns* ins) |
2026 | { |
2027 | LIns* condval = ins->oprnd1(); |
2028 | LIns* iftrue = ins->oprnd2(); |
2029 | LIns* iffalse = ins->oprnd3(); |
2030 | |
2031 |         NanoAssert(condval->isCmp());
2032 |         NanoAssert((ins->isop(LIR_cmovi) && iftrue->isI() && iffalse->isI()) ||
2033 |                    (ins->isop(LIR_cmovd) && iftrue->isD() && iffalse->isD()));
2034 | |
2035 | if (!_config.i386_sse2 && ins->isop(LIR_cmovd)) { |
2036 | // See the SSE2 case below for an explanation of the subtleties here. |
2037 | debug_only( Register rr = ) prepareResultReg(ins, x87Regs); |
2038 |             NanoAssert(FST0 == rr);
2039 |             NanoAssert(!iftrue->isInReg() && !iffalse->isInReg());
2040 | |
2041 | NIns* target = _nIns; |
2042 | |
2043 | if (iffalse->isImmD()) { |
2044 |                 asm_immd(FST0, iffalse->immDasQ(), iffalse->immD(), /*canClobberCCs*/false);
2045 | } else { |
2046 | int df = findMemFor(iffalse); |
2047 | FLDQ(df, FP); |
2048 | } |
2049 | FSTP(FST0); // pop the stack |
2050 |             asm_branch_helper(false, condval, target);
2051 | |
2052 |             NanoAssert(ins->getReg() == rr);
2053 | freeResourcesOf(ins); |
2054 | if (!iftrue->isInReg()) |
2055 | findSpecificRegForUnallocated(iftrue, FST0); |
2056 | |
2057 | asm_cmp(condval); |
2058 | |
2059 | return; |
2060 | } |
2061 | |
2062 | RegisterMask allow = ins->isD() ? XmmRegs : GpRegs; |
2063 | Register rr = prepareResultReg(ins, allow); |
2064 | Register rf = findRegFor(iffalse, allow & ~rmask(rr)); |
2065 | |
2066 | if (ins->isop(LIR_cmovd)) { |
2067 | // The obvious way to handle this is as follows: |
2068 | // |
2069 | // mov rr, rt # only needed if rt is live afterwards |
2070 | // do comparison |
2071 | // jt end |
2072 | // mov rr, rf |
2073 | // end: |
2074 | // |
2075 | // The problem with this is that doing the comparison can cause |
2076 | // registers to be evicted, possibly including 'rr', which holds |
2077 | // 'ins'. And that screws things up. So instead we do this: |
2078 | // |
2079 | // do comparison |
2080 | // mov rr, rt # only needed if rt is live afterwards |
2081 | // jt end |
2082 | // mov rr, rf |
2083 | // end: |
2084 | // |
2085 | // Putting the 'mov' between the comparison and the jump is ok |
2086 | // because move instructions don't modify the condition codes. |
2087 | // |
2088 | NIns* target = _nIns; |
2089 | asm_nongp_copy(rr, rf); |
2090 |             asm_branch_helper(false, condval, target);
2091 | |
2092 | // If 'iftrue' isn't in a register, it can be clobbered by 'ins'. |
2093 | Register rt = iftrue->isInReg() ? iftrue->getReg() : rr; |
2094 | |
2095 | if (rr != rt) |
2096 | asm_nongp_copy(rr, rt); |
2097 | |
2098 |             NanoAssert(ins->getReg() == rr);
2099 | freeResourcesOf(ins); |
2100 | if (!iftrue->isInReg()) { |
2101 |                 NanoAssert(rt == rr);
2102 | findSpecificRegForUnallocated(iftrue, rr); |
2103 | } |
2104 | |
2105 | asm_cmp(condval); |
2106 | return; |
2107 | } |
2108 | |
2109 | // If 'iftrue' isn't in a register, it can be clobbered by 'ins'. |
2110 | Register rt = iftrue->isInReg() ? iftrue->getReg() : rr; |
2111 |         NanoAssert(ins->isop(LIR_cmovi));
2112 | |
2113 | // WARNING: We cannot generate any code that affects the condition |
2114 | // codes between the MRcc generation here and the asm_cmpi() call |
2115 | // below. See asm_cmpi() for more details. |
2116 | switch (condval->opcode()) { |
2117 | // Note that these are all opposites... |
2118 | case LIR_eqi: MRNE(rr, rf); break; |
2119 | case LIR_lti: MRGE(rr, rf); break; |
2120 | case LIR_lei: MRG( rr, rf); break; |
2121 | case LIR_gti: MRLE(rr, rf); break; |
2122 | case LIR_gei: MRL( rr, rf); break; |
2123 | case LIR_ltui: MRAE(rr, rf); break; |
2124 | case LIR_leui: MRA( rr, rf); break; |
2125 | case LIR_gtui: MRBE(rr, rf); break; |
2126 | case LIR_geui: MRB( rr, rf); break; |
2127 |             default: NanoAssert(0); break;
2128 | } |
2129 | |
2130 | if (rr != rt) |
2131 | MR(rr, rt); |
2132 | |
2133 |         NanoAssert(ins->getReg() == rr);
2134 | freeResourcesOf(ins); |
2135 | if (!iftrue->isInReg()) { |
2136 |             NanoAssert(rt == rr);
2137 | findSpecificRegForUnallocated(iftrue, rr); |
2138 | } |
2139 | |
2140 | asm_cmp(condval); |
2141 | } |
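     | 
     |     // For the integer case, e.g. 'ins = cmovi (lti a, b), T, F', the
     |     // code above executes roughly as (a sketch):
     |     //
     |     //    cmp    ra, rb        # from asm_cmpi()
     |     //    mov    rr, rt        # rr = T
     |     //    cmovge rr, rf        # opposite sense: if !(a < b), rr = F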
2142 | |
2143 | void Assembler::asm_param(LIns* ins) |
2144 | { |
2145 | uint32_t arg = ins->paramArg(); |
2146 | uint32_t kind = ins->paramKind(); |
2147 | if (kind == 0) { |
2148 | // ordinary param |
2149 | AbiKind abi = _thisfrag->lirbuf->abi; |
2150 | uint32_t abi_regcount = max_abi_regs[abi]; |
2151 | // argRegs must have as many elements as the largest argument register |
2152 | // requirement of an abi. Currently, this is 2, for ABI_FASTCALL. See |
2153 | // the definition of max_abi_regs earlier in this file. The following |
2154 | // assertion reflects this invariant: |
2155 |             NanoAssert(abi_regcount <= sizeof(argRegs)/sizeof(argRegs[0]));
2156 | if (arg < abi_regcount) { |
2157 | // Incoming arg in register. |
2158 | prepareResultReg(ins, rmask(argRegs[arg])); |
2159 | // No code to generate. |
2160 | } else { |
2161 | // Incoming arg is on stack, and rEBP points nearby (see genPrologue()). |
2162 | Register r = prepareResultReg(ins, GpRegs); |
2163 | int d = (arg - abi_regcount) * sizeof(intptr_t) + 8; |
2164 | LD(r, d, FP); |
2165 | } |
2166 | } else { |
2167 | // Saved param. |
2168 | prepareResultReg(ins, rmask(savedRegs[arg])); |
2169 | // No code to generate. |
2170 | } |
2171 | freeResourcesOf(ins); |
2172 | } |
2173 | |
2174 | void Assembler::asm_immi(LIns* ins) |
2175 | { |
2176 | Register rr = prepareResultReg(ins, GpRegs); |
2177 | |
2178 |         asm_immi(rr, ins->immI(), /*canClobberCCs*/true);
2179 | |
2180 | freeResourcesOf(ins); |
2181 | } |
2182 | |
2183 |     void Assembler::asm_immi(Register r, int32_t val, bool canClobberCCs)
2184 | { |
2185 | if (val == 0 && canClobberCCs) |
2186 | XOR(r, r); |
2187 | else |
2188 | LDi(r, val); |
2189 | } |
2190 | |
2191 |     void Assembler::asm_immd(Register r, uint64_t q, double d, bool canClobberCCs)
2192 | { |
2193 | // Floats require non-standard handling. There is no load-64-bit-immediate |
2194 | // instruction on i386, so in the general case, we must load it from memory. |
2195 | // This is unlike most other LIR operations which can be computed directly |
2196 | // in a register. We can special-case 0.0 and various other small ints |
2197 | // (1.0 on x87, any int32_t value on SSE2), but for all other values, we |
2198 | // allocate an 8-byte chunk via dataAlloc and load from there. Note that |
2199 | // this implies that floats never require spill area, since they will always |
2200 | // be rematerialized from const data (or inline instructions in the special cases). |
2201 | |
2202 | if (rmask(r) & XmmRegs) { |
2203 | if (q == 0) { |
2204 |                 // test the bit pattern (q == 0) rather than d == 0.0, since -0.0 == 0.0
2205 | SSE_XORPDr(r, r); |
2206 | } else if (d && d == (int)d && canClobberCCs) { |
2207 | // can fit in 32bits? then use cvt which is faster |
2208 | Register tr = registerAllocTmp(GpRegs); |
2209 | SSE_CVTSI2SD(r, tr); |
2210 | SSE_XORPDr(r, r); // zero r to ensure no dependency stalls |
2211 | asm_immi(tr, (int)d, canClobberCCs); |
2212 | } else { |
2213 | const uint64_t* p = findImmDFromPool(q); |
2214 | LDSDm(r, (const double*)p); |
2215 | } |
2216 | } else { |
2217 |             NanoAssert(r == FST0);
2218 | if (q == 0) { |
2219 |                 // test the bit pattern (q == 0) rather than d == 0.0, since -0.0 == 0.0
2220 | FLDZ(); |
2221 | } else if (d == 1.0) { |
2222 | FLD1(); |
2223 | } else { |
2224 | const uint64_t* p = findImmDFromPool(q); |
2225 | FLDQdm((const double*)p); |
2226 | } |
2227 | } |
2228 | } |
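     | 
     |     // The int-valued SSE2 fast path above, materialising e.g. 3.0,
     |     // executes as (a sketch; 'tr' is a GP temp):
     |     //
     |     //    mov      tr, 3
     |     //    xorpd    r, r        # break the dependency on r's old value
     |     //    cvtsi2sd r, tr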
2229 | |
2230 | void Assembler::asm_immd(LIns* ins) |
2231 | { |
2232 |         NanoAssert(ins->isImmD());
2233 | if (ins->isInReg()) { |
2234 | Register rr = ins->getReg(); |
2235 |             NanoAssert(rmask(rr) & FpRegs);
2236 |             asm_immd(rr, ins->immDasQ(), ins->immD(), /*canClobberCCs*/true);
2237 | } else { |
2238 | // Do nothing, will be rematerialized when necessary. |
2239 | } |
2240 | |
2241 | freeResourcesOf(ins); |
2242 | } |
2243 | |
2244 |     // negateMask is used by asm_fneg: XORPD with it flips bit 63, the sign bit of the low double.
2245 | #if defined __SUNPRO_CC |
2246 | // From Sun Studio C++ Readme: #pragma align inside namespace requires mangled names. |
2247 | // Initialize here to avoid multithreading contention issues during initialization. |
2248 | static uint32_t negateMask_temp[] = {0, 0, 0, 0, 0, 0, 0}; |
2249 | |
2250 | static uint32_t* negateMaskInit() |
2251 | { |
2252 |         uint32_t* negateMask = (uint32_t*)alignUp(negateMask_temp, 16);
2253 | negateMask[1] = 0x80000000; |
2254 | return negateMask; |
2255 | } |
2256 | |
2257 | static uint32_t *negateMask = negateMaskInit(); |
2258 | #else |
2259 |     static const AVMPLUS_ALIGN16(uint32_t) negateMask[] = {0,0x80000000,0,0};
2260 | #endif |
2261 | |
2262 | void Assembler::asm_fneg(LIns* ins) |
2263 | { |
2264 | LIns *lhs = ins->oprnd1(); |
2265 | |
2266 | if (_config.i386_sse2) { |
2267 | Register rr = prepareResultReg(ins, XmmRegs); |
2268 | |
2269 | // If 'lhs' isn't in a register, it can be clobbered by 'ins'. |
2270 | Register ra; |
2271 | if (!lhs->isInReg()) { |
2272 | ra = rr; |
2273 | } else if (!(rmask(lhs->getReg()) & XmmRegs)) { |
2274 | // We need to evict lhs from x87Regs, which then puts us in |
2275 | // the same situation as the !isInReg() case. |
2276 | evict(lhs); |
2277 | ra = rr; |
2278 | } else { |
2279 | ra = lhs->getReg(); |
2280 | } |
2281 | |
2282 | SSE_XORPD(rr, negateMask); |
2283 | |
2284 | if (rr != ra) |
2285 | SSE_MOVSD(rr, ra); |
2286 | |
2287 | freeResourcesOf(ins); |
2288 | if (!lhs->isInReg()) { |
2289 |                 NanoAssert(ra == rr);
2290 | findSpecificRegForUnallocated(lhs, ra); |
2291 | } |
2292 | |
2293 | } else { |
2294 | debug_only( Register rr = ) prepareResultReg(ins, x87Regs); |
2295 |             NanoAssert(FST0 == rr);
2296 | |
2297 |             NanoAssert(!lhs->isInReg() || FST0 == lhs->getReg());
2298 | |
2299 | FCHS(); |
2300 | |
2301 | freeResourcesOf(ins); |
2302 | if (!lhs->isInReg()) |
2303 | findSpecificRegForUnallocated(lhs, FST0); |
2304 | } |
2305 | } |
2306 | |
2307 | void Assembler::asm_arg(ArgType ty, LIns* ins, Register r, int32_t& stkd) |
2308 | { |
2309 | // If 'r' is known, then that's the register we have to put 'ins' |
2310 | // into. |
2311 | |
2312 | if (ty == ARGTYPE_I || ty == ARGTYPE_UI) { |
2313 | if (r != UnspecifiedReg) { |
2314 | if (ins->isImmI()) { |
2315 | // Rematerialize the constant. |
2316 |                     asm_immi(r, ins->immI(), /*canClobberCCs*/true);
2317 | } else if (ins->isInReg()) { |
2318 | if (r != ins->getReg()) |
2319 | MR(r, ins->getReg()); |
2320 | } else if (ins->isInAr()) { |
2321 | int d = arDisp(ins); |
2322 |                     NanoAssert(d != 0);
2323 | if (ins->isop(LIR_allocp)) { |
2324 | LEA(r, d, FP); |
2325 | } else { |
2326 | LD(r, d, FP); |
2327 | } |
2328 | |
2329 | } else { |
2330 | // This is the last use, so fine to assign it |
2331 | // to the scratch reg, it's dead after this point. |
2332 | findSpecificRegForUnallocated(ins, r); |
2333 | } |
2334 | } |
2335 | else { |
2336 | if (_config.i386_fixed_esp) |
2337 | asm_stkarg(ins, stkd); |
2338 | else |
2339 | asm_pusharg(ins); |
2340 | } |
2341 | |
2342 | } else { |
2343 |             NanoAssert(ty == ARGTYPE_D);
2344 | asm_farg(ins, stkd); |
2345 | } |
2346 | } |
2347 | |
2348 | void Assembler::asm_pusharg(LIns* ins) |
2349 | { |
2350 | // arg goes on stack |
2351 | if (!ins->isExtant() && ins->isImmI()) { |
2352 | PUSHi(ins->immI()); // small const we push directly |
2353 | } else if (!ins->isExtant() || ins->isop(LIR_allocp)) { |
2354 | Register ra = findRegFor(ins, GpRegs); |
2355 | PUSHr(ra); |
2356 | } else if (ins->isInReg()) { |
2357 | PUSHr(ins->getReg()); |
2358 | } else { |
2359 |             NanoAssert(ins->isInAr());
2360 | PUSHm(arDisp(ins), FP); |
2361 | } |
2362 | } |
2363 | |
2364 | void Assembler::asm_stkarg(LIns* ins, int32_t& stkd) |
2365 | { |
2366 | // arg goes on stack |
2367 | if (!ins->isExtant() && ins->isImmI()) |
2368 | { |
2369 | // small const we push directly |
2370 | STi(SP, stkd, ins->immI()); |
2371 | } |
2372 | else { |
2373 | Register ra; |
2374 | if (!ins->isInReg() || ins->isop(LIR_allocp)) |
2375 | ra = findRegFor(ins, GpRegs & (~SavedRegs)); |
2376 | else |
2377 | ra = ins->getReg(); |
2378 | ST(SP, stkd, ra); |
2379 | } |
2380 | |
2381 | stkd += sizeof(int32_t); |
2382 | } |
2383 | |
2384 | void Assembler::asm_farg(LIns* ins, int32_t& stkd) |
2385 | { |
2386 |         NanoAssert(ins->isD());
2387 | Register r = findRegFor(ins, FpRegs); |
2388 | if (rmask(r) & XmmRegs) { |
2389 | SSE_STQ(stkd, SP, r); |
2390 | } else { |
2391 | FSTPQ(stkd, SP); |
2392 | |
2393 | // 22Jul09 rickr - Enabling the evict causes a 10% slowdown on primes |
2394 | // |
2395 | // evict() triggers a very expensive fstpq/fldq pair around the store. |
2396 | // We need to resolve the bug some other way. |
2397 | // |
2398 | // see https://bugzilla.mozilla.org/show_bug.cgi?id=491084 |
2399 | |
2400 | // It's possible that the same LIns* with r=FST0 will appear in the argument list more |
2401 | // than once. In this case FST0 will not have been evicted and the multiple pop |
2402 | // actions will unbalance the FPU stack. A quick fix is to always evict FST0 manually. |
2403 |             NanoAssert(r == FST0);
2404 |             NanoAssert(ins == _allocator.getActive(r));
2405 | evict(ins); |
2406 | } |
2407 | if (!_config.i386_fixed_esp) |
2408 | SUBi(rESP, 8); |
2409 | |
2410 | stkd += sizeof(double); |
2411 | } |
2412 | |
2413 | void Assembler::asm_fop(LIns* ins) |
2414 | { |
2415 | LOpcode op = ins->opcode(); |
2416 | if (_config.i386_sse2) |
2417 | { |
2418 | LIns *lhs = ins->oprnd1(); |
2419 | LIns *rhs = ins->oprnd2(); |
2420 | |
2421 | RegisterMask allow = XmmRegs; |
2422 | Register rb = UnspecifiedReg; |
2423 | if (lhs != rhs) { |
2424 | rb = findRegFor(rhs, allow); |
2425 | allow &= ~rmask(rb); |
2426 | } |
2427 | |
2428 | Register rr = prepareResultReg(ins, allow); |
2429 | |
2430 | // If 'lhs' isn't in a register, it can be clobbered by 'ins'. |
2431 | Register ra; |
2432 | if (!lhs->isInReg()) { |
2433 | ra = rr; |
2434 | |
2435 | } else if (!(rmask(lhs->getReg()) & XmmRegs)) { |
2436 |                 NanoAssert(lhs->getReg() == FST0);
2437 | |
2438 | // We need to evict lhs from x87Regs, which then puts us in |
2439 | // the same situation as the !isInReg() case. |
2440 | evict(lhs); |
2441 | ra = rr; |
2442 | |
2443 | } else { |
2444 | ra = lhs->getReg(); |
2445 |                 NanoAssert(rmask(ra) & XmmRegs);
2446 | } |
2447 | |
2448 | if (lhs == rhs) |
2449 | rb = ra; |
2450 | |
2451 | switch (op) { |
2452 | case LIR_addd: SSE_ADDSD(rr, rb); break; |
2453 | case LIR_subd: SSE_SUBSD(rr, rb); break; |
2454 | case LIR_muld: SSE_MULSD(rr, rb); break; |
2455 | case LIR_divd: SSE_DIVSD(rr, rb); break; |
2456 |                 default:       NanoAssert(0);
2457 | } |
2458 | |
2459 | if (rr != ra) |
2460 | SSE_MOVSD(rr, ra); |
2461 | |
2462 | freeResourcesOf(ins); |
2463 | if (!lhs->isInReg()) { |
2464 |                 NanoAssert(ra == rr);
2465 | findSpecificRegForUnallocated(lhs, ra); |
2466 | } |
2467 | } |
2468 | else |
2469 | { |
2470 | // We swap lhs/rhs on purpose here, it works out better with |
2471 | // only one fpu reg -- we can use divr/subr. |
2472 | LIns* rhs = ins->oprnd1(); |
2473 | LIns* lhs = ins->oprnd2(); |
2474 | debug_only( Register rr = ) prepareResultReg(ins, rmask(FST0)); |
2475 |             NanoAssert(FST0 == rr);
2476 |             NanoAssert(!lhs->isInReg() || FST0 == lhs->getReg());
2477 | |
2478 | if (rhs->isImmD()) { |
2479 | const uint64_t* p = findImmDFromPool(rhs->immDasQ()); |
2480 | |
2481 | switch (op) { |
2482 | case LIR_addd: FADDdm( (const double*)p); break; |
2483 | case LIR_subd: FSUBRdm((const double*)p); break; |
2484 | case LIR_muld: FMULdm( (const double*)p); break; |
2485 | case LIR_divd: FDIVRdm((const double*)p); break; |
2486 |                     default:       NanoAssert(0);
2487 | } |
2488 | |
2489 | } else { |
2490 | int db = findMemFor(rhs); |
2491 | |
2492 | switch (op) { |
2493 | case LIR_addd: FADD( db, FP); break; |
2494 | case LIR_subd: FSUBR(db, FP); break; |
2495 | case LIR_muld: FMUL( db, FP); break; |
2496 | case LIR_divd: FDIVR(db, FP); break; |
2497 |                     default:       NanoAssert(0);
2498 | } |
2499 | } |
2500 | freeResourcesOf(ins); |
2501 | if (!lhs->isInReg()) { |
2502 | findSpecificRegForUnallocated(lhs, FST0); |
2503 | } |
2504 | } |
2505 | } |
2506 | |
2507 | void Assembler::asm_i2d(LIns* ins) |
2508 | { |
2509 | LIns* lhs = ins->oprnd1(); |
2510 | |
2511 | Register rr = prepareResultReg(ins, FpRegs); |
2512 | if (rmask(rr) & XmmRegs) { |
2513 | // todo support int value in memory |
2514 | Register ra = findRegFor(lhs, GpRegs); |
2515 | SSE_CVTSI2SD(rr, ra); |
2516 | SSE_XORPDr(rr, rr); // zero rr to ensure no dependency stalls |
2517 | } else { |
2518 | int d = findMemFor(lhs); |
2519 | FILD(d, FP); |
2520 | } |
2521 | |
2522 | freeResourcesOf(ins); |
2523 | } |
2524 | |
2525 | void Assembler::asm_ui2d(LIns* ins) |
2526 | { |
2527 | LIns* lhs = ins->oprnd1(); |
2528 | |
2529 | Register rr = prepareResultReg(ins, FpRegs); |
2530 | if (rmask(rr) & XmmRegs) { |
2531 | Register rt = registerAllocTmp(GpRegs); |
2532 | |
2533 | // Technique inspired by gcc disassembly. Edwin explains it: |
2534 | // |
2535 | // rt is 0..2^32-1 |
2536 | // |
2537 | // sub rt,0x80000000 |
2538 | // |
2539 | // Now rt is -2^31..2^31-1, i.e. the range of int, but not the same value |
2540 | // as before. |
2541 | // |
2542 | // cvtsi2sd rr,rt |
2543 | // |
2544 | // rr is now a double with the int value range. |
2545 | // |
2546 | // addsd rr, 2147483648.0 |
2547 | // |
2548 | // Adding back double(0x80000000) makes the range 0..2^32-1. |
2549 | |
2550 |             static const double k_NEGONE = 2147483648.0;  // 2^31 (0x80000000), despite the name
2551 | SSE_ADDSDm(rr, &k_NEGONE); |
2552 | |
2553 | SSE_CVTSI2SD(rr, rt); |
2554 | SSE_XORPDr(rr, rr); // zero rr to ensure no dependency stalls |
2555 | |
2556 | if (lhs->isInRegMask(GpRegs)) { |
2557 | Register ra = lhs->getReg(); |
2558 | LEA(rt, 0x80000000, ra); |
2559 | |
2560 | } else { |
2561 | const int d = findMemFor(lhs); |
2562 | SUBi(rt, 0x80000000); |
2563 | LD(rt, d, FP); |
2564 | } |
2565 | |
2566 | } else { |
2567 | // Use space just below rESP and use PUSH to avoid writing |
2568 | // past the end of the stack, see bug 590553. |
2569 | Register ra = findRegFor(lhs, GpRegs); |
2570 |             NanoAssert(rr == FST0);
2571 | ADDi(SP, 8); // fix up the stack |
2572 | FILDQ(0, SP); // convert int64 to double |
2573 | PUSHr(ra); // low 32 bits = unsigned value |
2574 | PUSHi(0); // high 32 bits = 0 |
2575 | } |
2576 | |
2577 | freeResourcesOf(ins); |
2578 | } |
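     | 
     |     // The x87 path above comes out, in execution order, roughly as:
     |     //
     |     //    push 0               # high 32 bits of an int64
     |     //    push ra              # low 32 bits = the unsigned value
     |     //    fild qword [esp]     # exact int64 -> double conversion
     |     //    add  esp, 8          # restore the stack pointer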
2579 | |
2580 | void Assembler::asm_d2i(LIns* ins) |
2581 | { |
2582 | LIns *lhs = ins->oprnd1(); |
2583 | |
2584 | if (_config.i386_sse2) { |
2585 | Register rr = prepareResultReg(ins, GpRegs); |
2586 | Register ra = findRegFor(lhs, XmmRegs); |
2587 | SSE_CVTTSD2SI(rr, ra); |
2588 | } else { |
2589 |             bool pop = !lhs->isInReg();
2590 | findSpecificRegFor(lhs, FST0); |
2591 | if (ins->isInReg()) |
2592 | evict(ins); |
2593 | int d = findMemFor(ins); |
2594 | FIST(pop, d, FP); |
2595 | } |
2596 | |
2597 | freeResourcesOf(ins); |
2598 | } |
2599 | |
2600 | void Assembler::asm_nongp_copy(Register rd, Register rs) |
2601 | { |
2602 | if ((rmask(rd) & XmmRegs) && (rmask(rs) & XmmRegs)) { |
2603 | // xmm -> xmm |
2604 | SSE_MOVSD(rd, rs); |
2605 | } else if ((rmask(rd) & GpRegs) && (rmask(rs) & XmmRegs)) { |
2606 | // xmm -> gp |
2607 | SSE_MOVD(rd, rs); |
2608 | } else { |
2609 |             NanoAssertMsgf(false, "bad asm_nongp_copy(%s, %s)", gpn(rd), gpn(rs));
2610 | } |
2611 | } |
2612 | |
2613 |     Branches Assembler::asm_branchd_helper(bool branchOnFalse, LIns* cond, NIns *targ)
2614 | { |
2615 |         NIns* patch1 = NULL;
2616 |         NIns* patch2 = NULL;
2617 | LOpcode opcode = cond->opcode(); |
2618 | |
2619 | if (_config.i386_sse2) { |
2620 | // LIR_ltd and LIR_gtd are handled by the same case because |
2621 | // asm_cmpd() converts LIR_ltd(a,b) to LIR_gtd(b,a). Likewise |
2622 | // for LIR_led/LIR_ged. |
2623 | if (branchOnFalse) { |
2624 | // op == LIR_xf |
2625 | switch (opcode) { |
2626 | case LIR_eqd: |
2627 | if (cond->oprnd1() == cond->oprnd2()) { |
2628 | JP(targ); |
2629 | } else { |
2630 | JP(targ); // unordered |
2631 | patch1 = _nIns; |
2632 | JNE(targ); |
2633 | patch2 = _nIns; |
2634 | } |
2635 | break; |
2636 | case LIR_ltd: |
2637 | case LIR_gtd: JNA(targ); break; |
2638 | case LIR_led: |
2639 | case LIR_ged: JNAE(targ); break; |
2640 |                 default:        NanoAssert(0);  break;
2641 | } |
2642 | } else { |
2643 | // op == LIR_xt |
2644 | switch (opcode) { |
2645 | case LIR_eqd: |
2646 | if (cond->oprnd1() == cond->oprnd2()) { |
2647 | JNP(targ); |
2648 | } else { |
2649 | // jp skip (2byte) |
2650 | // je target |
2651 | // skip: ... |
2652 |                         underrunProtect(16); // underrun of 7 bytes needed, but we write 2 instructions --> reserve 16
2653 | NIns *skip = _nIns; |
2654 | JE(targ); |
2655 | patch1 = _nIns; |
2656 | JP(skip); // unordered |
2657 | } |
2658 | break; |
2659 | case LIR_ltd: |
2660 | case LIR_gtd: JA(targ); break; |
2661 | case LIR_led: |
2662 | case LIR_ged: JAE(targ); break; |
2663 |                 default:        NanoAssert(0);  break;
2664 | } |
2665 | } |
2666 | } else { |
2667 | if (branchOnFalse) |
2668 | JP(targ); |
2669 | else |
2670 | JNP(targ); |
2671 | } |
2672 | |
2673 | if (!patch1) |
2674 | patch1 = _nIns; |
2675 | |
2676 | return Branches(patch1, patch2); |
2677 | } |
2678 | |
2679 | // WARNING: This function cannot generate any code that will affect the |
2680 | // condition codes prior to the generation of the |
2681 |     // ucomisd/fcompp/fcomp/fcom.  See asm_cmpi() for more details.
2682 | void Assembler::asm_cmpd(LIns *cond) |
2683 | { |
2684 | LOpcode condop = cond->opcode(); |
2685 |         NanoAssert(isCmpDOpcode(condop));
2686 | LIns* lhs = cond->oprnd1(); |
2687 | LIns* rhs = cond->oprnd2(); |
2688 |         NanoAssert(lhs->isD() && rhs->isD());
2689 | |
2690 | if (_config.i386_sse2) { |
2691 | // First, we convert (a < b) into (b > a), and (a <= b) into (b >= a). |
2692 | if (condop == LIR_ltd) { |
2693 | condop = LIR_gtd; |
2694 | LIns* t = lhs; lhs = rhs; rhs = t; |
2695 | } else if (condop == LIR_led) { |
2696 | condop = LIR_ged; |
Value stored to 'condop' is never read | |
2697 | LIns* t = lhs; lhs = rhs; rhs = t; |
2698 | } |
2699 | |
2700 | // LIR_eqd, if lhs == rhs: |
2701 | // ucomisd ZPC outcome (SETNP/JNP succeeds if P==0) |
2702 | // ------- --- ------- |
2703 | // UNORDERED 111 SETNP/JNP fails |
2704 | // EQUAL 100 SETNP/JNP succeeds |
2705 | // |
2706 | // LIR_eqd, if lhs != rhs; |
2707 | // ucomisd ZPC outcome (SETP/JP succeeds if P==0, |
2708 | // SETE/JE succeeds if Z==0) |
2709 | // ------- --- ------- |
2710 | // UNORDERED 111 SETP/JP succeeds (and skips to fail target) |
2711 | // EQUAL 100 SETP/JP fails, SETE/JE succeeds |
2712 | // GREATER_THAN 000 SETP/JP fails, SETE/JE fails |
2713 | // LESS_THAN 001 SETP/JP fails, SETE/JE fails |
2714 | // |
2715 | // LIR_gtd: |
2716 | // ucomisd ZPC outcome (SETA/JA succeeds if CZ==00) |
2717 | // ------- --- ------- |
2718 | // UNORDERED 111 SETA/JA fails |
2719 | // EQUAL 100 SETA/JA fails |
2720 | // GREATER_THAN 000 SETA/JA succeeds |
2721 | // LESS_THAN 001 SETA/JA fails |
2722 | // |
2723 | // LIR_ged: |
2724 | // ucomisd ZPC outcome (SETAE/JAE succeeds if C==0) |
2725 | // ------- --- ------- |
2726 | // UNORDERED 111 SETAE/JAE fails |
2727 | // EQUAL 100 SETAE/JAE succeeds |
2728 | // GREATER_THAN 000 SETAE/JAE succeeds |
2729 | // LESS_THAN 001 SETAE/JAE fails |
2730 | |
2731 | Register ra, rb; |
2732 | findRegFor2(XmmRegs, lhs, ra, XmmRegs, rhs, rb); |
2733 | SSE_UCOMISD(ra, rb); |
2734 | |
2735 | } else { |
2736 | // First, we convert (a > b) into (b < a), and (a >= b) into (b <= a). |
2737 | // Note that this is the opposite of the sse2 conversion above. |
2738 | if (condop == LIR_gtd) { |
2739 | condop = LIR_ltd; |
2740 | LIns* t = lhs; lhs = rhs; rhs = t; |
2741 | } else if (condop == LIR_ged) { |
2742 | condop = LIR_led; |
2743 | LIns* t = lhs; lhs = rhs; rhs = t; |
2744 | } |
2745 | |
2746 | // FNSTSW_AX puts the flags into AH like so: B:C3:TOP3:TOP2:TOP1:C2:C1:C0. |
2747 | // Furthermore, fcom/fcomp/fcompp sets C3:C2:C0 the same values |
2748 | // that Z:P:C are set by ucomisd, and the relative positions in AH |
2749 | // line up. (Someone at Intel has a sense of humour.) Therefore |
2750 | // we can use the same lahf/test(mask) technique as used in the |
2751 | // sse2 case above. We could use fcomi/fcomip/fcomipp which set |
2752 | // ZPC directly and then use LAHF instead of FNSTSW_AX and make |
2753 | // this code generally more like the sse2 code, but we don't |
2754 | // because fcomi/fcomip/fcomipp/lahf aren't available on earlier |
2755 | // x86 machines. |
2756 | // |
2757 | // The masks are as follows: |
2758 | // - LIR_eqd: mask == 0x44 == 0100_0100b, which extracts 0Z00_0P00 from AH. |
2759 | // - LIR_ltd: mask == 0x05 == 0000_0101b, which extracts 0000_0P0C from AH. |
2760 | // - LIR_led: mask == 0x41 == 0100_0001b, which extracts 0Z00_000C from AH. |
2761 | // |
2762 | // LIR_eqd: |
2763 | // ucomisd C3:C2:C0 lahf/test(0x44) SZP outcome |
2764 | // ------- -------- --------- --- ------- |
2765 | // UNORDERED 111 0100_0100 001 SETNP fails |
2766 | // EQUAL 100 0100_0000 000 SETNP succeeds |
2767 | // GREATER_THAN 000 0000_0000 011 SETNP fails |
2768 | // LESS_THAN 001 0000_0000 011 SETNP fails |
2769 | // |
2770 | // LIR_ltd: |
2771 | // fcom C3:C2:C0 lahf/test(0x05) SZP outcome |
2772 | // ------- -------- --------- --- ------- |
2773 | // UNORDERED 111 0000_0101 001 SETNP fails |
2774 | // EQUAL 100 0000_0000 011 SETNP fails |
2775 | // GREATER_THAN 000 0000_0000 011 SETNP fails |
2776 | // LESS_THAN 001 0000_0001 000 SETNP succeeds |
2777 | // |
2778 | // LIR_led: |
2779 | // fcom C3:C2:C0 lahf/test(0x41) SZP outcome |
2780 | // ------- --- --------- --- ------- |
2781 | // UNORDERED 111 0100_0001 001 SETNP fails |
2782 | // EQUAL 100 0100_0000 000 SETNP succeeds |
2783 | // GREATER_THAN 000 0000_0000 011 SETNP fails |
2784 | // LESS_THAN 001 0000_0001 010 SETNP succeeds |
2785 | |
2786 | int mask = 0; // init to avoid MSVC compile warnings |
2787 | switch (condop) { |
2788 | case LIR_eqd: mask = 0x44; break; |
2789 | case LIR_ltd: mask = 0x05; break; |
2790 | case LIR_led: mask = 0x41; break; |
2791 |             default: NanoAssert(0); break;
2792 | } |
2793 | |
2794 | evictIfActive(rEAX); |
2795 |             bool pop = !lhs->isInReg();
2796 | findSpecificRegFor(lhs, FST0); |
2797 | |
2798 | if (lhs == rhs) { |
2799 | // NaN test. |
2800 | TEST_AH(mask); |
2801 | FNSTSW_AX(); // requires rEAX to be free |
2802 | if (pop) |
2803 | FCOMPP(); |
2804 | else |
2805 | FCOMP(); |
2806 | FLDr(FST0); // DUP |
2807 | } else { |
2808 | TEST_AH(mask); |
2809 | FNSTSW_AX(); // requires rEAX to be free |
2810 | if (rhs->isImmD()) { |
2811 | const uint64_t* p = findImmDFromPool(rhs->immDasQ()); |
2812 | FCOMdm(pop, (const double*)p); |
2813 | } else { |
2814 | int d = findMemFor(rhs); |
2815 | FCOM(pop, d, FP); |
2816 | } |
2817 | } |
2818 | } |
2819 | } |
2820 | |
2821 | // Increment the 32-bit profiling counter at pCtr, without |
2822 | // changing any registers. |
2823 | verbose_only( |
2824 | void Assembler::asm_inc_m32(uint32_t* pCtr) |
2825 | { |
2826 | INCLi(int32_t(pCtr)); |
2827 | } |
2828 | ) |
2829 | |
2830 | void Assembler::nativePageReset() |
2831 | {} |
2832 | |
2833 | void Assembler::nativePageSetup() |
2834 | { |
2835 |         NanoAssert(!_inExit);
2836 | if (!_nIns) |
2837 | codeAlloc(codeStart, codeEnd, _nIns verbose_only(, codeBytes)); |
2838 | |
2839 | // add some random padding, so functions aren't predictably placed. |
2840 | if (_config.harden_function_alignment) |
2841 | { |
2842 | int32_t pad = _noise->getValue(LARGEST_UNDERRUN_PROT); |
2843 | underrunProtect(pad); |
2844 | _nIns -= pad; |
2845 |             VMPI_memset(_nIns, INT3_OP, pad);
2846 | PERFM_NVPROF("hardening:func-align", pad); |
2847 | } |
2848 | } |
2849 | |
2850 |     // Ensure there is room to emit at least n more bytes of code.
2851 | void Assembler::underrunProtect(int n) |
2852 | { |
2853 | NIns *eip = _nIns; |
2854 |         NanoAssertMsg(n<=LARGEST_UNDERRUN_PROT, "constant LARGEST_UNDERRUN_PROT is too small");
2855 | // This may be in a normal code chunk or an exit code chunk. |
2856 | if (eip - n < codeStart) { |
2857 | codeAlloc(codeStart, codeEnd, _nIns verbose_only(, codeBytes)); |
2858 | JMP(eip); |
2859 | } |
2860 | } |
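     | 
     |     // Typical use, as a sketch: a code-gen macro about to emit a 5-byte
     |     // instruction calls underrunProtect(5) first.  If fewer than 5 bytes
     |     // remain in the current chunk, a fresh chunk is allocated and a JMP
     |     // to the old position is emitted, so the code subsequently generated
     |     // in the new chunk flows into the code that already exists.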
2861 | |
2862 | void Assembler::asm_insert_random_nop() |
2863 | { |
2864 |         // emit one of several harmless nop-like instructions, chosen at random
2865 | uint32_t r = _noise->getValue(5); |
2866 | switch(r) |
2867 | { |
2868 | case 0: MR(rEAX,rEAX); break; |
2869 | case 1: MR(rEDI,rEDI); break; |
2870 | case 2: MR(rECX,rECX); break; |
2871 | case 3: LEA(rECX,0,rECX); break; |
2872 | case 4: LEA(rESP,0,rESP); break; |
2873 | } |
2874 | } |
2875 | |
2876 | void Assembler::asm_ret(LIns* ins) |
2877 | { |
2878 | // Unreachable, so assume correct stack depth. |
2879 | debug_only( _fpuStkDepth = 0; ) |
2880 | |
2881 | genEpilogue(); |
2882 | |
2883 | // Restore rESP from rEBP, undoing SUBi(SP,amt) in the prologue |
2884 | MR(SP,FP); |
2885 | |
2886 | releaseRegisters(); |
2887 | assignSavedRegs(); |
2888 | |
2889 | LIns *val = ins->oprnd1(); |
2890 | if (ins->isop(LIR_reti)) { |
2891 | findSpecificRegFor(val, retRegs[0]); |
2892 | } else { |
2893 | NanoAssert(ins->isop(LIR_retd))do { } while (0); |
2894 | findSpecificRegFor(val, FST0); |
2895 | fpu_pop(); |
2896 | } |
2897 | } |
2898 | |
2899 | void Assembler::swapCodeChunks() { |
2900 | if (!_nExitIns) |
2901 | codeAlloc(exitStart, exitEnd, _nExitIns verbose_only(, exitBytes)); |
2902 |         SWAP(NIns*, _nIns, _nExitIns);
2903 |         SWAP(NIns*, codeStart, exitStart);
2904 |         SWAP(NIns*, codeEnd, exitEnd);
2905 | verbose_only( SWAP(size_t, codeBytes, exitBytes); ) |
2906 | } |
2907 | |
2908 | #endif /* FEATURE_NANOJIT */ |
2909 | } |