NCBI C++ ToolKit
sljitNativeS390X.c

1 /*
2  * Stack-less Just-In-Time compiler
3  *
4  * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without modification, are
7  * permitted provided that the following conditions are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright notice, this list of
10  * conditions and the following disclaimer.
11  *
12  * 2. Redistributions in binary form must reproduce the above copyright notice, this list
13  * of conditions and the following disclaimer in the documentation and/or other materials
14  * provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
17  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
19  * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
24  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 
27 #include <sys/auxv.h>
28 
29 #ifdef __ARCH__
30 #define ENABLE_STATIC_FACILITY_DETECTION 1
31 #else
32 #define ENABLE_STATIC_FACILITY_DETECTION 0
33 #endif
34 #define ENABLE_DYNAMIC_FACILITY_DETECTION 1
35 
36 SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
37 {
38  return "s390x" SLJIT_CPUINFO;
39 }
40 
41 /* Instructions are stored as 64 bit values regardless of their size. */
42 typedef sljit_uw sljit_ins;
43 
44 #define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2)
45 #define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3)
46 
47 static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {
48  0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 0, 1, 14
49 };
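/* reg_map[0] is unused; SLJIT registers map to r2-r13, SLJIT_SP to r15,
 * TMP_REG1/TMP_REG2 to r0/r1, and the final slot to r14 (the link register).
 */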
50 
51 /* there are also a[2-15] available, but they are slower to access and
52  * their use is limited as mundaym explained:
53  * https://github.com/zherczeg/sljit/pull/91#discussion_r486895689
54  */
55 
56 /* General Purpose Registers [0-15]. */
57 typedef sljit_uw sljit_gpr;
58 
59 /*
60  * WARNING
61  * the following code is non standard and should be improved for
62  * consistency, but doesn't use SLJIT_NUMBER_OF_REGISTERS based
63  * registers because r0 and r1 are the ABI recommended volatiles.
64  * there is a gpr() function that maps sljit to physical register numbers
65  * that should be used instead of the usual index into reg_map[] and
66  * will be retired ASAP (TODO: carenas)
67  */
68 
69 static const sljit_gpr r0 = 0; /* reg_map[SLJIT_NUMBER_OF_REGISTERS + 2]: 0 in address calculations; reserved */
70 static const sljit_gpr r1 = 1; /* reg_map[SLJIT_NUMBER_OF_REGISTERS + 3]: reserved */
71 static const sljit_gpr r2 = 2; /* reg_map[1]: 1st argument */
72 static const sljit_gpr r3 = 3; /* reg_map[2]: 2nd argument */
73 static const sljit_gpr r4 = 4; /* reg_map[3]: 3rd argument */
74 static const sljit_gpr r5 = 5; /* reg_map[4]: 4th argument */
75 static const sljit_gpr r6 = 6; /* reg_map[5]: 5th argument; 1st saved register */
76 static const sljit_gpr r7 = 7; /* reg_map[6] */
77 static const sljit_gpr r8 = 8; /* reg_map[7] */
78 static const sljit_gpr r9 = 9; /* reg_map[8] */
79 static const sljit_gpr r10 = 10; /* reg_map[9] */
80 static const sljit_gpr r11 = 11; /* reg_map[10] */
81 static const sljit_gpr r12 = 12; /* reg_map[11]: GOT */
82 static const sljit_gpr r13 = 13; /* reg_map[12]: Literal Pool pointer */
83 static const sljit_gpr r14 = 14; /* reg_map[0]: return address */
84 static const sljit_gpr r15 = 15; /* reg_map[SLJIT_NUMBER_OF_REGISTERS + 1]: stack pointer */
85 
86 /* WARNING: r12 and r13 shouldn't be used as per ABI recommendation */
87 /* TODO(carenas): r12 might conflict in PIC code, reserve? */
88 /* TODO(carenas): r13 usually points to the literal "pool" per the ABI; using a tmp
89  * like we do now might be faster though, reserve?
90  */
91 
92 /* TODO(carenas): should be named TMP_REG[1-2] for consistency */
93 #define tmp0 r0
94 #define tmp1 r1
95 
96 /* When reg cannot be unused. */
97 #define IS_GPR_REG(reg) ((reg > 0) && (reg) <= SLJIT_SP)
98 
99 /* Link register. */
100 static const sljit_gpr link_r = 14; /* r14 */
101 
102 #define TMP_FREG1 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)
103 
104 static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2] = {
105  0, 0, 2, 4, 6, 3, 5, 7, 15, 14, 13, 12, 11, 10, 9, 8, 1
106 };
107 
108 #define R0A(r) (r)
109 #define R4A(r) ((r) << 4)
110 #define R8A(r) ((r) << 8)
111 #define R12A(r) ((r) << 12)
112 #define R16A(r) ((r) << 16)
113 #define R20A(r) ((r) << 20)
114 #define R28A(r) ((r) << 28)
115 #define R32A(r) ((r) << 32)
116 #define R36A(r) ((r) << 36)
117 
118 #define R0(r) ((sljit_ins)reg_map[r])
119 
120 #define F0(r) ((sljit_ins)freg_map[r])
121 #define F4(r) (R4A((sljit_ins)freg_map[r]))
122 #define F12(r) (R12A((sljit_ins)freg_map[r]))
123 #define F20(r) (R20A((sljit_ins)freg_map[r]))
124 #define F28(r) (R28A((sljit_ins)freg_map[r]))
125 #define F32(r) (R32A((sljit_ins)freg_map[r]))
126 #define F36(r) (R36A((sljit_ins)freg_map[r]))
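/* The RnA()/Fn() helpers shift a (floating-point) register number to bit
 * position n of the 48-bit instruction image, matching the operand field
 * offsets of the instruction formats used below.
 */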
127 
128 struct sljit_s390x_const {
129  struct sljit_const const_; /* must be first */
130  sljit_sw init_value; /* required to build literal pool */
131 };
132 
133 /* Convert SLJIT register to hardware register. */
134 static SLJIT_INLINE sljit_gpr gpr(sljit_s32 r)
135 {
136  SLJIT_ASSERT(r >= 0 && r < (sljit_s32)(sizeof(reg_map) / sizeof(reg_map[0])));
137  return reg_map[r];
138 }
139 
140 static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins)
141 {
142  sljit_ins *ibuf = (sljit_ins *)ensure_buf(compiler, sizeof(sljit_ins));
143  FAIL_IF(!ibuf);
144  *ibuf = ins;
145 
146  SLJIT_ASSERT(ins <= 0xffffffffffffL);
147 
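 /* compiler->size counts 2-byte halfwords: every instruction occupies at
  least one halfword, any bits above bit 15 imply a 4-byte encoding and
  bits above bit 31 a 6-byte encoding. */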
148  compiler->size++;
149  if (ins & 0xffff00000000L)
150  compiler->size++;
151 
152  if (ins & 0xffffffff0000L)
153  compiler->size++;
154 
155  return SLJIT_SUCCESS;
156 }
157 
158 #define SLJIT_ADD_SUB_NO_COMPARE(status_flags_state) \
159  (((status_flags_state) & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB)) \
160  && !((status_flags_state) & SLJIT_CURRENT_FLAGS_COMPARE))
161 
162 /* Map the given type to a 4-bit condition code mask. */
163 static sljit_u8 get_cc(struct sljit_compiler *compiler, sljit_s32 type) {
164  const sljit_u8 cc0 = 1 << 3; /* equal {,to zero} */
165  const sljit_u8 cc1 = 1 << 2; /* less than {,zero} */
166  const sljit_u8 cc2 = 1 << 1; /* greater than {,zero} */
167  const sljit_u8 cc3 = 1 << 0; /* {overflow,NaN} */
168 
169  switch (type) {
170  case SLJIT_EQUAL:
171  if (SLJIT_ADD_SUB_NO_COMPARE(compiler->status_flags_state)) {
172  sljit_s32 type = GET_FLAG_TYPE(compiler->status_flags_state);
174  return cc0;
175  if (type == SLJIT_OVERFLOW)
176  return (cc0 | cc3);
177  return (cc0 | cc2);
178  }
179  /* fallthrough */
180 
181  case SLJIT_ATOMIC_STORED:
182  case SLJIT_F_EQUAL:
183  case SLJIT_ORDERED_EQUAL:
184  return cc0;
185 
186  case SLJIT_NOT_EQUAL:
187  if (SLJIT_ADD_SUB_NO_COMPARE(compiler->status_flags_state)) {
188  sljit_s32 type = GET_FLAG_TYPE(compiler->status_flags_state);
190  return (cc1 | cc2 | cc3);
191  if (type == SLJIT_OVERFLOW)
192  return (cc1 | cc2);
193  return (cc1 | cc3);
194  }
195  /* fallthrough */
196 
198  return (cc1 | cc2 | cc3);
199 
200  case SLJIT_LESS:
202  return cc1;
203 
204  case SLJIT_GREATER_EQUAL:
206  return (cc0 | cc2 | cc3);
207 
208  case SLJIT_GREATER:
209  if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_COMPARE)
210  return cc2;
211  return cc3;
212 
213  case SLJIT_LESS_EQUAL:
214  if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_COMPARE)
215  return (cc0 | cc1);
216  return (cc0 | cc1 | cc2);
217 
218  case SLJIT_SIG_LESS:
219  case SLJIT_F_LESS:
220  case SLJIT_ORDERED_LESS:
221  return cc1;
222 
223  case SLJIT_NOT_CARRY:
224  if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_SUB)
225  return (cc2 | cc3);
226  /* fallthrough */
227 
229  case SLJIT_F_LESS_EQUAL:
231  return (cc0 | cc1);
232 
233  case SLJIT_CARRY:
234  if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_SUB)
235  return (cc0 | cc1);
236  /* fallthrough */
237 
238  case SLJIT_SIG_GREATER:
240  /* Overflow is considered greater, see SLJIT_SUB. */
241  return cc2 | cc3;
242 
244  return (cc0 | cc2 | cc3);
245 
246  case SLJIT_OVERFLOW:
247  if (compiler->status_flags_state & SLJIT_SET_Z)
248  return (cc2 | cc3);
249  /* fallthrough */
250 
251  case SLJIT_UNORDERED:
252  return cc3;
253 
254  case SLJIT_NOT_OVERFLOW:
255  if (compiler->status_flags_state & SLJIT_SET_Z)
256  return (cc0 | cc1);
257  /* fallthrough */
258 
259  case SLJIT_ORDERED:
260  return (cc0 | cc1 | cc2);
261 
262  case SLJIT_F_NOT_EQUAL:
264  return (cc1 | cc2);
265 
266  case SLJIT_F_GREATER:
268  return cc2;
269 
272  return (cc0 | cc2);
273 
275  return (cc0 | cc1 | cc3);
276 
278  return (cc0 | cc3);
279 
281  return (cc1 | cc3);
282  }
283 
284  SLJIT_UNREACHABLE();
285  return (sljit_u8)-1;
286 }
287 
288 /* Facility to bit index mappings.
289  Note: some facilities share the same bit index. */
291 #define STORE_FACILITY_LIST_EXTENDED_FACILITY 7
292 #define FAST_LONG_DISPLACEMENT_FACILITY 19
293 #define EXTENDED_IMMEDIATE_FACILITY 21
294 #define GENERAL_INSTRUCTION_EXTENSION_FACILITY 34
295 #define DISTINCT_OPERAND_FACILITY 45
296 #define HIGH_WORD_FACILITY 45
297 #define POPULATION_COUNT_FACILITY 45
298 #define LOAD_STORE_ON_CONDITION_1_FACILITY 45
299 #define MISCELLANEOUS_INSTRUCTION_EXTENSIONS_1_FACILITY 49
300 #define LOAD_STORE_ON_CONDITION_2_FACILITY 53
301 #define MISCELLANEOUS_INSTRUCTION_EXTENSIONS_2_FACILITY 58
302 #define VECTOR_FACILITY 129
303 #define VECTOR_ENHANCEMENTS_1_FACILITY 135
304 
305 /* Report whether a facility is known to be present due to the compiler
306  settings. This function should always be compiled to a constant
307  value given a constant argument. */
308 static sljit_s32 have_facility_static(sljit_s32 x)
309 {
310 #if ENABLE_STATIC_FACILITY_DETECTION
311  switch (x) {
312  case FAST_LONG_DISPLACEMENT_FACILITY:
313  return (__ARCH__ >= 6 /* z990 */);
314  case EXTENDED_IMMEDIATE_FACILITY:
315  case STORE_FACILITY_LIST_EXTENDED_FACILITY:
316  return (__ARCH__ >= 7 /* z9-109 */);
317  case GENERAL_INSTRUCTION_EXTENSION_FACILITY:
318  return (__ARCH__ >= 8 /* z10 */);
319  case DISTINCT_OPERAND_FACILITY:
320  return (__ARCH__ >= 9 /* z196 */);
321  case MISCELLANEOUS_INSTRUCTION_EXTENSIONS_1_FACILITY:
322  return (__ARCH__ >= 10 /* zEC12 */);
323  case LOAD_STORE_ON_CONDITION_2_FACILITY:
324  case VECTOR_FACILITY:
325  return (__ARCH__ >= 11 /* z13 */);
326  case MISCELLANEOUS_INSTRUCTION_EXTENSIONS_2_FACILITY:
327  case VECTOR_ENHANCEMENTS_1_FACILITY:
328  return (__ARCH__ >= 12 /* z14 */);
329  default:
330  break;
331  }
332 #endif
333  return 0;
334 }
335 
336 static SLJIT_INLINE unsigned long get_hwcap()
337 {
338  static unsigned long hwcap = 0;
339  if (SLJIT_UNLIKELY(!hwcap)) {
340  hwcap = getauxval(AT_HWCAP);
341  SLJIT_ASSERT(hwcap != 0);
342  }
343  return hwcap;
344 }
345 
346 static SLJIT_INLINE int have_stfle()
347 {
348  if (have_facility_static(STORE_FACILITY_LIST_EXTENDED_FACILITY))
349  return 1;
350 
351  return (get_hwcap() & HWCAP_S390_STFLE);
352 }
353 
354 /* Report whether the given facility is available. This function always
355  performs a runtime check. */
356 static sljit_s32 have_facility_dynamic(sljit_s32 x)
357 {
358 #if ENABLE_DYNAMIC_FACILITY_DETECTION
359  static struct {
360  sljit_uw bits[4];
361  } cpu_features;
362  size_t size = sizeof(cpu_features);
363  const sljit_uw word_index = x >> 6;
364  const sljit_uw bit_index = ((1UL << 63) >> (x & 63));
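 /* STFLE numbers facility bits starting from the most significant bit of
  the first doubleword, hence the bit mask is built down from bit 63. */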
365 
366  SLJIT_ASSERT(x < size * 8);
367  if (SLJIT_UNLIKELY(!have_stfle()))
368  return 0;
369 
370  if (SLJIT_UNLIKELY(cpu_features.bits[0] == 0)) {
371  __asm__ __volatile__ (
372  "lgr %%r0, %0;"
373  "stfle 0(%1);"
374  /* outputs */:
375  /* inputs */: "d" ((size / 8) - 1), "a" (&cpu_features)
376  /* clobbers */: "r0", "cc", "memory"
377  );
378  SLJIT_ASSERT(cpu_features.bits[0] != 0);
379  }
380  return (cpu_features.bits[word_index] & bit_index) != 0;
381 #else
382  return 0;
383 #endif
384 }
385 
386 #define HAVE_FACILITY(name, bit) \
387 static SLJIT_INLINE int name() \
388 { \
389  static int have = -1; \
390  /* Static check first. May allow the function to be optimized away. */ \
391  if (have_facility_static(bit)) \
392  have = 1; \
393  else if (SLJIT_UNLIKELY(have < 0)) \
394  have = have_facility_dynamic(bit) ? 1 : 0; \
395 \
396  return have; \
397 }
398 
399 HAVE_FACILITY(have_eimm, EXTENDED_IMMEDIATE_FACILITY)
400 HAVE_FACILITY(have_ldisp, FAST_LONG_DISPLACEMENT_FACILITY)
401 HAVE_FACILITY(have_genext, GENERAL_INSTRUCTION_EXTENSION_FACILITY)
402 HAVE_FACILITY(have_lscond1, LOAD_STORE_ON_CONDITION_1_FACILITY)
403 HAVE_FACILITY(have_lscond2, LOAD_STORE_ON_CONDITION_2_FACILITY)
404 HAVE_FACILITY(have_misc2, MISCELLANEOUS_INSTRUCTION_EXTENSIONS_2_FACILITY)
405 #undef HAVE_FACILITY
406 
407 #define is_u12(d) (0 <= (d) && (d) <= 0x00000fffL)
408 #define is_u32(d) (0 <= (d) && (d) <= 0xffffffffL)
409 
410 #define CHECK_SIGNED(v, bitlen) \
411  ((v) >= -(1 << ((bitlen) - 1)) && (v) < (1 << ((bitlen) - 1)))
412 
413 #define is_s8(d) CHECK_SIGNED((d), 8)
414 #define is_s16(d) CHECK_SIGNED((d), 16)
415 #define is_s20(d) CHECK_SIGNED((d), 20)
416 #define is_s32(d) ((d) == (sljit_s32)(d))
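/* For example, is_s20() accepts displacements in [-0x80000, 0x7ffff], the
 * range of the signed 20-bit displacement used by the long-displacement
 * (RXY/RSY) instruction formats.
 */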
417 
418 static SLJIT_INLINE sljit_ins disp_s20(sljit_s32 d)
419 {
420  sljit_uw dh, dl;
421 
422  SLJIT_ASSERT(is_s20(d));
423 
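 /* Split the signed 20-bit displacement into the low 12-bit DL field and
  the high 8-bit DH field, then shift both into the positions they occupy
  in the long-displacement formats. For example, d = 0x12345 gives
  dh = 0x12, dl = 0x34500 and a result of 0x3451200. */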
424  dh = (d >> 12) & 0xff;
425  dl = ((sljit_uw)d << 8) & 0xfff00;
426  return (dh | dl) << 8;
427 }
428 
429 /* TODO(carenas): variadic macro is not strictly needed */
430 #define SLJIT_S390X_INSTRUCTION(op, ...) \
431 static SLJIT_INLINE sljit_ins op(__VA_ARGS__)
432 
433 /* RR form instructions. */
434 #define SLJIT_S390X_RR(name, pattern) \
435 SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src) \
436 { \
437  return (pattern) | ((dst & 0xf) << 4) | (src & 0xf); \
438 }
439 
440 /* AND */
441 SLJIT_S390X_RR(nr, 0x1400)
442 
443 /* BRANCH AND SAVE */
444 SLJIT_S390X_RR(basr, 0x0d00)
445 
446 /* BRANCH ON CONDITION */
447 SLJIT_S390X_RR(bcr, 0x0700) /* TODO(mundaym): type for mask? */
448 
449 /* DIVIDE */
450 SLJIT_S390X_RR(dr, 0x1d00)
451 
452 /* EXCLUSIVE OR */
453 SLJIT_S390X_RR(xr, 0x1700)
454 
455 /* LOAD */
456 SLJIT_S390X_RR(lr, 0x1800)
457 
458 /* LOAD COMPLEMENT */
459 SLJIT_S390X_RR(lcr, 0x1300)
460 
461 /* OR */
462 SLJIT_S390X_RR(or, 0x1600)
463 
464 #undef SLJIT_S390X_RR
465 
466 /* RRE form instructions */
467 #define SLJIT_S390X_RRE(name, pattern) \
468 SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src) \
469 { \
470  return (pattern) | R4A(dst) | R0A(src); \
471 }
472 
473 /* AND */
474 SLJIT_S390X_RRE(ngr, 0xb9800000)
475 
476 /* DIVIDE LOGICAL */
477 SLJIT_S390X_RRE(dlr, 0xb9970000)
478 SLJIT_S390X_RRE(dlgr, 0xb9870000)
479 
480 /* DIVIDE SINGLE */
481 SLJIT_S390X_RRE(dsgr, 0xb90d0000)
482 
483 /* EXCLUSIVE OR */
484 SLJIT_S390X_RRE(xgr, 0xb9820000)
485 
486 /* LOAD */
487 SLJIT_S390X_RRE(lgr, 0xb9040000)
488 SLJIT_S390X_RRE(lgfr, 0xb9140000)
489 
490 /* LOAD BYTE */
491 SLJIT_S390X_RRE(lbr, 0xb9260000)
492 SLJIT_S390X_RRE(lgbr, 0xb9060000)
493 
494 /* LOAD COMPLEMENT */
495 SLJIT_S390X_RRE(lcgr, 0xb9030000)
496 
497 /* LOAD HALFWORD */
498 SLJIT_S390X_RRE(lhr, 0xb9270000)
499 SLJIT_S390X_RRE(lghr, 0xb9070000)
500 
501 /* LOAD LOGICAL */
502 SLJIT_S390X_RRE(llgfr, 0xb9160000)
503 
504 /* LOAD LOGICAL CHARACTER */
505 SLJIT_S390X_RRE(llcr, 0xb9940000)
506 SLJIT_S390X_RRE(llgcr, 0xb9840000)
507 
508 /* LOAD LOGICAL HALFWORD */
509 SLJIT_S390X_RRE(llhr, 0xb9950000)
510 SLJIT_S390X_RRE(llghr, 0xb9850000)
511 
512 /* MULTIPLY LOGICAL */
513 SLJIT_S390X_RRE(mlgr, 0xb9860000)
514 
515 /* MULTIPLY SINGLE */
516 SLJIT_S390X_RRE(msgfr, 0xb91c0000)
517 
518 /* OR */
519 SLJIT_S390X_RRE(ogr, 0xb9810000)
520 
521 /* SUBTRACT */
522 SLJIT_S390X_RRE(sgr, 0xb9090000)
523 
524 #undef SLJIT_S390X_RRE
525 
526 /* RI-a form instructions */
527 #define SLJIT_S390X_RIA(name, pattern, imm_type) \
528 SLJIT_S390X_INSTRUCTION(name, sljit_gpr reg, imm_type imm) \
529 { \
530  return (pattern) | R20A(reg) | (imm & 0xffff); \
531 }
532 
533 /* ADD HALFWORD IMMEDIATE */
535 
536 /* LOAD HALFWORD IMMEDIATE */
537 SLJIT_S390X_RIA(lhi, 0xa7080000, sljit_s16)
538 SLJIT_S390X_RIA(lghi, 0xa7090000, sljit_s16)
539 
540 /* LOAD LOGICAL IMMEDIATE */
541 SLJIT_S390X_RIA(llihh, 0xa50c0000, sljit_u16)
542 SLJIT_S390X_RIA(llihl, 0xa50d0000, sljit_u16)
543 SLJIT_S390X_RIA(llilh, 0xa50e0000, sljit_u16)
544 SLJIT_S390X_RIA(llill, 0xa50f0000, sljit_u16)
545 
546 /* MULTIPLY HALFWORD IMMEDIATE */
549 
550 /* OR IMMEDIATE */
552 
553 #undef SLJIT_S390X_RIA
554 
555 /* RIL-a form instructions (requires extended immediate facility) */
556 #define SLJIT_S390X_RILA(name, pattern, imm_type) \
557 SLJIT_S390X_INSTRUCTION(name, sljit_gpr reg, imm_type imm) \
558 { \
559  SLJIT_ASSERT(have_eimm()); \
560  return (pattern) | R36A(reg) | ((sljit_ins)imm & 0xffffffffu); \
561 }
562 
563 /* ADD IMMEDIATE */
564 SLJIT_S390X_RILA(agfi, 0xc20800000000, sljit_s32)
565 
566 /* ADD IMMEDIATE HIGH */
567 SLJIT_S390X_RILA(aih, 0xcc0800000000, sljit_s32) /* TODO(mundaym): high-word facility? */
568 
569 /* AND IMMEDIATE */
570 SLJIT_S390X_RILA(nihf, 0xc00a00000000, sljit_u32)
571 
572 /* EXCLUSIVE OR IMMEDIATE */
573 SLJIT_S390X_RILA(xilf, 0xc00700000000, sljit_u32)
574 
575 /* INSERT IMMEDIATE */
576 SLJIT_S390X_RILA(iihf, 0xc00800000000, sljit_u32)
577 SLJIT_S390X_RILA(iilf, 0xc00900000000, sljit_u32)
578 
579 /* LOAD IMMEDIATE */
580 SLJIT_S390X_RILA(lgfi, 0xc00100000000, sljit_s32)
581 
582 /* LOAD LOGICAL IMMEDIATE */
583 SLJIT_S390X_RILA(llihf, 0xc00e00000000, sljit_u32)
584 SLJIT_S390X_RILA(llilf, 0xc00f00000000, sljit_u32)
585 
586 /* SUBTRACT LOGICAL IMMEDIATE */
587 SLJIT_S390X_RILA(slfi, 0xc20500000000, sljit_u32)
588 
589 #undef SLJIT_S390X_RILA
590 
591 /* RX-a form instructions */
592 #define SLJIT_S390X_RXA(name, pattern) \
593 SLJIT_S390X_INSTRUCTION(name, sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b) \
594 { \
595  SLJIT_ASSERT((d & 0xfff) == d); \
596 \
597  return (pattern) | R20A(r) | R16A(x) | R12A(b) | (sljit_ins)(d & 0xfff); \
598 }
599 
600 /* LOAD */
601 SLJIT_S390X_RXA(l, 0x58000000)
602 
603 /* LOAD ADDRESS */
604 SLJIT_S390X_RXA(la, 0x41000000)
605 
606 /* LOAD HALFWORD */
607 SLJIT_S390X_RXA(lh, 0x48000000)
608 
609 /* MULTIPLY SINGLE */
610 SLJIT_S390X_RXA(ms, 0x71000000)
611 
612 /* STORE */
613 SLJIT_S390X_RXA(st, 0x50000000)
614 
615 /* STORE CHARACTER */
616 SLJIT_S390X_RXA(stc, 0x42000000)
617 
618 /* STORE HALFWORD */
619 SLJIT_S390X_RXA(sth, 0x40000000)
620 
621 #undef SLJIT_S390X_RXA
622 
623 /* RXY-a instructions */
624 #define SLJIT_S390X_RXYA(name, pattern, cond) \
625 SLJIT_S390X_INSTRUCTION(name, sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b) \
626 { \
627  SLJIT_ASSERT(cond); \
628 \
629  return (pattern) | R36A(r) | R32A(x) | R28A(b) | disp_s20(d); \
630 }
631 
632 /* LOAD */
633 SLJIT_S390X_RXYA(ly, 0xe30000000058, have_ldisp())
634 SLJIT_S390X_RXYA(lg, 0xe30000000004, 1)
635 SLJIT_S390X_RXYA(lgf, 0xe30000000014, 1)
636 
637 /* LOAD BYTE */
638 SLJIT_S390X_RXYA(lb, 0xe30000000076, have_ldisp())
639 SLJIT_S390X_RXYA(lgb, 0xe30000000077, have_ldisp())
640 
641 /* LOAD HALFWORD */
642 SLJIT_S390X_RXYA(lhy, 0xe30000000078, have_ldisp())
643 SLJIT_S390X_RXYA(lgh, 0xe30000000015, 1)
644 
645 /* LOAD LOGICAL */
646 SLJIT_S390X_RXYA(llgf, 0xe30000000016, 1)
647 
648 /* LOAD LOGICAL CHARACTER */
649 SLJIT_S390X_RXYA(llc, 0xe30000000094, have_eimm())
650 SLJIT_S390X_RXYA(llgc, 0xe30000000090, 1)
651 
652 /* LOAD LOGICAL HALFWORD */
653 SLJIT_S390X_RXYA(llh, 0xe30000000095, have_eimm())
654 SLJIT_S390X_RXYA(llgh, 0xe30000000091, 1)
655 
656 /* MULTIPLY SINGLE */
657 SLJIT_S390X_RXYA(msy, 0xe30000000051, have_ldisp())
658 SLJIT_S390X_RXYA(msg, 0xe3000000000c, 1)
659 
660 /* STORE */
661 SLJIT_S390X_RXYA(sty, 0xe30000000050, have_ldisp())
662 SLJIT_S390X_RXYA(stg, 0xe30000000024, 1)
663 
664 /* STORE CHARACTER */
665 SLJIT_S390X_RXYA(stcy, 0xe30000000072, have_ldisp())
666 
667 /* STORE HALFWORD */
668 SLJIT_S390X_RXYA(sthy, 0xe30000000070, have_ldisp())
669 
670 #undef SLJIT_S390X_RXYA
671 
672 /* RSY-a instructions */
673 #define SLJIT_S390X_RSYA(name, pattern, cond) \
674 SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src, sljit_s32 d, sljit_gpr b) \
675 { \
676  SLJIT_ASSERT(cond); \
677 \
678  return (pattern) | R36A(dst) | R32A(src) | R28A(b) | disp_s20(d); \
679 }
680 
681 /* LOAD MULTIPLE */
682 SLJIT_S390X_RSYA(lmg, 0xeb0000000004, 1)
683 
684 /* SHIFT LEFT LOGICAL */
685 SLJIT_S390X_RSYA(sllg, 0xeb000000000d, 1)
686 
687 /* SHIFT RIGHT SINGLE */
688 SLJIT_S390X_RSYA(srag, 0xeb000000000a, 1)
689 
690 /* STORE MULTIPLE */
691 SLJIT_S390X_RSYA(stmg, 0xeb0000000024, 1)
692 
693 #undef SLJIT_S390X_RSYA
694 
695 /* RIE-f instructions (require general-instructions-extension facility) */
696 #define SLJIT_S390X_RIEF(name, pattern) \
697 SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src, sljit_u8 start, sljit_u8 end, sljit_u8 rot) \
698 { \
699  sljit_ins i3, i4, i5; \
700 \
701  SLJIT_ASSERT(have_genext()); \
702  i3 = (sljit_ins)start << 24; \
703  i4 = (sljit_ins)end << 16; \
704  i5 = (sljit_ins)rot << 8; \
705 \
706  return (pattern) | R36A(dst & 0xf) | R32A(src & 0xf) | i3 | i4 | i5; \
707 }
708 
709 /* ROTATE THEN AND SELECTED BITS */
710 /* SLJIT_S390X_RIEF(rnsbg, 0xec0000000054) */
711 
712 /* ROTATE THEN EXCLUSIVE OR SELECTED BITS */
713 /* SLJIT_S390X_RIEF(rxsbg, 0xec0000000057) */
714 
715 /* ROTATE THEN OR SELECTED BITS */
716 SLJIT_S390X_RIEF(rosbg, 0xec0000000056)
717 
718 /* ROTATE THEN INSERT SELECTED BITS */
719 /* SLJIT_S390X_RIEF(risbg, 0xec0000000055) */
720 /* SLJIT_S390X_RIEF(risbgn, 0xec0000000059) */
721 
722 /* ROTATE THEN INSERT SELECTED BITS HIGH */
723 SLJIT_S390X_RIEF(risbhg, 0xec000000005d)
724 
725 /* ROTATE THEN INSERT SELECTED BITS LOW */
726 /* SLJIT_S390X_RIEF(risblg, 0xec0000000051) */
727 
728 #undef SLJIT_S390X_RIEF
729 
730 /* RRF-c instructions (require load/store-on-condition 1 facility) */
731 #define SLJIT_S390X_RRFC(name, pattern) \
732 SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src, sljit_uw mask) \
733 { \
734  sljit_ins m3; \
735 \
736  SLJIT_ASSERT(have_lscond1()); \
737  m3 = (sljit_ins)(mask & 0xf) << 12; \
738 \
739  return (pattern) | m3 | R4A(dst) | R0A(src); \
740 }
741 
742 /* LOAD HALFWORD IMMEDIATE ON CONDITION */
743 SLJIT_S390X_RRFC(locr, 0xb9f20000)
745 
746 #undef SLJIT_S390X_RRFC
747 
748 /* RIE-g instructions (require load/store-on-condition 2 facility) */
749 #define SLJIT_S390X_RIEG(name, pattern) \
750 SLJIT_S390X_INSTRUCTION(name, sljit_gpr reg, sljit_sw imm, sljit_uw mask) \
751 { \
752  sljit_ins m3, i2; \
753 \
754  SLJIT_ASSERT(have_lscond2()); \
755  m3 = (sljit_ins)(mask & 0xf) << 32; \
756  i2 = (sljit_ins)(imm & 0xffffL) << 16; \
757 \
758  return (pattern) | R36A(reg) | m3 | i2; \
759 }
760 
761 /* LOAD HALFWORD IMMEDIATE ON CONDITION */
762 SLJIT_S390X_RIEG(lochi, 0xec0000000042)
763 SLJIT_S390X_RIEG(locghi, 0xec0000000046)
764 
765 #undef SLJIT_S390X_RIEG
766 
767 #define SLJIT_S390X_RILB(name, pattern, cond) \
768 SLJIT_S390X_INSTRUCTION(name, sljit_gpr reg, sljit_sw ri) \
769 { \
770  SLJIT_ASSERT(cond); \
771 \
772  return (pattern) | R36A(reg) | (sljit_ins)(ri & 0xffffffff); \
773 }
774 
775 /* BRANCH RELATIVE AND SAVE LONG */
776 SLJIT_S390X_RILB(brasl, 0xc00500000000, 1)
777 
778 /* LOAD ADDRESS RELATIVE LONG */
779 SLJIT_S390X_RILB(larl, 0xc00000000000, 1)
780 
781 /* LOAD RELATIVE LONG */
782 SLJIT_S390X_RILB(lgrl, 0xc40800000000, have_genext())
783 
784 #undef SLJIT_S390X_RILB
785 
786 SLJIT_S390X_INSTRUCTION(br, sljit_gpr target)
787 {
788  return 0x07f0 | target;
789 }
790 
791 SLJIT_S390X_INSTRUCTION(brc, sljit_uw mask, sljit_sw target)
792 {
793  sljit_ins m1 = (sljit_ins)(mask & 0xf) << 20;
794  sljit_ins ri2 = (sljit_ins)target & 0xffff;
795  return 0xa7040000L | m1 | ri2;
796 }
797 
798 SLJIT_S390X_INSTRUCTION(brcl, sljit_uw mask, sljit_sw target)
799 {
800  sljit_ins m1 = (sljit_ins)(mask & 0xf) << 36;
801  sljit_ins ri2 = (sljit_ins)target & 0xffffffff;
802  return 0xc00400000000L | m1 | ri2;
803 }
804 
805 SLJIT_S390X_INSTRUCTION(flogr, sljit_gpr dst, sljit_gpr src)
806 {
807  SLJIT_ASSERT(have_eimm());
808  return 0xb9830000 | R8A(dst) | R0A(src);
809 }
810 
811 /* INSERT PROGRAM MASK */
812 SLJIT_S390X_INSTRUCTION(ipm, sljit_gpr dst)
813 {
814  return 0xb2220000 | R4A(dst);
815 }
816 
817 /* SET PROGRAM MASK */
818 SLJIT_S390X_INSTRUCTION(spm, sljit_gpr dst)
819 {
820  return 0x0400 | R4A(dst);
821 }
822 
823 /* ROTATE THEN INSERT SELECTED BITS HIGH (ZERO) */
824 SLJIT_S390X_INSTRUCTION(risbhgz, sljit_gpr dst, sljit_gpr src, sljit_u8 start, sljit_u8 end, sljit_u8 rot)
825 {
826  return risbhg(dst, src, start, 0x8 | end, rot);
827 }
828 
829 #undef SLJIT_S390X_INSTRUCTION
830 
831 static sljit_s32 update_zero_overflow(struct sljit_compiler *compiler, sljit_s32 op, sljit_gpr dst_r)
832 {
833  /* Condition codes: bits 18 and 19.
834  Transformation:
835  0 (zero and no overflow) : unchanged
836  1 (non-zero and no overflow) : unchanged
837  2 (zero and overflow) : decreased by 1
838  3 (non-zero and overflow) : decreased by 1 if non-zero */
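 /* Mechanism: ipm captures the condition code into bits 2-3 of the low
  word of tmp1, slfi 0x10000000 decrements that saved CC field, and spm
  installs the adjusted mask; the brc/or(ogr) instructions decide whether
  the adjustment is applied. */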
839  FAIL_IF(push_inst(compiler, brc(0xc, 2 + 2 + ((op & SLJIT_32) ? 1 : 2) + 2 + 3 + 1)));
840  FAIL_IF(push_inst(compiler, ipm(tmp1)));
841  FAIL_IF(push_inst(compiler, (op & SLJIT_32) ? or(dst_r, dst_r) : ogr(dst_r, dst_r)));
842  FAIL_IF(push_inst(compiler, brc(0x8, 2 + 3)));
843  FAIL_IF(push_inst(compiler, slfi(tmp1, 0x10000000)));
844  FAIL_IF(push_inst(compiler, spm(tmp1)));
845  return SLJIT_SUCCESS;
846 }
847 
848 /* load 64-bit immediate into register without clobbering flags */
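/* Strategy: prefer a single 4-byte instruction (lghi, or one of
 * llill/llilh/llihl/llihh when only one 16-bit quarter is set), then a
 * single 6-byte instruction (lgfi, llilf, llihf), and only otherwise fall
 * back to the two-instruction llilf + iihf sequence.
 */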
849 static sljit_s32 push_load_imm_inst(struct sljit_compiler *compiler, sljit_gpr target, sljit_sw v)
850 {
851  /* 4 byte instructions */
852  if (is_s16(v))
853  return push_inst(compiler, lghi(target, (sljit_s16)v));
854 
855  if (((sljit_uw)v & ~(sljit_uw)0x000000000000ffff) == 0)
856  return push_inst(compiler, llill(target, (sljit_u16)v));
857 
858  if (((sljit_uw)v & ~(sljit_uw)0x00000000ffff0000) == 0)
859  return push_inst(compiler, llilh(target, (sljit_u16)(v >> 16)));
860 
861  if (((sljit_uw)v & ~(sljit_uw)0x0000ffff00000000) == 0)
862  return push_inst(compiler, llihl(target, (sljit_u16)(v >> 32)));
863 
864  if (((sljit_uw)v & ~(sljit_uw)0xffff000000000000) == 0)
865  return push_inst(compiler, llihh(target, (sljit_u16)(v >> 48)));
866 
867  if (is_s32(v))
868  return push_inst(compiler, lgfi(target, (sljit_s32)v));
869 
870  if (((sljit_uw)v >> 32) == 0)
871  return push_inst(compiler, llilf(target, (sljit_u32)v));
872 
873  if (((sljit_uw)v << 32) == 0)
874  return push_inst(compiler, llihf(target, (sljit_u32)((sljit_uw)v >> 32)));
875 
876  FAIL_IF(push_inst(compiler, llilf(target, (sljit_u32)v)));
877  return push_inst(compiler, iihf(target, (sljit_u32)(v >> 32)));
878 }
879 
880 struct addr {
881  sljit_gpr base;
882  sljit_gpr index;
883  sljit_s32 offset;
884 };
885 
886 /* transform memory operand into D(X,B) form with a signed 20-bit offset */
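/* D(X,B) addressing: the effective address is base register B plus index
 * register X plus displacement D; register number 0 in either position
 * means "no register" rather than r0.
 */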
887 static sljit_s32 make_addr_bxy(struct sljit_compiler *compiler,
888  struct addr *addr, sljit_s32 mem, sljit_sw off,
889  sljit_gpr tmp /* clobbered, must not be r0 */)
890 {
891  sljit_gpr base = r0;
892  sljit_gpr index = r0;
893 
894  SLJIT_ASSERT(tmp != r0);
895  if (mem & REG_MASK)
896  base = gpr(mem & REG_MASK);
897 
898  if (mem & OFFS_REG_MASK) {
899  index = gpr(OFFS_REG(mem));
900  if (off != 0) {
901  /* shift and put the result into tmp */
902  SLJIT_ASSERT(0 <= off && off < 64);
903  FAIL_IF(push_inst(compiler, sllg(tmp, index, (sljit_s32)off, 0)));
904  index = tmp;
905  off = 0; /* clear offset */
906  }
907  }
908  else if (!is_s20(off)) {
909  FAIL_IF(push_load_imm_inst(compiler, tmp, off));
910  index = tmp;
911  off = 0; /* clear offset */
912  }
913  addr->base = base;
914  addr->index = index;
915  addr->offset = (sljit_s32)off;
916  return SLJIT_SUCCESS;
917 }
918 
919 /* transform memory operand into D(X,B) form with an unsigned 12-bit offset */
920 static sljit_s32 make_addr_bx(struct sljit_compiler *compiler,
921  struct addr *addr, sljit_s32 mem, sljit_sw off,
922  sljit_gpr tmp /* clobbered, must not be r0 */)
923 {
924  sljit_gpr base = r0;
925  sljit_gpr index = r0;
926 
927  SLJIT_ASSERT(tmp != r0);
928  if (mem & REG_MASK)
929  base = gpr(mem & REG_MASK);
930 
931  if (mem & OFFS_REG_MASK) {
932  index = gpr(OFFS_REG(mem));
933  if (off != 0) {
934  /* shift and put the result into tmp */
935  SLJIT_ASSERT(0 <= off && off < 64);
936  FAIL_IF(push_inst(compiler, sllg(tmp, index, (sljit_s32)off, 0)));
937  index = tmp;
938  off = 0; /* clear offset */
939  }
940  }
941  else if (!is_u12(off)) {
942  FAIL_IF(push_load_imm_inst(compiler, tmp, off));
943  index = tmp;
944  off = 0; /* clear offset */
945  }
946  addr->base = base;
947  addr->index = index;
948  addr->offset = (sljit_s32)off;
949  return SLJIT_SUCCESS;
950 }
951 
952 #define EVAL(op, r, addr) op(r, addr.offset, addr.index, addr.base)
953 #define WHEN(cond, r, i1, i2, addr) \
954  (cond) ? EVAL(i1, r, addr) : EVAL(i2, r, addr)
955 
956 /* May clobber tmp1. */
957 static sljit_s32 load_store_op(struct sljit_compiler *compiler, sljit_gpr reg,
958  sljit_s32 mem, sljit_sw memw,
959  sljit_s32 is_32bit, const sljit_ins* forms)
960 {
961  struct addr addr;
962 
963  SLJIT_ASSERT(mem & SLJIT_MEM);
964 
965  if (is_32bit && ((mem & OFFS_REG_MASK) || is_u12(memw) || !is_s20(memw))) {
966  FAIL_IF(make_addr_bx(compiler, &addr, mem, memw, tmp1));
967  return push_inst(compiler, forms[0] | R20A(reg) | R16A(addr.index) | R12A(addr.base) | (sljit_ins)addr.offset);
968  }
969 
970  FAIL_IF(make_addr_bxy(compiler, &addr, mem, memw, tmp1));
971  return push_inst(compiler, (is_32bit ? forms[1] : forms[2]) | R36A(reg) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset));
972 }
973 
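/* Opcode forms consumed by load_store_op: forms[0] is the RX variant
 * (unsigned 12-bit displacement, 32-bit operation), forms[1] the RXY
 * variant (signed 20-bit displacement, 32-bit operation) and forms[2]
 * the 64-bit RXY variant.
 */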
974 static const sljit_ins load_forms[3] = {
975  0x58000000 /* l */,
976  0xe30000000058 /* ly */,
977  0xe30000000004 /* lg */
978 };
979 
980 static const sljit_ins store_forms[3] = {
981  0x50000000 /* st */,
982  0xe30000000050 /* sty */,
983  0xe30000000024 /* stg */
984 };
985 
986 static const sljit_ins load_halfword_forms[3] = {
987  0x48000000 /* lh */,
988  0xe30000000078 /* lhy */,
989  0xe30000000015 /* lgh */
990 };
991 
992 /* May clobber tmp1. */
993 static SLJIT_INLINE sljit_s32 load_word(struct sljit_compiler *compiler, sljit_gpr dst_r,
994  sljit_s32 src, sljit_sw srcw,
995  sljit_s32 is_32bit)
996 {
997  return load_store_op(compiler, dst_r, src, srcw, is_32bit, load_forms);
998 }
999 
1000 /* May clobber tmp1. */
1001 static sljit_s32 load_unsigned_word(struct sljit_compiler *compiler, sljit_gpr dst_r,
1002  sljit_s32 src, sljit_sw srcw,
1003  sljit_s32 is_32bit)
1004 {
1005  struct addr addr;
1006  sljit_ins ins;
1007 
1008  SLJIT_ASSERT(src & SLJIT_MEM);
1009 
1010  FAIL_IF(make_addr_bxy(compiler, &addr, src, srcw, tmp1));
1011 
1012  ins = is_32bit ? 0xe30000000016 /* llgf */ : 0xe30000000004 /* lg */;
1013  return push_inst(compiler, ins | R36A(dst_r) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset));
1014 }
1015 
1016 /* May clobber tmp1. */
1017 static sljit_s32 store_word(struct sljit_compiler *compiler, sljit_gpr src_r,
1018  sljit_s32 dst, sljit_sw dstw,
1019  sljit_s32 is_32bit)
1020 {
1021  return load_store_op(compiler, src_r, dst, dstw, is_32bit, store_forms);
1022 }
1023 
1024 #undef WHEN
1025 
1026 static sljit_s32 emit_move(struct sljit_compiler *compiler,
1027  sljit_gpr dst_r,
1028  sljit_s32 src, sljit_sw srcw)
1029 {
1030  sljit_gpr src_r;
1031 
1032  SLJIT_ASSERT(!IS_GPR_REG(src) || dst_r != gpr(src & REG_MASK));
1033 
1034  if (src == SLJIT_IMM)
1035  return push_load_imm_inst(compiler, dst_r, srcw);
1036 
1037  if (src & SLJIT_MEM)
1038  return load_word(compiler, dst_r, src, srcw, (compiler->mode & SLJIT_32) != 0);
1039 
1040  src_r = gpr(src & REG_MASK);
1041  return push_inst(compiler, (compiler->mode & SLJIT_32) ? lr(dst_r, src_r) : lgr(dst_r, src_r));
1042 }
1043 
1044 static sljit_s32 emit_rr(struct sljit_compiler *compiler, sljit_ins ins,
1045  sljit_s32 dst,
1046  sljit_s32 src1, sljit_sw src1w,
1047  sljit_s32 src2, sljit_sw src2w)
1048 {
1049  sljit_gpr dst_r = tmp0;
1050  sljit_gpr src_r = tmp1;
1051  sljit_s32 needs_move = 1;
1052 
1053  if (FAST_IS_REG(dst)) {
1054  dst_r = gpr(dst);
1055 
1056  if (dst == src1)
1057  needs_move = 0;
1058  else if (dst == src2) {
1059  dst_r = tmp0;
1060  needs_move = 2;
1061  }
1062  }
1063 
1064  if (needs_move)
1065  FAIL_IF(emit_move(compiler, dst_r, src1, src1w));
1066 
1067  if (FAST_IS_REG(src2))
1068  src_r = gpr(src2);
1069  else
1070  FAIL_IF(emit_move(compiler, tmp1, src2, src2w));
1071 
1072  FAIL_IF(push_inst(compiler, ins | R4A(dst_r) | R0A(src_r)));
1073 
1074  if (needs_move != 2)
1075  return SLJIT_SUCCESS;
1076 
1077  dst_r = gpr(dst & REG_MASK);
1078  return push_inst(compiler, (compiler->mode & SLJIT_32) ? lr(dst_r, tmp0) : lgr(dst_r, tmp0));
1079 }
1080 
1081 static sljit_s32 emit_rr1(struct sljit_compiler *compiler, sljit_ins ins,
1082  sljit_s32 dst,
1083  sljit_s32 src1, sljit_sw src1w)
1084 {
1085  sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst) : tmp0;
1086  sljit_gpr src_r = tmp1;
1087 
1088  if (FAST_IS_REG(src1))
1089  src_r = gpr(src1);
1090  else
1091  FAIL_IF(emit_move(compiler, tmp1, src1, src1w));
1092 
1093  return push_inst(compiler, ins | R4A(dst_r) | R0A(src_r));
1094 }
1095 
1096 static sljit_s32 emit_rrf(struct sljit_compiler *compiler, sljit_ins ins,
1097  sljit_s32 dst,
1098  sljit_s32 src1, sljit_sw src1w,
1099  sljit_s32 src2, sljit_sw src2w)
1100 {
1101  sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;
1102  sljit_gpr src1_r = tmp0;
1103  sljit_gpr src2_r = tmp1;
1104 
1105  if (FAST_IS_REG(src1))
1106  src1_r = gpr(src1);
1107  else
1108  FAIL_IF(emit_move(compiler, tmp0, src1, src1w));
1109 
1110  if (FAST_IS_REG(src2))
1111  src2_r = gpr(src2);
1112  else
1113  FAIL_IF(emit_move(compiler, tmp1, src2, src2w));
1114 
1115  return push_inst(compiler, ins | R4A(dst_r) | R0A(src1_r) | R12A(src2_r));
1116 }
1117 
1118 typedef enum {
1119  RIL_A,
1120  RI_A
1121 } emit_ril_type;
1122 
1123 static sljit_s32 emit_ri(struct sljit_compiler *compiler, sljit_ins ins,
1124  sljit_s32 dst,
1125  sljit_s32 src1, sljit_sw src1w,
1126  sljit_sw src2w,
1127  emit_ril_type type)
1128 {
1129  sljit_gpr dst_r = tmp0;
1130  sljit_s32 needs_move = 1;
1131 
1132  if (FAST_IS_REG(dst)) {
1133  dst_r = gpr(dst);
1134 
1135  if (dst == src1)
1136  needs_move = 0;
1137  }
1138 
1139  if (needs_move)
1140  FAIL_IF(emit_move(compiler, dst_r, src1, src1w));
1141 
1142  if (type == RIL_A)
1143  return push_inst(compiler, ins | R36A(dst_r) | (src2w & 0xffffffff));
1144  return push_inst(compiler, ins | R20A(dst_r) | (src2w & 0xffff));
1145 }
1146 
1147 static sljit_s32 emit_rie_d(struct sljit_compiler *compiler, sljit_ins ins,
1148  sljit_s32 dst,
1149  sljit_s32 src1, sljit_sw src1w,
1150  sljit_sw src2w)
1151 {
1152  sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst) : tmp0;
1153  sljit_gpr src_r = tmp0;
1154 
1155  if (!FAST_IS_REG(src1))
1156  FAIL_IF(emit_move(compiler, tmp0, src1, src1w));
1157  else
1158  src_r = gpr(src1 & REG_MASK);
1159 
1160  return push_inst(compiler, ins | R36A(dst_r) | R32A(src_r) | (sljit_ins)(src2w & 0xffff) << 16);
1161 }
1162 
1163 typedef enum {
1164  RX_A,
1165  RXY_A
1166 } emit_rx_type;
1167 
1168 static sljit_s32 emit_rx(struct sljit_compiler *compiler, sljit_ins ins,
1169  sljit_s32 dst,
1170  sljit_s32 src1, sljit_sw src1w,
1171  sljit_s32 src2, sljit_sw src2w,
1172  emit_rx_type type)
1173 {
1174  sljit_gpr dst_r = tmp0;
1175  sljit_s32 needs_move = 1;
1176  sljit_gpr base, index;
1177 
1178  SLJIT_ASSERT(src2 & SLJIT_MEM);
1179 
1180  if (FAST_IS_REG(dst)) {
1181  dst_r = gpr(dst);
1182 
1183  if (dst == src1)
1184  needs_move = 0;
1185  else if (dst == (src2 & REG_MASK) || (dst == OFFS_REG(src2))) {
1186  dst_r = tmp0;
1187  needs_move = 2;
1188  }
1189  }
1190 
1191  if (needs_move)
1192  FAIL_IF(emit_move(compiler, dst_r, src1, src1w));
1193 
1194  base = gpr(src2 & REG_MASK);
1195  index = tmp0;
1196 
1197  if (src2 & OFFS_REG_MASK) {
1198  index = gpr(OFFS_REG(src2));
1199 
1200  if (src2w != 0) {
1201  FAIL_IF(push_inst(compiler, sllg(tmp1, index, src2w & 0x3, 0)));
1202  src2w = 0;
1203  index = tmp1;
1204  }
1205  } else if ((type == RX_A && !is_u12(src2w)) || (type == RXY_A && !is_s20(src2w))) {
1206  FAIL_IF(push_load_imm_inst(compiler, tmp1, src2w));
1207 
1208  if (src2 & REG_MASK)
1209  index = tmp1;
1210  else
1211  base = tmp1;
1212  src2w = 0;
1213  }
1214 
1215  if (type == RX_A)
1216  ins |= R20A(dst_r) | R16A(index) | R12A(base) | (sljit_ins)src2w;
1217  else
1218  ins |= R36A(dst_r) | R32A(index) | R28A(base) | disp_s20((sljit_s32)src2w);
1219 
1220  FAIL_IF(push_inst(compiler, ins));
1221 
1222  if (needs_move != 2)
1223  return SLJIT_SUCCESS;
1224 
1225  dst_r = gpr(dst);
1226  return push_inst(compiler, (compiler->mode & SLJIT_32) ? lr(dst_r, tmp0) : lgr(dst_r, tmp0));
1227 }
1228 
1229 static sljit_s32 emit_siy(struct sljit_compiler *compiler, sljit_ins ins,
1230  sljit_s32 dst, sljit_sw dstw,
1231  sljit_sw srcw)
1232 {
1233  sljit_gpr dst_r = tmp1;
1234 
1235  SLJIT_ASSERT(dst & SLJIT_MEM);
1236 
1237  if (dst & OFFS_REG_MASK) {
1238  sljit_gpr index = tmp1;
1239 
1240  if ((dstw & 0x3) == 0)
1241  index = gpr(OFFS_REG(dst));
1242  else
1243  FAIL_IF(push_inst(compiler, sllg(tmp1, index, dstw & 0x3, 0)));
1244 
1245  FAIL_IF(push_inst(compiler, la(tmp1, 0, dst_r, index)));
1246  dstw = 0;
1247  }
1248  else if (!is_s20(dstw)) {
1249  FAIL_IF(push_load_imm_inst(compiler, tmp1, dstw));
1250 
1251  if (dst & REG_MASK)
1252  FAIL_IF(push_inst(compiler, la(tmp1, 0, dst_r, tmp1)));
1253 
1254  dstw = 0;
1255  }
1256  else
1257  dst_r = gpr(dst & REG_MASK);
1258 
1259  return push_inst(compiler, ins | ((sljit_ins)(srcw & 0xff) << 32) | R28A(dst_r) | disp_s20((sljit_s32)dstw));
1260 }
1261 
1262 struct ins_forms {
1263  sljit_ins op_r;
1264  sljit_ins op_gr;
1265  sljit_ins op_rk;
1266  sljit_ins op_grk;
1267  sljit_ins op;
1268  sljit_ins op_y;
1269  sljit_ins op_g;
1270 };
1271 
1272 static sljit_s32 emit_commutative(struct sljit_compiler *compiler, const struct ins_forms *forms,
1273  sljit_s32 dst,
1274  sljit_s32 src1, sljit_sw src1w,
1275  sljit_s32 src2, sljit_sw src2w)
1276 {
1277  sljit_s32 mode = compiler->mode;
1278  sljit_ins ins, ins_k;
1279 
1280  if ((src1 | src2) & SLJIT_MEM) {
1281  sljit_ins ins12, ins20;
1282 
1283  if (mode & SLJIT_32) {
1284  ins12 = forms->op;
1285  ins20 = forms->op_y;
1286  }
1287  else {
1288  ins12 = 0;
1289  ins20 = forms->op_g;
1290  }
1291 
1292  if (ins12 && ins20) {
1293  /* Extra instructions needed for address computation can be executed independently. */
1294  if ((src2 & SLJIT_MEM) && (!(src1 & SLJIT_MEM)
1295  || ((src1 & OFFS_REG_MASK) ? (src1w & 0x3) == 0 : is_s20(src1w)))) {
1296  if ((src2 & OFFS_REG_MASK) || is_u12(src2w) || !is_s20(src2w))
1297  return emit_rx(compiler, ins12, dst, src1, src1w, src2, src2w, RX_A);
1298 
1299  return emit_rx(compiler, ins20, dst, src1, src1w, src2, src2w, RXY_A);
1300  }
1301 
1302  if (src1 & SLJIT_MEM) {
1303  if ((src1 & OFFS_REG_MASK) || is_u12(src1w) || !is_s20(src1w))
1304  return emit_rx(compiler, ins12, dst, src2, src2w, src1, src1w, RX_A);
1305 
1306  return emit_rx(compiler, ins20, dst, src2, src2w, src1, src1w, RXY_A);
1307  }
1308  }
1309  else if (ins12 || ins20) {
1310  emit_rx_type rx_type;
1311 
1312  if (ins12) {
1313  rx_type = RX_A;
1314  ins = ins12;
1315  }
1316  else {
1317  rx_type = RXY_A;
1318  ins = ins20;
1319  }
1320 
1321  if ((src2 & SLJIT_MEM) && (!(src1 & SLJIT_MEM)
1322  || ((src1 & OFFS_REG_MASK) ? (src1w & 0x3) == 0 : (rx_type == RX_A ? is_u12(src1w) : is_s20(src1w)))))
1323  return emit_rx(compiler, ins, dst, src1, src1w, src2, src2w, rx_type);
1324 
1325  if (src1 & SLJIT_MEM)
1326  return emit_rx(compiler, ins, dst, src2, src2w, src1, src1w, rx_type);
1327  }
1328  }
1329 
1330  if (mode & SLJIT_32) {
1331  ins = forms->op_r;
1332  ins_k = forms->op_rk;
1333  }
1334  else {
1335  ins = forms->op_gr;
1336  ins_k = forms->op_grk;
1337  }
1338 
1339  SLJIT_ASSERT(ins != 0 || ins_k != 0);
1340 
1341  if (ins && FAST_IS_REG(dst)) {
1342  if (dst == src1)
1343  return emit_rr(compiler, ins, dst, src1, src1w, src2, src2w);
1344 
1345  if (dst == src2)
1346  return emit_rr(compiler, ins, dst, src2, src2w, src1, src1w);
1347  }
1348 
1349  if (ins_k == 0)
1350  return emit_rr(compiler, ins, dst, src1, src1w, src2, src2w);
1351 
1352  return emit_rrf(compiler, ins_k, dst, src1, src1w, src2, src2w);
1353 }
1354 
1355 static sljit_s32 emit_non_commutative(struct sljit_compiler *compiler, const struct ins_forms *forms,
1356  sljit_s32 dst,
1357  sljit_s32 src1, sljit_sw src1w,
1358  sljit_s32 src2, sljit_sw src2w)
1359 {
1360  sljit_s32 mode = compiler->mode;
1361  sljit_ins ins;
1362 
1363  if (src2 & SLJIT_MEM) {
1364  sljit_ins ins12, ins20;
1365 
1366  if (mode & SLJIT_32) {
1367  ins12 = forms->op;
1368  ins20 = forms->op_y;
1369  }
1370  else {
1371  ins12 = 0;
1372  ins20 = forms->op_g;
1373  }
1374 
1375  if (ins12 && ins20) {
1376  if ((src2 & OFFS_REG_MASK) || is_u12(src2w) || !is_s20(src2w))
1377  return emit_rx(compiler, ins12, dst, src1, src1w, src2, src2w, RX_A);
1378 
1379  return emit_rx(compiler, ins20, dst, src1, src1w, src2, src2w, RXY_A);
1380  }
1381  else if (ins12)
1382  return emit_rx(compiler, ins12, dst, src1, src1w, src2, src2w, RX_A);
1383  else if (ins20)
1384  return emit_rx(compiler, ins20, dst, src1, src1w, src2, src2w, RXY_A);
1385  }
1386 
1387  ins = (mode & SLJIT_32) ? forms->op_rk : forms->op_grk;
1388 
1389  if (ins == 0 || (FAST_IS_REG(dst) && dst == src1))
1390  return emit_rr(compiler, (mode & SLJIT_32) ? forms->op_r : forms->op_gr, dst, src1, src1w, src2, src2w);
1391 
1392  return emit_rrf(compiler, ins, dst, src1, src1w, src2, src2w);
1393 }
1394 
1395 SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler, sljit_s32 options, void *exec_allocator_data)
1396 {
1397  struct sljit_label *label;
1398  struct sljit_jump *jump;
1399  struct sljit_const *const_;
1400  sljit_sw executable_offset;
1401  sljit_uw ins_size = compiler->size << 1;
1402  sljit_uw pool_size = 0; /* literal pool */
1403  sljit_uw pad_size;
1404  sljit_uw half_count;
1405  SLJIT_NEXT_DEFINE_TYPES;
1406  struct sljit_memory_fragment *buf;
1407  sljit_ins *buf_ptr;
1408  sljit_ins *buf_end;
1409  sljit_u16 *code;
1410  sljit_u16 *code_ptr;
1411  sljit_uw *pool, *pool_ptr;
1412  sljit_ins ins;
1413  sljit_sw source, offset;
1414 
1415  CHECK_ERROR_PTR();
1416  CHECK_PTR(check_sljit_generate_code(compiler));
1417  reverse_buf(compiler);
1418 
1419  jump = compiler->jumps;
1420  while (jump != NULL) {
1421  if (jump->flags & (SLJIT_REWRITABLE_JUMP | JUMP_ADDR | JUMP_MOV_ADDR)) {
1422  /* encoded: */
1423  /* brasl %r14, <rel_addr> (or brcl <mask>, <rel_addr>) */
1424  /* replace with: */
1425  /* lgrl %r1, <pool_addr> */
1426  /* bras %r14, %r1 (or bcr <mask>, %r1) */
1427  pool_size += sizeof(*pool);
1428  if (!(jump->flags & JUMP_MOV_ADDR))
1429  ins_size += 2;
1430  }
1431  jump = jump->next;
1432  }
1433 
1434  const_ = compiler->consts;
1435  while (const_) {
1436  pool_size += sizeof(*pool);
1437  const_ = const_->next;
1438  }
1439 
1440 /* pad code size to 8 bytes so it is accessible with halfword offsets */
1441  /* the literal pool needs to be doubleword aligned */
1442  pad_size = ((ins_size + 7UL) & ~7UL) - ins_size;
1443  SLJIT_ASSERT(pad_size < 8UL);
1444 
1445  /* allocate target buffer */
1446  code = (sljit_u16*)allocate_executable_memory(ins_size + pad_size + pool_size, options, exec_allocator_data, &executable_offset);
1448  code_ptr = code;
1449 
1450  /* TODO(carenas): pool is optional, and the ABI recommends it to
1451  * be created before the function code, instead of
1452  * globally; if the generated code is too big it could
1453  * need offsets bigger than 32-bit words and assert()
1454  */
1455  pool = (sljit_uw *)((sljit_uw)code + ins_size + pad_size);
1456  pool_ptr = pool;
1457  buf = compiler->buf;
1458  half_count = 0;
1459 
1460  label = compiler->labels;
1461  jump = compiler->jumps;
1462  const_ = compiler->consts;
1463  SLJIT_NEXT_INIT_TYPES();
1464  SLJIT_GET_NEXT_MIN();
1465 
1466  do {
1467  buf_ptr = (sljit_ins*)buf->memory;
1468  buf_end = buf_ptr + (buf->used_size >> 3);
1469  do {
1470  ins = *buf_ptr++;
1471 
1472  if (next_min_addr == half_count) {
1473  SLJIT_ASSERT(!label || label->size >= half_count);
1474  SLJIT_ASSERT(!jump || jump->addr >= half_count);
1475  SLJIT_ASSERT(!const_ || const_->addr >= half_count);
1476 
1477  if (next_min_addr == next_label_size) {
1478  label->u.addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
1479  label = label->next;
1480  next_label_size = SLJIT_GET_NEXT_SIZE(label);
1481  }
1482 
1483  if (next_min_addr == next_jump_addr) {
1484  if (SLJIT_UNLIKELY(jump->flags & JUMP_MOV_ADDR)) {
1485  source = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
1486 
1487  jump->addr = (sljit_uw)pool_ptr;
1488 
1489  /* store target into pool */
1490  offset = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(pool_ptr, executable_offset) - source;
1491  pool_ptr++;
1492 
1493  SLJIT_ASSERT(!(offset & 1));
1494  offset >>= 1;
1496  ins |= (sljit_ins)offset & 0xffffffff;
1497  } else if (jump->flags & (SLJIT_REWRITABLE_JUMP | JUMP_ADDR)) {
1498  sljit_ins arg;
1499 
1500  jump->addr = (sljit_uw)pool_ptr;
1501 
1502  /* load address into tmp1 */
1503  source = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
1504  offset = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(pool_ptr, executable_offset) - source;
1505 
1506  SLJIT_ASSERT(!(offset & 1));
1507  offset >>= 1;
1509 
1510  code_ptr[0] = (sljit_u16)(0xc408 | R4A(tmp1) /* lgrl */);
1511  code_ptr[1] = (sljit_u16)(offset >> 16);
1512  code_ptr[2] = (sljit_u16)offset;
1513  code_ptr += 3;
1514  pool_ptr++;
1515 
1516  /* branch to tmp1 */
1517  arg = (ins >> 36) & 0xf;
1518  if (((ins >> 32) & 0xf) == 4) {
1519  /* brcl -> bcr */
1520  ins = bcr(arg, tmp1);
1521  } else {
1522  SLJIT_ASSERT(((ins >> 32) & 0xf) == 5);
1523  /* brasl -> basr */
1524  ins = basr(arg, tmp1);
1525  }
1526 
1527  /* Adjust half_count. */
1528  half_count += 2;
1529  } else
1530  jump->addr = (sljit_uw)code_ptr;
1531 
1532  jump = jump->next;
1533  next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump);
1534  } else if (next_min_addr == next_const_addr) {
1535  /* update instruction with relative address of constant */
1536  source = (sljit_sw)code_ptr;
1537  offset = (sljit_sw)pool_ptr - source;
1538 
1539  SLJIT_ASSERT(!(offset & 0x1));
1540  offset >>= 1; /* halfword (not byte) offset */
1542 
1543  ins |= (sljit_ins)offset & 0xffffffff;
1544 
1545  /* update address */
1546  const_->addr = (sljit_uw)pool_ptr;
1547 
1548  /* store initial value into pool and update pool address */
1549  *(pool_ptr++) = (sljit_uw)(((struct sljit_s390x_const*)const_)->init_value);
1550 
1551  /* move to next constant */
1552  const_ = const_->next;
1553  next_const_addr = SLJIT_GET_NEXT_ADDRESS(const_);
1554  }
1555 
1556  SLJIT_GET_NEXT_MIN();
1557  }
1558 
1559  if (ins & 0xffff00000000L) {
1560  *code_ptr++ = (sljit_u16)(ins >> 32);
1561  half_count++;
1562  }
1563 
1564  if (ins & 0xffffffff0000L) {
1565  *code_ptr++ = (sljit_u16)(ins >> 16);
1566  half_count++;
1567  }
1568 
1569  *code_ptr++ = (sljit_u16)ins;
1570  half_count++;
1571  } while (buf_ptr < buf_end);
1572 
1573  buf = buf->next;
1574  } while (buf);
1575 
1576  if (next_label_size == half_count) {
1577  label->u.addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
1578  label = label->next;
1579  }
1580 
1581  SLJIT_ASSERT(!label);
1582  SLJIT_ASSERT(!jump);
1583  SLJIT_ASSERT(!const_);
1584  SLJIT_ASSERT(code + (ins_size >> 1) == code_ptr);
1585  SLJIT_ASSERT((sljit_u8 *)pool + pool_size == (sljit_u8 *)pool_ptr);
1586 
1587  jump = compiler->jumps;
1588  while (jump != NULL) {
1589  offset = (sljit_sw)((jump->flags & JUMP_ADDR) ? jump->u.target : jump->u.label->u.addr);
1590 
1591  if (jump->flags & (SLJIT_REWRITABLE_JUMP | JUMP_ADDR | JUMP_MOV_ADDR)) {
1592  /* Store jump target into pool. */
1593  *(sljit_uw*)(jump->addr) = (sljit_uw)offset;
1594  } else {
1595  code_ptr = (sljit_u16*)jump->addr;
1596  offset -= (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
1597 
1598  /* offset must be halfword aligned */
1599  SLJIT_ASSERT(!(offset & 1));
1600  offset >>= 1;
1601  SLJIT_ASSERT(is_s32(offset)); /* TODO(mundaym): handle arbitrary offsets */
1602 
1603  code_ptr[1] = (sljit_u16)(offset >> 16);
1604  code_ptr[2] = (sljit_u16)offset;
1605  }
1606  jump = jump->next;
1607  }
1608 
1609  compiler->error = SLJIT_ERR_COMPILED;
1610  compiler->executable_offset = executable_offset;
1611  compiler->executable_size = ins_size;
1612  if (pool_size)
1613  compiler->executable_size += (pad_size + pool_size);
1614 
1615  code = (sljit_u16 *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset);
1616  code_ptr = (sljit_u16 *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
1617  SLJIT_CACHE_FLUSH(code, code_ptr);
1618  SLJIT_UPDATE_WX_FLAGS(code, code_ptr, 1);
1619  return code;
1620 }
1621 
1622 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
1623 {
1624  /* TODO(mundaym): implement all */
1625  switch (feature_type) {
1626  case SLJIT_HAS_FPU:
1627 #ifdef SLJIT_IS_FPU_AVAILABLE
1628  return (SLJIT_IS_FPU_AVAILABLE) != 0;
1629 #else
1630  return 1;
1631 #endif /* SLJIT_IS_FPU_AVAILABLE */
1632 
1633  case SLJIT_HAS_CLZ:
1634  case SLJIT_HAS_REV:
1635  case SLJIT_HAS_ROT:
1636  case SLJIT_HAS_PREFETCH:
1637  case SLJIT_HAS_COPY_F32:
1638  case SLJIT_HAS_COPY_F64:
1639  case SLJIT_HAS_SIMD:
1640  case SLJIT_HAS_ATOMIC:
1641  return 1;
1642 
1643  case SLJIT_HAS_CTZ:
1644  return 2;
1645 
1646  case SLJIT_HAS_CMOV:
1647  return have_lscond1() ? 1 : 0;
1648  }
1649  return 0;
1650 }
1651 
1652 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type)
1653 {
1654  SLJIT_UNUSED_ARG(type);
1655  return 0;
1656 }
1657 
1658 /* --------------------------------------------------------------------- */
1659 /* Entry, exit */
1660 /* --------------------------------------------------------------------- */
1661 
1662 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
1663  sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
1664  sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
1665 {
1666  sljit_s32 saved_arg_count = SLJIT_KEPT_SAVEDS_COUNT(options);
1667  sljit_s32 offset, i, tmp;
1668 
1669  CHECK_ERROR();
1670  CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
1671  set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
1672 
1673  /* Saved registers are stored in callee allocated save area. */
1675 
1676  offset = 2 * SSIZE_OF(sw);
1677  if (saveds + scratches >= SLJIT_NUMBER_OF_REGISTERS) {
1678  if (saved_arg_count == 0) {
1679  FAIL_IF(push_inst(compiler, stmg(r6, r14, offset, r15)));
1680  offset += 9 * SSIZE_OF(sw);
1681  } else {
1682  FAIL_IF(push_inst(compiler, stmg(r6, r13 - (sljit_gpr)saved_arg_count, offset, r15)));
1683  offset += (8 - saved_arg_count) * SSIZE_OF(sw);
1684  }
1685  } else {
1686  if (scratches == SLJIT_FIRST_SAVED_REG) {
1687  FAIL_IF(push_inst(compiler, stg(r6, offset, 0, r15)));
1688  offset += SSIZE_OF(sw);
1689  } else if (scratches > SLJIT_FIRST_SAVED_REG) {
1690  FAIL_IF(push_inst(compiler, stmg(r6, r6 + (sljit_gpr)(scratches - SLJIT_FIRST_SAVED_REG), offset, r15)));
1691  offset += (scratches - (SLJIT_FIRST_SAVED_REG - 1)) * SSIZE_OF(sw);
1692  }
1693 
1694  if (saved_arg_count == 0) {
1695  if (saveds == 0) {
1696  FAIL_IF(push_inst(compiler, stg(r14, offset, 0, r15)));
1697  offset += SSIZE_OF(sw);
1698  } else {
1699  FAIL_IF(push_inst(compiler, stmg(r14 - (sljit_gpr)saveds, r14, offset, r15)));
1700  offset += (saveds + 1) * SSIZE_OF(sw);
1701  }
1702  } else if (saveds > saved_arg_count) {
1703  if (saveds == saved_arg_count + 1) {
1704  FAIL_IF(push_inst(compiler, stg(r14 - (sljit_gpr)saveds, offset, 0, r15)));
1705  offset += SSIZE_OF(sw);
1706  } else {
1707  FAIL_IF(push_inst(compiler, stmg(r14 - (sljit_gpr)saveds, r13 - (sljit_gpr)saved_arg_count, offset, r15)));
1708  offset += (saveds - saved_arg_count) * SSIZE_OF(sw);
1709  }
1710  }
1711  }
1712 
1713  if (saved_arg_count > 0) {
1714  FAIL_IF(push_inst(compiler, stg(r14, offset, 0, r15)));
1715  offset += SSIZE_OF(sw);
1716  }
1717 
1718  tmp = SLJIT_FS0 - fsaveds;
1719  for (i = SLJIT_FS0; i > tmp; i--) {
1720  FAIL_IF(push_inst(compiler, 0x60000000 /* std */ | F20(i) | R12A(r15) | (sljit_ins)offset));
1721  offset += SSIZE_OF(sw);
1722  }
1723 
1724  for (i = fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) {
1725  FAIL_IF(push_inst(compiler, 0x60000000 /* std */ | F20(i) | R12A(r15) | (sljit_ins)offset));
1726  offset += SSIZE_OF(sw);
1727  }
1728 
1729  local_size = (local_size + SLJIT_S390X_DEFAULT_STACK_FRAME_SIZE + 0xf) & ~0xf;
1730  compiler->local_size = local_size;
1731 
1732  if (is_s20(-local_size))
1733  FAIL_IF(push_inst(compiler, 0xe30000000071 /* lay */ | R36A(r15) | R28A(r15) | disp_s20(-local_size)));
1734  else
1735  FAIL_IF(push_inst(compiler, 0xc20400000000 /* slgfi */ | R36A(r15) | (sljit_ins)local_size));
1736 
1737  if (options & SLJIT_ENTER_REG_ARG)
1738  return SLJIT_SUCCESS;
1739 
1740  arg_types >>= SLJIT_ARG_SHIFT;
1741  saved_arg_count = 0;
1742  tmp = 0;
1743  while (arg_types > 0) {
1744  if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) {
1745  if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) {
1746  FAIL_IF(push_inst(compiler, lgr(gpr(SLJIT_S0 - saved_arg_count), gpr(SLJIT_R0 + tmp))));
1747  saved_arg_count++;
1748  }
1749  tmp++;
1750  }
1751 
1752  arg_types >>= SLJIT_ARG_SHIFT;
1753  }
1754 
1755  return SLJIT_SUCCESS;
1756 }
1757 
1758 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
1759  sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
1760  sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
1761 {
1762  CHECK_ERROR();
1763  CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
1764  set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
1765 
1766  compiler->local_size = (local_size + SLJIT_S390X_DEFAULT_STACK_FRAME_SIZE + 0xf) & ~0xf;
1767  return SLJIT_SUCCESS;
1768 }
1769 
1770 static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit_gpr last_reg)
1771 {
1772  sljit_s32 offset, i, tmp;
1773  sljit_s32 local_size = compiler->local_size;
1774  sljit_s32 saveds = compiler->saveds;
1775  sljit_s32 scratches = compiler->scratches;
1776  sljit_s32 kept_saveds_count = SLJIT_KEPT_SAVEDS_COUNT(compiler->options);
1777 
1778  if (is_u12(local_size))
1779  FAIL_IF(push_inst(compiler, 0x41000000 /* la */ | R20A(r15) | R12A(r15) | (sljit_ins)local_size));
1780  else if (is_s20(local_size))
1781  FAIL_IF(push_inst(compiler, 0xe30000000071 /* lay */ | R36A(r15) | R28A(r15) | disp_s20(local_size)));
1782  else
1783  FAIL_IF(push_inst(compiler, 0xc20a00000000 /* algfi */ | R36A(r15) | (sljit_ins)local_size));
1784 
1785  offset = 2 * SSIZE_OF(sw);
1786  if (saveds + scratches >= SLJIT_NUMBER_OF_REGISTERS) {
1787  if (kept_saveds_count == 0) {
1788  FAIL_IF(push_inst(compiler, lmg(r6, last_reg, offset, r15)));
1789  offset += 9 * SSIZE_OF(sw);
1790  } else {
1791  FAIL_IF(push_inst(compiler, lmg(r6, r13 - (sljit_gpr)kept_saveds_count, offset, r15)));
1792  offset += (8 - kept_saveds_count) * SSIZE_OF(sw);
1793  }
1794  } else {
1795  if (scratches == SLJIT_FIRST_SAVED_REG) {
1796  FAIL_IF(push_inst(compiler, lg(r6, offset, 0, r15)));
1797  offset += SSIZE_OF(sw);
1798  } else if (scratches > SLJIT_FIRST_SAVED_REG) {
1799  FAIL_IF(push_inst(compiler, lmg(r6, r6 + (sljit_gpr)(scratches - SLJIT_FIRST_SAVED_REG), offset, r15)));
1800  offset += (scratches - (SLJIT_FIRST_SAVED_REG - 1)) * SSIZE_OF(sw);
1801  }
1802 
1803  if (kept_saveds_count == 0) {
1804  if (saveds == 0) {
1805  if (last_reg == r14)
1806  FAIL_IF(push_inst(compiler, lg(r14, offset, 0, r15)));
1807  offset += SSIZE_OF(sw);
1808  } else if (saveds == 1 && last_reg == r13) {
1809  FAIL_IF(push_inst(compiler, lg(r13, offset, 0, r15)));
1810  offset += 2 * SSIZE_OF(sw);
1811  } else {
1812  FAIL_IF(push_inst(compiler, lmg(r14 - (sljit_gpr)saveds, last_reg, offset, r15)));
1813  offset += (saveds + 1) * SSIZE_OF(sw);
1814  }
1815  } else if (saveds > kept_saveds_count) {
1816  if (saveds == kept_saveds_count + 1) {
1817  FAIL_IF(push_inst(compiler, lg(r14 - (sljit_gpr)saveds, offset, 0, r15)));
1818  offset += SSIZE_OF(sw);
1819  } else {
1820  FAIL_IF(push_inst(compiler, lmg(r14 - (sljit_gpr)saveds, r13 - (sljit_gpr)kept_saveds_count, offset, r15)));
1821  offset += (saveds - kept_saveds_count) * SSIZE_OF(sw);
1822  }
1823  }
1824  }
1825 
1826  if (kept_saveds_count > 0) {
1827  if (last_reg == r14)
1828  FAIL_IF(push_inst(compiler, lg(r14, offset, 0, r15)));
1829  offset += SSIZE_OF(sw);
1830  }
1831 
1832  tmp = SLJIT_FS0 - compiler->fsaveds;
1833  for (i = SLJIT_FS0; i > tmp; i--) {
1834  FAIL_IF(push_inst(compiler, 0x68000000 /* ld */ | F20(i) | R12A(r15) | (sljit_ins)offset));
1835  offset += SSIZE_OF(sw);
1836  }
1837 
1838  for (i = compiler->fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) {
1839  FAIL_IF(push_inst(compiler, 0x68000000 /* ld */ | F20(i) | R12A(r15) | (sljit_ins)offset));
1840  offset += SSIZE_OF(sw);
1841  }
1842 
1843  return SLJIT_SUCCESS;
1844 }
1845 
1846 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler)
1847 {
1848  CHECK_ERROR();
1849  CHECK(check_sljit_emit_return_void(compiler));
1850 
1851  FAIL_IF(emit_stack_frame_release(compiler, r14));
1852  return push_inst(compiler, br(r14)); /* return */
1853 }
1854 
1855 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_to(struct sljit_compiler *compiler,
1856  sljit_s32 src, sljit_sw srcw)
1857 {
1858  CHECK_ERROR();
1859  CHECK(check_sljit_emit_return_to(compiler, src, srcw));
1860 
1861  if (src & SLJIT_MEM) {
1862  ADJUST_LOCAL_OFFSET(src, srcw);
1863  FAIL_IF(load_word(compiler, tmp1, src, srcw, 0 /* 64-bit */));
1864  src = TMP_REG2;
1865  srcw = 0;
1866  } else if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) {
1867  FAIL_IF(push_inst(compiler, lgr(tmp1, gpr(src))));
1868  src = TMP_REG2;
1869  srcw = 0;
1870  }
1871 
1872  FAIL_IF(emit_stack_frame_release(compiler, r13));
1873 
1874  SLJIT_SKIP_CHECKS(compiler);
1875  return sljit_emit_ijump(compiler, SLJIT_JUMP, src, srcw);
1876 }
1877 
1878 /* --------------------------------------------------------------------- */
1879 /* Operators */
1880 /* --------------------------------------------------------------------- */
1881 
1882 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
1883 {
1884  sljit_gpr arg0 = gpr(SLJIT_R0);
1885  sljit_gpr arg1 = gpr(SLJIT_R1);
1886 
1887  CHECK_ERROR();
1888  CHECK(check_sljit_emit_op0(compiler, op));
1889 
1890  op = GET_OPCODE(op) | (op & SLJIT_32);
1891  switch (op) {
1892  case SLJIT_BREAKPOINT:
1893  /* The following invalid instruction is emitted by gdb. */
1894  return push_inst(compiler, 0x0001 /* 2-byte trap */);
1895  case SLJIT_NOP:
1896  return push_inst(compiler, 0x0700 /* 2-byte nop */);
1897  case SLJIT_LMUL_UW:
1898  FAIL_IF(push_inst(compiler, mlgr(arg0, arg0)));
1899  break;
1900  case SLJIT_LMUL_SW:
1901  /* signed multiplication from: */
1902  /* Hacker's Delight, Second Edition: Chapter 8-3. */
1903  FAIL_IF(push_inst(compiler, srag(tmp0, arg0, 63, 0)));
1904  FAIL_IF(push_inst(compiler, srag(tmp1, arg1, 63, 0)));
1905  FAIL_IF(push_inst(compiler, ngr(tmp0, arg1)));
1906  FAIL_IF(push_inst(compiler, ngr(tmp1, arg0)));
1907 
1908  /* unsigned multiplication */
1909  FAIL_IF(push_inst(compiler, mlgr(arg0, arg0)));
1910 
1911  FAIL_IF(push_inst(compiler, sgr(arg0, tmp0)));
1912  FAIL_IF(push_inst(compiler, sgr(arg0, tmp1)));
1913  break;
1914  case SLJIT_DIV_U32:
1915  case SLJIT_DIVMOD_U32:
1916  FAIL_IF(push_inst(compiler, lhi(tmp0, 0)));
1917  FAIL_IF(push_inst(compiler, lr(tmp1, arg0)));
1918  FAIL_IF(push_inst(compiler, dlr(tmp0, arg1)));
1919  FAIL_IF(push_inst(compiler, lr(arg0, tmp1))); /* quotient */
1920  if (op == SLJIT_DIVMOD_U32)
1921  return push_inst(compiler, lr(arg1, tmp0)); /* remainder */
1922 
1923  return SLJIT_SUCCESS;
1924  case SLJIT_DIV_S32:
1925  case SLJIT_DIVMOD_S32:
1926  FAIL_IF(push_inst(compiler, lhi(tmp0, 0)));
1927  FAIL_IF(push_inst(compiler, lr(tmp1, arg0)));
1928  FAIL_IF(push_inst(compiler, dr(tmp0, arg1)));
1929  FAIL_IF(push_inst(compiler, lr(arg0, tmp1))); /* quotient */
1930  if (op == SLJIT_DIVMOD_S32)
1931  return push_inst(compiler, lr(arg1, tmp0)); /* remainder */
1932 
1933  return SLJIT_SUCCESS;
1934  case SLJIT_DIV_UW:
1935  case SLJIT_DIVMOD_UW:
1936  FAIL_IF(push_inst(compiler, lghi(tmp0, 0)));
1937  FAIL_IF(push_inst(compiler, lgr(tmp1, arg0)));
1938  FAIL_IF(push_inst(compiler, dlgr(tmp0, arg1)));
1939  FAIL_IF(push_inst(compiler, lgr(arg0, tmp1))); /* quotient */
1940  if (op == SLJIT_DIVMOD_UW)
1941  return push_inst(compiler, lgr(arg1, tmp0)); /* remainder */
1942 
1943  return SLJIT_SUCCESS;
1944  case SLJIT_DIV_SW:
1945  case SLJIT_DIVMOD_SW:
1946  FAIL_IF(push_inst(compiler, lgr(tmp1, arg0)));
1947  FAIL_IF(push_inst(compiler, dsgr(tmp0, arg1)));
1948  FAIL_IF(push_inst(compiler, lgr(arg0, tmp1))); /* quotient */
1949  if (op == SLJIT_DIVMOD_SW)
1950  return push_inst(compiler, lgr(arg1, tmp0)); /* remainder */
1951 
1952  return SLJIT_SUCCESS;
1953  case SLJIT_ENDBR:
1954  return SLJIT_SUCCESS;
1955  case SLJIT_SKIP_FRAMES_BEFORE_RETURN:
1956  return SLJIT_SUCCESS;
1957  default:
1958  SLJIT_UNREACHABLE();
1959  }
1960  /* swap result registers */
1961  FAIL_IF(push_inst(compiler, lgr(tmp0, arg0)));
1962  FAIL_IF(push_inst(compiler, lgr(arg0, arg1)));
1963  return push_inst(compiler, lgr(arg1, tmp0));
1964 }
1965 
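/* Illustrative sketch (not part of sljit): the identity behind the
 * SLJIT_LMUL_SW sequence above (srag/ngr/mlgr/sgr), per Hacker's Delight,
 * 2nd ed., 8-3. Helper names and the GCC-style unsigned __int128 are
 * assumptions made only for this sketch. */
static sljit_uw mulh_u64_sketch(sljit_uw a, sljit_uw b)
{
	return (sljit_uw)(((unsigned __int128)a * b) >> 64);
}

static sljit_sw mulh_s64_sketch(sljit_sw a, sljit_sw b)
{
	/* t0/t1 mirror tmp0/tmp1 in the emitted code. */
	sljit_uw t0 = (sljit_uw)(a >> 63) & (sljit_uw)b; /* b if a < 0, else 0 */
	sljit_uw t1 = (sljit_uw)(b >> 63) & (sljit_uw)a; /* a if b < 0, else 0 */
	return (sljit_sw)(mulh_u64_sketch((sljit_uw)a, (sljit_uw)b) - t0 - t1);
}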
1966 static sljit_s32 sljit_emit_clz_ctz(struct sljit_compiler *compiler, sljit_s32 op, sljit_gpr dst_r, sljit_gpr src_r)
1967 {
1968  sljit_s32 is_ctz = (GET_OPCODE(op) == SLJIT_CTZ);
1969 
1970  if ((op & SLJIT_32) && src_r != tmp0) {
1971  FAIL_IF(push_inst(compiler, 0xb9160000 /* llgfr */ | R4A(tmp0) | R0A(src_r)));
1972  src_r = tmp0;
1973  }
1974 
1975  if (is_ctz) {
1976  FAIL_IF(push_inst(compiler, ((op & SLJIT_32) ? 0x1300 /* lcr */ : 0xb9030000 /* lcgr */) | R4A(tmp1) | R0A(src_r)));
1977 
1978  if (src_r == tmp0)
1979  FAIL_IF(push_inst(compiler, ((op & SLJIT_32) ? 0x1400 /* nr */ : 0xb9800000 /* ngr */) | R4A(tmp0) | R0A(tmp1)));
1980  else
1981  FAIL_IF(push_inst(compiler, 0xb9e40000 /* ngrk */ | R12A(tmp1) | R4A(tmp0) | R0A(src_r)));
1982 
1983  src_r = tmp0;
1984  }
1985 
1986  FAIL_IF(push_inst(compiler, 0xb9830000 /* flogr */ | R4A(tmp0) | R0A(src_r)));
1987 
1988  if (is_ctz)
1989  FAIL_IF(push_inst(compiler, 0xec00000000d9 /* aghik */ | R36A(tmp1) | R32A(tmp0) | ((sljit_ins)(-64 & 0xffff) << 16)));
1990 
1991  if (op & SLJIT_32) {
1992  if (!is_ctz && dst_r != tmp0)
1993  return push_inst(compiler, 0xec00000000d9 /* aghik */ | R36A(dst_r) | R32A(tmp0) | ((sljit_ins)(-32 & 0xffff) << 16));
1994 
1995  FAIL_IF(push_inst(compiler, 0xc20800000000 /* agfi */ | R36A(tmp0) | (sljit_u32)-32));
1996  }
1997 
1998  if (is_ctz)
1999  FAIL_IF(push_inst(compiler, 0xec0000000057 /* rxsbg */ | R36A(tmp0) | R32A(tmp1) | ((sljit_ins)((op & SLJIT_32) ? 59 : 58) << 24) | (63 << 16) | ((sljit_ins)((op & SLJIT_32) ? 5 : 6) << 8)));
2000 
2001  if (dst_r == tmp0)
2002  return SLJIT_SUCCESS;
2003 
2004  return push_inst(compiler, ((op & SLJIT_32) ? 0x1800 /* lr */ : 0xb9040000 /* lgr */) | R4A(dst_r) | R0A(tmp0));
2005 }
2006 
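/* Illustrative sketch (not part of sljit): the CTZ path above isolates the
 * lowest set bit (lcgr + ngr/ngrk) so that FLOGR, a leading-zero count, can
 * answer a trailing-zero question. The GCC-style builtin below is an
 * assumption made only for this sketch. */
static unsigned int ctz64_sketch(sljit_uw x)
{
	if (x == 0)
		return 64;
	/* x & -x keeps only the lowest set bit. */
	return 63 - (unsigned int)__builtin_clzll(x & (0 - x));
}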
2007 static sljit_s32 sljit_emit_rev(struct sljit_compiler *compiler, sljit_s32 op,
2008  sljit_s32 dst, sljit_sw dstw,
2009  sljit_s32 src, sljit_sw srcw)
2010 {
2011  struct addr addr;
2012  sljit_gpr reg;
2013  sljit_ins ins;
2014  sljit_s32 opcode = GET_OPCODE(op);
2015  sljit_s32 is_16bit = (opcode == SLJIT_REV_U16 || opcode == SLJIT_REV_S16);
2016 
2017  if (dst & SLJIT_MEM) {
2018  if (src & SLJIT_MEM) {
2019  FAIL_IF(load_store_op(compiler, tmp0, src, srcw, op & SLJIT_32, is_16bit ? load_halfword_forms : load_forms));
2020  reg = tmp0;
2021  } else
2022  reg = gpr(src);
2023 
2024  FAIL_IF(make_addr_bxy(compiler, &addr, dst, dstw, tmp1));
2025 
2026  if (is_16bit)
2027  ins = 0xe3000000003f /* strvh */;
2028  else
2029  ins = (op & SLJIT_32) ? 0xe3000000003e /* strv */ : 0xe3000000002f /* strvg */;
2030 
2031  return push_inst(compiler, ins | R36A(reg) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset));
2032  }
2033 
2034  reg = gpr(dst);
2035 
2036  if (src & SLJIT_MEM) {
2037  FAIL_IF(make_addr_bxy(compiler, &addr, src, srcw, tmp1));
2038 
2039  if (is_16bit)
2040  ins = 0xe3000000001f /* lrvh */;
2041  else
2042  ins = (op & SLJIT_32) ? 0xe3000000001e /* lrv */ : 0xe3000000000f /* lrvg */;
2043 
2044  FAIL_IF(push_inst(compiler, ins | R36A(reg) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset)));
2045 
2046  if (opcode == SLJIT_REV)
2047  return SLJIT_SUCCESS;
2048 
2049  if (is_16bit) {
2050  if (op & SLJIT_32)
2051  ins = (opcode == SLJIT_REV_U16) ? 0xb9950000 /* llhr */ : 0xb9270000 /* lhr */;
2052  else
2053  ins = (opcode == SLJIT_REV_U16) ? 0xb9850000 /* llghr */ : 0xb9070000 /* lghr */;
2054  } else
2055  ins = (opcode == SLJIT_REV_U32) ? 0xb9160000 /* llgfr */ : 0xb9140000 /* lgfr */;
2056 
2057  return push_inst(compiler, ins | R4A(reg) | R0A(reg));
2058  }
2059 
2060  ins = (op & SLJIT_32) ? 0xb91f0000 /* lrvr */ : 0xb90f0000 /* lrvgr */;
2061  FAIL_IF(push_inst(compiler, ins | R4A(reg) | R0A(gpr(src))));
2062 
2063  if (opcode == SLJIT_REV)
2064  return SLJIT_SUCCESS;
2065 
2066  if (!is_16bit) {
2067  ins = (opcode == SLJIT_REV_U32) ? 0xb9160000 /* llgfr */ : 0xb9140000 /* lgfr */;
2068  return push_inst(compiler, ins | R4A(reg) | R0A(reg));
2069  }
2070 
2071  if (op & SLJIT_32) {
2072  ins = (opcode == SLJIT_REV_U16) ? 0x88000000 /* srl */ : 0x8a000000 /* sra */;
2073  return push_inst(compiler, ins | R20A(reg) | 16);
2074  }
2075 
2076  ins = (opcode == SLJIT_REV_U16) ? 0xeb000000000c /* srlg */ : 0xeb000000000a /* srag */;
2077  return push_inst(compiler, ins | R36A(reg) | R32A(reg) | (48 << 16));
2078 }
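/* Illustrative sketch (not part of sljit): in the register REV_U16/REV_S16
 * path above, lrvr/lrvgr reverse every byte of the word, so the swapped
 * halfword lands in the top bits and is shifted back down by 16 (32-bit) or
 * 48 (64-bit); srl/srlg vs. sra/srag picks zero or sign extension. The
 * GCC-style builtin below is an assumption made only for this sketch. */
static sljit_uw rev_u16_sketch(sljit_uw v)
{
	sljit_uw r = __builtin_bswap64(v); /* like lrvgr: reverse all 8 bytes */
	return r >> 48;                    /* swapped halfword, zero-extended */
}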
2079 
2080 /* LEVAL will be defined later with different parameters as needed */
2081 #define WHEN2(cond, i1, i2) (cond) ? LEVAL(i1) : LEVAL(i2)
2082 
2083 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
2084  sljit_s32 dst, sljit_sw dstw,
2085  sljit_s32 src, sljit_sw srcw)
2086 {
2087  sljit_ins ins;
2088  struct addr mem;
2089  sljit_gpr dst_r;
2090  sljit_gpr src_r;
2091  sljit_s32 opcode = GET_OPCODE(op);
2092 
2093  CHECK_ERROR();
2094  CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
2095  ADJUST_LOCAL_OFFSET(dst, dstw);
2096  ADJUST_LOCAL_OFFSET(src, srcw);
2097 
2098  if (opcode >= SLJIT_MOV && opcode <= SLJIT_MOV_P) {
2099  /* LOAD REGISTER */
2100  if (FAST_IS_REG(dst) && FAST_IS_REG(src)) {
2101  dst_r = gpr(dst);
2102  src_r = gpr(src);
2103  switch (opcode | (op & SLJIT_32)) {
2104  /* 32-bit */
2105  case SLJIT_MOV32_U8:
2106  ins = llcr(dst_r, src_r);
2107  break;
2108  case SLJIT_MOV32_S8:
2109  ins = lbr(dst_r, src_r);
2110  break;
2111  case SLJIT_MOV32_U16:
2112  ins = llhr(dst_r, src_r);
2113  break;
2114  case SLJIT_MOV32_S16:
2115  ins = lhr(dst_r, src_r);
2116  break;
2117  case SLJIT_MOV32:
2118  if (dst_r == src_r)
2119  return SLJIT_SUCCESS;
2120  ins = lr(dst_r, src_r);
2121  break;
2122  /* 64-bit */
2123  case SLJIT_MOV_U8:
2124  ins = llgcr(dst_r, src_r);
2125  break;
2126  case SLJIT_MOV_S8:
2127  ins = lgbr(dst_r, src_r);
2128  break;
2129  case SLJIT_MOV_U16:
2130  ins = llghr(dst_r, src_r);
2131  break;
2132  case SLJIT_MOV_S16:
2133  ins = lghr(dst_r, src_r);
2134  break;
2135  case SLJIT_MOV_U32:
2136  ins = llgfr(dst_r, src_r);
2137  break;
2138  case SLJIT_MOV_S32:
2139  ins = lgfr(dst_r, src_r);
2140  break;
2141  case SLJIT_MOV:
2142  case SLJIT_MOV_P:
2143  if (dst_r == src_r)
2144  return SLJIT_SUCCESS;
2145  ins = lgr(dst_r, src_r);
2146  break;
2147  default:
2148  ins = 0;
2149  SLJIT_UNREACHABLE();
2150  break;
2151  }
2152  FAIL_IF(push_inst(compiler, ins));
2153  return SLJIT_SUCCESS;
2154  }
2155  /* LOAD IMMEDIATE */
2156  if (FAST_IS_REG(dst) && src == SLJIT_IMM) {
2157  switch (opcode) {
2158  case SLJIT_MOV_U8:
2159  srcw = (sljit_sw)((sljit_u8)(srcw));
2160  break;
2161  case SLJIT_MOV_S8:
2162  srcw = (sljit_sw)((sljit_s8)(srcw));
2163  break;
2164  case SLJIT_MOV_U16:
2165  srcw = (sljit_sw)((sljit_u16)(srcw));
2166  break;
2167  case SLJIT_MOV_S16:
2168  srcw = (sljit_sw)((sljit_s16)(srcw));
2169  break;
2170  case SLJIT_MOV_U32:
2171  srcw = (sljit_sw)((sljit_u32)(srcw));
2172  break;
2173  case SLJIT_MOV_S32:
2174  case SLJIT_MOV32:
2175  srcw = (sljit_sw)((sljit_s32)(srcw));
2176  break;
2177  }
2178  return push_load_imm_inst(compiler, gpr(dst), srcw);
2179  }
2180  /* LOAD */
2181  /* TODO(carenas): avoid reg being defined later */
2182  #define LEVAL(i) EVAL(i, reg, mem)
2183  if (FAST_IS_REG(dst) && (src & SLJIT_MEM)) {
2184  sljit_gpr reg = gpr(dst);
2185 
2186  FAIL_IF(make_addr_bxy(compiler, &mem, src, srcw, tmp1));
2187  /* TODO(carenas): convert all calls below to LEVAL */
2188  switch (opcode | (op & SLJIT_32)) {
2189  case SLJIT_MOV32_U8:
2190  ins = llc(reg, mem.offset, mem.index, mem.base);
2191  break;
2192  case SLJIT_MOV32_S8:
2193  ins = lb(reg, mem.offset, mem.index, mem.base);
2194  break;
2195  case SLJIT_MOV32_U16:
2196  ins = llh(reg, mem.offset, mem.index, mem.base);
2197  break;
2198  case SLJIT_MOV32_S16:
2199  ins = WHEN2(is_u12(mem.offset), lh, lhy);
2200  break;
2201  case SLJIT_MOV32:
2202  ins = WHEN2(is_u12(mem.offset), l, ly);
2203  break;
2204  case SLJIT_MOV_U8:
2205  ins = LEVAL(llgc);
2206  break;
2207  case SLJIT_MOV_S8:
2208  ins = lgb(reg, mem.offset, mem.index, mem.base);
2209  break;
2210  case SLJIT_MOV_U16:
2211  ins = LEVAL(llgh);
2212  break;
2213  case SLJIT_MOV_S16:
2214  ins = lgh(reg, mem.offset, mem.index, mem.base);
2215  break;
2216  case SLJIT_MOV_U32:
2217  ins = LEVAL(llgf);
2218  break;
2219  case SLJIT_MOV_S32:
2220  ins = lgf(reg, mem.offset, mem.index, mem.base);
2221  break;
2222  case SLJIT_MOV_P:
2223  case SLJIT_MOV:
2224  ins = lg(reg, mem.offset, mem.index, mem.base);
2225  break;
2226  default:
2227  ins = 0;
2228  SLJIT_UNREACHABLE();
2229  break;
2230  }
2231  FAIL_IF(push_inst(compiler, ins));
2232  return SLJIT_SUCCESS;
2233  }
2234  /* STORE and STORE IMMEDIATE */
2235  if ((dst & SLJIT_MEM) && (FAST_IS_REG(src) || src == SLJIT_IMM)) {
2236  struct addr mem;
2237  sljit_gpr reg = FAST_IS_REG(src) ? gpr(src) : tmp0;
2238 
2239  if (src == SLJIT_IMM) {
2240  /* TODO(mundaym): MOVE IMMEDIATE? */
2241  FAIL_IF(push_load_imm_inst(compiler, reg, srcw));
2242  }
2243  FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1));
2244  switch (opcode) {
2245  case SLJIT_MOV_U8:
2246  case SLJIT_MOV_S8:
2247  return push_inst(compiler,
2248  WHEN2(is_u12(mem.offset), stc, stcy));
2249  case SLJIT_MOV_U16:
2250  case SLJIT_MOV_S16:
2251  return push_inst(compiler,
2252  WHEN2(is_u12(mem.offset), sth, sthy));
2253  case SLJIT_MOV_U32:
2254  case SLJIT_MOV_S32:
2255  case SLJIT_MOV32:
2256  return push_inst(compiler,
2257  WHEN2(is_u12(mem.offset), st, sty));
2258  case SLJIT_MOV_P:
2259  case SLJIT_MOV:
2260  FAIL_IF(push_inst(compiler, LEVAL(stg)));
2261  return SLJIT_SUCCESS;
2262  default:
2263  SLJIT_UNREACHABLE();
2264  }
2265  }
2266  #undef LEVAL
2267  /* MOVE CHARACTERS */
2268  if ((dst & SLJIT_MEM) && (src & SLJIT_MEM)) {
2269  struct addr mem;
2270  FAIL_IF(make_addr_bxy(compiler, &mem, src, srcw, tmp1));
2271  switch (opcode) {
2272  case SLJIT_MOV_U8:
2273  case SLJIT_MOV_S8:
2274  FAIL_IF(push_inst(compiler,
2275  EVAL(llgc, tmp0, mem)));
2276  FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1));
2277  return push_inst(compiler,
2278  EVAL(stcy, tmp0, mem));
2279  case SLJIT_MOV_U16:
2280  case SLJIT_MOV_S16:
2281  FAIL_IF(push_inst(compiler,
2282  EVAL(llgh, tmp0, mem)));
2283  FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1));
2284  return push_inst(compiler,
2285  EVAL(sthy, tmp0, mem));
2286  case SLJIT_MOV_U32:
2287  case SLJIT_MOV_S32:
2288  case SLJIT_MOV32:
2289  FAIL_IF(push_inst(compiler,
2290  EVAL(ly, tmp0, mem)));
2291  FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1));
2292  return push_inst(compiler,
2293  EVAL(sty, tmp0, mem));
2294  case SLJIT_MOV_P:
2295  case SLJIT_MOV:
2296  FAIL_IF(push_inst(compiler,
2297  EVAL(lg, tmp0, mem)));
2298  FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1));
2299  FAIL_IF(push_inst(compiler,
2300  EVAL(stg, tmp0, mem)));
2301  return SLJIT_SUCCESS;
2302  default:
2303  SLJIT_UNREACHABLE();
2304  }
2305  }
2306  SLJIT_UNREACHABLE();
2307  }
2308 
2309  SLJIT_ASSERT(src != SLJIT_IMM);
2310 
2311  dst_r = FAST_IS_REG(dst) ? gpr(dst) : tmp0;
2312  src_r = FAST_IS_REG(src) ? gpr(src) : tmp0;
2313 
2314  compiler->status_flags_state = op & (VARIABLE_FLAG_MASK | SLJIT_SET_Z);
2315 
2316  /* TODO(mundaym): optimize loads and stores */
2317  switch (opcode) {
2318  case SLJIT_CLZ:
2319  case SLJIT_CTZ:
2320  if (src & SLJIT_MEM)
2321  FAIL_IF(load_unsigned_word(compiler, src_r, src, srcw, op & SLJIT_32));
2322 
2323  FAIL_IF(sljit_emit_clz_ctz(compiler, op, dst_r, src_r));
2324  break;
2325  case SLJIT_REV_U32:
2326  case SLJIT_REV_S32:
2327  op |= SLJIT_32;
2328  /* fallthrough */
2329  case SLJIT_REV:
2330  case SLJIT_REV_U16:
2331  case SLJIT_REV_S16:
2332  return sljit_emit_rev(compiler, op, dst, dstw, src, srcw);
2333  default:
2334  SLJIT_UNREACHABLE();
2335  }
2336 
2337  if (dst & SLJIT_MEM)
2338  return store_word(compiler, dst_r, dst, dstw, op & SLJIT_32);
2339 
2340  return SLJIT_SUCCESS;
2341 }
2342 
2343 static SLJIT_INLINE int is_commutative(sljit_s32 op)
2344 {
2345  switch (GET_OPCODE(op)) {
2346  case SLJIT_ADD:
2347  case SLJIT_ADDC:
2348  case SLJIT_MUL:
2349  case SLJIT_AND:
2350  case SLJIT_OR:
2351  case SLJIT_XOR:
2352  return 1;
2353  }
2354  return 0;
2355 }
2356 
2357 static const struct ins_forms add_forms = {
2358  0x1a00, /* ar */
2359  0xb9080000, /* agr */
2360  0xb9f80000, /* ark */
2361  0xb9e80000, /* agrk */
2362  0x5a000000, /* a */
2363  0xe3000000005a, /* ay */
2364  0xe30000000008, /* ag */
2365 };
2366 
2367 static const struct ins_forms logical_add_forms = {
2368  0x1e00, /* alr */
2369  0xb90a0000, /* algr */
2370  0xb9fa0000, /* alrk */
2371  0xb9ea0000, /* algrk */
2372  0x5e000000, /* al */
2373  0xe3000000005e, /* aly */
2374  0xe3000000000a, /* alg */
2375 };
2376 
2377 static sljit_s32 sljit_emit_add(struct sljit_compiler *compiler, sljit_s32 op,
2378  sljit_s32 dst, sljit_sw dstw,
2379  sljit_s32 src1, sljit_sw src1w,
2380  sljit_s32 src2, sljit_sw src2w)
2381 {
2382  int sets_overflow = (op & VARIABLE_FLAG_MASK) == SLJIT_SET_OVERFLOW;
2383  int sets_zero_overflow = (op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)) == (SLJIT_SET_Z | SLJIT_SET_OVERFLOW);
2384  const struct ins_forms *forms;
2385  sljit_ins ins;
2386 
2387  if (src2 == SLJIT_IMM) {
2388  if (!sets_zero_overflow && is_s8(src2w) && (src1 & SLJIT_MEM) && (dst == src1 && dstw == src1w)) {
2389  if (sets_overflow)
2390  ins = (op & SLJIT_32) ? 0xeb000000006a /* asi */ : 0xeb000000007a /* agsi */;
2391  else
2392  ins = (op & SLJIT_32) ? 0xeb000000006e /* alsi */ : 0xeb000000007e /* algsi */;
2393  return emit_siy(compiler, ins, dst, dstw, src2w);
2394  }
2395 
2396  if (is_s16(src2w)) {
2397  if (sets_overflow)
2398  ins = (op & SLJIT_32) ? 0xec00000000d8 /* ahik */ : 0xec00000000d9 /* aghik */;
2399  else
2400  ins = (op & SLJIT_32) ? 0xec00000000da /* alhsik */ : 0xec00000000db /* alghsik */;
2401  FAIL_IF(emit_rie_d(compiler, ins, dst, src1, src1w, src2w));
2402  goto done;
2403  }
2404 
2405  if (!sets_overflow) {
2406  if ((op & SLJIT_32) || is_u32(src2w)) {
2407  ins = (op & SLJIT_32) ? 0xc20b00000000 /* alfi */ : 0xc20a00000000 /* algfi */;
2408  FAIL_IF(emit_ri(compiler, ins, dst, src1, src1w, src2w, RIL_A));
2409  goto done;
2410  }
2411  if (is_u32(-src2w)) {
2412  FAIL_IF(emit_ri(compiler, 0xc20400000000 /* slgfi */, dst, src1, src1w, -src2w, RIL_A));
2413  goto done;
2414  }
2415  }
2416  else if ((op & SLJIT_32) || is_s32(src2w)) {
2417  ins = (op & SLJIT_32) ? 0xc20900000000 /* afi */ : 0xc20800000000 /* agfi */;
2418  FAIL_IF(emit_ri(compiler, ins, dst, src1, src1w, src2w, RIL_A));
2419  goto done;
2420  }
2421  }
2422 
2423  forms = sets_overflow ? &add_forms : &logical_add_forms;
2424  FAIL_IF(emit_commutative(compiler, forms, dst, src1, src1w, src2, src2w));
2425 
2426 done:
2427  if (sets_zero_overflow)
2428  FAIL_IF(update_zero_overflow(compiler, op, FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0));
2429 
2430  if (dst & SLJIT_MEM)
2431  return store_word(compiler, tmp0, dst, dstw, op & SLJIT_32);
2432 
2433  return SLJIT_SUCCESS;
2434 }
2435 
2436 static const struct ins_forms sub_forms = {
2437  0x1b00, /* sr */
2438  0xb9090000, /* sgr */
2439  0xb9f90000, /* srk */
2440  0xb9e90000, /* sgrk */
2441  0x5b000000, /* s */
2442  0xe3000000005b, /* sy */
2443  0xe30000000009, /* sg */
2444 };
2445 
2446 static const struct ins_forms logical_sub_forms = {
2447  0x1f00, /* slr */
2448  0xb90b0000, /* slgr */
2449  0xb9fb0000, /* slrk */
2450  0xb9eb0000, /* slgrk */
2451  0x5f000000, /* sl */
2452  0xe3000000005f, /* sly */
2453  0xe3000000000b, /* slg */
2454 };
2455 
2456 static sljit_s32 sljit_emit_sub(struct sljit_compiler *compiler, sljit_s32 op,
2457  sljit_s32 dst, sljit_sw dstw,
2458  sljit_s32 src1, sljit_sw src1w,
2459  sljit_s32 src2, sljit_sw src2w)
2460 {
2461  sljit_s32 flag_type = GET_FLAG_TYPE(op);
2462  int sets_signed = (flag_type >= SLJIT_SIG_LESS && flag_type <= SLJIT_NOT_OVERFLOW);
2463  int sets_zero_overflow = (op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)) == (SLJIT_SET_Z | SLJIT_SET_OVERFLOW);
2464  const struct ins_forms *forms;
2465  sljit_ins ins;
2466 
2467  if (dst == TMP_REG2 && flag_type <= SLJIT_SIG_LESS_EQUAL) {
2468  int compare_signed = flag_type >= SLJIT_SIG_LESS;
2469 
2470  compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_COMPARE;
2471 
2472  if (src2 == SLJIT_IMM) {
2473  if (compare_signed || ((op & VARIABLE_FLAG_MASK) == 0 && is_s32(src2w))) {
2474  if ((op & SLJIT_32) || is_s32(src2w)) {
2475  ins = (op & SLJIT_32) ? 0xc20d00000000 /* cfi */ : 0xc20c00000000 /* cgfi */;
2476  return emit_ri(compiler, ins, src1, src1, src1w, src2w, RIL_A);
2477  }
2478  }
2479  else {
2480  if ((op & SLJIT_32) || is_u32(src2w)) {
2481  ins = (op & SLJIT_32) ? 0xc20f00000000 /* clfi */ : 0xc20e00000000 /* clgfi */;
2482  return emit_ri(compiler, ins, src1, src1, src1w, src2w, RIL_A);
2483  }
2484  if (is_s16(src2w))
2485  return emit_rie_d(compiler, 0xec00000000db /* alghsik */, (sljit_s32)tmp0, src1, src1w, src2w);
2486  }
2487  }
2488  else if (src2 & SLJIT_MEM) {
2489  if ((op & SLJIT_32) && ((src2 & OFFS_REG_MASK) || is_u12(src2w))) {
2490  ins = compare_signed ? 0x59000000 /* c */ : 0x55000000 /* cl */;
2491  return emit_rx(compiler, ins, src1, src1, src1w, src2, src2w, RX_A);
2492  }
2493 
2494  if (compare_signed)
2495  ins = (op & SLJIT_32) ? 0xe30000000059 /* cy */ : 0xe30000000020 /* cg */;
2496  else
2497  ins = (op & SLJIT_32) ? 0xe30000000055 /* cly */ : 0xe30000000021 /* clg */;
2498  return emit_rx(compiler, ins, src1, src1, src1w, src2, src2w, RXY_A);
2499  }
2500 
2501  if (compare_signed)
2502  ins = (op & SLJIT_32) ? 0x1900 /* cr */ : 0xb9200000 /* cgr */;
2503  else
2504  ins = (op & SLJIT_32) ? 0x1500 /* clr */ : 0xb9210000 /* clgr */;
2505  return emit_rr(compiler, ins, src1, src1, src1w, src2, src2w);
2506  }
2507 
2508  if (src1 == SLJIT_IMM && src1w == 0 && (flag_type == 0 || sets_signed)) {
2509  ins = (op & SLJIT_32) ? 0x1300 /* lcr */ : 0xb9030000 /* lcgr */;
2510  FAIL_IF(emit_rr1(compiler, ins, dst, src2, src2w));
2511  goto done;
2512  }
2513 
2514  if (src2 == SLJIT_IMM) {
2515  sljit_sw neg_src2w = -src2w;
2516 
2517  if (sets_signed || neg_src2w != 0 || (op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)) == 0) {
2518  if (!sets_zero_overflow && is_s8(neg_src2w) && (src1 & SLJIT_MEM) && (dst == src1 && dstw == src1w)) {
2519  if (sets_signed)
2520  ins = (op & SLJIT_32) ? 0xeb000000006a /* asi */ : 0xeb000000007a /* agsi */;
2521  else
2522  ins = (op & SLJIT_32) ? 0xeb000000006e /* alsi */ : 0xeb000000007e /* algsi */;
2523  return emit_siy(compiler, ins, dst, dstw, neg_src2w);
2524  }
2525 
2526  if (is_s16(neg_src2w)) {
2527  if (sets_signed)
2528  ins = (op & SLJIT_32) ? 0xec00000000d8 /* ahik */ : 0xec00000000d9 /* aghik */;
2529  else
2530  ins = (op & SLJIT_32) ? 0xec00000000da /* alhsik */ : 0xec00000000db /* alghsik */;
2531  FAIL_IF(emit_rie_d(compiler, ins, dst, src1, src1w, neg_src2w));
2532  goto done;
2533  }
2534  }
2535 
2536  if (!sets_signed) {
2537  if ((op & SLJIT_32) || is_u32(src2w)) {
2538  ins = (op & SLJIT_32) ? 0xc20500000000 /* slfi */ : 0xc20400000000 /* slgfi */;
2539  FAIL_IF(emit_ri(compiler, ins, dst, src1, src1w, src2w, RIL_A));
2540  goto done;
2541  }
2542  if (is_u32(neg_src2w)) {
2543  FAIL_IF(emit_ri(compiler, 0xc20a00000000 /* algfi */, dst, src1, src1w, neg_src2w, RIL_A));
2544  goto done;
2545  }
2546  }
2547  else if ((op & SLJIT_32) || is_s32(neg_src2w)) {
2548  ins = (op & SLJIT_32) ? 0xc20900000000 /* afi */ : 0xc20800000000 /* agfi */;
2549  FAIL_IF(emit_ri(compiler, ins, dst, src1, src1w, neg_src2w, RIL_A));
2550  goto done;
2551  }
2552  }
2553 
2554  forms = sets_signed ? &sub_forms : &logical_sub_forms;
2555  FAIL_IF(emit_non_commutative(compiler, forms, dst, src1, src1w, src2, src2w));
2556 
2557 done:
2558  if (sets_signed) {
2559  sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;
2560 
2561  if ((op & VARIABLE_FLAG_MASK) != SLJIT_SET_OVERFLOW) {
2562  /* In case of overflow, the sign bit of the two source operands must be different, and
2563  - the first operand is greater if the sign bit of the result is set
2564  - the first operand is less if the sign bit of the result is not set
2565  The -result operation sets the correct sign, because the result cannot be zero.
2566  The overflow is considered greater, since the result must be equal to INT_MIN so its sign bit is set. */
2567  FAIL_IF(push_inst(compiler, brc(0xe, (op & SLJIT_32) ? (2 + 1) : (2 + 2))));
2568  FAIL_IF(push_inst(compiler, (op & SLJIT_32) ? lcr(tmp1, dst_r) : lcgr(tmp1, dst_r)));
2569  }
2570  else if (op & SLJIT_SET_Z)
2571  FAIL_IF(update_zero_overflow(compiler, op, dst_r));
2572  }
2573 
2574  if (dst & SLJIT_MEM)
2575  return store_word(compiler, tmp0, dst, dstw, op & SLJIT_32);
2576 
2577  return SLJIT_SUCCESS;
2578 }
2579 
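/* Illustrative sketch (not part of sljit): the fix-up above (brc plus
 * lcr/lcgr after a flag-setting subtraction) reconstructs the usual signed
 * compare relation "a < b  ==  (sign of a - b) XOR overflow". Helper name
 * chosen only for this sketch. */
static int signed_less_sketch(sljit_sw a, sljit_sw b)
{
	sljit_sw r = (sljit_sw)((sljit_uw)a - (sljit_uw)b);
	int overflow = ((a < 0) != (b < 0)) && ((r < 0) != (a < 0));
	return (r < 0) != overflow;
}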
2580 static const struct ins_forms multiply_forms = {
2581  0xb2520000, /* msr */
2582  0xb90c0000, /* msgr */
2583  0xb9fd0000, /* msrkc */
2584  0xb9ed0000, /* msgrkc */
2585  0x71000000, /* ms */
2586  0xe30000000051, /* msy */
2587  0xe3000000000c, /* msg */
2588 };
2589 
2590 static const struct ins_forms multiply_overflow_forms = {
2591  0,
2592  0,
2593  0xb9fd0000, /* msrkc */
2594  0xb9ed0000, /* msgrkc */
2595  0,
2596  0xe30000000053, /* msc */
2597  0xe30000000083, /* msgc */
2598 };
2599 
2600 static sljit_s32 sljit_emit_multiply(struct sljit_compiler *compiler, sljit_s32 op,
2601  sljit_s32 dst,
2602  sljit_s32 src1, sljit_sw src1w,
2603  sljit_s32 src2, sljit_sw src2w)
2604 {
2605  sljit_ins ins;
2606 
2607  if (HAS_FLAGS(op)) {
2608  /* if have_misc2 fails, this operation should be emulated. 32 bit emulation:
2609  FAIL_IF(push_inst(compiler, lgfr(tmp0, src1_r)));
2610  FAIL_IF(push_inst(compiler, msgfr(tmp0, src2_r)));
2611  if (dst_r != tmp0) {
2612  FAIL_IF(push_inst(compiler, lr(dst_r, tmp0)));
2613  }
2614  FAIL_IF(push_inst(compiler, aih(tmp0, 1)));
2615  FAIL_IF(push_inst(compiler, nihf(tmp0, ~1U)));
2616  FAIL_IF(push_inst(compiler, ipm(tmp1)));
2617  FAIL_IF(push_inst(compiler, oilh(tmp1, 0x2000))); */
2618 
2619  return emit_commutative(compiler, &multiply_overflow_forms, dst, src1, src1w, src2, src2w);
2620  }
2621 
2622  if (src2 == SLJIT_IMM) {
2623  if (is_s16(src2w)) {
2624  ins = (op & SLJIT_32) ? 0xa70c0000 /* mhi */ : 0xa70d0000 /* mghi */;
2625  return emit_ri(compiler, ins, dst, src1, src1w, src2w, RI_A);
2626  }
2627 
2628  if (is_s32(src2w)) {
2629  ins = (op & SLJIT_32) ? 0xc20100000000 /* msfi */ : 0xc20000000000 /* msgfi */;
2630  return emit_ri(compiler, ins, dst, src1, src1w, src2w, RIL_A);
2631  }
2632  }
2633 
2634  return emit_commutative(compiler, &multiply_forms, dst, src1, src1w, src2, src2w);
2635 }
2636 
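/* Illustrative sketch (not part of sljit): what the commented-out 32-bit
 * fallback above (lgfr/msgfr/aih/nihf/ipm/oilh) effectively tests - a 32x32
 * signed multiply overflows exactly when the 64-bit product no longer
 * sign-extends from its low 32 bits. Helper name chosen only for this
 * sketch. */
static int mul32_overflows_sketch(sljit_s32 a, sljit_s32 b, sljit_s32 *res)
{
	sljit_sw p = (sljit_sw)a * (sljit_sw)b; /* like msgfr on sign-extended inputs */
	*res = (sljit_s32)p;
	return p != (sljit_sw)*res;
}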
2637 static sljit_s32 sljit_emit_bitwise_imm(struct sljit_compiler *compiler, sljit_s32 type,
2638  sljit_s32 dst,
2639  sljit_s32 src1, sljit_sw src1w,
2640  sljit_uw imm, sljit_s32 count16)
2641 {
2642  sljit_s32 mode = compiler->mode;
2643  sljit_gpr dst_r = tmp0;
2644  sljit_s32 needs_move = 1;
2645 
2646  if (IS_GPR_REG(dst)) {
2647  dst_r = gpr(dst & REG_MASK);
2648  if (dst == src1)
2649  needs_move = 0;
2650  }
2651 
2652  if (needs_move)
2653  FAIL_IF(emit_move(compiler, dst_r, src1, src1w));
2654 
2655  if (type == SLJIT_AND) {
2656  if (!(mode & SLJIT_32))
2657  FAIL_IF(push_inst(compiler, 0xc00a00000000 /* nihf */ | R36A(dst_r) | (imm >> 32)));
2658  return push_inst(compiler, 0xc00b00000000 /* nilf */ | R36A(dst_r) | (imm & 0xffffffff));
2659  }
2660  else if (type == SLJIT_OR) {
2661  if (count16 >= 3) {
2662  FAIL_IF(push_inst(compiler, 0xc00c00000000 /* oihf */ | R36A(dst_r) | (imm >> 32)));
2663  return push_inst(compiler, 0xc00d00000000 /* oilf */ | R36A(dst_r) | (imm & 0xffffffff));
2664  }
2665 
2666  if (count16 >= 2) {
2667  if ((imm & 0x00000000ffffffffull) == 0)
2668  return push_inst(compiler, 0xc00c00000000 /* oihf */ | R36A(dst_r) | (imm >> 32));
2669  if ((imm & 0xffffffff00000000ull) == 0)
2670  return push_inst(compiler, 0xc00d00000000 /* oilf */ | R36A(dst_r) | (imm & 0xffffffff));
2671  }
2672 
2673  if ((imm & 0xffff000000000000ull) != 0)
2674  FAIL_IF(push_inst(compiler, 0xa5080000 /* oihh */ | R20A(dst_r) | (imm >> 48)));
2675  if ((imm & 0x0000ffff00000000ull) != 0)
2676  FAIL_IF(push_inst(compiler, 0xa5090000 /* oihl */ | R20A(dst_r) | ((imm >> 32) & 0xffff)));
2677  if ((imm & 0x00000000ffff0000ull) != 0)
2678  FAIL_IF(push_inst(compiler, 0xa50a0000 /* oilh */ | R20A(dst_r) | ((imm >> 16) & 0xffff)));
2679  if ((imm & 0x000000000000ffffull) != 0 || imm == 0)
2680  return push_inst(compiler, 0xa50b0000 /* oill */ | R20A(dst_r) | (imm & 0xffff));
2681  return SLJIT_SUCCESS;
2682  }
2683 
2684  if ((imm & 0xffffffff00000000ull) != 0)
2685  FAIL_IF(push_inst(compiler, 0xc00600000000 /* xihf */ | R36A(dst_r) | (imm >> 32)));
2686  if ((imm & 0x00000000ffffffffull) != 0 || imm == 0)
2687  return push_inst(compiler, 0xc00700000000 /* xilf */ | R36A(dst_r) | (imm & 0xffffffff));
2688  return SLJIT_SUCCESS;
2689 }
2690 
2691 static const struct ins_forms bitwise_and_forms = {
2692  0x1400, /* nr */
2693  0xb9800000, /* ngr */
2694  0xb9f40000, /* nrk */
2695  0xb9e40000, /* ngrk */
2696  0x54000000, /* n */
2697  0xe30000000054, /* ny */
2698  0xe30000000080, /* ng */
2699 };
2700 
2701 static const struct ins_forms bitwise_or_forms = {
2702  0x1600, /* or */
2703  0xb9810000, /* ogr */
2704  0xb9f60000, /* ork */
2705  0xb9e60000, /* ogrk */
2706  0x56000000, /* o */
2707  0xe30000000056, /* oy */
2708  0xe30000000081, /* og */
2709 };
2710 
2711 static const struct ins_forms bitwise_xor_forms = {
2712  0x1700, /* xr */
2713  0xb9820000, /* xgr */
2714  0xb9f70000, /* xrk */
2715  0xb9e70000, /* xgrk */
2716  0x57000000, /* x */
2717  0xe30000000057, /* xy */
2718  0xe30000000082, /* xg */
2719 };
2720 
2721 static sljit_s32 sljit_emit_bitwise(struct sljit_compiler *compiler, sljit_s32 op,
2722  sljit_s32 dst,
2723  sljit_s32 src1, sljit_sw src1w,
2724  sljit_s32 src2, sljit_sw src2w)
2725 {
2726  sljit_s32 type = GET_OPCODE(op);
2727  const struct ins_forms *forms;
2728 
2729  if (src2 == SLJIT_IMM && (!(op & SLJIT_SET_Z) || (type == SLJIT_AND && dst == TMP_REG2))) {
2730  sljit_s32 count16 = 0;
2731  sljit_uw imm = (sljit_uw)src2w;
2732 
2733  if (op & SLJIT_32)
2734  imm &= 0xffffffffull;
2735 
2736  if ((imm & 0x000000000000ffffull) != 0 || imm == 0)
2737  count16++;
2738  if ((imm & 0x00000000ffff0000ull) != 0)
2739  count16++;
2740  if ((imm & 0x0000ffff00000000ull) != 0)
2741  count16++;
2742  if ((imm & 0xffff000000000000ull) != 0)
2743  count16++;
2744 
2745  if (type == SLJIT_AND && dst == TMP_REG2 && count16 == 1) {
2746  sljit_gpr src_r = tmp1;
2747 
2748  if (FAST_IS_REG(src1))
2749  src_r = gpr(src1 & REG_MASK);
2750  else
2751  FAIL_IF(emit_move(compiler, tmp1, src1, src1w));
2752 
2753  if ((imm & 0x000000000000ffffull) != 0 || imm == 0)
2754  return push_inst(compiler, 0xa7010000 /* tmll */ | R20A(src_r) | imm);
2755  if ((imm & 0x00000000ffff0000ull) != 0)
2756  return push_inst(compiler, 0xa7000000 /* tmlh */ | R20A(src_r) | (imm >> 16));
2757  if ((imm & 0x0000ffff00000000ull) != 0)
2758  return push_inst(compiler, 0xa7030000 /* tmhl */ | R20A(src_r) | (imm >> 32));
2759  return push_inst(compiler, 0xa7020000 /* tmhh */ | R20A(src_r) | (imm >> 48));
2760  }
2761 
2762  if (!(op & SLJIT_SET_Z))
2763  return sljit_emit_bitwise_imm(compiler, type, dst, src1, src1w, imm, count16);
2764  }
2765 
2766  if (type == SLJIT_AND)
2767  forms = &bitwise_and_forms;
2768  else if (type == SLJIT_OR)
2769  forms = &bitwise_or_forms;
2770  else
2771  forms = &bitwise_xor_forms;
2772 
2773  return emit_commutative(compiler, forms, dst, src1, src1w, src2, src2w);
2774 }
2775 
2776 static sljit_s32 sljit_emit_shift(struct sljit_compiler *compiler, sljit_s32 op,
2777  sljit_s32 dst,
2778  sljit_s32 src1, sljit_sw src1w,
2779  sljit_s32 src2, sljit_sw src2w)
2780 {
2781  sljit_s32 type = GET_OPCODE(op);
2782  sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;
2783  sljit_gpr src_r = tmp0;
2784  sljit_gpr base_r = tmp0;
2785  sljit_ins imm = 0;
2786  sljit_ins ins;
2787 
2788  if (FAST_IS_REG(src1))
2789  src_r = gpr(src1);
2790  else
2791  FAIL_IF(emit_move(compiler, tmp0, src1, src1w));
2792 
2793  if (src2 != SLJIT_IMM) {
2794  if (FAST_IS_REG(src2))
2795  base_r = gpr(src2);
2796  else {
2797  FAIL_IF(emit_move(compiler, tmp1, src2, src2w));
2798  base_r = tmp1;
2799  }
2800 
2801  if ((op & SLJIT_32) && (type == SLJIT_MSHL || type == SLJIT_MLSHR || type == SLJIT_MASHR)) {
2802  if (base_r != tmp1) {
2803  FAIL_IF(push_inst(compiler, 0xec0000000055 /* risbg */ | R36A(tmp1) | R32A(base_r) | (59 << 24) | (1 << 23) | (63 << 16)));
2804  base_r = tmp1;
2805  } else
2806  FAIL_IF(push_inst(compiler, 0xa5070000 /* nill */ | R20A(tmp1) | 0x1f));
2807  }
2808  } else
2809  imm = (sljit_ins)(src2w & ((op & SLJIT_32) ? 0x1f : 0x3f));
2810 
2811  if ((op & SLJIT_32) && dst_r == src_r) {
2812  if (type == SLJIT_SHL || type == SLJIT_MSHL)
2813  ins = 0x89000000 /* sll */;
2814  else if (type == SLJIT_LSHR || type == SLJIT_MLSHR)
2815  ins = 0x88000000 /* srl */;
2816  else
2817  ins = 0x8a000000 /* sra */;
2818 
2819  FAIL_IF(push_inst(compiler, ins | R20A(dst_r) | R12A(base_r) | imm));
2820  } else {
2821  if (type == SLJIT_SHL || type == SLJIT_MSHL)
2822  ins = (op & SLJIT_32) ? 0xeb00000000df /* sllk */ : 0xeb000000000d /* sllg */;
2823  else if (type == SLJIT_LSHR || type == SLJIT_MLSHR)
2824  ins = (op & SLJIT_32) ? 0xeb00000000de /* srlk */ : 0xeb000000000c /* srlg */;
2825  else
2826  ins = (op & SLJIT_32) ? 0xeb00000000dc /* srak */ : 0xeb000000000a /* srag */;
2827 
2828  FAIL_IF(push_inst(compiler, ins | R36A(dst_r) | R32A(src_r) | R28A(base_r) | (imm << 16)));
2829  }
2830 
2831  if ((op & SLJIT_SET_Z) && type != SLJIT_ASHR)
2832  return push_inst(compiler, (op & SLJIT_32) ? or(dst_r, dst_r) : ogr(dst_r, dst_r));
2833 
2834  return SLJIT_SUCCESS;
2835 }
2836 
2837 static sljit_s32 sljit_emit_rotate(struct sljit_compiler *compiler, sljit_s32 op,
2838  sljit_s32 dst,
2839  sljit_s32 src1, sljit_sw src1w,
2840  sljit_s32 src2, sljit_sw src2w)
2841 {
2842  sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;
2843  sljit_gpr src_r = tmp0;
2844  sljit_gpr base_r = tmp0;
2845  sljit_ins imm = 0;
2846  sljit_ins ins;
2847 
2848  if (FAST_IS_REG(src1))
2849  src_r = gpr(src1);
2850  else
2851  FAIL_IF(emit_move(compiler, tmp0, src1, src1w));
2852 
2853  if (src2 != SLJIT_IMM) {
2854  if (FAST_IS_REG(src2))
2855  base_r = gpr(src2);
2856  else {
2857  FAIL_IF(emit_move(compiler, tmp1, src2, src2w));
2858  base_r = tmp1;
2859  }
2860  }
2861 
2862  if (GET_OPCODE(op) == SLJIT_ROTR) {
2863  if (src2 != SLJIT_IMM) {
2864  ins = (op & SLJIT_32) ? 0x1300 /* lcr */ : 0xb9030000 /* lcgr */;
2865  FAIL_IF(push_inst(compiler, ins | R4A(tmp1) | R0A(base_r)));
2866  base_r = tmp1;
2867  } else
2868  src2w = -src2w;
2869  }
2870 
2871  if (src2 == SLJIT_IMM)
2872  imm = (sljit_ins)(src2w & ((op & SLJIT_32) ? 0x1f : 0x3f));
2873 
2874  ins = (op & SLJIT_32) ? 0xeb000000001d /* rll */ : 0xeb000000001c /* rllg */;
2875  return push_inst(compiler, ins | R36A(dst_r) | R32A(src_r) | R28A(base_r) | (imm << 16));
2876 }
2877 
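/* Illustrative sketch (not part of sljit): SLJIT_ROTR above negates the
 * rotate amount (lcr/lcgr, or src2w = -src2w for immediates) and still emits
 * a rotate-left (rll/rllg), relying on rotr(x, n) == rotl(x, -n mod width). */
static sljit_uw rotr64_sketch(sljit_uw x, unsigned int n)
{
	unsigned int left = (0u - n) & 63;              /* -n mod 64 */
	return (x << left) | (x >> ((64 - left) & 63)); /* rotate left by -n */
}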
2878 static const struct ins_forms addc_forms = {
2879  0xb9980000, /* alcr */
2880  0xb9880000, /* alcgr */
2881  0,
2882  0,
2883  0,
2884  0xe30000000098, /* alc */
2885  0xe30000000088, /* alcg */
2886 };
2887 
2888 static const struct ins_forms subc_forms = {
2889  0xb9990000, /* slbr */
2890  0xb9890000, /* slbgr */
2891  0,
2892  0,
2893  0,
2894  0xe30000000099, /* slb */
2895  0xe30000000089, /* slbg */
2896 };
2897 
2898 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
2899  sljit_s32 dst, sljit_sw dstw,
2900  sljit_s32 src1, sljit_sw src1w,
2901  sljit_s32 src2, sljit_sw src2w)
2902 {
2903  CHECK_ERROR();
2904  CHECK(check_sljit_emit_op2(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w));
2905  ADJUST_LOCAL_OFFSET(dst, dstw);
2906  ADJUST_LOCAL_OFFSET(src1, src1w);
2907  ADJUST_LOCAL_OFFSET(src2, src2w);
2908 
2909  compiler->mode = op & SLJIT_32;
2910  compiler->status_flags_state = op & (VARIABLE_FLAG_MASK | SLJIT_SET_Z);
2911 
2912  if (is_commutative(op) && src1 == SLJIT_IMM && src2 != SLJIT_IMM) {
2913  src1 ^= src2;
2914  src2 ^= src1;
2915  src1 ^= src2;
2916 
2917  src1w ^= src2w;
2918  src2w ^= src1w;
2919  src1w ^= src2w;
2920  }
2921 
2922  switch (GET_OPCODE(op)) {
2923  case SLJIT_ADD:
2924  compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_ADD;
2925  return sljit_emit_add(compiler, op, dst, dstw, src1, src1w, src2, src2w);
2926  case SLJIT_ADDC:
2927  compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_ADD;
2928  FAIL_IF(emit_commutative(compiler, &addc_forms, dst, src1, src1w, src2, src2w));
2929  if (dst & SLJIT_MEM)
2930  return store_word(compiler, tmp0, dst, dstw, op & SLJIT_32);
2931  return SLJIT_SUCCESS;
2932  case SLJIT_SUB:
2933  compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_SUB;
2934  return sljit_emit_sub(compiler, op, dst, dstw, src1, src1w, src2, src2w);
2935  case SLJIT_SUBC:
2936  compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_SUB;
2937  FAIL_IF(emit_non_commutative(compiler, &subc_forms, dst, src1, src1w, src2, src2w));
2938  if (dst & SLJIT_MEM)
2939  return store_word(compiler, tmp0, dst, dstw, op & SLJIT_32);
2940  return SLJIT_SUCCESS;
2941  case SLJIT_MUL:
2942  FAIL_IF(sljit_emit_multiply(compiler, op, dst, src1, src1w, src2, src2w));
2943  break;
2944  case SLJIT_AND:
2945  case SLJIT_OR:
2946  case SLJIT_XOR:
2947  FAIL_IF(sljit_emit_bitwise(compiler, op, dst, src1, src1w, src2, src2w));
2948  break;
2949  case SLJIT_SHL:
2950  case SLJIT_MSHL:
2951  case SLJIT_LSHR:
2952  case SLJIT_MLSHR:
2953  case SLJIT_ASHR:
2954  case SLJIT_MASHR:
2955  FAIL_IF(sljit_emit_shift(compiler, op, dst, src1, src1w, src2, src2w));
2956  break;
2957  case SLJIT_ROTL:
2958  case SLJIT_ROTR:
2959  FAIL_IF(sljit_emit_rotate(compiler, op, dst, src1, src1w, src2, src2w));
2960  break;
2961  }
2962 
2963  if (dst & SLJIT_MEM)
2964  return store_word(compiler, tmp0, dst, dstw, op & SLJIT_32);
2965  return SLJIT_SUCCESS;
2966 }
2967 
2968 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op,
2969  sljit_s32 src1, sljit_sw src1w,
2970  sljit_s32 src2, sljit_sw src2w)
2971 {
2972  sljit_s32 dst_reg = (GET_OPCODE(op) == SLJIT_SUB || GET_OPCODE(op) == SLJIT_AND) ? TMP_REG2 : TMP_REG1;
2973 
2974  CHECK_ERROR();
2975  CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w));
2976 
2977  SLJIT_SKIP_CHECKS(compiler);
2978  return sljit_emit_op2(compiler, op, dst_reg, 0, src1, src1w, src2, src2w);
2979 }
2980 
2981 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2r(struct sljit_compiler *compiler, sljit_s32 op,
2982  sljit_s32 dst_reg,
2983  sljit_s32 src1, sljit_sw src1w,
2984  sljit_s32 src2, sljit_sw src2w)
2985 {
2986  CHECK_ERROR();
2987  CHECK(check_sljit_emit_op2r(compiler, op, dst_reg, src1, src1w, src2, src2w));
2988 
2989  switch (GET_OPCODE(op)) {
2990  case SLJIT_MULADD:
2991  SLJIT_SKIP_CHECKS(compiler);
2992  FAIL_IF(sljit_emit_op2(compiler, SLJIT_MUL | (op & SLJIT_32), 0 /* tmp0 */, 0, src1, src1w, src2, src2w));
2993  return push_inst(compiler, ((op & SLJIT_32) ? 0x1a00 /* ar */ : 0xb9080000 /* agr */) | R4A(gpr(dst_reg)) | R0A(tmp0));
2994  }
2995 
2996  return SLJIT_SUCCESS;
2997 }
2998 
2999 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op,
3000  sljit_s32 dst_reg,
3001  sljit_s32 src1_reg,
3002  sljit_s32 src2_reg,
3003  sljit_s32 src3, sljit_sw src3w)
3004 {
3005  sljit_s32 is_right;
3006  sljit_sw bit_length = (op & SLJIT_32) ? 32 : 64;
3007  sljit_gpr dst_r = gpr(dst_reg);
3008  sljit_gpr src1_r = gpr(src1_reg);
3009  sljit_gpr src2_r = gpr(src2_reg);
3010  sljit_gpr src3_r = tmp1;
3011  sljit_ins ins;
3012 
3013  CHECK_ERROR();
3014  CHECK(check_sljit_emit_shift_into(compiler, op, dst_reg, src1_reg, src2_reg, src3, src3w));
3015 
3016  is_right = (GET_OPCODE(op) == SLJIT_LSHR || GET_OPCODE(op) == SLJIT_MLSHR);
3017 
3018  if (src1_reg == src2_reg) {
3019  SLJIT_SKIP_CHECKS(compiler);
3020  return sljit_emit_op2(compiler, (is_right ? SLJIT_ROTR : SLJIT_ROTL) | (op & SLJIT_32), dst_reg, 0, src1_reg, 0, src3, src3w);
3021  }
3022 
3023  ADJUST_LOCAL_OFFSET(src3, src3w);
3024 
3025  if (src3 == SLJIT_IMM) {
3026  src3w &= bit_length - 1;
3027 
3028  if (src3w == 0)
3029  return SLJIT_SUCCESS;
3030 
3031  if (op & SLJIT_32) {
3032  if (dst_r == src1_r) {
3033  ins = is_right ? 0x88000000 /* srl */ : 0x89000000 /* sll */;
3034  FAIL_IF(push_inst(compiler, ins | R20A(dst_r) | (sljit_ins)src3w));
3035  } else {
3036  ins = is_right ? 0xeb00000000de /* srlk */ : 0xeb00000000df /* sllk */;
3037  FAIL_IF(push_inst(compiler, ins | R36A(dst_r) | R32A(src1_r) | ((sljit_ins)src3w << 16)));
3038  }
3039  } else {
3040  ins = is_right ? 0xeb000000000c /* srlg */ : 0xeb000000000d /* sllg */;
3041  FAIL_IF(push_inst(compiler, ins | R36A(dst_r) | R32A(src1_r) | ((sljit_ins)src3w << 16)));
3042  }
3043 
3044  ins = 0xec0000000055 /* risbg */;
3045 
3046  if (is_right) {
3047  src3w = bit_length - src3w;
3048  ins |= ((sljit_ins)(64 - bit_length) << 24) | ((sljit_ins)(63 - src3w) << 16) | ((sljit_ins)src3w << 8);
3049  } else
3050  ins |= ((sljit_ins)(64 - src3w) << 24) | ((sljit_ins)63 << 16) | ((sljit_ins)(src3w + 64 - bit_length) << 8);
3051 
3052  return push_inst(compiler, ins | R36A(dst_r) | R32A(src2_r));
3053  }
3054 
3055  if (!(src3 & SLJIT_MEM)) {
3056  src3_r = gpr(src3);
3057 
3058  if (dst_r == src3_r) {
3059  FAIL_IF(push_inst(compiler, 0x1800 /* lr */ | R4A(tmp1) | R0A(src3_r)));
3060  src3_r = tmp1;
3061  }
3062  } else
3063  FAIL_IF(load_word(compiler, tmp1, src3, src3w, op & SLJIT_32));
3064 
3065  if (op & SLJIT_32) {
3066  if (GET_OPCODE(op) == SLJIT_MSHL || GET_OPCODE(op) == SLJIT_MLSHR) {
3067  if (src3_r != tmp1) {
3068  FAIL_IF(push_inst(compiler, 0xec0000000055 /* risbg */ | R36A(tmp1) | R32A(src3_r) | (59 << 24) | (1 << 23) | (63 << 16)));
3069  src3_r = tmp1;
3070  } else
3071  FAIL_IF(push_inst(compiler, 0xa5070000 /* nill */ | R20A(tmp1) | 0x1f));
3072  }
3073 
3074  if (dst_r == src1_r) {
3075  ins = is_right ? 0x88000000 /* srl */ : 0x89000000 /* sll */;
3076  FAIL_IF(push_inst(compiler, ins | R20A(dst_r) | R12A(src3_r)));
3077  } else {
3078  ins = is_right ? 0xeb00000000de /* srlk */ : 0xeb00000000df /* sllk */;
3079  FAIL_IF(push_inst(compiler, ins | R36A(dst_r) | R32A(src1_r) | R28A(src3_r)));
3080  }
3081 
3082  if (src3_r != tmp1) {
3083  FAIL_IF(push_inst(compiler, 0xa50f0000 /* llill */ | R20A(tmp1) | 0x1f));
3084  FAIL_IF(push_inst(compiler, 0x1700 /* xr */ | R4A(tmp1) | R0A(src3_r)));
3085  } else
3086  FAIL_IF(push_inst(compiler, 0xc00700000000 /* xilf */ | R36A(tmp1) | 0x1f));
3087 
3088  ins = is_right ? 0xeb00000000df /* sllk */ : 0xeb00000000de /* srlk */;
3089  FAIL_IF(push_inst(compiler, ins | R36A(tmp0) | R32A(src2_r) | R28A(tmp1) | (0x1 << 16)));
3090 
3091  return push_inst(compiler, 0x1600 /* or */ | R4A(dst_r) | R0A(tmp0));
3092  }
3093 
3094  ins = is_right ? 0xeb000000000c /* srlg */ : 0xeb000000000d /* sllg */;
3095  FAIL_IF(push_inst(compiler, ins | R36A(dst_r) | R32A(src1_r) | R28A(src3_r)));
3096 
3097  ins = is_right ? 0xeb000000000d /* sllg */ : 0xeb000000000c /* srlg */;
3098 
3099  if (!(op & SLJIT_SHIFT_INTO_NON_ZERO)) {
3100  if (src3_r != tmp1)
3101  FAIL_IF(push_inst(compiler, 0xa50f0000 /* llill */ | R20A(tmp1) | 0x3f));
3102 
3103  FAIL_IF(push_inst(compiler, ins | R36A(tmp0) | R32A(src2_r) | (0x1 << 16)));
3104  src2_r = tmp0;
3105 
3106  if (src3_r != tmp1)
3107  FAIL_IF(push_inst(compiler, 0xb9820000 /* xgr */ | R4A(tmp1) | R0A(src3_r)));
3108  else
3109  FAIL_IF(push_inst(compiler, 0xc00700000000 /* xilf */ | R36A(tmp1) | 0x3f));
3110  } else
3111  FAIL_IF(push_inst(compiler, 0xb9030000 /* lcgr */ | R4A(tmp1) | R0A(src3_r)));
3112 
3113  FAIL_IF(push_inst(compiler, ins | R36A(tmp0) | R32A(src2_r) | R28A(tmp1)));
3114  return push_inst(compiler, 0xb9810000 /* ogr */ | R4A(dst_r) | R0A(tmp0));
3115 }
3116 
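/* Illustrative sketch (not part of sljit): the 64-bit "shift into" (funnel
 * shift) computed above. The extra single-bit shift encoded as (0x1 << 16)
 * keeps the second shift defined even when the amount is 0, exactly as the
 * two-step shift below does. Helper name chosen only for this sketch. */
static sljit_uw shl_into64_sketch(sljit_uw src1, sljit_uw src2, unsigned int n)
{
	n &= 63;
	/* (src2 >> 1) >> (63 - n) == src2 >> (64 - n), but stays defined for n == 0. */
	return (src1 << n) | ((src2 >> 1) >> (63 - n));
}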
3117 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op,
3118  sljit_s32 src, sljit_sw srcw)
3119 {
3120  sljit_gpr src_r;
3121  struct addr addr;
3122 
3123  CHECK_ERROR();
3124  CHECK(check_sljit_emit_op_src(compiler, op, src, srcw));
3125  ADJUST_LOCAL_OFFSET(src, srcw);
3126 
3127  switch (op) {
3128  case SLJIT_FAST_RETURN:
3129  src_r = FAST_IS_REG(src) ? gpr(src) : tmp1;
3130  if (src & SLJIT_MEM)
3131  FAIL_IF(load_word(compiler, tmp1, src, srcw, 0));
3132 
3133  return push_inst(compiler, br(src_r));
3134  case SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN:
3135  return SLJIT_SUCCESS;
3136  case SLJIT_PREFETCH_L1:
3137  case SLJIT_PREFETCH_L2:
3138  case SLJIT_PREFETCH_L3:
3139  case SLJIT_PREFETCH_ONCE:
3140  FAIL_IF(make_addr_bxy(compiler, &addr, src, srcw, tmp1));
3141  return push_inst(compiler, 0xe31000000036 /* pfd */ | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset));
3142  default:
3143  return SLJIT_SUCCESS;
3144  }
3145 
3146  return SLJIT_SUCCESS;
3147 }
3148 
3149 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_dst(struct sljit_compiler *compiler, sljit_s32 op,
3150  sljit_s32 dst, sljit_sw dstw)
3151 {
3152  sljit_gpr dst_r = link_r;
3153  sljit_s32 size;
3154 
3155  CHECK_ERROR();
3156  CHECK(check_sljit_emit_op_dst(compiler, op, dst, dstw));
3157  ADJUST_LOCAL_OFFSET(dst, dstw);
3158 
3159  switch (op) {
3160  case SLJIT_FAST_ENTER:
3161  if (FAST_IS_REG(dst))
3162  return push_inst(compiler, lgr(gpr(dst), link_r));
3163  break;
3164  case SLJIT_GET_RETURN_ADDRESS:
3165  dst_r = FAST_IS_REG(dst) ? gpr(dst) : tmp0;
3166 
3167  size = GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds - SLJIT_KEPT_SAVEDS_COUNT(compiler->options), 2);
3168  FAIL_IF(load_word(compiler, dst_r, SLJIT_MEM1(SLJIT_SP), compiler->local_size + size, 0));
3169  break;
3170  }
3171 
3172  if (dst & SLJIT_MEM)
3173  return store_word(compiler, dst_r, dst, dstw, 0);
3174 
3175  return SLJIT_SUCCESS;
3176 }
3177 
3178 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 type, sljit_s32 reg)
3179 {
3180  CHECK_REG_INDEX(check_sljit_get_register_index(type, reg));
3181 
3182  if (type == SLJIT_GP_REGISTER)
3183  return (sljit_s32)gpr(reg);
3184 
3185  if (type != SLJIT_FLOAT_REGISTER)
3186  return -1;
3187 
3188  return (sljit_s32)freg_map[reg];
3189 }
3190 
3191 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
3192  void *instruction, sljit_u32 size)
3193 {
3194  sljit_ins ins = 0;
3195 
3196  CHECK_ERROR();
3197  CHECK(check_sljit_emit_op_custom(compiler, instruction, size));
3198 
3199  memcpy((sljit_u8 *)&ins + sizeof(ins) - size, instruction, size);
3200  return push_inst(compiler, ins);
3201 }
3202 
3203 /* --------------------------------------------------------------------- */
3204 /* Floating point operators */
3205 /* --------------------------------------------------------------------- */
3206 
3207 #define FLOAT_LOAD 0
3208 #define FLOAT_STORE 1
3209 
3210 static sljit_s32 float_mem(struct sljit_compiler *compiler, sljit_s32 op,
3211  sljit_s32 reg,
3212  sljit_s32 mem, sljit_sw memw)
3213 {
3214  struct addr addr;
3215  sljit_ins ins;
3216 
3217  SLJIT_ASSERT(mem & SLJIT_MEM);
3218 
3219  if ((mem & OFFS_REG_MASK) || is_u12(memw) || !is_s20(memw)) {
3220  FAIL_IF(make_addr_bx(compiler, &addr, mem, memw, tmp1));
3221 
3222  if (op & FLOAT_STORE)
3223  ins = (op & SLJIT_32) ? 0x70000000 /* ste */ : 0x60000000 /* std */;
3224  else
3225  ins = (op & SLJIT_32) ? 0x78000000 /* le */ : 0x68000000 /* ld */;
3226 
3227  return push_inst(compiler, ins | F20(reg) | R16A(addr.index) | R12A(addr.base) | (sljit_ins)addr.offset);
3228  }
3229 
3230  FAIL_IF(make_addr_bxy(compiler, &addr, mem, memw, tmp1));
3231 
3232  if (op & FLOAT_STORE)
3233  ins = (op & SLJIT_32) ? 0xed0000000066 /* stey */ : 0xed0000000067 /* stdy */;
3234  else
3235  ins = (op & SLJIT_32) ? 0xed0000000064 /* ley */ : 0xed0000000065 /* ldy */;
3236 
3237  return push_inst(compiler, ins | F36(reg) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset));
3238 }
3239 
3240 static sljit_s32 emit_float(struct sljit_compiler *compiler, sljit_ins ins_r, sljit_ins ins,
3241  sljit_s32 reg,
3242  sljit_s32 src, sljit_sw srcw)
3243 {
3244  struct addr addr;
3245 
3246  if (!(src & SLJIT_MEM))
3247  return push_inst(compiler, ins_r | F4(reg) | F0(src));
3248 
3249  FAIL_IF(make_addr_bx(compiler, &addr, src, srcw, tmp1));
3250  return push_inst(compiler, ins | F36(reg) | R32A(addr.index) | R28A(addr.base) | ((sljit_ins)addr.offset << 16));
3251 }
3252 
3253 static sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,
3254  sljit_s32 dst, sljit_sw dstw,
3255  sljit_s32 src, sljit_sw srcw)
3256 {
3257  sljit_ins dst_r = FAST_IS_REG(dst) ? gpr(dst) : tmp0;
3258  sljit_ins ins;
3259 
3260  if (src & SLJIT_MEM) {
3261  FAIL_IF(float_mem(compiler, FLOAT_LOAD | (op & SLJIT_32), TMP_FREG1, src, srcw));
3262  src = TMP_FREG1;
3263  }
3264 
3265  /* M3 is set to 5 */
3266  if (GET_OPCODE(op) == SLJIT_CONV_SW_FROM_F64)
3267  ins = (op & SLJIT_32) ? 0xb3a85000 /* cgebr */ : 0xb3a95000 /* cgdbr */;
3268  else
3269  ins = (op & SLJIT_32) ? 0xb3985000 /* cfebr */ : 0xb3995000 /* cfdbr */;
3270 
3271  FAIL_IF(push_inst(compiler, ins | R4A(dst_r) | F0(src)));
3272 
3273  if (dst & SLJIT_MEM)
3274  return store_word(compiler, dst_r, dst, dstw, GET_OPCODE(op) >= SLJIT_CONV_S32_FROM_F64);
3275 
3276  return SLJIT_SUCCESS;
3277 }
3278 
3279 static sljit_s32 sljit_emit_fop1_conv_f64_from_w(struct sljit_compiler *compiler, sljit_ins ins,
3280  sljit_s32 dst, sljit_sw dstw,
3281  sljit_s32 src, sljit_sw srcw)
3282 {
3283  sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
3284 
3285  if (src == SLJIT_IMM) {
3286  FAIL_IF(push_load_imm_inst(compiler, tmp0, srcw));
3287  src = (sljit_s32)tmp0;
3288  }
3289  else if (src & SLJIT_MEM) {
3290  FAIL_IF(load_word(compiler, tmp0, src, srcw, ins & 0x100000));
3291  src = (sljit_s32)tmp0;
3292  }
3293 
3294  FAIL_IF(push_inst(compiler, ins | F4(dst_r) | R0(src)));
3295 
3296  if (dst & SLJIT_MEM)
3297  return float_mem(compiler, FLOAT_STORE | ((ins & 0x10000) ? 0 : SLJIT_32), TMP_FREG1, dst, dstw);
3298 
3299  return SLJIT_SUCCESS;
3300 }
3301 
3302 static sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
3303  sljit_s32 dst, sljit_sw dstw,
3304  sljit_s32 src, sljit_sw srcw)
3305 {
3306  sljit_ins ins;
3307 
3308  if (src == SLJIT_IMM && GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32)
3309  srcw = (sljit_s32)srcw;
3310 
3311  if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_SW)
3312  ins = (op & SLJIT_32) ? 0xb3a40000 /* cegbr */ : 0xb3a50000 /* cdgbr */;
3313  else
3314  ins = (op & SLJIT_32) ? 0xb3940000 /* cefbr */ : 0xb3950000 /* cdfbr */;
3315 
3316  return sljit_emit_fop1_conv_f64_from_w(compiler, ins, dst, dstw, src, srcw);
3317 }
3318 
3319 static sljit_s32 sljit_emit_fop1_conv_f64_from_uw(struct sljit_compiler *compiler, sljit_s32 op,
3320  sljit_s32 dst, sljit_sw dstw,
3321  sljit_s32 src, sljit_sw srcw)
3322 {
3323  sljit_ins ins;
3324 
3325  if (src == SLJIT_IMM && GET_OPCODE(op) == SLJIT_CONV_F64_FROM_U32)
3326  srcw = (sljit_u32)srcw;
3327 
3328  if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_UW)
3329  ins = (op & SLJIT_32) ? 0xb3a00000 /* celgbr */ : 0xb3a10000 /* cdlgbr */;
3330  else
3331  ins = (op & SLJIT_32) ? 0xb3900000 /* celfbr */ : 0xb3910000 /* cdlfbr */;
3332 
3333  return sljit_emit_fop1_conv_f64_from_w(compiler, ins, dst, dstw, src, srcw);
3334 }
3335 
3336 static sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op,
3337  sljit_s32 src1, sljit_sw src1w,
3338  sljit_s32 src2, sljit_sw src2w)
3339 {
3340  sljit_ins ins_r, ins;
3341 
3342  if (src1 & SLJIT_MEM) {
3343  FAIL_IF(float_mem(compiler, FLOAT_LOAD | (op & SLJIT_32), TMP_FREG1, src1, src1w));
3344  src1 = TMP_FREG1;
3345  }
3346 
3347  if (op & SLJIT_32) {
3348  ins_r = 0xb3090000 /* cebr */;
3349  ins = 0xed0000000009 /* ceb */;
3350  } else {
3351  ins_r = 0xb3190000 /* cdbr */;
3352  ins = 0xed0000000019 /* cdb */;
3353  }
3354 
3355  return emit_float(compiler, ins_r, ins, src1, src2, src2w);
3356 }
3357 
3358 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
3359  sljit_s32 dst, sljit_sw dstw,
3360  sljit_s32 src, sljit_sw srcw)
3361 {
3362  sljit_s32 dst_r;
3363  sljit_ins ins;
3364 
3365  CHECK_ERROR();
3366 
3367  SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);
3368 
3369  dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
3370 
3371  if (op == SLJIT_CONV_F64_FROM_F32)
3372  FAIL_IF(emit_float(compiler, 0xb3040000 /* ldebr */, 0xed0000000004 /* ldeb */, dst_r, src, srcw));
3373  else {
3374  if (src & SLJIT_MEM) {
3375  FAIL_IF(float_mem(compiler, FLOAT_LOAD | (op == SLJIT_CONV_F32_FROM_F64 ? 0 : (op & SLJIT_32)), dst_r, src, srcw));
3376  src = dst_r;
3377  }
3378 
3379  switch (GET_OPCODE(op)) {
3380  case SLJIT_MOV_F64:
3381  if (FAST_IS_REG(dst)) {
3382  if (dst == src)
3383  return SLJIT_SUCCESS;
3384 
3385  ins = (op & SLJIT_32) ? 0x3800 /* ler */ : 0x2800 /* ldr */;
3386  break;
3387  }
3388  return float_mem(compiler, FLOAT_STORE | (op & SLJIT_32), src, dst, dstw);
3389  case SLJIT_CONV_F64_FROM_F32:
3390  /* Only SLJIT_CONV_F32_FROM_F64. */
3391  ins = 0xb3440000 /* ledbr */;
3392  break;
3393  case SLJIT_NEG_F64:
3394  ins = (op & SLJIT_32) ? 0xb3030000 /* lcebr */ : 0xb3130000 /* lcdbr */;
3395  break;
3396  default:
3397  SLJIT_ASSERT(GET_OPCODE(op) == SLJIT_ABS_F64);
3398  ins = (op & SLJIT_32) ? 0xb3000000 /* lpebr */ : 0xb3100000 /* lpdbr */;
3399  break;
3400  }
3401 
3402  FAIL_IF(push_inst(compiler, ins | F4(dst_r) | F0(src)));
3403  }
3404 
3405  if (dst & SLJIT_MEM)
3406  return float_mem(compiler, FLOAT_STORE | (op & SLJIT_32), TMP_FREG1, dst, dstw);
3407 
3408  return SLJIT_SUCCESS;
3409 }
3410 
3411 #define FLOAT_MOV(op, dst_r, src_r) \
3412  (((op & SLJIT_32) ? 0x3800 /* ler */ : 0x2800 /* ldr */) | F4(dst_r) | F0(src_r))
3413 
3414 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op,
3415  sljit_s32 dst, sljit_sw dstw,
3416  sljit_s32 src1, sljit_sw src1w,
3417  sljit_s32 src2, sljit_sw src2w)
3418 {
3419  sljit_s32 dst_r = TMP_FREG1;
3420  sljit_ins ins_r, ins;
3421 
3422  CHECK_ERROR();
3423  CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
3424  ADJUST_LOCAL_OFFSET(dst, dstw);
3425  ADJUST_LOCAL_OFFSET(src1, src1w);
3426  ADJUST_LOCAL_OFFSET(src2, src2w);
3427 
3428  do {
3429  if (FAST_IS_REG(dst)) {
3430  dst_r = dst;
3431 
3432  if (dst == src1)
3433  break;
3434 
3435  if (dst == src2) {
3436  if (GET_OPCODE(op) == SLJIT_ADD_F64 || GET_OPCODE(op) == SLJIT_MUL_F64) {
3437  src2 = src1;
3438  src2w = src1w;
3439  src1 = dst;
3440  break;
3441  }
3442 
3443  FAIL_IF(push_inst(compiler, FLOAT_MOV(op, TMP_FREG1, src2)));
3444  src2 = TMP_FREG1;
3445  }
3446  }
3447 
3448  if (src1 & SLJIT_MEM)
3449  FAIL_IF(float_mem(compiler, FLOAT_LOAD | (op & SLJIT_32), dst_r, src1, src1w));
3450  else
3451  FAIL_IF(push_inst(compiler, FLOAT_MOV(op, dst_r, src1)));
3452  } while (0);
3453 
3454  switch (GET_OPCODE(op)) {
3455  case SLJIT_ADD_F64:
3456  ins_r = (op & SLJIT_32) ? 0xb30a0000 /* aebr */ : 0xb31a0000 /* adbr */;
3457  ins = (op & SLJIT_32) ? 0xed000000000a /* aeb */ : 0xed000000001a /* adb */;
3458  break;
3459  case SLJIT_SUB_F64:
3460  ins_r = (op & SLJIT_32) ? 0xb30b0000 /* sebr */ : 0xb31b0000 /* sdbr */;
3461  ins = (op & SLJIT_32) ? 0xed000000000b /* seb */ : 0xed000000001b /* sdb */;
3462  break;
3463  case SLJIT_MUL_F64:
3464  ins_r = (op & SLJIT_32) ? 0xb3170000 /* meebr */ : 0xb31c0000 /* mdbr */;
3465  ins = (op & SLJIT_32) ? 0xed0000000017 /* meeb */ : 0xed000000001c /* mdb */;
3466  break;
3467  default:
3468  SLJIT_ASSERT(GET_OPCODE(op) == SLJIT_DIV_F64);
3469  ins_r = (op & SLJIT_32) ? 0xb30d0000 /* debr */ : 0xb31d0000 /* ddbr */;
3470  ins = (op & SLJIT_32) ? 0xed000000000d /* deb */ : 0xed000000001d /* ddb */;
3471  break;
3472  }
3473 
3474  FAIL_IF(emit_float(compiler, ins_r, ins, dst_r, src2, src2w));
3475 
3476  if (dst & SLJIT_MEM)
3477  return float_mem(compiler, FLOAT_STORE | (op & SLJIT_32), TMP_FREG1, dst, dstw);
3478 
3479  return SLJIT_SUCCESS;
3480 }
3481 
3482 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2r(struct sljit_compiler *compiler, sljit_s32 op,
3483  sljit_s32 dst_freg,
3484  sljit_s32 src1, sljit_sw src1w,
3485  sljit_s32 src2, sljit_sw src2w)
3486 {
3487  sljit_s32 reg;
3488 
3489  CHECK_ERROR();
3490  CHECK(check_sljit_emit_fop2r(compiler, op, dst_freg, src1, src1w, src2, src2w));
3491  ADJUST_LOCAL_OFFSET(src1, src1w);
3492  ADJUST_LOCAL_OFFSET(src2, src2w);
3493 
3494  if (src2 & SLJIT_MEM) {
3495  FAIL_IF(float_mem(compiler, FLOAT_LOAD | (op & SLJIT_32), TMP_FREG1, src2, src2w));
3496  src2 = TMP_FREG1;
3497  }
3498 
3499  if (src1 & SLJIT_MEM) {
3500  reg = (dst_freg == src2) ? TMP_FREG1 : dst_freg;
3501  FAIL_IF(float_mem(compiler, FLOAT_LOAD | (op & SLJIT_32), reg, src1, src1w));
3502  src1 = reg;
3503  }
3504 
3505  return push_inst(compiler, 0xb3720000 /* cpsdr */ | F12(src2) | F4(dst_freg) | F0(src1));
3506 }
3507 
3508 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset32(struct sljit_compiler *compiler,
3509  sljit_s32 freg, sljit_f32 value)
3510 {
3511  union {
3512  sljit_s32 imm;
3513  sljit_f32 value;
3514  } u;
3515 
3516  CHECK_ERROR();
3517  CHECK(check_sljit_emit_fset32(compiler, freg, value));
3518 
3519  u.value = value;
3520 
3521  FAIL_IF(push_load_imm_inst(compiler, tmp1, (sljit_sw)(((sljit_uw)u.imm << 32))));
3522  return push_inst(compiler, 0xb3c10000 /* ldgr */ | F4(freg) | R0A(tmp1));
3523 }
3524 
3525 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset64(struct sljit_compiler *compiler,
3526  sljit_s32 freg, sljit_f64 value)
3527 {
3528  union {
3529  sljit_sw imm;
3530  sljit_f64 value;
3531  } u;
3532 
3533  CHECK_ERROR();
3534  CHECK(check_sljit_emit_fset64(compiler, freg, value));
3535 
3536  u.value = value;
3537 
3538  FAIL_IF(push_load_imm_inst(compiler, tmp1, (sljit_sw)u.imm));
3539  return push_inst(compiler, 0xb3c10000 /* ldgr */ | F4(freg) | R0A(tmp1));
3540 }
3541 
3542 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fcopy(struct sljit_compiler *compiler, sljit_s32 op,
3543  sljit_s32 freg, sljit_s32 reg)
3544 {
3545  sljit_gpr gen_r;
3546 
3547  CHECK_ERROR();
3548  CHECK(check_sljit_emit_fcopy(compiler, op, freg, reg));
3549 
3550  gen_r = gpr(reg);
3551 
3552  if (GET_OPCODE(op) == SLJIT_COPY_TO_F64) {
3553  if (op & SLJIT_32) {
3554  FAIL_IF(push_inst(compiler, 0xeb000000000d /* sllg */ | R36A(tmp0) | R32A(gen_r) | (32 << 16)));
3555  gen_r = tmp0;
3556  }
3557 
3558  return push_inst(compiler, 0xb3c10000 /* ldgr */ | F4(freg) | R0A(gen_r));
3559  }
3560 
3561  FAIL_IF(push_inst(compiler, 0xb3cd0000 /* lgdr */ | R4A(gen_r) | F0(freg)));
3562 
3563  if (!(op & SLJIT_32))
3564  return SLJIT_SUCCESS;
3565 
3566  return push_inst(compiler, 0xeb000000000c /* srlg */ | R36A(gen_r) | R32A(gen_r) | (32 << 16));
3567 }
3568 
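/* Illustrative sketch (not part of sljit): the SLJIT_32 paths above (sllg
 * before ldgr, srlg after lgdr, and the << 32 in sljit_emit_fset32) exist
 * because a binary32 value occupies the leftmost 32 bits of an s390x FPR.
 * Helper names chosen only for this sketch; <string.h> is assumed available,
 * as elsewhere in this file. */
static sljit_uw f32_to_fpr_image_sketch(sljit_f32 f)
{
	sljit_u32 bits;
	memcpy(&bits, &f, sizeof(bits));
	return (sljit_uw)bits << 32;        /* like sllg ..., 32 before ldgr */
}

static sljit_f32 fpr_image_to_f32_sketch(sljit_uw image)
{
	sljit_u32 bits = (sljit_u32)(image >> 32); /* like srlg ..., 32 after lgdr */
	sljit_f32 f;
	memcpy(&f, &bits, sizeof(f));
	return f;
}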
3569 /* --------------------------------------------------------------------- */
3570 /* Conditional instructions */
3571 /* --------------------------------------------------------------------- */
3572 
3573 SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
3574 {
3575  struct sljit_label *label;
3576 
3577  CHECK_ERROR_PTR();
3578  CHECK_PTR(check_sljit_emit_label(compiler));
3579 
3580  if (compiler->last_label && compiler->last_label->size == compiler->size)
3581  return compiler->last_label;
3582 
3583  label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
3584  PTR_FAIL_IF(!label);
3585  set_label(label, compiler);
3586  return label;
3587 }
3588 
3589 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
3590 {
3591  struct sljit_jump *jump;
3592  sljit_u8 mask = ((type & 0xff) < SLJIT_JUMP) ? get_cc(compiler, type & 0xff) : 0xf;
3593 
3594  CHECK_ERROR_PTR();
3595  CHECK_PTR(check_sljit_emit_jump(compiler, type));
3596 
3597  /* record jump */
3598  jump = (struct sljit_jump *)ensure_abuf(compiler, sizeof(struct sljit_jump));
3599  PTR_FAIL_IF(!jump);
3600  set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
3601  jump->addr = compiler->size;
3602 
3603  /* emit jump instruction */
3604  type &= 0xff;
3605  if (type >= SLJIT_FAST_CALL)
3606  PTR_FAIL_IF(push_inst(compiler, brasl(link_r, 0)));
3607  else
3608  PTR_FAIL_IF(push_inst(compiler, brcl(mask, 0)));
3609 
3610  return jump;
3611 }
3612 
3613 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type,
3614  sljit_s32 arg_types)
3615 {
3616  SLJIT_UNUSED_ARG(arg_types);
3617  CHECK_ERROR_PTR();
3618  CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));
3619 
3620  if (type & SLJIT_CALL_RETURN) {
3621  PTR_FAIL_IF(emit_stack_frame_release(compiler, r14));
3622  type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP);
3623  }
3624 
3625  SLJIT_SKIP_CHECKS(compiler);
3626  return sljit_emit_jump(compiler, type);
3627 }
3628 
3629 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
3630 {
3631  sljit_gpr src_r = FAST_IS_REG(src) ? gpr(src) : tmp1;
3632 
3633  CHECK_ERROR();
3634  CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
3635 
3636  if (src == SLJIT_IMM) {
3637  SLJIT_ASSERT(!(srcw & 1)); /* target address must be even */
3638  FAIL_IF(push_load_imm_inst(compiler, src_r, srcw));
3639  }
3640  else if (src & SLJIT_MEM) {
3641  ADJUST_LOCAL_OFFSET(src, srcw);
3642  FAIL_IF(load_word(compiler, src_r, src, srcw, 0 /* 64-bit */));
3643  }
3644 
3645  /* emit jump instruction */
3646  if (type >= SLJIT_FAST_CALL)
3647  return push_inst(compiler, basr(link_r, src_r));
3648 
3649  return push_inst(compiler, br(src_r));
3650 }
3651 
3652 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type,
3653  sljit_s32 arg_types,
3654  sljit_s32 src, sljit_sw srcw)
3655 {
3656  SLJIT_UNUSED_ARG(arg_types);
3657 
3658  CHECK_ERROR();
3659  CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw));
3660 
3661  SLJIT_ASSERT(gpr(TMP_REG2) == tmp1);
3662 
3663  if (src & SLJIT_MEM) {
3664  ADJUST_LOCAL_OFFSET(src, srcw);
3665  FAIL_IF(load_word(compiler, tmp1, src, srcw, 0 /* 64-bit */));
3666  src = TMP_REG2;
3667  srcw = 0;
3668  }
3669 
3670  if (type & SLJIT_CALL_RETURN) {
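 /* The frame release below restores the callee-saved registers, so a
    call target held in one of them must be moved to a scratch register
    first. */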
3671  if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) {
3672  FAIL_IF(push_inst(compiler, lgr(tmp1, gpr(src))));
3673  src = TMP_REG2;
3674  srcw = 0;
3675  }
3676 
3677  FAIL_IF(emit_stack_frame_release(compiler, r14));
3678  type = SLJIT_JUMP;
3679  }
3680 
3681  SLJIT_SKIP_CHECKS(compiler);
3682  return sljit_emit_ijump(compiler, type, src, srcw);
3683 }
3684 
3685 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
3686  sljit_s32 dst, sljit_sw dstw,
3687  sljit_s32 type)
3688 {
3689  sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;
3690  sljit_gpr loc_r = tmp1;
3691  sljit_u8 mask = get_cc(compiler, type);
3692 
3693  CHECK_ERROR();
3694  CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type));
3695 
3696  switch (GET_OPCODE(op)) {
3697  case SLJIT_AND:
3698  case SLJIT_OR:
3699  case SLJIT_XOR:
3700  compiler->status_flags_state = op & SLJIT_SET_Z;
3701 
3702  /* dst is also source operand */
3703  if (dst & SLJIT_MEM)
3704  FAIL_IF(load_word(compiler, dst_r, dst, dstw, op & SLJIT_32));
3705 
3706  break;
3707  case SLJIT_MOV32:
3708  op |= SLJIT_32;
3709  /* fallthrough */
3710  case SLJIT_MOV:
3711  /* can write straight into destination */
3712  loc_r = dst_r;
3713  break;
3714  default:
3715  SLJIT_UNREACHABLE();
3716  }
3717 
3718  /* TODO(mundaym): fold into cmov helper function? */
3719  #define LEVAL(i) i(loc_r, 1, mask)
3720  if (have_lscond2()) {
3721  FAIL_IF(push_load_imm_inst(compiler, loc_r, 0));
3722  FAIL_IF(push_inst(compiler,
3723  WHEN2(op & SLJIT_32, lochi, locghi)));
3724  } else {
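 /* Without load-on-condition immediates: assume the condition holds and
    load 1, then branch over the following 4 byte load of 0 when it really
    does. brc counts in halfwords, so 2 + 2 skips the brc itself plus one
    lhi/lghi. */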
3725  FAIL_IF(push_load_imm_inst(compiler, loc_r, 1));
3726  FAIL_IF(push_inst(compiler, brc(mask, 2 + 2)));
3727  FAIL_IF(push_load_imm_inst(compiler, loc_r, 0));
3728  }
3729  #undef LEVAL
3730 
3731  /* apply bitwise op and set condition codes */
3732  switch (GET_OPCODE(op)) {
3733  #define LEVAL(i) i(dst_r, loc_r)
3734  case SLJIT_AND:
3735  FAIL_IF(push_inst(compiler,
3736  WHEN2(op & SLJIT_32, nr, ngr)));
3737  break;
3738  case SLJIT_OR:
3739  FAIL_IF(push_inst(compiler,
3740  WHEN2(op & SLJIT_32, or, ogr)));
3741  break;
3742  case SLJIT_XOR:
3743  FAIL_IF(push_inst(compiler,
3744  WHEN2(op & SLJIT_32, xr, xgr)));
3745  break;
3746  #undef LEVAL
3747  }
3748 
3749  /* store result to memory if required */
3750  if (dst & SLJIT_MEM)
3751  return store_word(compiler, dst_r, dst, dstw, (op & SLJIT_32));
3752 
3753  return SLJIT_SUCCESS;
3754 }
3755 
3756 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_select(struct sljit_compiler *compiler, sljit_s32 type,
3757  sljit_s32 dst_reg,
3758  sljit_s32 src1, sljit_sw src1w,
3759  sljit_s32 src2_reg)
3760 {
3761  sljit_ins mask;
3762  sljit_gpr src_r;
3763  sljit_gpr dst_r = gpr(dst_reg);
3764  sljit_ins ins;
3765 
3766  CHECK_ERROR();
3767  CHECK(check_sljit_emit_select(compiler, type, dst_reg, src1, src1w, src2_reg));
3768 
3769  ADJUST_LOCAL_OFFSET(src1, src1w);
3770 
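 /* sljit condition codes come in pairs that differ only in bit 0, so
    flipping that bit inverts the test; this lets the two sources be
    swapped when dst_reg already holds src1. */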
3771  if (dst_reg != src2_reg) {
3772  if (src1 == dst_reg) {
3773  src1 = src2_reg;
3774  src1w = 0;
3775  type ^= 0x1;
3776  } else {
3777  if (ADDRESSING_DEPENDS_ON(src1, dst_reg)) {
3778  FAIL_IF(load_word(compiler, dst_r, src1, src1w, type & SLJIT_32));
3779  src1 = src2_reg;
3780  src1w = 0;
3781  type ^= 0x1;
3782  } else
3783  FAIL_IF(push_inst(compiler, ((type & SLJIT_32) ? 0x1800 /* lr */ : 0xb9040000 /* lgr */) | R4A(dst_r) | R0A(gpr(src2_reg))));
3784  }
3785  }
3786 
3787  mask = get_cc(compiler, type & ~SLJIT_32);
3788 
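 /* loc/locg only accept a base register plus a signed 20 bit displacement,
    so indexed addresses and out-of-range offsets are folded into tmp1
    first. */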
3789  if (src1 & SLJIT_MEM) {
3790  if (src1 & OFFS_REG_MASK) {
3791  src_r = gpr(OFFS_REG(src1));
3792 
3793  if (src1w != 0) {
3794  FAIL_IF(push_inst(compiler, 0xeb000000000d /* sllg */ | R36A(tmp1) | R32A(src_r) | ((sljit_ins)(src1w & 0x3) << 16)));
3795  src_r = tmp1;
3796  }
3797 
3798  FAIL_IF(push_inst(compiler, 0xb9e80000 /* agrk */ | R12A(src_r) | R4A(tmp1) | R0A(gpr(src1 & REG_MASK))));
3799  src_r = tmp1;
3800  src1w = 0;
3801  } else if (!is_s20(src1w)) {
3802  FAIL_IF(push_load_imm_inst(compiler, tmp1, src1w));
3803 
3804  if (src1 & REG_MASK)
3805  FAIL_IF(push_inst(compiler, 0xb9e80000 /* agrk */ | R12A(tmp1) | R4A(tmp1) | R0A(gpr(src1 & REG_MASK))));
3806 
3807  src_r = tmp1;
3808  src1w = 0;
3809  } else
3810  src_r = gpr(src1 & REG_MASK);
3811 
3812  ins = (type & SLJIT_32) ? 0xeb00000000f2 /* loc */ : 0xeb00000000e2 /* locg */;
3813  return push_inst(compiler, ins | R36A(dst_r) | (mask << 32) | R28A(src_r) | disp_s20((sljit_s32)src1w));
3814  }
3815 
3816  if (src1 == SLJIT_IMM) {
3817  if (type & SLJIT_32)
3818  src1w = (sljit_s32)src1w;
3819 
3820  if (have_lscond2() && is_s16(src1w)) {
3821  ins = (type & SLJIT_32) ? 0xec0000000042 /* lochi */ : 0xec0000000046 /* locghi */;
3822  return push_inst(compiler, ins | R36A(dst_r) | (mask << 32) | (sljit_ins)(src1w & 0xffff) << 16);
3823  }
3824 
3825  FAIL_IF(push_load_imm_inst(compiler, tmp1, src1w));
3826  src_r = tmp1;
3827  } else
3828  src_r = gpr(src1);
3829 
3830  ins = (type & SLJIT_32) ? 0xb9f20000 /* locr */ : 0xb9e20000 /* locgr */;
3831  return push_inst(compiler, ins | (mask << 12) | R4A(dst_r) | R0A(src_r));
3832 }
3833 
3834 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fselect(struct sljit_compiler *compiler, sljit_s32 type,
3835  sljit_s32 dst_freg,
3836  sljit_s32 src1, sljit_sw src1w,
3837  sljit_s32 src2_freg)
3838 {
3839  sljit_ins ins;
3840  struct sljit_label *label;
3841  struct sljit_jump *jump;
3842 
3843  CHECK_ERROR();
3844  CHECK(check_sljit_emit_fselect(compiler, type, dst_freg, src1, src1w, src2_freg));
3845 
3846  ADJUST_LOCAL_OFFSET(src1, src1w);
3847 
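 /* There is no floating point load-on-condition, so the select is built
    from a conditional branch (with the condition inverted) around a
    register move or memory load. */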
3848  if (dst_freg != src2_freg) {
3849  if (dst_freg == src1) {
3850  src1 = src2_freg;
3851  src1w = 0;
3852  type ^= 0x1;
3853  } else {
3854  ins = (type & SLJIT_32) ? 0x3800 /* ler */ : 0x2800 /* ldr */;
3855  FAIL_IF(push_inst(compiler, ins | F4(dst_freg) | F0(src2_freg)));
3856  }
3857  }
3858 
3859  SLJIT_SKIP_CHECKS(compiler);
3860  jump = sljit_emit_jump(compiler, (type & ~SLJIT_32) ^ 0x1);
3861  FAIL_IF(!jump);
3862 
3863  if (!(src1 & SLJIT_MEM)) {
3864  ins = (type & SLJIT_32) ? 0x3800 /* ler */ : 0x2800 /* ldr */;
3865  FAIL_IF(push_inst(compiler, ins | F4(dst_freg) | F0(src1)));
3866  } else
3867  FAIL_IF(float_mem(compiler, FLOAT_LOAD | (type & SLJIT_32), dst_freg, src1, src1w));
3868 
3869  SLJIT_SKIP_CHECKS(compiler);
3870  label = sljit_emit_label(compiler);
3871  FAIL_IF(!label);
3872 
3873  sljit_set_label(jump, label);
3874  return SLJIT_SUCCESS;
3875 }
3876 
3877 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type,
3878  sljit_s32 reg,
3879  sljit_s32 mem, sljit_sw memw)
3880 {
3881  sljit_ins ins, reg1, reg2, base, offs = 0;
3882 
3883  CHECK_ERROR();
3884  CHECK(check_sljit_emit_mem(compiler, type, reg, mem, memw));
3885 
3886  if (!(reg & REG_PAIR_MASK))
3887  return sljit_emit_mem_unaligned(compiler, type, reg, mem, memw);
3888 
3889  ADJUST_LOCAL_OFFSET(mem, memw);
3890 
3891  base = gpr(mem & REG_MASK);
3892  reg1 = gpr(REG_PAIR_FIRST(reg));
3893  reg2 = gpr(REG_PAIR_SECOND(reg));
3894 
3895  if (mem & OFFS_REG_MASK) {
3896  memw &= 0x3;
3897  offs = gpr(OFFS_REG(mem));
3898 
3899  if (memw != 0) {
3900  FAIL_IF(push_inst(compiler, 0xeb000000000d /* sllg */ | R36A(tmp1) | R32A(offs) | ((sljit_ins)memw << 16)));
3901  offs = tmp1;
3902  } else if (!(type & SLJIT_MEM_STORE) && (base == reg1 || base == reg2) && (offs == reg1 || offs == reg2)) {
3903  FAIL_IF(push_inst(compiler, 0xb9f80000 | R12A(tmp1) | R4A(base) | R0A(offs)));
3904  base = tmp1;
3905  offs = 0;
3906  }
3907 
3908  memw = 0;
3909  } else if (memw < -0x80000 || memw > 0x7ffff - ((reg2 == reg1 + 1) ? 0 : SSIZE_OF(sw))) {
3910  FAIL_IF(push_load_imm_inst(compiler, tmp1, memw));
3911 
3912  if (base == 0)
3913  base = tmp1;
3914  else
3915  offs = tmp1;
3916 
3917  memw = 0;
3918  }
3919 
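 /* lmg/stmg transfer the whole pair in one instruction, but only for
    consecutive registers addressed by base + 20 bit displacement;
    otherwise fall back to two lg/stg instructions. */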
3920  if (offs == 0 && reg2 == (reg1 + 1)) {
3921  ins = (type & SLJIT_MEM_STORE) ? 0xeb0000000024 /* stmg */ : 0xeb0000000004 /* lmg */;
3922  return push_inst(compiler, ins | R36A(reg1) | R32A(reg2) | R28A(base) | disp_s20((sljit_s32)memw));
3923  }
3924 
3925  ins = ((type & SLJIT_MEM_STORE) ? 0xe30000000024 /* stg */ : 0xe30000000004 /* lg */) | R32A(offs) | R28A(base);
3926 
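 /* When loading and the base register is also the first destination,
    load the second word first so the base is not clobbered before the
    second access. */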
3927  if (!(type & SLJIT_MEM_STORE) && base == reg1) {
3928  FAIL_IF(push_inst(compiler, ins | R36A(reg2) | disp_s20((sljit_s32)memw + SSIZE_OF(sw))));
3929  return push_inst(compiler, ins | R36A(reg1) | disp_s20((sljit_s32)memw));
3930  }
3931 
3932  FAIL_IF(push_inst(compiler, ins | R36A(reg1) | disp_s20((sljit_s32)memw)));
3933  return push_inst(compiler, ins | R36A(reg2) | disp_s20((sljit_s32)memw + SSIZE_OF(sw)));
3934 }
3935 
3936 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_mov(struct sljit_compiler *compiler, sljit_s32 type,
3937  sljit_s32 freg,
3938  sljit_s32 srcdst, sljit_sw srcdstw)
3939 {
3940  sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
3941  sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
3942  sljit_s32 alignment = SLJIT_SIMD_GET_ELEM2_SIZE(type);
3943  struct addr addr;
3944  sljit_ins ins;
3945 
3946  CHECK_ERROR();
3947  CHECK(check_sljit_emit_simd_mov(compiler, type, freg, srcdst, srcdstw));
3948 
3949  ADJUST_LOCAL_OFFSET(srcdst, srcdstw);
3950 
3951  if (reg_size != 4)
3952  return SLJIT_ERR_UNSUPPORTED;
3953 
3954  if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
3955  return SLJIT_ERR_UNSUPPORTED;
3956 
3957  if (type & SLJIT_SIMD_TEST)
3958  return SLJIT_SUCCESS;
3959 
3960  if (!(srcdst & SLJIT_MEM)) {
3961  if (type & SLJIT_SIMD_STORE)
3962  ins = F36(srcdst) | F32(freg);
3963  else
3964  ins = F36(freg) | F32(srcdst);
3965 
3966  return push_inst(compiler, 0xe70000000056 /* vlr */ | ins);
3967  }
3968 
3969  FAIL_IF(make_addr_bx(compiler, &addr, srcdst, srcdstw, tmp1));
3970  ins = F36(freg) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset);
3971 
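 /* The M3 field of vl/vst carries an access alignment hint: 4 promises at
    least quadword alignment, 3 at least doubleword alignment. */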
3972  if (alignment >= 4)
3973  ins |= 4 << 12;
3974  else if (alignment == 3)
3975  ins |= 3 << 12;
3976 
3977  return push_inst(compiler, ((type & SLJIT_SIMD_STORE) ? 0xe7000000000e /* vst */ : 0xe70000000006 /* vl */) | ins);
3978 }
3979 
3980 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compiler *compiler, sljit_s32 type,
3981  sljit_s32 freg,
3982  sljit_s32 src, sljit_sw srcw)
3983 {
3984  sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
3985  sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
3986  struct addr addr;
3987  sljit_gpr reg;
3988  sljit_sw sign_ext;
3989 
3990  CHECK_ERROR();
3991  CHECK(check_sljit_emit_simd_replicate(compiler, type, freg, src, srcw));
3992 
3993  ADJUST_LOCAL_OFFSET(src, srcw);
3994 
3995  if (reg_size != 4)
3996  return SLJIT_ERR_UNSUPPORTED;
3997 
3998  if ((type & SLJIT_SIMD_FLOAT) && elem_size < 2)
3999  return SLJIT_ERR_UNSUPPORTED;
4000 
4001  if (type & SLJIT_SIMD_TEST)
4002  return SLJIT_SUCCESS;
4003 
4004  if (src & SLJIT_MEM) {
4005  FAIL_IF(make_addr_bx(compiler, &addr, src, srcw, tmp1));
4006  return push_inst(compiler, 0xe70000000005 /* vlrep */ | F36(freg)
4007  | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset) | ((sljit_ins)elem_size << 12));
4008  }
4009 
4010  if (type & SLJIT_SIMD_FLOAT) {
4011  if (src == SLJIT_IMM)
4012  return push_inst(compiler, 0xe70000000044 /* vgbm */ | F36(freg));
4013 
4014  return push_inst(compiler, 0xe7000000004d /* vrep */ | F36(freg) | F32(src) | ((sljit_ins)elem_size << 12));
4015  }
4016 
4017  if (src == SLJIT_IMM) {
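 /* 0x10000 cannot result from sign extending a 16 bit value, so it serves
    as a sentinel for "immediate does not fit the 16 bit vrepi field". */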
4018  sign_ext = 0x10000;
4019 
4020  switch (elem_size) {
4021  case 0:
4022  srcw &= 0xff;
4023  sign_ext = (sljit_s8)srcw;
4024  break;
4025  case 1:
4026  srcw &= 0xffff;
4027  sign_ext = (sljit_s16)srcw;
4028  break;
4029  case 2:
4030  if ((sljit_s32)srcw == (sljit_s16)srcw) {
4031  srcw &= 0xffff;
4032  sign_ext = (sljit_s16)srcw;
4033  } else
4034  srcw &= 0xffffffff;
4035  break;
4036  default:
4037  if (srcw == (sljit_s16)srcw) {
4038  srcw &= 0xffff;
4039  sign_ext = (sljit_s16)srcw;
4040  }
4041  break;
4042  }
4043 
4044  if (sign_ext != 0x10000) {
4045  if (sign_ext == 0 || sign_ext == -1)
4046  return push_inst(compiler, 0xe70000000044 /* vgbm */ | F36(freg)
4047  | (sign_ext == 0 ? 0 : ((sljit_ins)0xffff << 16)));
4048 
4049  return push_inst(compiler, 0xe70000000045 /* vrepi */ | F36(freg)
4050  | ((sljit_ins)srcw << 16) | ((sljit_ins)elem_size << 12));
4051  }
4052 
4053  push_load_imm_inst(compiler, tmp0, srcw);
4054  reg = tmp0;
4055  } else
4056  reg = gpr(src);
4057 
4058  FAIL_IF(push_inst(compiler, 0xe70000000022 /* vlvg */ | F36(freg) | R32A(reg) | ((sljit_ins)elem_size << 12)));
4059  return push_inst(compiler, 0xe7000000004d /* vrep */ | F36(freg) | F32(freg) | ((sljit_ins)elem_size << 12));
4060 }
4061 
4062 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compiler *compiler, sljit_s32 type,
4063  sljit_s32 freg, sljit_s32 lane_index,
4064  sljit_s32 srcdst, sljit_sw srcdstw)
4065 {
4066  sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
4067  sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
4068  struct addr addr;
4069  sljit_gpr reg;
4070  sljit_ins ins = 0;
4071 
4072  CHECK_ERROR();
4073  CHECK(check_sljit_emit_simd_lane_mov(compiler, type, freg, lane_index, srcdst, srcdstw));
4074 
4075  ADJUST_LOCAL_OFFSET(srcdst, srcdstw);
4076 
4077  if (reg_size != 4)
4078  return SLJIT_ERR_UNSUPPORTED;
4079 
4080  if ((type & SLJIT_SIMD_FLOAT) && elem_size < 2)
4081  return SLJIT_ERR_UNSUPPORTED;
4082 
4083  if (type & SLJIT_SIMD_TEST)
4084  return SLJIT_SUCCESS;
4085 
4086  if (srcdst & SLJIT_MEM) {
4087  FAIL_IF(make_addr_bx(compiler, &addr, srcdst, srcdstw, tmp1));
4088  ins = F36(freg) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset);
4089  }
4090 
4091  if (type & SLJIT_SIMD_LANE_ZERO) {
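 /* vllez zeroes the register and loads the element right-aligned in the
    leftmost doubleword, i.e. into lane (1 << (3 - elem_size)) - 1, so it
    covers this case in a single instruction when that is the requested
    lane. */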
4092  if ((srcdst & SLJIT_MEM) && lane_index == ((1 << (3 - elem_size)) - 1))
4093  return push_inst(compiler, 0xe70000000004 /* vllez */ | ins | ((sljit_ins)elem_size << 12));
4094 
4095  if ((type & SLJIT_SIMD_FLOAT) && freg == srcdst) {
4096  FAIL_IF(push_inst(compiler, 0xe70000000056 /* vlr */ | F36(TMP_FREG1) | F32(freg)));
4097  srcdst = TMP_FREG1;
4098  srcdstw = 0;
4099  }
4100 
4101  FAIL_IF(push_inst(compiler, 0xe70000000044 /* vgbm */ | F36(freg)));
4102  }
4103 
4104  if (srcdst & SLJIT_MEM) {
4105  switch (elem_size) {
4106  case 0:
4107  ins |= 0xe70000000000 /* vleb */;
4108  break;
4109  case 1:
4110  ins |= 0xe70000000001 /* vleh */;
4111  break;
4112  case 2:
4113  ins |= 0xe70000000003 /* vlef */;
4114  break;
4115  default:
4116  ins |= 0xe70000000002 /* vleg */;
4117  break;
4118  }
4119 
4120  /* Convert to vsteb - vsteg */
4121  if (type & SLJIT_SIMD_STORE)
4122  ins |= 0x8;
4123 
4124  return push_inst(compiler, ins | ((sljit_ins)lane_index << 12));
4125  }
4126 
4127  if (type & SLJIT_SIMD_FLOAT) {
4128  if (type & SLJIT_SIMD_STORE)
4129  return push_inst(compiler, 0xe7000000004d /* vrep */ | F36(srcdst) | F32(freg) | ((sljit_ins)lane_index << 16) | ((sljit_ins)elem_size << 12));
4130 
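 /* For doubleword lanes vpdi (permute doubleword immediate) moves the
    selected doubleword directly, avoiding a round trip through a GPR. */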
4131  if (elem_size == 3) {
4132  if (lane_index == 0)
4133  ins = F32(srcdst) | F28(freg) | (1 << 12);
4134  else
4135  ins = F32(freg) | F28(srcdst);
4136 
4137  return push_inst(compiler, 0xe70000000084 /* vpdi */ | F36(freg) | ins);
4138  }
4139 
4140  FAIL_IF(push_inst(compiler, 0xe70000000021 /* vlgv */ | R36A(tmp0) | F32(srcdst) | ((sljit_ins)2 << 12)));
4141  return push_inst(compiler, 0xe70000000022 /* vlvg */ | F36(freg) | R32A(tmp0) | ((sljit_ins)lane_index << 16) | ((sljit_ins)2 << 12));
4142  }
4143 
4144  if (srcdst == SLJIT_IMM) {
4145  switch (elem_size) {
4146  case 0:
4147  ins = 0xe70000000040 /* vleib */;
4148  srcdstw &= 0xff;
4149  break;
4150  case 1:
4151  ins = 0xe70000000041 /* vleih */;
4152  srcdstw &= 0xffff;
4153  break;
4154  case 2:
4155  if ((sljit_s32)srcdstw == (sljit_s16)srcdstw) {
4156  srcdstw &= 0xffff;
4157  ins = 0xe70000000043 /* vleif */;
4158  } else
4159  srcdstw &= 0xffffffff;
4160  break;
4161  default:
4162  if (srcdstw == (sljit_s16)srcdstw) {
4163  srcdstw &= 0xffff;
4164  ins = 0xe70000000042 /* vleig */;
4165  }
4166  break;
4167  }
4168 
4169  if (ins != 0)
4170  return push_inst(compiler, ins | F36(freg) | ((sljit_ins)srcdstw << 16) | ((sljit_ins)lane_index << 12));
4171 
4172  push_load_imm_inst(compiler, tmp0, srcdstw);
4173  reg = tmp0;
4174  } else
4175  reg = gpr(srcdst);
4176 
4177  ins = ((sljit_ins)lane_index << 16) | ((sljit_ins)elem_size << 12);
4178 
4179  if (!(type & SLJIT_SIMD_STORE))
4180  return push_inst(compiler, 0xe70000000022 /* vlvg */ | F36(freg) | R32A(reg) | ins);
4181 
4182  FAIL_IF(push_inst(compiler, 0xe70000000021 /* vlgv */ | R36A(reg) | F32(freg) | ins));
4183 
4184  if (!(type & SLJIT_SIMD_LANE_SIGNED) || elem_size >= 3)
4185  return SLJIT_SUCCESS;
4186 
4187  switch (elem_size) {
4188  case 0:
4189  ins = 0xb9060000 /* lgbr */;
4190  break;
4191  case 1:
4192  ins = 0xb9070000 /* lghr */;
4193  break;
4194  default:
4195  ins = 0xb9140000 /* lgfr */;
4196  break;
4197  }
4198 
4199  return push_inst(compiler, ins | R4A(reg) | R0A(reg));
4200 }
4201 
4202 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_compiler *compiler, sljit_s32 type,
4203  sljit_s32 freg,
4204  sljit_s32 src, sljit_s32 src_lane_index)
4205 {
4206  sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
4207  sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
4208 
4209  CHECK_ERROR();
4210  CHECK(check_sljit_emit_simd_lane_replicate(compiler, type, freg, src, src_lane_index));
4211 
4212  if (reg_size != 4)
4213  return SLJIT_ERR_UNSUPPORTED;
4214 
4215  if ((type & SLJIT_SIMD_FLOAT) && elem_size < 2)
4216  return SLJIT_ERR_UNSUPPORTED;
4217 
4218  if (type & SLJIT_SIMD_TEST)
4219  return SLJIT_SUCCESS;
4220 
4221  return push_inst(compiler, 0xe7000000004d /* vrep */ | F36(freg) | F32(src)
4222  | ((sljit_ins)src_lane_index << 16) | ((sljit_ins)elem_size << 12));
4223 }
4224 
4225 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_extend(struct sljit_compiler *compiler, sljit_s32 type,
4226  sljit_s32 freg,
4227  sljit_s32 src, sljit_sw srcw)
4228 {
4229  sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
4230  sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
4231  sljit_s32 elem2_size = SLJIT_SIMD_GET_ELEM2_SIZE(type);
4232  struct addr addr;
4233  sljit_ins ins;
4234 
4235  CHECK_ERROR();
4236  CHECK(check_sljit_emit_simd_extend(compiler, type, freg, src, srcw));
4237 
4238  ADJUST_LOCAL_OFFSET(src, srcw);
4239 
4240  if (reg_size != 4)
4241  return SLJIT_ERR_UNSUPPORTED;
4242 
4243  if ((type & SLJIT_SIMD_FLOAT) && elem_size < 2)
4244  return SLJIT_ERR_UNSUPPORTED;
4245 
4246  if (type & SLJIT_SIMD_TEST)
4247  return SLJIT_SUCCESS;
4248 
4249  if (src & SLJIT_MEM) {
4250  FAIL_IF(make_addr_bx(compiler, &addr, src, srcw, tmp1));
4251  ins = F36(freg) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset);
4252 
4253  switch (elem2_size - elem_size) {
4254  case 1:
4255  ins |= 0xe70000000002 /* vleg */;
4256  break;
4257  case 2:
4258  ins |= 0xe70000000003 /* vlef */;
4259  break;
4260  default:
4261  ins |= 0xe70000000001 /* vleh */;
4262  break;
4263  }
4264 
4265  FAIL_IF(push_inst(compiler, ins));
4266  src = freg;
4267  }
4268 
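 /* Float widening: unpack the two leftmost word lanes to doubleword lanes,
    shift each value into the word position vfll reads from, then lengthen
    the short format values to long format. */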
4269  if (type & SLJIT_SIMD_FLOAT) {
4270  FAIL_IF(push_inst(compiler, 0xe700000000d5 /* vuplh */ | F36(freg) | F32(src) | (2 << 12)));
4271  FAIL_IF(push_inst(compiler, 0xe70000000030 /* vesl */ | F36(freg) | F32(freg) | (32 << 16) | (3 << 12)));
4272  return push_inst(compiler, 0xe700000000c4 /* vfll */ | F36(freg) | F32(freg) | (2 << 12));
4273  }
4274 
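 /* Integer widening proceeds one element size per step (vuph sign extends,
    vuplh zero extends), repeating until the destination element size is
    reached. */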
4275  ins = ((type & SLJIT_SIMD_EXTEND_SIGNED) ? 0xe700000000d7 /* vuph */ : 0xe700000000d5 /* vuplh */) | F36(freg);
4276 
4277  do {
4278  FAIL_IF(push_inst(compiler, ins | F32(src) | ((sljit_ins)elem_size << 12)));
4279  src = freg;
4280  } while (++elem_size < elem2_size);
4281 
4282  return SLJIT_SUCCESS;
4283 }
4284 
4285 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *compiler, sljit_s32 type,
4286  sljit_s32 freg,
4287  sljit_s32 dst, sljit_sw dstw)
4288 {
4289  sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
4290  sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
4291  sljit_gpr dst_r;
4292 
4293  CHECK_ERROR();
4294  CHECK(check_sljit_emit_simd_sign(compiler, type, freg, dst, dstw));
4295 
4296  ADJUST_LOCAL_OFFSET(dst, dstw);
4297 
4298  if (reg_size != 4)
4299  return SLJIT_ERR_UNSUPPORTED;
4300 
4301  if ((type & SLJIT_SIMD_FLOAT) && elem_size < 2)
4302  return SLJIT_ERR_UNSUPPORTED;
4303 
4304  if (type & SLJIT_SIMD_TEST)
4305  return SLJIT_SUCCESS;
4306 
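 /* vbperm gathers the bits whose indices are listed in TMP_FREG1 into a
    16 bit result, so the constants below are simply the bit positions of
    each element's sign bit; index bytes of 0x80 and above point past the
    source and read as zero. */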
4307  switch (elem_size) {
4308  case 0:
4309  push_load_imm_inst(compiler, tmp0, (sljit_sw)0x4048505860687078);
4310  push_load_imm_inst(compiler, tmp1, (sljit_sw)0x0008101820283038);
4311  FAIL_IF(push_inst(compiler, 0xe70000000062 /* vlvgp */ | F36(TMP_FREG1) | R32A(tmp1) | R28A(tmp0)));
4312  break;
4313  case 1:
4314  push_load_imm_inst(compiler, tmp0, (sljit_sw)0x0010203040506070);
4315  break;
4316  case 2:
4317  push_load_imm_inst(compiler, tmp0, (sljit_sw)0x8080808000204060);
4318  break;
4319  default:
4320  push_load_imm_inst(compiler, tmp0, (sljit_sw)0x8080808080800040);
4321  break;
4322  }
4323 
4324  if (elem_size != 0)
4325  FAIL_IF(push_inst(compiler, 0xe70000000022 /* vlvg */ | F36(TMP_FREG1) | R32A(tmp0) | (1 << 16) | (3 << 12)));
4326 
4327  FAIL_IF(push_inst(compiler, 0xe70000000085 /* vbperm */ | F36(TMP_FREG1) | F32(freg) | F28(TMP_FREG1)));
4328 
4329  dst_r = FAST_IS_REG(dst) ? gpr(dst) : tmp0;
4330  FAIL_IF(push_inst(compiler, 0xe70000000021 /* vlgv */ | R36A(dst_r) | F32(TMP_FREG1)
4331  | (elem_size == 0 ? ((3 << 16) | (1 << 12)) : (7 << 16))));
4332 
4333  if (dst_r == tmp0)
4334  return store_word(compiler, tmp0, dst, dstw, type & SLJIT_32);
4335 
4336  return SLJIT_SUCCESS;
4337 }
4338 
4339 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_op2(struct sljit_compiler *compiler, sljit_s32 type,
4340  sljit_s32 dst_freg, sljit_s32 src1_freg, sljit_s32 src2_freg)
4341 {
4342  sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
4343  sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
4344  sljit_ins ins = 0;
4345 
4346  CHECK_ERROR();
4347  CHECK(check_sljit_emit_simd_op2(compiler, type, dst_freg, src1_freg, src2_freg));
4348 
4349  if (reg_size != 4)
4350  return SLJIT_ERR_UNSUPPORTED;
4351 
4352  if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
4353  return SLJIT_ERR_UNSUPPORTED;
4354 
4355  if (type & SLJIT_SIMD_TEST)
4356  return SLJIT_SUCCESS;
4357 
4358  switch (SLJIT_SIMD_GET_OPCODE(type)) {
4359  case SLJIT_SIMD_OP2_AND:
4360  ins = 0xe70000000068 /* vn */;
4361  break;
4362  case SLJIT_SIMD_OP2_OR:
4363  ins = 0xe7000000006a /* vo */;
4364  break;
4365  case SLJIT_SIMD_OP2_XOR:
4366  ins = 0xe7000000006d /* vx */;
4367  break;
4368  }
4369 
4370  if (type & SLJIT_SIMD_TEST)
4371  return SLJIT_SUCCESS;
4372 
4373  return push_inst(compiler, ins | F36(dst_freg) | F32(src1_freg) | F28(src2_freg));
4374 }
4375 
4376 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_load(struct sljit_compiler *compiler, sljit_s32 op,
4377  sljit_s32 dst_reg,
4378  sljit_s32 mem_reg)
4379 {
4380  CHECK_ERROR();
4381  CHECK(check_sljit_emit_atomic_load(compiler, op, dst_reg, mem_reg));
4382 
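 /* Naturally aligned loads are single-copy atomic on z/Architecture, so an
    ordinary load is sufficient here. */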
4383  SLJIT_SKIP_CHECKS(compiler);
4384  return sljit_emit_op1(compiler, op, dst_reg, 0, SLJIT_MEM1(mem_reg), 0);
4385 }
4386 
4387 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_store(struct sljit_compiler *compiler, sljit_s32 op,
4388  sljit_s32 src_reg,
4389  sljit_s32 mem_reg,
4390  sljit_s32 temp_reg)
4391 {
4392  sljit_ins mask;
4393  sljit_gpr tmp_r = gpr(temp_reg);
4394  sljit_gpr mem_r = gpr(mem_reg);
4395 
4396  CHECK_ERROR();
4397  CHECK(check_sljit_emit_atomic_store(compiler, op, src_reg, mem_reg, temp_reg));
4398 
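 /* cs/csg provide compare-and-swap only for 32/64 bit operands. Byte and
    halfword stores are emulated below: align the address down to a word,
    shift the changed bits into place and perform a 32 bit cs on the
    containing word. */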
4399  switch (GET_OPCODE(op)) {
4400  case SLJIT_MOV32:
4401  case SLJIT_MOV_U32:
4402  return push_inst(compiler, 0xba000000 /* cs */ | R20A(tmp_r) | R16A(gpr(src_reg)) | R12A(mem_r));
4403  case SLJIT_MOV_U8:
4404  mask = 0xff;
4405  break;
4406  case SLJIT_MOV_U16:
4407  mask = 0xffff;
4408  break;
4409  default:
4410  return push_inst(compiler, 0xeb0000000030 /* csg */ | R36A(tmp_r) | R32A(gpr(src_reg)) | R28A(mem_r));
4411  }
4412 
4413  /* tmp0 = (src_reg ^ tmp_r) & mask */
4414  FAIL_IF(push_inst(compiler, 0xa50f0000 /* llill */ | R20A(tmp1) | mask));
4415  FAIL_IF(push_inst(compiler, 0xb9e70000 /* xgrk */ | R4A(tmp0) | R0A(gpr(src_reg)) | R12A(tmp_r)));
4416  FAIL_IF(push_inst(compiler, 0xa7090000 /* lghi */ | R20A(tmp_r) | 0xfffc));
4417  FAIL_IF(push_inst(compiler, 0xb9800000 /* ngr */ | R4A(tmp0) | R0A(tmp1)));
4418 
4419  /* tmp0 = tmp0 << (((mem_r ^ 0x3) & 0x3) << 3) */
4420  FAIL_IF(push_inst(compiler, 0xa50f0000 /* llill */ | R20A(tmp1) | (sljit_ins)((mask == 0xff) ? 0x18 : 0x10)));
4421  FAIL_IF(push_inst(compiler, 0xb9800000 /* ngr */ | R4A(tmp_r) | R0A(mem_r)));
4422  FAIL_IF(push_inst(compiler, 0xec0000000057 /* rxsbg */ | R36A(tmp1) | R32A(mem_r) | (59 << 24) | (60 << 16) | (3 << 8)));
4423  FAIL_IF(push_inst(compiler, 0xeb000000000d /* sllg */ | R36A(tmp0) | R32A(tmp0) | R28A(tmp1)));
4424 
4425  /* Already computed: tmp_r = mem_r & ~0x3 */
4426 
4427  FAIL_IF(push_inst(compiler, 0x58000000 /* l */ | R20A(tmp1) | R12A(tmp_r)));
4428  FAIL_IF(push_inst(compiler, 0x1700 /* x */ | R4A(tmp0) | R0A(tmp1)));
4429  return push_inst(compiler, 0xba000000 /* cs */ | R20A(tmp1) | R16A(tmp0) | R12A(tmp_r));
4430 }
4431 
4432 /* --------------------------------------------------------------------- */
4433 /* Other instructions */
4434 /* --------------------------------------------------------------------- */
4435 
4436 /* On s390x we build a literal pool to hold constants. This has two main
4437  advantages:
4438 
4439  1. we only need one instruction in the instruction stream (LGRL)
4440  2. we can store 64 bit addresses and use 32 bit offsets
4441 
4442  To retrofit the extra information needed to build the literal pool we
4443  add a new sljit_s390x_const struct that contains the initial value but
4444  can still be cast to a sljit_const. */
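 /* Roughly (the real definition appears earlier in this file) the struct
    is just the generic header followed by the value to pool:

        struct sljit_s390x_const {
            struct sljit_const const_;   (must stay first so the cast works)
            sljit_sw init_value;
        };
 */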
4445 
4446 SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
4447 {
4448  struct sljit_s390x_const *const_;
4449  sljit_gpr dst_r;
4450 
4451  CHECK_ERROR_PTR();
4452  CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
4453 
4454  const_ = (struct sljit_s390x_const*)ensure_abuf(compiler,
4455  sizeof(struct sljit_s390x_const));
4456  PTR_FAIL_IF(!const_);
4457  set_const((struct sljit_const*)const_, compiler);
4458  const_->init_value = init_value;
4459 
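 /* With the general-instructions-extension facility a single PC-relative
    lgrl fetches the pooled value; otherwise the pool entry's address is
    formed with larl and the value loaded through it. The zero offsets are
    placeholders filled in when the literal pool is emitted. */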
4460  dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;
4461  if (have_genext())
4462  PTR_FAIL_IF(push_inst(compiler, lgrl(dst_r, 0)));
4463  else {
4464  PTR_FAIL_IF(push_inst(compiler, larl(tmp1, 0)));
4465  PTR_FAIL_IF(push_inst(compiler, lg(dst_r, 0, r0, tmp1)));
4466  }
4467 
4468  if (dst & SLJIT_MEM)
4469  PTR_FAIL_IF(store_word(compiler, dst_r, dst, dstw, 0 /* always 64-bit */));
4470 
4471  return (struct sljit_const*)const_;
4472 }
4473 
4474 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
4475 {
4476  /* Update the constant pool. */
4477  sljit_uw *ptr = (sljit_uw *)addr;
4478  SLJIT_UNUSED_ARG(executable_offset);
4479 
4480  SLJIT_UPDATE_WX_FLAGS(ptr, ptr + 1, 0);
4481  *ptr = new_target;
4482  SLJIT_UPDATE_WX_FLAGS(ptr, ptr + 1, 1);
4483  SLJIT_CACHE_FLUSH(ptr, ptr + 1);
4484 }
4485 
4486 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
4487 {
4488  sljit_set_jump_addr(addr, (sljit_uw)new_constant, executable_offset);
4489 }
4490 
4491 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_mov_addr(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
4492 {
4493  struct sljit_jump *jump;
4494  sljit_gpr dst_r;
4495 
4496  CHECK_ERROR_PTR();
4497  CHECK_PTR(check_sljit_emit_mov_addr(compiler, dst, dstw));
4498  ADJUST_LOCAL_OFFSET(dst, dstw);
4499 
4500  jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
4501  PTR_FAIL_IF(!jump);
4502  set_mov_addr(jump, compiler, 0);
4503 
4504  dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;
4505 
4506  if (have_genext())
4507  PTR_FAIL_IF(push_inst(compiler, lgrl(dst_r, 0)));
4508  else {
4509  PTR_FAIL_IF(push_inst(compiler, larl(tmp1, 0)));
4510  PTR_FAIL_IF(push_inst(compiler, lg(dst_r, 0, r0, tmp1)));
4511  }
4512 
4513  if (dst & SLJIT_MEM)
4514  PTR_FAIL_IF(store_word(compiler, dst_r, dst, dstw, 0));
4515 
4516  return jump;
4517 }
4518 
4519 /* TODO(carenas): EVAL probably should move up or be refactored */
4520 #undef WHEN2
4521 #undef EVAL
4522 
4523 #undef tmp1
4524 #undef tmp0
4525 
4526 /* TODO(carenas): undef other macros that spill like is_u12? */