1 /* $Id: sr_reduce.c,v 1.11 1994/06/24 10:32:27 ceriel Exp $ */
3 * (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands.
4 * See the copyright notice in the ACK home directory, in the file "Copyright".
6 /* S T R E N G T H R E D U C T I O N
8 * S R _ R E D U C E . C
18 #include "../share/types.h"
20 #include "../share/debug.h"
21 #include "../share/alloc.h"
22 #include "../share/def.h"
23 #include "../share/global.h"
24 #include "../share/aux.h"
26 #include "../share/lset.h"
28 #include "sr_reduce.h"
34 /* If an expression such as "iv * const" or "A[iv]" is
35 * used more than once in a loop, we only use one temporary
36 * local for it and reuse this local each time.
37 * After the first occurrence, the expression is said to
41 STATIC int regtyp(code)
44 switch(code->co_instr) {
57 STATIC gen_regmes(tmp,score,code,p)
63 /* generate a register message for the temporary variable and
64 * insert it at the start of the procedure.
69 l = reg_mes(tmp,code->co_tmpsize,regtyp(code),score);
70 pro = p->p_start->b_start; /* every proc. begins with a PRO pseudo */
71 l->l_next = pro->l_next;
78 STATIC line_p newcode(code,tmp)
82 /* Construct the EM code that will replace the reducible code,
89 switch(code->co_instr) {
94 /* new code is just a LOL tmp */
99 /* New code is a LOAD tmp, where tmp is a
100 * pointer variable, so the actual EM code
101 * depends on the pointer size.
103 l = move_pointer(tmp,LOAD);
106 /* New code is a load-indirect */
111 /* New code is a store-indirect */
123 STATIC replcode(code,text)
127 /* Replace old code (extending from code->co_lfirst to
128 * code->co_llast) by new code (headed by 'text').
133 for (l = text; l->l_next != (line_p) 0; l = l->l_next);
134 /* 'l' now points to last instruction of text */
135 l1 = PREV(code->co_lfirst); /* instruction just before old code */
136 l2 = code->co_llast->l_next; /* instruction just behind old code */
137 if (l1 == (line_p) 0) {
138 code->co_block->b_start = text;
139 PREV(text) = (line_p) 0;
144 if (l2 != (line_p) 0) {
148 code->co_llast->l_next = (line_p) 0;
149 /* Note that the old code is still accessible via code->co_lfirst */
152 STATIC line_p add_code(pl, l)
159 line_p n = pl->l_next;
163 while (l->l_next) l = l->l_next;
173 STATIC init_code(code,tmp)
177 /* Generate code to set up the temporary local.
178 * For multiplication, its initial value is const*iv_expr,
179 * for array operations it is &a[iv_expr] (where iv_expr is
180 * an expression that is a linear function of the induc. var.
181 * This code is inserted immediately before the loop entry.
182 * As the initializing code looks very much like the
183 * reduced code, we reuse that (old) code.
188 l = code->co_llast; /* the mli, lar etc. instruction */
192 /* reduced code is: iv_expr * lc (or lc * iv_expr)
193 * init_code is: tmp = iv_expr * lc (or lc*iv_expr)
194 * So we just insert a 'STL tmp'.
196 l->l_next = int_line(tmp);
197 l->l_next->l_instr = op_stl;
201 /* reduced code is: iv_expr << lc
202 * init_code is: tmp = iv_expr << lc
203 * So we just insert a 'STL tmp'.
205 l->l_next = int_line(tmp);
206 l->l_next->l_instr = op_stl;
210 /* reduced code is: ...= A[iv_expr] resp.
212 * init_code is: tmp = &A[iv_expr].
213 * So just change the lar or sar into a aar ...
215 l->l_instr = (byte) op_aar;
216 /* ... and fall through !! */
218 /* append code to store a pointer in temp. local */
219 l->l_next = move_pointer(tmp,STORE);
222 assert(FALSE); /* non-reducible instruction */
225 /* Now insert the code at the end of the header block */
226 p = &code->co_loop->LP_INSTR;
227 if (*p == (line_p) 0 || (PREV((*p)) == 0 && INSTR((*p)) == op_bra)) {
228 /* LP_INSTR points to last instruction of header block,
229 * so if it is 0, the header block is empty yet.
231 code->co_loop->LP_HEADER->b_start =
232 add_code(code->co_loop->LP_HEADER->b_start, code->co_lfirst);
233 } else if (INSTR((*p)) == op_bra) {
234 add_code(PREV((*p)), code->co_lfirst);
236 else add_code(*p, code->co_lfirst);
237 while (l->l_next) l = l->l_next;
238 *p = l; /* new last instruction */
241 STATIC incr_code(code,tmp)
245 /* Generate code to increment the temporary local variable.
246 * The variable is incremented by
247 * 1) multiply --> step value of iv * loop constant
248 * 2) array --> step value of iv * element size
249 * This value can be determined statically.
250 * If the induction variable is used in a linear
251 * expression in which its sign is negative
252 * (such as in: "5-(6-(-iv))" ), this value is negated.
253 * The generated code looks like:
254 * LOL tmp ; LOC incr ; ADI ws ; STL tmp
255 * For pointer-increments we generate a "ADP c", rather than
257 * This code is put just after the code that increments
258 * the induction variable.
261 line_p load_tmp, loc, add, store_tmp, l;
263 add = newline(OPSHORT);
264 SHORT(add) = ws; /* the add instruction, can be ADI,ADU or ADS */
265 switch(code->co_instr) {
270 off_set(code->c_o.co_loadlc) *
271 code->co_iv->iv_step);
272 loc->l_instr = op_loc;
273 add->l_instr = op_adi;
274 load_tmp = int_line(tmp);
275 load_tmp->l_instr = op_lol;
276 store_tmp = int_line(tmp);
277 store_tmp->l_instr = op_stl;
283 code->co_iv->iv_step *
284 (1 << off_set(code->c_o.co_loadlc)));
285 loc->l_instr = op_loc;
286 add->l_instr = op_adi;
287 load_tmp = int_line(tmp);
288 load_tmp->l_instr = op_lol;
289 store_tmp = int_line(tmp);
290 store_tmp->l_instr = op_stl;
298 code->co_iv->iv_step *
299 elemsize(code->c_o.co_desc));
300 add->l_instr = op_adp;
301 load_tmp = move_pointer(tmp,LOAD);
302 store_tmp = move_pointer(tmp,STORE);
307 /* Now we've got pieces of code to load the temp. local,
308 * load the constant, add the two and store the result in
309 * the local. This code will be put just after the code that
310 * increments the induction variable.
312 if (loc != (line_p) 0) concatenate(load_tmp,loc);
313 concatenate(load_tmp,add);
314 concatenate(load_tmp,store_tmp);
315 /* Now load_tmp points to a list of EM instructions */
316 l = code->co_iv->iv_incr;
317 if (l->l_next != (line_p) 0) {
318 DLINK(store_tmp,l->l_next);
320 DLINK(l,load_tmp); /* doubly link them */
329 for (l = c->co_lfirst; l != (line_p) 0; l = next) {
337 STATIC bool same_address(l1,l2,vars)
341 /* See if l1 and l2 load the same address */
343 if (INSTR(l1) != INSTR(l2)) return FALSE;
346 return OBJ(l1) == OBJ(l2);
348 return off_set(l1) == off_set(l2);
351 off_set(l1) == off_set(l2) &&
352 is_loopconst(l1,vars);
355 off_set(l1) == off_set(l2) &&
356 is_loopconst(l1,vars);
363 STATIC bool same_expr(lb1,le1,lb2,le2)
364 line_p lb1,le1,lb2,le2;
366 /* See if the code from lb1 to le1 is the same
367 * expression as the code from lb2 to le2.
371 register line_p l1,l2;
376 if (INSTR(l1) != INSTR(l2)) return FALSE;
379 if (TYPE(l2) != OPSHORT ||
380 SHORT(l1) != SHORT(l2)) return FALSE;
383 if (TYPE(l2) != OPOFFSET ||
384 OFFSET(l1) != OFFSET(l2)) return FALSE;
391 if (l1 == le1 ) return l2 == le2;
392 if (l2 == le2) return FALSE;
398 STATIC bool same_code(c1,c2,vars)
402 /* See if c1 and c2 compute the same expression. Two array
403 * references can be the same even if one is e.g a fetch
404 * and the other a store.
407 switch(c1->co_instr) {
412 return c1->co_instr == c2->co_instr &&
413 off_set(c1->c_o.co_loadlc) ==
414 off_set(c2->c_o.co_loadlc) &&
415 same_expr(c1->co_ivexpr,c1->co_endexpr,
416 c2->co_ivexpr,c2->co_endexpr);
420 return ( c2->co_instr == op_aar ||
421 c2->co_instr == op_lar ||
422 c2->co_instr == op_sar) &&
423 same_expr(c1->co_ivexpr,c1->co_endexpr,
424 c2->co_ivexpr,c2->co_endexpr) &&
425 same_address(c1->c_o.co_desc,c2->c_o.co_desc,vars) &&
426 same_address(c1->co_lfirst,c2->co_lfirst,vars);
434 STATIC code_p available(c,vars)
438 /* See if the code is already available.
439 * If so, return a pointer to the first occurrence
446 for (i = Lfirst(avail); i != (Lindex) 0; i = Lnext(i,avail)) {
447 cp = (code_p) Lelem(i);
448 if (same_code(c,cp,vars)) {
455 STATIC fix_header(lp)
458 /* Check if a header block was added, and if so, add a branch to
460 * If it was added, it was added to the end of the procedure, so
461 * move the END pseudo.
463 bblock_p b = curproc->p_start;
465 if (lp->LP_HEADER->b_next == 0) {
466 line_p l = last_instr(lp->LP_HEADER);
470 if (INSTR(l) != op_bra) {
471 line_p j = newline(OPINSTRLAB);
473 assert(INSTR(lp->lp_entry->b_start) == op_lab);
474 INSTRLAB(j) = INSTRLAB(lp->lp_entry->b_start);
480 while (b->b_next != lp->LP_HEADER) b = b->b_next;
482 assert(INSTR(e) == ps_end);
483 assert(PREV(e) != 0);
489 STATIC reduce(code,vars)
493 /* Perform the actual transformations. The code on the left
494 * gets transformed into the code on the right. Note that
495 * each piece of code is assigned a name, that will be
496 * used to describe the whole process.
498 * t = iv * 118; (init_code)
500 * .. iv * 118 .. .. t .. (new_code)
502 * t += 118; (incr_code)
509 OUTTRACE("succeeded!!",0);
510 if ((ac = available(code,vars)) != (code_p) 0) {
511 /* The expression is already available, so we
512 * don't have to generate a new temporary local for it.
514 OUTTRACE("expression was already available",0);
515 replcode(code,newcode(code,ac->co_temp));
518 make_header(code->co_loop);
519 /* make sure there's a header block */
520 tmp = tmplocal(curproc,(offset) code->co_tmpsize);
522 /* create a new local variable in the stack frame
525 gen_regmes(tmp,3,code,curproc); /* generate register message */
526 /* score is set to 3, as TMP is used at least 3 times */
527 replcode(code,newcode(code,tmp));
528 OUTTRACE("replaced old code by new code",0);
529 /* Construct the EM-code that will replace the reducible code
530 * and replace the old code by the new code.
533 OUTTRACE("emitted initializing code",0);
534 /* Emit code to initialize the temporary local. This code is
535 * put in the loop header block.
537 incr_code(code,tmp); /* emit code to increment temp. local */
538 OUTTRACE("emitted increment code",0);
540 fix_header(code->co_loop);
546 STATIC try_multiply(lp,ivs,vars,b,mul)
552 /* See if we can reduce the strength of the multiply
553 * instruction. If so, then set up the global common
554 * data structure 'c' (containing information about the
555 * code to be reduced) and call 'reduce'.
564 OUTTRACE("trying multiply instruction on line %d",linecount);
565 if (ovfl_harmful && !IS_STRONG(b)) return;
566 /* If b is not a strong block, optimization may
567 * introduce an overflow error in the initializing code.
570 l2 = PREV(mul); /* Instruction before the multiply */
571 if ( (is_ivexpr(l2,ivs,vars,&lbegin,&iv,&sign)) &&
572 is_const(PREV(lbegin)) ) {
573 /* recognized expression "const * iv_expr" */
575 c->c_o.co_loadlc = PREV(l2);
577 c->co_lfirst = PREV(lbegin);
580 (is_ivexpr(PREV(l2),ivs,vars,&lbegin,&iv,&sign))) {
581 /* recognized "iv * const " */
583 c->c_o.co_loadlc = l2;
584 c->co_endexpr = PREV(l2);
585 c->co_lfirst = lbegin;
587 OUTTRACE("failed",0);
591 /* common part for both patterns */
596 c->co_ivexpr = lbegin;
598 c->co_tmpsize = ws; /* temp. local is a word */
599 c->co_instr = INSTR(mul);
600 OUTVERBOSE("sr: multiply in proc %d loop %d",
601 curproc->p_id, lp->lp_id);
608 STATIC try_leftshift(lp,ivs,vars,b,shft)
614 /* See if we can reduce the strength of the leftshift
615 * instruction. If so, then set up the global common
616 * data structure 'c' (containing information about the
617 * code to be reduced) and call 'reduce'.
626 OUTTRACE("trying leftshift instruction on line %d",linecount);
627 if (ovfl_harmful && !IS_STRONG(b)) return;
628 /* If b is not a strong block, optimization may
629 * introduce an overflow error in the initializing code.
632 l2 = PREV(shft); /* Instruction before the shift */
633 if (is_const(l2) && off_set(l2) > sli_threshold &&
634 (is_ivexpr(PREV(l2),ivs,vars,&lbegin,&iv,&sign))) {
635 /* recognized "iv << const " */
637 c->c_o.co_loadlc = l2;
638 c->co_endexpr = PREV(l2);
639 c->co_lfirst = lbegin;
641 OUTTRACE("failed",0);
648 c->co_ivexpr = lbegin;
650 c->co_tmpsize = ws; /* temp. local is a word */
651 c->co_instr = INSTR(shft);
652 OUTVERBOSE("sr: leftshift in proc %d loop %d",
653 curproc->p_id, lp->lp_id);
659 STATIC try_array(lp,ivs,vars,b,arr)
665 /* See if we can reduce the strength of the array reference
674 /* Try to recognize the pattern:
677 * LOAD ADDRESS OF DESCRIPTOR
680 OUTTRACE("trying array instruction on line %d",linecount);
681 if (arrbound_harmful && !IS_STRONG(b)) return;
682 /* If b is not a strong block, optimization may
683 * introduce an array bound error in the initializing code.
686 if (is_caddress(l2,vars) &&
687 (INSTR(arr) == op_aar || elemsize(l2) == ws) &&
688 (is_ivexpr(PREV(l2),ivs,vars,&lbegin,&iv,&sign)) ) {
690 if (is_caddress(l3,vars)) {
697 c->co_ivexpr = lbegin;
698 c->co_endexpr = PREV(l2);
700 c->co_tmpsize = ps; /* temp. local is pointer */
701 c->co_instr = INSTR(arr);
703 OUTVERBOSE("sr: array in proc %d loop %d",
704 curproc->p_id,lp->lp_id);
717 for (i = Lfirst(avail); i != (Lindex) 0; i = Lnext(i,avail)) {
725 strength_reduction(lp,ivs,vars)
726 loop_p lp; /* description of the loop */
727 lset ivs; /* set of induction variables of the loop */
728 lset vars; /* set of local variables changed in loop */
730 /* Find all expensive instructions (leftshift, multiply, array) and see
731 * if they can be reduced. We branch to several instruction-specific
732 * routines (try_...) that check if reduction is possible,
733 * and that set up a common data structure (code_info).
734 * The actual transformations are done by 'reduce', that is
735 * essentially instruction-independend.
742 avail = Lempty_set();
743 for (i = Lfirst(lp->LP_BLOCKS); i != (Lindex) 0;
744 i = Lnext(i,lp->LP_BLOCKS)) {
745 b = (bblock_p) Lelem(i);
746 for (l = b->b_start; l != (line_p) 0; l = next) {
748 if (TYPE(l) == OPSHORT && SHORT(l) == ws) {
752 try_leftshift(lp,ivs,vars,b,l);
756 try_multiply(lp,ivs,vars,b,l);
761 try_array(lp,ivs,vars,b,l);