| Polaris: access_util.cc Source File |
|
Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | Directories | File List | Class Members | File Members
access_util.ccGo to the documentation of this file.00001 /// 00002 /// 00003 /// \file access_util.cc 00004 /// 00005 #include <stdio.h> 00006 #include "Expression/LogicalConstExpr.h" 00007 #include "Range/RangeAccessor.h" 00008 #include "Range/AIRangeDict.h" 00009 #include "Program.h" 00010 #include "ProgramUnit.h" 00011 #include "Statement/Statement.h" 00012 #include "utilities/expression_util.h" 00013 #include "Range/range_util.h" 00014 #include "Range/RangeDict.h" 00015 #include "utilities/switches_util.h" 00016 #include "utilities/stmt_util.h" 00017 #include "SimBiGraphIterator.h" 00018 #include "SimGraphIterator.h" 00019 #include "Traverser.h" 00020 #include "UBiGraphIterator.h" 00021 00022 #include "utilities/access_util.h" 00023 00024 void 00025 print_access_region_statistics( ostream & o, const RefList<AbstractAccess> & list, 00026 Statement & stmt, Symbol & sym ) 00027 { 00028 if (_ar_prtstats != 1) return; 00029 00030 AR_Stats stats; 00031 00032 init_ar_statistics( stats ); 00033 00034 stats.total = list.entries(); 00035 00036 for (Iterator<AbstractAccess> iter = list; 00037 iter.valid(); 00038 ++iter) 00039 { 00040 AbstractAccess & access_region = iter.current(); 00041 00042 collect_ar_statistics( access_region, stats, sym ); 00043 } 00044 00045 print_ar_stats_line( o, stmt, stats ); 00046 } 00047 00048 void 00049 print_access_region_statistics( ostream & o, const List<AbstractAccess> & list, 00050 Statement & stmt, Symbol & sym ) 00051 { 00052 if (_ar_prtstats != 1) return; 00053 00054 AR_Stats stats; 00055 00056 init_ar_statistics( stats ); 00057 00058 stats.total = list.entries(); 00059 00060 for (Iterator<AbstractAccess> iter = list; 00061 iter.valid(); 00062 ++iter) 00063 { 00064 AbstractAccess & access_region = iter.current(); 00065 00066 collect_ar_statistics( access_region, stats, sym ); 00067 } 00068 00069 print_ar_stats_line( o, stmt, stats ); 00070 } 00071 00072 void 00073 init_ar_statistics( AR_Stats & stats ) 00074 { 00075 stats.print = false; 00076 stats.total_dims = 0; 00077 stats.total = 0; 00078 stats.exact = 0; 00079 stats.inexact = 0; 00080 stats.proc_boundary = 0; 00081 stats.both_same_dims = 0; 00082 stats.caller_grtr_dims = 0; 00083 stats.callee_grtr_dims = 0; 00084 stats.callee_1 = 0; 00085 stats.caller_1 = 0; 00086 stats.subscr_subscr = 0; 00087 stats.match_declared_dim = 0; 00088 stats.less_declared_dim = 0; 00089 stats.greater_declared_dim = 0; 00090 stats.nonaffine = 0; 00091 stats.triangular = 0; 00092 stats.diagonal = 0; 00093 stats.proven_monotonic = 0; 00094 stats.proven_nonmonotonic = 0; 00095 stats.monotonic_unknown = 0; 00096 stats.minnest = 10000000; 00097 stats.maxnest = 0; 00098 stats.totalnest = 0; 00099 stats.base_dc = 0; 00100 stats.last_dc = 0; 00101 stats.dc_count = 0; 00102 stats.base_ars_uniq = 0; 00103 stats.last_ars_uniq = 0; 00104 stats.ars_uniq_count = 0; 00105 stats.base_ars_uniq_dims = 0; 00106 stats.last_ars_uniq_dims = 0; 00107 stats.ars_uniq_dims_count = 0; 00108 stats.base_ars_uniq_strd = 0; 00109 stats.last_ars_uniq_strd = 0; 00110 stats.ars_uniq_strd_count = 0; 00111 stats.base_ars_uniq_dims = 0; 00112 stats.last_ars_uniq_dims = 0; 00113 stats.ars_uniq_dims_count = 0; 00114 stats.base_ars_uniq_dims_st = 0; 00115 stats.last_ars_uniq_dims_st = 0; 00116 stats.ars_uniq_dims_st_count = 0; 00117 } 00118 00119 void 00120 collect_ar_statistics ( AbstractAccess & access_region, AR_Stats & stats, 00121 Symbol & sym ) 00122 { 00123 if (_ar_prtstats != 1) return; 00124 00125 Dimcount * dc_ptr; 00126 Accreg_set * ars_ptr; 00127 00128 int decl_dims = sym.dim().entries(); /// ... dims in calling context 00129 00130 int ar_dims = access_region.number_of_dimensions(); 00131 stats.total_dims += ar_dims; 00132 00133 Statement * stmt = access_region.summarized_to(); 00134 00135 /// if ((stmt->stmt_class() == CALL_STMT) || 00136 /// ((stmt->stmt_class() == ASSIGNMENT_STMT) && 00137 /// (stmt->rhs().op()==FUNCTION_CALL_OP))) { 00138 /// 00139 /// if (access_region.inner().entries() == 0) return; 00140 /// 00141 /// p_assert(access_region.inner().entries() == 1, 00142 /// "ARD at CALL has multiple inner ARDs"); 00143 /// 00144 /// stats.print = true; 00145 /// 00146 /// stats.proc_boundary++; 00147 /// callee_dims = access_region.inner()[0].symbol().dim().entries(); 00148 /// if (decl_dims == callee_dims) { 00149 /// stats.both_same_dims++; 00150 /// } 00151 /// if (decl_dims > callee_dims) { 00152 /// stats.caller_grtr_dims++; 00153 /// if (callee_dims == 1) { 00154 /// stats.callee_1++; 00155 /// } 00156 /// } 00157 /// if (decl_dims < callee_dims) { 00158 /// stats.callee_grtr_dims++; 00159 /// if (decl_dims == 1) { 00160 /// stats.caller_1++; 00161 /// } 00162 /// } 00163 /// return; 00164 /// } 00165 /// 00166 00167 if (stmt->stmt_class() == ENTRY_STMT) return; 00168 00169 stats.print = true; 00170 00171 /// switch (access_region.monoton()) 00172 /// { 00173 /// case AR_PROVEN_MONO: 00174 /// stats.proven_monotonic++; 00175 /// break; 00176 /// case AR_PROVEN_NON_MONO: 00177 /// stats.proven_nonmonotonic++; 00178 /// break; 00179 /// case AR_UNKNOWN_MONO: 00180 /// stats.monotonic_unknown++; 00181 /// break; 00182 /// } 00183 00184 bool found_one; 00185 /// bool exact = access_region.accuracy() == ACCU_EXACT; 00186 bool exact = true; 00187 /// if (exact) { 00188 /// stats.exact++; 00189 /// } else { 00190 /// stats.inexact++; 00191 /// } 00192 00193 bool subsub = access_region.is_subsub(); 00194 if (exact && subsub) { 00195 stats.subscr_subscr++; 00196 } 00197 00198 /// p_assert(access_region.actual_exists(), "No actual expression registered"); 00199 00200 /// p_assert((access_region.actual_guarded().op() == ARRAY_REF_OP) || 00201 // (access_region.actual_guarded().op() == ID_OP), 00202 // "Actual expression must be ARRAY_REF_OP"); 00203 00204 bool nonaffine = access_region.is_nonaffine(); 00205 if (nonaffine && exact && !subsub) { 00206 stats.nonaffine++; 00207 } 00208 00209 bool triangular = access_region.is_triangular(); 00210 if (triangular && exact && !subsub && !nonaffine) { 00211 stats.triangular++; 00212 } 00213 00214 bool diagonal = access_region.is_diagonal(); 00215 if (diagonal && !triangular && exact && !subsub && !nonaffine) { 00216 stats.diagonal++; 00217 } 00218 00219 bool normal = exact && !diagonal && !triangular && !subsub && !nonaffine; 00220 if ((ar_dims == decl_dims) && normal) 00221 stats.match_declared_dim++; 00222 if ((ar_dims < decl_dims) && normal) 00223 stats.less_declared_dim++; 00224 if ((ar_dims > decl_dims) && normal) 00225 stats.greater_declared_dim++; 00226 00227 int level = access_region.nesting_level(); 00228 if (level < stats.minnest) stats.minnest = level; 00229 if (level > stats.maxnest) stats.maxnest = level; 00230 stats.totalnest += level; 00231 00232 if (_ar_match_dims == -1) { 00233 _ar_match_dims = switch_value("ar_matchdims"); 00234 } 00235 00236 if (_ar_match_dims) { 00237 00238 /// ... Go through the set of dimensionalities 00239 00240 found_one = false; 00241 for (dc_ptr = stats.base_dc; 00242 dc_ptr; 00243 dc_ptr = dc_ptr->next) 00244 { 00245 if (dc_ptr->dims == ar_dims) { 00246 dc_ptr->count++; 00247 found_one = true; 00248 break; 00249 } 00250 } 00251 00252 if (! found_one) { 00253 Dimcount * dc = new Dimcount; 00254 stats.dc_count++; 00255 dc->count = 1; 00256 dc->next = 0; 00257 dc->dims = ar_dims; 00258 if (stats.base_dc == 0) { 00259 stats.base_dc = dc; 00260 stats.last_dc = dc; 00261 } else { 00262 stats.last_dc->next = dc; 00263 stats.last_dc = dc; 00264 } 00265 } 00266 00267 /// ... Check for uniqueness 00268 00269 found_one = false; 00270 for (ars_ptr = stats.base_ars_uniq; 00271 ars_ptr; 00272 ars_ptr = ars_ptr->next) 00273 { 00274 if (access_region.match( ars_ptr->ar )) { 00275 ars_ptr->count++; 00276 found_one = true; 00277 break; 00278 } 00279 } 00280 00281 if (! found_one) { 00282 Accreg_set * ars = new Accreg_set; 00283 stats.ars_uniq_count++; 00284 ars->count = 1; 00285 ars->next = 0; 00286 ars->ar = & access_region; 00287 if (stats.base_ars_uniq == 0) { 00288 stats.base_ars_uniq = ars; 00289 stats.last_ars_uniq = ars; 00290 } else { 00291 stats.last_ars_uniq->next = ars; 00292 stats.last_ars_uniq = ars; 00293 } 00294 } 00295 00296 /// ... Check for uniqueness only considering dimensions (ignoring start expr) 00297 00298 found_one = false; 00299 for (ars_ptr = stats.base_ars_uniq_dims; 00300 ars_ptr; 00301 ars_ptr = ars_ptr->next) 00302 { 00303 if (access_region.match_by_dims( ars_ptr->ar )) { 00304 ars_ptr->count++; 00305 found_one = true; 00306 break; 00307 } 00308 } 00309 00310 if (! found_one) { 00311 Accreg_set * ars = new Accreg_set; 00312 stats.ars_uniq_dims_count++; 00313 ars->count = 1; 00314 ars->next = 0; 00315 ars->ar = & access_region; 00316 if (stats.base_ars_uniq_dims == 0) { 00317 stats.base_ars_uniq_dims = ars; 00318 stats.last_ars_uniq_dims = ars; 00319 } else { 00320 stats.last_ars_uniq_dims->next = ars; 00321 stats.last_ars_uniq_dims = ars; 00322 } 00323 } 00324 00325 /// ... Check for uniqueness only considering strides 00326 00327 found_one = false; 00328 for (ars_ptr = stats.base_ars_uniq_strd; 00329 ars_ptr; 00330 ars_ptr = ars_ptr->next) 00331 { 00332 if (access_region.match_by_strides( ars_ptr->ar )) { 00333 ars_ptr->count++; 00334 found_one = true; 00335 break; 00336 } 00337 } 00338 00339 if (! found_one) { 00340 Accreg_set * ars = new Accreg_set; 00341 stats.ars_uniq_strd_count++; 00342 ars->count = 1; 00343 ars->next = 0; 00344 ars->ar = & access_region; 00345 if (stats.base_ars_uniq_strd == 0) { 00346 stats.base_ars_uniq_strd = ars; 00347 stats.last_ars_uniq_strd = ars; 00348 } else { 00349 stats.last_ars_uniq_strd->next = ars; 00350 stats.last_ars_uniq_strd = ars; 00351 } 00352 } 00353 00354 /// ... Check for uniqueness considering all-but-one dimensions 00355 00356 found_one = false; 00357 for (ars_ptr = stats.base_ars_uniq_dims; 00358 ars_ptr; 00359 ars_ptr = ars_ptr->next) 00360 { 00361 if (access_region.match_by_dims_except_1( ars_ptr->ar )) { 00362 ars_ptr->count++; 00363 found_one = true; 00364 break; 00365 } 00366 } 00367 00368 if (! found_one) { 00369 Accreg_set * ars = new Accreg_set; 00370 stats.ars_uniq_dims_count++; 00371 ars->count = 1; 00372 ars->next = 0; 00373 ars->ar = & access_region; 00374 if (stats.base_ars_uniq_dims == 0) { 00375 stats.base_ars_uniq_dims = ars; 00376 stats.last_ars_uniq_dims = ars; 00377 } else { 00378 stats.last_ars_uniq_dims->next = ars; 00379 stats.last_ars_uniq_dims = ars; 00380 } 00381 } 00382 00383 /// ... Check for uniqueness considering start and all-but-one dimension 00384 00385 found_one = false; 00386 for (ars_ptr = stats.base_ars_uniq_dims_st; 00387 ars_ptr; 00388 ars_ptr = ars_ptr->next) 00389 { 00390 if (access_region.match_by_start_dims_except_1( ars_ptr->ar )) { 00391 ars_ptr->count++; 00392 found_one = true; 00393 break; 00394 } 00395 } 00396 00397 if (! found_one) { 00398 Accreg_set * ars = new Accreg_set; 00399 stats.ars_uniq_dims_st_count++; 00400 ars->count = 1; 00401 ars->next = 0; 00402 ars->ar = & access_region; 00403 if (stats.base_ars_uniq_dims_st == 0) { 00404 stats.base_ars_uniq_dims_st = ars; 00405 stats.last_ars_uniq_dims_st = ars; 00406 } else { 00407 stats.last_ars_uniq_dims_st->next = ars; 00408 stats.last_ars_uniq_dims_st = ars; 00409 } 00410 } 00411 } 00412 return; 00413 } 00414 00415 void 00416 print_ar_stats_line( ostream & o, Statement & stmt, AR_Stats & stats ) 00417 { 00418 o << "ARSTATS: "; 00419 if (stmt.stmt_class() == DO_STMT) { 00420 o << stmt.get_loop_name(); 00421 } else if (stmt.stmt_class() == CALL_STMT) { 00422 o << "CALL_STMT"; 00423 } else if (stmt.stmt_class() == ENTRY_STMT) { 00424 o << "ENTRY_STMT"; 00425 } else { 00426 o << "UNKNOWN_STMT"; 00427 } 00428 o << " TOT: " << stats.total << " #MATCH_DECL: " << stats.match_declared_dim; 00429 o << " >DECLARED_DIM: " << stats.greater_declared_dim << " <DECLARED_DIM: " << stats.less_declared_dim ; 00430 o << " NONAFFINE: " << stats.nonaffine; 00431 o << " TRIANG: " << stats.triangular; 00432 o << " DIAG: " << stats.diagonal; 00433 o << " NEST: " << stats.minnest << " "<< stats.maxnest << " " << float(stats.totalnest)/float(stats.total); 00434 o << " XCT: " << stats.exact << " INXCT: " << stats.inexact; 00435 o << " SS: " << stats.subscr_subscr; 00436 o << " PROV_MONO: " << stats.proven_monotonic; 00437 o << " PROV_NMONO: " << stats.proven_nonmonotonic; 00438 o << " MONO_UNK: " << stats.monotonic_unknown; 00439 o << " CALLS: " << stats.proc_boundary; 00440 o << " CALLER>: " << stats.caller_grtr_dims; 00441 o << " CALLEE>: " << stats.callee_grtr_dims; 00442 o << " CALLER1: " << stats.caller_1; 00443 o << " CALLEE1: " << stats.callee_1; 00444 o << " #UNIQ: " << stats.ars_uniq_count << " DIMS: " << stats.ars_uniq_dims_count; 00445 o << " STRD: " << stats.ars_uniq_strd_count << " DIMS-1: " << stats.ars_uniq_dims_count; 00446 o << " DIMS-1/STRT: " << stats.ars_uniq_dims_st_count << " #DIFF-DIM: " << stats.dc_count; 00447 o << " TOT_DIMS: " << stats.total_dims; 00448 o << " DIM-LIST: "; 00449 00450 for (Dimcount * dc_ptr = stats.base_dc; 00451 dc_ptr; 00452 dc_ptr = dc_ptr->next) 00453 { 00454 o << " " << dc_ptr->dims; 00455 } 00456 o << endl; 00457 00458 if (stats.caller_grtr_dims > stats.callee_1) { 00459 o << "RESHAPING (CALLER>): N to M" << endl; 00460 } 00461 if (stats.callee_grtr_dims > stats.caller_1) { 00462 o << "RESHAPING (CALLEE>): N to M" << endl; 00463 } 00464 00465 } 00466 00467 /// ar_intersect - 00468 /// When you are not sure: 00469 /// OVERESTIMATE means A <intersect> B => A <union> B (maximize the result) 00470 /// UNDERESTIMATE means A <intersect> B => <emptyset> (minimize the result) 00471 00472 void 00473 ar_intersect(List<AbstractAccess> & list1, 00474 List<AbstractAccess> & list2, 00475 AR_CONSERVATIVE direction, 00476 List<AbstractAccess> & result, 00477 IPAStats & istats) 00478 { 00479 if ((list1.entries() == 0) || (list2.entries() == 0)) return; 00480 00481 istats.ar_inter1.restart(); 00482 istats.ar_inter1_elems1.add(list1.entries()); 00483 istats.ar_inter1_elems2.add(list2.entries()); 00484 00485 if (_ar_logging && _ar_logging % 2 == 0) { 00486 arlog << "Intersect1:" << list1 << " and " << list2 ; 00487 } 00488 00489 RefList<AbstractAccess> discard1; 00490 RefList<AbstractAccess> discard2; 00491 00492 SimBiGraph sbg(list1, list2 ); 00493 00494 SimBiGraphIterator sbgi( sbg ); 00495 for ( ; sbgi.list1_valid(); sbgi.next_list1_node()) { 00496 00497 int ard1 = sbgi.current_list1(); 00498 AbstractAccess & candidate = list1[ard1]; 00499 00500 for ( ; sbgi.list2_valid(); sbgi.next_list2_node()) { 00501 00502 int ard2 = sbgi.current_list2( ); 00503 00504 SimEdge & edge = sbgi.current(); 00505 00506 List<AbstractAccess> new_ard1; 00507 List<AbstractAccess> new_ard2; 00508 00509 Statement * ref_stmt; 00510 00511 if (candidate.actual_stmt()) { 00512 ref_stmt = candidate.actual_stmt(); 00513 } else { 00514 ref_stmt = range_stmt(*(candidate.summarized_to())); 00515 } 00516 00517 RangeAccessor range_acc(candidate.pgm_summarized_to()->range_dict_guarded(), 00518 *ref_stmt); 00519 istats.ar_inter1.stop(); 00520 00521 /// ... Use AND as the Meet operator, since this is for straight-line code 00522 bool inter = intersect_ARDs(list1[ard1], list2[ard2], range_acc, edge, 00523 direction, new_ard1, new_ard2, result, AR_AND, istats); 00524 istats.ar_inter1.restart(); 00525 00526 if (inter) { 00527 for (int i=0; i<new_ard2.entries(); ++i) { 00528 if (!ar_descriptor_member(list2, new_ard2[i])) { 00529 list2.ins_last(new_ard2.grab(i)); 00530 } 00531 } 00532 if (!discard2.member(list2[ard2])) discard2.ins_last(list2[ard2]); 00533 00534 for (int i=0; i<new_ard1.entries(); ++i) { 00535 if (!ar_descriptor_member(list1, new_ard1[i])) { 00536 list1.ins_last(new_ard1.grab(i)); 00537 } 00538 } 00539 if (!discard1.member(list1[ard1])) discard1.ins_last(list1[ard1]); 00540 break; 00541 } 00542 } 00543 } 00544 /// ... discard the indicated descriptors 00545 00546 for (Iterator<AbstractAccess> iter = discard1; 00547 iter.valid(); 00548 ++iter) 00549 { 00550 list1.del(iter.current()); 00551 } 00552 for (Iterator<AbstractAccess> iter = discard2; 00553 iter.valid(); 00554 ++iter) 00555 { 00556 list2.del(iter.current()); 00557 } 00558 00559 if (_ar_logging && _ar_logging % 2 == 0) { 00560 arlog << "Result: list1" << list1 << " list2:" << list2 << " Result:" << result; 00561 } 00562 istats.ar_inter1.stop(); 00563 } 00564 00565 /// ar_intersect - Internal intersection within a single list 00566 /// When you are not sure: 00567 /// OVERESTIMATE means A <intersect> B => A <union> B (maximize the result) 00568 /// UNDERESTIMATE means A <intersect> B => <emptyset> (minimize the result) 00569 /// 00570 /// Only report whether an intersection exists or not, considering 00571 00572 bool 00573 ar_intersect(List<AbstractAccess> & list, 00574 AR_CONSERVATIVE direction, 00575 IPAStats & istats) 00576 { 00577 if (list.entries() < 2) return false; 00578 00579 istats.ar_inter4.restart(); 00580 istats.ar_inter4_elems.add(list.entries()); 00581 00582 if (_ar_logging && _ar_logging % 2 == 0) { 00583 arlog << "Intersect4:" << list ; 00584 } 00585 00586 SimGraph sg(list); 00587 00588 SimGraphIterator sgiter( sg ); 00589 for ( ; sgiter.valid(); ++sgiter) { 00590 00591 SimEdge & edge = sgiter.current(); 00592 AbstractAccess * ard1 = edge.node_1(); 00593 AbstractAccess * ard2 = edge.node_2(); 00594 00595 RangeAccessor range_acc(ard1->pgm_summarized_to()->range_dict_guarded(), 00596 *(range_stmt(*(ard1->summarized_to()))) ); 00597 00598 istats.ar_inter4.stop(); 00599 00600 List<AbstractAccess> new_ard1; 00601 List<AbstractAccess> new_ard2; 00602 List<AbstractAccess> result; 00603 /// ... Use AND as the Meet operator since this is for straightline code 00604 bool inter = intersect_ARDs(*ard1, *ard2, range_acc, edge, 00605 direction, new_ard1, new_ard2, result, AR_AND, istats); 00606 istats.ar_inter4.restart(); 00607 00608 bool plausible = ar_plausible_execution(result); 00609 00610 if (inter && plausible) { 00611 if (_ar_logging && _ar_logging % 2 == 0) { 00612 arlog << " - Intersects"; 00613 } 00614 istats.ar_inter4.stop(); 00615 return true; 00616 } 00617 } 00618 istats.ar_inter4.stop(); 00619 if (_ar_logging && _ar_logging % 2 == 0) { 00620 arlog << " - No Intersection"; 00621 } 00622 istats.ar_inter4.stop(); 00623 return false; 00624 } 00625 00626 /// ar_subregion 00627 /// is ard1 a subregion of ard2 ? 00628 00629 bool 00630 ar_subregion(AbstractAccess & ard1, AbstractAccess & ard2, SimEdge & edge, 00631 AR_CONSERVATIVE direction, Array<int> *match ) 00632 { 00633 SimilarityType simtype = edge.type(); 00634 00635 if (_ar_logging && _ar_logging % 2 == 0) { 00636 arlog << endl << "Subregion- ARD1:" << ard1 << " vs ARD2:" << ard2 ; 00637 } 00638 00639 AbstractAccess * aa1 = &ard1; 00640 AbstractAccess * aa2 = &ard2; 00641 int dims1 = aa1->number_of_dimensions(); 00642 int dims2 = aa2->number_of_dimensions(); 00643 00644 if ((simtype == SIM_EQUIV) || 00645 ((simtype == SIM_EQUIV_SEMI) && (dims1 < dims2))) { 00646 00647 if (_ar_logging && _ar_logging % 2 == 0) { 00648 arlog << "-YES-" << endl; 00649 } 00650 return true; 00651 } 00652 00653 bool newaa1 = false; 00654 bool newaa2 = false; 00655 00656 RangeAccessor range_acc(aa2->pgm_summarized_to()->range_dict_guarded(), 00657 *(range_stmt(*(aa2->summarized_to()))) ); 00658 00659 Relation rel_start = range_acc.compare(aa1->start_guarded(), 00660 aa2->start_guarded()); 00661 00662 /// ... If either ARD1's start is less than ARD2's, or we just can't tell 00663 /// ... where ARD1 starts in relation to ARD2, then report NO 00664 00665 if (!rel_start.is_greater_equal()) { 00666 if (_ar_logging && _ar_logging % 2 == 0) { 00667 arlog << "-NO-" << endl; 00668 } 00669 return false; 00670 } 00671 00672 /// ... Now we know aa2 is left of aa1 00673 00674 if (dims2 == 1) { 00675 /// ... When aa2 is 1-dimensional, stride 1, if 00676 /// ... aa1 fits totally inside aa2, then it is a subregion 00677 00678 if (is_integer_one(aa2->dimension(0).stride_guarded())) { 00679 00680 Expression *lastoff1 = aa1->lastoffset(); 00681 Expression *lastoff2 = aa2->lastoffset(); 00682 00683 Relation rel_off = range_acc.compare(*lastoff1, *lastoff2); 00684 if (rel_off.is_less_equal()) { 00685 if (_ar_logging && _ar_logging % 2 == 0) { 00686 arlog << "-YES-" << endl; 00687 } 00688 return true; 00689 } 00690 } 00691 } 00692 00693 00694 Array<int> * match_index = 0; 00695 00696 /// ... If pred(ARD1) implies pred(ARD2), then check the regions for subregion-ness 00697 00698 if (ar_implies(*aa1, *aa2)) { 00699 00700 /// ... Take care of the _SEMI cases. 00701 00702 if ((simtype == SIM_DIM_EQUIV_SEMI) || 00703 (simtype == SIM_STRIDE_EQUIV_SEMI)) { 00704 00705 if (dims1 < dims2) { 00706 00707 /// ... Copy ARD1 00708 AbstractAccess * new_aa = aa1->clone(); 00709 00710 Array<int> * new_match_index = new Array<int>; 00711 new_match_index->resize(dims2); 00712 00713 if (match == 0) { 00714 match_index = edge.edge()->stridematch(); 00715 } else { 00716 match_index = match; 00717 } 00718 /// ... Invert match_index 00719 00720 Array<int> * inverted_match_index = invert_match(match_index, dims2); 00721 00722 /// ... Check whether we can determine subregion-ness immediately 00723 00724 if (simtype == SIM_DIM_EQUIV_SEMI) { 00725 /// ... If ARD1's dimensions each match a dimension in ARD2, the difference 00726 /// ... between the starting points is a multiple of some non-matching dimension's 00727 /// ... stride, and the difference between the starting points is less than the 00728 /// ... span of that non-matching dimension, then ARD1 is a subregion of ARD2. 00729 00730 Expression *diff = simplify(sub(aa1->start_guarded().clone(), 00731 aa2->start_guarded().clone())); 00732 00733 for (int ii=0; ii<dims2; ++ii) { 00734 if ((*inverted_match_index)[ii] == -1) { 00735 /// ... No match in ARD1 for dim ii in ARD2 00736 if (expr_divides(aa2->dimension(ii).stride_guarded(), *diff)==AR_YES) { 00737 00738 Relation rel = range_acc.compare(*diff, aa2->dimension(ii).span_guarded()); 00739 00740 if (rel.is_less_equal()) { 00741 00742 if (_ar_logging && _ar_logging % 2 == 0) { 00743 arlog << "-YES-" << endl; 00744 } 00745 delete diff; 00746 delete inverted_match_index; 00747 delete new_aa; 00748 return true; 00749 } 00750 } 00751 } 00752 } 00753 delete diff; 00754 } 00755 00756 /// ... Make a new descriptor for aa1 with strides equivalent to those in aa2 and zero spans 00757 00758 /// ... Add dimensions from ARD2 which were not matched, set spans to 0 00759 00760 int new_dim = dims1; /// ... add_dimension does ins_last 00761 for (int ii=0; ii<dims2; ++ii) { 00762 if ((*inverted_match_index)[ii] == -1) { 00763 AccessDimension * dim =aa2->dimension(ii).clone(); 00764 dim->span(constant(0)); 00765 new_aa->add_dimension(dim); 00766 (*new_match_index)[new_dim] = ii; 00767 ++new_dim; 00768 } else { 00769 (*new_match_index)[(*inverted_match_index)[ii]] = ii; 00770 } 00771 } 00772 delete inverted_match_index; 00773 00774 match_index = new_match_index; 00775 aa1 = new_aa; 00776 newaa1 = true; 00777 dims1 = dims2; 00778 simtype = SIM_STRIDE_EQUIV; 00779 } else { 00780 00781 /// ... Copy ARD2 00782 00783 AbstractAccess * new_aa = aa2->clone(); 00784 00785 Array<int> * new_match_index = new Array<int>; 00786 new_match_index->resize(dims1); 00787 00788 if (match == 0) { 00789 match_index = edge.edge()->stridematch(); 00790 } else { 00791 match_index = match; 00792 } 00793 for (int ii=0; ii<dims1; ++ii) { 00794 (*new_match_index)[ii] = (*match_index)[ii]; 00795 } 00796 /// ... Add dimensions from ARD1 which were not matched, set spans to 0 00797 00798 int new_dim = dims2; 00799 for (int ii=0; ii<dims1; ++ii) { 00800 if ((*match_index)[ii] == -1) { 00801 AccessDimension * dim =aa1->dimension(ii).clone(); 00802 dim->span(constant(0)); 00803 new_aa->add_dimension(dim); 00804 (*new_match_index)[ii] = new_dim; 00805 ++new_dim; 00806 } 00807 } 00808 match_index = new_match_index; 00809 aa2 = new_aa; 00810 newaa2 = true; 00811 dims2 = dims1; 00812 simtype = SIM_STRIDE_EQUIV; 00813 } 00814 } else { 00815 if (match == 0) { 00816 match_index = edge.edge()->stridematch(); 00817 } else { 00818 match_index = match; 00819 } 00820 } 00821 00822 Expression * lastoffset_1 = aa1->lastoffset(); 00823 Expression * lastoffset_2 = aa2->lastoffset(); 00824 00825 if ((simtype == SIM_STRIDE_EQUIV) && (rel_start.is_equal())) { 00826 /// ... If all spans of ard1 are less or equal to the associated spans of ard2 00827 /// ... we have a subregion 00828 00829 bool success = true; 00830 for (int i=0; i < dims1; ++i) { 00831 Relation relspan = range_acc.compare(aa1->dimension(i).span_guarded(), 00832 aa2->dimension(((*match_index)[i])).span_guarded()); 00833 if (relspan.is_greater_than()) { 00834 success = false; 00835 break; 00836 } 00837 } 00838 if (success) { 00839 if (_ar_logging && _ar_logging % 2 == 0) { 00840 arlog << "-YES-" << endl; 00841 } 00842 if (newaa1) { 00843 delete aa1; 00844 delete match_index; 00845 } 00846 if (newaa2) { 00847 delete aa2; 00848 delete match_index; 00849 } 00850 return true; 00851 } 00852 } 00853 00854 if ((simtype == SIM_STRIDE_EQUIV) && (rel_start.is_greater_than())) { 00855 /// ... Find one stride in ard2 and a matching dim in ard1, 00856 /// ... such that the difference between the starting 00857 /// ... points of the two ards is a multiple of that stride and 00858 /// ... offset_1 + span_1 <= offset_2 + span_2, PLUS 00859 /// ... for all other dims span_1 <= span_2 00860 00861 Array<int> * inverted_match_index = invert_match(match_index, dims2); 00862 00863 Expression *diff = simplify(sub(aa1->start_guarded().clone(), 00864 aa2->start_guarded().clone())); 00865 00866 int mult_dim = -1; 00867 for (int ii=0; ii<dims2; ++ii) { 00868 if (expr_divides(aa2->dimension(ii).stride_guarded(), *diff)==AR_YES) { 00869 Expression *sum1 = simplify(add(aa1->start_guarded().clone(), 00870 aa1->dimension((*inverted_match_index)[ii]).span_guarded().clone())); 00871 Expression *sum2 = simplify(add(aa2->start_guarded().clone(), 00872 aa2->dimension(ii).span_guarded().clone())); 00873 Relation rel = range_acc.compare(*sum1, *sum2); 00874 00875 delete sum1; 00876 delete sum2; 00877 00878 if (rel.is_less_equal()) { 00879 mult_dim = (*inverted_match_index)[ii]; 00880 break; 00881 } 00882 } 00883 } 00884 delete inverted_match_index; 00885 delete diff; 00886 00887 if (mult_dim != -1) { 00888 bool success = true; 00889 for (int ii=0; ii<dims1; ++ii) { 00890 if (ii == mult_dim) continue; 00891 Relation rel = range_acc.compare(aa1->dimension(ii).span_guarded(), 00892 aa2->dimension((*match_index)[ii]).span_guarded()); 00893 if (!rel.is_less_equal()) { 00894 success = false; 00895 } 00896 } 00897 if (success) { 00898 if (_ar_logging && _ar_logging % 2 == 0) { 00899 arlog << "-YES-" << endl; 00900 } 00901 if (newaa1) { 00902 delete aa1; 00903 delete match_index; 00904 } 00905 if (newaa2) { 00906 delete aa2; 00907 delete match_index; 00908 } 00909 return true; 00910 } 00911 } 00912 } 00913 00914 00915 Relation rel_end = range_acc.compare(*lastoffset_1, 00916 *lastoffset_2); 00917 if ((dims2 == 1) && 00918 (aa2->dimension(0).stride_guarded().op() == INTEGER_CONSTANT_OP) && 00919 (aa2->dimension(0).stride_guarded().value() == 1) && 00920 rel_start.is_greater_equal() && 00921 rel_end.is_less_equal()) { 00922 if (_ar_logging && _ar_logging % 2 == 0) { 00923 arlog << "-YES-" << endl; 00924 } 00925 if (newaa1) { 00926 delete aa1; 00927 delete match_index; 00928 } 00929 if (newaa2) { 00930 delete aa2; 00931 delete match_index; 00932 } 00933 return true; 00934 } 00935 if (rel_start.is_less_than()) { 00936 if (_ar_logging && _ar_logging % 2 == 0) { 00937 arlog << "-NO-" << endl; 00938 } 00939 if (newaa1) { 00940 delete aa1; 00941 delete match_index; 00942 } 00943 if (newaa2) { 00944 delete aa2; 00945 delete match_index; 00946 } 00947 return false; 00948 } 00949 00950 if (rel_end.is_greater_than()) { 00951 if (_ar_logging && _ar_logging % 2 == 0) { 00952 arlog << "-NO-" << endl; 00953 } 00954 if (newaa1) { 00955 delete aa1; 00956 delete match_index; 00957 } 00958 if (newaa2) { 00959 delete aa2; 00960 delete match_index; 00961 } 00962 return false; 00963 } 00964 } 00965 00966 /// ... If we get to this point, we just don't know, so let the 00967 /// ... conservative direction determine how we respond. 00968 00969 if (direction == UNDERESTIMATE) { 00970 if (_ar_logging && _ar_logging % 2 == 0) { 00971 arlog << "-NO-" << endl; 00972 } 00973 if (newaa1) { 00974 delete aa1; 00975 delete match_index; 00976 } 00977 if (newaa2) { 00978 delete aa2; 00979 delete match_index; 00980 } 00981 return false; 00982 } 00983 if (_ar_logging && _ar_logging % 2 == 0) { 00984 arlog << "-YES-" << endl; 00985 } 00986 if (newaa1) { 00987 delete aa1; 00988 delete match_index; 00989 } 00990 if (newaa2) { 00991 delete aa2; 00992 delete match_index; 00993 } 00994 return true; /// ... direction == OVERESTIMATE 00995 } 00996 00997 /// append ard to list 00998 void 00999 ar_append(List<AbstractAccess> & list, AbstractAccess * ard) 01000 { 01001 if (ard) { 01002 for (Iterator<AbstractAccess> iter = list; 01003 iter.valid(); 01004 ++iter) { 01005 if (ar_descriptors_equal(iter.current(), *ard)) { 01006 return; 01007 } 01008 } 01009 list.ins_last(ard); 01010 } 01011 } 01012 01013 /// append list2 to list1 01014 void 01015 ar_append(List<AbstractAccess> & list1, List<AbstractAccess> & list2, 01016 IPAStats & istats) 01017 { 01018 if (list2.entries() == 0) return; 01019 for (Iterator<AbstractAccess> iter = list2; 01020 iter.valid(); 01021 ++iter) { 01022 01023 if (iter.current().exec_predicate_false()) continue; 01024 01025 list1.ins_last(iter.current().clone()); 01026 } 01027 return; 01028 } 01029 01030 /// append list2 to list1, changing the predicate of the elements of list2 to 01031 /// a new predicate consisting of the AR_MEET applied to the predicates of 01032 /// AbstractAccess descriptors lista[indexa] and listb[indexb]. 01033 void 01034 ar_append_new_pred(List<AbstractAccess> & list1, List<AbstractAccess> & list2, 01035 List<AbstractAccess> & lista, int indexa, 01036 List<AbstractAccess> & listb, int indexb, 01037 AR_MEET meet_op, 01038 PredicateRepository & repos, IPAStats & istats) 01039 { 01040 AbstractAccess & ard1 = lista[indexa]; 01041 AbstractAccess & ard2 = listb[indexb]; 01042 01043 if (list2.entries() == 0) return; 01044 01045 for (Iterator<AbstractAccess> iter = list2; 01046 iter.valid(); 01047 ++iter) { 01048 AbstractAccess * descr = iter.current().clone(); 01049 switch(meet_op) { 01050 case AR_X: 01051 descr->exec_predicate(ard1.exec_predicate()); 01052 break; 01053 case AR_Y: 01054 descr->exec_predicate(ard2.exec_predicate()); 01055 break; 01056 case AR_AND: 01057 descr->exec_predicate(repos.and_pred(ard1.exec_predicate(), 01058 ard2.exec_predicate())); 01059 break; 01060 case AR_OR: 01061 descr->exec_predicate(repos.or_pred(ard1.exec_predicate(), 01062 ard2.exec_predicate())); 01063 break; 01064 case AR_NOTX_Y: 01065 descr->exec_predicate(repos.notx_and_y_pred(ard1.exec_predicate(), 01066 ard2.exec_predicate())); 01067 break; 01068 case AR_X_NOTY: 01069 descr->exec_predicate(repos.x_and_noty_pred(ard1.exec_predicate(), 01070 ard2.exec_predicate())); 01071 break; 01072 default: 01073 p_abort("Bad value for meet operator"); 01074 } 01075 list1.ins_last(descr); 01076 } 01077 } 01078 01079 AbstractAccess * 01080 compute_abstract_access( const Expression & expr, Statement * stmt, AR_TYPE artype ) 01081 { 01082 Expression * start; 01083 01084 if (expr.base_variable_ref() == 0) return 0; 01085 if ((expr.base_variable_ref()->sym_class() != VARIABLE_CLASS) && 01086 (expr.base_variable_ref()->sym_class() != FUNCTION_CLASS)) { 01087 return 0; 01088 } 01089 01090 const Symbol * sym = expr.base_variable_ref(); 01091 01092 /// if (! sym->is_array()) return 0; ---Removed to compute ARDs for scalars also 01093 01094 /// ... If this is a GSA symbol, find the base symbol for it 01095 if ((sym->sym_class() == VARIABLE_CLASS) && 01096 ((VariableSymbol *)sym)->is_gsa_symbol()) { 01097 sym = &(((VariableSymbol * )sym)->gsa_base_symbol()); 01098 } 01099 01100 if (expr.op() == ARRAY_REF_OP) { 01101 start = linearize_zero_registered( expr.subscript().clone(), *((VariableSymbol *)sym) ); 01102 } else { 01103 start = constant( 0 ); 01104 } 01105 01106 List<AccessDimension> adl; 01107 AbstractAccess * aa = new AbstractAccess(*sym, adl, start, ACCU_EXACT, artype); 01108 aa->add_actual( stmt, expr.clone() ); /// ... Add the actual expression to the rep 01109 01110 aa->byte_size(sym->type().size()); 01111 /// ... Set the byte size of the descriptor. 01112 01113 if (artype == AR_READ) { 01114 aa->read_only(true); 01115 } 01116 01117 if (artype == AR_WRITE) { 01118 aa->write_first(true); 01119 } 01120 01121 aa->no_overlap(true); 01122 01123 return aa; 01124 } 01125 01126 SimilarityType 01127 contig_type (SimilarityType oldtype) 01128 { 01129 SimilarityType new_type; 01130 01131 switch (oldtype) { 01132 case SIM_EQUIV_SEMI: 01133 case SIM_DIM_EQUIV_SEMI: 01134 case SIM_STRIDE_EQUIV_SEMI: 01135 new_type = SIM_STRIDE_EQUIV_SEMI; 01136 break; 01137 case SIM_EQUIV: 01138 case SIM_DIM_EQUIV: 01139 new_type = SIM_STRIDE_EQUIV; 01140 break; 01141 case SIM_STRIDE_EQUIV: 01142 case SIM_DIM_EQUIV_UP: 01143 case SIM_STRIDE_EQUIV_UP: 01144 new_type = SIM_STRIDE_EQUIV_UP; 01145 break; 01146 case SIM_NOT_SIMILAR: 01147 new_type = SIM_NOT_SIMILAR; 01148 break; 01149 case SIM_UNKNOWN: 01150 new_type = SIM_UNKNOWN; 01151 break; 01152 default: 01153 p_abort("Bad type for SimEdgeKernel"); 01154 break; 01155 } 01156 return new_type; 01157 } 01158 01159 /// result <- list1 - list2 01160 /// Here, when we don't know for sure, 01161 /// UNDERESTIMATE means that A - B => <empty set> 01162 /// and OVERESTIMATE means that A - B => A 01163 01164 void 01165 ar_subtract(List<AbstractAccess> & list1, List<AbstractAccess> & list2, 01166 AR_CONSERVATIVE direction, List<AbstractAccess> & result) 01167 { 01168 if (list1.entries() == 0) return; 01169 01170 if (_ar_logging && _ar_logging % 2 == 0) { 01171 arlog << "Subtract: " << list1 << " - " << list2 << endl; 01172 } 01173 01174 p_assert(&list1 != &result, "Use other ar_subtract when &list1==&result"); 01175 01176 if (list2.entries() == 0) { 01177 for (Iterator<AbstractAccess> iter = list1; 01178 iter.valid(); 01179 ++iter) 01180 { 01181 result.ins_first(iter.current().clone()); 01182 } 01183 01184 if (_ar_logging && _ar_logging % 2 == 0) { 01185 arlog << "Result: " << result; 01186 } 01187 return; 01188 } 01189 01190 AbstractAccess & aa = list1[0]; 01191 RangeAccessor range_acc(aa.pgm_summarized_to()->range_dict_guarded(), 01192 *(range_stmt(*(aa.summarized_to()))) ); 01193 01194 SimBiGraph sbg(list1, list2 ); 01195 01196 SimBiGraphIterator sbgi( sbg ); 01197 for ( ; sbgi.list1_valid(); sbgi.next_list1_node()) { 01198 01199 int ard1 = sbgi.current_list1(); 01200 bool keep = true; 01201 01202 for ( ; sbgi.list2_valid(); sbgi.next_list2_node()) { 01203 01204 int ard2 = sbgi.current_list2( ); 01205 01206 SimEdge & edge = sbgi.current(); 01207 SimilarityType simtype = edge.type(); 01208 01209 if (simtype == SIM_EQUIV) { 01210 keep = false; 01211 break; 01212 } 01213 01214 /// ... if lastoffset[A] < baseoffset[B] 01215 01216 Relation rel1 = range_acc.compare(*list1[ard1].lastoffset(), 01217 list2[ard2].start_guarded()); 01218 if (rel1.is_less_than()) continue; 01219 01220 Relation rel2 = range_acc.compare(*list2[ard2].lastoffset(), 01221 list1[ard1].start_guarded()); 01222 if (rel2.is_less_than()) continue; 01223 01224 if (ar_subregion(list1[ard1], list2[ard2], edge, direction)) { 01225 keep = false; 01226 break; 01227 } 01228 } 01229 if (keep) { 01230 result.ins_first(list1[ard1].clone()); 01231 } else if (direction == OVERESTIMATE) { 01232 result.ins_first(list1[ard1].clone()); 01233 } 01234 } 01235 if (_ar_logging && _ar_logging % 2 == 0) { 01236 arlog << "Result: " << result; 01237 } 01238 } 01239 01240 /// list1 <- list1 - list2 01241 /// Here, when we don't know for sure, 01242 /// UNDERESTIMATE means that A - B => <empty set> 01243 /// and OVERESTIMATE means that A - B => A 01244 01245 void 01246 ar_subtract(List<AbstractAccess> & list1, List<AbstractAccess> & list2, 01247 AR_CONSERVATIVE direction) 01248 { 01249 if (list1.entries() == 0) return; 01250 if (list2.entries() == 0) return; 01251 01252 if (_ar_logging && _ar_logging % 2 == 0) { 01253 arlog << "Subtract: " << list1 << " - " << list2; 01254 } 01255 01256 AbstractAccess & aa = list1[0]; 01257 RangeAccessor range_acc(aa.pgm_summarized_to()->range_dict_guarded(), 01258 *(range_stmt(*(aa.summarized_to()))) ); 01259 01260 RefList<AbstractAccess> discard; 01261 01262 SimBiGraph sbg(list1, list2 ); 01263 01264 SimBiGraphIterator sbgi( sbg ); 01265 for ( ; sbgi.list1_valid(); sbgi.next_list1_node()) { 01266 01267 int ard1 = sbgi.current_list1(); 01268 AbstractAccess & candidate = list1[ard1]; 01269 01270 bool determined = false; 01271 for ( ; sbgi.list2_valid(); sbgi.next_list2_node()) { 01272 01273 int ard2 = sbgi.current_list2( ); 01274 01275 SimEdge & edge = sbgi.current(); 01276 SimilarityType simtype = edge.type(); 01277 01278 if (simtype == SIM_EQUIV) { 01279 discard.ins_first(candidate); 01280 determined = true; 01281 break; 01282 } 01283 01284 /// ... if lastoffset[A] < baseoffset[B] 01285 01286 Relation rel1 = range_acc.compare(*list1[ard1].lastoffset(), 01287 list2[ard2].start_guarded()); 01288 if (rel1.is_less_than()) continue; 01289 01290 Relation rel2 = range_acc.compare(*list2[ard2].lastoffset(), 01291 list1[ard1].start_guarded()); 01292 if (rel2.is_less_than()) continue; 01293 01294 if (ar_subregion(list1[ard1], list2[ard2], edge, direction)) { 01295 discard.ins_first(candidate); 01296 determined = true; 01297 break; 01298 } 01299 } 01300 if (! determined) { 01301 if (direction == UNDERESTIMATE) { 01302 discard.ins_first(candidate); 01303 } 01304 } 01305 } 01306 for (Iterator<AbstractAccess> iter = discard; 01307 iter.valid(); 01308 ++iter) 01309 { 01310 list1.del(iter.current()); 01311 } 01312 01313 if (_ar_logging && _ar_logging % 2 == 0) { 01314 arlog << "Result in list1: " << list1; 01315 } 01316 } 01317 01318 void 01319 check_access_patterns(Program & prog, ProgramUnit & pgm) 01320 { 01321 for (Iterator<Statement> iter = pgm.stmts(); 01322 iter.valid(); 01323 ++iter) { 01324 01325 Statement & stmt = iter.current(); 01326 01327 if (stmt.access_table_exists()) { 01328 for (KeyIterator<Symbol, SymbolAccess> ar_iter = stmt.iter_access_table_guarded(); 01329 ar_iter.valid(); 01330 ++ar_iter) { 01331 01332 SymbolAccess & sa = ar_iter.current_data(); 01333 01334 /// ... Iterate through the read accesses 01335 01336 for (Iterator<AbstractAccess> rd_iter = sa.iter_read(); 01337 rd_iter.valid(); 01338 ++rd_iter) { 01339 AbstractAccess & aa = rd_iter.current(); 01340 aa.check_access_patterns(); 01341 } 01342 /// ... Iterate through the write accesses 01343 01344 for (Iterator<AbstractAccess> wr_iter = sa.iter_write(); 01345 wr_iter.valid(); 01346 ++wr_iter) { 01347 AbstractAccess & aa = wr_iter.current(); 01348 aa.check_access_patterns(); 01349 } 01350 // Iterate through the read/write accesses 01351 01352 for (Iterator<AbstractAccess> rdwr_iter = sa.iter_readwrite(); 01353 rdwr_iter.valid(); 01354 ++rdwr_iter) { 01355 AbstractAccess & aa = rdwr_iter.current(); 01356 aa.check_access_patterns(); 01357 } 01358 } 01359 } 01360 } 01361 } 01362 01363 void 01364 simplify_descriptors( List<AbstractAccess> & list, IPAStats & istats ) 01365 { 01366 bool changed = false; 01367 01368 if (list.entries() == 0) return; 01369 01370 istats.simp_descr.restart(); 01371 istats.simp_descr_elems.add(list.entries()); 01372 01373 /// ... eliminate all descriptors with .FALSE. execution predicates 01374 01375 for (Mutator<AbstractAccess> iter = list; 01376 iter.valid(); 01377 ++iter) { 01378 01379 if (iter.current().exec_predicate_false()) { 01380 iter.del(); 01381 } 01382 } 01383 01384 elim_equivalent_descrs( list, istats ); 01385 01386 istats.coal_tim.restart(); 01387 01388 if (_ar_coalesce) { 01389 for (Iterator<AbstractAccess> outer_iter = list; 01390 outer_iter.valid(); 01391 ++outer_iter) { 01392 01393 changed = outer_iter.current().coalesce(); 01394 } 01395 } 01396 istats.coal_tim.stop(); 01397 01398 SimTypeOfChange change; 01399 01400 if (_ar_aggregate && (list.entries() > 1)) { 01401 01402 /// ... Build similarity graph: 01403 /// ... Match up all combinations of descriptors 01404 01405 if (_ar_logging && _ar_logging % 2 == 0) { 01406 arlog << endl << "Simplify descriptors:" << endl; 01407 int i = 0; 01408 for (Iterator<AbstractAccess> it=list; it.valid(); ++it,++i) { 01409 arlog << "[" << i << "]" << it.current() << endl; 01410 } 01411 arlog << endl; 01412 } 01413 01414 SimGraph sg( list ); 01415 01416 sg.print_similarities(); 01417 01418 /// ... First, eliminate all but one in the same equivalence class 01419 SimGraphIterator sgit( sg ); 01420 for ( ; sgit.valid(); ++sgit) { 01421 01422 SimEdge & edge = sgit.current(); 01423 AbstractAccess * descr1 = edge.node_1(); 01424 AbstractAccess * descr2 = edge.node_2(); 01425 01426 if (descr1->pgm_summarized_to() != descr2->pgm_summarized_to()) continue; 01427 01428 /// ... If the descriptors are structurally equivalent and neither 01429 /// ... have execution predicates: 01430 if (edge.type() == SIM_EQUIV) { 01431 if (ar_predicates_equal(*descr1, *descr2)) { 01432 01433 if (_ar_logging && _ar_logging % 2 == 0) { 01434 arlog << "Equivalence: " << *descr1 << " and " << *descr2 << endl; 01435 } 01436 list.del(*descr2); 01437 sgit.delete_node(*descr2); 01438 01439 /// ... adjust similarity graph 01440 01441 sgit.record_change( SIM_EQUIVALENCE, edge ); 01442 } else if (ar_predicates_negation(*descr1, *descr2)) { 01443 01444 if (_ar_logging && _ar_logging % 2 == 0) { 01445 arlog << "Negation: " << *descr1 << " and " << *descr2 << endl; 01446 } 01447 list.del(*descr2); 01448 sgit.delete_node(*descr2); 01449 01450 descr1->exec_predicate(-1); /// ... Set predicate .TRUE. for remaining descriptor 01451 01452 /// ... adjust similarity graph 01453 01454 sgit.record_change( SIM_EQUIVALENCE, edge ); 01455 } else { /// ... Form the OR of the predicates 01456 01457 if (_ar_logging && _ar_logging % 2 == 0) { 01458 arlog << "ORing predicates: " << *descr1 << " and " << *descr2 << endl; 01459 } 01460 PredicateRepository & repos = descr1->pgm_summarized_to()->pred_repos(); 01461 01462 Expression * descr1_pred = repos.predicate(descr1->exec_predicate())->clone(); 01463 Expression * descr2_pred = repos.predicate(descr2->exec_predicate())->clone(); 01464 Expression * new_pred = simplify(or(descr1_pred,descr2_pred)); 01465 01466 list.del(*descr2); 01467 sgit.delete_node(*descr2); 01468 01469 descr1->exec_predicate(repos.ins(new_pred)); 01470 } 01471 01472 } else if (ar_subregion(*descr1, *descr2, edge, UNDERESTIMATE)) { 01473 /// ... ar_subregion uses "ar_implies" to see if 01474 /// ... descr1's exec predicate implies descr2's exec predicate 01475 sgit.record_change( SIM_SUBREG_NODE2, edge ); 01476 } 01477 } 01478 01479 if (_ar_interleave) { 01480 /// ... Let the user tell us how much effort to apply 01481 /// ... to the interleaving test. 01482 01483 int size_limit = ar_effort_limit(); 01484 if ( (size_limit == -1) || (list.entries() < size_limit)) { 01485 01486 istats.interl_tim.restart(); 01487 01488 /// ... Check for interleaving pattern among STRIDE_EQUIVALENT descriptors 01489 01490 RefList<AbstractAccess> * interleave_list = ar_interleave(sgit, sg); 01491 01492 int max_entries = interleave_list->entries(); 01493 01494 if (max_entries > 0) { 01495 Iterator<AbstractAccess> iter = interleave_list; 01496 01497 /// ... New combined descriptor 01498 AbstractAccess & combined = iter.current(); 01499 01500 /// ... Remove eliminated nodes from list and graph 01501 for (++iter; 01502 iter.valid(); 01503 ++iter) { 01504 AbstractAccess & aa = iter.current(); 01505 list.del(aa); 01506 sgit.delete_node(aa); 01507 } 01508 sgit.mark_unknown(combined); 01509 } 01510 delete interleave_list; 01511 istats.interl_tim.stop(); 01512 } 01513 } 01514 01515 istats.agg_tim.restart(); 01516 01517 /// ... Check for contiguous aggregation 01518 sgit.reset(); 01519 for ( ; sgit.valid(); ++sgit) { 01520 01521 SimEdge & edge = sgit.current(); 01522 01523 if (! ar_predicates_equal(*(edge.node_1()), *(edge.node_2()))) continue; 01524 01525 change = ar_aggregate( edge, sg, list ); 01526 01527 if ( change != SIM_NOCHANGE ) { 01528 01529 /// ... adjust similarity graph 01530 01531 sgit.record_change( change, edge ); 01532 01533 if (_ar_coalesce) { 01534 if (change == SIM_CONTIG_NODE1) { 01535 changed = edge.node_1()->coalesce(); 01536 if (changed) { 01537 sgit.record_change( SIM_COALESCE_NODE1, edge ); 01538 } 01539 } else if (change == SIM_CONTIG_NODE2) { 01540 changed = edge.node_2()->coalesce(); 01541 if (changed) { 01542 sgit.record_change( SIM_COALESCE_NODE2, edge ); 01543 } 01544 } 01545 } 01546 sg.print_similarities(); 01547 } 01548 } 01549 istats.agg_tim.stop(); 01550 } 01551 istats.simp_descr.stop(); 01552 } 01553 01554 RefList<AbstractAccess> * 01555 ar_interleave( SimGraphIterator & sgit, SimGraph & sg ) 01556 { 01557 RangeAccessor * range_acc = sg.range_acc( ); 01558 sgit.reset(); 01559 01560 List<Expression> dist_list; 01561 Expression * distance; 01562 01563 /// ... Build chains of equidistant offsets 01564 Map<Expression,List<RefList<AbstractAccess> > > offset_db; 01565 for ( ; sgit.valid(); ++sgit) { 01566 01567 SimEdge & edge = sgit.current(); 01568 if (edge.type() != SIM_DIM_EQUIV) continue; 01569 01570 AbstractAccess * node1 = edge.node_1(); 01571 AbstractAccess * node2 = edge.node_2(); 01572 01573 if (! ar_predicates_equal(*node1, *node2)) continue; 01574 01575 bool reverse = false; 01576 01577 distance = simplify(sub(node1->start_guarded().clone(), 01578 node2->start_guarded().clone())); 01579 dist_list.ins_first(distance); 01580 01581 EXPR_SIGN sign = range_acc->signz(*distance); 01582 if (sign == NEG_EXPR) { 01583 01584 distance = simplify(sub(node2->start_guarded().clone(), 01585 node1->start_guarded().clone())); 01586 dist_list.ins_first(distance); 01587 reverse = true; 01588 } 01589 01590 bool constant_distance = false; 01591 if (distance->op() == INTEGER_CONSTANT_OP) { 01592 constant_distance = true; 01593 } 01594 bool added = false; 01595 for (KeyIterator<Expression,List<RefList<AbstractAccess> > > key_iter = offset_db; 01596 key_iter.valid(); 01597 ++key_iter) { 01598 01599 Expression & keydist = key_iter.current_key(); 01600 List<RefList<AbstractAccess> > & chain_list = key_iter.current_data(); 01601 01602 /// ... First use a cheap comparison to find it 01603 01604 if (constant_distance) { 01605 if (keydist.op() != INTEGER_CONSTANT_OP) continue; 01606 if (distance->value() == keydist.value()) { 01607 if (reverse) { 01608 add_to_chain(node1, node2, chain_list); 01609 added = true; 01610 } else { 01611 add_to_chain(node2, node1, chain_list); 01612 added = true; 01613 } 01614 } else continue; 01615 } else { 01616 01617 /// ... Then, if necessary, use a range comparison 01618 01619 Relation rel = range_acc->compare(*distance, keydist); 01620 if (rel.is_equal()) { 01621 if (reverse) { 01622 add_to_chain(node2, node1, chain_list); 01623 added = true; 01624 } else { 01625 add_to_chain(node1, node2, chain_list); 01626 added = true; 01627 } 01628 } 01629 } 01630 } 01631 /// ... If the current distance wasn't added to an existing chain, 01632 /// ... start a new chain 01633 01634 if (!added) { 01635 List<RefList<AbstractAccess> > * lists = new List<RefList<AbstractAccess> >; 01636 RefList<AbstractAccess> * chain = new RefList<AbstractAccess>; 01637 01638 if (reverse) { 01639 chain->ins_first(*node2); 01640 chain->ins_first(*node1); 01641 } else { 01642 chain->ins_first(*node1); 01643 chain->ins_first(*node2); 01644 } 01645 lists->ins_first(chain); 01646 offset_db.ins(*distance, lists); 01647 } 01648 } 01649 01650 /// ... Now, find the longest single chain and use that for interleaving. 01651 01652 RefList<AbstractAccess> * longest_list = 0; 01653 Expression * longest_distance = 0; 01654 01655 int max_entries = -1; 01656 for (KeyIterator<Expression,List<RefList<AbstractAccess> > > key_iter = offset_db; 01657 key_iter.valid(); 01658 ++key_iter) { 01659 01660 List<RefList<AbstractAccess> > & chain_list = key_iter.current_data(); 01661 01662 for (Iterator<RefList<AbstractAccess> > iter_list = chain_list; 01663 iter_list.valid(); 01664 ++iter_list) { 01665 RefList<AbstractAccess> & aa_list = iter_list.current(); 01666 01667 if (aa_list.entries() > max_entries) { 01668 max_entries = aa_list.entries(); 01669 longest_list = &aa_list; 01670 longest_distance = &(key_iter.current_key()); 01671 } 01672 } 01673 } 01674 01675 /// ... Return the longest chain 01676 01677 RefList<AbstractAccess> * return_list = new RefList<AbstractAccess>; 01678 01679 if (max_entries == -1) return return_list; 01680 01681 if ((longest_distance->op() == INTEGER_CONSTANT_OP) && 01682 (longest_distance->value() == 1)) { /// ... drop through 01683 } else if (max_entries < 3) { 01684 return return_list; 01685 } 01686 if (_ar_logging && _ar_logging % 2 == 0) { 01687 arlog << "Interleaving: "; 01688 } 01689 01690 Iterator<AbstractAccess> iter = longest_list; 01691 01692 p_assert(iter.valid(), "Longest list was empty"); 01693 01694 return_list->ins_last(iter.current()); 01695 01696 /// ... Add a new dimension to descriptor with smallest offset 01697 /// ... to represent all descriptors on the list. 01698 01699 Expression * span = simplify(mul(constant(max_entries-1),longest_distance->clone())); 01700 AccessDimension * newdim = new AccessDimension(longest_distance->clone(), span); 01701 int which_dim = (*return_list)[0].sort_add_dim(newdim, *range_acc); 01702 AbstractAccess & combined = (*return_list)[0]; 01703 combined.calc_overlap( which_dim ); 01704 01705 if (_ar_logging && _ar_logging % 2 == 0) { 01706 arlog << combined << " "; 01707 } 01708 01709 for (++iter; 01710 iter.valid(); 01711 ++iter) { 01712 01713 AbstractAccess & aa = iter.current(); 01714 01715 if (_ar_logging && _ar_logging % 2 == 0) { 01716 arlog << aa << " "; 01717 } 01718 01719 /// ... In "combined", save the info that dimensions need overlap checking 01720 01721 Iterator<AccessDimension> iter = aa.iter_dims(); 01722 for (int index=0; 01723 iter.valid(); 01724 ++iter,++index) { 01725 if (iter.current().check_overlap()) { 01726 combined.set_check_overlap(index, true); 01727 } 01728 } 01729 return_list->ins_last(aa); 01730 } 01731 01732 /// ... Calc_overlap where necessary 01733 01734 if (! combined.is_imprecise()) { 01735 Iterator<AccessDimension> iter2 = combined.iter_dims(); 01736 for (int index=0; 01737 iter2.valid(); 01738 ++iter2,++index) { 01739 if (iter2.current().check_overlap()) { 01740 combined.calc_overlap(index); 01741 } 01742 } 01743 } 01744 01745 if (_ar_logging && _ar_logging % 2 == 0) { 01746 arlog << " -> " << combined << endl; 01747 } 01748 01749 return return_list; 01750 } 01751 01752 /// Add a link node1->node2 to the chain(s) in the List of RefLists. 01753 /// Since we have removed all-but-one descriptors in each equivalence 01754 /// class, each node1->node2 link will be added either at the beginning 01755 /// or the end of one of the RefLists, or else will serve to tie two 01756 /// RefLists together into a single list. 01757 01758 void 01759 add_to_chain(AbstractAccess *node1, AbstractAccess *node2, 01760 List<RefList<AbstractAccess> > & chain_list) 01761 { 01762 /// ... First, look for node1 at the end of a RefList 01763 01764 bool found_node1 = false; 01765 01766 for (Iterator<RefList<AbstractAccess> > iter = chain_list; 01767 iter.valid(); 01768 ++iter) 01769 { 01770 RefList<AbstractAccess> & list1 = iter.current(); 01771 int last_index = list1.entries() - 1; 01772 01773 if (node1 == &(list1[last_index])) { 01774 /// ... Check whether node2 is at the beginning of a list 01775 01776 found_node1 = true; 01777 bool found_node2 = false; 01778 01779 for (Iterator<RefList<AbstractAccess> > iter2 = chain_list; 01780 iter2.valid(); 01781 ++iter2) 01782 { 01783 RefList<AbstractAccess> & list2 = iter2.current(); 01784 01785 if (node2 == &(list2[0])) { 01786 /// ... Append list2 to list1 01787 01788 found_node2 = true; 01789 for (Iterator<AbstractAccess> iter3 = list2; 01790 iter3.valid(); 01791 ++iter3) 01792 { 01793 list1.ins_last(iter3.current()); 01794 } 01795 break; 01796 } 01797 } 01798 01799 if (found_node2) break; 01800 01801 /// ... Didn't find node2, so add node2 to RefList 01802 01803 list1.ins_last(*node2); 01804 break; 01805 } 01806 } 01807 01808 if (found_node1) return; 01809 01810 /// ... If we didn't find node1, check this time for node2 at the beginning of a RefList 01811 01812 bool part2_found_node2 = false; 01813 01814 for (Iterator<RefList<AbstractAccess> > iter = chain_list; 01815 iter.valid(); 01816 ++iter) 01817 { 01818 RefList<AbstractAccess> & list1 = iter.current(); 01819 01820 if (node2 == &(list1[0])) { 01821 /// ... Check whether node2 is at the end of a list 01822 01823 part2_found_node2 = true; 01824 bool part2_found_node1 = false; 01825 01826 for (Iterator<RefList<AbstractAccess> > iter2 = chain_list; 01827 iter2.valid(); 01828 ++iter2) 01829 { 01830 RefList<AbstractAccess> & list2 = iter2.current(); 01831 int last_index2 = list2.entries() - 1; 01832 01833 if (node1 == &(list2[last_index2])) { 01834 /// ... Prepend list2 to list1 01835 01836 part2_found_node1 = true; 01837 for (Iterator<AbstractAccess> iter3 = list2; 01838 iter3.valid(); 01839 ++iter3) 01840 { 01841 list1.ins_first(iter3.current()); 01842 } 01843 break; 01844 } 01845 } 01846 01847 if (part2_found_node1) break; 01848 01849 /// ... Didn't find node1, so add node1 to RefList 01850 01851 list1.ins_first(*node1); 01852 break; 01853 } 01854 } 01855 01856 if (part2_found_node2) return; 01857 01858 /// ... If found neither node1 nor node2 in the lists, then we must begin a new RefList, containing 01859 /// ... those two nodes. 01860 01861 RefList<AbstractAccess> * newlist = new RefList<AbstractAccess>; 01862 01863 newlist->ins_first(*node1); 01864 newlist->ins_last (*node2); 01865 01866 chain_list.ins_first(newlist); 01867 01868 return; 01869 } 01870 01871 SimTypeOfChange 01872 ar_aggregate( SimEdge & edge, SimGraph & sg, List<AbstractAccess> & list ) 01873 { 01874 SimTypeOfChange change = SIM_NOCHANGE; 01875 AbstractAccess * desc_less; 01876 AbstractAccess * desc_grt; 01877 AbstractAccess * descr1 = edge.node_1(); 01878 AbstractAccess * descr2 = edge.node_2(); 01879 SimilarityType type = edge.type(); 01880 01881 if (_ar_logging && _ar_logging % 2 == 0) { 01882 arlog << "COMPARE: " << endl; 01883 arlog << "[" << edge.index1() << "]"; 01884 arlog << *descr1 << " and " << endl; 01885 arlog << "[" << edge.index2() << "]"; 01886 arlog << *descr2 << " (" << edge.type_name() << ") " << endl; 01887 } 01888 01889 int dims1 = descr1->number_of_dimensions(); 01890 int dims2 = descr2->number_of_dimensions(); 01891 01892 /// ... Take care of the _SEMI cases. 01893 01894 Array<int> * stridematch=0; 01895 Array<bool> * spanmatch=0; 01896 bool must_cleanup=false; 01897 01898 /// ... This represented the descriptor which has dimension(s) added (if any) 01899 int augmented_dim = -1; 01900 01901 if ((type == SIM_DIM_EQUIV_SEMI) || 01902 (type == SIM_STRIDE_EQUIV_SEMI)) { 01903 01904 if (dims1 < dims2) { 01905 01906 /// ... Copy ARD1 01907 01908 AbstractAccess * new_aa = descr1->clone(); 01909 01910 Array<int> * new_stridematch = new Array<int>; 01911 Array<bool> * new_spanmatch = new Array<bool>; 01912 01913 (*new_stridematch).resize(dims2); 01914 (*new_spanmatch).resize(dims2); 01915 01916 /// ... Get the match from ARD2 to ARD1 01917 01918 stridematch = sg.kernel(edge.index2(), edge.index1()).stridematch(); 01919 spanmatch = sg.kernel(edge.index2(), edge.index1()).spanmatch(); 01920 01921 /// ... Add dimensions from ARD2 which were not matched, set spans to 0 01922 01923 int new_dim = dims1; /// ... add_dimension does ins_last 01924 for (int ii=0; ii<dims2; ++ii) { 01925 if ((*stridematch)[ii] == -1) { 01926 AccessDimension * dim =descr2->dimension(ii).clone(); 01927 dim->span(constant(0)); 01928 new_aa->add_dimension(dim); 01929 01930 (*new_stridematch)[new_dim] = ii; 01931 (*new_spanmatch)[new_dim] = false; 01932 ++new_dim; 01933 } else { 01934 (*new_stridematch)[(*stridematch)[ii]] = ii; 01935 (*new_spanmatch)[(*stridematch)[ii]] = (*spanmatch)[ii]; 01936 } 01937 } 01938 stridematch = new_stridematch; 01939 spanmatch = new_spanmatch; 01940 /// ... silvius: this is a memory leak 01941 must_cleanup=true; 01942 descr1 = new_aa; 01943 dims1 = dims2; 01944 type = SIM_STRIDE_EQUIV; 01945 augmented_dim = 1; 01946 dims1 = dims2; 01947 } else { 01948 01949 /// ... Copy ARD2 01950 01951 AbstractAccess * new_aa = descr2->clone(); 01952 01953 Array<int> * new_stridematch = new Array<int>; 01954 Array<bool> * new_spanmatch = new Array<bool>; 01955 01956 (*new_stridematch).resize(dims1); 01957 (*new_spanmatch).resize(dims1); 01958 01959 stridematch = edge.edge()->stridematch(); 01960 spanmatch = edge.edge()->spanmatch(); 01961 01962 for (int ii=0; ii<dims1; ++ii) { 01963 (*new_stridematch)[ii] = (*stridematch)[ii]; 01964 (*new_spanmatch)[ii] = (*spanmatch)[ii]; 01965 } 01966 01967 /// ... Add dimensions from ARD1 which were not matched, set spans to 0 01968 01969 int new_dim = dims2; 01970 for (int ii=0; ii<dims1; ++ii) { 01971 if ((*stridematch)[ii] == -1) { 01972 AccessDimension * dim =descr1->dimension(ii).clone(); 01973 dim->span(constant(0)); 01974 new_aa->add_dimension(dim); 01975 01976 (*new_stridematch)[ii] = new_dim; 01977 ++new_dim; 01978 (*new_spanmatch)[ii] = false; 01979 } 01980 } 01981 stridematch = new_stridematch; 01982 spanmatch = new_spanmatch; 01983 /// ... silvius: this is a memory leak 01984 must_cleanup=true; 01985 descr2 = new_aa; 01986 type = SIM_STRIDE_EQUIV; 01987 augmented_dim = 2; 01988 dims2 = dims1; 01989 } 01990 } else { 01991 stridematch = edge.edge()->stridematch(); 01992 spanmatch = edge.edge()->spanmatch(); 01993 } 01994 01995 switch (type) { 01996 case SIM_EQUIV: 01997 change = SIM_EQUIVALENCE; 01998 01999 if (_ar_logging && _ar_logging % 2 == 0) { 02000 arlog << "Equivalence: " << *descr1 << " and " << *descr2 << endl; 02001 } 02002 list.del(*(edge.node_2())); 02003 break; 02004 02005 case SIM_EQUIV_SEMI: 02006 if (dims1 < dims2) { 02007 list.del(*(edge.node_1())); 02008 change = SIM_CONTIG_NODE2; 02009 } else { 02010 list.del(*(edge.node_2())); 02011 change = SIM_CONTIG_NODE1; 02012 } 02013 break; 02014 02015 case SIM_NOT_SIMILAR: 02016 change = SIM_NOCHANGE; 02017 break; 02018 02019 case SIM_STRIDE_EQUIV: /// ... Common point for Dimension-equivalent and Stride-equivalent descriptors 02020 case SIM_DIM_EQUIV: 02021 { 02022 /// ... First, determine the descriptor with the lesser offset 02023 /// ... If can't decide, can't go further for either type of relationship 02024 02025 int smaller_offset = 0; 02026 02027 RangeAccessor * range_acc = sg.range_acc( ); 02028 02029 if ((descr1->start_guarded().op() == INTEGER_CONSTANT_OP) && 02030 (descr2->start_guarded().op() == INTEGER_CONSTANT_OP)) { 02031 02032 int start1 = descr1->start_guarded().value(); 02033 int start2 = descr2->start_guarded().value(); 02034 02035 if (start1 < start2) { 02036 desc_less = descr1; 02037 desc_grt = descr2; 02038 smaller_offset = 1; 02039 } else { 02040 desc_less = descr2; 02041 desc_grt = descr1; 02042 smaller_offset = 2; 02043 } 02044 } else { 02045 Relation rel = range_acc->compare(descr1->start_guarded(), descr2->start_guarded()); 02046 02047 if (rel.is_less_than()) { 02048 /// ... start1 < start2 02049 desc_less = descr1; 02050 desc_grt = descr2; 02051 smaller_offset = 1; 02052 02053 } else if (rel.is_greater_equal()) { 02054 /// ... start2 <= start1 02055 desc_less = descr2; 02056 desc_grt = descr1; 02057 smaller_offset = 2; 02058 02059 } else { /// ... cannot decide 02060 change = SIM_NOCHANGE; 02061 break; 02062 } 02063 } 02064 02065 if (type == SIM_STRIDE_EQUIV) { 02066 02067 /// ... For Stride-equivalent descriptors, check for all-but-one matching 02068 /// ... and determine which dimension does not match. 02069 02070 int count = 0; 02071 int nonmatch = -1; 02072 02073 for (int i=0; i<dims1; ++i) { 02074 if ( (*spanmatch)[i] ) { 02075 ++count; 02076 } else { 02077 nonmatch = i; 02078 } 02079 } 02080 02081 if (count < (dims1-1)) { 02082 change = SIM_NOCHANGE; 02083 } else if (count == (dims1-1)) { /// ... all spans but one match - try to aggregate 02084 if (smaller_offset == 1) { 02085 change = contiguous_aggregation(range_acc, 02086 desc_less, desc_grt, 02087 smaller_offset, 02088 nonmatch, 02089 (*stridematch)[nonmatch]); 02090 } else { 02091 change = contiguous_aggregation(range_acc, 02092 desc_less, desc_grt, 02093 smaller_offset, 02094 (*stridematch)[nonmatch], 02095 nonmatch); 02096 } 02097 if (change == SIM_CONTIG_NODE1) { 02098 if (augmented_dim == 2) { 02099 delete descr2; 02100 } 02101 list.del(*(edge.node_2())); 02102 if (augmented_dim == 1) { 02103 list.modify(*(edge.node_1()), descr1); 02104 sg.set_ptr(edge.index1(), *descr1); 02105 sg.set_unknown( edge, edge.index1()); 02106 } else { 02107 sg.redo_spanmatch( edge, edge.index1(), nonmatch ); 02108 } 02109 } else if (change == SIM_CONTIG_NODE2) { 02110 if (augmented_dim == 1) { 02111 delete descr1; 02112 } 02113 list.del(*(edge.node_1())); 02114 if (augmented_dim == 2) { 02115 list.modify(*(edge.node_2()), descr2); 02116 sg.set_ptr(edge.index2(), *descr2); 02117 sg.set_unknown( edge, edge.index2()); 02118 } else { 02119 sg.redo_spanmatch( edge, edge.index2(), (*stridematch)[nonmatch] ); 02120 } 02121 } 02122 } else { 02123 cout << "All spans match on SIM_STRIDE_EQUIV"; 02124 change = SIM_NOCHANGE; 02125 } 02126 02127 break; 02128 02129 } else { /// ... type == SIM_DIM_EQUIV 02130 02131 /// ... For Dimension-equivalent descriptors, must try each matched set 02132 /// ... to see if any one meets the criterion. When one pair 02133 /// ... does, we stop. 02134 02135 int dim1 = 0, dim2 = 0; 02136 int dims = dims1; 02137 02138 Array<int> * stridematch = edge.edge()->stridematch(); 02139 for (dim1=0; dim1<dims; ++dim1) { 02140 02141 dim2 = (*stridematch)[dim1]; 02142 if (smaller_offset == 1) { 02143 change = contiguous_aggregation(range_acc, 02144 desc_less, desc_grt, 02145 smaller_offset, 02146 dim1, 02147 dim2); 02148 } else { 02149 change = contiguous_aggregation(range_acc, 02150 desc_less, desc_grt, 02151 smaller_offset, 02152 dim2, 02153 dim1); 02154 } 02155 if (change != SIM_NOCHANGE) { 02156 break; 02157 } 02158 } 02159 if (change == SIM_CONTIG_NODE1) { 02160 list.del(*(edge.node_2())); 02161 sg.redo_spanmatch( edge, edge.index1(), dim1 ); 02162 break; 02163 } 02164 if (change == SIM_CONTIG_NODE2) { 02165 list.del(*(edge.node_1())); 02166 sg.redo_spanmatch( edge, edge.index2(), dim2 ); 02167 break; 02168 } 02169 change = SIM_NOCHANGE; 02170 break; 02171 } 02172 break; 02173 } 02174 default: 02175 p_abort("Similarity type was not one of the valid types"); 02176 } 02177 if (must_cleanup){ 02178 delete stridematch; 02179 delete spanmatch; 02180 } 02181 return change; 02182 } 02183 02184 /// Test the contiguous-region conditions 02185 SimTypeOfChange 02186 contiguous_aggregation (RangeAccessor * range_acc, 02187 AbstractAccess * desc_less, AbstractAccess * desc_grt, 02188 int smaller_offset, int dim_less_index, int dim_grt_index ) 02189 { 02190 SimTypeOfChange change; 02191 AccessDimension & dim_less = (AccessDimension & ) desc_less->dimension(dim_less_index); 02192 AccessDimension & dim_grt = (AccessDimension & ) desc_grt->dimension(dim_grt_index); 02193 02194 Expression * diff = simplify(sub(desc_grt->start_guarded().clone(), 02195 desc_less->start_guarded().clone())); 02196 02197 Expression * dimsum = simplify(add(dim_less.stride_guarded().clone(), 02198 dim_less.span_guarded().clone())); 02199 02200 if ((diff->op() == INTEGER_CONSTANT_OP) && (dimsum->op() == INTEGER_CONSTANT_OP) && 02201 (dim_grt.stride_guarded().op() == INTEGER_CONSTANT_OP) && 02202 (dim_grt.span_guarded().op() == INTEGER_CONSTANT_OP) && 02203 (dim_less.span_guarded().op() == INTEGER_CONSTANT_OP)) { 02204 02205 int diff_i = diff->value(); 02206 int dimsum_i = dimsum->value(); 02207 int grt_stride_i = dim_grt.stride_guarded().value(); 02208 int grt_span_i = dim_grt.span_guarded().value(); 02209 int less_span_i = dim_less.span_guarded().value(); 02210 02211 if (dimsum_i < diff_i) { 02212 if (_ar_logging && _ar_logging % 2 == 0) { 02213 arlog << " NOT >=" << endl; 02214 } 02215 change = SIM_NOCHANGE; 02216 } else if (diff_i % grt_stride_i == 0) { 02217 /// ... We can aggregate the two descriptors 02218 02219 if (_ar_logging && _ar_logging % 2 == 0) { 02220 arlog << " >=" << endl; 02221 } 02222 02223 if (grt_span_i + diff_i > less_span_i) { 02224 02225 Expression * new_span = simplify(add( dim_grt.span_guarded().clone(), diff->clone() )); 02226 02227 dim_less.span( new_span ); /// ... Modify descriptor with smaller offset 02228 02229 dim_less.singleton( dim_less.singleton() && dim_grt.singleton() ); 02230 02231 desc_less->add_actual(0, 0); /// ... Remove the actual pointer, because now it represents >1 sites 02232 02233 /// ... Calculating the overlap characteristic 02234 02235 if (dimsum_i > diff_i) { 02236 desc_less->no_overlap(false); 02237 } 02238 02239 /// ... Transfer overlap-check notations from dim_grt to dim_less 02240 02241 Iterator<AccessDimension> iter = desc_grt->iter_dims(); 02242 for (int index=0; iter.valid(); ++iter,++index) { 02243 if (iter.current().check_overlap()) { 02244 desc_less->set_check_overlap(index,true); 02245 } 02246 } 02247 /// ... Calc_overlap where necessary 02248 02249 if (! desc_less->is_imprecise()) { 02250 Iterator<AccessDimension> iter2 = desc_less->iter_dims(); 02251 for (int index=0; 02252 iter2.valid(); 02253 ++iter2,++index) { 02254 if (iter2.current().check_overlap()) { 02255 desc_less->calc_overlap(index); 02256 } 02257 } 02258 } 02259 } else { 02260 /// ... dim_grt is a subregion of dim_less, don't change span 02261 dim_less.singleton( dim_less.singleton() && dim_grt.singleton() ); 02262 } 02263 if (_ar_logging && _ar_logging % 2 == 0) { 02264 arlog << "RESULT: " << *desc_less << endl; 02265 } 02266 02267 if ( smaller_offset == 1) { 02268 change = SIM_CONTIG_NODE1; 02269 } else { 02270 change = SIM_CONTIG_NODE2; 02271 } 02272 } else { 02273 if (_ar_logging && _ar_logging % 2 == 0) { 02274 arlog << " OK" << endl; 02275 arlog << dim_grt.stride_guarded() << " DOES NOT DIVIDE " << *diff << endl; 02276 } 02277 change = SIM_NOCHANGE; 02278 } 02279 } else { 02280 02281 /// cerr<<"\n descr1 = "<<*desc_less; 02282 /// cerr<<"\n descr2 = "<<*desc_grt; 02283 02284 Relation rel2 = range_acc->compare(*dimsum, *diff); 02285 02286 if (_ar_logging && _ar_logging % 2 == 0) { 02287 arlog << "Contiguous Aggregation: " << *desc_less << "[" << dim_less_index << "] and " 02288 << *desc_grt << "[" << dim_grt_index << "]" << endl; 02289 arlog << *dimsum << " VS " << *diff ; 02290 } 02291 02292 if (rel2.is_less_than()) { 02293 02294 if (_ar_logging && _ar_logging % 2 == 0) { 02295 arlog << " NOT >=" << endl; 02296 } 02297 change = SIM_NOCHANGE; 02298 02299 } else if (rel2.is_greater_equal()) { 02300 02301 if (expr_divides(dim_grt.stride_guarded(),*diff) == AR_YES) { 02302 02303 /// ... We can aggregate the two descriptors 02304 02305 if (_ar_logging && _ar_logging % 2 == 0) { 02306 arlog << " >=" << endl; 02307 } 02308 02309 Expression * sum = simplify(add(dim_grt.span_guarded().clone(), 02310 diff->clone())); 02311 Relation subreg = range_acc->compare(*sum, 02312 dim_less.span_guarded()); 02313 /// cerr<<"\n sum = "<<*sum; 02314 02315 delete sum; 02316 /// ... silvius: 02317 /// ... maybe start2=start1+span1+stride 02318 Expression* sum1=add(desc_less->start_guarded().clone(), 02319 dim_less.span_guarded().clone()); 02320 sum1=add(sum1, dim_less.stride_guarded().clone()); 02321 sum1=simplify(sum1); 02322 bool can_do=false; 02323 if (*sum1==desc_grt->start_guarded()){ 02324 can_do=true; 02325 } 02326 delete sum1; 02327 02328 if (subreg.is_greater_than() || can_do) { 02329 02330 Expression * new_span = simplify(add( dim_grt.span_guarded().clone(), diff->clone() )); 02331 02332 dim_less.span( new_span ); /// ... Modify descriptor with smaller offset 02333 02334 dim_less.singleton( dim_less.singleton() && dim_grt.singleton() ); 02335 02336 desc_less->add_actual(0, 0); /// ... Remove the actual pointer, because now it represents >1 sites 02337 02338 /// ... Calculating the overlap characteristic 02339 02340 if (rel2.is_equal()) { 02341 /// ... Does not change the overlap characteristic 02342 } else if (rel2.is_not_equal()) { 02343 desc_less->no_overlap(false); 02344 } else { 02345 desc_less->no_overlap(2); /// ... Say it will take a runtime test 02346 dim_less.check_overlap(true); 02347 } 02348 02349 /// ... Transfer overlap-check notations from dim_grt to dim_less 02350 02351 Iterator<AccessDimension> iter = desc_grt->iter_dims(); 02352 for (int index=0; iter.valid(); ++iter,++index) { 02353 if (iter.current().check_overlap()) { 02354 desc_less->set_check_overlap(index,true); 02355 } 02356 } 02357 /// ... Calc_overlap where necessary 02358 02359 if (! desc_less->is_imprecise()) { 02360 Iterator<AccessDimension> iter2 = desc_less->iter_dims(); 02361 for (int index=0; 02362 iter2.valid(); 02363 ++iter2,++index) { 02364 if (iter2.current().check_overlap()) { 02365 desc_less->calc_overlap(index); 02366 } 02367 } 02368 } 02369 } else if (subreg.is_less_equal()) { 02370 /// ... dim_grt is a subregion of dim_less, don't change span 02371 dim_less.singleton( dim_less.singleton() && dim_grt.singleton() ); 02372 } else { 02373 /// ... cannot be sure, don't do anything 02374 02375 02376 02377 change = SIM_NOCHANGE; 02378 delete diff; 02379 delete dimsum; 02380 if (_ar_logging && _ar_logging % 2 == 0) { 02381 arlog << "RESULT: " << *desc_less << endl; 02382 } 02383 return change; 02384 } 02385 if (_ar_logging && _ar_logging % 2 == 0) { 02386 arlog << "RESULT: " << *desc_less << endl; 02387 } 02388 02389 if ( smaller_offset == 1) { 02390 change = SIM_CONTIG_NODE1; 02391 } else { 02392 change = SIM_CONTIG_NODE2; 02393 } 02394 02395 } else { 02396 if (_ar_logging && _ar_logging % 2 == 0) { 02397 arlog << " OK" << endl; 02398 arlog << dim_grt.stride_guarded() << " DOES NOT DIVIDE " << *diff << endl; 02399 } 02400 change = SIM_NOCHANGE; 02401 } 02402 } else { 02403 /// ... Don't know, no change 02404 if (_ar_logging && _ar_logging % 2 == 0) { 02405 arlog << " OK" << endl; 02406 arlog << *dimsum << " DOES NOT DIVIDE " << *diff << endl; 02407 } 02408 change = SIM_NOCHANGE; 02409 } 02410 } 02411 delete dimsum; 02412 delete diff; 02413 return change; 02414 } 02415 02416 /// ar_intersect - 02417 /// Attempt to remove the overlapping portion from the descriptor in list1. 02418 /// list1[ard1] - list2[ard2] -> list1[ard1] 02419 /// 02420 /// list2[ard2] is not changed. 02421 /// 02422 /// If new descriptors are 02423 /// added to list1 as a result of this operation, they must be added at 02424 /// the end of the list, since we assume that an iteration involving the 02425 /// two lists is going on outside of this routine and we don't want to 02426 /// add or delete elements which would disturb that. 02427 /// 02428 /// The notion of conservatism used in this routine is that whenever 02429 /// we aren't totally sure of something, the list1 descriptor is left 02430 /// untouched. 02431 02432 bool 02433 ar_intersect (List<AbstractAccess> & list1, int ard1, 02434 RefList<AbstractAccess> & discard1, 02435 List<AbstractAccess> & list2, int ard2, 02436 SimBiGraph & sbg, SimEdge & edge, 02437 AR_MEET meet_op, 02438 IPAStats & istats) 02439 { 02440 List<AbstractAccess> new_ard1; 02441 List<AbstractAccess> new_ard2; 02442 List<AbstractAccess> intersect; 02443 02444 RangeAccessor * range_acc = sbg.range_acc( ); 02445 02446 bool inter = intersect_ARDs(list1[ard1], list2[ard2], *range_acc, edge, 02447 UNDERESTIMATE, new_ard1, new_ard2, intersect, 02448 meet_op, istats); 02449 02450 istats.ar_inter2.restart(); 02451 if (inter) { 02452 for (int i=0; i<new_ard1.entries(); ++i) { 02453 if (!ar_descriptor_member(list1, new_ard1[i])) { 02454 ar_append(list1, new_ard1.grab(i)); 02455 } 02456 } 02457 if (!discard1.member(list1[ard1])) discard1.ins_last(list1[ard1]); 02458 } 02459 istats.ar_inter2.stop(); 02460 return inter; 02461 } 02462 02463 /// ar_intersect - 02464 /// Attempt to remove the overlapping portion from the descriptors in list1 and list2. 02465 /// Also, move the overlapping portion to a new list. 02466 /// list1[ard1] - list2[ard2] -> list1 02467 /// list2[ard2] - list1[ard1] -> list2 02468 /// list1[ard1] <intersect> list2[ard2] -> intersect 02469 /// 02470 /// If new descriptors are added to list1 or list2 as a result of this operation, 02471 /// they must be added at the end of the list, since we assume that an iteration 02472 /// involving the two lists is going on outside of this routine and we don't want 02473 /// to add or delete elements which would disturb that. 02474 bool 02475 ar_intersect (List<AbstractAccess> & list1, int ard1, RefList<AbstractAccess> & discard1, 02476 List<AbstractAccess> & list2, int ard2, RefList<AbstractAccess> & discard2, 02477 SimBiGraph & sbg, SimEdge & edge, 02478 List<AbstractAccess> & intersect, AR_CONSERVATIVE direction, 02479 AR_MEET meet_op, IPAStats & istats) 02480 { 02481 List<AbstractAccess> new_ard1; 02482 List<AbstractAccess> new_ard2; 02483 RangeAccessor * range_acc = sbg.range_acc( ); 02484 02485 bool inter = intersect_ARDs(list1[ard1], list2[ard2], *range_acc, edge, 02486 direction, new_ard1, new_ard2, intersect, 02487 meet_op, istats); 02488 02489 istats.ar_inter3.restart(); 02490 istats.ar_inter3_elems1.add(list1.entries()); 02491 istats.ar_inter3_elems2.add(list2.entries()); 02492 02493 if (inter) { 02494 for (Mutator<AbstractAccess> iter = new_ard1; iter.valid(); ++iter) { 02495 if (!ar_descriptor_member(list1, iter.current())) { 02496 ar_append(list1, iter.grab()); 02497 } 02498 } 02499 if (!discard1.member(list1[ard1])) discard1.ins_last(list1[ard1]); 02500 02501 for (Mutator<AbstractAccess> iter = new_ard2; iter.valid(); ++iter) { 02502 if (!ar_descriptor_member(list2, iter.current())) { 02503 ar_append(list2, iter.grab()); 02504 } 02505 } 02506 if (!discard2.member(list2[ard2])) discard2.ins_last(list2[ard2]); 02507 } 02508 istats.ar_inter3.stop(); 02509 return inter; 02510 } 02511 02512 /// intersect_ARDs - 02513 /// Assume that ARD1 represents an "earlier" access, in the sense that 02514 /// it precedes the ARD2 access(es) in an execution-order traversal of the 02515 /// program. 02516 /// 02517 /// Intersect ARD1 with ARD2 and return the three parts: the part 02518 /// that intersects, and the part remaining in ARD1 and ARD2 after 02519 /// the intersecting part is removed. The new ARD1 is passed back in 02520 /// new_ard1 and the new ARD2 is passed back in new_ard2. 02521 /// 02522 /// This routine uses a "meet" operation specified as a parameter to 02523 /// handle the predicates which occur on the input ARDs. For straight-line 02524 /// code where logical implication can be proven, the meet operation should 02525 /// be AND (AR_AND). "Logical implication" means that the predicate on the 02526 /// later access can be shown to imply that the predicate on the earlier 02527 /// access was TRUE. This establishes an order between the accesses. In this 02528 /// case, the earlier access can be shown to actually occur prior to the 02529 /// later access. In this situation, predicates for the two descriptors 02530 /// should be ANDed together. 02531 /// 02532 /// For straightline code in which logical implication cannot be proven, 02533 /// the results of the intersection will be loaded into the ReadWrite 02534 /// summary set and the meet operation of "Not X and Y" (AR_NOTX_Y) will 02535 /// be used to produce the condition under which the dependence happens. 02536 /// 02537 /// So, for example, for code such as: 02538 /// 02539 /// do . . . 02540 /// if (P) then 02541 /// a(1:N) = 02542 /// endif 02543 /// if (Q) then 02544 /// . . . = a(1:N) 02545 /// endif 02546 /// end do 02547 /// 02548 /// if we can't prove that Q=>P, then there is a dependence 02549 /// 02550 /// For IF constructs, such as: 02551 /// 02552 /// if (P) then 02553 /// a(1:N) = 02554 /// else 02555 /// a(1:n) = 02556 /// endif 02557 /// 02558 /// the meet operation should be OR (AR_OR). Thus, we can say 02559 /// <WRITE> {P}:a(1:N) intersecting <WRITE> {~P}:a(1:N) equals <WRITE> {P || ~P}:a(1:N), 02560 /// which equals <WRITE> a(1:N). This reflects the reality that A(1:N) is written 02561 /// regardless of the value of P. 02562 /// 02563 /// new_ard1 and new_ard2 are filled with descriptors which are different 02564 /// from ard1 and ard2, respectively. If ard1 or ard2 are changed by this 02565 /// routine, or "disappear" (it is a subregion of the other, or ard1==ard2), 02566 /// then that will be signalled by the routine returning true. 02567 /// 02568 /// If either ard1 or ard2 is unchanged by this routine, the routine returns false 02569 /// and nothing is placed in new_ard1 or new_ard2, respectively. 02570 /// 02571 /// So, for example, if the intersection causes a span in ard1 to change, then 02572 /// the new version of the descriptor is written to new_ard1[0] and the routine 02573 /// returns true. 02574 02575 /// The match_index in the SimEdge argument gives a matching from ard1 to ard2 02576 /// (not from ard2 to ard1). The ard2 to ard1 matching can be inferred from the 02577 /// ard1 to ard2 matching. THE TWO DESCRIPTORS ARD1 AND ARD2 MUST BE CONSIDERED 02578 /// IN THE SAME ORDER AS THEY WERE USED IN CONSTRUCTING THE SIMEDGE. 02579 02580 bool 02581 intersect_ARDs (AbstractAccess & ard1, AbstractAccess & ard2, 02582 RangeAccessor & range_acc, SimEdge & edge, 02583 AR_CONSERVATIVE direction, 02584 List<AbstractAccess> & new_ard1, 02585 List<AbstractAccess> & new_ard2, 02586 List<AbstractAccess> & intersect, 02587 AR_MEET meet_op, 02588 IPAStats & istats) 02589 { 02590 istats.inter_ARD.restart(); 02591 02592 SimilarityType simtype = edge.type(); 02593 02594 PredicateRepository & repos = ard1.pgm_summarized_to()->pred_repos(); 02595 02596 if (_ar_logging && _ar_logging % 2 == 0) { 02597 arlog << "intersect_ARDs:" << ard1 << " vs " << ard2 << endl; 02598 } 02599 02600 /// ... silvius: try this first. 02601 Expression* s1=simplify(ard1.start_guarded().clone()); 02602 Expression* lo1=simplify(ard1.lastoffset()); 02603 Expression* s2=simplify(ard2.start_guarded().clone()); 02604 Expression* lo2=simplify(ard2.lastoffset()); 02605 Relation rel1 = range_acc.compare(*s1, *lo2); 02606 Relation rel2 = range_acc.compare(*s2, *lo1); 02607 delete s1; 02608 delete s2; 02609 delete lo1; 02610 delete lo2; 02611 if (rel1.is_greater_than() || rel2.is_greater_than()){ 02612 return false; 02613 } 02614 02615 if (ard1.approximate()) { 02616 if (direction == OVERESTIMATE) { 02617 intersect.ins_last(ard1.clone()); 02618 intersect.ins_last(ard2.clone()); 02619 02620 if (_ar_logging && _ar_logging % 2 == 0) { 02621 arlog << "Adding approximate LMAD" << intersect << endl; 02622 } 02623 istats.inter_ARD.stop(); 02624 return true; 02625 } 02626 if (direction == UNDERESTIMATE) { 02627 02628 if (_ar_logging && _ar_logging % 2 == 0) { 02629 arlog << "Approximate LMAD - UNDERESTIMATE with empty intersection" << endl; 02630 } 02631 istats.inter_ARD.stop(); 02632 return false; 02633 } 02634 } 02635 if (ard2.approximate()) { 02636 if (direction == OVERESTIMATE) { 02637 intersect.ins_last(ard1.clone()); 02638 intersect.ins_last(ard2.clone()); 02639 02640 if (_ar_logging && _ar_logging % 2 == 0) { 02641 arlog << "Adding approximate LMAD" << intersect << endl; 02642 } 02643 istats.inter_ARD.stop(); 02644 return true; 02645 } 02646 if (direction == UNDERESTIMATE) { 02647 02648 if (_ar_logging && _ar_logging % 2 == 0) { 02649 arlog << "Approximate LMAD - UNDERESTIMATE with empty intersection" << endl; 02650 } 02651 istats.inter_ARD.stop(); 02652 return false; 02653 } 02654 } 02655 02656 /// ... If either predicate is .FALSE., then there can be no intersection 02657 02658 if (ard1.exec_predicate_false() || ard2.exec_predicate_false()) { 02659 istats.inter_ARD.stop(); 02660 return false; 02661 } 02662 02663 if (simtype == SIM_NOT_SIMILAR) { 02664 02665 /// ... Can't do much. Just check if mins against maxes. 02666 02667 Expression *last1 = simplify(ard1.lastoffset()); 02668 Relation rel1 = range_acc.compare(*last1, ard2.start_guarded()); 02669 if (rel1.is_less_than()) { 02670 /// ... no intersection 02671 if (_ar_logging && _ar_logging % 2 == 0) { 02672 arlog << "No intersection" << endl; 02673 } 02674 istats.inter_ARD.stop(); 02675 delete last1; 02676 return false; 02677 } 02678 Expression *last2 = simplify(ard2.lastoffset()); 02679 Relation rel2 = range_acc.compare(*last2, ard1.start_guarded()); 02680 if (rel2.is_less_than()) { 02681 // no intersection 02682 if (_ar_logging && _ar_logging % 2 == 0) { 02683 arlog << "No intersection" << endl; 02684 } 02685 istats.inter_ARD.stop(); 02686 delete last2; 02687 return false; 02688 } 02689 02690 /// ... assume intersection 02691 02692 if (direction == OVERESTIMATE) { 02693 intersect.ins_last(ard1.clone()); 02694 intersect.ins_last(ard2.clone()); 02695 /// ... Add nothing to new_ard1 or new_ard2 02696 if (_ar_logging && _ar_logging % 2 == 0) { 02697 arlog << "Just don't know - OVERESTIMATE by including both ARD1 & ARD2" << endl; 02698 } 02699 istats.inter_ARD.stop(); 02700 return true; 02701 } else { /// ... UNDERESTIMATE 02702 if (_ar_logging && _ar_logging % 2 == 0) { 02703 arlog << "Just don't know - UNDERESTIMATE with empty intersection" << endl; 02704 } 02705 istats.inter_ARD.stop(); 02706 return false; 02707 } 02708 } 02709 02710 AbstractAccess * aa1 = &ard1; 02711 AbstractAccess * aa2 = &ard2; 02712 int dims1 = aa1->number_of_dimensions(); 02713 int dims2 = aa2->number_of_dimensions(); 02714 02715 bool newaa1 = false; 02716 bool newaa2 = false; 02717 02718 /// ... Take care of the _SEMI cases. 02719 02720 Array<int> * match_index; 02721 02722 if ((simtype == SIM_STRIDE_EQUIV_SEMI) || 02723 (simtype == SIM_EQUIV_SEMI) || 02724 (simtype == SIM_DIM_EQUIV_SEMI)) { 02725 02726 if (dims2 < dims1) { 02727 02728 /// ... Make a new descriptor for aa2 with 02729 /// ... strides equivalent to those in aa1 and zero spans 02730 02731 /// ... Just fill in the existing match_index, since it's big enough 02732 02733 AbstractAccess * new_aa = aa2->clone(); 02734 02735 match_index = edge.edge()->stridematch(); 02736 02737 if (dims2 > 0) { 02738 /// ... Add match elements for new dimensions 02739 int new_dim = dims2; 02740 for (int ii=0; ii<dims1; ++ii) { 02741 if ((*match_index)[ii] == -1) { 02742 AccessDimension * dim =aa1->dimension(ii).clone(); 02743 dim->span(constant(0)); 02744 new_aa->add_dimension(dim); 02745 (*match_index)[ii] = new_dim; 02746 ++new_dim; 02747 } 02748 } 02749 } else { /// ... dims2 == 0 02750 int new_dim = dims2; 02751 for (int ii=0; ii<dims1; ++ii) { 02752 AccessDimension * dim =aa1->dimension(ii).clone(); 02753 dim->span(constant(0)); 02754 new_aa->add_dimension(dim); 02755 (*match_index)[ii] = new_dim; 02756 ++new_dim; 02757 } 02758 } 02759 02760 aa2 = new_aa; 02761 newaa2 = true; 02762 dims2 = dims1; 02763 simtype = SIM_STRIDE_EQUIV; 02764 02765 } else { /// ... aa1 has fewer dimensions 02766 02767 /// ... Make a new descriptor for aa1 with strides equivalent to those in aa2 and zero spans 02768 02769 AbstractAccess * new_aa = aa1->clone(); 02770 02771 Array<int> * new_match_index = new Array<int>; 02772 new_match_index->resize(dims2); 02773 02774 if (dims1 > 0) { 02775 /// ... Invert match_index 02776 match_index = edge.edge()->stridematch(); 02777 02778 Array<int> * inverted_match_index = invert_match(match_index, dims2); 02779 02780 int new_dim = dims1; /// ... add_dimension does ins_last 02781 for (int ii=0; ii<dims2; ++ii) { 02782 if ((*inverted_match_index)[ii] == -1) { 02783 AccessDimension * dim =aa2->dimension(ii).clone(); 02784 dim->span(constant(0)); 02785 new_aa->add_dimension(dim); 02786 (*new_match_index)[new_dim] = ii; 02787 ++new_dim; 02788 } else { 02789 (*new_match_index)[(*inverted_match_index)[ii]] = ii; 02790 } 02791 } 02792 delete inverted_match_index; 02793 } else { /// ... dims1 == 0 02794 int new_dim = dims1; /// ... add_dimension does ins_last 02795 for (int ii=0; ii<dims2; ++ii) { 02796 AccessDimension * dim =aa2->dimension(ii).clone(); 02797 dim->span(constant(0)); 02798 new_aa->add_dimension(dim); 02799 (*new_match_index)[new_dim] = ii; 02800 ++new_dim; 02801 } 02802 } 02803 02804 match_index = new_match_index; 02805 aa1 = new_aa; 02806 newaa1 = true; 02807 dims1 = dims2; 02808 simtype = SIM_STRIDE_EQUIV; 02809 } 02810 } else { 02811 match_index = edge.edge()->stridematch(); 02812 } 02813 02814 /// ... Now, we know that the strides are equivalent 02815 02816 if ((dims1 == 0) && (dims2 == 0)) { 02817 if ((aa1->start_guarded().op() == INTEGER_CONSTANT_OP) && 02818 (aa2->start_guarded().op() == INTEGER_CONSTANT_OP)) { 02819 02820 int start1 = aa1->start_guarded().value(); 02821 int start2 = aa2->start_guarded().value(); 02822 02823 if (start1 == start2) { /// ... both represent same location 02824 /// ... Intersect descriptor 02825 AbstractAccess * interARD = ard1.clone(); 02826 switch (meet_op) { 02827 case AR_X: 02828 interARD->exec_predicate(ard1.exec_predicate()); 02829 break; 02830 case AR_Y: 02831 interARD->exec_predicate(ard2.exec_predicate()); 02832 break; 02833 case AR_AND: 02834 interARD->exec_predicate(repos.and_pred(ard1.exec_predicate(), 02835 ard2.exec_predicate())); 02836 break; 02837 case AR_OR: 02838 interARD->exec_predicate(repos.or_pred(ard1.exec_predicate(), 02839 ard2.exec_predicate())); 02840 break; 02841 case AR_NOTX_Y: 02842 interARD->exec_predicate(repos.notx_and_y_pred(ard1.exec_predicate(), 02843 ard2.exec_predicate())); 02844 break; 02845 case AR_X_NOTY: 02846 interARD->exec_predicate(repos.x_and_noty_pred(ard1.exec_predicate(), 02847 ard2.exec_predicate())); 02848 break; 02849 default: 02850 p_abort("Bad value for meet operator"); 02851 } 02852 interARD->poss_reduct(aa1->poss_reduct() && aa2->poss_reduct()); 02853 if (aa1->reduct_op() == aa2->reduct_op()) { 02854 interARD->reduct_op(aa1->reduct_op()); 02855 } else { 02856 interARD->reduct_op(RED_OP_NONE); 02857 } 02858 02859 if (newaa1) delete aa1; 02860 if (newaa2) delete aa2; 02861 if (_ar_logging && _ar_logging % 2 == 0) { 02862 arlog << "New ARD1:" << new_ard1 << endl; 02863 arlog << "New ARD2:" << new_ard2 << endl; 02864 } 02865 istats.inter_ARD.stop(); 02866 02867 intersect.ins_last(interARD); 02868 if (_ar_logging && _ar_logging % 2 == 0) { 02869 arlog << "Intersection:" << *interARD << endl; 02870 } 02871 return true; 02872 } else { 02873 /// ... accesses don't overlap 02874 if (_ar_logging && _ar_logging % 2 == 0) { 02875 arlog << "No intersection" << endl; 02876 } 02877 if (newaa1) delete aa1; 02878 if (newaa2) delete aa2; 02879 if (_ar_logging && _ar_logging % 2 == 0) { 02880 arlog << "New ARD1:" << new_ard1 << endl; 02881 arlog << "New ARD2:" << new_ard2 << endl; 02882 } 02883 istats.inter_ARD.stop(); 02884 return false; 02885 } 02886 } else { /// ... Do the above symbolically 02887 02888 Expression & estart1 = aa1->start_guarded(); 02889 Expression & estart2 = aa2->start_guarded(); 02890 02891 Expression * ediff = simplify(sub(estart1.clone(), estart2.clone())); 02892 02893 if (ediff->op() == INTEGER_CONSTANT_OP) { 02894 if (ediff->value() == 0) { 02895 /// ... Intersect descriptor 02896 AbstractAccess * interARD = ard1.clone(); 02897 switch (meet_op) { 02898 case AR_X: 02899 interARD->exec_predicate(ard1.exec_predicate()); 02900 break; 02901 case AR_Y: 02902 interARD->exec_predicate(ard2.exec_predicate()); 02903 break; 02904 case AR_AND: 02905 interARD->exec_predicate(repos.and_pred(ard1.exec_predicate(), 02906 ard2.exec_predicate())); 02907 break; 02908 case AR_OR: 02909 interARD->exec_predicate(repos.or_pred(ard1.exec_predicate(), 02910 ard2.exec_predicate())); 02911 break; 02912 case AR_NOTX_Y: 02913 interARD->exec_predicate(repos.notx_and_y_pred(ard1.exec_predicate(), 02914 ard2.exec_predicate())); 02915 break; 02916 case AR_X_NOTY: 02917 interARD->exec_predicate(repos.x_and_noty_pred(ard1.exec_predicate(), 02918 ard2.exec_predicate())); 02919 break; 02920 default: 02921 p_abort("Bad value for meet operator"); 02922 } 02923 interARD->poss_reduct(aa1->poss_reduct() && aa2->poss_reduct()); 02924 if (aa1->reduct_op() == aa2->reduct_op()) { 02925 interARD->reduct_op(aa1->reduct_op()); 02926 } else { 02927 interARD->reduct_op(RED_OP_NONE); 02928 } 02929 02930 if (newaa1) delete aa1; 02931 if (newaa2) delete aa2; 02932 delete ediff; 02933 if (interARD->exec_predicate_false()) { 02934 if (_ar_logging && _ar_logging % 2 == 0) { 02935 arlog << "No intersection" << endl; 02936 } 02937 return false; 02938 } else { 02939 if (_ar_logging && _ar_logging % 2 == 0) { 02940 arlog << "New ARD1:" << new_ard1 << endl; 02941 arlog << "New ARD2:" << new_ard2 << endl; 02942 } 02943 istats.inter_ARD.stop(); 02944 02945 intersect.ins_last(interARD); 02946 if (_ar_logging && _ar_logging % 2 == 0) { 02947 arlog << "Intersection:" << *interARD << endl; 02948 } 02949 return true; 02950 } 02951 } else { 02952 /// ... accesses don't overlap 02953 if (_ar_logging && _ar_logging % 2 == 0) { 02954 arlog << "No intersection" << endl; 02955 } 02956 if (newaa1) delete aa1; 02957 if (newaa2) delete aa2; 02958 if (_ar_logging && _ar_logging % 2 == 0) { 02959 arlog << "New ARD1:" << new_ard1 << endl; 02960 arlog << "New ARD2:" << new_ard2 << endl; 02961 } 02962 delete ediff; 02963 istats.inter_ARD.stop(); 02964 return false; 02965 } 02966 } else { 02967 Expression *zero = constant(0); 02968 Relation relz = range_acc.compare(*ediff, *zero); 02969 delete zero; 02970 02971 if (relz.is_not_equal()) { 02972 /// ... accesses don't overlap 02973 if (_ar_logging && _ar_logging % 2 == 0) { 02974 arlog << "No intersection" << endl; 02975 } 02976 if (newaa1) delete aa1; 02977 if (newaa2) delete aa2; 02978 if (_ar_logging && _ar_logging % 2 == 0) { 02979 arlog << "New ARD1:" << new_ard1 << endl; 02980 arlog << "New ARD2:" << new_ard2 << endl; 02981 } 02982 delete ediff; 02983 istats.inter_ARD.stop(); 02984 return false; 02985 } 02986 if (_ar_logging && _ar_logging % 2 == 0) { 02987 arlog << endl << "Not enough info" << endl; 02988 } 02989 02990 if (direction == OVERESTIMATE) { 02991 intersect.ins_last(ard1.clone()); 02992 intersect.ins_last(ard2.clone()); 02993 /// ... Add nothing to new_ard1 or new_ard2 02994 if (_ar_logging && _ar_logging % 2 == 0) { 02995 arlog << "OVERESTIMATE - ARD1 & ARD2 in intersection, discard both" << endl; 02996 } 02997 if (newaa1) delete aa1; 02998 if (newaa2) delete aa2; 02999 if (_ar_logging && _ar_logging % 2 == 0) { 03000 arlog << "New ARD1:" << new_ard1 << endl; 03001 arlog << "New ARD2:" << new_ard2 << endl; 03002 } 03003 delete ediff; 03004 istats.inter_ARD.stop(); 03005 return true; 03006 } else { /// ... UNDERESTIMATE 03007 if (_ar_logging && _ar_logging % 2 == 0) { 03008 arlog << "Just don't know - UNDERESTIMATE with empty intersection" << endl; 03009 } 03010 if (newaa1) delete aa1; 03011 if (newaa2) delete aa2; 03012 if (_ar_logging && _ar_logging % 2 == 0) { 03013 arlog << "New ARD1:" << new_ard1 << endl; 03014 arlog << "New ARD2:" << new_ard2 << endl; 03015 } 03016 delete ediff; 03017 istats.inter_ARD.stop(); 03018 return false; 03019 } 03020 } 03021 } 03022 } 03023 03024 if ((dims1 == 1) && (dims2 == 1)) { 03025 03026 AccessDimension & dim1 = (AccessDimension &) aa1->dimension(0); 03027 AccessDimension & dim2 = (AccessDimension &) aa2->dimension(0); 03028 03029 Expression & estride = dim1.stride_guarded(); 03030 03031 if ((aa1->start_guarded().op() == INTEGER_CONSTANT_OP) && 03032 (aa2->start_guarded().op() == INTEGER_CONSTANT_OP) && 03033 (dim1.stride_guarded().op() == INTEGER_CONSTANT_OP) && 03034 (dim1.span_guarded().op() == INTEGER_CONSTANT_OP) && 03035 (dim2.span_guarded().op() == INTEGER_CONSTANT_OP)) { 03036 03037 int stride = dim1.stride_guarded().value(); 03038 p_assert(stride, "Stride of zero not allowed"); 03039 03040 int start1 = aa1->start_guarded().value(); 03041 int start2 = aa2->start_guarded().value(); 03042 03043 int span1 = dim1.span_guarded().value(); 03044 int span2 = dim2.span_guarded().value(); 03045 03046 int lastoff1 = start1 + span1; 03047 int lastoff2 = start2 + span2; 03048 03049 if ((start2 > lastoff1) || 03050 (start1 > lastoff2)) { 03051 /// ... accesses don't overlap 03052 if (_ar_logging && _ar_logging % 2 == 0) { 03053 arlog << "No intersection" << endl; 03054 } 03055 if (newaa1) delete aa1; 03056 if (newaa2) delete aa2; 03057 if (_ar_logging && _ar_logging % 2 == 0) { 03058 arlog << "New ARD1:" << new_ard1 << endl; 03059 arlog << "New ARD2:" << new_ard2 << endl; 03060 } 03061 istats.inter_ARD.stop(); 03062 return false; 03063 } 03064 03065 int diff = start1 - start2; 03066 if ( abs(diff) % stride == 0 ) { 03067 if (diff > 0) { 03068 /// ... aa1 starts to the right of aa2 03069 /// ... We know (from above) that start1 <= lastoff2 03070 if (lastoff1 > lastoff2) { 03071 /// ... New start1 is one stride after end of aa2 03072 /// ... New start2 is old start2 03073 03074 AbstractAccess * ard1clone = ard1.clone(); 03075 ard1clone->start(constant(lastoff2+stride)); 03076 int newspan1 = lastoff1-(lastoff2+stride); 03077 if (newspan1 == 0) { 03078 ard1clone->del_dimension(0); 03079 } else { 03080 ard1clone->dimension(0).span(constant(newspan1)); 03081 } 03082 new_ard1.ins_last(ard1clone); 03083 03084 AbstractAccess * ard2clone = ard2.clone(); 03085 int newspan2 = (start1-stride)-start2; 03086 if (newspan2 == 0) { 03087 ard2clone->del_dimension(0); 03088 } else { 03089 ard2clone->dimension(0).span(constant(newspan2)); 03090 } 03091 new_ard2.ins_last(ard2clone); 03092 03093 /// ... Intersect descriptor 03094 AbstractAccess * interARD = ard1.clone(); 03095 int newspan3 = lastoff2-start1; 03096 if (newspan3 == 0) { 03097 interARD->del_dimension(0); 03098 } else { 03099 interARD->dimension(0).span(constant(newspan3)); 03100 } 03101 switch (meet_op) { 03102 case AR_X: 03103 interARD->exec_predicate(ard1.exec_predicate()); 03104 break; 03105 case AR_Y: 03106 interARD->exec_predicate(ard2.exec_predicate()); 03107 break; 03108 case AR_AND: 03109 interARD->exec_predicate(repos.and_pred(ard1.exec_predicate(), 03110 ard2.exec_predicate())); 03111 break; 03112 case AR_OR: 03113 interARD->exec_predicate(repos.or_pred(ard1.exec_predicate(), 03114 ard2.exec_predicate())); 03115 break; 03116 case AR_NOTX_Y: 03117 interARD->exec_predicate(repos.notx_and_y_pred(ard1.exec_predicate(), 03118 ard2.exec_predicate())); 03119 break; 03120 case AR_X_NOTY: 03121 interARD->exec_predicate(repos.x_and_noty_pred(ard1.exec_predicate(), 03122 ard2.exec_predicate())); 03123 break; 03124 default: 03125 p_abort("Bad value for meet operator"); 03126 } 03127 /// ... This can't be propagate the reduction characteristic because 03128 /// ... the two ARDs aren't identical 03129 interARD->poss_reduct(false); 03130 interARD->reduct_op(RED_OP_NONE); 03131 03132 if (newaa1) delete aa1; 03133 if (newaa2) delete aa2; 03134 if (interARD->exec_predicate_false()) { 03135 if (_ar_logging && _ar_logging % 2 == 0) { 03136 arlog << "No intersection" << endl; 03137 } 03138 return false; 03139 } else { 03140 if (_ar_logging && _ar_logging % 2 == 0) { 03141 arlog << "New ARD1:" << new_ard1 << endl; 03142 arlog << "New ARD2:" << new_ard2 << endl; 03143 } 03144 istats.inter_ARD.stop(); 03145 intersect.ins_last(interARD); 03146 if (_ar_logging && _ar_logging % 2 == 0) { 03147 arlog << "Intersection:" << *interARD << endl; 03148 } 03149 return true; 03150 } 03151 } else if (lastoff1 == lastoff2) { 03152 /// ... aa1 is a subregion of aa2. 03153 03154 AbstractAccess * ard2clone = ard2.clone(); 03155 int newspan1 = (start1-stride)-start2; 03156 if (newspan1 == 0) { 03157 ard2clone->del_dimension(0); 03158 } else { 03159 ard2clone->dimension(0).span(constant(newspan1)); 03160 } 03161 new_ard2.ins_last(ard2clone); 03162 03163 AbstractAccess * interARD = ard1.clone(); 03164 switch (meet_op) { 03165 case AR_X: 03166 interARD->exec_predicate(ard1.exec_predicate()); 03167 break; 03168 case AR_Y: 03169 interARD->exec_predicate(ard2.exec_predicate()); 03170 break; 03171 case AR_AND: 03172 interARD->exec_predicate(repos.and_pred(ard1.exec_predicate(), 03173 ard2.exec_predicate())); 03174 break; 03175 case AR_OR: 03176 interARD->exec_predicate(repos.or_pred(ard1.exec_predicate(), 03177 ard2.exec_predicate())); 03178 break; 03179 case AR_NOTX_Y: 03180 interARD->exec_predicate(repos.notx_and_y_pred(ard1.exec_predicate(), 03181 ard2.exec_predicate())); 03182 break; 03183 case AR_X_NOTY: 03184 interARD->exec_predicate(repos.x_and_noty_pred(ard1.exec_predicate(), 03185 ard2.exec_predicate())); 03186 break; 03187 default: 03188 p_abort("Bad value for meet operator"); 03189 } 03190 /// ... This can't be propagate the reduction characteristic because 03191 /// ... the two ARDs aren't identical 03192 interARD->poss_reduct(false); 03193 interARD->reduct_op(RED_OP_NONE); 03194 03195 if (newaa1) delete aa1; 03196 if (newaa2) delete aa2; 03197 if (interARD->exec_predicate_false()) { 03198 if (_ar_logging && _ar_logging % 2 == 0) { 03199 arlog << "No intersection" << endl; 03200 } 03201 return false; 03202 } else { 03203 if (_ar_logging && _ar_logging % 2 == 0) { 03204 arlog << "New ARD1:" << new_ard1 << endl; 03205 arlog << "New ARD2:" << new_ard2 << endl; 03206 } 03207 istats.inter_ARD.stop(); 03208 intersect.ins_last(interARD); 03209 if (_ar_logging && _ar_logging % 2 == 0) { 03210 arlog << "ARD1 is subregion of ARD2. Intersection is ARD1. Break ARD2" << endl; 03211 } 03212 return true; 03213 } 03214 } else { /// ... lastoff1 < lastoff2 03215 /// ... aa1 is a subregion of aa2. Nothing in new_ard1. 03216 03217 AbstractAccess * ard2_part1 = ard2.clone(); 03218 int newspan1 = (start1-stride)-start2; 03219 if (newspan1 == 0) { 03220 ard2_part1->del_dimension(0); 03221 } else { 03222 ard2_part1->dimension(0).span(constant(newspan1)); 03223 } 03224 new_ard2.ins_last(ard2_part1); 03225 03226 AbstractAccess * ard2_part2 = ard2.clone(); 03227 ard2_part2->start(constant(lastoff1+stride)); 03228 int newspan2 = lastoff2-(lastoff1+stride); 03229 if (newspan2 == 0) { 03230 ard2_part2->del_dimension(0); 03231 } else { 03232 ard2_part2->dimension(0).span(constant(newspan2)); 03233 } 03234 new_ard2.ins_last(ard2_part2); 03235 03236 AbstractAccess * interARD = ard1.clone(); 03237 switch (meet_op) { 03238 case AR_X: 03239 interARD->exec_predicate(ard1.exec_predicate()); 03240 break; 03241 case AR_Y: 03242 interARD->exec_predicate(ard2.exec_predicate()); 03243 break; 03244 case AR_AND: 03245 interARD->exec_predicate(repos.and_pred(ard1.exec_predicate(), 03246 ard2.exec_predicate())); 03247 break; 03248 case AR_OR: 03249 interARD->exec_predicate(repos.or_pred(ard1.exec_predicate(), 03250 ard2.exec_predicate())); 03251 break; 03252 case AR_NOTX_Y: 03253 interARD->exec_predicate(repos.notx_and_y_pred(ard1.exec_predicate(), 03254 ard2.exec_predicate())); 03255 break; 03256 case AR_X_NOTY: 03257 interARD->exec_predicate(repos.x_and_noty_pred(ard1.exec_predicate(), 03258 ard2.exec_predicate())); 03259 break; 03260 default: 03261 p_abort("Bad value for meet operator"); 03262 } 03263 /// ... This can't be propagate the reduction characteristic because 03264 /// ... the two ARDs aren't identical 03265 interARD->poss_reduct(false); 03266 interARD->reduct_op(RED_OP_NONE); 03267 03268 if (newaa1) delete aa1; 03269 if (newaa2) delete aa2; 03270 if (interARD->exec_predicate_false()) { 03271 if (_ar_logging && _ar_logging % 2 == 0) { 03272 arlog << "No intersection" << endl; 03273 } 03274 return false; 03275 } else { 03276 if (_ar_logging && _ar_logging % 2 == 0) { 03277 arlog << "New ARD1:" << new_ard1 << endl; 03278 arlog << "New ARD2:" << new_ard2 << endl; 03279 } 03280 istats.inter_ARD.stop(); 03281 intersect.ins_last(interARD); 03282 if (_ar_logging && _ar_logging % 2 == 0) { 03283 arlog << "ARD1 is subregion of ARD2. Intersection is ARD1. Break ARD2" << endl; 03284 } 03285 return true; 03286 } 03287 } 03288 } else if (diff == 0) { /// ... aa1 and aa2 start at the same point 03289 03290 if (lastoff1 > lastoff2) { 03291 /// ... Just 1 piece of ard1 survives 03292 03293 AbstractAccess * ard1clone = ard1.clone(); 03294 ard1clone->start(constant(lastoff2+stride)); 03295 int newspan1 = lastoff1-(lastoff2+stride); 03296 if (newspan1 == 0) { 03297 ard1clone->del_dimension(0); 03298 } else { 03299 ard1clone->dimension(0).span(constant(newspan1)); 03300 } 03301 new_ard1.ins_last(ard1clone); 03302 03303 /// ... intersection is aa2. aa2 disappears. 03304 03305 AbstractAccess *interARD = ard2.clone(); 03306 switch (meet_op) { 03307 case AR_X: 03308 interARD->exec_predicate(ard1.exec_predicate()); 03309 break; 03310 case AR_Y: 03311 interARD->exec_predicate(ard2.exec_predicate()); 03312 break; 03313 case AR_AND: 03314 interARD->exec_predicate(repos.and_pred(ard1.exec_predicate(), 03315 ard2.exec_predicate())); 03316 break; 03317 case AR_OR: 03318 interARD->exec_predicate(repos.or_pred(ard1.exec_predicate(), 03319 ard2.exec_predicate())); 03320 break; 03321 case AR_NOTX_Y: 03322 interARD->exec_predicate(repos.notx_and_y_pred(ard1.exec_predicate(), 03323 ard2.exec_predicate())); 03324 break; 03325 case AR_X_NOTY: 03326 interARD->exec_predicate(repos.x_and_noty_pred(ard1.exec_predicate(), 03327 ard2.exec_predicate())); 03328 break; 03329 default: 03330 p_abort("Bad value for meet operator"); 03331 } 03332 /// ... This can't be propagate the reduction characteristic because 03333 /// ... the two ARDs aren't identical 03334 interARD->poss_reduct(false); 03335 interARD->reduct_op(RED_OP_NONE); 03336 03337 if (newaa1) delete aa1; 03338 if (newaa2) delete aa2; 03339 if (interARD->exec_predicate_false()) { 03340 if (_ar_logging && _ar_logging % 2 == 0) { 03341 arlog << "No intersection" << endl; 03342 } 03343 return false; 03344 } else { 03345 if (_ar_logging && _ar_logging % 2 == 0) { 03346 arlog << "New ARD1:" << new_ard1 << endl; 03347 arlog << "New ARD2:" << new_ard2 << endl; 03348 } 03349 istats.inter_ARD.stop(); 03350 intersect.ins_last(interARD); 03351 if (_ar_logging && _ar_logging % 2 == 0) { 03352 arlog << "ARD2 is subregion of ARD1. Intersection is ARD2. Reduce ARD1" << endl; 03353 } 03354 return true; 03355 } 03356 } else if (lastoff1 == lastoff2) { 03357 /// ... ARD1 == ARD2. Both disappear into the intersection. 03358 03359 AbstractAccess *interARD = ard1.clone(); 03360 switch (meet_op) { 03361 case AR_X: 03362 interARD->exec_predicate(ard1.exec_predicate()); 03363 break; 03364 case AR_Y: 03365 interARD->exec_predicate(ard2.exec_predicate()); 03366 break; 03367 case AR_AND: 03368 interARD->exec_predicate(repos.and_pred(ard1.exec_predicate(), 03369 ard2.exec_predicate())); 03370 break; 03371 case AR_OR: 03372 interARD->exec_predicate(repos.or_pred(ard1.exec_predicate(), 03373 ard2.exec_predicate())); 03374 break; 03375 case AR_NOTX_Y: 03376 interARD->exec_predicate(repos.notx_and_y_pred(ard1.exec_predicate(), 03377 ard2.exec_predicate())); 03378 break; 03379 case AR_X_NOTY: 03380 interARD->exec_predicate(repos.x_and_noty_pred(ard1.exec_predicate(), 03381 ard2.exec_predicate())); 03382 break; 03383 default: 03384 p_abort("Bad value for meet operator"); 03385 } 03386 interARD->poss_reduct(aa1->poss_reduct() && aa2->poss_reduct()); 03387 if (aa1->reduct_op() == aa2->reduct_op()) { 03388 interARD->reduct_op(aa1->reduct_op()); 03389 } else { 03390 interARD->reduct_op(RED_OP_NONE); 03391 } 03392 03393 if (newaa1) delete aa1; 03394 if (newaa2) delete aa2; 03395 if (interARD->exec_predicate_false()) { 03396 if (_ar_logging && _ar_logging %2 == 0) { 03397 arlog << "No intersect" << endl; 03398 } 03399 return false; 03400 } else { 03401 if (_ar_logging && _ar_logging % 2 == 0) { 03402 arlog << "New ARD1:" << new_ard1 << endl; 03403 arlog << "New ARD2:" << new_ard2 << endl; 03404 } 03405 istats.inter_ARD.stop(); 03406 intersect.ins_last(interARD); 03407 if (_ar_logging && _ar_logging % 2 == 0) { 03408 arlog << "ARD2 == ARD1. Both disappear into intersection." << endl; 03409 } 03410 return true; 03411 } 03412 } else { 03413 /// ... lastoff1 < lastoff2 03414 /// ... Just 1 piece of ard2 survives 03415 03416 AbstractAccess * ard2clone = ard2.clone(); 03417 ard2clone->start(constant(lastoff1+stride)); 03418 int newspan1 = lastoff2-(lastoff1+stride); 03419 if (newspan1 == 0) { 03420 ard2clone->del_dimension(0); 03421 } else { 03422 ard2clone->dimension(0).span(constant(newspan1)); 03423 } 03424 new_ard2.ins_last(ard2clone); 03425 03426 /// ... intersection is aa1. aa1 disappears. 03427 03428 AbstractAccess *interARD = ard1.clone(); 03429 switch (meet_op) { 03430 case AR_X: 03431 interARD->exec_predicate(ard1.exec_predicate()); 03432 break; 03433 case AR_Y: 03434 interARD->exec_predicate(ard2.exec_predicate()); 03435 break; 03436 case AR_AND: 03437 interARD->exec_predicate(repos.and_pred(ard1.exec_predicate(), 03438 ard2.exec_predicate())); 03439 break; 03440 case AR_OR: 03441 interARD->exec_predicate(repos.or_pred(ard1.exec_predicate(), 03442 ard2.exec_predicate())); 03443 break; 03444 case AR_NOTX_Y: 03445 interARD->exec_predicate(repos.notx_and_y_pred(ard1.exec_predicate(), 03446 ard2.exec_predicate())); 03447 break; 03448 case AR_X_NOTY: 03449 interARD->exec_predicate(repos.x_and_noty_pred(ard1.exec_predicate(), 03450 ard2.exec_predicate())); 03451 break; 03452 default: 03453 p_abort("Bad value for meet operator"); 03454 } 03455 /// ... This can't be propagate the reduction characteristic because 03456 /// ... the two ARDs aren't identical 03457 interARD->poss_reduct(false); 03458 interARD->reduct_op(RED_OP_NONE); 03459 03460 if (newaa1) delete aa1; 03461 if (newaa2) delete aa2; 03462 if (interARD->exec_predicate_false()) { 03463 if (_ar_logging && _ar_logging %2 == 0) { 03464 arlog << "No intersect" << endl; 03465 } 03466 return false; 03467 } else { 03468 if (_ar_logging && _ar_logging % 2 == 0) { 03469 arlog << "New ARD1:" << new_ard1 << endl; 03470 arlog << "New ARD2:" << new_ard2 << endl; 03471 } 03472 istats.inter_ARD.stop(); 03473 intersect.ins_last(interARD); 03474 if (_ar_logging && _ar_logging % 2 == 0) { 03475 arlog << "ARD1 is subregion of ARD2. Intersection is ARD1. Reduce ARD2" << endl; 03476 } 03477 return true; 03478 } 03479 } 03480 } else { /// ... diff < 0 -> aa2 starts to the right of aa1 03481 03482 if (lastoff1 > lastoff2) { 03483 /// ... aa2 is a subregion of aa1. Nothing in new_ard2. 03484 03485 AbstractAccess * ard1_part1 = ard1.clone(); 03486 int newspan1 = (start2-stride)-start1; 03487 if (newspan1 == 0) { 03488 ard1_part1->del_dimension(0); 03489 } else { 03490 ard1_part1->dimension(0).span(constant(newspan1)); 03491 } 03492 new_ard1.ins_last(ard1_part1); 03493 03494 AbstractAccess * ard1_part2 = ard1.clone(); 03495 ard1_part2->start(constant(lastoff2+stride)); 03496 int newspan2 = lastoff1-(lastoff2+stride); 03497 if (newspan2 == 0) { 03498 ard1_part2->del_dimension(0); 03499 } else { 03500 ard1_part2->dimension(0).span(constant(newspan2)); 03501 } 03502 new_ard1.ins_last(ard1_part2); 03503 03504 AbstractAccess * interARD = ard2.clone(); 03505 switch (meet_op) { 03506 case AR_X: 03507 interARD->exec_predicate(ard1.exec_predicate()); 03508 break; 03509 case AR_Y: 03510 interARD->exec_predicate(ard2.exec_predicate()); 03511 break; 03512 case AR_AND: 03513 interARD->exec_predicate(repos.and_pred(ard1.exec_predicate(), 03514 ard2.exec_predicate())); 03515 break; 03516 case AR_OR: 03517 interARD->exec_predicate(repos.or_pred(ard1.exec_predicate(), 03518 ard2.exec_predicate())); 03519 break; 03520 case AR_NOTX_Y: 03521 interARD->exec_predicate(repos.notx_and_y_pred(ard1.exec_predicate(), 03522 ard2.exec_predicate())); 03523 break; 03524 case AR_X_NOTY: 03525 interARD->exec_predicate(repos.x_and_noty_pred(ard1.exec_predicate(), 03526 ard2.exec_predicate())); 03527 break; 03528 default: 03529 p_abort("Bad value for meet operator"); 03530 } 03531 /// ... This can't be propagate the reduction characteristic because 03532 /// ... the two ARDs aren't identical 03533 interARD->poss_reduct(false); 03534 interARD->reduct_op(RED_OP_NONE); 03535 03536 if (newaa1) delete aa1; 03537 if (newaa2) delete aa2; 03538 if (interARD->exec_predicate_false()) { 03539 if (_ar_logging && _ar_logging %2 == 0) { 03540 arlog << "No intersect" << endl; 03541 } 03542 return false; 03543 } else { 03544 if (_ar_logging && _ar_logging % 2 == 0) { 03545 arlog << "New ARD1:" << new_ard1 << endl; 03546 arlog << "New ARD2:" << new_ard2 << endl; 03547 } 03548 istats.inter_ARD.stop(); 03549 intersect.ins_last(interARD); 03550 if (_ar_logging && _ar_logging % 2 == 0) { 03551 arlog << "ARD2 is subregion of ARD1. Intersection is ARD2. Break ARD1" << endl; 03552 } 03553 return true; 03554 } 03555 } else if (lastoff1 == lastoff2) { 03556 /// ... aa2 is a subregion of aa1. Nothing in new_ard2. 03557 03558 AbstractAccess * ard1_part1 = ard1.clone(); 03559 int newspan1 = (start2-stride)-start1; 03560 if (newspan1 == 0) { 03561 ard1_part1->del_dimension(0); 03562 } else { 03563 ard1_part1->dimension(0).span(constant(newspan1)); 03564 } 03565 new_ard1.ins_last(ard1_part1); 03566 03567 AbstractAccess * interARD = ard2.clone(); 03568 switch (meet_op) { 03569 case AR_X: 03570 interARD->exec_predicate(ard1.exec_predicate()); 03571 break; 03572 case AR_Y: 03573 interARD->exec_predicate(ard2.exec_predicate()); 03574 break; 03575 case AR_AND: 03576 interARD->exec_predicate(repos.and_pred(ard1.exec_predicate(), 03577 ard2.exec_predicate())); 03578 break; 03579 case AR_OR: 03580 interARD->exec_predicate(repos.or_pred(ard1.exec_predicate(), 03581 ard2.exec_predicate())); 03582 break; 03583 case AR_NOTX_Y: 03584 interARD->exec_predicate(repos.notx_and_y_pred(ard1.exec_predicate(), 03585 ard2.exec_predicate())); 03586 break; 03587 case AR_X_NOTY: 03588 interARD->exec_predicate(repos.x_and_noty_pred(ard1.exec_predicate(), 03589 ard2.exec_predicate())); 03590 break; 03591 default: 03592 p_abort("Bad value for meet operator"); 03593 } 03594 /// ... This can't be propagate the reduction characteristic because 03595 /// ... the two ARDs aren't identical 03596 interARD->poss_reduct(false); 03597 interARD->reduct_op(RED_OP_NONE); 03598 03599 if (newaa1) delete aa1; 03600 if (newaa2) delete aa2; 03601 if (interARD->exec_predicate_false()) { 03602 if (_ar_logging && _ar_logging %2 == 0) { 03603 arlog << "No intersect" << endl; 03604 } 03605 return false; 03606 } else { 03607 if (_ar_logging && _ar_logging % 2 == 0) { 03608 arlog << "New ARD1:" << new_ard1 << endl; 03609 arlog << "New ARD2:" << new_ard2 << endl; 03610 } 03611 istats.inter_ARD.stop(); 03612 intersect.ins_last(interARD); 03613 if (_ar_logging && _ar_logging % 2 == 0) { 03614 arlog << "ARD2 is subregion of ARD1. Intersection is ARD2. Break ARD1" << endl; 03615 } 03616 return true; 03617 } 03618 } else { 03619 /// ... New start2 is one stride after end of aa1 03620 /// ... New start1 is old start1 03621 03622 AbstractAccess * ard1clone = ard1.clone(); 03623 int newspan1 = (start2-stride)-start1; 03624 if (newspan1 == 0) { 03625 ard1clone->del_dimension(0); 03626 } else { 03627 ard1clone->dimension(0).span(constant(newspan1)); 03628 } 03629 new_ard1.ins_last(ard1clone); 03630 03631 AbstractAccess * ard2clone = ard2.clone(); 03632 ard2clone->start(constant(lastoff1+stride)); 03633 int newspan2 = lastoff2-(lastoff1+stride); 03634 if (newspan2 == 0) { 03635 ard2clone->del_dimension(0); 03636 } else { 03637 ard2clone->dimension(0).span(constant(newspan2)); 03638 } 03639 new_ard2.ins_last(ard2clone); 03640 03641 /// ... Intersect descriptor 03642 AbstractAccess * interARD = ard2.clone(); 03643 int newspan3 = lastoff1-start2; 03644 if (newspan3 == 0) { 03645 interARD->del_dimension(0); 03646 } else { 03647 interARD->dimension(0).span(constant(newspan3)); 03648 } 03649 switch (meet_op) { 03650 case AR_X: 03651 interARD->exec_predicate(ard1.exec_predicate()); 03652 break; 03653 case AR_Y: 03654 interARD->exec_predicate(ard2.exec_predicate()); 03655 break; 03656 case AR_AND: 03657 interARD->exec_predicate(repos.and_pred(ard1.exec_predicate(), 03658 ard2.exec_predicate())); 03659 break; 03660 case AR_OR: 03661 interARD->exec_predicate(repos.or_pred(ard1.exec_predicate(), 03662 ard2.exec_predicate())); 03663 break; 03664 case AR_NOTX_Y: 03665 interARD->exec_predicate(repos.notx_and_y_pred(ard1.exec_predicate(), 03666 ard2.exec_predicate())); 03667 break; 03668 case AR_X_NOTY: 03669 interARD->exec_predicate(repos.x_and_noty_pred(ard1.exec_predicate(), 03670 ard2.exec_predicate())); 03671 break; 03672 default: 03673 p_abort("Bad value for meet operator"); 03674 } 03675 /// ... This can't be propagate the reduction characteristic because 03676 /// ... the two ARDs aren't identical 03677 interARD->poss_reduct(false); 03678 interARD->reduct_op(RED_OP_NONE); 03679 03680 if (newaa1) delete aa1; 03681 if (newaa2) delete aa2; 03682 if (interARD->exec_predicate_false()) { 03683 if (_ar_logging && _ar_logging %2 == 0) { 03684 arlog << "No intersect" << endl; 03685 } 03686 return false; 03687 } else { 03688 if (_ar_logging && _ar_logging % 2 == 0) { 03689 arlog << "New ARD1:" << new_ard1 << endl; 03690 arlog << "New ARD2:" << new_ard2 << endl; 03691 } 03692 istats.inter_ARD.stop(); 03693 intersect.ins_last(interARD); 03694 if (_ar_logging && _ar_logging % 2 == 0) { 03695 arlog << "Intersection:" << *interARD << endl; 03696 } 03697 return true; 03698 } 03699 } 03700 } 03701 } else { 03702 /// ... Points don't overlap 03703 if (_ar_logging && _ar_logging % 2 == 0) { 03704 arlog << "No intersection" << endl; 03705 } 03706 if (newaa1) delete aa1; 03707 if (newaa2) delete aa2; 03708 if (_ar_logging && _ar_logging % 2 == 0) { 03709 arlog << "New ARD1:" << new_ard1 << endl; 03710 arlog << "New ARD2:" << new_ard2 << endl; 03711 } 03712 istats.inter_ARD.stop(); 03713 return false; 03714 } 03715 } else { 03716 /// ... Do the same as above, only symbolically 03717 03718 Expression & estart1 = aa1->start_guarded(); 03719 Expression & estart2 = aa2->start_guarded(); 03720 Expression & espan1 = dim1.span_guarded(); 03721 Expression & espan2 = dim2.span_guarded(); 03722 03723 Expression * elastoff1 = simplify(add(estart1.clone(), espan1.clone())); 03724 Expression * elastoff2 = simplify(add(estart2.clone(), espan2.clone())); 03725 03726 Relation relcheck1 = range_acc.compare(estart1, *elastoff2); 03727 Relation relcheck2 = range_acc.compare(estart2, *elastoff1); 03728 03729 if (relcheck2.is_greater_than() || relcheck1.is_greater_than()) { 03730 /// ... accesses don't overlap 03731 if (_ar_logging && _ar_logging % 2 == 0) { 03732 arlog << "No intersection" << endl; 03733 } 03734 if (newaa1) delete aa1; 03735 if (newaa2) delete aa2; 03736 if (_ar_logging && _ar_logging % 2 == 0) { 03737 arlog << "New ARD1:" << new_ard1 << endl; 03738 arlog << "New ARD2:" << new_ard2 << endl; 03739 } 03740 delete elastoff1; 03741 delete elastoff2; 03742 istats.inter_ARD.stop(); 03743 return false; 03744 } 03745 03746 Expression * ediff = simplify(sub(estart1.clone(), estart2.clone())); 03747 Expression * zero = constant(0); 03748 03749 bool evenly_divides; 03750 03751 /// ... If the two dimensions involved come from singleton indexes, 03752 /// ... and we can determine the size of the declared bound of the 03753 /// ... variable, then we can test whether the difference between 03754 /// ... the base offsets is divisible by the size of the declared 03755 /// ... bound. If it is, then there is no overlap. 03756 03757 // if (dim1.singleton() && dim2.singleton() && 03758 // dim[0].upper_exists()) { 03759 // upper = dim[0].upper_guarded().clone(); 03760 /// 03761 // if (!dim[0].lower_exists()) { 03762 // lower = constant(1); 03763 // } else { 03764 // lower = dim[0].lower_guarded().clone(); 03765 // } 03766 // Expression * dimsize = simplify(add(sub(upper, lower), constant(1))); 03767 // if (divides_diff(*dimsize, ediff, ard1, ard2, OVERESTIMATE)) { 03768 /// 03769 // if (_ar_logging && _ar_logging % 2 == 0) { 03770 // arlog << "No intersection" << endl; 03771 // } 03772 // if (newaa1) delete aa1; 03773 // if (newaa2) delete aa2; 03774 // delete dimsize; 03775 // delete ediff; 03776 // delete zero; 03777 // delete elastoff1; 03778 // delete elastoff2; 03779 // istats.inter_ARD.stop(); 03780 // return false; 03781 // } 03782 // delete dimsize; 03783 // } 03784 03785 /// ... Check whether the stride divides the diff, and how certain we are about that 03786 03787 if ((estride.op() == INTEGER_CONSTANT_OP) && 03788 (ediff->op() == INTEGER_CONSTANT_OP)) { 03789 03790 if (ediff->value() % estride.value() == 0) { 03791 evenly_divides = true; 03792 } else { 03793 evenly_divides = false; 03794 } 03795 } else { 03796 if (divides_diff(estride, ediff, ard1, ard2, OVERESTIMATE)) { 03797 evenly_divides = true; 03798 } else { 03799 if (_ar_logging && _ar_logging % 2 == 0) { 03800 arlog << "No intersection" << endl; 03801 } 03802 if (newaa1) delete aa1; 03803 if (newaa2) delete aa2; 03804 if (_ar_logging && _ar_logging % 2 == 0) { 03805 arlog << "New ARD1:" << new_ard1 << endl; 03806 arlog << "New ARD2:" << new_ard2 << endl; 03807 } 03808 delete ediff; 03809 delete zero; 03810 delete elastoff1; 03811 delete elastoff2; 03812 istats.inter_ARD.stop(); 03813 return false; 03814 } 03815 } 03816 /// ... If we weren't sure about the evenly_divides, we would have returned before this. 03817 if (evenly_divides) { 03818 Relation reldiff = range_acc.compare(*ediff,*zero); 03819 delete zero; 03820 if (reldiff.is_greater_than()) { 03821 /// ... aa1 starts to the right of aa2 03822 03823 Relation rel_over2 = range_acc.compare(*elastoff1, *elastoff2); 03824 03825 /// ... We know (from above) that start1 <= lastoff2 03826 if (rel_over2.is_greater_than()) { 03827 /// ... New start1 is one stride after end of aa2 03828 /// ... New start2 is old start2 03829 03830 AbstractAccess * ard1clone = ard1.clone(); 03831 ard1clone->start(simplify(add(elastoff2->clone(),estride.clone()))); 03832 Expression * newspan1 = simplify(sub(elastoff1->clone(), 03833 add(elastoff2->clone(),estride.clone()))); 03834 if (is_integer_zero(*newspan1)) { 03835 ard1clone->del_dimension(0); 03836 } else { 03837 ard1clone->dimension(0).span(newspan1); 03838 } 03839 new_ard1.ins_last(ard1clone); 03840 03841 AbstractAccess * ard2clone = ard2.clone(); 03842 Expression * newspan2 = simplify(sub(sub(estart1.clone(),estride.clone()), 03843 estart2.clone())); 03844 if (is_integer_zero(*newspan2)) { 03845 ard2clone->del_dimension(0); 03846 } else { 03847 ard2clone->dimension(0).span(newspan2); 03848 } 03849 new_ard2.ins_last(ard2clone); 03850 03851 /// ... Intersect descriptor 03852 AbstractAccess * interARD = ard1.clone(); 03853 Expression * newspan3 = simplify(sub(elastoff2->clone(),estart1.clone())); 03854 if (is_integer_zero(*newspan3)) { 03855 interARD->del_dimension(0); 03856 } else { 03857 interARD->dimension(0).span(newspan3); 03858 } 03859 switch (meet_op) { 03860 case AR_X: 03861 interARD->exec_predicate(ard1.exec_predicate()); 03862 break; 03863 case AR_Y: 03864 interARD->exec_predicate(ard2.exec_predicate()); 03865 break; 03866 case AR_AND: 03867 interARD->exec_predicate(repos.and_pred(ard1.exec_predicate(), 03868 ard2.exec_predicate())); 03869 break; 03870 case AR_OR: 03871 interARD->exec_predicate(repos.or_pred(ard1.exec_predicate(), 03872 ard2.exec_predicate())); 03873 break; 03874 case AR_NOTX_Y: 03875 interARD->exec_predicate(repos.notx_and_y_pred(ard1.exec_predicate(), 03876 ard2.exec_predicate())); 03877 break; 03878 case AR_X_NOTY: 03879 interARD->exec_predicate(repos.x_and_noty_pred(ard1.exec_predicate(), 03880 ard2.exec_predicate())); 03881 break; 03882 default: 03883 p_abort("Bad value for meet operator"); 03884 } 03885 /// ... This can't be propagate the reduction characteristic because 03886 /// ... the two ARDs aren't identical 03887 interARD->poss_reduct(false); 03888 interARD->reduct_op(RED_OP_NONE); 03889 03890 if (newaa1) delete aa1; 03891 if (newaa2) delete aa2; 03892 delete elastoff1; 03893 delete elastoff2; 03894 delete ediff; 03895 if (interARD->exec_predicate_false()) { 03896 if (_ar_logging && _ar_logging %2 == 0) { 03897 arlog << "No intersect" << endl; 03898 } 03899 return false; 03900 } else { 03901 if (_ar_logging && _ar_logging % 2 == 0) { 03902 arlog << "New ARD1:" << new_ard1 << endl; 03903 arlog << "New ARD2:" << new_ard2 << endl; 03904 } 03905 istats.inter_ARD.stop(); 03906 intersect.ins_last(interARD); 03907 if (_ar_logging && _ar_logging % 2 == 0) { 03908 arlog << "Intersection:" << *interARD << endl; 03909 } 03910 return true; 03911 } 03912 } else if (rel_over2.is_equal()) { 03913 /// ... aa1 is a subregion of aa2. aa1 disappears. 03914 03915 AbstractAccess * ard2clone = ard2.clone(); 03916 Expression * newspan1 = simplify(sub(sub(estart1.clone(),estride.clone()), 03917 estart2.clone())); 03918 if (is_integer_zero(*newspan1)) { 03919 ard2clone->del_dimension(0); 03920 } else { 03921 ard2clone->dimension(0).span(newspan1); 03922 } 03923 new_ard2.ins_last(ard2clone); 03924 03925 AbstractAccess * interARD = ard1.clone(); 03926 switch (meet_op) { 03927 case AR_X: 03928 interARD->exec_predicate(ard1.exec_predicate()); 03929 break; 03930 case AR_Y: 03931 interARD->exec_predicate(ard2.exec_predicate()); 03932 break; 03933 case AR_AND: 03934 interARD->exec_predicate(repos.and_pred(ard1.exec_predicate(), 03935 ard2.exec_predicate())); 03936 break; 03937 case AR_OR: 03938 interARD->exec_predicate(repos.or_pred(ard1.exec_predicate(), 03939 ard2.exec_predicate())); 03940 break; 03941 case AR_NOTX_Y: 03942 interARD->exec_predicate(repos.notx_and_y_pred(ard1.exec_predicate(), 03943 ard2.exec_predicate())); 03944 break; 03945 case AR_X_NOTY: 03946 interARD->exec_predicate(repos.x_and_noty_pred(ard1.exec_predicate(), 03947 ard2.exec_predicate())); 03948 break; 03949 default: 03950 p_abort("Bad value for meet operator"); 03951 } 03952 /// ... This can't be propagate the reduction characteristic because 03953 /// ... the two ARDs aren't identical 03954 interARD->poss_reduct(false); 03955 interARD->reduct_op(RED_OP_NONE); 03956 03957 if (newaa1) delete aa1; 03958 if (newaa2) delete aa2; 03959 delete elastoff1; 03960 delete elastoff2; 03961 delete ediff; 03962 if (interARD->exec_predicate_false()) { 03963 if (_ar_logging && _ar_logging %2 == 0) { 03964 arlog << "No intersect" << endl; 03965 } 03966 return false; 03967 } else { 03968 if (_ar_logging && _ar_logging % 2 == 0) { 03969 arlog << "New ARD1:" << new_ard1 << endl; 03970 arlog << "New ARD2:" << new_ard2 << endl; 03971 } 03972 istats.inter_ARD.stop(); 03973 intersect.ins_last(interARD); 03974 if (_ar_logging && _ar_logging % 2 == 0) { 03975 arlog << "ARD1 is subregion of ARD2. Intersection is ARD1. Break ARD2" << endl; 03976 } 03977 return true; 03978 } 03979 } else if (rel_over2.is_less_than()) { /// ... lastoff1 < lastoff2 03980 /// ... aa1 is a subregion of aa2. Nothing in new_ard1. 03981 03982 AbstractAccess * ard2_part1 = ard2.clone(); 03983 Expression * newspan1 = simplify(sub(sub(estart1.clone(),estride.clone()), 03984 estart2.clone())); 03985 if (is_integer_zero(*newspan1)) { 03986 ard2_part1->del_dimension(0); 03987 } else { 03988 ard2_part1->dimension(0).span(newspan1); 03989 } 03990 new_ard2.ins_last(ard2_part1); 03991 03992 AbstractAccess * ard2_part2 = ard2.clone(); 03993 ard2_part2->start(add(elastoff1->clone(),estride.clone())); 03994 Expression * newspan2 = simplify(sub(elastoff2->clone(), 03995 add(elastoff1->clone(),estride.clone()))); 03996 if (is_integer_zero(*newspan2)) { 03997 ard2_part2->del_dimension(0); 03998 } else { 03999 ard2_part2->dimension(0).span(newspan2); 04000 } 04001 new_ard2.ins_last(ard2_part2); 04002 04003 AbstractAccess * interARD = ard1.clone(); 04004 switch (meet_op) { 04005 case AR_X: 04006 interARD->exec_predicate(ard1.exec_predicate()); 04007 break; 04008 case AR_Y: 04009 interARD->exec_predicate(ard2.exec_predicate()); 04010 break; 04011 case AR_AND: 04012 interARD->exec_predicate(repos.and_pred(ard1.exec_predicate(), 04013 ard2.exec_predicate())); 04014 break; 04015 case AR_OR: 04016 interARD->exec_predicate(repos.or_pred(ard1.exec_predicate(), 04017 ard2.exec_predicate())); 04018 break; 04019 case AR_NOTX_Y: 04020 interARD->exec_predicate(repos.notx_and_y_pred(ard1.exec_predicate(), 04021 ard2.exec_predicate())); 04022 break; 04023 case AR_X_NOTY: 04024 interARD->exec_predicate(repos.x_and_noty_pred(ard1.exec_predicate(), 04025 ard2.exec_predicate())); 04026 break; 04027 default: 04028 p_abort("Bad value for meet operator"); 04029 } 04030 /// ... This can't be propagate the reduction characteristic because 04031 /// ... the two ARDs aren't identical 04032 interARD->poss_reduct(false); 04033 interARD->reduct_op(RED_OP_NONE); 04034 04035 if (newaa1) delete aa1; 04036 if (newaa2) delete aa2; 04037 delete elastoff1; 04038 delete elastoff2; 04039 delete ediff; 04040 if (interARD->exec_predicate_false()) { 04041 if (_ar_logging && _ar_logging %2 == 0) { 04042 arlog << "No intersect" << endl; 04043 } 04044 return false; 04045 } else { 04046 if (_ar_logging && _ar_logging % 2 == 0) { 04047 arlog << "New ARD1:" << new_ard1 << endl; 04048 arlog << "New ARD2:" << new_ard2 << endl; 04049 } 04050 istats.inter_ARD.stop(); 04051 intersect.ins_last(interARD); 04052 if (_ar_logging && _ar_logging % 2 == 0) { 04053 arlog << "ARD1 is subregion of ARD2. Intersection is ARD1. Break ARD2" << endl; 04054 } 04055 return true; 04056 } 04057 } else { /// ... Just don't know - use the conservative direction 04058 if (_ar_logging && _ar_logging % 2 == 0) { 04059 arlog << endl << "Not enough info" << endl; 04060 } 04061 if (direction == OVERESTIMATE) { 04062 intersect.ins_last(ard1.clone()); 04063 intersect.ins_last(ard2.clone()); 04064 /// ... Add nothing to new_ard1 or new_ard2 04065 if (_ar_logging && _ar_logging % 2 == 0) { 04066 arlog << "OVERESTIMATE - ARD1 & ARD2 in intersection, discard both" << endl; 04067 } 04068 if (newaa1) delete aa1; 04069 if (newaa2) delete aa2; 04070 if (_ar_logging && _ar_logging % 2 == 0) { 04071 arlog << "New ARD1:" << new_ard1 << endl; 04072 arlog << "New ARD2:" << new_ard2 << endl; 04073 } 04074 delete elastoff1; 04075 delete elastoff2; 04076 delete ediff; 04077 istats.inter_ARD.stop(); 04078 return true; 04079 } else { /// ... UNDERESTIMATE 04080 if (_ar_logging && _ar_logging % 2 == 0) { 04081 arlog << "Just don't know - UNDERESTIMATE with empty intersection" << endl; 04082 } 04083 if (newaa1) delete aa1; 04084 if (newaa2) delete aa2; 04085 if (_ar_logging && _ar_logging % 2 == 0) { 04086 arlog << "New ARD1:" << new_ard1 << endl; 04087 arlog << "New ARD2:" << new_ard2 << endl; 04088 } 04089 delete elastoff1; 04090 delete elastoff2; 04091 delete ediff; 04092 istats.inter_ARD.stop(); 04093 return false; 04094 } 04095 } 04096 } else if (reldiff.is_equal()) { /// ... aa1 and aa2 start at the same point 04097 04098 Relation rel_over2 = range_acc.compare(*elastoff1, *elastoff2); 04099 04100 if (rel_over2.is_greater_than()) { /// ... lastoff1 > lastoff2 04101 /// ... Just 1 piece of ard1 survives 04102 04103 AbstractAccess * ard1clone = ard1.clone(); 04104 ard1clone->start(add(elastoff2->clone(),estride.clone())); 04105 Expression * newspan1 = simplify(sub(elastoff1->clone(),add(elastoff2->clone(),estride.clone()))); 04106 if (is_integer_zero(*newspan1)) { 04107 ard1clone->del_dimension(0); 04108 } else { 04109 ard1clone->dimension(0).span(newspan1); 04110 } 04111 new_ard1.ins_last(ard1clone); 04112 04113 /// ... intersection is aa2. aa2 disappears. 04114 04115 AbstractAccess *interARD = ard2.clone(); 04116 switch (meet_op) { 04117 case AR_X: 04118 interARD->exec_predicate(ard1.exec_predicate()); 04119 break; 04120 case AR_Y: 04121 interARD->exec_predicate(ard2.exec_predicate()); 04122 break; 04123 case AR_AND: 04124 interARD->exec_predicate(repos.and_pred(ard1.exec_predicate(), 04125 ard2.exec_predicate())); 04126 break; 04127 case AR_OR: 04128 interARD->exec_predicate(repos.or_pred(ard1.exec_predicate(), 04129 ard2.exec_predicate())); 04130 break; 04131 case AR_NOTX_Y: 04132 interARD->exec_predicate(repos.notx_and_y_pred(ard1.exec_predicate(), 04133 ard2.exec_predicate())); 04134 break; 04135 case AR_X_NOTY: 04136 interARD->exec_predicate(repos.x_and_noty_pred(ard1.exec_predicate(), 04137 ard2.exec_predicate())); 04138 break; 04139 default: 04140 p_abort("Bad value for meet operator"); 04141 } 04142 /// ... This can't be propagate the reduction characteristic because 04143 /// ... the two ARDs aren't identical 04144 interARD->poss_reduct(false); 04145 interARD->reduct_op(RED_OP_NONE); 04146 04147 if (newaa1) delete aa1; 04148 if (newaa2) delete aa2; 04149 delete elastoff1; 04150 delete elastoff2; 04151 delete ediff; 04152 if (interARD->exec_predicate_false()) { 04153 istats.inter_ARD.stop(); 04154 if (_ar_logging && _ar_logging % 2 == 0) { 04155 arlog << "No intersection" << endl; 04156 } 04157 return false; 04158 } else { 04159 if (_ar_logging && _ar_logging % 2 == 0) { 04160 arlog << "New ARD1:" << new_ard1 << endl; 04161 arlog << "New ARD2:" << new_ard2 << endl; 04162 } 04163 istats.inter_ARD.stop(); 04164 intersect.ins_last(interARD); 04165 if (_ar_logging && _ar_logging % 2 == 0) { 04166 arlog << "ARD2 is subregion of ARD1. Intersection is ARD2. Reduce ARD1" << endl; 04167 } 04168 return true; 04169 } 04170 } else if (rel_over2.is_equal()) { /// ... lastoff1 == lastoff2 04171 /// ... ARD1 == ARD2. Both disappear into the intersection. 04172 04173 AbstractAccess *interARD = ard1.clone(); 04174 switch (meet_op) { 04175 case AR_X: 04176 interARD->exec_predicate(ard1.exec_predicate()); 04177 break; 04178 case AR_Y: 04179 interARD->exec_predicate(ard2.exec_predicate()); 04180 break; 04181 case AR_AND: 04182 interARD->exec_predicate(repos.and_pred(ard1.exec_predicate(), 04183 ard2.exec_predicate())); 04184 break; 04185 case AR_OR: 04186 interARD->exec_predicate(repos.or_pred(ard1.exec_predicate(), 04187 ard2.exec_predicate())); 04188 break; 04189 case AR_NOTX_Y: 04190 interARD->exec_predicate(repos.notx_and_y_pred(ard1.exec_predicate(), 04191 ard2.exec_predicate())); 04192 break; 04193 case AR_X_NOTY: 04194 interARD->exec_predicate(repos.x_and_noty_pred(ard1.exec_predicate(), 04195 ard2.exec_predicate())); 04196 break; 04197 default: 04198 p_abort("Bad value for meet operator"); 04199 } 04200 interARD->poss_reduct(aa1->poss_reduct() && aa2->poss_reduct()); 04201 if (aa1->reduct_op() == aa2->reduct_op()) { 04202 interARD->reduct_op(aa1->reduct_op()); 04203 } else { 04204 interARD->reduct_op(RED_OP_NONE); 04205 } 04206 04207 if (newaa1) delete aa1; 04208 if (newaa2) delete aa2; 04209 delete elastoff1; 04210 delete elastoff2; 04211 delete ediff; 04212 if (_ar_logging && _ar_logging % 2 == 0) { 04213 arlog << "New ARD1:" << new_ard1 << endl; 04214 arlog << "New ARD2:" << new_ard2 << endl; 04215 } 04216 istats.inter_ARD.stop(); 04217 intersect.ins_last(interARD); 04218 if (_ar_logging && _ar_logging % 2 == 0) { 04219 arlog << "ARD2 == ARD1. Both disappear into intersection." << endl; 04220 } 04221 return true; 04222 } else if (rel_over2.is_less_than()) { 04223 /// ... lastoff1 < lastoff2 04224 /// ... Just 1 piece of ard2 survives 04225 04226 AbstractAccess * ard2clone = ard2.clone(); 04227 ard2clone->start(add(elastoff1->clone(),estride.clone())); 04228 Expression * newspan1 = simplify(sub(elastoff2->clone(), 04229 add(elastoff1->clone(),estride.clone()))); 04230 if (is_integer_zero(*newspan1)) { 04231 ard2clone->del_dimension(0); 04232 } else { 04233 ard2clone->dimension(0).span(newspan1); 04234 } 04235 new_ard2.ins_last(ard2clone); 04236 04237 /// ... intersection is aa1. aa1 disappears. 04238 04239 AbstractAccess *interARD = ard1.clone(); 04240 switch (meet_op) { 04241 case AR_X: 04242 interARD->exec_predicate(ard1.exec_predicate()); 04243 break; 04244 case AR_Y: 04245 interARD->exec_predicate(ard2.exec_predicate()); 04246 break; 04247 case AR_AND: 04248 interARD->exec_predicate(repos.and_pred(ard1.exec_predicate(), 04249 ard2.exec_predicate())); 04250 break; 04251 case AR_OR: 04252 interARD->exec_predicate(repos.or_pred(ard1.exec_predicate(), 04253 ard2.exec_predicate())); 04254 break; 04255 case AR_NOTX_Y: 04256 interARD->exec_predicate(repos.notx_and_y_pred(ard1.exec_predicate(), 04257 ard2.exec_predicate())); 04258 break; 04259 case AR_X_NOTY: 04260 interARD->exec_predicate(repos.x_and_noty_pred(ard1.exec_predicate(), 04261 ard2.exec_predicate())); 04262 break; 04263 default: 04264 p_abort("Bad value for meet operator"); 04265 } 04266 /// ... This can't be propagate the reduction characteristic because 04267 /// ... the two ARDs aren't identical 04268 interARD->poss_reduct(false); 04269 interARD->reduct_op(RED_OP_NONE); 04270 04271 if (newaa1) delete aa1; 04272 if (newaa2) delete aa2; 04273 delete elastoff1; 04274 delete elastoff2; 04275 delete ediff; 04276 if (interARD->exec_predicate_false()) { 04277 if (_ar_logging && _ar_logging %2 == 0) { 04278 arlog << "No intersect" << endl; 04279 } 04280 return false; 04281 } else { 04282 if (_ar_logging && _ar_logging % 2 == 0) { 04283 arlog << "New ARD1:" << new_ard1 << endl; 04284 arlog << "New ARD2:" << new_ard2 << endl; 04285 } 04286 istats.inter_ARD.stop(); 04287 intersect.ins_last(interARD); 04288 if (_ar_logging && _ar_logging % 2 == 0) { 04289 arlog << "ARD1 is subregion of ARD2. Intersection is ARD1. Reduce ARD2" << endl; 04290 } 04291 return true; 04292 } 04293 } else { /// ... Not enough information, use conservative direction 04294 if (_ar_logging && _ar_logging % 2 == 0) { 04295 arlog << endl << "Not enough info" << endl; 04296 } 04297 if (direction == OVERESTIMATE) { 04298 intersect.ins_last(ard1.clone()); 04299 intersect.ins_last(ard2.clone()); 04300 /// ... Add nothing to new_ard1 or new_ard2 04301 if (_ar_logging && _ar_logging % 2 == 0) { 04302 arlog << "OVERESTIMATE - ARD1 & ARD2 in intersection, discard both" << endl; 04303 } 04304 if (newaa1) delete aa1; 04305 if (newaa2) delete aa2; 04306 if (_ar_logging && _ar_logging % 2 == 0) { 04307 arlog << "New ARD1:" << new_ard1 << endl; 04308 arlog << "New ARD2:" << new_ard2 << endl; 04309 } 04310 delete elastoff1; 04311 delete elastoff2; 04312 delete ediff; 04313 istats.inter_ARD.stop(); 04314 return true; 04315 } else { /// ... UNDERESTIMATE 04316 if (_ar_logging && _ar_logging % 2 == 0) { 04317 arlog << "Just don't know - UNDERESTIMATE with empty intersection" << endl; 04318 } 04319 if (newaa1) delete aa1; 04320 if (newaa2) delete aa2; 04321 if (_ar_logging && _ar_logging % 2 == 0) { 04322 arlog << "New ARD1:" << new_ard1 << endl; 04323 arlog << "New ARD2:" << new_ard2 << endl; 04324 } 04325 delete elastoff1; 04326 delete elastoff2; 04327 delete ediff; 04328 istats.inter_ARD.stop(); 04329 return false; 04330 } 04331 } 04332 } else if (reldiff.is_less_than()) { /// ... aa1 starts to the left of aa2 04333 Relation rel_over2 = range_acc.compare(*elastoff1, *elastoff2); 04334 04335 if (rel_over2.is_greater_than()) { /// ... lastoff1 > lastoff2 04336 /// ... aa2 is a subregion of aa1. Nothing in new_ard2. 04337 04338 AbstractAccess * ard1_part1 = ard1.clone(); 04339 Expression * newspan1 = simplify(sub(sub(estart2.clone(),estride.clone()),estart1.clone())); 04340 if (is_integer_zero(*newspan1)) { 04341 ard1_part1->del_dimension(0); 04342 } else { 04343 ard1_part1->dimension(0).span(newspan1); 04344 } 04345 new_ard1.ins_last(ard1_part1); 04346 04347 AbstractAccess * ard1_part2 = ard1.clone(); 04348 ard1_part2->start(add(elastoff2->clone(),estride.clone())); 04349 Expression * newspan2 = simplify(sub(elastoff1->clone(),add(elastoff2->clone(),estride.clone()))); 04350 if (is_integer_zero(*newspan2)) { 04351 ard1_part2->del_dimension(0); 04352 } else { 04353 ard1_part2->dimension(0).span(newspan2); 04354 } 04355 new_ard1.ins_last(ard1_part2); 04356 04357 AbstractAccess * interARD = ard2.clone(); 04358 switch (meet_op) { 04359 case AR_X: 04360 interARD->exec_predicate(ard1.exec_predicate()); 04361 break; 04362 case AR_Y: 04363 interARD->exec_predicate(ard2.exec_predicate()); 04364 break; 04365 case AR_AND: 04366 interARD->exec_predicate(repos.and_pred(ard1.exec_predicate(), 04367 ard2.exec_predicate())); 04368 break; 04369 case AR_OR: 04370 interARD->exec_predicate(repos.or_pred(ard1.exec_predicate(), 04371 ard2.exec_predicate())); 04372 break; 04373 case AR_NOTX_Y: 04374 interARD->exec_predicate(repos.notx_and_y_pred(ard1.exec_predicate(), 04375 ard2.exec_predicate())); 04376 break; 04377 case AR_X_NOTY: 04378 interARD->exec_predicate(repos.x_and_noty_pred(ard1.exec_predicate(), 04379 ard2.exec_predicate())); 04380 break; 04381 default: 04382 p_abort("Bad value for meet operator"); 04383 } 04384 /// ... This can't be propagate the reduction characteristic because 04385 /// ... the two ARDs aren't identical 04386 interARD->poss_reduct(false); 04387 interARD->reduct_op(RED_OP_NONE); 04388 04389 if (newaa1) delete aa1; 04390 if (newaa2) delete aa2; 04391 delete elastoff1; 04392 delete elastoff2; 04393 delete ediff; 04394 if (interARD->exec_predicate_false()) { 04395 if (_ar_logging && _ar_logging %2 == 0) { 04396 arlog << "No intersect" << endl; 04397 } 04398 return false; 04399 } else { 04400 if (_ar_logging && _ar_logging % 2 == 0) { 04401 arlog << "New ARD1:" << new_ard1 << endl; 04402 arlog << "New ARD2:" << new_ard2 << endl; 04403 } 04404 istats.inter_ARD.stop(); 04405 intersect.ins_last(interARD); 04406 if (_ar_logging && _ar_logging % 2 == 0) { 04407 arlog << "ARD2 is subregion of ARD1. Intersection is ARD2. Break ARD1" << endl; 04408 } 04409 return true; 04410 } 04411 } else if (rel_over2.is_equal()) { /// ... lastoff1 == lastoff2 04412 /// ... aa2 is a subregion of aa1. Nothing in new_ard2. 04413 04414 AbstractAccess * ard1_part1 = ard1.clone(); 04415 Expression * newspan1 = simplify(sub(sub(estart2.clone(),estride.clone()),estart1.clone())); 04416 if (is_integer_zero(*newspan1)) { 04417 ard1_part1->del_dimension(0); 04418 } else { 04419 ard1_part1->dimension(0).span(newspan1); 04420 } 04421 new_ard1.ins_last(ard1_part1); 04422 04423 AbstractAccess * interARD = ard2.clone(); 04424 switch (meet_op) { 04425 case AR_X: 04426 interARD->exec_predicate(ard1.exec_predicate()); 04427 break; 04428 case AR_Y: 04429 interARD->exec_predicate(ard2.exec_predicate()); 04430 break; 04431 case AR_AND: 04432 interARD->exec_predicate(repos.and_pred(ard1.exec_predicate(), 04433 ard2.exec_predicate())); 04434 break; 04435 case AR_OR: 04436 interARD->exec_predicate(repos.or_pred(ard1.exec_predicate(), 04437 ard2.exec_predicate())); 04438 break; 04439 case AR_NOTX_Y: 04440 interARD->exec_predicate(repos.notx_and_y_pred(ard1.exec_predicate(), 04441 ard2.exec_predicate())); 04442 break; 04443 case AR_X_NOTY: 04444 interARD->exec_predicate(repos.x_and_noty_pred(ard1.exec_predicate(), 04445 ard2.exec_predicate())); 04446 break; 04447 default: 04448 p_abort("Bad value for meet operator"); 04449 } 04450 /// ... This can't be propagate the reduction characteristic because 04451 /// ... the two ARDs aren't identical 04452 interARD->poss_reduct(false); 04453 interARD->reduct_op(RED_OP_NONE); 04454 04455 if (newaa1) delete aa1; 04456 if (newaa2) delete aa2; 04457 delete elastoff1; 04458 delete elastoff2; 04459 delete ediff; 04460 if (interARD->exec_predicate_false()) { 04461 if (_ar_logging && _ar_logging %2 == 0) { 04462 arlog << "No intersect" << endl; 04463 } 04464 return false; 04465 } else { 04466 if (_ar_logging && _ar_logging % 2 == 0) { 04467 arlog << "New ARD1:" << new_ard1 << endl; 04468 arlog << "New ARD2:" << new_ard2 << endl; 04469 } 04470 istats.inter_ARD.stop(); 04471 intersect.ins_last(interARD); 04472 if (_ar_logging && _ar_logging % 2 == 0) { 04473 arlog << "ARD2 is subregion of ARD1. Intersection is ARD2. Break ARD1" << endl; 04474 } 04475 return true; 04476 } 04477 } else if (rel_over2.is_less_than()) { 04478 04479 /// ... New start2 is one stride after end of aa1 04480 /// ... New start1 is old start1 04481 04482 AbstractAccess * ard1clone = ard1.clone(); 04483 Expression * newspan1 = simplify(sub(sub(estart2.clone(),estride.clone()),estart1.clone())); 04484 if (is_integer_zero(*newspan1)) { 04485 ard1clone->del_dimension(0); 04486 } else { 04487 ard1clone->dimension(0).span(newspan1); 04488 } 04489 04490 AbstractAccess * ard2clone = ard2.clone(); 04491 ard2clone->start(simplify(add(elastoff1->clone(),estride.clone()))); 04492 Expression * newspan2 = simplify(sub(elastoff2->clone(), 04493 add(elastoff1->clone(),estride.clone()))); 04494 if (is_integer_zero(*newspan2)) { 04495 ard2clone->del_dimension(0); 04496 } else { 04497 ard2clone->dimension(0).span(newspan2); 04498 } 04499 04500 /// ... Intersect descriptor 04501 AbstractAccess * interARD = ard2.clone(); 04502 Expression * newspan3 = simplify(sub(elastoff1->clone(),estart2.clone())); 04503 if (is_integer_zero(*newspan3)) { 04504 interARD->del_dimension(0); 04505 } else { 04506 interARD->dimension(0).span(newspan3); 04507 } 04508 04509 /// ... Do we know the relationship between lastoff1 and start2? 04510 04511 Relation rel_off1_st2 = range_acc.compare(*elastoff1, estart2); 04512 04513 if (rel_off1_st2.is_greater_equal()) { 04514 /// ... OK, there definitely is an intersection 04515 } else { 04516 /// ... We just don't know, so add a predicate to each descriptor 04517 04518 /// ... If there is an intersection: 04519 Expression * corr = simplify(gt(elastoff1->clone(),sub(estart2.clone(),constant(1)))); 04520 PredicateRepository & repos = ard1.pgm_summarized_to()->pred_repos(); 04521 int repindx = repos.ins(corr); 04522 ard1clone->exec_predicate(repos.and_pred(ard1clone->exec_predicate(), 04523 repindx)); 04524 ard2clone->exec_predicate(repos.and_pred(ard2clone->exec_predicate(), 04525 repindx)); 04526 interARD->exec_predicate(repos.and_pred(interARD->exec_predicate(), 04527 repindx)); 04528 if (!(interARD->exec_predicate_false())) { 04529 04530 /// ... If there is not an intersection: 04531 Expression * corr2 = simplify(lt(elastoff1->clone(),estart2.clone())); 04532 04533 repindx = repos.ins(corr2); 04534 AbstractAccess * ard1orig = ard1.clone(); 04535 ard1orig->exec_predicate(repos.and_pred(ard1orig->exec_predicate(), 04536 repindx)); 04537 if (!(ard1orig->exec_predicate_false())) { 04538 new_ard1.ins_last(ard1orig); 04539 } 04540 AbstractAccess * ard2orig = ard2.clone(); 04541 ard2orig->exec_predicate(repos.and_pred(ard2orig->exec_predicate(), 04542 repindx)); 04543 if (!(ard2orig->exec_predicate_false())) { 04544 new_ard2.ins_last(ard2orig); 04545 } 04546 } 04547 } 04548 if (!(ard1clone->exec_predicate_false())) { 04549 new_ard1.ins_last(ard1clone); 04550 } 04551 if (!(ard2clone->exec_predicate_false())) { 04552 new_ard2.ins_last(ard2clone); 04553 } 04554 04555 if ((!interARD->exec_predicate_false())) { 04556 switch (meet_op) { 04557 case AR_X: 04558 interARD->exec_predicate(repos.and_pred(interARD->exec_predicate(), 04559 ard1.exec_predicate())); 04560 break; 04561 case AR_Y: 04562 interARD->exec_predicate(repos.and_pred(interARD->exec_predicate(), 04563 ard2.exec_predicate())); 04564 break; 04565 case AR_AND: 04566 interARD->exec_predicate(repos.and_pred(interARD->exec_predicate(), 04567 repos.and_pred(ard1.exec_predicate(), 04568 ard2.exec_predicate()))); 04569 break; 04570 case AR_OR: 04571 interARD->exec_predicate(repos.and_pred(interARD->exec_predicate(), 04572 repos.or_pred(ard1.exec_predicate(), 04573 ard2.exec_predicate()))); 04574 break; 04575 case AR_NOTX_Y: 04576 interARD->exec_predicate(repos.and_pred(interARD->exec_predicate(), 04577 repos.notx_and_y_pred(ard1.exec_predicate(), 04578 ard2.exec_predicate()))); 04579 break; 04580 case AR_X_NOTY: 04581 interARD->exec_predicate(repos.and_pred(interARD->exec_predicate(), 04582 repos.x_and_noty_pred(ard1.exec_predicate(), 04583 ard2.exec_predicate()))); 04584 break; 04585 default: 04586 p_abort("Bad value for meet operator"); 04587 } 04588 } 04589 /// ... This can't propagate the reduction characteristic because 04590 /// ... the two ARDs aren't identical 04591 interARD->poss_reduct(false); 04592 interARD->reduct_op(RED_OP_NONE); 04593 04594 if (newaa1) delete aa1; 04595 if (newaa2) delete aa2; 04596 delete elastoff1; 04597 delete elastoff2; 04598 delete ediff; 04599 if (_ar_logging && _ar_logging % 2 == 0) { 04600 arlog << "New ARD1:" << new_ard1 << endl; 04601 arlog << "New ARD2:" << new_ard2 << endl; 04602 } 04603 istats.inter_ARD.stop(); 04604 intersect.ins_last(interARD); 04605 if (interARD->exec_predicate_false()) { 04606 if (_ar_logging && _ar_logging % 2 == 0) { 04607 arlog << "No intersection" << endl; 04608 } 04609 return false; 04610 } else { 04611 if (_ar_logging && _ar_logging % 2 == 0) { 04612 arlog << "Intersection:" << *interARD << endl; 04613 } 04614 return true; 04615 } 04616 } else { /// ... Just don't know, use conservative direction 04617 if (_ar_logging && _ar_logging % 2 == 0) { 04618 arlog << endl << "Not enough info" << endl; 04619 } 04620 if (direction == OVERESTIMATE) { 04621 intersect.ins_last(ard1.clone()); 04622 intersect.ins_last(ard2.clone()); 04623 /// ... Add nothing to new_ard1 or new_ard2 04624 if (_ar_logging && _ar_logging % 2 == 0) { 04625 arlog << "OVERESTIMATE - ARD1 & ARD2 in intersection, discard both" << endl; 04626 } 04627 if (newaa1) delete aa1; 04628 if (newaa2) delete aa2; 04629 delete elastoff1; 04630 delete elastoff2; 04631 delete ediff; 04632 if (_ar_logging && _ar_logging % 2 == 0) { 04633 arlog << "New ARD1:" << new_ard1 << endl; 04634 arlog << "New ARD2:" << new_ard2 << endl; 04635 } 04636 istats.inter_ARD.stop(); 04637 return true; 04638 } else { /// ... UNDERESTIMATE 04639 if (_ar_logging && _ar_logging % 2 == 0) { 04640 arlog << "Just don't know - UNDERESTIMATE with empty intersection" << endl; 04641 } 04642 if (newaa1) delete aa1; 04643 if (newaa2) delete aa2; 04644 if (_ar_logging && _ar_logging % 2 == 0) { 04645 arlog << "New ARD1:" << new_ard1 << endl; 04646 arlog << "New ARD2:" << new_ard2 << endl; 04647 } 04648 delete elastoff1; 04649 delete elastoff2; 04650 delete ediff; 04651 istats.inter_ARD.stop(); 04652 return false; 04653 } 04654 } 04655 } else { 04656 /// ... We just don't know 04657 if (_ar_logging && _ar_logging % 2 == 0) { 04658 arlog << endl << "Not enough info" << endl; 04659 } 04660 if (direction == OVERESTIMATE) { 04661 intersect.ins_last(ard1.clone()); 04662 intersect.ins_last(ard2.clone()); 04663 /// ... Add nothing to new_ard1 or new_ard2 04664 if (_ar_logging && _ar_logging % 2 == 0) { 04665 arlog << "OVERESTIMATE - ARD1 & ARD2 in intersection, discard both" << endl; 04666 } 04667 if (newaa1) delete aa1; 04668 if (newaa2) delete aa2; 04669 if (_ar_logging && _ar_logging % 2 == 0) { 04670 arlog << "New ARD1:" << new_ard1 << endl; 04671 arlog << "New ARD2:" << new_ard2 << endl; 04672 } 04673 delete elastoff1; 04674 delete elastoff2; 04675 delete ediff; 04676 istats.inter_ARD.stop(); 04677 return true; 04678 } else { /// ... UNDERESTIMATE 04679 if (_ar_logging && _ar_logging % 2 == 0) { 04680 arlog << "Just don't know - UNDERESTIMATE with empty intersection" << endl; 04681 } 04682 if (newaa1) delete aa1; 04683 if (newaa2) delete aa2; 04684 if (_ar_logging && _ar_logging % 2 == 0) { 04685 arlog << "New ARD1:" << new_ard1 << endl; 04686 arlog << "New ARD2:" << new_ard2 << endl; 04687 } 04688 delete elastoff1; 04689 delete elastoff2; 04690 delete ediff; 04691 istats.inter_ARD.stop(); 04692 return false; 04693 } 04694 } 04695 } else { 04696 /// ... Points don't overlap 04697 if (_ar_logging && _ar_logging % 2 == 0) { 04698 arlog << "No overlap" << endl; 04699 } 04700 if (newaa1) delete aa1; 04701 if (newaa2) delete aa2; 04702 if (_ar_logging && _ar_logging % 2 == 0) { 04703 arlog << "New ARD1:" << new_ard1 << endl; 04704 arlog << "New ARD2:" << new_ard2 << endl; 04705 } 04706 delete elastoff1; 04707 delete elastoff2; 04708 delete ediff; 04709 istats.inter_ARD.stop(); 04710 return false; 04711 } 04712 } 04713 } 04714 04715 if ((dims1 == 2) && (dims2 == 2)) { 04716 04717 AccessDimension & dim1_1 = (AccessDimension &) aa1->dimension(0); 04718 AccessDimension & dim1_2 = (AccessDimension &) aa1->dimension(1); 04719 AccessDimension & dim2_1 = (AccessDimension &) aa2->dimension(0); 04720 AccessDimension & dim2_2 = (AccessDimension &) aa2->dimension(1); 04721 04722 Expression * ediff = simplify(sub(aa1->start_guarded().clone(), 04723 aa2->start_guarded().clone())); 04724 04725 if ((ediff->op() == INTEGER_CONSTANT_OP) && 04726 (dim1_1.stride_guarded().op() == INTEGER_CONSTANT_OP) && 04727 (dim1_2.stride_guarded().op() == INTEGER_CONSTANT_OP) && 04728 (dim2_1.stride_guarded().op() == INTEGER_CONSTANT_OP) && 04729 (dim2_2.stride_guarded().op() == INTEGER_CONSTANT_OP) && 04730 (dim1_1.span_guarded().op() == INTEGER_CONSTANT_OP) && 04731 (dim1_2.span_guarded().op() == INTEGER_CONSTANT_OP) && 04732 (dim2_1.span_guarded().op() == INTEGER_CONSTANT_OP) && 04733 (dim2_2.span_guarded().op() == INTEGER_CONSTANT_OP)) { 04734 04735 int diff = ediff->value(); 04736 int stride1_1 = dim1_1.stride_guarded().value(); 04737 int stride1_2 = dim1_2.stride_guarded().value(); 04738 int stride2_1 = dim2_1.stride_guarded().value(); 04739 int stride2_2 = dim2_2.stride_guarded().value(); 04740 int span1_1 = dim1_1.span_guarded().value(); 04741 int span1_2 = dim1_2.span_guarded().value(); 04742 int span2_1 = dim2_1.span_guarded().value(); 04743 int span2_2 = dim2_2.span_guarded().value(); 04744 04745 if (diff > 0) { 04746 if (span2_1 + span2_2 < diff) { 04747 04748 if (_ar_logging && _ar_logging % 2 == 0) { 04749 arlog << "No intersection" << endl; 04750 } 04751 if (newaa1) delete aa1; 04752 if (newaa2) delete aa2; 04753 if (_ar_logging && _ar_logging % 2 == 0) { 04754 arlog << "New ARD1:" << new_ard1 << endl; 04755 arlog << "New ARD2:" << new_ard2 << endl; 04756 } 04757 istats.inter_ARD.stop(); 04758 return false; 04759 } 04760 } else { /// ... diff < 0 04761 if (span1_1 + span1_2 < -diff) { 04762 04763 if (_ar_logging && _ar_logging % 2 == 0) { 04764 arlog << "No intersection" << endl; 04765 } 04766 if (newaa1) delete aa1; 04767 if (newaa2) delete aa2; 04768 if (_ar_logging && _ar_logging % 2 == 0) { 04769 arlog << "New ARD1:" << new_ard1 << endl; 04770 arlog << "New ARD2:" << new_ard2 << endl; 04771 } 04772 istats.inter_ARD.stop(); 04773 return false; 04774 } 04775 } 04776 int stride_left_inner, stride_left_outer; 04777 int stride_right_inner, stride_right_outer; 04778 int span_left_inner, span_left_outer; 04779 int span_right_inner, span_right_outer; 04780 04781 if (diff < 0) { /// ... aa1 is to the left 04782 diff = -diff; 04783 if (stride1_1 < stride1_2) { 04784 stride_left_inner = stride1_1; 04785 stride_left_outer = stride1_2; 04786 span_left_inner = span1_1; 04787 span_left_outer = span1_2; 04788 if (stride2_1 < stride2_2) { 04789 stride_right_inner = stride2_1; 04790 stride_right_outer = stride2_2; 04791 span_right_inner = span2_1; 04792 span_right_outer = span2_2; 04793 } else { 04794 stride_right_inner = stride2_2; 04795 stride_right_outer = stride2_1; 04796 span_right_inner = span2_2; 04797 span_right_outer = span2_1; 04798 } 04799 } else { 04800 stride_left_inner = stride1_2; 04801 stride_left_outer = stride1_1; 04802 span_left_inner = span1_2; 04803 span_left_outer = span1_1; 04804 if (stride2_1 < stride2_2) { 04805 stride_right_inner = stride2_1; 04806 stride_right_outer = stride2_2; 04807 span_right_inner = span2_1; 04808 span_right_outer = span2_2; 04809 } else { 04810 stride_right_inner = stride2_2; 04811 stride_right_outer = stride2_1; 04812 span_right_inner = span2_2; 04813 span_right_outer = span2_1; 04814 } 04815 } 04816 } else { /// ... aa1 is to the right 04817 if (stride2_1 < stride2_2) { 04818 stride_left_inner = stride2_1; 04819 stride_left_outer = stride2_2; 04820 span_left_inner = span2_1; 04821 span_left_outer = span2_2; 04822 if (stride1_1 < stride1_2) { 04823 stride_right_inner = stride1_1; 04824 stride_right_outer = stride1_2; 04825 span_right_inner = span1_1; 04826 span_right_outer = span1_2; 04827 } else { 04828 stride_right_inner = stride1_2; 04829 stride_right_outer = stride1_1; 04830 span_right_inner = span1_2; 04831 span_right_outer = span1_1; 04832 } 04833 } else { 04834 stride_left_inner = stride2_2; 04835 stride_left_outer = stride2_1; 04836 span_left_inner = span2_2; 04837 span_left_outer = span2_1; 04838 if (stride1_1 < stride1_2) { 04839 stride_right_inner = stride1_1; 04840 stride_right_outer = stride1_2; 04841 span_right_inner = span1_1; 04842 span_right_outer = span1_2; 04843 } else { 04844 stride_right_inner = stride1_2; 04845 stride_right_outer = stride1_1; 04846 span_right_inner = span1_2; 04847 span_right_outer = span1_1; 04848 } 04849 } 04850 } 04851 04852 /// ... Now do range test type check 04853 04854 diff = diff % stride_left_outer; 04855 if ((diff > span_left_inner) && 04856 ((diff + span_right_inner) < stride_left_outer)) { 04857 04858 if (_ar_logging && _ar_logging % 2 == 0) { 04859 arlog << "No intersection" << endl; 04860 } 04861 if (newaa1) delete aa1; 04862 if (newaa2) delete aa2; 04863 if (_ar_logging && _ar_logging % 2 == 0) { 04864 arlog << "New ARD1:" << new_ard1 << endl; 04865 arlog << "New ARD2:" << new_ard2 << endl; 04866 } 04867 istats.inter_ARD.stop(); 04868 return false; 04869 } 04870 04871 /// ... Now do GCD test type check 04872 04873 if ((diff % stride_left_inner != 0) && 04874 ((diff + span_right_inner) < stride_left_outer)) { 04875 04876 if (_ar_logging && _ar_logging % 2 == 0) { 04877 arlog << "No intersection" << endl; 04878 } 04879 if (newaa1) delete aa1; 04880 if (newaa2) delete aa2; 04881 if (_ar_logging && _ar_logging % 2 == 0) { 04882 arlog << "New ARD1:" << new_ard1 << endl; 04883 arlog << "New ARD2:" << new_ard2 << endl; 04884 } 04885 istats.inter_ARD.stop(); 04886 return false; 04887 } 04888 } else { 04889 04890 /// ... Do the above symbolically 04891 04892 List<Expression> factors; 04893 Expression * new_expr = sym_factors(dim2_1.span_guarded(), factors); 04894 04895 Expression * no_empty_LMAD; 04896 04897 if (new_expr) { 04898 no_empty_LMAD = ge(new_expr, constant(0)); 04899 } else { 04900 no_empty_LMAD = ge(dim2_1.span_guarded().clone(), constant(0)); 04901 } 04902 04903 Statement * s = aa2->actual_stmt(); 04904 if (s == 0) { 04905 s = aa2->summarized_to(); 04906 } 04907 04908 if (s) { 04909 // StmtRanges & ranges = 04910 ((AIRangeDict & )(aa2->pgm_summarized_to()->range_dict_guarded()))._s_ranges(s->tag()); 04911 04912 // ranges.extract_ranges(*no_empty_LMAD); 04913 } 04914 04915 delete no_empty_LMAD; 04916 04917 bool diff_less = false; 04918 Expression * zero = constant(0); 04919 Relation reldiff = range_acc.compare(*ediff, *zero); 04920 delete zero; 04921 if (reldiff.is_greater_than()) { 04922 Expression * sumspans2 = add(dim2_1.span_guarded().clone(), dim2_2.span_guarded().clone()); 04923 Relation relspan2 = range_acc.compare(*sumspans2, *ediff); 04924 delete sumspans2; 04925 if (relspan2.is_less_than()) { 04926 if (_ar_logging && _ar_logging % 2 == 0) { 04927 arlog << "No intersection" << endl; 04928 } 04929 delete ediff; 04930 if (newaa1) delete aa1; 04931 if (newaa2) delete aa2; 04932 if (_ar_logging && _ar_logging % 2 == 0) { 04933 arlog << "New ARD1:" << new_ard1 << endl; 04934 arlog << "New ARD2:" << new_ard2 << endl; 04935 } 04936 istats.inter_ARD.stop(); 04937 return false; 04938 } 04939 /// ... Above, we compared dim2_2.span + dim2_1.span vs ediff. 04940 /// ... Having gotten to this point, that didn't work. There 04941 /// ... may just be some unknown variable muddying the waters. 04942 /// ... As an alternative, if we know that dim2_1 is "no overlap", 04943 /// ... and the descriptors are precisely-sorted, then we can 04944 /// ... try again, testing dim2_2.span + dim2_2.stride vs ediff 04945 /// ... (since dim2_1.span must be < dim2_2.stride, due to "no overlap"). 04946 04947 if (!(aa2->is_imprecise()) && !(dim2_1.check_overlap())) { 04948 Expression * one = constant(1); 04949 sumspans2 = sub(add(dim2_2.span_guarded().clone(), 04950 dim2_2.stride_guarded().clone()), 04951 one); 04952 Relation relspan2 = range_acc.compare(*sumspans2, *ediff); 04953 delete sumspans2; 04954 if (relspan2.is_less_than()) { 04955 if (_ar_logging && _ar_logging % 2 == 0) { 04956 arlog << "No intersection" << endl; 04957 } 04958 delete ediff; 04959 if (newaa1) delete aa1; 04960 if (newaa2) delete aa2; 04961 if (_ar_logging && _ar_logging % 2 == 0) { 04962 arlog << "New ARD1:" << new_ard1 << endl; 04963 arlog << "New ARD2:" << new_ard2 << endl; 04964 } 04965 istats.inter_ARD.stop(); 04966 return false; 04967 } 04968 } 04969 } else if (reldiff.is_less_equal()) { /// ... diff < 0 04970 diff_less = true; 04971 Expression * sumspans1 = add(dim1_1.span_guarded().clone(), 04972 dim1_2.span_guarded().clone()); 04973 Expression * negdiff = sub(constant(0),ediff->clone()); 04974 Relation relspan1 = range_acc.compare(*sumspans1, *negdiff); 04975 delete sumspans1; 04976 if (relspan1.is_less_than()) { 04977 if (_ar_logging && _ar_logging % 2 == 0) { 04978 arlog << "No intersection" << endl; 04979 } 04980 delete negdiff; 04981 delete ediff; 04982 if (newaa1) delete aa1; 04983 if (newaa2) delete aa2; 04984 if (_ar_logging && _ar_logging % 2 == 0) { 04985 arlog << "New ARD1:" << new_ard1 << endl; 04986 arlog << "New ARD2:" << new_ard2 << endl; 04987 } 04988 istats.inter_ARD.stop(); 04989 return false; 04990 } 04991 /// ... Above, we compared dim1_2.span + dim1_1.span vs ediff. 04992 /// ... Having gotten to this point, that didn't work. There 04993 /// ... may just be some unknown variable muddying the waters. 04994 /// ... As an alternative, if we know that dim1_1 is "no overlap", 04995 /// ... and the descriptors are precisely-sorted, then we can 04996 /// ... try again, testing dim1_2.span + dim1_2.stride vs ediff 04997 /// ... (since dim1_1.span must be < dim1_2.stride, due to "no overlap"). 04998 04999 if (!(aa1->is_imprecise()) && !(dim1_1.check_overlap())) { 05000 Expression * one = constant(1); 05001 sumspans1 = sub(add(dim1_2.span_guarded().clone(), 05002 dim1_2.stride_guarded().clone()), 05003 one); 05004 Relation relspan1 = range_acc.compare(*sumspans1, *negdiff); 05005 delete sumspans1; 05006 if (relspan1.is_less_than()) { 05007 if (_ar_logging && _ar_logging % 2 == 0) { 05008 arlog << "No intersection" << endl; 05009 } 05010 delete negdiff; 05011 delete ediff; 05012 if (newaa1) delete aa1; 05013 if (newaa2) delete aa2; 05014 if (_ar_logging && _ar_logging % 2 == 0) { 05015 arlog << "New ARD1:" << new_ard1 << endl; 05016 arlog << "New ARD2:" << new_ard2 << endl; 05017 } 05018 istats.inter_ARD.stop(); 05019 return false; 05020 } 05021 } 05022 delete negdiff; 05023 } 05024 05025 Expression * estride_left_inner = 0; 05026 Expression * estride_left_outer = 0; 05027 Expression * estride_right_inner = 0; 05028 Expression * estride_right_outer = 0; 05029 Expression * espan_left_inner = 0; 05030 Expression * espan_left_outer = 0; 05031 Expression * espan_right_inner = 0; 05032 Expression * espan_right_outer = 0; 05033 05034 if (diff_less) { /// ... aa1 is to the left 05035 ediff = sub(constant(0),ediff); 05036 Relation rel_stride1 = range_acc.compare(dim1_1.stride_guarded(), 05037 dim1_2.stride_guarded()); 05038 if (rel_stride1.is_less_than()) { 05039 estride_left_inner = dim1_1.stride_guarded().clone(); 05040 estride_left_outer = dim1_2.stride_guarded().clone(); 05041 espan_left_inner = dim1_1.span_guarded().clone(); 05042 espan_left_outer = dim1_2.span_guarded().clone(); 05043 Relation rel_stride2 = range_acc.compare(dim2_1.stride_guarded(), 05044 dim2_2.stride_guarded()); 05045 if (rel_stride2.is_less_than()) { 05046 estride_right_inner = dim2_1.stride_guarded().clone(); 05047 estride_right_outer = dim2_2.stride_guarded().clone(); 05048 espan_right_inner = dim2_1.span_guarded().clone(); 05049 espan_right_outer = dim2_2.span_guarded().clone(); 05050 } else if (rel_stride2.is_greater_than()) { 05051 estride_right_inner = dim2_2.stride_guarded().clone(); 05052 estride_right_outer = dim2_1.stride_guarded().clone(); 05053 espan_right_inner = dim2_2.span_guarded().clone(); 05054 espan_right_outer = dim2_1.span_guarded().clone(); 05055 } 05056 } else if (rel_stride1.is_greater_than()) { 05057 estride_left_inner = dim1_2.stride_guarded().clone(); 05058 estride_left_outer = dim1_1.stride_guarded().clone(); 05059 espan_left_inner = dim1_2.span_guarded().clone(); 05060 espan_left_outer = dim1_1.span_guarded().clone(); 05061 05062 Relation rel_stride2 = range_acc.compare(dim2_1.stride_guarded(), 05063 dim2_2.stride_guarded()); 05064 if (rel_stride2.is_less_than()) { 05065 estride_right_inner = dim2_1.stride_guarded().clone(); 05066 estride_right_outer = dim2_2.stride_guarded().clone(); 05067 espan_right_inner = dim2_1.span_guarded().clone(); 05068 espan_right_outer = dim2_2.span_guarded().clone(); 05069 } else if ( rel_stride2.is_greater_than()) { 05070 estride_right_inner = dim2_2.stride_guarded().clone(); 05071 estride_right_outer = dim2_1.stride_guarded().clone(); 05072 espan_right_inner = dim2_2.span_guarded().clone(); 05073 espan_right_outer = dim2_1.span_guarded().clone(); 05074 } 05075 } 05076 } else { /// ... aa1 is to the right 05077 Relation rel_stride2 = range_acc.compare(dim2_1.stride_guarded(), 05078 dim2_2.stride_guarded()); 05079 if (rel_stride2.is_less_than()) { 05080 estride_left_inner = dim2_1.stride_guarded().clone(); 05081 estride_left_outer = dim2_2.stride_guarded().clone(); 05082 espan_left_inner = dim2_1.span_guarded().clone(); 05083 espan_left_outer = dim2_2.span_guarded().clone(); 05084 Relation rel_stride1 = range_acc.compare(dim1_1.span_guarded(), 05085 dim2_2.span_guarded()); 05086 if (rel_stride1.is_less_than()) { 05087 estride_right_inner = dim1_1.stride_guarded().clone(); 05088 estride_right_outer = dim1_2.stride_guarded().clone(); 05089 espan_right_inner = dim1_1.span_guarded().clone(); 05090 espan_right_outer = dim1_2.span_guarded().clone(); 05091 } else if (rel_stride2.is_greater_than()) { 05092 estride_right_inner = dim1_2.stride_guarded().clone(); 05093 estride_right_outer = dim1_1.stride_guarded().clone(); 05094 espan_right_inner = dim1_2.span_guarded().clone(); 05095 espan_right_outer = dim1_1.span_guarded().clone(); 05096 } 05097 } else if (rel_stride2.is_greater_than()) { 05098 estride_left_inner = dim2_2.stride_guarded().clone(); 05099 estride_left_outer = dim2_1.stride_guarded().clone(); 05100 espan_left_inner = dim2_2.span_guarded().clone(); 05101 espan_left_outer = dim2_1.span_guarded().clone(); 05102 Relation rel_stride1 = range_acc.compare(dim1_1.stride_guarded(), 05103 dim1_2.stride_guarded()); 05104 if (rel_stride1.is_less_than()) { 05105 estride_right_inner = dim1_1.stride_guarded().clone(); 05106 estride_right_outer = dim1_2.stride_guarded().clone(); 05107 espan_right_inner = dim1_1.span_guarded().clone(); 05108 espan_right_outer = dim1_2.span_guarded().clone(); 05109 } else if (rel_stride1.is_greater_than()) { 05110 estride_right_inner = dim1_2.stride_guarded().clone(); 05111 estride_right_outer = dim1_1.stride_guarded().clone(); 05112 espan_right_inner = dim1_2.span_guarded().clone(); 05113 espan_right_outer = dim1_1.span_guarded().clone(); 05114 } 05115 } 05116 } 05117 05118 /// ... Now check for an interleaved access pattern 05119 05120 /// ... How to implement this??? diff = diff % stride_left_outer; 05121 05122 /// ... Do the range test type check 05123 05124 if (espan_left_inner && espan_right_inner && estride_left_outer) { 05125 05126 Relation rel_left = range_acc.compare(*ediff, *espan_left_inner); 05127 Expression * right_extent = add(ediff->clone(), espan_right_inner->clone()); 05128 Relation rel_right = range_acc.compare(*right_extent, *estride_left_outer); 05129 delete right_extent; 05130 05131 if (rel_left.is_greater_than() && rel_right.is_less_than()) { 05132 05133 if (_ar_logging && _ar_logging % 2 == 0) { 05134 arlog << "No intersection" << endl; 05135 } 05136 delete ediff; 05137 if (newaa1) delete aa1; 05138 if (newaa2) delete aa2; 05139 if (_ar_logging && _ar_logging % 2 == 0) { 05140 arlog << "New ARD1:" << new_ard1 << endl; 05141 arlog << "New ARD2:" << new_ard2 << endl; 05142 } 05143 istats.inter_ARD.stop(); 05144 return false; 05145 } 05146 05147 /// ... Do the GCD test type check 05148 05149 if (! divides_diff(*estride_left_inner, ediff, 05150 *aa1, *aa2, OVERESTIMATE) && 05151 rel_right.is_less_than()) { 05152 05153 if (_ar_logging && _ar_logging % 2 == 0) { 05154 arlog << "No intersection" << endl; 05155 } 05156 delete ediff; 05157 if (newaa1) delete aa1; 05158 if (newaa2) delete aa2; 05159 if (_ar_logging && _ar_logging % 2 == 0) { 05160 arlog << "New ARD1:" << new_ard1 << endl; 05161 arlog << "New ARD2:" << new_ard2 << endl; 05162 } 05163 istats.inter_ARD.stop(); 05164 return false; 05165 } 05166 } 05167 } 05168 delete ediff; /// ... ??? 05169 } 05170 05171 /// ... Just don't know - use conservative direction 05172 05173 if (_ar_logging && _ar_logging % 2 == 0) { 05174 arlog << endl << "Not enough info" << endl; 05175 } 05176 if (direction == OVERESTIMATE) { 05177 intersect.ins_last(ard1.clone()); 05178 intersect.ins_last(ard2.clone()); 05179 /// ... Add nothing to new_ard1 or new_ard2 05180 if (_ar_logging && _ar_logging % 2 == 0) { 05181 arlog << "OVERESTIMATE - ARD1 & ARD2 in intersection, discard both" << endl; 05182 } 05183 if (newaa1) delete aa1; 05184 if (newaa2) delete aa2; 05185 if (_ar_logging && _ar_logging % 2 == 0) { 05186 arlog << "New ARD1:" << new_ard1 << endl; 05187 arlog << "New ARD2:" << new_ard2 << endl; 05188 } 05189 istats.inter_ARD.stop(); 05190 return true; 05191 } else { /// ... UNDERESTIMATE 05192 if (_ar_logging && _ar_logging % 2 == 0) { 05193 arlog << "Just don't know - UNDERESTIMATE with empty intersection" << endl; 05194 } 05195 if (newaa1) delete aa1; 05196 if (newaa2) delete aa2; 05197 if (_ar_logging && _ar_logging % 2 == 0) { 05198 arlog << "New ARD1:" << new_ard1 << endl; 05199 arlog << "New ARD2:" << new_ard2 << endl; 05200 } 05201 istats.inter_ARD.stop(); 05202 return false; 05203 } 05204 istats.inter_ARD.stop(); 05205 } 05206 05207 /// calc_stride_span_match - 05208 /// Do a complete matching of the dimensions of two descriptors. Record 05209 /// the match in the stride_match and span_match arrays. 05210 05211 void 05212 calc_stride_span_match(AbstractAccess & aa1, AbstractAccess & aa2, 05213 RangeAccessor & range_acc, 05214 Array<int> & stride_match1, Array<int> & stride_match2, 05215 Array<bool> & span_match1, Array<bool> & span_match2) 05216 { 05217 int ndims1 = aa1.number_of_dimensions(); 05218 int ndims2 = aa2.number_of_dimensions(); 05219 05220 /// ... Initialize all match arrays 05221 05222 for (int ii=0; ii<ndims1; ++ii) { 05223 stride_match1[ii] = -1; 05224 span_match1[ii] = false; 05225 } 05226 for (int ii=0; ii<ndims2; ++ii) { 05227 stride_match2[ii] = -1; 05228 span_match2[ii] = false; 05229 } 05230 05231 /// ... Check the two descriptors for stride-equivalence 05232 05233 UBiGraphIterator ubi( ndims1, ndims2 ); 05234 05235 for ( ; ubi.valid(); ++ubi) { 05236 05237 UBiEdge & edge = ubi.current(); 05238 int lodim = edge.node_1( ); 05239 int hidim = edge.node_2( ); 05240 05241 if ((stride_match1[lodim] == -1) && 05242 (stride_match2[hidim] == -1)) { 05243 05244 /// ... Check stride for match 05245 const Relation & relstr = range_acc.compare(aa1.dimension(lodim).stride_guarded(), 05246 aa2.dimension(hidim).stride_guarded()); 05247 if (relstr.is_equal()) { 05248 stride_match1[lodim] = hidim; 05249 stride_match2[hidim] = lodim; 05250 } 05251 } 05252 } 05253 05254 /// ... Check span for match 05255 05256 for (int ii=0; ii<ndims1; ++ii) { 05257 int lodim = ii; 05258 int hidim = stride_match1[ii]; 05259 05260 if (hidim == -1) continue; /// ... No matching dimension 05261 05262 const Relation & relsp = 05263 range_acc.compare(aa1.dimension(lodim).span_guarded(), 05264 aa2.dimension(hidim).span_guarded()); 05265 if (relsp.is_equal()) { 05266 span_match1[lodim] = true; 05267 span_match2[hidim] = true; 05268 } 05269 } 05270 } 05271 05272 //static Boolean 05273 //_is_relational_op (const Expression & expr) 05274 //{ 05275 /// switch (expr.op()) { 05276 /// case EQ_OP: 05277 /// case NE_OP: 05278 /// case LT_OP: 05279 /// case LE_OP: 05280 /// case GT_OP: 05281 /// case GE_OP: 05282 // return True; 05283 /// default: 05284 // return False; 05285 /// } 05286 //} 05287 05288 /// Take the match of ARD1 to ARD2 indicated by match_index and return a 05289 /// match array from ARD2 to ARD1. 05290 05291 Array<int> * 05292 invert_match(Array<int> * match_index, int new_dims) 05293 { 05294 Array<int> * inverted_match_index = new Array<int>; 05295 inverted_match_index->resize(new_dims); 05296 05297 /// ... Invert match_index 05298 05299 for (int ii=0; ii<new_dims; ++ii) { 05300 (*inverted_match_index)[ii] = -1; 05301 } 05302 for (int ii=0; ii<match_index->entries(); ++ii) { 05303 if ((*match_index)[ii] != -1) 05304 (*inverted_match_index)[(*match_index)[ii]] = ii; 05305 } 05306 return inverted_match_index; 05307 } 05308 05309 /// Respond whether the predicate of ARD1 implies that the predicate of ARD2 is true 05310 05311 bool 05312 ar_implies(AbstractAccess & aa1, AbstractAccess & aa2) 05313 { 05314 if (aa2.exec_predicate_true()) return true; 05315 05316 /// ... Here, we know that aa2 has a predicate and it's not .TRUE. 05317 05318 if (aa1.exec_predicate_true()) return false; 05319 05320 /// ... Here, we know that both predicates exist and neither are .TRUE. 05321 05322 if (aa1.exec_predicate() == aa2.exec_predicate()) return true; 05323 05324 /// ... Both descriptors have predicates 05325 /// ... aa1's predicate is the implier; aa2's predicate is the implied 05326 05327 PredicateRepository & repos = aa1.pgm_summarized_to()->pred_repos(); 05328 05329 /// ... 1. Transform x .NE. y into (x .LT. y) .OR. (x .GT. y) 05330 05331 Expression * implier = repos.predicate(repos.transform_not_equal(aa1.exec_predicate())); 05332 Expression * implied = repos.predicate(repos.transform_not_equal(aa2.exec_predicate())); 05333 05334 /// ... 2. Attempt to prove that implied subsumes implier 05335 05336 bool subsume = ar_subsume(*implier, *implied); 05337 05338 return subsume; 05339 } 05340 05341 /// Attempt to prove that implier => implied. Assume that the two 05342 /// expressions have been simplified. Better inference techniques 05343 /// can be added here to increase the precision of the result. For 05344 /// instance, if we are checking whether MARK2 implies N .GT. 0, 05345 /// we might follow an SSA link for MARK2 back to its definition, 05346 /// which might be: 05347 /// 05348 /// MARK0 = .FALSE. 05349 /// IF (N .GT. 0) MARK1 = .TRUE. 05350 /// MARK2 = PHI(MARK0, MARK1) 05351 /// 05352 /// In this case, 05353 /// MARK2 being .TRUE. would directly imply that N .GT. 0. 05354 05355 bool 05356 ar_subsume(Expression & implier, Expression & implied) 05357 { 05358 bool any = false; 05359 bool any2 = false; 05360 bool all = true; 05361 bool all2 = true; 05362 05363 switch (implied.op()) { 05364 case AND_OP: 05365 for (Iterator<Expression> iter_implied = implied.arg_list(); 05366 iter_implied.valid(); 05367 ++iter_implied) { 05368 05369 if (!ar_subsume(implier, iter_implied.current())) { 05370 all = false; 05371 break; 05372 } 05373 } 05374 return all; 05375 case OR_OP: 05376 for (Iterator<Expression> iter_implied = implied.arg_list(); 05377 iter_implied.valid(); 05378 ++iter_implied) { 05379 05380 if (ar_subsume(implier, iter_implied.current())) { 05381 any = true; 05382 break; 05383 } 05384 } 05385 return any; 05386 case LOGICAL_CONSTANT_OP: 05387 if (implied.str_data() == ".TRUE.") return true; 05388 else return false; 05389 case EQ_OP: 05390 case NE_OP: 05391 case LT_OP: 05392 case LE_OP: 05393 case GT_OP: 05394 case GE_OP: 05395 switch (implier.op()) { 05396 case AND_OP: 05397 for (Iterator<Expression> iter_implier = implier.arg_list(); 05398 iter_implier.valid(); 05399 ++iter_implier) { 05400 if (ar_subsume(iter_implier.current(), implied)) { 05401 any2 = true; 05402 break; 05403 } 05404 } 05405 return any2; 05406 case OR_OP: 05407 for (Iterator<Expression> iter_implier = implier.arg_list(); 05408 iter_implier.valid(); 05409 ++iter_implier) { 05410 if (!ar_subsume(iter_implier.current(), implied)) { 05411 all2 = false; 05412 break; 05413 } 05414 } 05415 return all2; 05416 case LOGICAL_CONSTANT_OP: 05417 return false; 05418 case ID_OP: 05419 case ARRAY_REF_OP: 05420 return false; 05421 case EQ_OP: 05422 case NE_OP: 05423 case LT_OP: 05424 case LE_OP: 05425 case GT_OP: 05426 case GE_OP: 05427 if (subsume_relation(implier, implied)) { 05428 return true; 05429 } else { 05430 return false; 05431 } 05432 case NOT_OP: 05433 return false; 05434 default: p_abort("Unexpected operator in ar_subsume: implier"); 05435 } 05436 case ARRAY_REF_OP: 05437 switch (implier.op()) { 05438 case ARRAY_REF_OP: 05439 if ((implier.base_variable_ref() == implied.base_variable_ref()) && 05440 (implier.subscript() == implied.subscript())) { 05441 return true; 05442 } else { 05443 return false; 05444 } 05445 case AND_OP: 05446 for (Iterator<Expression> iter_implier = implier.arg_list(); 05447 iter_implier.valid(); 05448 ++iter_implier) { 05449 if (ar_subsume(iter_implier.current(), implied)) { 05450 any2 = true; 05451 break; 05452 } 05453 } 05454 return any2; 05455 case OR_OP: 05456 for (Iterator<Expression> iter_implier = implier.arg_list(); 05457 iter_implier.valid(); 05458 ++iter_implier) { 05459 if (!ar_subsume(iter_implier.current(), implied)) { 05460 all2 = false; 05461 break; 05462 } 05463 } 05464 return all2; 05465 default: return false; 05466 } 05467 case ID_OP: 05468 switch (implier.op()) { 05469 case ID_OP: 05470 if (implier.base_variable_ref() == implied.base_variable_ref()) { 05471 return true; 05472 } else { 05473 return false; 05474 } 05475 case AND_OP: 05476 for (Iterator<Expression> iter_implier = implier.arg_list(); 05477 iter_implier.valid(); 05478 ++iter_implier) { 05479 if (ar_subsume(iter_implier.current(), implied)) { 05480 any2 = true; 05481 break; 05482 } 05483 } 05484 return any2; 05485 case OR_OP: 05486 for (Iterator<Expression> iter_implier = implier.arg_list(); 05487 iter_implier.valid(); 05488 ++iter_implier) { 05489 if (!ar_subsume(iter_implier.current(), implied)) { 05490 all2 = false; 05491 break; 05492 } 05493 } 05494 return all2; 05495 default: return false; 05496 } 05497 case NOT_OP: /// ... Does B imply .NOT.A? Only if A implies .NOT. B 05498 switch (implier.op()) { 05499 case ID_OP: 05500 /// ... Is this A => .NOT. A? or B => .NOT. A? 05501 if (implied.expr_guarded().op() == ID_OP) { 05502 return false; 05503 } 05504 return false; 05505 case NOT_OP: 05506 /// ... Is this .NOT. A => .NOT. A? 05507 if ((implied.expr_guarded().op() == ID_OP) && 05508 (implier.expr_guarded().op() == ID_OP)) { 05509 return &(implier.expr_guarded().symbol()) == &(implier.expr_guarded().symbol()); 05510 } 05511 return false; 05512 case AND_OP: 05513 for (Iterator<Expression> iter_implier = implier.arg_list(); 05514 iter_implier.valid(); 05515 ++iter_implier) { 05516 if (ar_subsume(iter_implier.current(), implied)) { 05517 any2 = true; 05518 break; 05519 } 05520 } 05521 return any2; 05522 case OR_OP: 05523 for (Iterator<Expression> iter_implier = implier.arg_list(); 05524 iter_implier.valid(); 05525 ++iter_implier) { 05526 if (!ar_subsume(iter_implier.current(), implied)) { 05527 all2 = false; 05528 break; 05529 } 05530 } 05531 return all2; 05532 default: 05533 return false; 05534 } 05535 default: 05536 p_abort("Unexpected operator in ar_subsume: implied"); 05537 } 05538 return false; 05539 } 05540 05541 /// subsume_relation - 05542 /// Does the implied_subtree expression subsume (completely cover) the 05543 /// implier_subtree? The roots of the subtrees used as parameters here are both 05544 /// relational operators. Do what is appropriate given the operators. 05545 05546 bool 05547 subsume_relation(const Expression & implier_subtree, const Expression & implied_subtree) 05548 { 05549 Expression * implier_left; 05550 Expression * implied_left; 05551 05552 if (! (is_relational_op(implier_subtree.op()) && 05553 is_relational_op(implied_subtree.op())) ) { 05554 return false; 05555 } 05556 05557 const Expression & plier_left = implier_subtree.left_guarded(); 05558 const Expression & plier_right = implier_subtree.right_guarded(); 05559 const Expression & plied_left = implied_subtree.left_guarded(); 05560 const Expression & plied_right = implied_subtree.right_guarded(); 05561 05562 if ((plier_left.type().data_type() == CHARACTER_TYPE) || 05563 (plier_right.type().data_type() == CHARACTER_TYPE) || 05564 (plied_left.type().data_type() == CHARACTER_TYPE) || 05565 (plied_right.type().data_type() == CHARACTER_TYPE)) { 05566 05567 if ((plier_left.type().data_type() == CHARACTER_TYPE) && 05568 (plier_right.type().data_type() == CHARACTER_TYPE) && 05569 (plied_left.type().data_type() == CHARACTER_TYPE) && 05570 (plied_right.type().data_type() == CHARACTER_TYPE)) { 05571 return implier_subtree == implied_subtree; /// ... A .EQ. B => A .EQ. B 05572 } else { 05573 return false; 05574 } 05575 } 05576 05577 if (!is_integer_zero(plier_right)) { 05578 implier_left = simplify(sub(plier_left.clone(), 05579 plier_right.clone())); 05580 } else { 05581 implier_left = plier_left.clone(); 05582 } 05583 05584 if (!is_integer_zero(plied_right)) { 05585 implied_left = simplify(sub(plied_left.clone(), 05586 plied_right.clone())); 05587 } else { 05588 implied_left = plied_left.clone(); 05589 } 05590 05591 Expression * diff = 0; 05592 bool cover = false; 05593 05594 switch (implied_subtree.op()) { 05595 case LT_OP: 05596 switch (implier_subtree.op()) { 05597 case LT_OP: 05598 diff = simplify(sub(implied_left->clone(), implier_left->clone())); 05599 05600 if (is_integer_constant(*diff)) { 05601 if (diff->value() <= 0) cover = true; 05602 else cover = false; 05603 } else { 05604 if (*implied_left == *implier_left) { /// ... to handle cases the simplifier can't 05605 cover = true; 05606 } else { 05607 cover = false; /// ... Just don't know 05608 } 05609 } 05610 break; 05611 case LE_OP: 05612 diff = simplify(sub(implied_left->clone(), implier_left->clone())); 05613 05614 if (is_integer_constant(*diff)) { 05615 if (diff->value() < 0) cover = true; 05616 else cover = false; 05617 } else cover = false; /// ... Just don't know 05618 break; 05619 case GT_OP: 05620 diff = |