00001 package edu.rice.cs.hpc.data.experiment.merge;
00002
00003 import java.util.Arrays;
00004 import java.util.Comparator;
00005 import java.util.HashMap;
00006
00007 import edu.rice.cs.hpc.data.experiment.Experiment;
00008 import edu.rice.cs.hpc.data.experiment.metric.MetricValue;
00009 import edu.rice.cs.hpc.data.experiment.scope.CallSiteScope;
00010 import edu.rice.cs.hpc.data.experiment.scope.LoopScope;
00011 import edu.rice.cs.hpc.data.experiment.scope.ProcedureScope;
00012 import edu.rice.cs.hpc.data.experiment.scope.RootScope;
00013 import edu.rice.cs.hpc.data.experiment.scope.Scope;
00014 import edu.rice.cs.hpc.data.experiment.scope.filters.EmptyMetricValuePropagationFilter;
00015 import edu.rice.cs.hpc.data.experiment.scope.visitors.DuplicateScopeTreesVisitor;
00016 import edu.rice.cs.hpc.data.experiment.scope.visitors.IScopeVisitor;
00017 import edu.rice.cs.hpc.data.experiment.scope.visitors.PercentScopeVisitor;
00018 import edu.rice.cs.hpc.data.experiment.scope.visitors.ResetCounterVisitor;
00019
00020
00021
00022
00023
00024
00025
00026 public class TreeSimilarity {
00027
00028 final private boolean debug = false;
00029 private boolean verbose = false;
00030
00031 int numNodes = 0;
00032 int numMerges = 0;
00033 int numUnmerges = 0;
00034 int numChildMatches = 0;
00035 int numSiblingMatches = 0;
00036
00037 private enum SimilarityType{ SAME, SIMILAR, DIFF }
00038 private EmptyMetricValuePropagationFilter emptyFilter;
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049 public TreeSimilarity(int offset, RootScope target, RootScope source, boolean verbose)
00050 {
00051 this.verbose = verbose;
00052
00053
00054 IScopeVisitor visitor = new ResetCounterVisitor();
00055 source.dfsVisitScopeTree(visitor);
00056
00057 emptyFilter = new EmptyMetricValuePropagationFilter();
00058
00059
00060 mergeMetrics(target, source, offset);
00061
00062
00063 mergeTree(target, source, offset);
00064
00065
00066 PercentScopeVisitor percentVisitor = new PercentScopeVisitor(offset,
00067 ((Experiment)target.getExperiment()).getMetricCount(), target);
00068 target.dfsVisitScopeTree(percentVisitor);
00069
00070 if (verbose) {
00071 float mergePercent = (float) (numMerges * 100.0 / numNodes);
00072 float unmergePercent = (float) (numUnmerges * 100.0 / numNodes);
00073
00074 System.out.println("Merged: " + numMerges + "\t " + mergePercent +
00075 " %\t Unmerges: " + numUnmerges + " \t" + unmergePercent + " %\t Nodes: "
00076 + numNodes + " \t sibling-match: " + numSiblingMatches +
00077 "\t child-match: " + numChildMatches);
00078 }
00079 }
00080
00081
00082
00083
00084
00085
00086
00087
00088
00089
00090 private void mergeTree( Scope target, Scope source, int metricOffset)
00091 {
00092
00093
00094
00095
00096 final Scope []sortedSource = getSortedChildren( source );
00097
00098 if (sortedSource == null)
00099 return;
00100
00101
00102
00103
00104 final Scope []sortedTarget = getSortedChildren( target );
00105 if (sortedTarget == null)
00106 {
00107 for (Scope childSource: sortedSource)
00108 {
00109 addSubTree(target, childSource, metricOffset);
00110 }
00111 if (verbose)
00112 numUnmerges += sortedSource.length;
00113 return;
00114 }
00115
00116
00117
00118
00119
00120
00121 for (Scope childTarget: sortedTarget)
00122 {
00123 if (childTarget.isCounterZero())
00124 {
00125
00126
00127
00128
00129
00130
00131
00132 for (int i=0; i<sortedSource.length; i++)
00133 {
00134 Scope childSource = sortedSource[i];
00135
00136 if (childSource.isCounterZero())
00137 {
00138
00139 CoupleNodes candidate = mergeNode(childTarget, childSource,
00140 i, sortedSource, metricOffset) ;
00141 if (candidate != null)
00142 {
00143 if (verbose)
00144 numMerges += 2;
00145
00146
00147
00148 mergeTree( candidate.target, candidate.source, metricOffset );
00149 break;
00150 }
00151 }
00152 }
00153 }
00154 }
00155
00156
00157 checkInlinedScope(sortedTarget, sortedSource, metricOffset);
00158
00159
00160 checkInlinedScope(sortedSource, sortedTarget, metricOffset);
00161
00162
00163 for (Scope childSource: sortedSource)
00164 {
00165 if (childSource.isCounterZero())
00166 {
00167 addSubTree(target, childSource, metricOffset);
00168 if (verbose) {
00169 numUnmerges++;
00170 }
00171 }
00172 }
00173
00174 if (verbose)
00175 {
00176 for (Scope s: sortedTarget)
00177 {
00178 if (s.isCounterZero())
00179 {
00180 numUnmerges++;
00181 }
00182 }
00183 numNodes += (sortedSource.length + sortedTarget.length);
00184 }
00185 }
00186
00187
00188
00189
00190
00191
00192
00193
00194 private void checkInlinedScope(Scope []scope1, Scope []scope2, int metricOffset)
00195 {
00196 for (int j=0; j<scope1.length; j++)
00197 {
00198 Scope s1 = scope1[j];
00199 if (s1.isCounterZero())
00200 {
00201 for (Scope s2: scope2)
00202 {
00203 if (s1.isCounterZero() && s2.isCounterZero() && s2.getChildCount()>0)
00204 {
00205 Scope []sortedGrandChildren = getSortedChildren(s2);
00206 for (int i=0; i<sortedGrandChildren.length; i++)
00207 {
00208 Scope ss2 = sortedGrandChildren[i];
00209 if (ss2.isCounterZero() && s1.isCounterZero())
00210 {
00211 int metric1 = ((Experiment) s1.getExperiment()).getMetricCount();
00212 int metric2 = ((Experiment) ss2.getExperiment()).getMetricCount();
00213
00214 CoupleNodes candidate;
00215
00216
00217
00218
00219
00220
00221
00222
00223 if (metric1>metric2)
00224 {
00225
00226 candidate = mergeNode(s1, ss2, i, sortedGrandChildren, metricOffset) ;
00227 } else
00228 {
00229
00230 candidate = mergeNode(ss2, s1, j, scope1, metricOffset);
00231 }
00232 if (candidate != null)
00233 {
00234 if (verbose)
00235 numMerges += 2;
00236
00237
00238 Scope parent = candidate.target.getParentScope();
00239 parent.accumulateMetric(candidate.source, 0, metricOffset, emptyFilter);
00240
00241
00242
00243 parent = candidate.source.getParentScope();
00244 disseminateMetric(parent, candidate.source, 0, parent.getMetricValues().length);
00245
00246
00247 parent.incrementCounter();
00248
00249
00250
00251 mergeTree( candidate.target, candidate.source, metricOffset );
00252 break;
00253 }
00254 }
00255 }
00256 }
00257 }
00258 }
00259 }
00260 }
00261
00262
00263
00264
00265
00266
00267
00268
00269
00270 private void disseminateMetric(Scope target, Scope source, int sourceOffset, int metricCount)
00271 {
00272
00273 for (int i=sourceOffset; i<metricCount; i++)
00274 {
00275 MetricValue mvTarget = target.getMetricValue(i);
00276 MetricValue mvSource = source.getMetricValue(i);
00277 MetricValue.setValue(mvTarget, mvTarget.getValue() - mvSource.getValue());
00278
00279 target.setMetricValue(i, mvTarget);
00280 }
00281 }
00282
00283
00284
00285
00286
00287 final private HashMap<String, Scope[]> mapScopeChildren = new HashMap<String, Scope[]>();
00288
00289
00290
00291
00292
00293
00294
00295
00296
00297 private Scope[] getSortedChildren(Scope scope)
00298 {
00299 String key = ((Experiment)scope.getExperiment()).getXMLExperimentFile().getAbsolutePath() +
00300 ": " + scope.getCCTIndex();
00301 Scope []sortedChildren = mapScopeChildren.get(key);
00302 if (sortedChildren != null)
00303 return sortedChildren;
00304
00305 Object []children = scope.getChildren();
00306 if (children == null)
00307 return null;
00308
00309 sortedChildren = sortArrayOfNodes(children);
00310 mapScopeChildren.put(key, sortedChildren);
00311
00312 return sortedChildren;
00313 }
00314
00315
00316
00317
00318
00319
00320
00321 private Scope [] sortArrayOfNodes(Object []nodes)
00322 {
00323 Scope []sorted = new Scope[nodes.length];
00324 System.arraycopy(nodes, 0, sorted, 0, nodes.length);
00325
00326 Arrays.sort(sorted, new CompareScope() );
00327
00328 return sorted;
00329 }
00330
00331 private class CoupleNodes
00332 {
00333 Scope target;
00334 Scope source;
00335 public CoupleNodes(Scope target, Scope source, Similarity similar)
00336 {
00337 this.target = target;
00338 this.source = source;
00339 }
00340 }
00341
00342
00343
00344
00345
00346
00347
00348
00349 private CoupleNodes mergeNode(
00350 Scope scope1,
00351 Scope scope2, int offsetScope2, Scope []siblingsScope2,
00352 int metricOffset)
00353 {
00354 Similarity similar = checkNodesSimilarity( scope1, scope2);
00355
00356 if (similar.type == SimilarityType.SAME)
00357 {
00358 setMergedNodes(scope1, scope2, metricOffset);
00359
00360 return new CoupleNodes(scope1, scope2, similar);
00361
00362 } else if (similar.type == SimilarityType.SIMILAR)
00363 {
00364
00365
00366
00367
00368 int nextOffset = offsetScope2 + 1;
00369
00370
00371
00372
00373 int numSiblings = Math.min(siblingsScope2.length-nextOffset,
00374 Constants.MAX_LEN_BFS);
00375 Scope candidate = scope2;
00376
00377
00378
00379 for (int i=nextOffset; i<nextOffset+numSiblings; i++)
00380 {
00381 Scope sibling = siblingsScope2[i];
00382 if (sibling.isCounterZero())
00383 {
00384 Similarity result = checkNodesSimilarity(scope1, sibling);
00385 if (result.type == SimilarityType.SAME)
00386 {
00387 setMergedNodes(scope1, sibling, metricOffset);
00388
00389 return new CoupleNodes(scope1, sibling, result);
00390
00391 } else if(result.type == SimilarityType.SIMILAR) {
00392
00393
00394
00395 if ( result.score > similar.score )
00396 {
00397 similar = result;
00398 candidate = sibling;
00399 }
00400 }
00401 }
00402 }
00403 if (verbose) {
00404 numSiblingMatches++;
00405 }
00406 setMergedNodes(scope1, candidate, metricOffset);
00407
00408 return new CoupleNodes(scope1, candidate, similar);
00409 }
00410 return null;
00411 }
00412
00413 private void setMergedNodes(Scope target, Scope source, int offset)
00414 {
00415 assert target.isCounterZero() : "target counter is not zero: " + target ;
00416 assert source.isCounterZero() : "source counter is not zero: " + source ;
00417
00418
00419
00420
00421 mergeMetrics(target, source, offset);
00422
00423
00424 source.incrementCounter();
00425 target.incrementCounter();
00426 }
00427
00428
00429
00430
00431
00432
00433
00434
00435 private int getScopeSimilarityScore ( Scope s1, Scope s2 )
00436 {
00437
00438 final int loc_distance = Math.abs(s1.getFirstLineNumber() - s2.getFirstLineNumber());
00439
00440
00441 final boolean same_type = areSameType( s1, s2 );
00442
00443
00444 final float metric_distance = getMetricDistance( s1, s2 );
00445
00446
00447 final boolean same_name = areSameName( s1, s2 );
00448
00449 int score = (int) (Constants.SCORE_INIT + (1-metric_distance) * Constants.WEIGHT_METRIC);
00450
00451 int score_loc = (int) Math.max(Constants.WEIGHT_LOCATION, loc_distance * Constants.WEIGHT_LOCATION_COEF);
00452
00453 score -= score_loc;
00454
00455
00456
00457
00458 if (same_type && (s1 instanceof CallSiteScope))
00459 {
00460 score += (same_name? 3:-1) * Constants.WEIGHT_NAME;
00461 } else
00462 {
00463 score += (same_name ? Constants.WEIGHT_NAME : 0);
00464 }
00465
00466 if (isOnlyChild( s1, s2 ))
00467 score += Constants.WEIGHT_LOCATION;
00468
00469 score += (same_type ? Constants.WEIGHT_TYPE : 0);
00470
00471 return score;
00472 }
00473
00474 private boolean isOnlyChild( Scope s1, Scope s2 )
00475 {
00476 int ns1 = s1.getParentScope().getChildCount();
00477 int ns2 = s2.getParentScope().getChildCount();
00478
00479 return (ns1==ns2 && ns1==1);
00480 }
00481
00482
00483
00484
00485
00486
00487
00488
00489
00490 private Similarity checkNodesSimilarity( Scope s1, Scope s2)
00491 {
00492 Similarity result = new Similarity();
00493 result.type = SimilarityType.DIFF;
00494 result.score = getScopeSimilarityScore( s1, s2 );
00495
00496
00497 result.score += getChildrenSimilarityScore( s1, s2 );
00498
00499 if (result.score>Constants.SCORE_SAME)
00500 {
00501
00502 result.type = SimilarityType.SAME;
00503 }
00504 else if (result.score>Constants.SCORE_SIMILAR)
00505 {
00506
00507
00508 result.type = SimilarityType.SIMILAR;
00509 }
00510
00511 if (debug)
00512 {
00513 System.out.println("TS " + s1 + " [" + s1.getCCTIndex()+"] \tvs.\t " +s2 + " ["+ s2.getCCTIndex()
00514 +"]\t s: " + result.score +"\t t: " + result.type);
00515 }
00516 return result;
00517 }
00518
00519
00520
00521
00522
00523
00524 private boolean hasUnderscoreSuffix(String s)
00525 {
00526 final int l = s.length();
00527 return (s.charAt( l - 1) == '_');
00528 }
00529
00537 private boolean areSameName( Scope s1, Scope s2 )
00538 {
00539 if (s1 instanceof CallSiteScope && s2 instanceof CallSiteScope)
00540 {
00541 final String n1 = s1.getName();
00542 final String n2 = s2.getName();
00543
00544 int diff = Math.abs( n1.compareTo(n2) );
00545 if (diff == 1)
00546 {
00547 return (hasUnderscoreSuffix(n1) || hasUnderscoreSuffix(n2));
00548 }
00549 return (diff == 0);
00550 }
00551 else
00552 {
00553 return s1.getName().equals(s2.getName());
00554 }
00555 }
00556
00557 private boolean areSameType( Scope s1, Scope s2)
00558 {
00559 final Class<? extends Scope> c1 = s1.getClass();
00560 final Class<? extends Scope> c2 = s2.getClass();
00561
00562 return (c1 == c2);
00563 }
00564
00565 private float getMetricDistance( Scope s1, Scope s2 )
00566 {
00567 final float v1 = getAnnotationValue(s1);
00568 final float v2 = getAnnotationValue(s2);
00569 return (Math.abs(v2-v1));
00570 }
00571
00572
00573
00574
00575
00576
00577
00578
00579
00580
00581
00582
00583 private boolean areSameChildren(Scope s1, Scope s2, int currDepth)
00584 {
00585
00586 if (currDepth > Constants.MAX_LEN_DFS)
00587 return false;
00588
00589
00590 if (s1.getChildCount()==0 || s2.getChildCount()==0)
00591 return false;
00592
00593 final Scope sortedS1[] = getSortedChildren(s1);
00594 final Scope sortedS2[] = getSortedChildren(s2);
00595
00596
00597
00598 int c1 = Math.min( Constants.MAX_LEN_BFS, sortedS1.length );
00599 int c2 = Math.min( Constants.MAX_LEN_BFS, sortedS2.length );
00600
00601 int finalScore = 0;
00602
00603
00604 for (int i=0; i<c1 ; i++)
00605 {
00606 final Scope cs1 = sortedS1[i];
00607
00608 for (int j=0; j<c2; j++)
00609 {
00610 final Scope cs2 = sortedS2[j];
00611 int score = getScopeSimilarityScore( cs1, cs2 );
00612
00613 if (score > Constants.SCORE_SIMILAR)
00614 finalScore += 3;
00615 else
00616 {
00617
00618 final boolean cs1_is_alien = isAlien( cs1 );
00619 final boolean cs2_is_alien = isAlien( cs2 );
00620 Scope next1 = s1, next2 = s2;
00621
00622
00623
00624 if (cs1_is_alien)
00625 next1 = cs1;
00626 if (cs2_is_alien)
00627 next2 = cs2;
00628
00629 if (cs1_is_alien || cs2_is_alien)
00630 {
00631 if (areSameChildren(next1, next2, currDepth + 1))
00632 finalScore++;
00633 } else
00634 {
00635 final boolean areLoops = (cs1 instanceof LoopScope && cs2 instanceof LoopScope);
00636 if (areLoops)
00637 {
00638 if (areSameChildren( cs1, cs2, currDepth + 1))
00639 finalScore++;
00640 }
00641 }
00642 }
00643 }
00644 }
00645 boolean verdict = (finalScore >= Math.max(c1, c2));
00646 return verdict;
00647 }
00648
00649
00650
00651
00652
00653
00654
00655 private boolean isAlien( Scope scope )
00656 {
00657 if (scope instanceof ProcedureScope)
00658 {
00659 return ((ProcedureScope) scope).isAlien();
00660 }
00661 return false;
00662 }
00663
00664
00665
00666
00667
00668
00669
00670
00671 private int getChildrenSimilarityScore( Scope s1, Scope s2 )
00672 {
00673 final boolean is_same = areSameChildren( s1, s2, 0 );
00674 int score = 0;
00675 if (is_same)
00676 {
00677 score = Constants.WEIGHT_CHILDREN;
00678 }
00679 return score;
00680 }
00681
00682
00683
00684
00685
00686
00687
00688
00689 private float getAnnotationValue(Scope s)
00690 {
00691 final MetricValue mv = s.getMetricValue(0);
00692 if (MetricValue.isAnnotationAvailable(mv))
00693 {
00694 return MetricValue.getAnnotationValue(mv);
00695 }
00696 else
00697 {
00698 return MetricValue.getValue(mv);
00699 }
00700 }
00701
00702
00703
00704
00705
00706
00707
00708 private void addSubTree(Scope parent, Scope node, int metricOffset)
00709 {
00710 DuplicateScopeTreesVisitor visitor = new DuplicateScopeTreesVisitor(parent, metricOffset);
00711 node.dfsVisitScopeTree(visitor);
00712 }
00713
00714
00715
00716
00717
00718
00719
00720 private void mergeMetrics(Scope target, Scope source, int metricOffset)
00721 {
00722 source.copyMetrics(target, metricOffset);
00723
00724 }
00725
00726
00727 private class Similarity
00728 {
00729 SimilarityType type;
00730 int score;
00731 }
00732
00733
00734
00735
00736
00737
00738
00739 private class CompareScope implements Comparator<Scope>
00740 {
00742 public int compare(Scope s1, Scope s2) {
00743 return (int) (s2.getMetricValue(0).getValue() - s1.getMetricValue(0).getValue());
00744 }
00745 }
00746 }