Jlm
StoreValueForwarding.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2026 HÃ¥vard Krogstie <krogstie.havard@gmail.com>
3  * See COPYING for terms of redistribution.
4  */
5 
12 #include <jlm/llvm/ir/Trace.hpp>
13 #include <jlm/llvm/ir/types.hpp>
19 #include <jlm/rvsdg/delta.hpp>
20 #include <jlm/rvsdg/gamma.hpp>
21 #include <jlm/rvsdg/MatchType.hpp>
22 #include <jlm/rvsdg/node.hpp>
23 #include <jlm/rvsdg/Phi.hpp>
24 #include <jlm/rvsdg/region.hpp>
27 #include <jlm/rvsdg/theta.hpp>
28 #include <jlm/rvsdg/traverser.hpp>
29 #include <jlm/util/common.hpp>
30 #include <jlm/util/Statistics.hpp>
31 #include <jlm/util/time.hpp>
32 
33 #include <memory>
34 #include <queue>
35 
36 namespace jlm::llvm
37 {
38 
39 // Makes the LocalAA try harder, by tracing along all possible paths,
40 // and checking if allocas provably do not escape the function.
41 static const bool ENABLE_AGGRESSIVE_LOCALAA = std::getenv("JLM_ENABLE_SVF_AGGRESSIVE_LOCALAA");
42 
43 // Enables the use of the PointsToGraphAliasAnalysis.
44 // Runs Andersen to make the PointsToGraph, and queries it if LocalAA yields MayAlias.
45 static const bool ENABLE_PTGAA = std::getenv("JLM_ENABLE_SVF_PTGAA");
46 
51 {
52  static constexpr auto NumTotalLoads_ = "#TotalLoads";
53  static constexpr auto NumLoadsForwarded_ = "#LoadsForwarded";
54  static constexpr auto numNoAliasAnalysisQueriesLabel_ = "#NoAliasAnalysisQueries";
55  static constexpr auto numMayAliasAnalysisQueriesLabel_ = "#MayAliasAnalysisQueries";
56  static constexpr auto numMustAliasAnalysisQueriesLabel_ = "#MustAliasAnalysisQueries";
57  static constexpr auto TracingLabel_ = "TracingTime";
58  static constexpr auto ForwardingLabel_ = "ForwardingTime";
59 
60 public:
61  ~Statistics() override = default;
62 
63  explicit Statistics(const util::FilePath & sourceFile)
64  : util::Statistics(Id::StoreValueForwarding, sourceFile)
65  {
68  }
69 
70  void
71  StartStatistics() noexcept
72  {
74  }
75 
76  void
78  const size_t numTotalLoads,
79  const size_t numLoadsForwarded,
80  const size_t numNoAliasAnalysisQueries,
81  const size_t numMayAliasAnalysisQueries,
82  const size_t numMustAliasAnalysisQueries) noexcept
83  {
85  AddMeasurement(NumTotalLoads_, numTotalLoads);
86  AddMeasurement(NumLoadsForwarded_, numLoadsForwarded);
87  AddMeasurement(numNoAliasAnalysisQueriesLabel_, numNoAliasAnalysisQueries);
88  AddMeasurement(numMayAliasAnalysisQueriesLabel_, numMayAliasAnalysisQueries);
89  AddMeasurement(numMustAliasAnalysisQueriesLabel_, numMustAliasAnalysisQueries);
90  }
91 
92  void
93  startTracing() noexcept
94  {
96  }
97 
98  void
99  stopTracing() noexcept
100  {
102  }
103 
104  void
105  startForwarding() noexcept
106  {
108  }
109 
110  void
111  stopForwarding() noexcept
112  {
114  }
115 
116  static std::unique_ptr<Statistics>
117  Create(const util::FilePath & sourceFile)
118  {
119  return std::make_unique<Statistics>(sourceFile);
120  }
121 };
122 
127 {
129  : outputTracer(true),
132  {
135  }
136 
137  // Counters used for statistics
138  size_t numTotalLoads = 0;
139  size_t numLoadsForwarded = 0;
143 
144  // Memoization of outputs that have been routed into regions
146  {
147  std::size_t
148  operator()(const std::pair<rvsdg::Output *, rvsdg::Region *> & value) const
149  {
150  return std::hash<rvsdg::Output *>()(value.first) ^ std::hash<rvsdg::Region *>()(value.second);
151  }
152  };
153 
154  std::unordered_map<std::pair<rvsdg::Output *, rvsdg::Region *>, rvsdg::Output *, OutputRegionHash>
156 
158 
159  // The AliasAnalysis instance used for all alias queries
161 
163 };
164 
166 
168  : Transformation("StoreValueForwarding")
169 {}
170 
171 void
173 {
174  for (auto & node : region.Nodes())
175  {
177  node,
178  [&](rvsdg::PhiNode & phiNode)
179  {
181  },
182  [&](rvsdg::LambdaNode & lambdaNode)
183  {
184  // Output tracing is only done intra-procedural in this pass, and we are about to process
185  // a new lambda node. Clear the tracing cache to free up the memory from the last lambda
186  // we processed.
187  context_->outputTracer.clearCache();
188 
189  traverseIntraProceduralRegion(*lambdaNode.subregion());
190  },
191  [&]([[maybe_unused]] rvsdg::DeltaNode & deltaNode)
192  {
193  // Do nothing about delta nodes
194  });
195  }
196 }
197 
198 void
200 {
201  rvsdg::TopDownTraverser traverser(&region);
202  for (auto node : traverser)
203  {
205  *node,
206  [&](rvsdg::GammaNode & gammaNode)
207  {
208  for (auto & subregion : gammaNode.Subregions())
209  {
210  traverseIntraProceduralRegion(subregion);
211  }
212  },
213  [&](rvsdg::ThetaNode & thetaNode)
214  {
215  traverseIntraProceduralRegion(*thetaNode.subregion());
216  },
217  [&](rvsdg::SimpleNode & simpleNode)
218  {
219  if (is<LoadNonVolatileOperation>(&simpleNode))
220  {
221  processLoadNode(simpleNode);
222  }
223 
224  // For other node types, we don't need to do anything for store value forwarding
225  });
226  }
227 
228  // Any forwarded loads are dead at this point, so remove them
229  region.prune(false);
230 }
231 
232 // Enum containing the possible relationships between a load operation and a store node
233 enum class StoreNodeInfo
234 {
235  ValueForwarding, // The store can be forwarded to the load node
236  ClobberNoForward, // The store may clobber the load, but the stored value can not be forwarded.
237  // This can for example be because the addresses are not MustAlias,
238  // or that the type of the stored value and the loaded value are not identical.
239  NoClobber // The store is guaranteed to not clobber the loaded value
240 };
241 
242 // When tracing backwards from a load node through memory state edges, we store points
243 // at which a store writes to the load node, or a structural node causes the loaded value
244 // to have multiple possible origins. These points are known as StoreValueOrigins
246 {
247  enum class Kind
248  {
249  Unknown, // Tracing does not lead to known stored values in all branches
250  Uninitialized, // Tracing leads to uninitialized memory, like an alloca
251  StoreNode, // Tracing leads to exactly one store node, and it is not inside a subregion
252  GammaNodeOutput, // Tracing leads to the output of a gamma, but all branches trace to stores
253  ThetaNodeOutput, // Tracing leads to the output of a theta, with a store inside
254  ThetaNodePre // Tracing leads to the pre value of a loop variable
255  };
256 
259 
260  [[nodiscard]] bool
261  isKnown() const
262  {
263  return kind != Kind::Unknown;
264  }
265 
266  [[nodiscard]] bool
267  operator==(const StoreValueOrigin & other) const noexcept
268  {
269  return kind == other.kind && node == other.node;
270  }
271 
272  [[nodiscard]] bool
273  operator!=(const StoreValueOrigin & other) const noexcept
274  {
275  return !(*this == other);
276  }
277 
278  static StoreValueOrigin
280  {
281  return StoreValueOrigin{ Kind::Unknown, nullptr };
282  }
283 
284  static StoreValueOrigin
286  {
287  return StoreValueOrigin{ Kind::Uninitialized, nullptr };
288  }
289 
290  static StoreValueOrigin
292  {
293  return StoreValueOrigin{ Kind::StoreNode, &storeNode };
294  }
295 
296  static StoreValueOrigin
298  {
299  return StoreValueOrigin{ Kind::GammaNodeOutput, &gammaNode };
300  }
301 
302  static StoreValueOrigin
304  {
305  return StoreValueOrigin{ Kind::ThetaNodeOutput, &thetaNode };
306  }
307 
308  static StoreValueOrigin
310  {
311  return StoreValueOrigin{ Kind::ThetaNodePre, &thetaNode };
312  }
313 };
314 
321 {
322 public:
327  : loadNode(loadNode),
328  tracer(tracer),
330  {
331  JLM_ASSERT(is<LoadNonVolatileOperation>(&loadNode));
335  }
336 
346  bool
348  {
349  // Forwarding of loads with no memory states is not possible
350  // TODO: We could load values from constant globals
352  return false;
353 
354  // Perform tracing from each memory state input to find exactly what store it leads to
355  for (auto & memoryStateInput : LoadOperation::MemoryStateInputs(loadNode))
356  {
357  // If the memory state input can not be traced back to store nodes,
358  // or different memory state inputs lead to different store nodes, we give up
359  auto lastStoredValue = getLastStoreBeforeInput(memoryStateInput);
360  if (!lastStoredValue.isKnown())
361  return false;
362  }
363 
364  // During tracing, loop back-edges are never followed, but instead added to a list.
365  // Go through the list to ensure all back-edges have been traced as well.
366  while (!loopVarPostsToTrace.IsEmpty())
367  {
368  auto loopVarPost = *loopVarPostsToTrace.Items().begin();
369  auto lastStoredValue = getLastStoreBeforeInput(*loopVarPost);
370  if (!lastStoredValue.isKnown())
371  return false;
372 
373  loopVarPostsToTrace.Remove(loopVarPost);
374  }
375 
376  return true;
377  }
378 
379 private:
388  {
389  JLM_ASSERT(is<StoreOperation>(&storeNode));
390 
391  const auto & storeAddress = *StoreOperation::AddressInput(storeNode).origin();
392  const auto storeType = StoreOperation::StoredValueInput(storeNode).Type();
393  const auto storedSize = GetTypeStoreSize(*storeType);
394 
395  // Trace the store address now, to avoid duplicate work when multiple alias analyses are used
396  const auto & tracedStoredAddress = llvm::traceOutput(storeAddress);
397 
398  // Query the alias analysis
399  const auto response =
400  aliasAnalysis.Query(*loadedAddress, loadedTypeSize, tracedStoredAddress, storedSize);
402 
403  return response;
404  }
405 
422  {
423  // If the input has already been traced, return the last result
424  if (const auto it = lastStoreBeforeInput.find(&input); it != lastStoreBeforeInput.end())
425  return it->second;
426 
427  auto result = getLastStoreBeforeInputInternal(input);
428 
429  // Add the result to the tracing maps
430  lastStoreBeforeInput[&input] = result;
431 
432  // If the input is on a node, add the result to the node map
433  if (auto node = rvsdg::TryGetOwnerNode<rvsdg::Node>(input))
434  {
435  const auto [it, inserted] = lastStoreBeforeNode.emplace(node, result);
436 
437  // If the node already had a different last store value, give up
438  if (!inserted && it->second != result)
440  }
441 
442  // If the input is a region exit, add the result to the region exit map
443  if (auto regionResult = dynamic_cast<rvsdg::RegionResult *>(&input))
444  {
445  const auto region = regionResult->region();
446  const auto [it, inserted] = lastStoreInRegion.emplace(region, result);
447 
448  // If the region already had a different last store value, give up
449  if (!inserted && it->second != result)
451  }
452 
453  return result;
454  }
455 
458  {
459  auto & tracedOutput = tracer.trace(*input.origin());
460 
461  // If tracing reached a store operation, look up its info
462  if (auto [storeNode, storeOp] =
463  rvsdg::TryGetSimpleNodeAndOptionalOp<StoreOperation>(tracedOutput);
464  storeNode && storeOp)
465  {
466  // Lookup or create a store node info entry
467  auto [it, inserted] = storeNodeInfo.emplace(storeNode, StoreNodeInfo::ClobberNoForward);
468 
469  // If the store has not been encountered before, determine forwarding / clobbering
470  if (inserted)
471  {
472  const auto aliasReponse = queryAliasAnalysis(*storeNode);
473  switch (aliasReponse)
474  {
476  it->second = StoreNodeInfo::ClobberNoForward;
477  break;
479  it->second = StoreNodeInfo::NoClobber;
480  break;
482  {
483  // MustAlias means a store forwarding candidate was found,
484  // but forwarding is only possible if the type matches
485  auto storedType = StoreOperation::StoredValueInput(*storeNode).Type();
486  if (*storedType == *loadedType)
487  it->second = StoreNodeInfo::ValueForwarding;
488  else
489  it->second = StoreNodeInfo::ClobberNoForward;
490  break;
491  }
492  default:
493  JLM_UNREACHABLE("Unknown AliasAnalysis response");
494  }
495  }
496 
497  switch (it->second)
498  {
500  return StoreValueOrigin::createStoreNode(*storeNode);
501 
504 
506  {
507  // If the store is not clobbering, keep tracing along the memory state chain
508  auto & memoryStateInput = StoreOperation::MapMemoryStateOutputToInput(tracedOutput);
509  return getLastStoreBeforeInput(memoryStateInput);
510  }
511 
512  default:
513  JLM_UNREACHABLE("Unknown StoreNodeInfo");
514  }
515  }
516 
517  // For join operations, all the inputs must lead to the same last store
518  if (auto [joinNode, joinOp] =
519  rvsdg::TryGetSimpleNodeAndOptionalOp<MemoryStateJoinOperation>(tracedOutput);
520  joinNode && joinOp)
521  {
522  if (joinNode->ninputs() == 0)
524 
525  for (auto & input : joinNode->Inputs())
526  {
527  auto result = getLastStoreBeforeInput(input);
528  if (!result.isKnown())
530  }
531 
532  // If none of the calls returned nullptr, there must a shared last store before the join
533  const auto sharedLastStore = lastStoreBeforeNode[joinNode];
534  JLM_ASSERT(sharedLastStore.isKnown());
535  return sharedLastStore;
536  }
537 
538  // if tracing reaches an alloca, the value is uninitialized, so we can pick our own value
539  if (auto [allocaNode, allocaOp] =
540  rvsdg::TryGetSimpleNodeAndOptionalOp<AllocaOperation>(tracedOutput);
541  allocaNode && allocaOp)
542  {
544  }
545 
546  // If we found an exit variable of a gamma node, trace each of its subregions
547  if (auto gammaNode = rvsdg::TryGetOwnerNode<rvsdg::GammaNode>(tracedOutput))
548  {
549  const auto exitVar = gammaNode->MapOutputExitVar(tracedOutput);
550 
551  // If all branches lead to the same store node, return it directly.
552  // If different last store nodes have been observed, this becomes nullptr
553  std::optional<StoreValueOrigin> commonStoreValueOrigin;
554 
555  for (auto branchResult : exitVar.branchResult)
556  {
557  auto lastStoreNode = getLastStoreBeforeInput(*branchResult);
558 
559  // If any of the gamma branches is impossible to trace back to a last store,
560  // give up on forwarding entirely
561  if (!lastStoreNode.isKnown())
563 
564  // Keep track if there is a single shared last store in all branches
565  if (!commonStoreValueOrigin.has_value())
566  commonStoreValueOrigin = lastStoreNode;
567  else if (commonStoreValueOrigin.value() != lastStoreNode)
568  commonStoreValueOrigin = StoreValueOrigin::createUnknown();
569  }
570 
571  JLM_ASSERT(commonStoreValueOrigin.has_value());
572  if (commonStoreValueOrigin->isKnown())
573  return *commonStoreValueOrigin;
574 
575  // If the last store node depends on the branch taken, return the gamma node itself
576  return StoreValueOrigin::createGammaNodeOutput(*gammaNode);
577  }
578 
579  // If we found an exit variable of a theta node, continue tracing on the inside
580  if (auto thetaNode = rvsdg::TryGetOwnerNode<rvsdg::ThetaNode>(tracedOutput))
581  {
582  const auto loopVar = thetaNode->MapOutputLoopVar(tracedOutput);
583  auto lastStoreNode = getLastStoreBeforeInput(*loopVar.post);
584 
585  if (!lastStoreNode.isKnown())
587 
588  // if the last store before the end of the theta subregion is the pre of the same theta,
589  // the loaded memory is loop invariant, and we can load from the last store before the theta.
590  if (lastStoreNode.kind == StoreValueOrigin::Kind::ThetaNodePre
591  && lastStoreNode.node == thetaNode)
592  {
593  return getLastStoreBeforeInput(*loopVar.input);
594  }
595 
596  // if the reached store node is inside the theta, it must be routed out of it
597  if (lastStoreNode.node->region() == thetaNode->subregion())
598  return StoreValueOrigin::createThetaNodeOutput(*thetaNode);
599 
600  // The last store node is outside the theta, so point to it directly
601  return lastStoreNode;
602  }
603 
604  // If we found a loop pre variable in a theta node, trace both inside and outside
605  if (auto thetaNode = rvsdg::TryGetRegionParentNode<rvsdg::ThetaNode>(tracedOutput))
606  {
607  const auto loopVar = thetaNode->MapPreLoopVar(tracedOutput);
608 
609  // Trace from the theta input first
610  auto inputLastStoreNode = getLastStoreBeforeInput(*loopVar.input);
611  if (!inputLastStoreNode.isKnown())
613 
614  // If the loop variables' post result is not traced, add it to the list.
615  // Using a list prevents visiting the loop body multiple times during recursion.
616  if (lastStoreBeforeInput.count(loopVar.post) == 0)
617  loopVarPostsToTrace.insert(loopVar.post);
618 
619  return StoreValueOrigin::createThetaNodePre(*thetaNode);
620  }
621 
622  // Tracing reached something that is not handled, such as a function call
624  }
625 
626  void
628  {
629  switch (response)
630  {
633  break;
636  break;
639  break;
640  default:
641  throw std::logic_error("Unhandled alias analysis query response!");
642  }
643  }
644 
645 public:
648  std::shared_ptr<const rvsdg::Type> loadedType;
650 
653 
654  // Counters used for statistics
658 
659  // Map containing info about each store node relevant to store value forwarding.
660  std::unordered_map<rvsdg::SimpleNode *, StoreNodeInfo> storeNodeInfo;
661 
662  // The last store value origin on the memory state chain before the given input.
663  std::unordered_map<rvsdg::Input *, StoreValueOrigin> lastStoreBeforeInput;
664  // The last store value origin before the given node.
665  std::unordered_map<rvsdg::Node *, StoreValueOrigin> lastStoreBeforeNode;
666  // The last store value origin node before the end of the given region.
667  // It can be outside the region if no store occurs inside the region.
668  std::unordered_map<rvsdg::Region *, StoreValueOrigin> lastStoreInRegion;
669 
670  // When tracing reaches a loop var pre argument, tracing does not continue through the post.
671  // The loop var post result is instead added to this set, to ensure that tracing happens later.
672  // Only loop vars that have yet to be traced are added here.
674 
675  // During routing, at most one exit variable need to be created per gamma
676  std::unordered_map<rvsdg::GammaNode *, rvsdg::Output *> createdExitVars;
677  // During routing, at most one loop variable needs to be created per theta.
678  std::unordered_map<rvsdg::ThetaNode *, rvsdg::ThetaNode::LoopVar> createdLoopVars;
679  // During routing, loop variable posts are not routed immediately, but added to this queue
680  std::queue<rvsdg::Input *> unroutedLoopVarPosts;
681 };
682 
683 void
685 {
686  context_->numTotalLoads++;
687 
688  // Only non-volatile loads are candidates for being forwarded to
689  JLM_ASSERT(is<LoadNonVolatileOperation>(&loadNode));
690 
691  context_->statistics.startTracing();
692  LoadTracingInfo loadTracingInfo(loadNode, context_->outputTracer, context_->aliasAnalysis);
693  const bool success = loadTracingInfo.traceAllMemoryStateInputs();
694  context_->statistics.stopTracing();
695 
696  context_->numNoAliasAnalysisQueries += loadTracingInfo.numNoAliasAnalysisQueries;
697  context_->numMayAliasAnalysisQueries += loadTracingInfo.numMayAliasAnalysisQueries;
698  context_->numMustAliasAnalysisQueries += loadTracingInfo.numMustAliasAnalysisQueries;
699 
700  if (success)
701  {
702  context_->statistics.startForwarding();
703  forwardStoredValues(loadTracingInfo);
704  context_->statistics.stopForwarding();
705  }
706 }
707 
708 // Performs StoreValueForwarding to the load node represented by the tracingInfo.
709 void
711 {
712  context_->numLoadsForwarded++;
713 
714  auto & loadNode = tracingInfo.loadNode;
715  auto & loadedValueOutput = LoadOperation::LoadedValueOutput(loadNode);
716  auto & loadRegion = *loadNode.region();
717 
718  // There must be a node providing the last value stored before the load
719  const auto lastStoreNode = tracingInfo.lastStoreBeforeNode[&loadNode];
720  JLM_ASSERT(lastStoreNode.isKnown());
721  auto & storedValueOutput = getStoredValueOrigin(lastStoreNode, loadRegion, tracingInfo);
722 
723  // Fixup all loop variables that were created during the above routing
724  connectUnroutedLoopPosts(tracingInfo);
725 
726  // Divert users of the load to the routed stored value
727  loadedValueOutput.divert_users(&storedValueOutput);
728 
729  // Make the load node dead by routing all memory state users around it
730  for (auto & memoryStateOutput : LoadNonVolatileOperation::MemoryStateOutputs(loadNode))
731  {
732  auto & memoryStateInput =
734  memoryStateOutput.divert_users(memoryStateInput.origin());
735  }
736 }
737 
738 // Gets an rvsdg output providing the last value stored relative to the storeValueOrigin.
741  StoreValueOrigin storeValueOrigin,
742  rvsdg::Region & targetRegion,
743  LoadTracingInfo & tracingInfo)
744 {
745  JLM_ASSERT(storeValueOrigin.isKnown());
746 
747  if (storeValueOrigin.kind == StoreValueOrigin::Kind::Uninitialized)
748  {
749  // When forwarding uninitialized memory, create an undef node
750  return *UndefValueOperation::Create(targetRegion, tracingInfo.loadedType);
751  }
752 
753  if (storeValueOrigin.kind == StoreValueOrigin::Kind::StoreNode)
754  {
755  // For store nodes, the stored value is the origin of the node's value input
756  auto & storedValue = *StoreOperation::StoredValueInput(*storeValueOrigin.node).origin();
757  return routeOutputToRegion(storedValue, targetRegion);
758  }
759 
760  // For gamma nodes, create an exit variable by finding the stored value in each of its regions
761  if (storeValueOrigin.kind == StoreValueOrigin::Kind::GammaNodeOutput)
762  {
763  auto gammaNode = dynamic_cast<rvsdg::GammaNode *>(storeValueOrigin.node);
764  JLM_ASSERT(gammaNode);
765 
766  // We only need to create at most one exit variable per gamma, so memoize it
767  auto [it, inserted] = tracingInfo.createdExitVars.emplace(gammaNode, nullptr);
768  if (inserted)
769  {
770  std::vector<rvsdg::Output *> lastStorePerSubregion;
771  for (auto & subregion : gammaNode->Subregions())
772  {
773  auto lastStoreValue = tracingInfo.lastStoreInRegion[&subregion];
774  auto & storedValueOutput = getStoredValueOrigin(lastStoreValue, subregion, tracingInfo);
775  lastStorePerSubregion.push_back(&storedValueOutput);
776  }
777 
778  auto exitVar = gammaNode->AddExitVar(lastStorePerSubregion);
779  it->second = exitVar.output;
780  }
781  JLM_ASSERT(it->second);
782  return routeOutputToRegion(*it->second, targetRegion);
783  }
784 
785  // For theta nodes, create a loop variable
786  if (storeValueOrigin.kind == StoreValueOrigin::Kind::ThetaNodeOutput
787  || storeValueOrigin.kind == StoreValueOrigin::Kind::ThetaNodePre)
788  {
789  auto thetaNode = dynamic_cast<rvsdg::ThetaNode *>(storeValueOrigin.node);
790  JLM_ASSERT(thetaNode);
791 
792  // If the theta output is being requested, and no clobber happens inside the theta,
793  // skip making a loop variable and return the last store before the theta instead
794  if (storeValueOrigin.kind == StoreValueOrigin::Kind::ThetaNodeOutput)
795  {
796  auto lastStoreInRegion = tracingInfo.lastStoreInRegion.find(thetaNode->subregion());
797  JLM_ASSERT(lastStoreInRegion != tracingInfo.lastStoreInRegion.end());
798  JLM_ASSERT(lastStoreInRegion->second.isKnown());
799 
800  // If the last store is the theta pre, the loop body never clobbers
801  if (lastStoreInRegion->second.kind == StoreValueOrigin::Kind::ThetaNodePre)
802  {
803  auto lastStoreBeforeTheta = tracingInfo.lastStoreBeforeNode.find(thetaNode);
804  JLM_ASSERT(lastStoreBeforeTheta != tracingInfo.lastStoreBeforeNode.end());
805  return getStoredValueOrigin(lastStoreBeforeTheta->second, targetRegion, tracingInfo);
806  }
807  }
808 
809  // If the loop variable has not yet been created in this theta, create it now
810  auto loopVarSlot = tracingInfo.createdLoopVars.find(thetaNode);
811  if (loopVarSlot == tracingInfo.createdLoopVars.end())
812  {
813  rvsdg::Output * initialValue = nullptr;
814 
815  if (auto lastStoreBeforeTheta = tracingInfo.lastStoreBeforeNode.find(thetaNode);
816  lastStoreBeforeTheta != tracingInfo.lastStoreBeforeNode.end())
817  {
818  JLM_ASSERT(lastStoreBeforeTheta->second.isKnown());
819  auto & outerRegion = *thetaNode->region();
820  initialValue =
821  &getStoredValueOrigin(lastStoreBeforeTheta->second, outerRegion, tracingInfo);
822  }
823  else
824  {
825  // the loop variable is only used as a theta output, so give it undef as input
826  initialValue = UndefValueOperation::Create(*thetaNode->region(), tracingInfo.loadedType);
827  }
828 
829  // Create the loop variable and add it to the map
830  JLM_ASSERT(initialValue);
831  auto loopVar = thetaNode->AddLoopVar(initialValue);
832  auto [it, inserted] = tracingInfo.createdLoopVars.emplace(thetaNode, loopVar);
833  JLM_ASSERT(inserted);
834  loopVarSlot = it;
835 
836  // To prevent looping during routing, the created loop variable's post is added to a
837  // queue of loop variable posts that are routed properly later.
838  tracingInfo.unroutedLoopVarPosts.push(loopVar.post);
839  }
840 
841  // Return the correct output, depending on the query kind
842  switch (storeValueOrigin.kind)
843  {
845  return routeOutputToRegion(*loopVarSlot->second.pre, targetRegion);
847  return routeOutputToRegion(*loopVarSlot->second.output, targetRegion);
848  default:
849  JLM_UNREACHABLE("Unknown StoreValueOrigin kind");
850  }
851  }
852 
853  JLM_UNREACHABLE("Unknown StoreValueOriginKind");
854 }
855 
856 void
858 {
859  // The process of handling all created loop variables may also create more loop variables,
860  // so keep going until the queue is empty.
861  while (!tracingInfo.unroutedLoopVarPosts.empty())
862  {
863  auto post = tracingInfo.unroutedLoopVarPosts.front();
864  tracingInfo.unroutedLoopVarPosts.pop();
865 
866  auto lastStore = tracingInfo.lastStoreInRegion.find(post->region());
867  JLM_ASSERT(lastStore != tracingInfo.lastStoreInRegion.end());
868  auto & origin = getStoredValueOrigin(lastStore->second, *post->region(), tracingInfo);
869  post->divert_to(&origin);
870  }
871 }
872 
875 {
876  if (output.region() == &region)
877  return output;
878 
879  if (region.IsRootRegion())
880  JLM_UNREACHABLE("root region reached during attempt at routing output into region");
881 
882  if (auto gammaNode = dynamic_cast<rvsdg::GammaNode *>(region.node()))
883  {
884  // Route the output all the way to just outside the gamma first
885  auto & outerOutput = routeOutputToRegion(output, *gammaNode->region());
886 
887  // If the outer output already has a corresponding EntryVar, return it
888  if (auto it = context_->routedOutputs.find({ &outerOutput, &region });
889  it != context_->routedOutputs.end())
890  return *it->second;
891 
892  // Create an EntryVar for the output, add all branch arguments to the cache
893  auto entryVar = gammaNode->AddEntryVar(&outerOutput);
894  for (auto branchArgument : entryVar.branchArgument)
895  {
896  context_->routedOutputs[{ &outerOutput, branchArgument->region() }] = branchArgument;
897  }
898 
899  return *entryVar.branchArgument[region.index()];
900  }
901 
902  if (auto thetaNode = dynamic_cast<rvsdg::ThetaNode *>(region.node()))
903  {
904  // Route the output all the way to just outside the theta first
905  auto & outerOutput = routeOutputToRegion(output, *thetaNode->region());
906 
907  // If the outer output already has a corresponding invariant loop variable, return it
908  if (auto it = context_->routedOutputs.find({ &outerOutput, &region });
909  it != context_->routedOutputs.end())
910  return *it->second;
911 
912  // Create an invariant LoopVar for the output and add it to the cache
913  auto loopVar = thetaNode->AddLoopVar(&outerOutput);
914  context_->routedOutputs[{ &outerOutput, &region }] = loopVar.pre;
915  return *loopVar.pre;
916  }
917 
918  JLM_UNREACHABLE("routeOutputToRegion reached unhandled structural node");
919 }
920 
921 static std::unique_ptr<aa::AliasAnalysis>
923 {
924  auto localAA = std::make_unique<aa::LocalAliasAnalysis>();
925 
927  {
928  // Setting the trace collection size to 1 limits the analysis to only the most trivial tracing
929  localAA->setMaxTraceCollectionSize(1);
930  }
931 
932  if (!ENABLE_PTGAA)
933  return localAA;
934 
935  aa::Andersen andersen;
936  auto ptg = andersen.Analyze(module, statisticsCollector);
937  auto ptgAA = std::make_unique<aa::PointsToGraphAliasAnalysis>(std::move(ptg));
938 
939  return std::make_unique<aa::ChainedAliasAnalysis>(std::move(localAA), std::move(ptgAA));
940 }
941 
942 void
944  rvsdg::RvsdgModule & module,
946 {
947  auto aliasAnalysis = createAliasAnalysis(module, statisticsCollector);
948  auto statistics = Statistics::Create(module.SourceFilePath().value());
949 
950  context_ = std::make_unique<Context>(*aliasAnalysis, *statistics);
951 
952  statistics->StartStatistics();
953 
954  auto & rvsdg = module.Rvsdg();
955  traverseInterProceduralRegion(rvsdg.GetRootRegion());
956 
957  statistics->StopStatistics(
958  context_->numTotalLoads,
959  context_->numLoadsForwarded,
960  context_->numNoAliasAnalysisQueries,
961  context_->numMayAliasAnalysisQueries,
962  context_->numMustAliasAnalysisQueries);
963  statisticsCollector.CollectDemandedStatistics(std::move(statistics));
964 
965  // Discard internal state to free up memory after we are done
966  context_.reset();
967 }
968 }
static jlm::util::StatisticsCollector statisticsCollector
static rvsdg::Input & AddressInput(const rvsdg::Node &node) noexcept
Definition: Load.hpp:75
static rvsdg::Output & LoadedValueOutput(const rvsdg::Node &node)
Definition: Load.hpp:84
static size_t numMemoryStates(const rvsdg::SimpleNode &node) noexcept
Definition: Load.hpp:101
static rvsdg::Node::OutputIteratorRange MemoryStateOutputs(const rvsdg::Node &node) noexcept
Definition: Load.hpp:116
static rvsdg::Node::InputIteratorRange MemoryStateInputs(const rvsdg::Node &node) noexcept
Definition: Load.hpp:139
static rvsdg::Input & MapMemoryStateOutputToInput(const rvsdg::Output &output)
Definition: Load.hpp:157
std::unordered_map< rvsdg::Input *, StoreValueOrigin > lastStoreBeforeInput
StoreValueOrigin getLastStoreBeforeInputInternal(rvsdg::Input &input)
void updateAliasAnalysisQueryCounters(aa::AliasAnalysis::AliasQueryResponse response)
aa::AliasAnalysis::AliasQueryResponse queryAliasAnalysis(rvsdg::SimpleNode &storeNode)
std::unordered_map< rvsdg::GammaNode *, rvsdg::Output * > createdExitVars
LoadTracingInfo(rvsdg::SimpleNode &loadNode, OutputTracer &tracer, aa::AliasAnalysis &aliasAnalysis)
std::unordered_map< rvsdg::SimpleNode *, StoreNodeInfo > storeNodeInfo
StoreValueOrigin getLastStoreBeforeInput(rvsdg::Input &input)
std::unordered_map< rvsdg::ThetaNode *, rvsdg::ThetaNode::LoopVar > createdLoopVars
std::queue< rvsdg::Input * > unroutedLoopVarPosts
std::unordered_map< rvsdg::Region *, StoreValueOrigin > lastStoreInRegion
std::shared_ptr< const rvsdg::Type > loadedType
std::unordered_map< rvsdg::Node *, StoreValueOrigin > lastStoreBeforeNode
util::HashSet< rvsdg::Input * > loopVarPostsToTrace
void setTraceThroughLoadedStates(bool traceThroughLoadedStates)
Definition: Trace.hpp:38
static rvsdg::Input & StoredValueInput(const rvsdg::Node &node) noexcept
Definition: Store.hpp:84
static rvsdg::Input & MapMemoryStateOutputToInput(const rvsdg::Output &output)
Definition: Store.hpp:134
static rvsdg::Input & AddressInput(const rvsdg::Node &node) noexcept
Definition: Store.hpp:75
Store Value Forwarding Statistics class.
Statistics(const util::FilePath &sourceFile)
void StopStatistics(const size_t numTotalLoads, const size_t numLoadsForwarded, const size_t numNoAliasAnalysisQueries, const size_t numMayAliasAnalysisQueries, const size_t numMustAliasAnalysisQueries) noexcept
static std::unique_ptr< Statistics > Create(const util::FilePath &sourceFile)
Store Value Forwarding Optimization.
~StoreValueForwarding() noexcept override
std::unique_ptr< Context > context_
void connectUnroutedLoopPosts(LoadTracingInfo &tracingInfo)
void processLoadNode(rvsdg::SimpleNode &loadNode)
rvsdg::Output & routeOutputToRegion(rvsdg::Output &output, rvsdg::Region &region)
void Run(rvsdg::RvsdgModule &module, util::StatisticsCollector &statisticsCollector) override
Perform RVSDG transformation.
rvsdg::Output & getStoredValueOrigin(StoreValueOrigin storeValueOrigin, rvsdg::Region &targetRegion, LoadTracingInfo &tracingInfo)
void traverseInterProceduralRegion(rvsdg::Region &region)
void traverseIntraProceduralRegion(rvsdg::Region &region)
void forwardStoredValues(LoadTracingInfo &tracingInfo)
static jlm::rvsdg::Output * Create(rvsdg::Region &region, std::shared_ptr< const jlm::rvsdg::Type > type)
Definition: operators.hpp:1055
virtual AliasQueryResponse Query(const rvsdg::Output &p1, size_t s1, const rvsdg::Output &p2, size_t s2)=0
std::unique_ptr< PointsToGraph > Analyze(const rvsdg::RvsdgModule &module, util::StatisticsCollector &statisticsCollector) override
Definition: Andersen.cpp:1531
Delta node.
Definition: delta.hpp:129
Conditional operator / pattern matching.
Definition: gamma.hpp:99
Output * origin() const noexcept
Definition: node.hpp:58
const std::shared_ptr< const rvsdg::Type > & Type() const noexcept
Definition: node.hpp:67
Lambda node.
Definition: lambda.hpp:83
void setTraceThroughStructuralNodes(bool value) noexcept
Definition: Trace.hpp:59
Output & trace(Output &output)
Definition: Trace.cpp:22
rvsdg::Region * region() const noexcept
Definition: node.cpp:151
const std::shared_ptr< const rvsdg::Type > & Type() const noexcept
Definition: node.hpp:366
A phi node represents the fixpoint of mutually recursive definitions.
Definition: Phi.hpp:46
rvsdg::Region * subregion() const noexcept
Definition: Phi.hpp:320
Represents the result of a region.
Definition: region.hpp:120
Represent acyclic RVSDG subgraphs.
Definition: region.hpp:213
rvsdg::StructuralNode * node() const noexcept
Definition: region.hpp:369
void prune(bool recursive)
Definition: region.cpp:323
size_t index() const noexcept
Definition: region.hpp:375
bool IsRootRegion() const noexcept
Definition: region.cpp:170
NodeRange Nodes() noexcept
Definition: region.hpp:328
const std::optional< util::FilePath > & SourceFilePath() const noexcept
Definition: RvsdgModule.hpp:73
Graph & Rvsdg() noexcept
Definition: RvsdgModule.hpp:57
SubregionIteratorRange Subregions()
void CollectDemandedStatistics(std::unique_ptr< Statistics > statistics)
Definition: Statistics.hpp:574
Statistics Interface.
Definition: Statistics.hpp:31
util::Timer & GetTimer(const std::string &name)
Definition: Statistics.cpp:137
util::Timer & AddTimer(std::string name)
Definition: Statistics.cpp:158
void AddMeasurement(std::string name, T value)
Definition: Statistics.hpp:177
void start() noexcept
Definition: time.hpp:54
void stop() noexcept
Definition: time.hpp:67
#define JLM_ASSERT(x)
Definition: common.hpp:16
#define JLM_UNREACHABLE(msg)
Definition: common.hpp:43
Global memory state passed between functions.
static std::unique_ptr< aa::AliasAnalysis > createAliasAnalysis(rvsdg::RvsdgModule &module, util::StatisticsCollector &statisticsCollector)
rvsdg::Output & traceOutput(rvsdg::Output &output, const rvsdg::Region *withinRegion)
Definition: Trace.cpp:54
size_t GetTypeStoreSize(const rvsdg::Type &type)
Definition: types.cpp:386
static const bool ENABLE_PTGAA
static const bool ENABLE_AGGRESSIVE_LOCALAA
void MatchTypeOrFail(T &obj, const Fns &... fns)
Pattern match over subclass type of given object.
std::size_t operator()(const std::pair< rvsdg::Output *, rvsdg::Region * > &value) const
Context(aa::AliasAnalysis &aliasAnalysis, Statistics &statistics) noexcept
std::unordered_map< std::pair< rvsdg::Output *, rvsdg::Region * >, rvsdg::Output *, OutputRegionHash > routedOutputs
static StoreValueOrigin createUninitialized()
static StoreValueOrigin createStoreNode(rvsdg::SimpleNode &storeNode)
bool operator!=(const StoreValueOrigin &other) const noexcept
static StoreValueOrigin createGammaNodeOutput(rvsdg::GammaNode &gammaNode)
static StoreValueOrigin createThetaNodePre(rvsdg::ThetaNode &thetaNode)
static StoreValueOrigin createThetaNodeOutput(rvsdg::ThetaNode &thetaNode)
bool operator==(const StoreValueOrigin &other) const noexcept
static StoreValueOrigin createUnknown()
static const char * Timer
Definition: Statistics.hpp:251