28 if (rr->output() && rvsdg::TryGetOwnerNode<LoopNode>(*rr->output()))
40 JLM_ASSERT(rvsdg::TryGetOwnerNode<LoopNode>(*user));
43 else if (rvsdg::TryGetOwnerNode<rvsdg::SimpleNode>(*user))
57 if (
is_constant(rvsdg::TryGetOwnerNode<rvsdg::SimpleNode>(*out)))
61 auto [forkNode, forkOperation] = rvsdg::TryGetSimpleNodeAndOptionalOp<ForkOperation>(*out);
62 if (forkOperation && forkOperation->IsConstant())
68 auto [bufferNode, bufferOperation] = rvsdg::TryGetSimpleNodeAndOptionalOp<BufferOperation>(*user);
70 && (bufferOperation->IsPassThrough() != passThrough
71 || bufferOperation->Capacity() != capacity))
74 auto node = rvsdg::TryGetOwnerNode<rvsdg::SimpleNode>(*user);
75 passThrough = passThrough && bufferOperation->IsPassThrough();
76 capacity = std::max(capacity, bufferOperation->Capacity());
78 node->output(0)->divert_users(bufOut);
87 directUser.divert_to(newOut);
106 auto [bufferNode, bufferOperation] = rvsdg::TryGetSimpleNodeAndOptionalOp<BufferOperation>(*user);
109 auto node2 = rvsdg::TryGetOwnerNode<rvsdg::SimpleNode>(*user);
111 bool passThrough = buf->IsPassThrough() && bufferOperation->IsPassThrough();
112 auto capacity = std::max(buf->Capacity(), bufferOperation->Capacity());
114 JLM_ASSERT(node2->region() == newOut->region());
115 node2->output(0)->divert_users(newOut);
127 bool outerLoop = !rvsdg::is<LoopOperation>(loopNode->
region()->
node());
131 for (
size_t i = 0; i < loopNode->
noutputs(); ++i)
133 auto out = loopNode->
output(i);
135 auto [branchNode, branchOperation] =
136 rvsdg::TryGetSimpleNodeAndOptionalOp<BranchOperation>(*res->origin());
137 if (!branchOperation)
143 auto oldBufInput = &branchNode->output(1)->SingleUser();
144 auto [oldBufferNode, oldBufferOperation] =
145 rvsdg::TryGetSimpleNodeAndOptionalOp<BufferOperation>(*oldBufInput);
146 if (rvsdg::IsOwnerNodeOperation<SinkOperation>(*oldBufInput))
152 auto oldBufNode = rvsdg::TryGetOwnerNode<rvsdg::SimpleNode>(*oldBufInput);
155 branchNode->input(1)->origin(),
156 oldBufferOperation->Capacity(),
157 oldBufferOperation->IsPassThrough());
161 branchNode->input(0)->origin(),
162 oldBufferOperation->Capacity(),
163 oldBufferOperation->IsPassThrough());
165 oldBufNode->output(0)->divert_users(oldBufInput->origin());
173 for (
size_t i = 0; i < loopNode->
ninputs(); ++i)
175 auto in = loopNode->
input(i);
177 auto user = &arg->SingleUser();
179 if (
auto [node, muxOperation] = rvsdg::TryGetSimpleNodeAndOptionalOp<MuxOperation>(*user);
182 if (!muxOperation->loop)
188 else if (rvsdg::IsOwnerNodeOperation<LoopConstantBufferOperation>(*user))
207 auto loop =
dynamic_cast<LoopNode *
>(node);
210 for (
size_t n = 0; n < structnode->nsubregions(); n++)
217 if (jlm::rvsdg::is<BufferOperation>(node))
221 else if (jlm::rvsdg::is<ForkOperation>(node))
225 else if (jlm::rvsdg::is<BranchOperation>(node))
229 else if (jlm::rvsdg::is<StateGateOperation>(node))
233 else if (jlm::rvsdg::is<AddressQueueOperation>(node))
243 static constexpr uint32_t
261 std::vector<jlm::rvsdg::SimpleNode *> nodes;
266 auto loop =
dynamic_cast<LoopNode *
>(node);
268 for (
size_t n = 0; n < structnode->nsubregions(); n++)
275 if (rvsdg::is<BufferOperation>(node))
279 else if (rvsdg::is<DecoupledLoadOperation>(node))
285 for (
auto node : nodes)
290 if (dl->capacity < capacity)
295 *node->input(0)->origin(),
296 *node->input(1)->origin(),
304 static std::vector<size_t>
307 auto max_cycles = *std::max_element(input_cycles.begin(), input_cycles.end());
312 return { max_cycles + 1 };
317 if (op->IsPassThrough())
319 return { max_cycles + 0 };
321 return { max_cycles + 1 };
325 return { input_cycles[0] };
327 else if (rvsdg::is<DecoupledLoadOperation>(node))
331 else if (rvsdg::is<StateGateOperation>(node))
335 if (rvsdg::IsOwnerNodeOperation<DecoupledLoadOperation>(*sg0_user) && sg0_user->index() == 1)
341 else if (rvsdg::is<StoreOperation>(node))
346 return std::vector<size_t>(node->
noutputs(), max_cycles);
351 static std::vector<size_t>
354 auto min_capacity = *std::min_element(input_capacities.begin(), input_capacities.end());
359 return { min_capacity + 1 };
364 return { min_capacity + op->Capacity() };
368 return { input_capacities[0] };
372 return { min_capacity + op->capacity, 0 };
374 else if (rvsdg::is<StateGateOperation>(node))
378 if (rvsdg::IsOwnerNodeOperation<DecoupledLoadOperation>(*sg0_user) && sg0_user->index() == 1)
384 else if (rvsdg::is<StoreOperation>(node))
388 return std::vector<size_t>(node->
noutputs(), min_capacity);
394 std::unordered_map<rvsdg::Output *, size_t> & output_cycles,
395 std::unordered_set<rvsdg::Input *> & frontier,
396 std::unordered_set<BackEdgeResult *> & stream_backedges,
397 std::unordered_set<rvsdg::SimpleNode *> & top_muxes)
399 for (
size_t i = 0; i < loop->
ninputs(); ++i)
401 auto in = loop->
input(i);
404 auto user = &arg->SingleUser();
405 auto userNode = rvsdg::TryGetOwnerNode<rvsdg::SimpleNode>(*user);
406 auto [muxNode, muxOperation] = rvsdg::TryGetSimpleNodeAndOptionalOp<MuxOperation>(*user);
407 if (rvsdg::IsOwnerNodeOperation<LoopConstantBufferOperation>(*user)
408 || (muxOperation && muxOperation->loop))
410 top_muxes.insert(userNode);
412 auto out = userNode->output(0);
413 output_cycles[out] = output_cycles[in->origin()];
414 frontier.insert(&out->SingleUser());
417 output_cycles[arg] = output_cycles[in->origin()];
418 frontier.insert(&arg->SingleUser());
423 auto out = tn.output(0);
424 output_cycles[out] = 0;
425 frontier.insert(&out->SingleUser());
435 auto [muxNode, muxOperation] = rvsdg::TryGetSimpleNodeAndOptionalOp<MuxOperation>(*user);
436 if ((muxOperation && muxOperation->loop))
440 if (rvsdg::IsOwnerNodeOperation<BufferOperation>(*user))
442 auto bufNode = rvsdg::TryGetOwnerNode<rvsdg::SimpleNode>(*user);
443 if (rvsdg::IsOwnerNodeOperation<PredicateBufferOperation>(bufNode->output(0)->SingleUser()))
450 output_cycles[arg] = 0;
451 frontier.insert(&arg->SingleUser());
452 stream_backedges.insert(backedge->result());
459 std::unordered_map<rvsdg::Output *, size_t> & output_cycles,
460 bool analyze_inner_loop =
false);
464 std::unordered_map<rvsdg::Output *, size_t> & output_cycles,
465 std::unordered_set<rvsdg::Input *> & frontier,
466 std::unordered_set<BackEdgeResult *> & stream_backedges,
467 std::unordered_set<rvsdg::SimpleNode *> & top_muxes)
469 bool changed =
false;
473 for (
auto in : frontier)
475 if (
auto simpleNode = rvsdg::TryGetOwnerNode<rvsdg::SimpleNode>(*in))
477 bool all_contained =
true;
478 for (
size_t i = 0; i < simpleNode->ninputs(); ++i)
480 auto f = frontier.find(simpleNode->input(i));
481 if (f == frontier.end())
483 all_contained =
false;
489 std::vector<size_t> input_cycles;
490 for (
size_t i = 0; i < simpleNode->ninputs(); ++i)
492 input_cycles.push_back(output_cycles[simpleNode->input(i)->origin()]);
493 frontier.erase(simpleNode->input(i));
495 std::vector<size_t> out_cycles =
NodeCycles(simpleNode, input_cycles);
497 if (top_muxes.find(simpleNode) != top_muxes.end())
499 if (
dynamic_cast<const MuxOperation *
>(&simpleNode->GetOperation()))
504 auto pred_latency = output_cycles[simpleNode->input(0)->origin()];
505 auto input_latency = output_cycles[simpleNode->input(1)->origin()];
506 auto backedge_latency = output_cycles[simpleNode->input(2)->origin()];
507 auto out_latency = backedge_latency - pred_latency + input_latency;
508 std::cout <<
"top_mux " << simpleNode <<
" pred latency: " << pred_latency
509 <<
" input latency: " << input_latency
510 <<
" backedge latency: " << backedge_latency
511 <<
" out latency: " << out_latency << std::endl;
512 output_cycles[simpleNode->output(0)] = out_latency;
516 JLM_ASSERT(rvsdg::is<LoopConstantBufferOperation>(simpleNode->GetOperation()));
522 for (
size_t i = 0; i < simpleNode->noutputs(); ++i)
524 auto out = simpleNode->output(i);
525 output_cycles[out] = out_cycles[i];
526 frontier.insert(&out->SingleUser());
535 auto out = be->argument();
536 if (stream_backedges.find(be) == stream_backedges.end())
539 output_cycles[out] = output_cycles[in->origin()];
540 frontier.insert(&out->SingleUser());
548 auto out = rr->output();
550 output_cycles[out] = output_cycles[in->origin()];
557 auto inner_loop = rvsdg::TryGetOwnerNode<LoopNode>(*in);
559 bool all_contained =
true;
560 for (
size_t i = 0; i < inner_loop->ninputs(); ++i)
562 auto f = frontier.find(inner_loop->input(i));
563 if (f == frontier.end())
565 all_contained =
false;
570 for (
size_t i = 0; i < inner_loop->ninputs(); ++i)
572 frontier.erase(inner_loop->input(i));
576 for (
size_t i = 0; i < inner_loop->noutputs(); ++i)
578 std::cout <<
"output latency " << i <<
" " << output_cycles[inner_loop->output(i)]
580 frontier.insert(&inner_loop->output(i)->SingleUser());
588 if (!frontier.empty())
590 std::unordered_map<rvsdg::Output *, std::string> o_color;
591 std::unordered_map<rvsdg::Input *, std::string> i_color;
592 for (
auto i : frontier)
594 i_color.insert({ i,
"red" });
603 std::unordered_map<rvsdg::Output *, size_t> & output_cycles,
604 bool analyze_inner_loop)
606 if (!analyze_inner_loop)
608 for (
size_t i = 0; i < loop->
ninputs(); ++i)
610 auto in = loop->
input(i);
611 output_cycles[in->origin()] = 0;
614 std::unordered_set<rvsdg::Input *> frontier;
615 std::unordered_set<BackEdgeResult *> stream_backedges;
616 std::unordered_set<rvsdg::SimpleNode *> top_muxes;
618 std::unordered_set<rvsdg::Input *> frontier2(frontier);
619 std::cout <<
"CalculateLoopCycleDepth(" << loop <<
", " << analyze_inner_loop <<
")" << std::endl;
628 std::unordered_map<rvsdg::Output *, std::string> o_color;
629 std::unordered_map<rvsdg::Input *, std::string> i_color;
630 std::unordered_map<rvsdg::Output *, std::string> tail_label;
631 if (!analyze_inner_loop)
633 for (
auto i : frontier2)
635 i_color.insert({ i,
"red" });
637 for (
auto [o, l] : output_cycles)
639 tail_label[o] = std::to_string(l);
642 std::cout <<
"second iteration" << std::endl;
644 if (!analyze_inner_loop)
646 for (
auto [o, l] : output_cycles)
648 tail_label[o] = std::to_string(l);
667 auto [forkNode, forkOperation] = rvsdg::TryGetSimpleNodeAndOptionalOp<ForkOperation>(*out);
668 JLM_ASSERT(!(forkOperation && forkOperation->IsConstant()));
670 if (rvsdg::is<rvsdg::LambdaOperation>(out->
region()->
node()))
677 if (arg && arg->input())
679 return PlaceBufferLoop(arg->input()->origin(), min_capacity, passThrough);
683 if (
auto [loopConstantNode, op] =
684 rvsdg::TryGetSimpleNodeAndOptionalOp<LoopConstantBufferOperation>(*out);
688 PlaceBufferLoop(loopConstantNode->input(0)->origin(), min_capacity, passThrough),
689 PlaceBufferLoop(loopConstantNode->input(1)->origin(), min_capacity, passThrough));
692 if (
auto [node, bufferOperation] = rvsdg::TryGetSimpleNodeAndOptionalOp<BufferOperation>(*out);
696 passThrough = passThrough && bufferOperation->IsPassThrough();
697 size_t capacity =
round_up_pow2(bufferOperation->Capacity() + min_capacity);
703 node->output(0)->divert_users(bufOut);
717 directUser.divert_to(newOut);
725 std::unordered_map<rvsdg::Output *, size_t> & output_cycles,
726 std::unordered_map<rvsdg::Output *, size_t> & buffer_capacity,
727 bool analyze_inner_loop =
false)
729 if (!analyze_inner_loop)
731 for (
size_t i = 0; i < loop->
ninputs(); ++i)
733 auto in = loop->
input(i);
734 buffer_capacity[in->origin()] = 0;
737 std::unordered_set<rvsdg::Input *> frontier;
738 std::unordered_set<BackEdgeResult *> stream_backedges;
739 std::unordered_set<rvsdg::SimpleNode *> top_muxes;
744 auto out = tn.output(0);
749 for (
size_t i = 0; i < loop->
ninputs(); ++i)
751 auto in = loop->
input(i);
758 std::unordered_map<rvsdg::Output *, std::string> o_color;
759 std::unordered_map<rvsdg::Input *, std::string> i_color;
760 std::unordered_map<rvsdg::Output *, std::string> tail_label;
761 if (!analyze_inner_loop)
763 for (
auto i : frontier)
765 i_color.insert({ i,
"red" });
767 for (
auto [o, l] : buffer_capacity)
769 tail_label[o] = std::to_string(l);
773 bool changed =
false;
777 for (
auto in : frontier)
779 if (
auto simpleNode = rvsdg::TryGetOwnerNode<rvsdg::SimpleNode>(*in))
781 bool all_contained =
true;
782 for (
size_t i = 0; i < simpleNode->ninputs(); ++i)
784 auto f = frontier.find(simpleNode->input(i));
785 if (f == frontier.end())
787 all_contained =
false;
793 size_t max_cycles = 0;
794 for (
size_t i = 0; i < simpleNode->ninputs(); ++i)
796 max_cycles = std::max(max_cycles, output_cycles[simpleNode->input(i)->origin()]);
797 frontier.erase(simpleNode->input(i));
800 std::vector<size_t> input_capacities;
802 for (
size_t i = 0; i < simpleNode->ninputs(); ++i)
804 auto capacity = buffer_capacity[simpleNode->input(i)->origin()];
805 if (!analyze_inner_loop && (!rvsdg::is<AddressQueueOperation>(simpleNode))
806 && capacity < max_cycles)
808 size_t capacity_diff = max_cycles - capacity;
809 capacity +=
PlaceBufferLoop(simpleNode->input(i)->origin(), capacity_diff,
true);
810 buffer_capacity[simpleNode->input(i)->origin()] = capacity;
812 input_capacities.push_back(capacity);
815 if (top_muxes.find(simpleNode) != top_muxes.end())
818 auto mux =
dynamic_cast<const MuxOperation *
>(&simpleNode->GetOperation());
821 std::cout <<
"top_mux " << simpleNode
822 <<
" pred capacity: " << buffer_capacity[simpleNode->input(0)->origin()]
823 <<
" backedge capacity: " << buffer_capacity[simpleNode->input(2)->origin()]
829 std::vector<size_t> out_capacities =
NodeCapacity(simpleNode, input_capacities);
830 for (
size_t i = 0; i < simpleNode->noutputs(); ++i)
832 auto out = simpleNode->output(i);
833 buffer_capacity[out] = out_capacities[i];
834 JLM_ASSERT(analyze_inner_loop || buffer_capacity[out] >= output_cycles[out]);
835 frontier.insert(&out->SingleUser());
844 auto out = be->argument();
845 buffer_capacity[out] = buffer_capacity[in->origin()];
846 if (stream_backedges.find(be) == stream_backedges.end())
848 frontier.insert(&out->SingleUser());
856 auto out = rr->output();
858 buffer_capacity[out] = buffer_capacity[in->origin()];
865 auto inner_loop = rvsdg::TryGetOwnerNode<LoopNode>(*in);
867 bool all_contained =
true;
868 for (
size_t i = 0; i < inner_loop->ninputs(); ++i)
870 auto f = frontier.find(inner_loop->input(i));
871 if (f == frontier.end())
873 all_contained =
false;
879 size_t max_cycles = 0;
880 for (
size_t i = 0; i < inner_loop->ninputs(); ++i)
882 max_cycles = std::max(max_cycles, output_cycles[inner_loop->input(i)->origin()]);
883 frontier.erase(inner_loop->input(i));
886 for (
size_t i = 0; i < inner_loop->ninputs(); ++i)
888 auto capacity = buffer_capacity[inner_loop->input(i)->origin()];
889 if (!analyze_inner_loop && capacity < max_cycles)
891 auto user = &inner_loop->input(i)->arguments.begin().ptr()->SingleUser();
892 auto [muxNode, muxOperation] =
893 rvsdg::TryGetSimpleNodeAndOptionalOp<MuxOperation>(*user);
894 if ((muxOperation && muxOperation->loop)
895 || rvsdg::IsOwnerNodeOperation<LoopConstantBufferOperation>(*user))
897 size_t capacity_diff = max_cycles - capacity;
898 capacity +=
PlaceBufferLoop(inner_loop->input(i)->origin(), capacity_diff,
true);
899 buffer_capacity[inner_loop->input(i)->origin()] = capacity;
909 for (
size_t i = 0; i < inner_loop->noutputs(); ++i)
911 frontier.insert(&inner_loop->output(i)->SingleUser());
927 if (!analyze_inner_loop)
929 for (
auto [o, l] : buffer_capacity)
931 tail_label[o] = std::to_string(l);
941 if (
auto loop =
dynamic_cast<LoopNode *
>(node))
945 std::unordered_map<rvsdg::Output *, size_t> output_cycles;
947 std::unordered_map<rvsdg::Output *, size_t> buffer_capacity;
962 const auto & graph = rvsdgModule.
Rvsdg();
964 if (rootRegion->numNodes() != 1)
966 throw std::logic_error(
"Root should have only one node now");
969 const auto lambda =
dynamic_cast<const rvsdg::LambdaNode *
>(rootRegion->Nodes().begin().ptr());
972 throw std::logic_error(
"Node needs to be a lambda");
void Run(rvsdg::RvsdgModule &rvsdgModule, util::StatisticsCollector &statisticsCollector) override
Perform RVSDG transformation.
~BufferInsertion() noexcept override
static std::vector< jlm::rvsdg::Output * > create(jlm::rvsdg::Output &value, size_t capacity, bool pass_through=false)
static std::vector< jlm::rvsdg::Output * > create(jlm::rvsdg::Output &addr, jlm::rvsdg::Output &load_result, size_t capacity)
rvsdg::Region * subregion() const noexcept
Region & GetRootRegion() const noexcept
bool IsDead() const noexcept
Determines whether the node is dead.
rvsdg::Region * region() const noexcept
size_t ninputs() const noexcept
size_t noutputs() const noexcept
rvsdg::Input & SingleUser() noexcept
rvsdg::Region * region() const noexcept
Represents the argument of a region.
Represents the result of a region.
Represent acyclic RVSDG subgraphs.
RegionArgumentRange Arguments() noexcept
rvsdg::StructuralNode * node() const noexcept
TopNodeRange TopNodes() noexcept
const SimpleOperation & GetOperation() const noexcept override
NodeInput * input(size_t index) const noexcept
NodeOutput * output(size_t index) const noexcept
StructuralOutput * output(size_t index) const noexcept
StructuralInput * input(size_t index) const noexcept
Iterator begin() noexcept
#define JLM_UNREACHABLE(msg)
static size_t MemoryLatency
static void CalculateLoopCycleDepth(LoopNode *loop, std::unordered_map< rvsdg::Output *, size_t > &output_cycles, bool analyze_inner_loop=false)
static std::vector< size_t > NodeCycles(rvsdg::SimpleNode *node, std::vector< size_t > &input_cycles)
static void CreateLoopFrontier(const LoopNode *loop, std::unordered_map< rvsdg::Output *, size_t > &output_cycles, std::unordered_set< rvsdg::Input * > &frontier, std::unordered_set< BackEdgeResult * > &stream_backedges, std::unordered_set< rvsdg::SimpleNode * > &top_muxes)
static void PushCycleFrontier(std::unordered_map< rvsdg::Output *, size_t > &output_cycles, std::unordered_set< rvsdg::Input * > &frontier, std::unordered_set< BackEdgeResult * > &stream_backedges, std::unordered_set< rvsdg::SimpleNode * > &top_muxes)
static void AddBuffers(rvsdg::Region *region)
static constexpr uint32_t round_up_pow2(uint32_t x)
static rvsdg::Input * FindUserNode(rvsdg::Output *out)
static void divert_users(jlm::rvsdg::Output *output, Context &ctx)
static bool is_constant(const rvsdg::Node *node)
static void OptimizeBuffer(rvsdg::SimpleNode *node)
static void PlaceBuffer(rvsdg::Output *out, size_t capacity, bool passThrough)
const size_t UnlimitedBufferCapacity
static void CalculateLoopDepths(rvsdg::Region *region)
static std::vector< size_t > NodeCapacity(rvsdg::SimpleNode *node, std::vector< size_t > &input_capacities)
static size_t PlaceBufferLoop(rvsdg::Output *out, size_t min_capacity, bool passThrough)
static void OptimizeLoop(LoopNode *loopNode)
static void OptimizeAddrQ(rvsdg::SimpleNode *node)
void setMemoryLatency(size_t memoryLatency)
static void MaximizeBuffers(rvsdg::Region *region)
static void AdjustLoopBuffers(LoopNode *loop, std::unordered_map< rvsdg::Output *, size_t > &output_cycles, std::unordered_map< rvsdg::Output *, size_t > &buffer_capacity, bool analyze_inner_loop=false)
const size_t MaximumBufferSize
static void remove(Node *node)