Jlm
UnrollTests.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2017 Nico Reißmann <nico.reissmann@gmail.com>
3  * See COPYING for terms of redistribution.
4  */
5 
6 #include <gtest/gtest.h>
7 
10 #include <jlm/llvm/opt/unroll.hpp>
14 #include <jlm/rvsdg/gamma.hpp>
16 #include <jlm/rvsdg/theta.hpp>
17 #include <jlm/rvsdg/traverser.hpp>
18 #include <jlm/util/Statistics.hpp>
19 
21 
22 static size_t
24 {
25  size_t n = 0;
26  for (const auto & node : region->Nodes())
27  {
28  if (dynamic_cast<const jlm::rvsdg::ThetaNode *>(&node))
29  n++;
30  }
31 
32  return n;
33 }
34 
35 static jlm::rvsdg::ThetaNode *
39  jlm::rvsdg::Output * init,
40  jlm::rvsdg::Output * step,
41  jlm::rvsdg::Output * end)
42 {
43  using namespace jlm::rvsdg;
44 
45  auto graph = init->region()->graph();
46 
47  auto theta = ThetaNode::create(&graph->GetRootRegion());
48  auto subregion = theta->subregion();
49  auto idv = theta->AddLoopVar(init);
50  auto lvs = theta->AddLoopVar(step);
51  auto lve = theta->AddLoopVar(end);
52 
53  auto arm = SimpleNode::Create(*subregion, aop.copy(), { idv.pre, lvs.pre }).output(0);
54  auto cmp = SimpleNode::Create(*subregion, cop.copy(), { arm, lve.pre }).output(0);
55  auto & matchNode = MatchOperation::CreateNode(*cmp, { { 1, 1 } }, 0, 2);
56 
57  idv.post->divert_to(arm);
58  theta->set_predicate(matchNode.output(0));
59 
60  return theta;
61 }
62 
63 TEST(LoopUnrollingTests, test_unrollinfo)
64 {
65  auto bt32 = jlm::rvsdg::BitType::Create(32);
66  jlm::rvsdg::bitslt_op slt(32);
67  jlm::rvsdg::bitult_op ult(32);
68  jlm::rvsdg::bitule_op ule(32);
69  jlm::rvsdg::bitugt_op ugt(32);
70  jlm::rvsdg::bitsge_op sge(32);
71  jlm::rvsdg::biteq_op eq(32);
72 
73  jlm::rvsdg::bitadd_op add(32);
74  jlm::rvsdg::bitsub_op sub(32);
75 
76  {
77  jlm::rvsdg::Graph graph;
78  auto x = &jlm::rvsdg::GraphImport::Create(graph, bt32, "x");
79  auto theta = create_theta(slt, add, x, x, x);
80  auto ui = jlm::llvm::LoopUnrollInfo::create(theta);
81 
82  EXPECT_NE(ui, nullptr);
83  EXPECT_TRUE(ui->is_additive());
84  EXPECT_FALSE(ui->is_subtractive());
85  EXPECT_FALSE(ui->is_known());
86  EXPECT_FALSE(ui->niterations());
87  EXPECT_EQ(ui->theta(), theta);
88  EXPECT_EQ(theta->MapPreLoopVar(*ui->idv()).input->origin(), x);
89  }
90 
91  {
92  jlm::rvsdg::Graph graph;
93 
94  auto init0 = &jlm::rvsdg::BitConstantOperation::create(graph.GetRootRegion(), { 32, 0 });
95  auto init1 = &jlm::rvsdg::BitConstantOperation::create(graph.GetRootRegion(), { 32, 1 });
96  auto initm1 =
97  &jlm::rvsdg::BitConstantOperation::create(graph.GetRootRegion(), { 32, 0xFFFFFFFF });
98 
99  auto step1 = &jlm::rvsdg::BitConstantOperation::create(graph.GetRootRegion(), { 32, 1 });
100  auto step0 = &jlm::rvsdg::BitConstantOperation::create(graph.GetRootRegion(), { 32, 0 });
101  auto stepm1 =
102  &jlm::rvsdg::BitConstantOperation::create(graph.GetRootRegion(), { 32, 0xFFFFFFFF });
103  auto step2 = &jlm::rvsdg::BitConstantOperation::create(graph.GetRootRegion(), { 32, 2 });
104 
105  auto end100 = &jlm::rvsdg::BitConstantOperation::create(graph.GetRootRegion(), { 32, 100 });
106 
107  auto theta = create_theta(ult, add, init0, step1, end100);
108  auto ui = jlm::llvm::LoopUnrollInfo::create(theta);
109  EXPECT_TRUE(ui && *ui->niterations() == 100);
110 
111  theta = create_theta(ule, add, init0, step1, end100);
113  EXPECT_TRUE(ui && *ui->niterations() == 101);
114 
115  theta = create_theta(ugt, sub, end100, stepm1, init0);
117  EXPECT_TRUE(ui && *ui->niterations() == 100);
118 
119  theta = create_theta(sge, sub, end100, stepm1, init0);
121  EXPECT_TRUE(ui && *ui->niterations() == 101);
122 
123  theta = create_theta(ult, add, init0, step0, end100);
125  EXPECT_TRUE(ui && !ui->niterations());
126 
127  theta = create_theta(eq, add, initm1, step1, end100);
129  EXPECT_TRUE(ui && *ui->niterations() == 101);
130 
131  theta = create_theta(eq, add, init1, step2, end100);
133  EXPECT_TRUE(ui && !ui->niterations());
134  }
135 }
136 
137 TEST(LoopUnrollingTests, test_known_boundaries)
138 {
139  jlm::rvsdg::bitult_op ult(32);
140  jlm::rvsdg::bitsgt_op sgt(32);
141  jlm::rvsdg::bitadd_op add(32);
142  jlm::rvsdg::bitsub_op sub(32);
143 
144  {
145  jlm::rvsdg::Graph graph;
146 
147  auto init = &jlm::rvsdg::BitConstantOperation::create(graph.GetRootRegion(), { 32, 0 });
148  auto step = &jlm::rvsdg::BitConstantOperation::create(graph.GetRootRegion(), { 32, 1 });
149  auto end = &jlm::rvsdg::BitConstantOperation::create(graph.GetRootRegion(), { 32, 4 });
150 
151  auto theta = create_theta(ult, add, init, step, end);
152  // jlm::rvsdg::view(graph, stdout);
153  jlm::llvm::unroll(theta, 4);
154  // jlm::rvsdg::view(graph, stdout);
155  /*
156  The unroll factor is greater than or equal the number of iterations.
157  The loop should be fully unrolled and the theta removed.
158  */
159  EXPECT_EQ(nthetas(&graph.GetRootRegion()), 0u);
160  }
161 
162  {
163  jlm::rvsdg::Graph graph;
164 
165  auto init = &jlm::rvsdg::BitConstantOperation::create(graph.GetRootRegion(), { 32, 0 });
166  auto step = &jlm::rvsdg::BitConstantOperation::create(graph.GetRootRegion(), { 32, 1 });
167  auto end = &jlm::rvsdg::BitConstantOperation::create(graph.GetRootRegion(), { 32, 100 });
168 
169  auto theta = create_theta(ult, add, init, step, end);
170  // jlm::rvsdg::view(graph, stdout);
171  jlm::llvm::unroll(theta, 2);
172  // jlm::rvsdg::view(graph, stdout);
173  /*
174  The unroll factor is a multiple of the number of iterations.
175  We should only find one (unrolled) theta.
176  */
177  EXPECT_EQ(nthetas(&graph.GetRootRegion()), 1u);
178  }
179 
180  {
181  jlm::rvsdg::Graph graph;
182 
183  auto init = &jlm::rvsdg::BitConstantOperation::create(graph.GetRootRegion(), { 32, 0 });
184  auto step = &jlm::rvsdg::BitConstantOperation::create(graph.GetRootRegion(), { 32, 1 });
185  auto end = &jlm::rvsdg::BitConstantOperation::create(graph.GetRootRegion(), { 32, 100 });
186 
187  auto theta = create_theta(ult, add, init, step, end);
188  // jlm::rvsdg::view(graph, stdout);
189  jlm::llvm::unroll(theta, 3);
190  // jlm::rvsdg::view(graph, stdout);
191  /*
192  The unroll factor is NOT a multiple of the number of iterations
193  and we have one remaining iteration. We should find only the
194  unrolled theta and the body of the old theta as epilogue.
195  */
196  EXPECT_EQ(nthetas(&graph.GetRootRegion()), 1u);
197  }
198 
199  {
200  jlm::rvsdg::Graph graph;
201 
202  auto init = &jlm::rvsdg::BitConstantOperation::create(graph.GetRootRegion(), { 32, 100 });
203  auto step = &jlm::rvsdg::BitConstantOperation::create(graph.GetRootRegion(), { 32, -1 });
204  auto end = &jlm::rvsdg::BitConstantOperation::create(graph.GetRootRegion(), { 32, 0 });
205 
206  auto theta = create_theta(sgt, sub, init, step, end);
207  // jlm::rvsdg::view(graph, stdout);
208  jlm::llvm::unroll(theta, 6);
209  // jlm::rvsdg::view(graph, stdout);
210  /*
211  The unroll factor is NOT a multiple of the number of iterations
212  and we have four remaining iterations. We should find two thetas:
213  one unrolled theta and one theta for the residual iterations.
214  */
215  EXPECT_EQ(nthetas(&graph.GetRootRegion()), 2u);
216  }
217 }
218 
219 TEST(LoopUnrollingTests, test_unknown_boundaries)
220 {
221  using namespace jlm::llvm;
222  using namespace jlm::rvsdg;
223 
224  auto bt = jlm::rvsdg::BitType::Create(32);
225  TestOperation op({ bt }, { bt });
226 
228  auto & graph = rm.Rvsdg();
229 
230  auto x = &jlm::rvsdg::GraphImport::Create(graph, bt, "x");
231  auto y = &jlm::rvsdg::GraphImport::Create(graph, bt, "y");
232 
233  auto theta = jlm::rvsdg::ThetaNode::create(&graph.GetRootRegion());
234  auto lv1 = theta->AddLoopVar(x);
235  auto lv2 = theta->AddLoopVar(y);
236 
237  auto one = &BitConstantOperation::create(*theta->subregion(), { 32, 1 });
238  auto add = jlm::rvsdg::bitadd_op::create(32, lv1.pre, one);
239  auto cmp = jlm::rvsdg::bitult_op::create(32, add, lv2.pre);
240  auto & matchNode = MatchOperation::CreateNode(*cmp, { { 1, 0 } }, 1, 2);
241 
242  lv1.post->divert_to(add);
243 
244  theta->set_predicate(matchNode.output(0));
245 
246  auto & ex1 = GraphExport::Create(*lv1.output, "x");
247 
248  // jlm::rvsdg::view(graph, stdout);
249  jlm::llvm::LoopUnrolling loopunroll(2);
250  loopunroll.Run(rm, statisticsCollector);
251  // jlm::rvsdg::view(graph, stdout);
252 
253  auto node = jlm::rvsdg::TryGetOwnerNode<jlm::rvsdg::GammaNode>(*ex1.origin());
254  EXPECT_NE(node, nullptr);
255  node = jlm::rvsdg::TryGetOwnerNode<jlm::rvsdg::GammaNode>(*node->input(1)->origin());
256  EXPECT_NE(node, nullptr);
257 
258  /* Create cleaner output */
260  dne.Run(rm, statisticsCollector);
261  // jlm::rvsdg::view(graph, stdout);
262 }
263 
264 static std::vector<jlm::rvsdg::ThetaNode *>
266 {
267  std::vector<jlm::rvsdg::ThetaNode *> thetas;
268  for (auto & node : jlm::rvsdg::TopDownTraverser(region))
269  {
270  if (auto theta = dynamic_cast<jlm::rvsdg::ThetaNode *>(node))
271  thetas.push_back(theta);
272  }
273 
274  return thetas;
275 }
276 
277 TEST(LoopUnrollingTests, test_nested_theta)
278 {
280  auto & graph = rm.Rvsdg();
281 
282  auto init = &jlm::rvsdg::BitConstantOperation::create(graph.GetRootRegion(), { 32, 0 });
283  auto step = &jlm::rvsdg::BitConstantOperation::create(graph.GetRootRegion(), { 32, 1 });
284  auto end = &jlm::rvsdg::BitConstantOperation::create(graph.GetRootRegion(), { 32, 97 });
285 
286  /* Outer loop */
287  auto otheta = jlm::rvsdg::ThetaNode::create(&graph.GetRootRegion());
288 
289  auto lvo_init = otheta->AddLoopVar(init);
290  auto lvo_step = otheta->AddLoopVar(step);
291  auto lvo_end = otheta->AddLoopVar(end);
292 
293  auto add = jlm::rvsdg::bitadd_op::create(32, lvo_init.pre, lvo_step.pre);
294  auto compare = jlm::rvsdg::bitult_op::create(32, add, lvo_end.pre);
295  auto & matchNode = jlm::rvsdg::MatchOperation::CreateNode(*compare, { { 1, 1 } }, 0, 2);
296  otheta->set_predicate(matchNode.output(0));
297  lvo_init.post->divert_to(add);
298 
299  /* First inner loop in the original loop */
300  auto inner_theta = jlm::rvsdg::ThetaNode::create(otheta->subregion());
301 
302  auto inner_init = &jlm::rvsdg::BitConstantOperation::create(*otheta->subregion(), { 32, 0 });
303  auto lvi_init = inner_theta->AddLoopVar(inner_init);
304  auto lvi_step = inner_theta->AddLoopVar(lvo_step.pre);
305  auto lvi_end = inner_theta->AddLoopVar(lvo_end.pre);
306 
307  auto inner_add = jlm::rvsdg::bitadd_op::create(32, lvi_init.pre, lvi_step.pre);
308  auto inner_compare = jlm::rvsdg::bitult_op::create(32, inner_add, lvi_end.pre);
309  auto & innerMatchNode =
310  jlm::rvsdg::MatchOperation::CreateNode(*inner_compare, { { 1, 1 } }, 0, 2);
311  inner_theta->set_predicate(innerMatchNode.output(0));
312  lvi_init.post->divert_to(inner_add);
313 
314  /* Nested inner loop */
315  auto inner_nested_theta = jlm::rvsdg::ThetaNode::create(inner_theta->subregion());
316 
317  auto inner_nested_init =
318  &jlm::rvsdg::BitConstantOperation::create(*inner_theta->subregion(), { 32, 0 });
319  auto lvi_nested_init = inner_nested_theta->AddLoopVar(inner_nested_init);
320  auto lvi_nested_step = inner_nested_theta->AddLoopVar(lvi_step.pre);
321  auto lvi_nested_end = inner_nested_theta->AddLoopVar(lvi_end.pre);
322 
323  auto inner_nested_add =
324  jlm::rvsdg::bitadd_op::create(32, lvi_nested_init.pre, lvi_nested_step.pre);
325  auto inner_nested_compare =
326  jlm::rvsdg::bitult_op::create(32, inner_nested_add, lvi_nested_end.pre);
327  auto & innerNestedMatchNode =
328  jlm::rvsdg::MatchOperation::CreateNode(*inner_nested_compare, { { 1, 1 } }, 0, 2);
329  inner_nested_theta->set_predicate(innerNestedMatchNode.output(0));
330  lvi_nested_init.post->divert_to(inner_nested_add);
331 
332  /* Second inner loop in the original loop */
333  auto inner2_theta = jlm::rvsdg::ThetaNode::create(otheta->subregion());
334 
335  auto inner2_init = &jlm::rvsdg::BitConstantOperation::create(*otheta->subregion(), { 32, 0 });
336  auto lvi2_init = inner2_theta->AddLoopVar(inner2_init);
337  auto lvi2_step = inner2_theta->AddLoopVar(lvo_step.pre);
338  auto lvi2_end = inner2_theta->AddLoopVar(lvo_end.pre);
339 
340  auto inner2_add = jlm::rvsdg::bitadd_op::create(32, lvi2_init.pre, lvi2_step.pre);
341  auto inner2_compare = jlm::rvsdg::bitult_op::create(32, inner2_add, lvi2_end.pre);
342  auto & inner2MatchNode =
343  jlm::rvsdg::MatchOperation::CreateNode(*inner2_compare, { { 1, 1 } }, 0, 2);
344  inner2_theta->set_predicate(inner2MatchNode.output(0));
345  lvi2_init.post->divert_to(inner2_add);
346 
347  // jlm::rvsdg::view(graph, stdout);
348  jlm::llvm::LoopUnrolling loopunroll(4);
349  loopunroll.Run(rm, statisticsCollector);
350  /*
351  The outher theta should contain two inner thetas
352  */
353  EXPECT_EQ(nthetas(otheta->subregion()), 2u);
354  /*
355  The outer theta should not be unrolled and since the
356  original graph contains 7 nodes and the unroll factor
357  is 4 an unrolled theta should have around 28 nodes. So
358  we check for less than 20 nodes in case an updated
359  unroll algorithm would hoist code from the inner
360  thetas.
361  */
362  EXPECT_LE(otheta->subregion()->numNodes(), 20u);
363  /*
364  The inner theta should not be unrolled and since the
365  original graph contains 5 nodes and the unroll factor
366  is 4 an unrolled theta should have around 20 nodes. So
367  we check for less than 15 nodes in case an updated
368  unroll algorithm would hoist code from the inner
369  thetas.
370  */
371  EXPECT_LE(inner_theta->subregion()->numNodes(), 15u);
372  /*
373  The innermost theta should be unrolled and since the
374  original graph contains 3 nodes and the unroll factor
375  is 4 an unrolled theta should have around 12 nodes. So
376  we check for more than 7 nodes in case an updated
377  unroll algorithm would hoist code from the inner
378  thetas.
379  */
380  auto thetas = find_thetas(inner_theta->subregion());
381  EXPECT_EQ(thetas.size(), 1u);
382  EXPECT_GE(thetas[0]->subregion()->numNodes(), 7u);
383  /*
384  The second inner theta should be unrolled and since
385  the original graph contains 3 nodes and the unroll
386  factor is 4 an unrolled theta should have around 12
387  nodes. So we check for less than 7 nodes in case an
388  updated unroll algorithm would hoist code from the
389  innner thetas.
390  */
391  thetas = find_thetas(otheta->subregion());
392  EXPECT_EQ(thetas.size(), 2u);
393  EXPECT_GE(thetas[1]->subregion()->numNodes(), 7u);
394  // jlm::rvsdg::view(graph, stdout);
395  jlm::llvm::unroll(otheta, 4);
396  // jlm::rvsdg::view(graph, stdout);
397  /*
398  After unrolling the outher theta four times it should
399  now contain 8 thetas.
400  */
401  thetas = find_thetas(&graph.GetRootRegion());
402  EXPECT_EQ(thetas.size(), 3u);
403  EXPECT_EQ(nthetas(thetas[0]->subregion()), 8u);
404 }
static jlm::rvsdg::ThetaNode * create_theta(const jlm::rvsdg::BitCompareOperation &cop, const jlm::rvsdg::BitBinaryOperation &aop, jlm::rvsdg::Output *init, jlm::rvsdg::Output *step, jlm::rvsdg::Output *end)
Definition: UnrollTests.cpp:36
TEST(LoopUnrollingTests, test_unrollinfo)
Definition: UnrollTests.cpp:63
static size_t nthetas(jlm::rvsdg::Region *region)
Definition: UnrollTests.cpp:23
static jlm::util::StatisticsCollector statisticsCollector
Definition: UnrollTests.cpp:20
static std::vector< jlm::rvsdg::ThetaNode * > find_thetas(jlm::rvsdg::Region *region)
Dead Node Elimination Optimization.
void Run(rvsdg::RvsdgModule &module, util::StatisticsCollector &statisticsCollector) override
Perform RVSDG transformation.
static std::unique_ptr< LoopUnrollInfo > create(rvsdg::ThetaNode *theta)
Definition: unroll.cpp:135
Optimization that attempts to unroll loops (thetas).
Definition: unroll.hpp:24
void Run(rvsdg::RvsdgModule &module, util::StatisticsCollector &statisticsCollector) override
Definition: unroll.cpp:525
static Output & create(Region &region, BitValueRepresentation value)
Definition: constant.hpp:44
static std::shared_ptr< const BitType > Create(std::size_t nbits)
Creates bit type of specified width.
Definition: type.cpp:45
static GraphImport & Create(Graph &graph, std::shared_ptr< const rvsdg::Type > type, std::string name)
Definition: graph.cpp:36
Region & GetRootRegion() const noexcept
Definition: graph.hpp:99
std::unique_ptr< BitBinaryOperation > create(size_t nbits) const override
std::unique_ptr< BitCompareOperation > create(size_t nbits) const override
static Node & CreateNode(Output &predicate, const std::unordered_map< uint64_t, uint64_t > &mapping, const uint64_t defaultAlternative, const size_t numAlternatives)
Definition: control.hpp:226
virtual std::unique_ptr< Operation > copy() const =0
rvsdg::Region * region() const noexcept
Definition: node.cpp:151
Represent acyclic RVSDG subgraphs.
Definition: region.hpp:213
Graph * graph() const noexcept
Definition: region.hpp:363
NodeRange Nodes() noexcept
Definition: region.hpp:328
Graph & Rvsdg() noexcept
Definition: RvsdgModule.hpp:57
NodeOutput * output(size_t index) const noexcept
Definition: simple-node.hpp:88
static SimpleNode & Create(Region &region, std::unique_ptr< Operation > operation, const std::vector< rvsdg::Output * > &operands)
Definition: simple-node.hpp:49
static ThetaNode * create(rvsdg::Region *parent)
Definition: theta.hpp:73
Global memory state passed between functions.
void unroll(rvsdg::ThetaNode *otheta, size_t factor)
Definition: unroll.cpp:482