Line data Source code
1 : /*
2 : * Copyright 2012-2014 Ecole Normale Superieure
3 : * Copyright 2014 INRIA Rocquencourt
4 : *
5 : * Use of this software is governed by the MIT license
6 : *
7 : * Written by Sven Verdoolaege,
8 : * Ecole Normale Superieure, 45 rue d’Ulm, 75230 Paris, France
9 : * and Inria Paris - Rocquencourt, Domaine de Voluceau - Rocquencourt,
10 : * B.P. 105 - 78153 Le Chesnay, France
11 : */
12 :
13 : #include <limits.h>
14 : #include <isl/id.h>
15 : #include <isl/val.h>
16 : #include <isl/space.h>
17 : #include <isl/aff.h>
18 : #include <isl/constraint.h>
19 : #include <isl/set.h>
20 : #include <isl/ilp.h>
21 : #include <isl/union_set.h>
22 : #include <isl/union_map.h>
23 : #include <isl/schedule_node.h>
24 : #include <isl/options.h>
25 : #include <isl_sort.h>
26 : #include <isl_tarjan.h>
27 : #include <isl_ast_private.h>
28 : #include <isl_ast_build_expr.h>
29 : #include <isl_ast_build_private.h>
30 : #include <isl_ast_graft_private.h>
31 :
32 : /* Try and reduce the number of disjuncts in the representation of "set",
33 : * without dropping explicit representations of local variables.
34 : */
35 0 : static __isl_give isl_set *isl_set_coalesce_preserve(__isl_take isl_set *set)
36 : {
37 : isl_ctx *ctx;
38 : int save_preserve;
39 :
40 0 : if (!set)
41 0 : return NULL;
42 :
43 0 : ctx = isl_set_get_ctx(set);
44 0 : save_preserve = isl_options_get_coalesce_preserve_locals(ctx);
45 0 : isl_options_set_coalesce_preserve_locals(ctx, 1);
46 0 : set = isl_set_coalesce(set);
47 0 : isl_options_set_coalesce_preserve_locals(ctx, save_preserve);
48 0 : return set;
49 : }
50 :
51 : /* Data used in generate_domain.
52 : *
53 : * "build" is the input build.
54 : * "list" collects the results.
55 : */
56 : struct isl_generate_domain_data {
57 : isl_ast_build *build;
58 :
59 : isl_ast_graft_list *list;
60 : };
61 :
62 : static __isl_give isl_ast_graft_list *generate_next_level(
63 : __isl_take isl_union_map *executed,
64 : __isl_take isl_ast_build *build);
65 : static __isl_give isl_ast_graft_list *generate_code(
66 : __isl_take isl_union_map *executed, __isl_take isl_ast_build *build,
67 : int internal);
68 :
69 : /* Generate an AST for a single domain based on
70 : * the (non single valued) inverse schedule "executed".
71 : *
72 : * We extend the schedule with the iteration domain
73 : * and continue generating through a call to generate_code.
74 : *
75 : * In particular, if executed has the form
76 : *
77 : * S -> D
78 : *
79 : * then we continue generating code on
80 : *
81 : * [S -> D] -> D
82 : *
83 : * The extended inverse schedule is clearly single valued
84 : * ensuring that the nested generate_code will not reach this function,
85 : * but will instead create calls to all elements of D that need
86 : * to be executed from the current schedule domain.
87 : */
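 : /* For illustration (with a made-up statement S and domain A):
 : * an inverse schedule such as
 : *
 : * { S[i] -> A[j] : 0 <= j <= i }
 : *
 : * is not single valued, since one schedule point S[i] is mapped to
 : * several domain elements A[j]. Taking the domain product with the
 : * identity on its range gives
 : *
 : * { [S[i] -> A[j]] -> A[j] : 0 <= j <= i }
 : *
 : * which maps each element of the extended domain to exactly one
 : * domain element and can therefore be handled by the single-valued
 : * code generation path.
 : */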
88 0 : static isl_stat generate_non_single_valued(__isl_take isl_map *executed,
89 : struct isl_generate_domain_data *data)
90 : {
91 : isl_map *identity;
92 : isl_ast_build *build;
93 : isl_ast_graft_list *list;
94 :
95 0 : build = isl_ast_build_copy(data->build);
96 :
97 0 : identity = isl_set_identity(isl_map_range(isl_map_copy(executed)));
98 0 : executed = isl_map_domain_product(executed, identity);
99 0 : build = isl_ast_build_set_single_valued(build, 1);
100 :
101 0 : list = generate_code(isl_union_map_from_map(executed), build, 1);
102 :
103 0 : data->list = isl_ast_graft_list_concat(data->list, list);
104 :
105 0 : return isl_stat_ok;
106 : }
107 :
108 : /* Call the at_each_domain callback, if requested by the user,
109 : * after recording the current inverse schedule in the build.
110 : */
111 0 : static __isl_give isl_ast_graft *at_each_domain(__isl_take isl_ast_graft *graft,
112 : __isl_keep isl_map *executed, __isl_keep isl_ast_build *build)
113 : {
114 0 : if (!graft || !build)
115 0 : return isl_ast_graft_free(graft);
116 0 : if (!build->at_each_domain)
117 0 : return graft;
118 :
119 0 : build = isl_ast_build_copy(build);
120 0 : build = isl_ast_build_set_executed(build,
121 : isl_union_map_from_map(isl_map_copy(executed)));
122 0 : if (!build)
123 0 : return isl_ast_graft_free(graft);
124 :
125 0 : graft->node = build->at_each_domain(graft->node,
126 : build, build->at_each_domain_user);
127 0 : isl_ast_build_free(build);
128 :
129 0 : if (!graft->node)
130 0 : graft = isl_ast_graft_free(graft);
131 :
132 0 : return graft;
133 : }
134 :
135 : /* Generate a call expression for the single executed
136 : * domain element "map" and put a guard around it based on its (simplified)
137 : * domain. "executed" is the original inverse schedule from which "map"
138 : * has been derived. In particular, "map" is either identical to "executed"
139 : * or it is the result of gisting "executed" with respect to the build domain.
140 : * "executed" is only used if there is an at_each_domain callback.
141 : *
142 : * At this stage, any pending constraints in the build can no longer
143 : * be simplified with respect to any enforced constraints since
144 : * the call node does not have any enforced constraints.
145 : * Since all pending constraints not covered by any enforced constraints
146 : * will be added as a guard to the graft in create_node_scaled,
147 : * even in the eliminated case, the pending constraints
148 : * can be considered to have been generated by outer constructs.
149 : *
150 : * If the user has set an at_each_domain callback, it is called
151 : * on the constructed call expression node.
152 : */
153 0 : static isl_stat add_domain(__isl_take isl_map *executed,
154 : __isl_take isl_map *map, struct isl_generate_domain_data *data)
155 : {
156 : isl_ast_build *build;
157 : isl_ast_graft *graft;
158 : isl_ast_graft_list *list;
159 : isl_set *guard, *pending;
160 :
161 0 : build = isl_ast_build_copy(data->build);
162 0 : pending = isl_ast_build_get_pending(build);
163 0 : build = isl_ast_build_replace_pending_by_guard(build, pending);
164 :
165 0 : guard = isl_map_domain(isl_map_copy(map));
166 0 : guard = isl_set_compute_divs(guard);
167 0 : guard = isl_set_coalesce_preserve(guard);
168 0 : guard = isl_set_gist(guard, isl_ast_build_get_generated(build));
169 0 : guard = isl_ast_build_specialize(build, guard);
170 :
171 0 : graft = isl_ast_graft_alloc_domain(map, build);
172 0 : graft = at_each_domain(graft, executed, build);
173 0 : isl_ast_build_free(build);
174 0 : isl_map_free(executed);
175 0 : graft = isl_ast_graft_add_guard(graft, guard, data->build);
176 :
177 0 : list = isl_ast_graft_list_from_ast_graft(graft);
178 0 : data->list = isl_ast_graft_list_concat(data->list, list);
179 :
180 0 : return isl_stat_ok;
181 : }
182 :
183 : /* Generate an AST for a single domain based on
184 : * the inverse schedule "executed" and add it to data->list.
185 : *
186 : * If there is more than one domain element associated to the current
187 : * schedule "time", then we need to continue the generation process
188 : * in generate_non_single_valued.
189 : * Note that the inverse schedule being single-valued may depend
190 : * on constraints that are only available in the original context
191 : * domain specified by the user. We therefore first introduce
192 : * some of the constraints of data->build->domain. In particular,
193 : * we intersect with a single-disjunct approximation of this set.
194 : * We perform this approximation to avoid further splitting up
195 : * the executed relation, possibly introducing a disjunctive guard
196 : * on the statement.
197 : *
198 : * On the other hand, we only perform the test after having taken the gist
199 : * of the domain as the resulting map is the one from which the call
200 : * expression is constructed. Using this map to construct the call
201 : * expression usually yields simpler results in cases where the original
202 : * map is not obviously single-valued.
203 : * If the original map is obviously single-valued, then the gist
204 : * operation is skipped.
205 : *
206 : * Because we perform the single-valuedness test on the gisted map,
207 : * we may in rare cases fail to recognize that the inverse schedule
208 : * is single-valued. This becomes problematic if this happens
209 : * from the recursive call through generate_non_single_valued
210 : * as we would then end up in an infinite recursion.
211 : * We therefore check if we are inside a call to generate_non_single_valued
212 : * and revert to the ungisted map if the gisted map turns out not to be
213 : * single-valued.
214 : *
215 : * Otherwise, call add_domain to generate a call expression (with guard) and
216 : * to call the at_each_domain callback, if any.
217 : */
218 0 : static isl_stat generate_domain(__isl_take isl_map *executed, void *user)
219 : {
220 0 : struct isl_generate_domain_data *data = user;
221 : isl_set *domain;
222 0 : isl_map *map = NULL;
223 : int empty, sv;
224 :
225 0 : domain = isl_ast_build_get_domain(data->build);
226 0 : domain = isl_set_from_basic_set(isl_set_simple_hull(domain));
227 0 : executed = isl_map_intersect_domain(executed, domain);
228 0 : empty = isl_map_is_empty(executed);
229 0 : if (empty < 0)
230 0 : goto error;
231 0 : if (empty) {
232 0 : isl_map_free(executed);
233 0 : return isl_stat_ok;
234 : }
235 :
236 0 : sv = isl_map_plain_is_single_valued(executed);
237 0 : if (sv < 0)
238 0 : goto error;
239 0 : if (sv)
240 0 : return add_domain(executed, isl_map_copy(executed), data);
241 :
242 0 : executed = isl_map_coalesce(executed);
243 0 : map = isl_map_copy(executed);
244 0 : map = isl_ast_build_compute_gist_map_domain(data->build, map);
245 0 : sv = isl_map_is_single_valued(map);
246 0 : if (sv < 0)
247 0 : goto error;
248 0 : if (!sv) {
249 0 : isl_map_free(map);
250 0 : if (data->build->single_valued)
251 0 : map = isl_map_copy(executed);
252 : else
253 0 : return generate_non_single_valued(executed, data);
254 : }
255 :
256 0 : return add_domain(executed, map, data);
257 : error:
258 0 : isl_map_free(map);
259 0 : isl_map_free(executed);
260 0 : return isl_stat_error;
261 : }
262 :
263 : /* Call build->create_leaf to create a "leaf" node in the AST,
264 : * encapsulate the result in an isl_ast_graft and return the result
265 : * as a 1-element list.
266 : *
267 : * Note that the node returned by the user may be an entire tree.
268 : *
269 : * Since the node itself cannot enforce any constraints, we turn
270 : * all pending constraints into guards and add them to the resulting
271 : * graft to ensure that they will be generated.
272 : *
273 : * Before we pass control to the user, we first clear some information
274 : * from the build that is (presumably) only meaningful
275 : * for the current code generation.
276 : * This includes the create_leaf callback itself, so we make a copy
277 : * of the build first.
278 : */
279 0 : static __isl_give isl_ast_graft_list *call_create_leaf(
280 : __isl_take isl_union_map *executed, __isl_take isl_ast_build *build)
281 : {
282 : isl_set *guard;
283 : isl_ast_node *node;
284 : isl_ast_graft *graft;
285 : isl_ast_build *user_build;
286 :
287 0 : guard = isl_ast_build_get_pending(build);
288 0 : user_build = isl_ast_build_copy(build);
289 0 : user_build = isl_ast_build_replace_pending_by_guard(user_build,
290 : isl_set_copy(guard));
291 0 : user_build = isl_ast_build_set_executed(user_build, executed);
292 0 : user_build = isl_ast_build_clear_local_info(user_build);
293 0 : if (!user_build)
294 0 : node = NULL;
295 : else
296 0 : node = build->create_leaf(user_build, build->create_leaf_user);
297 0 : graft = isl_ast_graft_alloc(node, build);
298 0 : graft = isl_ast_graft_add_guard(graft, guard, build);
299 0 : isl_ast_build_free(build);
300 0 : return isl_ast_graft_list_from_ast_graft(graft);
301 : }
302 :
303 : static __isl_give isl_ast_graft_list *build_ast_from_child(
304 : __isl_take isl_ast_build *build, __isl_take isl_schedule_node *node,
305 : __isl_take isl_union_map *executed);
306 :
307 : /* Generate an AST after having handled the complete schedule
308 : * of this call to the code generator or the complete band
309 : * if we are generating an AST from a schedule tree.
310 : *
311 : * If we are inside a band node, then move on to the child of the band.
312 : *
313 : * If the user has specified a create_leaf callback, control
314 : * is passed to the user in call_create_leaf.
315 : *
316 : * Otherwise, we generate one or more calls for each individual
317 : * domain in generate_domain.
318 : */
319 0 : static __isl_give isl_ast_graft_list *generate_inner_level(
320 : __isl_take isl_union_map *executed, __isl_take isl_ast_build *build)
321 : {
322 : isl_ctx *ctx;
323 0 : struct isl_generate_domain_data data = { build };
324 :
325 0 : if (!build || !executed)
326 : goto error;
327 :
328 0 : if (isl_ast_build_has_schedule_node(build)) {
329 : isl_schedule_node *node;
330 0 : node = isl_ast_build_get_schedule_node(build);
331 0 : build = isl_ast_build_reset_schedule_node(build);
332 0 : return build_ast_from_child(build, node, executed);
333 : }
334 :
335 0 : if (build->create_leaf)
336 0 : return call_create_leaf(executed, build);
337 :
338 0 : ctx = isl_union_map_get_ctx(executed);
339 0 : data.list = isl_ast_graft_list_alloc(ctx, 0);
340 0 : if (isl_union_map_foreach_map(executed, &generate_domain, &data) < 0)
341 0 : data.list = isl_ast_graft_list_free(data.list);
342 :
343 : if (0)
344 0 : error: data.list = NULL;
345 0 : isl_ast_build_free(build);
346 0 : isl_union_map_free(executed);
347 0 : return data.list;
348 : }
349 :
350 : /* Call the before_each_for callback, if requested by the user.
351 : */
352 0 : static __isl_give isl_ast_node *before_each_for(__isl_take isl_ast_node *node,
353 : __isl_keep isl_ast_build *build)
354 : {
355 : isl_id *id;
356 :
357 0 : if (!node || !build)
358 0 : return isl_ast_node_free(node);
359 0 : if (!build->before_each_for)
360 0 : return node;
361 0 : id = build->before_each_for(build, build->before_each_for_user);
362 0 : node = isl_ast_node_set_annotation(node, id);
363 0 : return node;
364 : }
365 :
366 : /* Call the after_each_for callback, if requested by the user.
367 : */
368 0 : static __isl_give isl_ast_graft *after_each_for(__isl_take isl_ast_graft *graft,
369 : __isl_keep isl_ast_build *build)
370 : {
371 0 : if (!graft || !build)
372 0 : return isl_ast_graft_free(graft);
373 0 : if (!build->after_each_for)
374 0 : return graft;
375 0 : graft->node = build->after_each_for(graft->node, build,
376 : build->after_each_for_user);
377 0 : if (!graft->node)
378 0 : return isl_ast_graft_free(graft);
379 0 : return graft;
380 : }
381 :
382 : /* Plug in all the known values of the current and outer dimensions
383 : * in the domain of "executed". In principle, we only need to plug
384 : * in the known value of the current dimension since the values of
385 : * outer dimensions have been plugged in already.
386 : * However, it turns out to be easier to just plug in all known values.
387 : */
388 0 : static __isl_give isl_union_map *plug_in_values(
389 : __isl_take isl_union_map *executed, __isl_keep isl_ast_build *build)
390 : {
391 0 : return isl_ast_build_substitute_values_union_map_domain(build,
392 : executed);
393 : }
394 :
395 : /* Check if the constraint "c" is a lower bound on dimension "pos",
396 : * an upper bound, or independent of dimension "pos".
397 : */
398 0 : static int constraint_type(isl_constraint *c, int pos)
399 : {
400 0 : if (isl_constraint_is_lower_bound(c, isl_dim_set, pos))
401 0 : return 1;
402 0 : if (isl_constraint_is_upper_bound(c, isl_dim_set, pos))
403 0 : return 2;
404 0 : return 0;
405 : }
406 :
407 : /* Compare the types of the constraints "a" and "b",
408 : * so that constraints that are independent of "depth" are
409 : * sorted before the lower bounds on "depth", which in
410 : * turn are sorted before the upper bounds on "depth".
411 : */
412 0 : static int cmp_constraint(__isl_keep isl_constraint *a,
413 : __isl_keep isl_constraint *b, void *user)
414 : {
415 0 : int *depth = user;
416 0 : int t1 = constraint_type(a, *depth);
417 0 : int t2 = constraint_type(b, *depth);
418 :
419 0 : return t1 - t2;
420 : }
421 :
422 : /* Extract a lower bound on dimension "pos" from constraint "c".
423 : *
424 : * If the constraint is of the form
425 : *
426 : * a x + f(...) >= 0
427 : *
428 : * then we essentially return
429 : *
430 : * l = ceil(-f(...)/a)
431 : *
432 : * However, if the current dimension is strided, then we need to make
433 : * sure that the lower bound we construct is of the form
434 : *
435 : * f + s a
436 : *
437 : * with f the offset and s the stride.
438 : * We therefore compute
439 : *
440 : * f + s * ceil((l - f)/s)
441 : */
442 0 : static __isl_give isl_aff *lower_bound(__isl_keep isl_constraint *c,
443 : int pos, __isl_keep isl_ast_build *build)
444 : {
445 : isl_aff *aff;
446 :
447 0 : aff = isl_constraint_get_bound(c, isl_dim_set, pos);
448 0 : aff = isl_aff_ceil(aff);
449 :
450 0 : if (isl_ast_build_has_stride(build, pos)) {
451 : isl_aff *offset;
452 : isl_val *stride;
453 :
454 0 : offset = isl_ast_build_get_offset(build, pos);
455 0 : stride = isl_ast_build_get_stride(build, pos);
456 :
457 0 : aff = isl_aff_sub(aff, isl_aff_copy(offset));
458 0 : aff = isl_aff_scale_down_val(aff, isl_val_copy(stride));
459 0 : aff = isl_aff_ceil(aff);
460 0 : aff = isl_aff_scale_val(aff, stride);
461 0 : aff = isl_aff_add(aff, offset);
462 : }
463 :
464 0 : aff = isl_ast_build_compute_gist_aff(build, aff);
465 :
466 0 : return aff;
467 : }
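 : /* A worked instance of the stride adjustment in lower_bound above
 : * (the numbers are purely illustrative): with offset f = 1 and
 : * stride s = 4, i.e., values of the form 1 + 4 a, a raw lower bound
 : * l = 7 becomes
 : *
 : * 1 + 4 * ceil((7 - 1)/4) = 1 + 4 * 2 = 9
 : *
 : * the smallest value of the form 1 + 4 a that is at least 7.
 : */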
468 :
469 : /* Return the exact lower bound (or upper bound if "upper" is set)
470 : * of "domain" as a piecewise affine expression.
471 : *
472 : * If we are computing a lower bound (of a strided dimension), then
473 : * we need to make sure it is of the form
474 : *
475 : * f + s a
476 : *
477 : * where f is the offset and s is the stride.
478 : * We therefore need to include the stride constraint before computing
479 : * the minimum.
480 : */
481 0 : static __isl_give isl_pw_aff *exact_bound(__isl_keep isl_set *domain,
482 : __isl_keep isl_ast_build *build, int upper)
483 : {
484 : isl_set *stride;
485 : isl_map *it_map;
486 : isl_pw_aff *pa;
487 : isl_pw_multi_aff *pma;
488 :
489 0 : domain = isl_set_copy(domain);
490 0 : if (!upper) {
491 0 : stride = isl_ast_build_get_stride_constraint(build);
492 0 : domain = isl_set_intersect(domain, stride);
493 : }
494 0 : it_map = isl_ast_build_map_to_iterator(build, domain);
495 0 : if (upper)
496 0 : pma = isl_map_lexmax_pw_multi_aff(it_map);
497 : else
498 0 : pma = isl_map_lexmin_pw_multi_aff(it_map);
499 0 : pa = isl_pw_multi_aff_get_pw_aff(pma, 0);
500 0 : isl_pw_multi_aff_free(pma);
501 0 : pa = isl_ast_build_compute_gist_pw_aff(build, pa);
502 0 : pa = isl_pw_aff_coalesce(pa);
503 :
504 0 : return pa;
505 : }
506 :
507 : /* Callback for sorting the isl_pw_aff_list passed to reduce_list and
508 : * remove_redundant_lower_bounds.
509 : */
510 0 : static int reduce_list_cmp(__isl_keep isl_pw_aff *a, __isl_keep isl_pw_aff *b,
511 : void *user)
512 : {
513 0 : return isl_pw_aff_plain_cmp(a, b);
514 : }
515 :
516 : /* Given a list of lower bounds "list", remove those that are redundant
517 : * with respect to the other bounds in "list" and the domain of "build".
518 : *
519 : * We first sort the bounds in the same way as they would be sorted
520 : * by set_for_node_expressions so that we can try and remove the last
521 : * bounds first.
522 : *
523 : * For a lower bound to be effective, there needs to be at least
524 : * one domain element for which it is larger than all other lower bounds.
525 : * For each lower bound we therefore intersect the domain with
526 : * the conditions that it is larger than all other bounds and
527 : * check whether the result is empty. If so, the bound can be removed.
528 : */
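 : /* For example (with hypothetical bounds): if the lower bounds are
 : * { 0, n } and the domain of the build implies n <= 0, then the
 : * bound n is larger than the bound 0 only where n > 0, which is
 : * empty within that domain, so n is dropped and only the lower
 : * bound 0 is kept.
 : */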
529 0 : static __isl_give isl_pw_aff_list *remove_redundant_lower_bounds(
530 : __isl_take isl_pw_aff_list *list, __isl_keep isl_ast_build *build)
531 : {
532 : int i, j, n;
533 : isl_set *domain;
534 :
535 0 : list = isl_pw_aff_list_sort(list, &reduce_list_cmp, NULL);
536 0 : if (!list)
537 0 : return NULL;
538 :
539 0 : n = isl_pw_aff_list_n_pw_aff(list);
540 0 : if (n <= 1)
541 0 : return list;
542 :
543 0 : domain = isl_ast_build_get_domain(build);
544 :
545 0 : for (i = n - 1; i >= 0; --i) {
546 : isl_pw_aff *pa_i;
547 : isl_set *domain_i;
548 : int empty;
549 :
550 0 : domain_i = isl_set_copy(domain);
551 0 : pa_i = isl_pw_aff_list_get_pw_aff(list, i);
552 :
553 0 : for (j = 0; j < n; ++j) {
554 : isl_pw_aff *pa_j;
555 : isl_set *better;
556 :
557 0 : if (j == i)
558 0 : continue;
559 :
560 0 : pa_j = isl_pw_aff_list_get_pw_aff(list, j);
561 0 : better = isl_pw_aff_gt_set(isl_pw_aff_copy(pa_i), pa_j);
562 0 : domain_i = isl_set_intersect(domain_i, better);
563 : }
564 :
565 0 : empty = isl_set_is_empty(domain_i);
566 :
567 0 : isl_set_free(domain_i);
568 0 : isl_pw_aff_free(pa_i);
569 :
570 0 : if (empty < 0)
571 0 : goto error;
572 0 : if (!empty)
573 0 : continue;
574 0 : list = isl_pw_aff_list_drop(list, i, 1);
575 0 : n--;
576 : }
577 :
578 0 : isl_set_free(domain);
579 :
580 0 : return list;
581 : error:
582 0 : isl_set_free(domain);
583 0 : return isl_pw_aff_list_free(list);
584 : }
585 :
586 : /* Extract a lower bound on dimension "pos" from each constraint
587 : * in "constraints" and return the list of lower bounds.
588 : * If "constraints" has zero elements, then we extract a lower bound
589 : * from "domain" instead.
590 : *
591 : * If the current dimension is strided, then the lower bound
592 : * is adjusted by lower_bound to match the stride information.
593 : * This modification may make one or more lower bounds redundant
594 : * with respect to the other lower bounds. We therefore check
595 : * for this condition and remove the redundant lower bounds.
596 : */
597 0 : static __isl_give isl_pw_aff_list *lower_bounds(
598 : __isl_keep isl_constraint_list *constraints, int pos,
599 : __isl_keep isl_set *domain, __isl_keep isl_ast_build *build)
600 : {
601 : isl_ctx *ctx;
602 : isl_pw_aff_list *list;
603 : int i, n;
604 :
605 0 : if (!build)
606 0 : return NULL;
607 :
608 0 : n = isl_constraint_list_n_constraint(constraints);
609 0 : if (n == 0) {
610 : isl_pw_aff *pa;
611 0 : pa = exact_bound(domain, build, 0);
612 0 : return isl_pw_aff_list_from_pw_aff(pa);
613 : }
614 :
615 0 : ctx = isl_ast_build_get_ctx(build);
616 0 : list = isl_pw_aff_list_alloc(ctx,n);
617 :
618 0 : for (i = 0; i < n; ++i) {
619 : isl_aff *aff;
620 : isl_constraint *c;
621 :
622 0 : c = isl_constraint_list_get_constraint(constraints, i);
623 0 : aff = lower_bound(c, pos, build);
624 0 : isl_constraint_free(c);
625 0 : list = isl_pw_aff_list_add(list, isl_pw_aff_from_aff(aff));
626 : }
627 :
628 0 : if (isl_ast_build_has_stride(build, pos))
629 0 : list = remove_redundant_lower_bounds(list, build);
630 :
631 0 : return list;
632 : }
633 :
634 : /* Extract an upper bound on dimension "pos" from each constraint
635 : * in "constraints" and return the list of upper bounds.
636 : * If "constraints" has zero elements, then we extract an upper bound
637 : * from "domain" instead.
638 : */
639 0 : static __isl_give isl_pw_aff_list *upper_bounds(
640 : __isl_keep isl_constraint_list *constraints, int pos,
641 : __isl_keep isl_set *domain, __isl_keep isl_ast_build *build)
642 : {
643 : isl_ctx *ctx;
644 : isl_pw_aff_list *list;
645 : int i, n;
646 :
647 0 : n = isl_constraint_list_n_constraint(constraints);
648 0 : if (n == 0) {
649 : isl_pw_aff *pa;
650 0 : pa = exact_bound(domain, build, 1);
651 0 : return isl_pw_aff_list_from_pw_aff(pa);
652 : }
653 :
654 0 : ctx = isl_ast_build_get_ctx(build);
655 0 : list = isl_pw_aff_list_alloc(ctx,n);
656 :
657 0 : for (i = 0; i < n; ++i) {
658 : isl_aff *aff;
659 : isl_constraint *c;
660 :
661 0 : c = isl_constraint_list_get_constraint(constraints, i);
662 0 : aff = isl_constraint_get_bound(c, isl_dim_set, pos);
663 0 : isl_constraint_free(c);
664 0 : aff = isl_aff_floor(aff);
665 0 : list = isl_pw_aff_list_add(list, isl_pw_aff_from_aff(aff));
666 : }
667 :
668 0 : return list;
669 : }
670 :
671 : /* Return an isl_ast_expr that performs the reduction of type "type"
672 : * on AST expressions corresponding to the elements in "list".
673 : *
674 : * The list is assumed to contain at least one element.
675 : * If the list contains exactly one element, then the returned isl_ast_expr
676 : * simply computes that affine expression.
677 : * If the list contains more than one element, then we sort it
678 : * using a fairly arbitrary but hopefully reasonably stable order.
679 : */
680 0 : static __isl_give isl_ast_expr *reduce_list(enum isl_ast_op_type type,
681 : __isl_keep isl_pw_aff_list *list, __isl_keep isl_ast_build *build)
682 : {
683 : int i, n;
684 : isl_ctx *ctx;
685 : isl_ast_expr *expr;
686 :
687 0 : if (!list)
688 0 : return NULL;
689 :
690 0 : n = isl_pw_aff_list_n_pw_aff(list);
691 :
692 0 : if (n == 1)
693 0 : return isl_ast_build_expr_from_pw_aff_internal(build,
694 0 : isl_pw_aff_list_get_pw_aff(list, 0));
695 :
696 0 : ctx = isl_pw_aff_list_get_ctx(list);
697 0 : expr = isl_ast_expr_alloc_op(ctx, type, n);
698 0 : if (!expr)
699 0 : return NULL;
700 :
701 0 : list = isl_pw_aff_list_copy(list);
702 0 : list = isl_pw_aff_list_sort(list, &reduce_list_cmp, NULL);
703 0 : if (!list)
704 0 : return isl_ast_expr_free(expr);
705 :
706 0 : for (i = 0; i < n; ++i) {
707 : isl_ast_expr *expr_i;
708 :
709 0 : expr_i = isl_ast_build_expr_from_pw_aff_internal(build,
710 0 : isl_pw_aff_list_get_pw_aff(list, i));
711 0 : if (!expr_i)
712 0 : goto error;
713 0 : expr->u.op.args[i] = expr_i;
714 : }
715 :
716 0 : isl_pw_aff_list_free(list);
717 0 : return expr;
718 : error:
719 0 : isl_pw_aff_list_free(list);
720 0 : isl_ast_expr_free(expr);
721 0 : return NULL;
722 : }
723 :
724 : /* Add guards implied by the "generated constraints",
725 : * but not (necessarily) enforced by the generated AST to "guard".
726 : * In particular, if there are any stride constraints,
727 : * then add the guard implied by those constraints.
728 : * If we have generated a degenerate loop, then add the guard
729 : * implied by "bounds" on the outer dimensions, i.e., the guard
730 : * that ensures that the single value actually exists.
731 : * Since there may also be guards implied by a combination
732 : * of these constraints, we first combine them before
733 : * deriving the implied constraints.
734 : */
735 0 : static __isl_give isl_set *add_implied_guards(__isl_take isl_set *guard,
736 : int degenerate, __isl_keep isl_basic_set *bounds,
737 : __isl_keep isl_ast_build *build)
738 : {
739 : int depth, has_stride;
740 : isl_space *space;
741 : isl_set *dom, *set;
742 :
743 0 : depth = isl_ast_build_get_depth(build);
744 0 : has_stride = isl_ast_build_has_stride(build, depth);
745 0 : if (!has_stride && !degenerate)
746 0 : return guard;
747 :
748 0 : space = isl_basic_set_get_space(bounds);
749 0 : dom = isl_set_universe(space);
750 :
751 0 : if (degenerate) {
752 0 : bounds = isl_basic_set_copy(bounds);
753 0 : bounds = isl_basic_set_drop_constraints_not_involving_dims(
754 : bounds, isl_dim_set, depth, 1);
755 0 : set = isl_set_from_basic_set(bounds);
756 0 : dom = isl_set_intersect(dom, set);
757 : }
758 :
759 0 : if (has_stride) {
760 0 : set = isl_ast_build_get_stride_constraint(build);
761 0 : dom = isl_set_intersect(dom, set);
762 : }
763 :
764 0 : dom = isl_set_eliminate(dom, isl_dim_set, depth, 1);
765 0 : dom = isl_ast_build_compute_gist(build, dom);
766 0 : guard = isl_set_intersect(guard, dom);
767 :
768 0 : return guard;
769 : }
770 :
771 : /* Update "graft" based on "sub_build" for the degenerate case.
772 : *
773 : * "build" is the build in which graft->node was created
774 : * "sub_build" contains information about the current level itself,
775 : * including the single value attained.
776 : *
777 : * We set the initialization part of the for loop to the single
778 : * value attained by the current dimension.
779 : * The increment and condition are not strictly needed as they are known
780 : * to be "1" and "iterator <= value" respectively.
781 : */
782 0 : static __isl_give isl_ast_graft *refine_degenerate(
783 : __isl_take isl_ast_graft *graft, __isl_keep isl_ast_build *build,
784 : __isl_keep isl_ast_build *sub_build)
785 : {
786 : isl_pw_aff *value;
787 :
788 0 : if (!graft || !sub_build)
789 0 : return isl_ast_graft_free(graft);
790 :
791 0 : value = isl_pw_aff_copy(sub_build->value);
792 :
793 0 : graft->node->u.f.init = isl_ast_build_expr_from_pw_aff_internal(build,
794 : value);
795 0 : if (!graft->node->u.f.init)
796 0 : return isl_ast_graft_free(graft);
797 :
798 0 : return graft;
799 : }
800 :
801 : /* Return the intersection of constraints in "list" as a set.
802 : */
803 0 : static __isl_give isl_set *intersect_constraints(
804 : __isl_keep isl_constraint_list *list)
805 : {
806 : int i, n;
807 : isl_basic_set *bset;
808 :
809 0 : n = isl_constraint_list_n_constraint(list);
810 0 : if (n < 1)
811 0 : isl_die(isl_constraint_list_get_ctx(list), isl_error_internal,
812 : "expecting at least one constraint", return NULL);
813 :
814 0 : bset = isl_basic_set_from_constraint(
815 0 : isl_constraint_list_get_constraint(list, 0));
816 0 : for (i = 1; i < n; ++i) {
817 : isl_basic_set *bset_i;
818 :
819 0 : bset_i = isl_basic_set_from_constraint(
820 0 : isl_constraint_list_get_constraint(list, i));
821 0 : bset = isl_basic_set_intersect(bset, bset_i);
822 : }
823 :
824 0 : return isl_set_from_basic_set(bset);
825 : }
826 :
827 : /* Compute the constraints on the outer dimensions enforced by
828 : * graft->node and add those constraints to graft->enforced,
829 : * in case the upper bound is expressed as a set "upper".
830 : *
831 : * In particular, if l(...) is a lower bound in "lower", and
832 : *
833 : * -a i + f(...) >= 0 or a i <= f(...)
834 : *
835 : * is an upper bound constraint on the current dimension i,
836 : * then the for loop enforces the constraint
837 : *
838 : * -a l(...) + f(...) >= 0 or a l(...) <= f(...)
839 : *
840 : * We therefore simply take each lower bound in turn, plug it into
841 : * the upper bounds and compute the intersection over all lower bounds.
842 : *
843 : * If a lower bound is a rational expression, then
844 : * isl_basic_set_preimage_multi_aff will force this rational
845 : * expression to have only integer values. However, the loop
846 : * itself does not enforce this integrality constraint. We therefore
847 : * use the ceil of the lower bounds instead of the lower bounds themselves.
848 : * Other constraints will make sure that the for loop is only executed
849 : * when each of the lower bounds attains an integral value.
850 : * In particular, potentially rational values only occur in
851 : * lower_bound if the offset is a (seemingly) rational expression,
852 : * but then outer conditions will make sure that this rational expression
853 : * only attains integer values.
854 : */
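 : /* A small illustration of the substitution described above
 : * (the bounds are made up): if the upper bound constraint is
 : *
 : * i <= N - 1, i.e., -i + N - 1 >= 0
 : *
 : * and one of the lower bounds is 2 j, then plugging that lower bound
 : * into the constraint yields the enforced condition
 : *
 : * -2 j + N - 1 >= 0, i.e., 2 j <= N - 1
 : *
 : * on the outer dimension j.
 : */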
855 0 : static __isl_give isl_ast_graft *set_enforced_from_set(
856 : __isl_take isl_ast_graft *graft,
857 : __isl_keep isl_pw_aff_list *lower, int pos, __isl_keep isl_set *upper)
858 : {
859 : isl_space *space;
860 : isl_basic_set *enforced;
861 : isl_pw_multi_aff *pma;
862 : int i, n;
863 :
864 0 : if (!graft || !lower)
865 0 : return isl_ast_graft_free(graft);
866 :
867 0 : space = isl_set_get_space(upper);
868 0 : enforced = isl_basic_set_universe(isl_space_copy(space));
869 :
870 0 : space = isl_space_map_from_set(space);
871 0 : pma = isl_pw_multi_aff_identity(space);
872 :
873 0 : n = isl_pw_aff_list_n_pw_aff(lower);
874 0 : for (i = 0; i < n; ++i) {
875 : isl_pw_aff *pa;
876 : isl_set *enforced_i;
877 : isl_basic_set *hull;
878 : isl_pw_multi_aff *pma_i;
879 :
880 0 : pa = isl_pw_aff_list_get_pw_aff(lower, i);
881 0 : pa = isl_pw_aff_ceil(pa);
882 0 : pma_i = isl_pw_multi_aff_copy(pma);
883 0 : pma_i = isl_pw_multi_aff_set_pw_aff(pma_i, pos, pa);
884 0 : enforced_i = isl_set_copy(upper);
885 0 : enforced_i = isl_set_preimage_pw_multi_aff(enforced_i, pma_i);
886 0 : hull = isl_set_simple_hull(enforced_i);
887 0 : enforced = isl_basic_set_intersect(enforced, hull);
888 : }
889 :
890 0 : isl_pw_multi_aff_free(pma);
891 :
892 0 : graft = isl_ast_graft_enforce(graft, enforced);
893 :
894 0 : return graft;
895 : }
896 :
897 : /* Compute the constraints on the outer dimensions enforced by
898 : * graft->node and add those constraints to graft->enforced,
899 : * in case the upper bound is expressed as
900 : * a list of affine expressions "upper".
901 : *
902 : * The enforced condition is that each lower bound expression is less
903 : * than or equal to each upper bound expression.
904 : */
905 0 : static __isl_give isl_ast_graft *set_enforced_from_list(
906 : __isl_take isl_ast_graft *graft,
907 : __isl_keep isl_pw_aff_list *lower, __isl_keep isl_pw_aff_list *upper)
908 : {
909 : isl_set *cond;
910 : isl_basic_set *enforced;
911 :
912 0 : lower = isl_pw_aff_list_copy(lower);
913 0 : upper = isl_pw_aff_list_copy(upper);
914 0 : cond = isl_pw_aff_list_le_set(lower, upper);
915 0 : enforced = isl_set_simple_hull(cond);
916 0 : graft = isl_ast_graft_enforce(graft, enforced);
917 :
918 0 : return graft;
919 : }
920 :
921 : /* Does "aff" have a negative constant term?
922 : */
923 0 : static isl_stat aff_constant_is_negative(__isl_take isl_set *set,
924 : __isl_take isl_aff *aff, void *user)
925 : {
926 0 : int *neg = user;
927 : isl_val *v;
928 :
929 0 : v = isl_aff_get_constant_val(aff);
930 0 : *neg = isl_val_is_neg(v);
931 0 : isl_val_free(v);
932 0 : isl_set_free(set);
933 0 : isl_aff_free(aff);
934 :
935 0 : return *neg ? isl_stat_ok : isl_stat_error;
936 : }
937 :
938 : /* Does "pa" have a negative constant term over its entire domain?
939 : */
940 0 : static isl_stat pw_aff_constant_is_negative(__isl_take isl_pw_aff *pa,
941 : void *user)
942 : {
943 : isl_stat r;
944 0 : int *neg = user;
945 :
946 0 : r = isl_pw_aff_foreach_piece(pa, &aff_constant_is_negative, user);
947 0 : isl_pw_aff_free(pa);
948 :
949 0 : return (*neg && r >= 0) ? isl_stat_ok : isl_stat_error;
950 : }
951 :
952 : /* Does each element in "list" have a negative constant term?
953 : *
954 : * The callback terminates the iteration as soon as an element has been
955 : * found that does not have a negative constant term.
956 : */
957 0 : static int list_constant_is_negative(__isl_keep isl_pw_aff_list *list)
958 : {
959 0 : int neg = 1;
960 :
961 0 : if (isl_pw_aff_list_foreach(list,
962 0 : &pw_aff_constant_is_negative, &neg) < 0 && neg)
963 0 : return -1;
964 :
965 0 : return neg;
966 : }
967 :
968 : /* Add 1 to each of the elements in "list", where each of these elements
969 : * is defined over the internal schedule space of "build".
970 : */
971 0 : static __isl_give isl_pw_aff_list *list_add_one(
972 : __isl_take isl_pw_aff_list *list, __isl_keep isl_ast_build *build)
973 : {
974 : int i, n;
975 : isl_space *space;
976 : isl_aff *aff;
977 : isl_pw_aff *one;
978 :
979 0 : space = isl_ast_build_get_space(build, 1);
980 0 : aff = isl_aff_zero_on_domain(isl_local_space_from_space(space));
981 0 : aff = isl_aff_add_constant_si(aff, 1);
982 0 : one = isl_pw_aff_from_aff(aff);
983 :
984 0 : n = isl_pw_aff_list_n_pw_aff(list);
985 0 : for (i = 0; i < n; ++i) {
986 : isl_pw_aff *pa;
987 0 : pa = isl_pw_aff_list_get_pw_aff(list, i);
988 0 : pa = isl_pw_aff_add(pa, isl_pw_aff_copy(one));
989 0 : list = isl_pw_aff_list_set_pw_aff(list, i, pa);
990 : }
991 :
992 0 : isl_pw_aff_free(one);
993 :
994 0 : return list;
995 : }
996 :
997 : /* Set the condition part of the for node graft->node in case
998 : * the upper bound is represented as a list of piecewise affine expressions.
999 : *
1000 : * In particular, set the condition to
1001 : *
1002 : * iterator <= min(list of upper bounds)
1003 : *
1004 : * If each of the upper bounds has a negative constant term, then
1005 : * set the condition to
1006 : *
1007 : * iterator < min(list of (upper bound + 1)s)
1008 : *
1009 : */
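 : /* For instance (with an illustrative bound): an upper bound n - 1
 : * has a negative constant term, so instead of
 : *
 : * i <= n - 1
 : *
 : * the generated condition becomes
 : *
 : * i < n
 : */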
1010 0 : static __isl_give isl_ast_graft *set_for_cond_from_list(
1011 : __isl_take isl_ast_graft *graft, __isl_keep isl_pw_aff_list *list,
1012 : __isl_keep isl_ast_build *build)
1013 : {
1014 : int neg;
1015 : isl_ast_expr *bound, *iterator, *cond;
1016 0 : enum isl_ast_op_type type = isl_ast_op_le;
1017 :
1018 0 : if (!graft || !list)
1019 0 : return isl_ast_graft_free(graft);
1020 :
1021 0 : neg = list_constant_is_negative(list);
1022 0 : if (neg < 0)
1023 0 : return isl_ast_graft_free(graft);
1024 0 : list = isl_pw_aff_list_copy(list);
1025 0 : if (neg) {
1026 0 : list = list_add_one(list, build);
1027 0 : type = isl_ast_op_lt;
1028 : }
1029 :
1030 0 : bound = reduce_list(isl_ast_op_min, list, build);
1031 0 : iterator = isl_ast_expr_copy(graft->node->u.f.iterator);
1032 0 : cond = isl_ast_expr_alloc_binary(type, iterator, bound);
1033 0 : graft->node->u.f.cond = cond;
1034 :
1035 0 : isl_pw_aff_list_free(list);
1036 0 : if (!graft->node->u.f.cond)
1037 0 : return isl_ast_graft_free(graft);
1038 0 : return graft;
1039 : }
1040 :
1041 : /* Set the condition part of the for node graft->node in case
1042 : * the upper bound is represented as a set.
1043 : */
1044 0 : static __isl_give isl_ast_graft *set_for_cond_from_set(
1045 : __isl_take isl_ast_graft *graft, __isl_keep isl_set *set,
1046 : __isl_keep isl_ast_build *build)
1047 : {
1048 : isl_ast_expr *cond;
1049 :
1050 0 : if (!graft)
1051 0 : return NULL;
1052 :
1053 0 : cond = isl_ast_build_expr_from_set_internal(build, isl_set_copy(set));
1054 0 : graft->node->u.f.cond = cond;
1055 0 : if (!graft->node->u.f.cond)
1056 0 : return isl_ast_graft_free(graft);
1057 0 : return graft;
1058 : }
1059 :
1060 : /* Construct an isl_ast_expr for the increment (i.e., stride) of
1061 : * the current dimension.
1062 : */
1063 0 : static __isl_give isl_ast_expr *for_inc(__isl_keep isl_ast_build *build)
1064 : {
1065 : int depth;
1066 : isl_val *v;
1067 : isl_ctx *ctx;
1068 :
1069 0 : if (!build)
1070 0 : return NULL;
1071 0 : ctx = isl_ast_build_get_ctx(build);
1072 0 : depth = isl_ast_build_get_depth(build);
1073 :
1074 0 : if (!isl_ast_build_has_stride(build, depth))
1075 0 : return isl_ast_expr_alloc_int_si(ctx, 1);
1076 :
1077 0 : v = isl_ast_build_get_stride(build, depth);
1078 0 : return isl_ast_expr_from_val(v);
1079 : }
1080 :
1081 : /* Should we express the loop condition as
1082 : *
1083 : * iterator <= min(list of upper bounds)
1084 : *
1085 : * or as a conjunction of constraints?
1086 : *
1087 : * The first is constructed from a list of upper bounds.
1088 : * The second is constructed from a set.
1089 : *
1090 : * If there are no upper bounds in "constraints", then this could mean
1091 : * that "domain" simply doesn't have an upper bound or that we didn't
1092 : * pick any upper bound. In the first case, we want to generate the
1093 : * loop condition as a(n empty) conjunction of constraints.
1094 : * In the second case, we will compute
1095 : * a single upper bound from "domain" and so we use the list form.
1096 : *
1097 : * If there are upper bounds in "constraints",
1098 : * then we use the list form iff the atomic_upper_bound option is set.
1099 : */
1100 0 : static int use_upper_bound_list(isl_ctx *ctx, int n_upper,
1101 : __isl_keep isl_set *domain, int depth)
1102 : {
1103 0 : if (n_upper > 0)
1104 0 : return isl_options_get_ast_build_atomic_upper_bound(ctx);
1105 : else
1106 0 : return isl_set_dim_has_upper_bound(domain, isl_dim_set, depth);
1107 : }
1108 :
1109 : /* Fill in the expressions of the for node in graft->node.
1110 : *
1111 : * In particular,
1112 : * - set the initialization part of the loop to the maximum of the lower bounds
1113 : * - extract the increment from the stride of the current dimension
1114 : * - construct the for condition either based on a list of upper bounds
1115 : * or on a set of upper bound constraints.
1116 : */
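 : /* Schematically (with hypothetical bounds l1, l2, u1, u2 and
 : * stride s), the resulting for node corresponds to
 : *
 : * for (i = max(l1, l2); i <= min(u1, u2); i += s)
 : *
 : * when the list form of the upper bound is used, or to
 : *
 : * for (i = max(l1, l2); <upper bound constraints>; i += s)
 : *
 : * otherwise.
 : */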
1117 0 : static __isl_give isl_ast_graft *set_for_node_expressions(
1118 : __isl_take isl_ast_graft *graft, __isl_keep isl_pw_aff_list *lower,
1119 : int use_list, __isl_keep isl_pw_aff_list *upper_list,
1120 : __isl_keep isl_set *upper_set, __isl_keep isl_ast_build *build)
1121 : {
1122 : isl_ast_node *node;
1123 :
1124 0 : if (!graft)
1125 0 : return NULL;
1126 :
1127 0 : build = isl_ast_build_copy(build);
1128 :
1129 0 : node = graft->node;
1130 0 : node->u.f.init = reduce_list(isl_ast_op_max, lower, build);
1131 0 : node->u.f.inc = for_inc(build);
1132 :
1133 0 : if (!node->u.f.init || !node->u.f.inc)
1134 0 : graft = isl_ast_graft_free(graft);
1135 :
1136 0 : if (use_list)
1137 0 : graft = set_for_cond_from_list(graft, upper_list, build);
1138 : else
1139 0 : graft = set_for_cond_from_set(graft, upper_set, build);
1140 :
1141 0 : isl_ast_build_free(build);
1142 :
1143 0 : return graft;
1144 : }
1145 :
1146 : /* Update "graft" based on "bounds" and "domain" for the generic,
1147 : * non-degenerate, case.
1148 : *
1149 : * "c_lower" and "c_upper" contain the lower and upper bounds
1150 : * that the loop node should express.
1151 : * "domain" is the subset of the intersection of the constraints
1152 : * for which some code is executed.
1153 : *
1154 : * There may be zero lower bounds or zero upper bounds in "constraints"
1155 : * in case the list of constraints was created
1156 : * based on the atomic option or based on separation with explicit bounds.
1157 : * In that case, we use "domain" to derive lower and/or upper bounds.
1158 : *
1159 : * We first compute a list of one or more lower bounds.
1160 : *
1161 : * Then we decide if we want to express the condition as
1162 : *
1163 : * iterator <= min(list of upper bounds)
1164 : *
1165 : * or as a conjunction of constraints.
1166 : *
1167 : * The set of enforced constraints is then computed either based on
1168 : * a list of upper bounds or on a set of upper bound constraints.
1169 : * We do not compute any enforced constraints if we were forced
1170 : * to compute a lower or upper bound using exact_bound. The domains
1171 : * of the resulting expressions may imply some bounds on outer dimensions
1172 : * that we do not want to appear in the enforced constraints since
1173 : * they are not actually enforced by the corresponding code.
1174 : *
1175 : * Finally, we fill in the expressions of the for node.
1176 : */
1177 0 : static __isl_give isl_ast_graft *refine_generic_bounds(
1178 : __isl_take isl_ast_graft *graft,
1179 : __isl_take isl_constraint_list *c_lower,
1180 : __isl_take isl_constraint_list *c_upper,
1181 : __isl_keep isl_set *domain, __isl_keep isl_ast_build *build)
1182 : {
1183 : int depth;
1184 : isl_ctx *ctx;
1185 : isl_pw_aff_list *lower;
1186 : int use_list;
1187 0 : isl_set *upper_set = NULL;
1188 0 : isl_pw_aff_list *upper_list = NULL;
1189 : int n_lower, n_upper;
1190 :
1191 0 : if (!graft || !c_lower || !c_upper || !build)
1192 : goto error;
1193 :
1194 0 : depth = isl_ast_build_get_depth(build);
1195 0 : ctx = isl_ast_graft_get_ctx(graft);
1196 :
1197 0 : n_lower = isl_constraint_list_n_constraint(c_lower);
1198 0 : n_upper = isl_constraint_list_n_constraint(c_upper);
1199 :
1200 0 : use_list = use_upper_bound_list(ctx, n_upper, domain, depth);
1201 :
1202 0 : lower = lower_bounds(c_lower, depth, domain, build);
1203 :
1204 0 : if (use_list)
1205 0 : upper_list = upper_bounds(c_upper, depth, domain, build);
1206 0 : else if (n_upper > 0)
1207 0 : upper_set = intersect_constraints(c_upper);
1208 : else
1209 0 : upper_set = isl_set_universe(isl_set_get_space(domain));
1210 :
1211 0 : if (n_lower == 0 || n_upper == 0)
1212 : ;
1213 0 : else if (use_list)
1214 0 : graft = set_enforced_from_list(graft, lower, upper_list);
1215 : else
1216 0 : graft = set_enforced_from_set(graft, lower, depth, upper_set);
1217 :
1218 0 : graft = set_for_node_expressions(graft, lower, use_list, upper_list,
1219 : upper_set, build);
1220 :
1221 0 : isl_pw_aff_list_free(lower);
1222 0 : isl_pw_aff_list_free(upper_list);
1223 0 : isl_set_free(upper_set);
1224 0 : isl_constraint_list_free(c_lower);
1225 0 : isl_constraint_list_free(c_upper);
1226 :
1227 0 : return graft;
1228 : error:
1229 0 : isl_constraint_list_free(c_lower);
1230 0 : isl_constraint_list_free(c_upper);
1231 0 : return isl_ast_graft_free(graft);
1232 : }
1233 :
1234 : /* Internal data structure used inside count_constraints to keep
1235 : * track of the number of constraints that are independent of dimension "pos",
1236 : * the lower bounds in "pos" and the upper bounds in "pos".
1237 : */
1238 : struct isl_ast_count_constraints_data {
1239 : int pos;
1240 :
1241 : int n_indep;
1242 : int n_lower;
1243 : int n_upper;
1244 : };
1245 :
1246 : /* Increment data->n_indep, data->n_lower or data->n_upper depending
1247 : * on whether "c" is independent of dimension data->pos,
1248 : * a lower bound or an upper bound.
1249 : */
1250 0 : static isl_stat count_constraints(__isl_take isl_constraint *c, void *user)
1251 : {
1252 0 : struct isl_ast_count_constraints_data *data = user;
1253 :
1254 0 : if (isl_constraint_is_lower_bound(c, isl_dim_set, data->pos))
1255 0 : data->n_lower++;
1256 0 : else if (isl_constraint_is_upper_bound(c, isl_dim_set, data->pos))
1257 0 : data->n_upper++;
1258 : else
1259 0 : data->n_indep++;
1260 :
1261 0 : isl_constraint_free(c);
1262 :
1263 0 : return isl_stat_ok;
1264 : }
1265 :
1266 : /* Update "graft" based on "bounds" and "domain" for the generic,
1267 : * non-degenerate, case.
1268 : *
1269 : * "list" represents the list of bounds that need to be encoded by
1270 : * the for loop. Only the constraints that involve the iterator
1271 : * are relevant here. The other constraints are taken care of by
1272 : * the caller and are included in the generated constraints of "build".
1273 : * "domain" is the subset of the intersection of the constraints
1274 : * for which some code is executed.
1275 : * "build" is the build in which graft->node was created.
1276 : *
1277 : * We separate lower bounds, upper bounds and constraints that
1278 : * are independent of the loop iterator.
1279 : *
1280 : * The actual for loop bounds are generated in refine_generic_bounds.
1281 : */
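 : /* To illustrate the index manipulation below with made-up counts:
 : * after sorting, a list with n_indep = 1, n_lower = 2 and n_upper = 1
 : * looks like [c, l0, l1, u0]. Dropping the first n_indep elements
 : * leaves [l0, l1, u0]; dropping n_upper elements at position n_lower
 : * from one copy gives the lower bounds [l0, l1], while dropping the
 : * first n_lower elements from the other copy gives the upper bounds
 : * [u0].
 : */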
1282 0 : static __isl_give isl_ast_graft *refine_generic_split(
1283 : __isl_take isl_ast_graft *graft, __isl_take isl_constraint_list *list,
1284 : __isl_keep isl_set *domain, __isl_keep isl_ast_build *build)
1285 : {
1286 : struct isl_ast_count_constraints_data data;
1287 : isl_constraint_list *lower;
1288 : isl_constraint_list *upper;
1289 :
1290 0 : if (!list)
1291 0 : return isl_ast_graft_free(graft);
1292 :
1293 0 : data.pos = isl_ast_build_get_depth(build);
1294 :
1295 0 : list = isl_constraint_list_sort(list, &cmp_constraint, &data.pos);
1296 0 : if (!list)
1297 0 : return isl_ast_graft_free(graft);
1298 :
1299 0 : data.n_indep = data.n_lower = data.n_upper = 0;
1300 0 : if (isl_constraint_list_foreach(list, &count_constraints, &data) < 0) {
1301 0 : isl_constraint_list_free(list);
1302 0 : return isl_ast_graft_free(graft);
1303 : }
1304 :
1305 0 : lower = isl_constraint_list_drop(list, 0, data.n_indep);
1306 0 : upper = isl_constraint_list_copy(lower);
1307 0 : lower = isl_constraint_list_drop(lower, data.n_lower, data.n_upper);
1308 0 : upper = isl_constraint_list_drop(upper, 0, data.n_lower);
1309 :
1310 0 : return refine_generic_bounds(graft, lower, upper, domain, build);
1311 : }
1312 :
1313 : /* Update "graft" based on "bounds" and "domain" for the generic,
1314 : * non-degenerate, case.
1315 : *
1316 : * "bounds" represents the bounds that need to be encoded by
1317 : * the for loop (or a guard around the for loop).
1318 : * "domain" is the subset of "bounds" for which some code is executed.
1319 : * "build" is the build in which graft->node was created.
1320 : *
1321 : * We break up "bounds" into a list of constraints and continue with
1322 : * refine_generic_split.
1323 : */
1324 0 : static __isl_give isl_ast_graft *refine_generic(
1325 : __isl_take isl_ast_graft *graft,
1326 : __isl_keep isl_basic_set *bounds, __isl_keep isl_set *domain,
1327 : __isl_keep isl_ast_build *build)
1328 : {
1329 : isl_constraint_list *list;
1330 :
1331 0 : if (!build || !graft)
1332 0 : return isl_ast_graft_free(graft);
1333 :
1334 0 : list = isl_basic_set_get_constraint_list(bounds);
1335 :
1336 0 : graft = refine_generic_split(graft, list, domain, build);
1337 :
1338 0 : return graft;
1339 : }
1340 :
1341 : /* Create a for node for the current level.
1342 : *
1343 : * Mark the for node degenerate if "degenerate" is set.
1344 : */
1345 0 : static __isl_give isl_ast_node *create_for(__isl_keep isl_ast_build *build,
1346 : int degenerate)
1347 : {
1348 : int depth;
1349 : isl_id *id;
1350 : isl_ast_node *node;
1351 :
1352 0 : if (!build)
1353 0 : return NULL;
1354 :
1355 0 : depth = isl_ast_build_get_depth(build);
1356 0 : id = isl_ast_build_get_iterator_id(build, depth);
1357 0 : node = isl_ast_node_alloc_for(id);
1358 0 : if (degenerate)
1359 0 : node = isl_ast_node_for_mark_degenerate(node);
1360 :
1361 0 : return node;
1362 : }
1363 :
1364 : /* If the ast_build_exploit_nested_bounds option is set, then return
1365 : * the constraints enforced by all elements in "list".
1366 : * Otherwise, return the universe.
1367 : */
1368 0 : static __isl_give isl_basic_set *extract_shared_enforced(
1369 : __isl_keep isl_ast_graft_list *list, __isl_keep isl_ast_build *build)
1370 : {
1371 : isl_ctx *ctx;
1372 : isl_space *space;
1373 :
1374 0 : if (!list)
1375 0 : return NULL;
1376 :
1377 0 : ctx = isl_ast_graft_list_get_ctx(list);
1378 0 : if (isl_options_get_ast_build_exploit_nested_bounds(ctx))
1379 0 : return isl_ast_graft_list_extract_shared_enforced(list, build);
1380 :
1381 0 : space = isl_ast_build_get_space(build, 1);
1382 0 : return isl_basic_set_universe(space);
1383 : }
1384 :
1385 : /* Return the pending constraints of "build" that are not already taken
1386 : * care of (by a combination of "enforced" and the generated constraints
1387 : * of "build").
1388 : */
1389 0 : static __isl_give isl_set *extract_pending(__isl_keep isl_ast_build *build,
1390 : __isl_keep isl_basic_set *enforced)
1391 : {
1392 : isl_set *guard, *context;
1393 :
1394 0 : guard = isl_ast_build_get_pending(build);
1395 0 : context = isl_set_from_basic_set(isl_basic_set_copy(enforced));
1396 0 : context = isl_set_intersect(context,
1397 : isl_ast_build_get_generated(build));
1398 0 : return isl_set_gist(guard, context);
1399 : }
1400 :
1401 : /* Create an AST node for the current dimension based on
1402 : * the schedule domain "bounds" and return the node encapsulated
1403 : * in an isl_ast_graft.
1404 : *
1405 : * "executed" is the current inverse schedule, taking into account
1406 : * the bounds in "bounds"
1407 : * "domain" is the domain of "executed", with inner dimensions projected out.
1408 : * It may be a strict subset of "bounds" in case "bounds" was created
1409 : * based on the atomic option or based on separation with explicit bounds.
1410 : *
1411 : * "domain" may satisfy additional equalities that result
1412 : * from intersecting "executed" with "bounds" in add_node.
1413 : * It may also satisfy some global constraints that were dropped out because
1414 : * we performed separation with explicit bounds.
1415 : * The very first step is then to copy these constraints to "bounds".
1416 : *
1417 : * Since we may be calling before_each_for and after_each_for
1418 : * callbacks, we record the current inverse schedule in the build.
1419 : *
1420 : * We consider three builds,
1421 : * "build" is the one in which the current level is created,
1422 : * "body_build" is the build in which the next level is created,
1423 : * "sub_build" is essentially the same as "body_build", except that
1424 : * the depth has not been increased yet.
1425 : *
1426 : * "build" already contains information (in strides and offsets)
1427 : * about the strides at the current level, but this information is not
1428 : * reflected in the build->domain.
1429 : * We first add this information and the "bounds" to the sub_build->domain.
1430 : * isl_ast_build_set_loop_bounds adds the stride information and
1431 : * checks whether the current dimension attains
1432 : * only a single value and whether this single value can be represented using
1433 : * a single affine expression.
1434 : * In the first case, the current level is considered "degenerate".
1435 : * In the second case, a sub-case of the first, the level is considered "eliminated".
1436 : * Eliminated levels don't need to be reflected in the AST since we can
1437 : * simply plug in the affine expression. For degenerate, but non-eliminated,
1438 : * levels, we do introduce a for node, but mark it as degenerate so that
1439 : * it can be printed as an assignment of the single value to the loop
1440 : * "iterator".
1441 : *
1442 : * If the current level is eliminated, we explicitly plug in the value
1443 : * for the current level found by isl_ast_build_set_loop_bounds in the
1444 : * inverse schedule. This ensures that if we are working on a slice
1445 : * of the domain based on information available in the inverse schedule
1446 : * and the build domain, then this information is also reflected
1447 : * in the inverse schedule. This operation also eliminates the current
1448 : * dimension from the inverse schedule making sure no inner dimensions depend
1449 : * on the current dimension. Otherwise, we create a for node, marking
1450 : * it degenerate if appropriate. The initial for node is still incomplete
1451 : * and will be completed in either refine_degenerate or refine_generic.
1452 : *
1453 : * We then generate a sequence of grafts for the next level,
1454 : * create a surrounding graft for the current level and insert
1455 : * the for node we created (if the current level is not eliminated).
1456 : * Before creating a graft for the current level, we first extract
1457 : * hoistable constraints from the child guards and combine them
1458 : * with the pending constraints in the build. These constraints
1459 : * are used to simplify the child guards and then added to the guard
1460 : * of the current graft to ensure that they will be generated.
1461 : * If the hoisted guard is a disjunction, then we use it directly
1462 : * to gist the guards on the children before intersecting it with the
1463 : * pending constraints. We do so because this disjunction is typically
1464 : * identical to the guards on the children such that these guards
1465 : * can be effectively removed completely. After the intersection,
1466 : * the gist operation would have a harder time figuring this out.
1467 : *
1468 : * Finally, we set the bounds of the for loop in either
1469 : * refine_degenerate or refine_generic.
1470 : * We do so in a context where the pending constraints of the build
1471 : * have been replaced by the guard of the current graft.
1472 : */
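 : /* As a made-up illustration of the two special cases above:
 : * if the current dimension i attains a single value that is only
 : * piecewise affine, say i = min(n, m), the level is degenerate but
 : * not eliminated, so a for node is still created and marked as
 : * degenerate. If the single value is one affine expression, say
 : * i = 2 j + 1 in terms of an outer dimension j, the level is
 : * eliminated: no for node is created and the value is plugged into
 : * the inverse schedule instead.
 : */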
1473 0 : static __isl_give isl_ast_graft *create_node_scaled(
1474 : __isl_take isl_union_map *executed,
1475 : __isl_take isl_basic_set *bounds, __isl_take isl_set *domain,
1476 : __isl_take isl_ast_build *build)
1477 : {
1478 : int depth;
1479 : int degenerate;
1480 : isl_bool eliminated;
1481 : isl_basic_set *hull;
1482 : isl_basic_set *enforced;
1483 : isl_set *guard, *hoisted;
1484 0 : isl_ast_node *node = NULL;
1485 : isl_ast_graft *graft;
1486 : isl_ast_graft_list *children;
1487 : isl_ast_build *sub_build;
1488 : isl_ast_build *body_build;
1489 :
1490 0 : domain = isl_ast_build_eliminate_divs(build, domain);
1491 0 : domain = isl_set_detect_equalities(domain);
1492 0 : hull = isl_set_unshifted_simple_hull(isl_set_copy(domain));
1493 0 : bounds = isl_basic_set_intersect(bounds, hull);
1494 0 : build = isl_ast_build_set_executed(build, isl_union_map_copy(executed));
1495 :
1496 0 : depth = isl_ast_build_get_depth(build);
1497 0 : sub_build = isl_ast_build_copy(build);
1498 0 : bounds = isl_basic_set_remove_redundancies(bounds);
1499 0 : bounds = isl_ast_build_specialize_basic_set(sub_build, bounds);
1500 0 : sub_build = isl_ast_build_set_loop_bounds(sub_build,
1501 : isl_basic_set_copy(bounds));
1502 0 : degenerate = isl_ast_build_has_value(sub_build);
1503 0 : eliminated = isl_ast_build_has_affine_value(sub_build, depth);
1504 0 : if (degenerate < 0 || eliminated < 0)
1505 0 : executed = isl_union_map_free(executed);
1506 0 : if (!degenerate)
1507 0 : bounds = isl_ast_build_compute_gist_basic_set(build, bounds);
1508 0 : sub_build = isl_ast_build_set_pending_generated(sub_build,
1509 : isl_basic_set_copy(bounds));
1510 0 : if (eliminated)
1511 0 : executed = plug_in_values(executed, sub_build);
1512 : else
1513 0 : node = create_for(build, degenerate);
1514 :
1515 0 : body_build = isl_ast_build_copy(sub_build);
1516 0 : body_build = isl_ast_build_increase_depth(body_build);
1517 0 : if (!eliminated)
1518 0 : node = before_each_for(node, body_build);
1519 0 : children = generate_next_level(executed,
1520 : isl_ast_build_copy(body_build));
1521 :
1522 0 : enforced = extract_shared_enforced(children, build);
1523 0 : guard = extract_pending(sub_build, enforced);
1524 0 : hoisted = isl_ast_graft_list_extract_hoistable_guard(children, build);
1525 0 : if (isl_set_n_basic_set(hoisted) > 1)
1526 0 : children = isl_ast_graft_list_gist_guards(children,
1527 : isl_set_copy(hoisted));
1528 0 : guard = isl_set_intersect(guard, hoisted);
1529 0 : if (!eliminated)
1530 0 : guard = add_implied_guards(guard, degenerate, bounds, build);
1531 :
1532 0 : graft = isl_ast_graft_alloc_from_children(children,
1533 : isl_set_copy(guard), enforced, build, sub_build);
1534 :
1535 0 : if (!eliminated) {
1536 : isl_ast_build *for_build;
1537 :
1538 0 : graft = isl_ast_graft_insert_for(graft, node);
1539 0 : for_build = isl_ast_build_copy(build);
1540 0 : for_build = isl_ast_build_replace_pending_by_guard(for_build,
1541 : isl_set_copy(guard));
1542 0 : if (degenerate)
1543 0 : graft = refine_degenerate(graft, for_build, sub_build);
1544 : else
1545 0 : graft = refine_generic(graft, bounds,
1546 : domain, for_build);
1547 0 : isl_ast_build_free(for_build);
1548 : }
1549 0 : isl_set_free(guard);
1550 0 : if (!eliminated)
1551 0 : graft = after_each_for(graft, body_build);
1552 :
1553 0 : isl_ast_build_free(body_build);
1554 0 : isl_ast_build_free(sub_build);
1555 0 : isl_ast_build_free(build);
1556 0 : isl_basic_set_free(bounds);
1557 0 : isl_set_free(domain);
1558 :
1559 0 : return graft;
1560 : }
1561 :
1562 : /* Internal data structure for checking if all constraints involving
1563 : * the input dimension "depth" are such that the other coefficients
1564 : * are multiples of "m", reducing "m" if they are not.
1565 : * If "m" is reduced all the way down to "1", then the check has failed
1566 : * and we break out of the iteration.
1567 : */
1568 : struct isl_check_scaled_data {
1569 : int depth;
1570 : isl_val *m;
1571 : };
1572 :
1573 : /* If constraint "c" involves the input dimension data->depth,
1574 : * then make sure that all the other coefficients are multiples of data->m,
1575 : * reducing data->m if needed.
1576 : * Break out of the iteration if data->m has become equal to "1".
1577 : */
1578 0 : static isl_stat constraint_check_scaled(__isl_take isl_constraint *c,
1579 : void *user)
1580 : {
1581 0 : struct isl_check_scaled_data *data = user;
1582 : int i, j, n;
1583 0 : enum isl_dim_type t[] = { isl_dim_param, isl_dim_in, isl_dim_out,
1584 : isl_dim_div };
1585 :
1586 0 : if (!isl_constraint_involves_dims(c, isl_dim_in, data->depth, 1)) {
1587 0 : isl_constraint_free(c);
1588 0 : return isl_stat_ok;
1589 : }
1590 :
1591 0 : for (i = 0; i < 4; ++i) {
1592 0 : n = isl_constraint_dim(c, t[i]);
1593 0 : for (j = 0; j < n; ++j) {
1594 : isl_val *d;
1595 :
1596 0 : if (t[i] == isl_dim_in && j == data->depth)
1597 0 : continue;
1598 0 : if (!isl_constraint_involves_dims(c, t[i], j, 1))
1599 0 : continue;
1600 0 : d = isl_constraint_get_coefficient_val(c, t[i], j);
1601 0 : data->m = isl_val_gcd(data->m, d);
1602 0 : if (isl_val_is_one(data->m))
1603 0 : break;
1604 : }
1605 0 : if (j < n)
1606 0 : break;
1607 : }
1608 :
1609 0 : isl_constraint_free(c);
1610 :
1611 0 : return i < 4 ? isl_stat_error : isl_stat_ok;
1612 : }
1613 :
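/* Editorial illustration (not part of isl proper): a minimal sketch of the
 * coefficient check performed by constraint_check_scaled above, on a
 * made-up example.  For a candidate scaling factor m = 4 and a constraint
 * such as
 *
 *	6 j + 4 i >= 0
 *
 * involving the current dimension i, the coefficient 6 of j reduces the
 * factor to gcd(4, 6) = 2, so the dimension could only be scaled down
 * by 2.  Only the public isl_val API already included in this file is
 * used; this sketch is not called by the code generator.
 */
static __isl_give isl_val *illustrate_scale_factor_reduction(isl_ctx *ctx)
{
	isl_val *m, *coeff;

	m = isl_val_int_from_si(ctx, 4);
	coeff = isl_val_int_from_si(ctx, 6);

	return isl_val_gcd(m, coeff);	/* the value 2 */
}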
1614 : /* For each constraint of "bmap" that involves the input dimension data->depth,
1615 : * make sure that all the other coefficients are multiples of data->m,
1616 : * reducing data->m if needed.
1617 : * Break out of the iteration if data->m has become equal to "1".
1618 : */
1619 0 : static isl_stat basic_map_check_scaled(__isl_take isl_basic_map *bmap,
1620 : void *user)
1621 : {
1622 : isl_stat r;
1623 :
1624 0 : r = isl_basic_map_foreach_constraint(bmap,
1625 : &constraint_check_scaled, user);
1626 0 : isl_basic_map_free(bmap);
1627 :
1628 0 : return r;
1629 : }
1630 :
1631 : /* For each constraint of "map" that involves the input dimension data->depth,
1632 : * make sure that all the other coefficients are multiples of data->m,
1633 : * reducing data->m if needed.
1634 : * Break out of the iteration if data->m has become equal to "1".
1635 : */
1636 0 : static isl_stat map_check_scaled(__isl_take isl_map *map, void *user)
1637 : {
1638 : isl_stat r;
1639 :
1640 0 : r = isl_map_foreach_basic_map(map, &basic_map_check_scaled, user);
1641 0 : isl_map_free(map);
1642 :
1643 0 : return r;
1644 : }
1645 :
1646 : /* Create an AST node for the current dimension based on
1647 : * the schedule domain "bounds" and return the node encapsulated
1648 : * in an isl_ast_graft.
1649 : *
1650 : * "executed" is the current inverse schedule, taking into account
1651 : * the bounds in "bounds"
1652 : * "domain" is the domain of "executed", with inner dimensions projected out.
1653 : *
1654 : *
1655 : * Before moving on to the actual AST node construction in create_node_scaled,
1656 : * we first check if the current dimension is strided and if we can scale
1657 : * down this stride. Note that we only do this if the ast_build_scale_strides
1658 : * option is set.
1659 : *
1660 : * In particular, let the current dimension take on values
1661 : *
1662 : * f + s a
1663 : *
1664 : * with a an integer. We check if we can find an integer m that (obviously)
1665 : * divides both f and s.
1666 : *
1667 : * If so, we check if the current dimension only appears in constraints
1668 : * where the coefficients of the other variables are multiples of m.
1669 : * We perform this extra check to avoid the risk of introducing
1670 : * divisions by scaling down the current dimension.
1671 : *
1672 : * If so, we scale the current dimension down by a factor of m.
1673 : * That is, we plug in
1674 : *
1675 : * i = m i' (1)
1676 : *
1677 : * Note that in principle we could always scale down strided loops
1678 : * by plugging in
1679 : *
1680 : * i = f + s i'
1681 : *
1682 : * but this may result in i' taking on larger values than the original i,
1683 : * due to the shift by "f".
1684 : * By contrast, the scaling in (1) can only reduce the (absolute) value of "i".
1685 : */
1686 0 : static __isl_give isl_ast_graft *create_node(__isl_take isl_union_map *executed,
1687 : __isl_take isl_basic_set *bounds, __isl_take isl_set *domain,
1688 : __isl_take isl_ast_build *build)
1689 : {
1690 : struct isl_check_scaled_data data;
1691 : isl_ctx *ctx;
1692 : isl_aff *offset;
1693 : isl_val *d;
1694 :
1695 0 : ctx = isl_ast_build_get_ctx(build);
1696 0 : if (!isl_options_get_ast_build_scale_strides(ctx))
1697 0 : return create_node_scaled(executed, bounds, domain, build);
1698 :
1699 0 : data.depth = isl_ast_build_get_depth(build);
1700 0 : if (!isl_ast_build_has_stride(build, data.depth))
1701 0 : return create_node_scaled(executed, bounds, domain, build);
1702 :
1703 0 : offset = isl_ast_build_get_offset(build, data.depth);
1704 0 : data.m = isl_ast_build_get_stride(build, data.depth);
1705 0 : if (!data.m)
1706 0 : offset = isl_aff_free(offset);
1707 0 : offset = isl_aff_scale_down_val(offset, isl_val_copy(data.m));
1708 0 : d = isl_aff_get_denominator_val(offset);
1709 0 : if (!d)
1710 0 : executed = isl_union_map_free(executed);
1711 :
1712 0 : if (executed && isl_val_is_divisible_by(data.m, d))
1713 0 : data.m = isl_val_div(data.m, d);
1714 : else {
1715 0 : data.m = isl_val_set_si(data.m, 1);
1716 0 : isl_val_free(d);
1717 : }
1718 :
1719 0 : if (!isl_val_is_one(data.m)) {
1720 0 : if (isl_union_map_foreach_map(executed, &map_check_scaled,
1721 0 : &data) < 0 &&
1722 0 : !isl_val_is_one(data.m))
1723 0 : executed = isl_union_map_free(executed);
1724 : }
1725 :
1726 0 : if (!isl_val_is_one(data.m)) {
1727 : isl_space *space;
1728 : isl_multi_aff *ma;
1729 : isl_aff *aff;
1730 : isl_map *map;
1731 : isl_union_map *umap;
1732 :
1733 0 : space = isl_ast_build_get_space(build, 1);
1734 0 : space = isl_space_map_from_set(space);
1735 0 : ma = isl_multi_aff_identity(space);
1736 0 : aff = isl_multi_aff_get_aff(ma, data.depth);
1737 0 : aff = isl_aff_scale_val(aff, isl_val_copy(data.m));
1738 0 : ma = isl_multi_aff_set_aff(ma, data.depth, aff);
1739 :
1740 0 : bounds = isl_basic_set_preimage_multi_aff(bounds,
1741 : isl_multi_aff_copy(ma));
1742 0 : domain = isl_set_preimage_multi_aff(domain,
1743 : isl_multi_aff_copy(ma));
1744 0 : map = isl_map_reverse(isl_map_from_multi_aff(ma));
1745 0 : umap = isl_union_map_from_map(map);
1746 0 : executed = isl_union_map_apply_domain(executed,
1747 : isl_union_map_copy(umap));
1748 0 : build = isl_ast_build_scale_down(build, isl_val_copy(data.m),
1749 : umap);
1750 : }
1751 0 : isl_aff_free(offset);
1752 0 : isl_val_free(data.m);
1753 :
1754 0 : return create_node_scaled(executed, bounds, domain, build);
1755 : }
1756 :
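/* Editorial illustration (not part of isl proper): a minimal sketch of the
 * substitution (1) above, i = m i', expressed as a preimage and applied
 * to a made-up strided set.  For values of the form 4 + 8 a (f = 4, s = 8),
 * the factor m = 4 divides both f and s, and the preimage under i -> 4 i'
 * is (up to the exact printed form) { [i'] : 0 <= i' <= 25 and i' mod 2 = 1 }.
 * Only the public API already included in this file is used; this sketch
 * is not called by the code generator.
 */
static __isl_give isl_set *illustrate_scale_down(isl_ctx *ctx)
{
	isl_set *set;
	isl_multi_aff *ma;

	set = isl_set_read_from_str(ctx,
		"{ [i] : 0 <= i <= 100 and i mod 8 = 4 }");
	ma = isl_multi_aff_read_from_str(ctx, "{ [i] -> [4i] }");

	return isl_set_preimage_multi_aff(set, ma);
}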
1757 : /* Add the basic set to the list that "user" points to.
1758 : */
1759 0 : static isl_stat collect_basic_set(__isl_take isl_basic_set *bset, void *user)
1760 : {
1761 0 : isl_basic_set_list **list = user;
1762 :
1763 0 : *list = isl_basic_set_list_add(*list, bset);
1764 :
1765 0 : return isl_stat_ok;
1766 : }
1767 :
1768 : /* Extract the basic sets of "set" and collect them in an isl_basic_set_list.
1769 : */
1770 0 : static __isl_give isl_basic_set_list *isl_basic_set_list_from_set(
1771 : __isl_take isl_set *set)
1772 : {
1773 : int n;
1774 : isl_ctx *ctx;
1775 : isl_basic_set_list *list;
1776 :
1777 0 : if (!set)
1778 0 : return NULL;
1779 :
1780 0 : ctx = isl_set_get_ctx(set);
1781 :
1782 0 : n = isl_set_n_basic_set(set);
1783 0 : list = isl_basic_set_list_alloc(ctx, n);
1784 0 : if (isl_set_foreach_basic_set(set, &collect_basic_set, &list) < 0)
1785 0 : list = isl_basic_set_list_free(list);
1786 :
1787 0 : isl_set_free(set);
1788 0 : return list;
1789 : }
1790 :
1791 : /* Generate code for the schedule domain "bounds"
1792 : * and add the result to "list".
1793 : *
1794 : * We mainly detect strides here and check if the bounds do not
1795 : * conflict with the current build domain
1796 : * and then pass over control to create_node.
1797 : *
1798 : * "bounds" reflects the bounds on the current dimension and possibly
1799 : * some extra conditions on outer dimensions.
1800 : * It does not, however, include any divs involving the current dimension,
1801 : * so it does not capture any stride constraints.
1802 : * We therefore need to compute that part of the schedule domain that
1803 : * intersects with "bounds" and derive the strides from the result.
1804 : */
1805 0 : static __isl_give isl_ast_graft_list *add_node(
1806 : __isl_take isl_ast_graft_list *list, __isl_take isl_union_map *executed,
1807 : __isl_take isl_basic_set *bounds, __isl_take isl_ast_build *build)
1808 : {
1809 : isl_ast_graft *graft;
1810 0 : isl_set *domain = NULL;
1811 : isl_union_set *uset;
1812 : int empty, disjoint;
1813 :
1814 0 : uset = isl_union_set_from_basic_set(isl_basic_set_copy(bounds));
1815 0 : executed = isl_union_map_intersect_domain(executed, uset);
1816 0 : empty = isl_union_map_is_empty(executed);
1817 0 : if (empty < 0)
1818 0 : goto error;
1819 0 : if (empty)
1820 0 : goto done;
1821 :
1822 0 : uset = isl_union_map_domain(isl_union_map_copy(executed));
1823 0 : domain = isl_set_from_union_set(uset);
1824 0 : domain = isl_ast_build_specialize(build, domain);
1825 :
1826 0 : domain = isl_set_compute_divs(domain);
1827 0 : domain = isl_ast_build_eliminate_inner(build, domain);
1828 0 : disjoint = isl_set_is_disjoint(domain, build->domain);
1829 0 : if (disjoint < 0)
1830 0 : goto error;
1831 0 : if (disjoint)
1832 0 : goto done;
1833 :
1834 0 : build = isl_ast_build_detect_strides(build, isl_set_copy(domain));
1835 :
1836 0 : graft = create_node(executed, bounds, domain,
1837 : isl_ast_build_copy(build));
1838 0 : list = isl_ast_graft_list_add(list, graft);
1839 0 : isl_ast_build_free(build);
1840 0 : return list;
1841 : error:
1842 0 : list = isl_ast_graft_list_free(list);
1843 : done:
1844 0 : isl_set_free(domain);
1845 0 : isl_basic_set_free(bounds);
1846 0 : isl_union_map_free(executed);
1847 0 : isl_ast_build_free(build);
1848 0 : return list;
1849 : }
1850 :
1851 : /* Does any element of i follow or coincide with any element of j
1852 : * at the current depth for equal values of the outer dimensions?
1853 : */
1854 0 : static isl_bool domain_follows_at_depth(__isl_keep isl_basic_set *i,
1855 : __isl_keep isl_basic_set *j, void *user)
1856 : {
1857 0 : int depth = *(int *) user;
1858 : isl_basic_map *test;
1859 : isl_bool empty;
1860 : int l;
1861 :
1862 0 : test = isl_basic_map_from_domain_and_range(isl_basic_set_copy(i),
1863 : isl_basic_set_copy(j));
1864 0 : for (l = 0; l < depth; ++l)
1865 0 : test = isl_basic_map_equate(test, isl_dim_in, l,
1866 : isl_dim_out, l);
1867 0 : test = isl_basic_map_order_ge(test, isl_dim_in, depth,
1868 : isl_dim_out, depth);
1869 0 : empty = isl_basic_map_is_empty(test);
1870 0 : isl_basic_map_free(test);
1871 :
1872 0 : return empty < 0 ? isl_bool_error : !empty;
1873 : }
1874 :
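/* Editorial illustration (not part of isl proper): the ordering test above
 * on two made-up one-dimensional domains at depth 0.  Since an element of
 * { [i] : 5 <= i <= 9 } follows or coincides with an element of
 * { [i] : 0 <= i <= 5 } (e.g., 5 >= 5), the test returns isl_bool_true
 * and the first domain has to be generated after the second.
 * This sketch is not called by the code generator.
 */
static isl_bool illustrate_domain_order(isl_ctx *ctx)
{
	isl_basic_set *later, *earlier;
	isl_bool follows;
	int depth = 0;

	later = isl_basic_set_read_from_str(ctx, "{ [i] : 5 <= i <= 9 }");
	earlier = isl_basic_set_read_from_str(ctx, "{ [i] : 0 <= i <= 5 }");
	follows = domain_follows_at_depth(later, earlier, &depth);
	isl_basic_set_free(later);
	isl_basic_set_free(earlier);

	return follows;
}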
1875 : /* Split up each element of "list" into a part that is related to "bset"
1876 : * according to "gt" and a part that is not.
1877 : * Return a list that consists of "bset" and all the pieces.
1878 : */
1879 0 : static __isl_give isl_basic_set_list *add_split_on(
1880 : __isl_take isl_basic_set_list *list, __isl_take isl_basic_set *bset,
1881 : __isl_keep isl_basic_map *gt)
1882 : {
1883 : int i, n;
1884 : isl_basic_set_list *res;
1885 :
1886 0 : if (!list)
1887 0 : bset = isl_basic_set_free(bset);
1888 :
1889 0 : gt = isl_basic_map_copy(gt);
1890 0 : gt = isl_basic_map_intersect_domain(gt, isl_basic_set_copy(bset));
1891 0 : n = isl_basic_set_list_n_basic_set(list);
1892 0 : res = isl_basic_set_list_from_basic_set(bset);
1893 0 : for (i = 0; res && i < n; ++i) {
1894 : isl_basic_set *bset;
1895 : isl_set *set1, *set2;
1896 : isl_basic_map *bmap;
1897 : int empty;
1898 :
1899 0 : bset = isl_basic_set_list_get_basic_set(list, i);
1900 0 : bmap = isl_basic_map_copy(gt);
1901 0 : bmap = isl_basic_map_intersect_range(bmap, bset);
1902 0 : bset = isl_basic_map_range(bmap);
1903 0 : empty = isl_basic_set_is_empty(bset);
1904 0 : if (empty < 0)
1905 0 : res = isl_basic_set_list_free(res);
1906 0 : if (empty) {
1907 0 : isl_basic_set_free(bset);
1908 0 : bset = isl_basic_set_list_get_basic_set(list, i);
1909 0 : res = isl_basic_set_list_add(res, bset);
1910 0 : continue;
1911 : }
1912 :
1913 0 : res = isl_basic_set_list_add(res, isl_basic_set_copy(bset));
1914 0 : set1 = isl_set_from_basic_set(bset);
1915 0 : bset = isl_basic_set_list_get_basic_set(list, i);
1916 0 : set2 = isl_set_from_basic_set(bset);
1917 0 : set1 = isl_set_subtract(set2, set1);
1918 0 : set1 = isl_set_make_disjoint(set1);
1919 :
1920 0 : res = isl_basic_set_list_concat(res,
1921 : isl_basic_set_list_from_set(set1));
1922 : }
1923 0 : isl_basic_map_free(gt);
1924 0 : isl_basic_set_list_free(list);
1925 0 : return res;
1926 : }
1927 :
1928 : static __isl_give isl_ast_graft_list *generate_sorted_domains(
1929 : __isl_keep isl_basic_set_list *domain_list,
1930 : __isl_keep isl_union_map *executed,
1931 : __isl_keep isl_ast_build *build);
1932 :
1933 : /* Internal data structure for add_nodes.
1934 : *
1935 : * "executed" and "build" are extra arguments to be passed to add_node.
1936 : * "list" collects the results.
1937 : */
1938 : struct isl_add_nodes_data {
1939 : isl_union_map *executed;
1940 : isl_ast_build *build;
1941 :
1942 : isl_ast_graft_list *list;
1943 : };
1944 :
1945 : /* Generate code for the schedule domains in "scc"
1946 : * and add the results to "list".
1947 : *
1948 : * The domains in "scc" form a strongly connected component in the ordering.
1949 : * If the number of domains in "scc" is larger than 1, then this means
1950 : * that we cannot determine a valid ordering for the domains in the component.
1951 : * This should be fairly rare because the individual domains
1952 : * have been made disjoint first.
1953 : * The problem is that the domains may be integrally disjoint but not
1954 : * rationally disjoint. For example, we may have domains
1955 : *
1956 : * { [i,i] : 0 <= i <= 1 } and { [i,1-i] : 0 <= i <= 1 }
1957 : *
1958 : * These two domains have an empty intersection, but their rational
1959 : * relaxations do intersect. It is impossible to order these domains
1960 : * in the second dimension because the first should be ordered before
1961 : * the second for outer dimension equal to 0, while it should be ordered
1962 : * after for outer dimension equal to 1.
1963 : *
1964 : * This may happen in particular in case of unrolling since the domain
1965 : * of each slice is replaced by its simple hull.
1966 : *
1967 : * For each basic set i in "scc" and for each of the following basic sets j,
1968 : * we split off that part of the basic set i that shares the outer dimensions
1969 : * with j and lies before j in the current dimension.
1970 : * We collect all the pieces in a new list that replaces "scc".
1971 : *
1972 : * While the elements in "scc" should be disjoint, we double-check
1973 : * this property to avoid running into an infinite recursion in case
1974 : * they intersect due to some internal error.
1975 : */
1976 0 : static isl_stat add_nodes(__isl_take isl_basic_set_list *scc, void *user)
1977 : {
1978 0 : struct isl_add_nodes_data *data = user;
1979 : int i, n, depth;
1980 : isl_basic_set *bset, *first;
1981 : isl_basic_set_list *list;
1982 : isl_space *space;
1983 : isl_basic_map *gt;
1984 :
1985 0 : n = isl_basic_set_list_n_basic_set(scc);
1986 0 : bset = isl_basic_set_list_get_basic_set(scc, 0);
1987 0 : if (n == 1) {
1988 0 : isl_basic_set_list_free(scc);
1989 0 : data->list = add_node(data->list,
1990 : isl_union_map_copy(data->executed), bset,
1991 : isl_ast_build_copy(data->build));
1992 0 : return data->list ? isl_stat_ok : isl_stat_error;
1993 : }
1994 :
1995 0 : depth = isl_ast_build_get_depth(data->build);
1996 0 : space = isl_basic_set_get_space(bset);
1997 0 : space = isl_space_map_from_set(space);
1998 0 : gt = isl_basic_map_universe(space);
1999 0 : for (i = 0; i < depth; ++i)
2000 0 : gt = isl_basic_map_equate(gt, isl_dim_in, i, isl_dim_out, i);
2001 0 : gt = isl_basic_map_order_gt(gt, isl_dim_in, depth, isl_dim_out, depth);
2002 :
2003 0 : first = isl_basic_set_copy(bset);
2004 0 : list = isl_basic_set_list_from_basic_set(bset);
2005 0 : for (i = 1; i < n; ++i) {
2006 : int disjoint;
2007 :
2008 0 : bset = isl_basic_set_list_get_basic_set(scc, i);
2009 :
2010 0 : disjoint = isl_basic_set_is_disjoint(bset, first);
2011 0 : if (disjoint < 0)
2012 0 : list = isl_basic_set_list_free(list);
2013 0 : else if (!disjoint)
2014 0 : isl_die(isl_basic_set_list_get_ctx(scc),
2015 : isl_error_internal,
2016 : "basic sets in scc are assumed to be disjoint",
2017 : list = isl_basic_set_list_free(list));
2018 :
2019 0 : list = add_split_on(list, bset, gt);
2020 : }
2021 0 : isl_basic_set_free(first);
2022 0 : isl_basic_map_free(gt);
2023 0 : isl_basic_set_list_free(scc);
2024 0 : scc = list;
2025 0 : data->list = isl_ast_graft_list_concat(data->list,
2026 : generate_sorted_domains(scc, data->executed, data->build));
2027 0 : isl_basic_set_list_free(scc);
2028 :
2029 0 : return data->list ? isl_stat_ok : isl_stat_error;
2030 : }
2031 :
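/* Editorial illustration (not part of isl proper): the two domains from the
 * comment above add_nodes, checked with the public isl API.  They are
 * integrally disjoint (their only common rational point is [1/2, 1/2],
 * which is not integral), so isl_set_is_disjoint returns isl_bool_true,
 * yet their rational relaxations intersect, which is what prevents a
 * consistent ordering in the second dimension.  This sketch is not
 * called by the code generator.
 */
static isl_bool illustrate_rationally_overlapping_domains(isl_ctx *ctx)
{
	isl_set *s1, *s2;
	isl_bool disjoint;

	s1 = isl_set_read_from_str(ctx, "{ [i, i] : 0 <= i <= 1 }");
	s2 = isl_set_read_from_str(ctx, "{ [i, 1 - i] : 0 <= i <= 1 }");
	disjoint = isl_set_is_disjoint(s1, s2);
	isl_set_free(s1);
	isl_set_free(s2);

	return disjoint;
}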
2032 : /* Sort the domains in "domain_list" according to the execution order
2033 : * at the current depth (for equal values of the outer dimensions),
2034 : * generate code for each of them, collecting the results in a list.
2035 : * If no code is generated (because the intersection of the inverse schedule
2036 : * with the domains turns out to be empty), then an empty list is returned.
2037 : *
2038 : * The caller is responsible for ensuring that the basic sets in "domain_list"
2039 : * are pair-wise disjoint. It can, however, in principle happen that
2040 : * two basic sets should be ordered one way for one value of the outer
2041 : * dimensions and the other way for some other value of the outer dimensions.
2042 : * We therefore play safe and look for strongly connected components.
2043 : * The function add_nodes takes care of handling non-trivial components.
2044 : */
2045 0 : static __isl_give isl_ast_graft_list *generate_sorted_domains(
2046 : __isl_keep isl_basic_set_list *domain_list,
2047 : __isl_keep isl_union_map *executed, __isl_keep isl_ast_build *build)
2048 : {
2049 : isl_ctx *ctx;
2050 : struct isl_add_nodes_data data;
2051 : int depth;
2052 : int n;
2053 :
2054 0 : if (!domain_list)
2055 0 : return NULL;
2056 :
2057 0 : ctx = isl_basic_set_list_get_ctx(domain_list);
2058 0 : n = isl_basic_set_list_n_basic_set(domain_list);
2059 0 : data.list = isl_ast_graft_list_alloc(ctx, n);
2060 0 : if (n == 0)
2061 0 : return data.list;
2062 0 : if (n == 1)
2063 0 : return add_node(data.list, isl_union_map_copy(executed),
2064 0 : isl_basic_set_list_get_basic_set(domain_list, 0),
2065 : isl_ast_build_copy(build));
2066 :
2067 0 : depth = isl_ast_build_get_depth(build);
2068 0 : data.executed = executed;
2069 0 : data.build = build;
2070 0 : if (isl_basic_set_list_foreach_scc(domain_list,
2071 : &domain_follows_at_depth, &depth,
2072 : &add_nodes, &data) < 0)
2073 0 : data.list = isl_ast_graft_list_free(data.list);
2074 :
2075 0 : return data.list;
2076 : }
2077 :
2078 : /* Do i and j share any values for the outer dimensions?
2079 : */
2080 0 : static isl_bool shared_outer(__isl_keep isl_basic_set *i,
2081 : __isl_keep isl_basic_set *j, void *user)
2082 : {
2083 0 : int depth = *(int *) user;
2084 : isl_basic_map *test;
2085 : isl_bool empty;
2086 : int l;
2087 :
2088 0 : test = isl_basic_map_from_domain_and_range(isl_basic_set_copy(i),
2089 : isl_basic_set_copy(j));
2090 0 : for (l = 0; l < depth; ++l)
2091 0 : test = isl_basic_map_equate(test, isl_dim_in, l,
2092 : isl_dim_out, l);
2093 0 : empty = isl_basic_map_is_empty(test);
2094 0 : isl_basic_map_free(test);
2095 :
2096 0 : return empty < 0 ? isl_bool_error : !empty;
2097 : }
2098 :
2099 : /* Internal data structure for generate_sorted_domains_wrap.
2100 : *
2101 : * "n" is the total number of basic sets
2102 : * "executed" and "build" are extra arguments to be passed
2103 : * to generate_sorted_domains.
2104 : *
2105 : * "single" is set to 1 by generate_sorted_domains_wrap if there
2106 : * is only a single component.
2107 : * "list" collects the results.
2108 : */
2109 : struct isl_ast_generate_parallel_domains_data {
2110 : int n;
2111 : isl_union_map *executed;
2112 : isl_ast_build *build;
2113 :
2114 : int single;
2115 : isl_ast_graft_list *list;
2116 : };
2117 :
2118 : /* Call generate_sorted_domains on "scc", fuse the result into a list
2119 : * with either zero or one graft and collect these single element
2120 : * lists into data->list.
2121 : *
2122 : * If there is only one component, i.e., if the number of basic sets
2123 : * in the current component is equal to the total number of basic sets,
2124 : * then data->single is set to 1 and the result of generate_sorted_domains
2125 : * is not fused.
2126 : */
2127 0 : static isl_stat generate_sorted_domains_wrap(__isl_take isl_basic_set_list *scc,
2128 : void *user)
2129 : {
2130 0 : struct isl_ast_generate_parallel_domains_data *data = user;
2131 : isl_ast_graft_list *list;
2132 :
2133 0 : list = generate_sorted_domains(scc, data->executed, data->build);
2134 0 : data->single = isl_basic_set_list_n_basic_set(scc) == data->n;
2135 0 : if (!data->single)
2136 0 : list = isl_ast_graft_list_fuse(list, data->build);
2137 0 : if (!data->list)
2138 0 : data->list = list;
2139 : else
2140 0 : data->list = isl_ast_graft_list_concat(data->list, list);
2141 :
2142 0 : isl_basic_set_list_free(scc);
2143 0 : if (!data->list)
2144 0 : return isl_stat_error;
2145 :
2146 0 : return isl_stat_ok;
2147 : }
2148 :
2149 : /* Look for any (weakly connected) components in the "domain_list"
2150 : * of domains that share some values of the outer dimensions.
2151 : * That is, domains in different components do not share any values
2152 : * of the outer dimensions. This means that these components
2153 : * can be freely reordered.
2154 : * Within each of the components, we sort the domains according
2155 : * to the execution order at the current depth.
2156 : *
2157 : * If there is more than one component, then generate_sorted_domains_wrap
2158 : * fuses the result of each call to generate_sorted_domains
2159 : * into a list with either zero or one graft and collects these (at most)
2160 : * single element lists into a bigger list. This means that the elements of the
2161 : * final list can be freely reordered. In particular, we sort them
2162 : * according to an arbitrary but fixed ordering to ease merging of
2163 : * graft lists from different components.
2164 : */
2165 0 : static __isl_give isl_ast_graft_list *generate_parallel_domains(
2166 : __isl_keep isl_basic_set_list *domain_list,
2167 : __isl_keep isl_union_map *executed, __isl_keep isl_ast_build *build)
2168 : {
2169 : int depth;
2170 : struct isl_ast_generate_parallel_domains_data data;
2171 :
2172 0 : if (!domain_list)
2173 0 : return NULL;
2174 :
2175 0 : data.n = isl_basic_set_list_n_basic_set(domain_list);
2176 0 : if (data.n <= 1)
2177 0 : return generate_sorted_domains(domain_list, executed, build);
2178 :
2179 0 : depth = isl_ast_build_get_depth(build);
2180 0 : data.list = NULL;
2181 0 : data.executed = executed;
2182 0 : data.build = build;
2183 0 : data.single = 0;
2184 0 : if (isl_basic_set_list_foreach_scc(domain_list, &shared_outer, &depth,
2185 : &generate_sorted_domains_wrap,
2186 : &data) < 0)
2187 0 : data.list = isl_ast_graft_list_free(data.list);
2188 :
2189 0 : if (!data.single)
2190 0 : data.list = isl_ast_graft_list_sort_guard(data.list);
2191 :
2192 0 : return data.list;
2193 : }
2194 :
2195 : /* Internal data for separate_domain.
2196 : *
2197 : * "explicit" is set if we only want to use explicit bounds.
2198 : *
2199 : * "domain" collects the separated domains.
2200 : */
2201 : struct isl_separate_domain_data {
2202 : isl_ast_build *build;
2203 : int explicit;
2204 : isl_set *domain;
2205 : };
2206 :
2207 : /* Extract implicit bounds on the current dimension for the executed "map".
2208 : *
2209 : * The domain of "map" may involve inner dimensions, so we
2210 : * need to eliminate them.
2211 : */
2212 0 : static __isl_give isl_set *implicit_bounds(__isl_take isl_map *map,
2213 : __isl_keep isl_ast_build *build)
2214 : {
2215 : isl_set *domain;
2216 :
2217 0 : domain = isl_map_domain(map);
2218 0 : domain = isl_ast_build_eliminate(build, domain);
2219 :
2220 0 : return domain;
2221 : }
2222 :
2223 : /* Extract explicit bounds on the current dimension for the executed "map".
2224 : *
2225 : * Rather than eliminating the inner dimensions as in implicit_bounds,
2226 : * we simply drop any constraints involving those inner dimensions.
2227 : * The idea is that most bounds that are implied by constraints on the
2228 : * inner dimensions will be enforced by for loops and not by explicit guards.
2229 : * There is then no need to separate along those bounds.
2230 : */
2231 0 : static __isl_give isl_set *explicit_bounds(__isl_take isl_map *map,
2232 : __isl_keep isl_ast_build *build)
2233 : {
2234 : isl_set *domain;
2235 : int depth, dim;
2236 :
2237 0 : dim = isl_map_dim(map, isl_dim_out);
2238 0 : map = isl_map_drop_constraints_involving_dims(map, isl_dim_out, 0, dim);
2239 :
2240 0 : domain = isl_map_domain(map);
2241 0 : depth = isl_ast_build_get_depth(build);
2242 0 : dim = isl_set_dim(domain, isl_dim_set);
2243 0 : domain = isl_set_detect_equalities(domain);
2244 0 : domain = isl_set_drop_constraints_involving_dims(domain,
2245 0 : isl_dim_set, depth + 1, dim - (depth + 1));
2246 0 : domain = isl_set_remove_divs_involving_dims(domain,
2247 : isl_dim_set, depth, 1);
2248 0 : domain = isl_set_remove_unknown_divs(domain);
2249 :
2250 0 : return domain;
2251 : }
2252 :
2253 : /* Split data->domain into pieces that intersect with the range of "map"
2254 : * and pieces that do not intersect with the range of "map"
2255 : * and then add that part of the range of "map" that does not intersect
2256 : * with data->domain.
2257 : */
2258 0 : static isl_stat separate_domain(__isl_take isl_map *map, void *user)
2259 : {
2260 0 : struct isl_separate_domain_data *data = user;
2261 : isl_set *domain;
2262 : isl_set *d1, *d2;
2263 :
2264 0 : if (data->explicit)
2265 0 : domain = explicit_bounds(map, data->build);
2266 : else
2267 0 : domain = implicit_bounds(map, data->build);
2268 :
2269 0 : domain = isl_set_coalesce(domain);
2270 0 : domain = isl_set_make_disjoint(domain);
2271 0 : d1 = isl_set_subtract(isl_set_copy(domain), isl_set_copy(data->domain));
2272 0 : d2 = isl_set_subtract(isl_set_copy(data->domain), isl_set_copy(domain));
2273 0 : data->domain = isl_set_intersect(data->domain, domain);
2274 0 : data->domain = isl_set_union(data->domain, d1);
2275 0 : data->domain = isl_set_union(data->domain, d2);
2276 :
2277 0 : return isl_stat_ok;
2278 : }
2279 :
2280 : /* Separate the schedule domains of "executed".
2281 : *
2282 : * That is, break up the domain of "executed" into basic sets,
2283 : * such that for each basic set S, every element in S is associated with
2284 : * the same domain spaces.
2285 : *
2286 : * "space" is the (single) domain space of "executed".
2287 : */
2288 0 : static __isl_give isl_set *separate_schedule_domains(
2289 : __isl_take isl_space *space, __isl_take isl_union_map *executed,
2290 : __isl_keep isl_ast_build *build)
2291 : {
2292 0 : struct isl_separate_domain_data data = { build };
2293 : isl_ctx *ctx;
2294 :
2295 0 : ctx = isl_ast_build_get_ctx(build);
2296 0 : data.explicit = isl_options_get_ast_build_separation_bounds(ctx) ==
2297 : ISL_AST_BUILD_SEPARATION_BOUNDS_EXPLICIT;
2298 0 : data.domain = isl_set_empty(space);
2299 0 : if (isl_union_map_foreach_map(executed, &separate_domain, &data) < 0)
2300 0 : data.domain = isl_set_free(data.domain);
2301 :
2302 0 : isl_union_map_free(executed);
2303 0 : return data.domain;
2304 : }
2305 :
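/* Editorial illustration (not part of isl proper): the kind of splitting
 * performed by separate_domain/separate_schedule_domains above, on two
 * made-up one-dimensional schedule domains.  If one statement runs for
 * 0 <= i <= 5 and another for 3 <= i <= 8, then separation yields the
 * three pieces 0 <= i <= 2, 3 <= i <= 5 and 6 <= i <= 8, each of which
 * is executed by a fixed subset of the statements.  This sketch is not
 * called by the code generator.
 */
static __isl_give isl_set *illustrate_separation_pieces(isl_ctx *ctx)
{
	isl_set *d1, *d2, *both, *only1, *only2;

	d1 = isl_set_read_from_str(ctx, "{ [i] : 0 <= i <= 5 }");
	d2 = isl_set_read_from_str(ctx, "{ [i] : 3 <= i <= 8 }");
	both = isl_set_intersect(isl_set_copy(d1), isl_set_copy(d2));
	only1 = isl_set_subtract(isl_set_copy(d1), isl_set_copy(d2));
	only2 = isl_set_subtract(d2, d1);

	/* The three pieces are pairwise disjoint; return their union
	 * only to keep the sketch self-contained. */
	return isl_set_union(isl_set_union(only1, both), only2);
}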
2306 : /* Temporary data used during the search for a lower bound for unrolling.
2307 : *
2308 : * "build" is the build in which the unrolling will be performed
2309 : * "domain" is the original set for which to find a lower bound
2310 : * "depth" is the dimension for which to find a lower bound
2311 : * "expansion" is the expansion that needs to be applied to "domain"
2312 : * in the unrolling that will be performed
2313 : *
2314 : * "lower" is the best lower bound found so far. It is NULL if we have not
2315 : * found any yet.
2316 : * "n" is the corresponding size. If lower is NULL, then the value of n
2317 : * is undefined.
2318 : * "n_div" is the maximal number of integer divisions in the first
2319 : * unrolled iteration (after expansion). It is set to -1 if it hasn't
2320 : * been computed yet.
2321 : */
2322 : struct isl_find_unroll_data {
2323 : isl_ast_build *build;
2324 : isl_set *domain;
2325 : int depth;
2326 : isl_basic_map *expansion;
2327 :
2328 : isl_aff *lower;
2329 : int *n;
2330 : int n_div;
2331 : };
2332 :
2333 : /* Return the constraint
2334 : *
2335 : * i_"depth" = aff + offset
2336 : */
2337 0 : static __isl_give isl_constraint *at_offset(int depth, __isl_keep isl_aff *aff,
2338 : int offset)
2339 : {
2340 0 : aff = isl_aff_copy(aff);
2341 0 : aff = isl_aff_add_coefficient_si(aff, isl_dim_in, depth, -1);
2342 0 : aff = isl_aff_add_constant_si(aff, offset);
2343 0 : return isl_equality_from_aff(aff);
2344 : }
2345 :
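/* Editorial illustration (not part of isl proper): what at_offset constructs
 * for a made-up lower bound.  With the lower bound l(j) = 2j on the
 * dimension at depth 1 and offset 1, the resulting constraint is the
 * equality i = 2j + 1, i.e., the slice corresponding to the second
 * unrolled iteration.  This sketch is not called by the code generator.
 */
static __isl_give isl_constraint *illustrate_at_offset(isl_ctx *ctx)
{
	isl_aff *lower;
	isl_constraint *slice;

	lower = isl_aff_read_from_str(ctx, "{ [j, i] -> [(2j)] }");
	slice = at_offset(1, lower, 1);
	isl_aff_free(lower);

	return slice;
}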
2346 : * Update *user to the number of integer divisions in the first element
2347 : * of "ma", if it is larger than the current value.
2348 : */
2349 0 : static isl_stat update_n_div(__isl_take isl_set *set,
2350 : __isl_take isl_multi_aff *ma, void *user)
2351 : {
2352 : isl_aff *aff;
2353 0 : int *n = user;
2354 : int n_div;
2355 :
2356 0 : aff = isl_multi_aff_get_aff(ma, 0);
2357 0 : n_div = isl_aff_dim(aff, isl_dim_div);
2358 0 : isl_aff_free(aff);
2359 0 : isl_multi_aff_free(ma);
2360 0 : isl_set_free(set);
2361 :
2362 0 : if (n_div > *n)
2363 0 : *n = n_div;
2364 :
2365 0 : return aff ? isl_stat_ok : isl_stat_error;
2366 : }
2367 :
2368 : /* Get the number of integer divisions in the expression for the iterator
2369 : * value at the first slice in the unrolling based on lower bound "lower",
2370 : * taking into account the expansion that needs to be performed on this slice.
2371 : */
2372 0 : static int get_expanded_n_div(struct isl_find_unroll_data *data,
2373 : __isl_keep isl_aff *lower)
2374 : {
2375 : isl_constraint *c;
2376 : isl_set *set;
2377 : isl_map *it_map, *expansion;
2378 : isl_pw_multi_aff *pma;
2379 : int n;
2380 :
2381 0 : c = at_offset(data->depth, lower, 0);
2382 0 : set = isl_set_copy(data->domain);
2383 0 : set = isl_set_add_constraint(set, c);
2384 0 : expansion = isl_map_from_basic_map(isl_basic_map_copy(data->expansion));
2385 0 : set = isl_set_apply(set, expansion);
2386 0 : it_map = isl_ast_build_map_to_iterator(data->build, set);
2387 0 : pma = isl_pw_multi_aff_from_map(it_map);
2388 0 : n = 0;
2389 0 : if (isl_pw_multi_aff_foreach_piece(pma, &update_n_div, &n) < 0)
2390 0 : n = -1;
2391 0 : isl_pw_multi_aff_free(pma);
2392 :
2393 0 : return n;
2394 : }
2395 :
2396 : /* Is the lower bound "lower" with corresponding iteration count "n"
2397 : * better than the one stored in "data"?
2398 : * If there is no upper bound on the iteration count ("n" is infinity) or
2399 : * if the count is too large, then we cannot use this lower bound.
2400 : * Otherwise, if there was no previous lower bound or
2401 : * if the iteration count of the new lower bound is smaller than
2402 : * the iteration count of the previous lower bound, then we consider
2403 : * the new lower bound to be better.
2404 : * If the iteration count is the same, then compare the number
2405 : * of integer divisions that would be needed to express
2406 : * the iterator value at the first slice in the unrolling
2407 : * according to the lower bound. If we end up computing this
2408 : * number, then store the lowest value in data->n_div.
2409 : */
2410 0 : static int is_better_lower_bound(struct isl_find_unroll_data *data,
2411 : __isl_keep isl_aff *lower, __isl_keep isl_val *n)
2412 : {
2413 : int cmp;
2414 : int n_div;
2415 :
2416 0 : if (!n)
2417 0 : return -1;
2418 0 : if (isl_val_is_infty(n))
2419 0 : return 0;
2420 0 : if (isl_val_cmp_si(n, INT_MAX) > 0)
2421 0 : return 0;
2422 0 : if (!data->lower)
2423 0 : return 1;
2424 0 : cmp = isl_val_cmp_si(n, *data->n);
2425 0 : if (cmp < 0)
2426 0 : return 1;
2427 0 : if (cmp > 0)
2428 0 : return 0;
2429 0 : if (data->n_div < 0)
2430 0 : data->n_div = get_expanded_n_div(data, data->lower);
2431 0 : if (data->n_div < 0)
2432 0 : return -1;
2433 0 : if (data->n_div == 0)
2434 0 : return 0;
2435 0 : n_div = get_expanded_n_div(data, lower);
2436 0 : if (n_div < 0)
2437 0 : return -1;
2438 0 : if (n_div >= data->n_div)
2439 0 : return 0;
2440 0 : data->n_div = n_div;
2441 :
2442 0 : return 1;
2443 : }
2444 :
2445 : /* Check if we can use "c" as a lower bound and if it is better than
2446 : * any previously found lower bound.
2447 : *
2448 : * If "c" does not involve the dimension at the current depth,
2449 : * then we cannot use it.
2450 : * Otherwise, let "c" be of the form
2451 : *
2452 : * i >= f(j)/a
2453 : *
2454 : * We compute the maximal value of
2455 : *
2456 : * -ceil(f(j)/a) + i + 1
2457 : *
2458 : * over the domain. If there is such a value "n", then we know
2459 : *
2460 : * -ceil(f(j)/a) + i + 1 <= n
2461 : *
2462 : * or
2463 : *
2464 : * i < ceil(f(j)/a) + n
2465 : *
2466 : * meaning that we can use ceil(f(j)/a) as a lower bound for unrolling.
2467 : * We just need to check if we have found any lower bound before and
2468 : * if the new lower bound is better (smaller n or fewer integer divisions)
2469 : * than the previously found lower bounds.
2470 : */
2471 0 : static isl_stat update_unrolling_lower_bound(struct isl_find_unroll_data *data,
2472 : __isl_keep isl_constraint *c)
2473 : {
2474 : isl_aff *aff, *lower;
2475 : isl_val *max;
2476 : int better;
2477 :
2478 0 : if (!isl_constraint_is_lower_bound(c, isl_dim_set, data->depth))
2479 0 : return isl_stat_ok;
2480 :
2481 0 : lower = isl_constraint_get_bound(c, isl_dim_set, data->depth);
2482 0 : lower = isl_aff_ceil(lower);
2483 0 : aff = isl_aff_copy(lower);
2484 0 : aff = isl_aff_neg(aff);
2485 0 : aff = isl_aff_add_coefficient_si(aff, isl_dim_in, data->depth, 1);
2486 0 : aff = isl_aff_add_constant_si(aff, 1);
2487 0 : max = isl_set_max_val(data->domain, aff);
2488 0 : isl_aff_free(aff);
2489 :
2490 0 : better = is_better_lower_bound(data, lower, max);
2491 0 : if (better < 0 || !better) {
2492 0 : isl_val_free(max);
2493 0 : isl_aff_free(lower);
2494 0 : return better < 0 ? isl_stat_error : isl_stat_ok;
2495 : }
2496 :
2497 0 : isl_aff_free(data->lower);
2498 0 : data->lower = lower;
2499 0 : *data->n = isl_val_get_num_si(max);
2500 0 : isl_val_free(max);
2501 :
2502 0 : return isl_stat_ok;
2503 : }
2504 :
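/* Editorial illustration (not part of isl proper): the bound computation
 * above, run by hand on a made-up domain.  For
 * { [j, i] : 0 <= j <= 10 and j <= i <= j + 3 } and the candidate lower
 * bound i >= j, the maximum of i - j + 1 over the domain is 4, so the
 * dimension can be unrolled into four slices i = j + k with 0 <= k <= 3.
 * This sketch is not called by the code generator.
 */
static __isl_give isl_val *illustrate_unroll_count(isl_ctx *ctx)
{
	isl_set *dom;
	isl_aff *obj;
	isl_val *max;

	dom = isl_set_read_from_str(ctx,
		"{ [j, i] : 0 <= j <= 10 and j <= i <= j + 3 }");
	obj = isl_aff_read_from_str(ctx, "{ [j, i] -> [(i - j + 1)] }");
	max = isl_set_max_val(dom, obj);	/* the value 4 */
	isl_aff_free(obj);
	isl_set_free(dom);

	return max;
}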
2505 : /* Check if we can use "c" as a lower bound and if it is better than
2506 : * any previously found lower bound.
2507 : */
2508 0 : static isl_stat constraint_find_unroll(__isl_take isl_constraint *c, void *user)
2509 : {
2510 : struct isl_find_unroll_data *data;
2511 : isl_stat r;
2512 :
2513 0 : data = (struct isl_find_unroll_data *) user;
2514 0 : r = update_unrolling_lower_bound(data, c);
2515 0 : isl_constraint_free(c);
2516 :
2517 0 : return r;
2518 : }
2519 :
2520 : /* Look for a lower bound l(i) on the dimension at "depth"
2521 : * and a size n such that "domain" is a subset of
2522 : *
2523 : * { [i] : l(i) <= i_d < l(i) + n }
2524 : *
2525 : * where d is "depth" and l(i) depends only on earlier dimensions.
2526 : * Furthermore, try and find a lower bound such that n is as small as possible.
2527 : * In particular, "n" needs to be finite.
2528 : * "build" is the build in which the unrolling will be performed.
2529 : * "expansion" is the expansion that needs to be applied to "domain"
2530 : * in the unrolling that will be performed.
2531 : *
2532 : * Inner dimensions have been eliminated from "domain" by the caller.
2533 : *
2534 : * We first construct a collection of lower bounds on the input set
2535 : * by computing its simple hull. We then iterate through them,
2536 : * discarding those that we cannot use (either because they do not
2537 : * involve the dimension at "depth" or because they have no corresponding
2538 : * upper bound, meaning that "n" would be unbounded) and pick out the
2539 : * best from the remaining ones.
2540 : *
2541 : * If we cannot find a suitable lower bound, then we consider that
2542 : * to be an error.
2543 : */
2544 0 : static __isl_give isl_aff *find_unroll_lower_bound(
2545 : __isl_keep isl_ast_build *build, __isl_keep isl_set *domain,
2546 : int depth, __isl_keep isl_basic_map *expansion, int *n)
2547 : {
2548 0 : struct isl_find_unroll_data data =
2549 : { build, domain, depth, expansion, NULL, n, -1 };
2550 : isl_basic_set *hull;
2551 :
2552 0 : hull = isl_set_simple_hull(isl_set_copy(domain));
2553 :
2554 0 : if (isl_basic_set_foreach_constraint(hull,
2555 : &constraint_find_unroll, &data) < 0)
2556 0 : goto error;
2557 :
2558 0 : isl_basic_set_free(hull);
2559 :
2560 0 : if (!data.lower)
2561 0 : isl_die(isl_set_get_ctx(domain), isl_error_invalid,
2562 : "cannot find lower bound for unrolling", return NULL);
2563 :
2564 0 : return data.lower;
2565 : error:
2566 0 : isl_basic_set_free(hull);
2567 0 : return isl_aff_free(data.lower);
2568 : }
2569 :
2570 : /* Call "fn" on each iteration of the current dimension of "domain".
2571 : * If "init" is not NULL, then it is called with the number of
2572 : * iterations before any call to "fn".
2573 : * Return -1 on failure.
2574 : *
2575 : * Since we are going to be iterating over the individual values,
2576 : * we first check if there are any strides on the current dimension.
2577 : * If there is, we rewrite the current dimension i as
2578 : *
2579 : * i = stride i' + offset
2580 : *
2581 : * and then iterate over individual values of i' instead.
2582 : *
2583 : * We then look for a lower bound on i' and a size such that the domain
2584 : * is a subset of
2585 : *
2586 : * { [j,i'] : l(j) <= i' < l(j) + n }
2587 : *
2588 : * and then take slices of the domain at values of i'
2589 : * between l(j) and l(j) + n - 1.
2590 : *
2591 : * We compute the unshifted simple hull of each slice to ensure that
2592 : * we have a single basic set per offset. The slicing constraint
2593 : * may get simplified away before the unshifted simple hull is taken
2594 : * and may therefore in some rare cases disappear from the result.
2595 : * We therefore explicitly add the constraint back after computing
2596 : * the unshifted simple hull to ensure that the basic sets
2597 : * remain disjoint. The constraints that are dropped by taking the hull
2598 : * will be taken into account at the next level, as in the case of the
2599 : * atomic option.
2600 : *
2601 : * Finally, we map i' back to i and call "fn".
2602 : */
2603 0 : static int foreach_iteration(__isl_take isl_set *domain,
2604 : __isl_keep isl_ast_build *build, int (*init)(int n, void *user),
2605 : int (*fn)(__isl_take isl_basic_set *bset, void *user), void *user)
2606 : {
2607 : int i, n;
2608 : int empty;
2609 : int depth;
2610 : isl_multi_aff *expansion;
2611 : isl_basic_map *bmap;
2612 0 : isl_aff *lower = NULL;
2613 : isl_ast_build *stride_build;
2614 :
2615 0 : depth = isl_ast_build_get_depth(build);
2616 :
2617 0 : domain = isl_ast_build_eliminate_inner(build, domain);
2618 0 : domain = isl_set_intersect(domain, isl_ast_build_get_domain(build));
2619 0 : stride_build = isl_ast_build_copy(build);
2620 0 : stride_build = isl_ast_build_detect_strides(stride_build,
2621 : isl_set_copy(domain));
2622 0 : expansion = isl_ast_build_get_stride_expansion(stride_build);
2623 :
2624 0 : domain = isl_set_preimage_multi_aff(domain,
2625 : isl_multi_aff_copy(expansion));
2626 0 : domain = isl_ast_build_eliminate_divs(stride_build, domain);
2627 0 : isl_ast_build_free(stride_build);
2628 :
2629 0 : bmap = isl_basic_map_from_multi_aff(expansion);
2630 :
2631 0 : empty = isl_set_is_empty(domain);
2632 0 : if (empty < 0) {
2633 0 : n = -1;
2634 0 : } else if (empty) {
2635 0 : n = 0;
2636 : } else {
2637 0 : lower = find_unroll_lower_bound(build, domain, depth, bmap, &n);
2638 0 : if (!lower)
2639 0 : n = -1;
2640 : }
2641 0 : if (n >= 0 && init && init(n, user) < 0)
2642 0 : n = -1;
2643 0 : for (i = 0; i < n; ++i) {
2644 : isl_set *set;
2645 : isl_basic_set *bset;
2646 : isl_constraint *slice;
2647 :
2648 0 : slice = at_offset(depth, lower, i);
2649 0 : set = isl_set_copy(domain);
2650 0 : set = isl_set_add_constraint(set, isl_constraint_copy(slice));
2651 0 : bset = isl_set_unshifted_simple_hull(set);
2652 0 : bset = isl_basic_set_add_constraint(bset, slice);
2653 0 : bset = isl_basic_set_apply(bset, isl_basic_map_copy(bmap));
2654 :
2655 0 : if (fn(bset, user) < 0)
2656 0 : break;
2657 : }
2658 :
2659 0 : isl_aff_free(lower);
2660 0 : isl_set_free(domain);
2661 0 : isl_basic_map_free(bmap);
2662 :
2663 0 : return n < 0 || i < n ? -1 : 0;
2664 : }
2665 :
2666 : /* Data structure for storing the results and the intermediate objects
2667 : * of compute_domains.
2668 : *
2669 : * "list" is the main result of the function and contains a list
2670 : * of disjoint basic sets for which code should be generated.
2671 : *
2672 : * "executed" and "build" are inputs to compute_domains.
2673 : * "schedule_domain" is the domain of "executed".
2674 : *
2675 : * "option" contains the domains at the current depth that should be
2676 : * atomic, separated or unrolled. These domains are as specified by
2677 : * the user, except that inner dimensions have been eliminated and
2678 : * that they have been made pair-wise disjoint.
2679 : *
2680 : * "sep_class" contains the user-specified split into separation classes
2681 : * specialized to the current depth.
2682 : * "done" contains the union of the separation domains that have already
2683 : * been handled.
2684 : */
2685 : struct isl_codegen_domains {
2686 : isl_basic_set_list *list;
2687 :
2688 : isl_union_map *executed;
2689 : isl_ast_build *build;
2690 : isl_set *schedule_domain;
2691 :
2692 : isl_set *option[4];
2693 :
2694 : isl_map *sep_class;
2695 : isl_set *done;
2696 : };
2697 :
2698 : /* Internal data structure for do_unroll.
2699 : *
2700 : * "domains" stores the results of compute_domains.
2701 : * "class_domain" is the original class domain passed to do_unroll.
2702 : * "unroll_domain" collects the unrolled iterations.
2703 : */
2704 : struct isl_ast_unroll_data {
2705 : struct isl_codegen_domains *domains;
2706 : isl_set *class_domain;
2707 : isl_set *unroll_domain;
2708 : };
2709 :
2710 : /* Given an iteration of an unrolled domain represented by "bset",
2711 : * add it to data->domains->list.
2712 : * Since we may have dropped some constraints, we intersect with
2713 : * the class domain again to ensure that each element in the list
2714 : * is disjoint from the other class domains.
2715 : */
2716 0 : static int do_unroll_iteration(__isl_take isl_basic_set *bset, void *user)
2717 : {
2718 0 : struct isl_ast_unroll_data *data = user;
2719 : isl_set *set;
2720 : isl_basic_set_list *list;
2721 :
2722 0 : set = isl_set_from_basic_set(bset);
2723 0 : data->unroll_domain = isl_set_union(data->unroll_domain,
2724 : isl_set_copy(set));
2725 0 : set = isl_set_intersect(set, isl_set_copy(data->class_domain));
2726 0 : set = isl_set_make_disjoint(set);
2727 0 : list = isl_basic_set_list_from_set(set);
2728 0 : data->domains->list = isl_basic_set_list_concat(data->domains->list,
2729 : list);
2730 :
2731 0 : return 0;
2732 : }
2733 :
2734 : /* Extend domains->list with a list of basic sets, one for each value
2735 : * of the current dimension in "domain" and remove the corresponding
2736 : * sets from the class domain. Return the updated class domain.
2737 : * The divs that involve the current dimension have not been projected out
2738 : * from this domain.
2739 : *
2740 : * We call foreach_iteration to iterate over the individual values and
2741 : * in do_unroll_iteration we collect the individual basic sets in
2742 : * domains->list and their union in data->unroll_domain, which is then
2743 : * used to update the class domain.
2744 : */
2745 0 : static __isl_give isl_set *do_unroll(struct isl_codegen_domains *domains,
2746 : __isl_take isl_set *domain, __isl_take isl_set *class_domain)
2747 : {
2748 : struct isl_ast_unroll_data data;
2749 :
2750 0 : if (!domain)
2751 0 : return isl_set_free(class_domain);
2752 0 : if (!class_domain)
2753 0 : return isl_set_free(domain);
2754 :
2755 0 : data.domains = domains;
2756 0 : data.class_domain = class_domain;
2757 0 : data.unroll_domain = isl_set_empty(isl_set_get_space(domain));
2758 :
2759 0 : if (foreach_iteration(domain, domains->build, NULL,
2760 : &do_unroll_iteration, &data) < 0)
2761 0 : data.unroll_domain = isl_set_free(data.unroll_domain);
2762 :
2763 0 : class_domain = isl_set_subtract(class_domain, data.unroll_domain);
2764 :
2765 0 : return class_domain;
2766 : }
2767 :
2768 : /* Add domains to domains->list for each individual value of the current
2769 : * dimension, for that part of the schedule domain that lies in the
2770 : * intersection of the option domain and the class domain.
2771 : * Remove the corresponding sets from the class domain and
2772 : * return the updated class domain.
2773 : *
2774 : * We first break up the unroll option domain into individual pieces
2775 : * and then handle each of them separately. The unroll option domain
2776 : * has been made disjoint in compute_domains_init_options.
2777 : *
2778 : * Note that we actively want to combine different pieces of the
2779 : * schedule domain that have the same value at the current dimension.
2780 : * We therefore need to break up the unroll option domain before
2781 : * intersecting with class and schedule domain, hoping that the
2782 : * unroll option domain specified by the user is relatively simple.
2783 : */
2784 0 : static __isl_give isl_set *compute_unroll_domains(
2785 : struct isl_codegen_domains *domains, __isl_take isl_set *class_domain)
2786 : {
2787 : isl_set *unroll_domain;
2788 : isl_basic_set_list *unroll_list;
2789 : int i, n;
2790 : int empty;
2791 :
2792 0 : empty = isl_set_is_empty(domains->option[isl_ast_loop_unroll]);
2793 0 : if (empty < 0)
2794 0 : return isl_set_free(class_domain);
2795 0 : if (empty)
2796 0 : return class_domain;
2797 :
2798 0 : unroll_domain = isl_set_copy(domains->option[isl_ast_loop_unroll]);
2799 0 : unroll_list = isl_basic_set_list_from_set(unroll_domain);
2800 :
2801 0 : n = isl_basic_set_list_n_basic_set(unroll_list);
2802 0 : for (i = 0; i < n; ++i) {
2803 : isl_basic_set *bset;
2804 :
2805 0 : bset = isl_basic_set_list_get_basic_set(unroll_list, i);
2806 0 : unroll_domain = isl_set_from_basic_set(bset);
2807 0 : unroll_domain = isl_set_intersect(unroll_domain,
2808 : isl_set_copy(class_domain));
2809 0 : unroll_domain = isl_set_intersect(unroll_domain,
2810 : isl_set_copy(domains->schedule_domain));
2811 :
2812 0 : empty = isl_set_is_empty(unroll_domain);
2813 0 : if (empty >= 0 && empty) {
2814 0 : isl_set_free(unroll_domain);
2815 0 : continue;
2816 : }
2817 :
2818 0 : class_domain = do_unroll(domains, unroll_domain, class_domain);
2819 : }
2820 :
2821 0 : isl_basic_set_list_free(unroll_list);
2822 :
2823 0 : return class_domain;
2824 : }
2825 :
2826 : /* Try and construct a single basic set that includes the intersection of
2827 : * the schedule domain, the atomic option domain and the class domain.
2828 : * Add the resulting basic set(s) to domains->list and remove them
2829 : * from class_domain. Return the updated class domain.
2830 : *
2831 : * We construct a single domain rather than trying to combine
2832 : * the schedule domains of individual domains because we are working
2833 : * within a single component so that non-overlapping schedule domains
2834 : * should already have been separated.
2835 : * We do however need to make sure that this single domain is a subset
2836 : * of the class domain so that it would not intersect with any other
2837 : * class domains. This means that we may end up splitting up the atomic
2838 : * domain in case separation classes are being used.
2839 : *
2840 : * "domain" is the intersection of the schedule domain and the class domain,
2841 : * with inner dimensions projected out.
2842 : */
2843 0 : static __isl_give isl_set *compute_atomic_domain(
2844 : struct isl_codegen_domains *domains, __isl_take isl_set *class_domain)
2845 : {
2846 : isl_basic_set *bset;
2847 : isl_basic_set_list *list;
2848 : isl_set *domain, *atomic_domain;
2849 : int empty;
2850 :
2851 0 : domain = isl_set_copy(domains->option[isl_ast_loop_atomic]);
2852 0 : domain = isl_set_intersect(domain, isl_set_copy(class_domain));
2853 0 : domain = isl_set_intersect(domain,
2854 : isl_set_copy(domains->schedule_domain));
2855 0 : empty = isl_set_is_empty(domain);
2856 0 : if (empty < 0)
2857 0 : class_domain = isl_set_free(class_domain);
2858 0 : if (empty) {
2859 0 : isl_set_free(domain);
2860 0 : return class_domain;
2861 : }
2862 :
2863 0 : domain = isl_ast_build_eliminate(domains->build, domain);
2864 0 : domain = isl_set_coalesce_preserve(domain);
2865 0 : bset = isl_set_unshifted_simple_hull(domain);
2866 0 : domain = isl_set_from_basic_set(bset);
2867 0 : atomic_domain = isl_set_copy(domain);
2868 0 : domain = isl_set_intersect(domain, isl_set_copy(class_domain));
2869 0 : class_domain = isl_set_subtract(class_domain, atomic_domain);
2870 0 : domain = isl_set_make_disjoint(domain);
2871 0 : list = isl_basic_set_list_from_set(domain);
2872 0 : domains->list = isl_basic_set_list_concat(domains->list, list);
2873 :
2874 0 : return class_domain;
2875 : }
2876 :
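/* Editorial illustration (not part of isl proper): how the atomic option may
 * enlarge the requested domain, as mentioned above.  The two-piece set
 * { [i] : 0 <= i <= 3 or 7 <= i <= 9 } has { [i] : 0 <= i <= 9 } as its
 * unshifted simple hull, so making it atomic also claims the gap
 * 4 <= i <= 6 for the single basic set.  This sketch is not called by
 * the code generator.
 */
static __isl_give isl_basic_set *illustrate_atomic_hull(isl_ctx *ctx)
{
	isl_set *set;

	set = isl_set_read_from_str(ctx,
		"{ [i] : 0 <= i <= 3 or 7 <= i <= 9 }");

	return isl_set_unshifted_simple_hull(set);
}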
2877 : /* Split up the schedule domain into uniform basic sets,
2878 : * in the sense that each element in a basic set is associated to
2879 : * elements of the same domains, and add the result to domains->list.
2880 : * Do this for that part of the schedule domain that lies in the
2881 : * intersection of "class_domain" and the separate option domain.
2882 : *
2883 : * "class_domain" may or may not include the constraints
2884 : * of the schedule domain, but this does not make a difference
2885 : * since we are going to intersect it with the domain of the inverse schedule.
2886 : * If it includes schedule domain constraints, then they may involve
2887 : * inner dimensions, but we will eliminate them in separation_domain.
2888 : */
2889 0 : static int compute_separate_domain(struct isl_codegen_domains *domains,
2890 : __isl_keep isl_set *class_domain)
2891 : {
2892 : isl_space *space;
2893 : isl_set *domain;
2894 : isl_union_map *executed;
2895 : isl_basic_set_list *list;
2896 : int empty;
2897 :
2898 0 : domain = isl_set_copy(domains->option[isl_ast_loop_separate]);
2899 0 : domain = isl_set_intersect(domain, isl_set_copy(class_domain));
2900 0 : executed = isl_union_map_copy(domains->executed);
2901 0 : executed = isl_union_map_intersect_domain(executed,
2902 : isl_union_set_from_set(domain));
2903 0 : empty = isl_union_map_is_empty(executed);
2904 0 : if (empty < 0 || empty) {
2905 0 : isl_union_map_free(executed);
2906 0 : return empty < 0 ? -1 : 0;
2907 : }
2908 :
2909 0 : space = isl_set_get_space(class_domain);
2910 0 : domain = separate_schedule_domains(space, executed, domains->build);
2911 :
2912 0 : list = isl_basic_set_list_from_set(domain);
2913 0 : domains->list = isl_basic_set_list_concat(domains->list, list);
2914 :
2915 0 : return 0;
2916 : }
2917 :
2918 : /* Split up the domain at the current depth into disjoint
2919 : * basic sets for which code should be generated separately
2920 : * for the given separation class domain.
2921 : *
2922 : * If any separation classes have been defined, then "class_domain"
2923 : * is the domain of the current class and does not refer to inner dimensions.
2924 : * Otherwise, "class_domain" is the universe domain.
2925 : *
2926 : * We first make sure that the class domain is disjoint from
2927 : * previously considered class domains.
2928 : *
2929 : * The separate domains can be computed directly from the "class_domain".
2930 : *
2931 : * The unroll, atomic and remainder domains need the constraints
2932 : * from the schedule domain.
2933 : *
2934 : * For unrolling, the actual schedule domain is needed (with divs that
2935 : * may refer to the current dimension) so that stride detection can be
2936 : * performed.
2937 : *
2938 : * For atomic and remainder domains, inner dimensions and divs involving
2939 : * the current dimensions should be eliminated.
2940 : * In case we are working within a separation class, we need to intersect
2941 : * the result with the current "class_domain" to ensure that the domains
2942 : * are disjoint from those generated from other class domains.
2943 : *
2944 : * The domain that has been made atomic may be larger than specified
2945 : * by the user since it needs to be representable as a single basic set.
2946 : * This possibly larger domain is removed from class_domain by
2947 : * compute_atomic_domain. It is computed first so that the extended domain
2948 : * would not overlap with any domains computed before.
2949 : * Similarly, the unrolled domains may have some constraints removed and
2950 : * may therefore also be larger than specified by the user.
2951 : *
2952 : * If anything is left after handling separate, unroll and atomic,
2953 : * we split it up into basic sets and append the basic sets to domains->list.
2954 : */
2955 0 : static isl_stat compute_partial_domains(struct isl_codegen_domains *domains,
2956 : __isl_take isl_set *class_domain)
2957 : {
2958 : isl_basic_set_list *list;
2959 : isl_set *domain;
2960 :
2961 0 : class_domain = isl_set_subtract(class_domain,
2962 : isl_set_copy(domains->done));
2963 0 : domains->done = isl_set_union(domains->done,
2964 : isl_set_copy(class_domain));
2965 :
2966 0 : class_domain = compute_atomic_domain(domains, class_domain);
2967 0 : class_domain = compute_unroll_domains(domains, class_domain);
2968 :
2969 0 : domain = isl_set_copy(class_domain);
2970 :
2971 0 : if (compute_separate_domain(domains, domain) < 0)
2972 0 : goto error;
2973 0 : domain = isl_set_subtract(domain,
2974 : isl_set_copy(domains->option[isl_ast_loop_separate]));
2975 :
2976 0 : domain = isl_set_intersect(domain,
2977 : isl_set_copy(domains->schedule_domain));
2978 :
2979 0 : domain = isl_ast_build_eliminate(domains->build, domain);
2980 0 : domain = isl_set_intersect(domain, isl_set_copy(class_domain));
2981 :
2982 0 : domain = isl_set_coalesce_preserve(domain);
2983 0 : domain = isl_set_make_disjoint(domain);
2984 :
2985 0 : list = isl_basic_set_list_from_set(domain);
2986 0 : domains->list = isl_basic_set_list_concat(domains->list, list);
2987 :
2988 0 : isl_set_free(class_domain);
2989 :
2990 0 : return isl_stat_ok;
2991 : error:
2992 0 : isl_set_free(domain);
2993 0 : isl_set_free(class_domain);
2994 0 : return isl_stat_error;
2995 : }
2996 :
2997 : /* Split up the domain at the current depth into disjoint
2998 : * basic sets for which code should be generated separately
2999 : * for the separation class identified by "pnt".
3000 : *
3001 : * We extract the corresponding class domain from domains->sep_class,
3002 : * eliminate inner dimensions and pass control to compute_partial_domains.
3003 : */
3004 0 : static isl_stat compute_class_domains(__isl_take isl_point *pnt, void *user)
3005 : {
3006 0 : struct isl_codegen_domains *domains = user;
3007 : isl_set *class_set;
3008 : isl_set *domain;
3009 : int disjoint;
3010 :
3011 0 : class_set = isl_set_from_point(pnt);
3012 0 : domain = isl_map_domain(isl_map_intersect_range(
3013 : isl_map_copy(domains->sep_class), class_set));
3014 0 : domain = isl_ast_build_compute_gist(domains->build, domain);
3015 0 : domain = isl_ast_build_eliminate(domains->build, domain);
3016 :
3017 0 : disjoint = isl_set_plain_is_disjoint(domain, domains->schedule_domain);
3018 0 : if (disjoint < 0)
3019 0 : return isl_stat_error;
3020 0 : if (disjoint) {
3021 0 : isl_set_free(domain);
3022 0 : return isl_stat_ok;
3023 : }
3024 :
3025 0 : return compute_partial_domains(domains, domain);
3026 : }
3027 :
3028 : /* Extract the domains at the current depth that should be atomic,
3029 : * separated or unrolled and store them in option.
3030 : *
3031 : * The domains specified by the user might overlap, so we make
3032 : * them disjoint by subtracting earlier domains from later domains.
3033 : */
3034 0 : static void compute_domains_init_options(isl_set *option[4],
3035 : __isl_keep isl_ast_build *build)
3036 : {
3037 : enum isl_ast_loop_type type, type2;
3038 : isl_set *unroll;
3039 :
3040 0 : for (type = isl_ast_loop_atomic;
3041 0 : type <= isl_ast_loop_separate; ++type) {
3042 0 : option[type] = isl_ast_build_get_option_domain(build, type);
3043 0 : for (type2 = isl_ast_loop_atomic; type2 < type; ++type2)
3044 0 : option[type] = isl_set_subtract(option[type],
3045 0 : isl_set_copy(option[type2]));
3046 : }
3047 :
3048 0 : unroll = option[isl_ast_loop_unroll];
3049 0 : unroll = isl_set_coalesce(unroll);
3050 0 : unroll = isl_set_make_disjoint(unroll);
3051 0 : option[isl_ast_loop_unroll] = unroll;
3052 0 : }
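
/* Editor's note: a hypothetical illustration of the subtraction order in
 * compute_domains_init_options above.  If the user requested
 *
 *	atomic: { [i] : 0 <= i < 10 }
 *	unroll: { [i] : 5 <= i < 15 }
 *
 * then the unroll option domain is reduced to { [i] : 10 <= i < 15 },
 * so that the two option domains no longer overlap and each iteration
 * is handled by exactly one option.
 */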
3053 :
3054 : /* Split up the domain at the current depth into disjoint
3055 : * basic sets for which code should be generated separately,
3056 : * based on the user-specified options.
3057 : * Return the list of disjoint basic sets.
3058 : *
3059 : * There are three kinds of domains that we need to keep track of.
3060 : * - the "schedule domain" is the domain of "executed"
3061 : * - the "class domain" is the domain corresponding to the currrent
3062 : * separation class
3063 : * - the "option domain" is the domain corresponding to one of the options
3064 : * atomic, unroll or separate
3065 : *
3066 : * We first consider the individual values of the separation classes
3067 : * and split up the domain for each of them separately.
3068 : * Finally, we consider the remainder. If no separation classes were
3069 : * specified, then we call compute_partial_domains with the universe
3070 : * "class_domain". Otherwise, we take the "schedule_domain" as "class_domain",
3071 : * with inner dimensions removed. We do this because we want to
3072 : * avoid computing the complement of the class domains (i.e., the difference
3073 : * between the universe and domains->done).
3074 : */
3075 0 : static __isl_give isl_basic_set_list *compute_domains(
3076 : __isl_keep isl_union_map *executed, __isl_keep isl_ast_build *build)
3077 : {
3078 : struct isl_codegen_domains domains;
3079 : isl_ctx *ctx;
3080 : isl_set *domain;
3081 : isl_union_set *schedule_domain;
3082 : isl_set *classes;
3083 : isl_space *space;
3084 : int n_param;
3085 : enum isl_ast_loop_type type;
3086 : int empty;
3087 :
3088 0 : if (!executed)
3089 0 : return NULL;
3090 :
3091 0 : ctx = isl_union_map_get_ctx(executed);
3092 0 : domains.list = isl_basic_set_list_alloc(ctx, 0);
3093 :
3094 0 : schedule_domain = isl_union_map_domain(isl_union_map_copy(executed));
3095 0 : domain = isl_set_from_union_set(schedule_domain);
3096 :
3097 0 : compute_domains_init_options(domains.option, build);
3098 :
3099 0 : domains.sep_class = isl_ast_build_get_separation_class(build);
3100 0 : classes = isl_map_range(isl_map_copy(domains.sep_class));
3101 0 : n_param = isl_set_dim(classes, isl_dim_param);
3102 0 : classes = isl_set_project_out(classes, isl_dim_param, 0, n_param);
3103 :
3104 0 : space = isl_set_get_space(domain);
3105 0 : domains.build = build;
3106 0 : domains.schedule_domain = isl_set_copy(domain);
3107 0 : domains.executed = executed;
3108 0 : domains.done = isl_set_empty(space);
3109 :
3110 0 : if (isl_set_foreach_point(classes, &compute_class_domains, &domains) < 0)
3111 0 : domains.list = isl_basic_set_list_free(domains.list);
3112 0 : isl_set_free(classes);
3113 :
3114 0 : empty = isl_set_is_empty(domains.done);
3115 0 : if (empty < 0) {
3116 0 : domains.list = isl_basic_set_list_free(domains.list);
3117 0 : domain = isl_set_free(domain);
3118 0 : } else if (empty) {
3119 0 : isl_set_free(domain);
3120 0 : domain = isl_set_universe(isl_set_get_space(domains.done));
3121 : } else {
3122 0 : domain = isl_ast_build_eliminate(build, domain);
3123 : }
3124 0 : if (compute_partial_domains(&domains, domain) < 0)
3125 0 : domains.list = isl_basic_set_list_free(domains.list);
3126 :
3127 0 : isl_set_free(domains.schedule_domain);
3128 0 : isl_set_free(domains.done);
3129 0 : isl_map_free(domains.sep_class);
3130 0 : for (type = isl_ast_loop_atomic; type <= isl_ast_loop_separate; ++type)
3131 0 : isl_set_free(domains.option[type]);
3132 :
3133 0 : return domains.list;
3134 : }
3135 :
3136 : /* Generate code for a single component, after shifting (if any)
3137 : * has been applied, in case the schedule was specified as a union map.
3138 : *
3139 : * We first split up the domain at the current depth into disjoint
3140 : * basic sets based on the user-specified options.
3141 : * Then we generate code for each of them and concatenate the results.
3142 : */
3143 0 : static __isl_give isl_ast_graft_list *generate_shifted_component_flat(
3144 : __isl_take isl_union_map *executed, __isl_take isl_ast_build *build)
3145 : {
3146 : isl_basic_set_list *domain_list;
3147 0 : isl_ast_graft_list *list = NULL;
3148 :
3149 0 : domain_list = compute_domains(executed, build);
3150 0 : list = generate_parallel_domains(domain_list, executed, build);
3151 :
3152 0 : isl_basic_set_list_free(domain_list);
3153 0 : isl_union_map_free(executed);
3154 0 : isl_ast_build_free(build);
3155 :
3156 0 : return list;
3157 : }
3158 :
3159 : /* Generate code for a single component, after shifting (if any)
3160 : * has been applied, in case the schedule was specified as a schedule tree
3161 : * and the separate option was specified.
3162 : *
3163 : * We perform separation on the domain of "executed" and then generate
3164 : * an AST for each of the resulting disjoint basic sets.
3165 : */
3166 0 : static __isl_give isl_ast_graft_list *generate_shifted_component_tree_separate(
3167 : __isl_take isl_union_map *executed, __isl_take isl_ast_build *build)
3168 : {
3169 : isl_space *space;
3170 : isl_set *domain;
3171 : isl_basic_set_list *domain_list;
3172 : isl_ast_graft_list *list;
3173 :
3174 0 : space = isl_ast_build_get_space(build, 1);
3175 0 : domain = separate_schedule_domains(space,
3176 : isl_union_map_copy(executed), build);
3177 0 : domain_list = isl_basic_set_list_from_set(domain);
3178 :
3179 0 : list = generate_parallel_domains(domain_list, executed, build);
3180 :
3181 0 : isl_basic_set_list_free(domain_list);
3182 0 : isl_union_map_free(executed);
3183 0 : isl_ast_build_free(build);
3184 :
3185 0 : return list;
3186 : }
3187 :
3188 : /* Internal data structure for generate_shifted_component_tree_unroll.
3189 : *
3190 : * "executed" and "build" are inputs to generate_shifted_component_tree_unroll.
3191 : * "list" collects the constructs grafts.
3192 : */
3193 : struct isl_ast_unroll_tree_data {
3194 : isl_union_map *executed;
3195 : isl_ast_build *build;
3196 : isl_ast_graft_list *list;
3197 : };
3198 :
3199 : /* Initialize data->list to a list of "n" elements.
3200 : */
3201 0 : static int init_unroll_tree(int n, void *user)
3202 : {
3203 0 : struct isl_ast_unroll_tree_data *data = user;
3204 : isl_ctx *ctx;
3205 :
3206 0 : ctx = isl_ast_build_get_ctx(data->build);
3207 0 : data->list = isl_ast_graft_list_alloc(ctx, n);
3208 :
3209 0 : return 0;
3210 : }
3211 :
3212 : /* Given an iteration of an unrolled domain represented by "bset",
3213 : * generate the corresponding AST and add the result to data->list.
3214 : */
3215 0 : static int do_unroll_tree_iteration(__isl_take isl_basic_set *bset, void *user)
3216 : {
3217 0 : struct isl_ast_unroll_tree_data *data = user;
3218 :
3219 0 : data->list = add_node(data->list, isl_union_map_copy(data->executed),
3220 : bset, isl_ast_build_copy(data->build));
3221 :
3222 0 : return 0;
3223 : }
3224 :
3225 : /* Generate code for a single component, after shifting (if any)
3226 : * has been applied, in case the schedule was specified as a schedule tree
3227 : * and the unroll option was specified.
3228 : *
3229 : * We call foreach_iteration to iterate over the individual values and
3230 : * construct and collect the corresponding grafts in do_unroll_tree_iteration.
3231 : */
3232 0 : static __isl_give isl_ast_graft_list *generate_shifted_component_tree_unroll(
3233 : __isl_take isl_union_map *executed, __isl_take isl_set *domain,
3234 : __isl_take isl_ast_build *build)
3235 : {
3236 0 : struct isl_ast_unroll_tree_data data = { executed, build, NULL };
3237 :
3238 0 : if (foreach_iteration(domain, build, &init_unroll_tree,
3239 : &do_unroll_tree_iteration, &data) < 0)
3240 0 : data.list = isl_ast_graft_list_free(data.list);
3241 :
3242 0 : isl_union_map_free(executed);
3243 0 : isl_ast_build_free(build);
3244 :
3245 0 : return data.list;
3246 : }
3247 :
3248 : /* Does "domain" involve a disjunction that is purely based on
3249 : * constraints involving only outer dimensions?
3250 : *
3251 : * In particular, is there a disjunction such that the constraints
3252 : * involving the current and later dimensions are the same over
3253 : * all the disjuncts?
3254 : */
3255 0 : static isl_bool has_pure_outer_disjunction(__isl_keep isl_set *domain,
3256 : __isl_keep isl_ast_build *build)
3257 : {
3258 : isl_basic_set *hull;
3259 : isl_set *shared, *inner;
3260 : isl_bool equal;
3261 : int depth, dim;
3262 :
3263 0 : if (isl_set_n_basic_set(domain) <= 1)
3264 0 : return isl_bool_false;
3265 :
3266 0 : inner = isl_set_copy(domain);
3267 0 : depth = isl_ast_build_get_depth(build);
3268 0 : dim = isl_set_dim(inner, isl_dim_set);
3269 0 : inner = isl_set_drop_constraints_not_involving_dims(inner,
3270 0 : isl_dim_set, depth, dim - depth);
3271 0 : hull = isl_set_plain_unshifted_simple_hull(isl_set_copy(inner));
3272 0 : shared = isl_set_from_basic_set(hull);
3273 0 : equal = isl_set_plain_is_equal(inner, shared);
3274 0 : isl_set_free(inner);
3275 0 : isl_set_free(shared);
3276 :
3277 0 : return equal;
3278 : }
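
/* Editor's note: a rough stand-alone sketch of the check above, using only
 * public isl calls; the function name and the example set are made up.
 * At depth 1, the disjunction in the example only involves the outer
 * dimension i, so keeping the constraints that involve the current and
 * inner dimensions yields the same set in every disjunct and the check
 * is expected to succeed.
 */
#include <isl/ctx.h>
#include <isl/set.h>

int pure_outer_disjunction_sketch(void)
{
	isl_ctx *ctx = isl_ctx_alloc();
	isl_set *dom = isl_set_read_from_str(ctx,
		"{ [i, j] : 0 <= j < 10 and (i < 5 or i >= 7) }");
	isl_set *inner, *shared;
	isl_bool equal;

	/* Keep only the constraints involving dimension 1 (the "depth"). */
	inner = isl_set_drop_constraints_not_involving_dims(
			isl_set_copy(dom), isl_dim_set, 1, 1);
	shared = isl_set_from_basic_set(
		isl_set_plain_unshifted_simple_hull(isl_set_copy(inner)));
	equal = isl_set_plain_is_equal(inner, shared);

	isl_set_free(inner);
	isl_set_free(shared);
	isl_set_free(dom);
	isl_ctx_free(ctx);

	return equal == isl_bool_true;
}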
3279 :
3280 : /* Generate code for a single component, after shifting (if any)
3281 : * has been applied, in case the schedule was specified as a schedule tree.
3282 : * In particular, handle the base case where there is either no isolated
3283 : * set or we are within the isolated set (in which case "isolated" is set)
3284 : * or the iterations that precede or follow the isolated set.
3285 : *
3286 : * The schedule domain is broken up or combined into basic sets
3287 : * according to the AST generation option specified in the current
3288 : * schedule node, which may be either atomic, separate, unroll or
3289 : * unspecified. If the option is unspecified, then we currently simply
3290 : * split the schedule domain into disjoint basic sets.
3291 : *
3292 : * In case the separate option is specified, the AST generation is
3293 : * handled by generate_shifted_component_tree_separate.
3294 : * In the other cases, we need the global schedule domain.
3295 : * In the unroll case, the AST generation is then handled by
3296 : * generate_shifted_component_tree_unroll which needs the actual
3297 : * schedule domain (with divs that may refer to the current dimension)
3298 : * so that stride detection can be performed.
3299 : * In the atomic or unspecified case, inner dimensions and divs involving
3300 : * the current dimensions should be eliminated.
3301 : * The result is then either combined into a single basic set or
3302 : * split up into disjoint basic sets.
3303 : * Finally an AST is generated for each basic set and the results are
3304 : * concatenated.
3305 : *
3306 : * If the schedule domain involves a disjunction that is purely based on
3307 : * constraints involving only outer dimensions, then it is treated as
3308 : * if atomic was specified. This ensures that only a single loop
3309 : * is generated instead of a sequence of identical loops with
3310 : * different guards.
3311 : */
3312 0 : static __isl_give isl_ast_graft_list *generate_shifted_component_tree_base(
3313 : __isl_take isl_union_map *executed, __isl_take isl_ast_build *build,
3314 : int isolated)
3315 : {
3316 : isl_bool outer_disjunction;
3317 : isl_union_set *schedule_domain;
3318 : isl_set *domain;
3319 : isl_basic_set_list *domain_list;
3320 : isl_ast_graft_list *list;
3321 : enum isl_ast_loop_type type;
3322 :
3323 0 : type = isl_ast_build_get_loop_type(build, isolated);
3324 0 : if (type < 0)
3325 0 : goto error;
3326 :
3327 0 : if (type == isl_ast_loop_separate)
3328 0 : return generate_shifted_component_tree_separate(executed,
3329 : build);
3330 :
3331 0 : schedule_domain = isl_union_map_domain(isl_union_map_copy(executed));
3332 0 : domain = isl_set_from_union_set(schedule_domain);
3333 :
3334 0 : if (type == isl_ast_loop_unroll)
3335 0 : return generate_shifted_component_tree_unroll(executed, domain,
3336 : build);
3337 :
3338 0 : domain = isl_ast_build_eliminate(build, domain);
3339 0 : domain = isl_set_coalesce_preserve(domain);
3340 :
3341 0 : outer_disjunction = has_pure_outer_disjunction(domain, build);
3342 0 : if (outer_disjunction < 0)
3343 0 : domain = isl_set_free(domain);
3344 :
3345 0 : if (outer_disjunction || type == isl_ast_loop_atomic) {
3346 : isl_basic_set *hull;
3347 0 : hull = isl_set_unshifted_simple_hull(domain);
3348 0 : domain_list = isl_basic_set_list_from_basic_set(hull);
3349 : } else {
3350 0 : domain = isl_set_make_disjoint(domain);
3351 0 : domain_list = isl_basic_set_list_from_set(domain);
3352 : }
3353 :
3354 0 : list = generate_parallel_domains(domain_list, executed, build);
3355 :
3356 0 : isl_basic_set_list_free(domain_list);
3357 0 : isl_union_map_free(executed);
3358 0 : isl_ast_build_free(build);
3359 :
3360 0 : return list;
3361 : error:
3362 0 : isl_union_map_free(executed);
3363 0 : isl_ast_build_free(build);
3364 0 : return NULL;
3365 : }
3366 :
3367 : /* Extract out the disjunction imposed by "domain" on the outer
3368 : * schedule dimensions.
3369 : *
3370 : * In particular, remove all inner dimensions from "domain" (including
3371 : * the current dimension) and then remove the constraints that are shared
3372 : * by all disjuncts in the result.
3373 : */
3374 0 : static __isl_give isl_set *extract_disjunction(__isl_take isl_set *domain,
3375 : __isl_keep isl_ast_build *build)
3376 : {
3377 : isl_set *hull;
3378 : int depth, dim;
3379 :
3380 0 : domain = isl_ast_build_specialize(build, domain);
3381 0 : depth = isl_ast_build_get_depth(build);
3382 0 : dim = isl_set_dim(domain, isl_dim_set);
3383 0 : domain = isl_set_eliminate(domain, isl_dim_set, depth, dim - depth);
3384 0 : domain = isl_set_remove_unknown_divs(domain);
3385 0 : hull = isl_set_copy(domain);
3386 0 : hull = isl_set_from_basic_set(isl_set_unshifted_simple_hull(hull));
3387 0 : domain = isl_set_gist(domain, hull);
3388 :
3389 0 : return domain;
3390 : }
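
/* Editor's note: a hypothetical example of extract_disjunction above.
 * For the domain { [i, j] : 0 <= i < 20 and 0 <= j < 10 and (i < 5 or i >= 7) }
 * at depth 1, eliminating the current and inner dimensions leaves
 * { [i, j] : 0 <= i < 20 and (i < 5 or i >= 7) }.  The constraints shared
 * by both disjuncts (0 <= i < 20) form the simple hull and are gisted
 * away, so the extracted disjunction is roughly { [i, j] : i < 5 or i >= 7 },
 * which the caller then imposes as an explicit guard.
 */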
3391 :
3392 : /* Add "guard" to the grafts in "list".
3393 : * "build" is the outer AST build, while "sub_build" includes "guard"
3394 : * in its generated domain.
3395 : *
3396 : * First combine the grafts into a single graft and then add the guard.
3397 : * If the list is empty, or if some error occurred, then simply return
3398 : * the list.
3399 : */
3400 0 : static __isl_give isl_ast_graft_list *list_add_guard(
3401 : __isl_take isl_ast_graft_list *list, __isl_keep isl_set *guard,
3402 : __isl_keep isl_ast_build *build, __isl_keep isl_ast_build *sub_build)
3403 : {
3404 : isl_ast_graft *graft;
3405 :
3406 0 : list = isl_ast_graft_list_fuse(list, sub_build);
3407 :
3408 0 : if (isl_ast_graft_list_n_ast_graft(list) != 1)
3409 0 : return list;
3410 :
3411 0 : graft = isl_ast_graft_list_get_ast_graft(list, 0);
3412 0 : graft = isl_ast_graft_add_guard(graft, isl_set_copy(guard), build);
3413 0 : list = isl_ast_graft_list_set_ast_graft(list, 0, graft);
3414 :
3415 0 : return list;
3416 : }
3417 :
3418 : /* Generate code for a single component, after shifting (if any)
3419 : * has been applied, in case the schedule was specified as a schedule tree.
3420 : * In particular, do so for the specified subset of the schedule domain.
3421 : *
3422 : * If we are outside of the isolated part, then "domain" may include
3423 : * a disjunction. Explicitly generate this disjunction at this point
3424 : * instead of relying on the disjunction getting hoisted back up
3425 : * to this level.
3426 : */
3427 0 : static __isl_give isl_ast_graft_list *generate_shifted_component_tree_part(
3428 : __isl_keep isl_union_map *executed, __isl_take isl_set *domain,
3429 : __isl_keep isl_ast_build *build, int isolated)
3430 : {
3431 : isl_union_set *uset;
3432 : isl_ast_graft_list *list;
3433 : isl_ast_build *sub_build;
3434 : int empty;
3435 :
3436 0 : uset = isl_union_set_from_set(isl_set_copy(domain));
3437 0 : executed = isl_union_map_copy(executed);
3438 0 : executed = isl_union_map_intersect_domain(executed, uset);
3439 0 : empty = isl_union_map_is_empty(executed);
3440 0 : if (empty < 0)
3441 0 : goto error;
3442 0 : if (empty) {
3443 : isl_ctx *ctx;
3444 0 : isl_union_map_free(executed);
3445 0 : isl_set_free(domain);
3446 0 : ctx = isl_ast_build_get_ctx(build);
3447 0 : return isl_ast_graft_list_alloc(ctx, 0);
3448 : }
3449 :
3450 0 : sub_build = isl_ast_build_copy(build);
3451 0 : if (!isolated) {
3452 0 : domain = extract_disjunction(domain, build);
3453 0 : sub_build = isl_ast_build_restrict_generated(sub_build,
3454 : isl_set_copy(domain));
3455 : }
3456 0 : list = generate_shifted_component_tree_base(executed,
3457 : isl_ast_build_copy(sub_build), isolated);
3458 0 : if (!isolated)
3459 0 : list = list_add_guard(list, domain, build, sub_build);
3460 0 : isl_ast_build_free(sub_build);
3461 0 : isl_set_free(domain);
3462 0 : return list;
3463 : error:
3464 0 : isl_union_map_free(executed);
3465 0 : isl_set_free(domain);
3466 0 : return NULL;
3467 : }
3468 :
3469 : /* Generate code for a single component, after shifting (if any)
3470 : * has been applied, in case the schedule was specified as a schedule tree.
3471 : * In particular, do so for the specified sequence of subsets
3472 : * of the schedule domain, "before", "isolated", "after" and "other",
3473 : * where only the "isolated" part is considered to be isolated.
3474 : */
3475 0 : static __isl_give isl_ast_graft_list *generate_shifted_component_parts(
3476 : __isl_take isl_union_map *executed, __isl_take isl_set *before,
3477 : __isl_take isl_set *isolated, __isl_take isl_set *after,
3478 : __isl_take isl_set *other, __isl_take isl_ast_build *build)
3479 : {
3480 : isl_ast_graft_list *list, *res;
3481 :
3482 0 : res = generate_shifted_component_tree_part(executed, before, build, 0);
3483 0 : list = generate_shifted_component_tree_part(executed, isolated,
3484 : build, 1);
3485 0 : res = isl_ast_graft_list_concat(res, list);
3486 0 : list = generate_shifted_component_tree_part(executed, after, build, 0);
3487 0 : res = isl_ast_graft_list_concat(res, list);
3488 0 : list = generate_shifted_component_tree_part(executed, other, build, 0);
3489 0 : res = isl_ast_graft_list_concat(res, list);
3490 :
3491 0 : isl_union_map_free(executed);
3492 0 : isl_ast_build_free(build);
3493 :
3494 0 : return res;
3495 : }
3496 :
3497 : /* Does "set" intersect "first", but not "second"?
3498 : */
3499 0 : static isl_bool only_intersects_first(__isl_keep isl_set *set,
3500 : __isl_keep isl_set *first, __isl_keep isl_set *second)
3501 : {
3502 : isl_bool disjoint;
3503 :
3504 0 : disjoint = isl_set_is_disjoint(set, first);
3505 0 : if (disjoint < 0)
3506 0 : return isl_bool_error;
3507 0 : if (disjoint)
3508 0 : return isl_bool_false;
3509 :
3510 0 : return isl_set_is_disjoint(set, second);
3511 : }
3512 :
3513 : /* Generate code for a single component, after shifting (if any)
3514 : * has been applied, in case the schedule was specified as a schedule tree.
3515 : * In particular, do so in case of isolation where there is
3516 : * only an "isolated" part and an "after" part.
3517 : * "dead1" and "dead2" are freed by this function in order to simplify
3518 : * the caller.
3519 : *
3520 : * The "before" and "other" parts are set to empty sets.
3521 : */
3522 0 : static __isl_give isl_ast_graft_list *generate_shifted_component_only_after(
3523 : __isl_take isl_union_map *executed, __isl_take isl_set *isolated,
3524 : __isl_take isl_set *after, __isl_take isl_ast_build *build,
3525 : __isl_take isl_set *dead1, __isl_take isl_set *dead2)
3526 : {
3527 : isl_set *empty;
3528 :
3529 0 : empty = isl_set_empty(isl_set_get_space(after));
3530 0 : isl_set_free(dead1);
3531 0 : isl_set_free(dead2);
3532 0 : return generate_shifted_component_parts(executed, isl_set_copy(empty),
3533 : isolated, after, empty, build);
3534 : }
3535 :
3536 : /* Generate code for a single component, after shifting (if any)
3537 : * has been applied, in case the schedule was specified as a schedule tree.
3538 : *
3539 : * We first check if the user has specified an isolated schedule domain
3540 : * and that we are not already outside of this isolated schedule domain.
3541 : * If so, we break up the schedule domain into iterations that
3542 : * precede the isolated domain, the isolated domain itself,
3543 : * the iterations that follow the isolated domain and
3544 : * the remaining iterations (those that are incomparable
3545 : * to the isolated domain).
3546 : * We generate an AST for each piece and concatenate the results.
3547 : *
3548 : * If the isolated domain is not convex, then it is replaced
3549 : * by a convex superset to ensure that the sets of preceding and
3550 : * following iterations are properly defined and, in particular,
3551 : * that there are no intermediate iterations that do not belong
3552 : * to the isolated domain.
3553 : *
3554 : * In the special case where at least one element of the schedule
3555 : * domain that does not belong to the isolated domain needs
3556 : * to be scheduled after this isolated domain, but none of those
3557 : * elements need to be scheduled before, break up the schedule domain
3558 : * into only two parts: the isolated domain and a part that will be
3559 : * scheduled after the isolated domain.
3560 : *
3561 : * If no isolated set has been specified, then we generate an
3562 : * AST for the entire inverse schedule.
3563 : */
3564 0 : static __isl_give isl_ast_graft_list *generate_shifted_component_tree(
3565 : __isl_take isl_union_map *executed, __isl_take isl_ast_build *build)
3566 : {
3567 : int i, depth;
3568 : int empty, has_isolate;
3569 : isl_space *space;
3570 : isl_union_set *schedule_domain;
3571 : isl_set *domain;
3572 : isl_basic_set *hull;
3573 : isl_set *isolated, *before, *after, *test;
3574 : isl_map *gt, *lt;
3575 : isl_bool pure;
3576 :
3577 0 : build = isl_ast_build_extract_isolated(build);
3578 0 : has_isolate = isl_ast_build_has_isolated(build);
3579 0 : if (has_isolate < 0)
3580 0 : executed = isl_union_map_free(executed);
3581 0 : else if (!has_isolate)
3582 0 : return generate_shifted_component_tree_base(executed, build, 0);
3583 :
3584 0 : schedule_domain = isl_union_map_domain(isl_union_map_copy(executed));
3585 0 : domain = isl_set_from_union_set(schedule_domain);
3586 :
3587 0 : isolated = isl_ast_build_get_isolated(build);
3588 0 : isolated = isl_set_intersect(isolated, isl_set_copy(domain));
3589 0 : test = isl_ast_build_specialize(build, isl_set_copy(isolated));
3590 0 : empty = isl_set_is_empty(test);
3591 0 : isl_set_free(test);
3592 0 : if (empty < 0)
3593 0 : goto error;
3594 0 : if (empty) {
3595 0 : isl_set_free(isolated);
3596 0 : isl_set_free(domain);
3597 0 : return generate_shifted_component_tree_base(executed, build, 0);
3598 : }
3599 0 : isolated = isl_ast_build_eliminate(build, isolated);
3600 0 : hull = isl_set_unshifted_simple_hull(isolated);
3601 0 : isolated = isl_set_from_basic_set(hull);
3602 :
3603 0 : depth = isl_ast_build_get_depth(build);
3604 0 : space = isl_space_map_from_set(isl_set_get_space(isolated));
3605 0 : gt = isl_map_universe(space);
3606 0 : for (i = 0; i < depth; ++i)
3607 0 : gt = isl_map_equate(gt, isl_dim_in, i, isl_dim_out, i);
3608 0 : gt = isl_map_order_gt(gt, isl_dim_in, depth, isl_dim_out, depth);
3609 0 : lt = isl_map_reverse(isl_map_copy(gt));
3610 0 : before = isl_set_apply(isl_set_copy(isolated), gt);
3611 0 : after = isl_set_apply(isl_set_copy(isolated), lt);
3612 :
3613 0 : domain = isl_set_subtract(domain, isl_set_copy(isolated));
3614 0 : pure = only_intersects_first(domain, after, before);
3615 0 : if (pure < 0)
3616 0 : executed = isl_union_map_free(executed);
3617 0 : else if (pure)
3618 0 : return generate_shifted_component_only_after(executed, isolated,
3619 : domain, build, before, after);
3620 0 : domain = isl_set_subtract(domain, isl_set_copy(before));
3621 0 : domain = isl_set_subtract(domain, isl_set_copy(after));
3622 0 : after = isl_set_subtract(after, isl_set_copy(isolated));
3623 0 : after = isl_set_subtract(after, isl_set_copy(before));
3624 0 : before = isl_set_subtract(before, isl_set_copy(isolated));
3625 :
3626 0 : return generate_shifted_component_parts(executed, before, isolated,
3627 : after, domain, build);
3628 : error:
3629 0 : isl_set_free(domain);
3630 0 : isl_set_free(isolated);
3631 0 : isl_union_map_free(executed);
3632 0 : isl_ast_build_free(build);
3633 0 : return NULL;
3634 : }
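
/* Editor's note: a small illustration of the isolation handling above.
 * For a one-dimensional schedule domain { [i] : 0 <= i <= 10 } with
 * isolate option { [i] : i = 10 }, the pieces are (within the schedule
 * domain)
 *
 *	before:   { [i] : 0 <= i <= 9 }
 *	isolated: { [i] : i = 10 }
 *	after:    { }
 *	other:    { }
 *
 * so that a regular loop is generated for the first ten iterations and
 * separate code for the isolated iteration i = 10.
 */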
3635 :
3636 : /* Generate code for a single component, after shifting (if any)
3637 : * has been applied.
3638 : *
3639 : * Call generate_shifted_component_tree or generate_shifted_component_flat
3640 : * depending on whether the schedule was specified as a schedule tree.
3641 : */
3642 0 : static __isl_give isl_ast_graft_list *generate_shifted_component(
3643 : __isl_take isl_union_map *executed, __isl_take isl_ast_build *build)
3644 : {
3645 0 : if (isl_ast_build_has_schedule_node(build))
3646 0 : return generate_shifted_component_tree(executed, build);
3647 : else
3648 0 : return generate_shifted_component_flat(executed, build);
3649 : }
3650 :
3651 : struct isl_set_map_pair {
3652 : isl_set *set;
3653 : isl_map *map;
3654 : };
3655 :
3656 : /* Given an array "domain" of isl_set_map_pairs and an array "order"
3657 : * of indices into the "domain" array,
3658 : * return the union of the "map" fields of the elements
3659 : * indexed by the first "n" elements of "order".
3660 : */
3661 0 : static __isl_give isl_union_map *construct_component_executed(
3662 : struct isl_set_map_pair *domain, int *order, int n)
3663 : {
3664 : int i;
3665 : isl_map *map;
3666 : isl_union_map *executed;
3667 :
3668 0 : map = isl_map_copy(domain[order[0]].map);
3669 0 : executed = isl_union_map_from_map(map);
3670 0 : for (i = 1; i < n; ++i) {
3671 0 : map = isl_map_copy(domain[order[i]].map);
3672 0 : executed = isl_union_map_add_map(executed, map);
3673 : }
3674 :
3675 0 : return executed;
3676 : }
3677 :
3678 : /* Generate code for a single component, after shifting (if any)
3679 : * has been applied.
3680 : *
3681 : * The component inverse schedule is specified as the "map" fields
3682 : * of the elements of "domain" indexed by the first "n" elements of "order".
3683 : */
3684 0 : static __isl_give isl_ast_graft_list *generate_shifted_component_from_list(
3685 : struct isl_set_map_pair *domain, int *order, int n,
3686 : __isl_take isl_ast_build *build)
3687 : {
3688 : isl_union_map *executed;
3689 :
3690 0 : executed = construct_component_executed(domain, order, n);
3691 0 : return generate_shifted_component(executed, build);
3692 : }
3693 :
3694 : /* Does set dimension "pos" of "set" have an obviously fixed value?
3695 : */
3696 0 : static int dim_is_fixed(__isl_keep isl_set *set, int pos)
3697 : {
3698 : int fixed;
3699 : isl_val *v;
3700 :
3701 0 : v = isl_set_plain_get_val_if_fixed(set, isl_dim_set, pos);
3702 0 : if (!v)
3703 0 : return -1;
3704 0 : fixed = !isl_val_is_nan(v);
3705 0 : isl_val_free(v);
3706 :
3707 0 : return fixed;
3708 : }
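
/* Editor's note: a hypothetical example for dim_is_fixed above.  For the
 * set { [i, 5] : 0 <= i < 10 }, isl_set_plain_get_val_if_fixed returns
 * the value 5 for position 1, so that dimension is considered fixed,
 * while for position 0 it returns NaN and the dimension is not fixed.
 */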
3709 :
3710 : /* Given an array "domain" of isl_set_map_pairs and an array "order"
3711 : * of indices into the "domain" array,
3712 : * do all (except for at most one) of the "set" field of the elements
3713 : * indexed by the first "n" elements of "order" have a fixed value
3714 : * at position "depth"?
3715 : */
3716 0 : static int at_most_one_non_fixed(struct isl_set_map_pair *domain,
3717 : int *order, int n, int depth)
3718 : {
3719 : int i;
3720 0 : int non_fixed = -1;
3721 :
3722 0 : for (i = 0; i < n; ++i) {
3723 : int f;
3724 :
3725 0 : f = dim_is_fixed(domain[order[i]].set, depth);
3726 0 : if (f < 0)
3727 0 : return -1;
3728 0 : if (f)
3729 0 : continue;
3730 0 : if (non_fixed >= 0)
3731 0 : return 0;
3732 0 : non_fixed = i;
3733 : }
3734 :
3735 0 : return 1;
3736 : }
3737 :
3738 : /* Given an array "domain" of isl_set_map_pairs and an array "order"
3739 : * of indices into the "domain" array,
3740 : * eliminate the inner dimensions from the "set" field of the elements
3741 : * indexed by the first "n" elements of "order", provided the current
3742 : * dimension does not have a fixed value.
3743 : *
3744 : * Return the index of the first element in "order" with a corresponding
3745 : * "set" field that does not have an (obviously) fixed value.
3746 : */
3747 0 : static int eliminate_non_fixed(struct isl_set_map_pair *domain,
3748 : int *order, int n, int depth, __isl_keep isl_ast_build *build)
3749 : {
3750 : int i;
3751 0 : int base = -1;
3752 :
3753 0 : for (i = n - 1; i >= 0; --i) {
3754 : int f;
3755 0 : f = dim_is_fixed(domain[order[i]].set, depth);
3756 0 : if (f < 0)
3757 0 : return -1;
3758 0 : if (f)
3759 0 : continue;
3760 0 : domain[order[i]].set = isl_ast_build_eliminate_inner(build,
3761 0 : domain[order[i]].set);
3762 0 : base = i;
3763 : }
3764 :
3765 0 : return base;
3766 : }
3767 :
3768 : /* Given an array "domain" of isl_set_map_pairs and an array "order"
3769 : * of indices into the "domain" array,
3770 : * find the element of "domain" (amongst those indexed by the first "n"
3771 : * elements of "order") with the "set" field that has the smallest
3772 : * value for the current iterator.
3773 : *
3774 : * Note that the domain with the smallest value may depend on the parameters
3775 : * and/or outer loop dimensions. Since the result of this function is only
3776 : * used as a heuristic, we only make a reasonable attempt at finding the best
3777 : * domain, one that should work in case a single domain provides the smallest
3778 : * value for the current dimension over all values of the parameters
3779 : * and outer dimensions.
3780 : *
3781 : * In particular, we compute the smallest value of the first domain
3782 : * and replace it by that of any later domain if that later domain
3783 : * has a smallest value that is smaller for at least some value
3784 : * of the parameters and outer dimensions.
3785 : */
3786 0 : static int first_offset(struct isl_set_map_pair *domain, int *order, int n,
3787 : __isl_keep isl_ast_build *build)
3788 : {
3789 : int i;
3790 : isl_map *min_first;
3791 0 : int first = 0;
3792 :
3793 0 : min_first = isl_ast_build_map_to_iterator(build,
3794 0 : isl_set_copy(domain[order[0]].set));
3795 0 : min_first = isl_map_lexmin(min_first);
3796 :
3797 0 : for (i = 1; i < n; ++i) {
3798 : isl_map *min, *test;
3799 : int empty;
3800 :
3801 0 : min = isl_ast_build_map_to_iterator(build,
3802 0 : isl_set_copy(domain[order[i]].set));
3803 0 : min = isl_map_lexmin(min);
3804 0 : test = isl_map_copy(min);
3805 0 : test = isl_map_apply_domain(isl_map_copy(min_first), test);
3806 0 : test = isl_map_order_lt(test, isl_dim_in, 0, isl_dim_out, 0);
3807 0 : empty = isl_map_is_empty(test);
3808 0 : isl_map_free(test);
3809 0 : if (empty >= 0 && !empty) {
3810 0 : isl_map_free(min_first);
3811 0 : first = i;
3812 0 : min_first = min;
3813 : } else
3814 0 : isl_map_free(min);
3815 :
3816 0 : if (empty < 0)
3817 0 : break;
3818 : }
3819 :
3820 0 : isl_map_free(min_first);
3821 :
3822 0 : return i < n ? -1 : first;
3823 : }
3824 :
3825 : /* Construct a shifted inverse schedule based on the original inverse schedule,
3826 : * the stride and the offset.
3827 : *
3828 : * The original inverse schedule is specified as the "map" fields
3829 : * of the elements of "domain" indexed by the first "n" elements of "order".
3830 : *
3831 : * "stride" and "offset" are such that the difference
3832 : * between the values of the current dimension of domain "i"
3833 : * and the values of the current dimension for some reference domain are
3834 : * equal to
3835 : *
3836 : * stride * integer + offset[i]
3837 : *
3838 : * Moreover, 0 <= offset[i] < stride.
3839 : *
3840 : * For each domain, we create a map
3841 : *
3842 : * { [..., j, ...] -> [..., j - offset[i], offset[i], ....] }
3843 : *
3844 : * where j refers to the current dimension and the other dimensions are
3845 : * unchanged, and apply this map to the original schedule domain.
3846 : *
3847 : * For example, for the original schedule
3848 : *
3849 : * { A[i] -> [2i]: 0 <= i < 10; B[i] -> [2i+1] : 0 <= i < 10 }
3850 : *
3851 : * and assuming the offset is 0 for the A domain and 1 for the B domain,
3852 : * we apply the mapping
3853 : *
3854 : * { [j] -> [j, 0] }
3855 : *
3856 : * to the schedule of the "A" domain and the mapping
3857 : *
3858 : * { [j - 1] -> [j, 1] }
3859 : *
3860 : * to the schedule of the "B" domain.
3861 : *
3862 : *
3863 : * Note that after the transformation, the differences between pairs
3864 : * of values of the current dimension over all domains are multiples
3865 : * of stride and that we have therefore exposed the stride.
3866 : *
3867 : *
3868 : * To see that the mapping preserves the lexicographic order,
3869 : * first note that each of the individual maps above preserves the order.
3870 : * If the value of the current iterator is j1 in one domain and j2 in another,
3871 : * then if j1 = j2, we know that the same map is applied to both domains
3872 : * and the order is preserved.
3873 : * Otherwise, let us assume, without loss of generality, that j1 < j2.
3874 : * If c1 >= c2 (with c1 and c2 the corresponding offsets), then
3875 : *
3876 : * j1 - c1 < j2 - c2
3877 : *
3878 : * and the order is preserved.
3879 : * If c1 < c2, then we know
3880 : *
3881 : * 0 <= c2 - c1 < s
3882 : *
3883 : * We also have
3884 : *
3885 : * j2 - j1 = n * s + r
3886 : *
3887 : * with n >= 0 and 0 <= r < s.
3888 : * In other words, r = c2 - c1.
3889 : * If n > 0, then
3890 : *
3891 : * j1 - c1 < j2 - c2
3892 : *
3893 : * If n = 0, then
3894 : *
3895 : * j1 - c1 = j2 - c2
3896 : *
3897 : * and so
3898 : *
3899 : * (j1 - c1, c1) << (j2 - c2, c2)
3900 : *
3901 : * with "<<" the lexicographic order, proving that the order is preserved
3902 : * in all cases.
3903 : */
3904 0 : static __isl_give isl_union_map *construct_shifted_executed(
3905 : struct isl_set_map_pair *domain, int *order, int n,
3906 : __isl_keep isl_val *stride, __isl_keep isl_multi_val *offset,
3907 : __isl_take isl_ast_build *build)
3908 : {
3909 : int i;
3910 : isl_union_map *executed;
3911 : isl_space *space;
3912 : isl_map *map;
3913 : int depth;
3914 : isl_constraint *c;
3915 :
3916 0 : depth = isl_ast_build_get_depth(build);
3917 0 : space = isl_ast_build_get_space(build, 1);
3918 0 : executed = isl_union_map_empty(isl_space_copy(space));
3919 0 : space = isl_space_map_from_set(space);
3920 0 : map = isl_map_identity(isl_space_copy(space));
3921 0 : map = isl_map_eliminate(map, isl_dim_out, depth, 1);
3922 0 : map = isl_map_insert_dims(map, isl_dim_out, depth + 1, 1);
3923 0 : space = isl_space_insert_dims(space, isl_dim_out, depth + 1, 1);
3924 :
3925 0 : c = isl_constraint_alloc_equality(isl_local_space_from_space(space));
3926 0 : c = isl_constraint_set_coefficient_si(c, isl_dim_in, depth, 1);
3927 0 : c = isl_constraint_set_coefficient_si(c, isl_dim_out, depth, -1);
3928 :
3929 0 : for (i = 0; i < n; ++i) {
3930 : isl_map *map_i;
3931 : isl_val *v;
3932 :
3933 0 : v = isl_multi_val_get_val(offset, i);
3934 0 : if (!v)
3935 0 : break;
3936 0 : map_i = isl_map_copy(map);
3937 0 : map_i = isl_map_fix_val(map_i, isl_dim_out, depth + 1,
3938 : isl_val_copy(v));
3939 0 : v = isl_val_neg(v);
3940 0 : c = isl_constraint_set_constant_val(c, v);
3941 0 : map_i = isl_map_add_constraint(map_i, isl_constraint_copy(c));
3942 :
3943 0 : map_i = isl_map_apply_domain(isl_map_copy(domain[order[i]].map),
3944 : map_i);
3945 0 : executed = isl_union_map_add_map(executed, map_i);
3946 : }
3947 :
3948 0 : isl_constraint_free(c);
3949 0 : isl_map_free(map);
3950 :
3951 0 : if (i < n)
3952 0 : executed = isl_union_map_free(executed);
3953 :
3954 0 : return executed;
3955 : }
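
/* Editor's note: a worked instance of the order-preservation argument
 * above, using the example schedule with stride s = 2 and offsets
 * c_A = 0 and c_B = 1.  For j1 = 4 in the A domain and j2 = 5 in the
 * B domain (so j1 < j2), the shift maps them to
 *
 *	(j1 - c_A, c_A) = (4, 0)
 *	(j2 - c_B, c_B) = (4, 1)
 *
 * and (4, 0) << (4, 1), so the original order is preserved even though
 * the shifted values of the current dimension coincide.
 */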
3956 :
3957 : /* Generate code for a single component, after exposing the stride,
3958 : * given that the schedule domain is "shifted strided".
3959 : *
3960 : * The component inverse schedule is specified as the "map" fields
3961 : * of the elements of "domain" indexed by the first "n" elements of "order".
3962 : *
3963 : * The schedule domain being "shifted strided" means that the differences
3964 : * between the values of the current dimension of domain "i"
3965 : * and the values of the current dimension for some reference domain are
3966 : * equal to
3967 : *
3968 : * stride * integer + offset[i]
3969 : *
3970 : * We first look for the domain with the "smallest" value for the current
3971 : * dimension and adjust the offsets such that the offset of the "smallest"
3972 : * domain is equal to zero. The other offsets are reduced modulo stride.
3973 : *
3974 : * Based on this information, we construct a new inverse schedule in
3975 : * construct_shifted_executed that exposes the stride.
3976 : * Since this involves the introduction of a new schedule dimension,
3977 : * the build needs to be changed accordingly.
3978 : * After computing the AST, the newly introduced dimension needs
3979 : * to be removed again from the list of grafts. We do this by plugging
3980 : * in a mapping that represents the new schedule domain in terms of the
3981 : * old schedule domain.
3982 : */
3983 0 : static __isl_give isl_ast_graft_list *generate_shift_component(
3984 : struct isl_set_map_pair *domain, int *order, int n,
3985 : __isl_keep isl_val *stride, __isl_keep isl_multi_val *offset,
3986 : __isl_take isl_ast_build *build)
3987 : {
3988 : isl_ast_graft_list *list;
3989 : int first;
3990 : int depth;
3991 : isl_val *val;
3992 : isl_multi_val *mv;
3993 : isl_space *space;
3994 : isl_multi_aff *ma, *zero;
3995 : isl_union_map *executed;
3996 :
3997 0 : depth = isl_ast_build_get_depth(build);
3998 :
3999 0 : first = first_offset(domain, order, n, build);
4000 0 : if (first < 0)
4001 0 : goto error;
4002 :
4003 0 : mv = isl_multi_val_copy(offset);
4004 0 : val = isl_multi_val_get_val(offset, first);
4005 0 : val = isl_val_neg(val);
4006 0 : mv = isl_multi_val_add_val(mv, val);
4007 0 : mv = isl_multi_val_mod_val(mv, isl_val_copy(stride));
4008 :
4009 0 : executed = construct_shifted_executed(domain, order, n, stride, mv,
4010 : build);
4011 0 : space = isl_ast_build_get_space(build, 1);
4012 0 : space = isl_space_map_from_set(space);
4013 0 : ma = isl_multi_aff_identity(isl_space_copy(space));
4014 0 : space = isl_space_from_domain(isl_space_domain(space));
4015 0 : space = isl_space_add_dims(space, isl_dim_out, 1);
4016 0 : zero = isl_multi_aff_zero(space);
4017 0 : ma = isl_multi_aff_range_splice(ma, depth + 1, zero);
4018 0 : build = isl_ast_build_insert_dim(build, depth + 1);
4019 0 : list = generate_shifted_component(executed, build);
4020 :
4021 0 : list = isl_ast_graft_list_preimage_multi_aff(list, ma);
4022 :
4023 0 : isl_multi_val_free(mv);
4024 :
4025 0 : return list;
4026 : error:
4027 0 : isl_ast_build_free(build);
4028 0 : return NULL;
4029 : }
4030 :
4031 : /* Does any node in the schedule tree rooted at the current schedule node
4032 : * of "build" depend on outer schedule nodes?
4033 : */
4034 0 : static int has_anchored_subtree(__isl_keep isl_ast_build *build)
4035 : {
4036 : isl_schedule_node *node;
4037 0 : int dependent = 0;
4038 :
4039 0 : node = isl_ast_build_get_schedule_node(build);
4040 0 : dependent = isl_schedule_node_is_subtree_anchored(node);
4041 0 : isl_schedule_node_free(node);
4042 :
4043 0 : return dependent;
4044 : }
4045 :
4046 : /* Generate code for a single component.
4047 : *
4048 : * The component inverse schedule is specified as the "map" fields
4049 : * of the elements of "domain" indexed by the first "n" elements of "order".
4050 : *
4051 : * This function may modify the "set" fields of "domain".
4052 : *
4053 : * Before proceeding with the actual code generation for the component,
4054 : * we first check if there are any "shifted" strides, meaning that
4055 : * the schedule domains of the individual domains are all strided,
4056 : * but that they have different offsets, resulting in the union
4057 : * of schedule domains not being strided anymore.
4058 : *
4059 : * The simplest example is the schedule
4060 : *
4061 : * { A[i] -> [2i]: 0 <= i < 10; B[i] -> [2i+1] : 0 <= i < 10 }
4062 : *
4063 : * Both schedule domains are strided, but their union is not.
4064 : * This function detects such cases and then rewrites the schedule to
4065 : *
4066 : * { A[i] -> [2i, 0]: 0 <= i < 10; B[i] -> [2i, 1] : 0 <= i < 10 }
4067 : *
4068 : * In the new schedule, the schedule domains have the same offset (modulo
4069 : * the stride), ensuring that the union of schedule domains is also strided.
4070 : *
4071 : *
4072 : * If there is only a single domain in the component, then there is
4073 : * nothing to do. Similarly, if the current schedule dimension has
4074 : * a fixed value for almost all domains then there is nothing to be done.
4075 : * In particular, we need at least two domains where the current schedule
4076 : * dimension does not have a fixed value.
4077 : * Finally, in case of a schedule map input,
4078 : * if any of the options refer to the current schedule dimension,
4079 : * then we bail out as well. It would be possible to reformulate the options
4080 : * in terms of the new schedule domain, but that would introduce constraints
4081 : * that separate the domains in the options and that is something we would
4082 : * like to avoid.
4083 : * In the case of a schedule tree input, we bail out if any of
4084 : * the descendants of the current schedule node refer to outer
4085 : * schedule nodes in any way.
4086 : *
4087 : *
4088 : * To see if there is any shifted stride, we look at the differences
4089 : * between the values of the current dimension in pairs of domains
4090 : * for equal values of outer dimensions. These differences should be
4091 : * of the form
4092 : *
4093 : * m x + r
4094 : *
4095 : * with "m" the stride and "r" a constant. Note that we cannot perform
4096 : * this analysis on individual domains as the lower bound in each domain
4097 : * may depend on parameters or outer dimensions and so the current dimension
4098 : * itself may not have a fixed remainder on division by the stride.
4099 : *
4100 : * In particular, we compare the first domain that does not have an
4101 : * obviously fixed value for the current dimension to itself and all
4102 : * other domains and collect the offsets and the gcd of the strides.
4103 : * If the gcd becomes one, then we failed to find shifted strides.
4104 : * If the gcd is zero, then the differences were all fixed, meaning
4105 : * that some domains had non-obviously fixed values for the current dimension.
4106 : * If all the offsets are the same (for those domains that do not have
4107 : * an obviously fixed value for the current dimension), then we do not
4108 : * apply the transformation.
4109 : * If none of the domains were skipped, then there is nothing to do.
4110 : * If some of them were skipped, then if we apply separation, the schedule
4111 : * domain should get split in pieces with a (non-shifted) stride.
4112 : *
4113 : * Otherwise, we apply a shift to expose the stride in
4114 : * generate_shift_component.
4115 : */
4116 0 : static __isl_give isl_ast_graft_list *generate_component(
4117 : struct isl_set_map_pair *domain, int *order, int n,
4118 : __isl_take isl_ast_build *build)
4119 : {
4120 : int i, d;
4121 : int depth;
4122 : isl_ctx *ctx;
4123 : isl_map *map;
4124 : isl_set *deltas;
4125 0 : isl_val *gcd = NULL;
4126 : isl_multi_val *mv;
4127 : int fixed, skip;
4128 : int base;
4129 : isl_ast_graft_list *list;
4130 0 : int res = 0;
4131 :
4132 0 : depth = isl_ast_build_get_depth(build);
4133 :
4134 0 : skip = n == 1;
4135 0 : if (skip >= 0 && !skip)
4136 0 : skip = at_most_one_non_fixed(domain, order, n, depth);
4137 0 : if (skip >= 0 && !skip) {
4138 0 : if (isl_ast_build_has_schedule_node(build))
4139 0 : skip = has_anchored_subtree(build);
4140 : else
4141 0 : skip = isl_ast_build_options_involve_depth(build);
4142 : }
4143 0 : if (skip < 0)
4144 0 : goto error;
4145 0 : if (skip)
4146 0 : return generate_shifted_component_from_list(domain,
4147 : order, n, build);
4148 :
4149 0 : base = eliminate_non_fixed(domain, order, n, depth, build);
4150 0 : if (base < 0)
4151 0 : goto error;
4152 :
4153 0 : ctx = isl_ast_build_get_ctx(build);
4154 :
4155 0 : mv = isl_multi_val_zero(isl_space_set_alloc(ctx, 0, n));
4156 :
4157 0 : fixed = 1;
4158 0 : for (i = 0; i < n; ++i) {
4159 : isl_val *r, *m;
4160 :
4161 0 : map = isl_map_from_domain_and_range(
4162 0 : isl_set_copy(domain[order[base]].set),
4163 0 : isl_set_copy(domain[order[i]].set));
4164 0 : for (d = 0; d < depth; ++d)
4165 0 : map = isl_map_equate(map, isl_dim_in, d,
4166 : isl_dim_out, d);
4167 0 : deltas = isl_map_deltas(map);
4168 0 : res = isl_set_dim_residue_class_val(deltas, depth, &m, &r);
4169 0 : isl_set_free(deltas);
4170 0 : if (res < 0)
4171 0 : break;
4172 :
4173 0 : if (i == 0)
4174 0 : gcd = m;
4175 : else
4176 0 : gcd = isl_val_gcd(gcd, m);
4177 0 : if (isl_val_is_one(gcd)) {
4178 0 : isl_val_free(r);
4179 0 : break;
4180 : }
4181 0 : mv = isl_multi_val_set_val(mv, i, r);
4182 :
4183 0 : res = dim_is_fixed(domain[order[i]].set, depth);
4184 0 : if (res < 0)
4185 0 : break;
4186 0 : if (res)
4187 0 : continue;
4188 :
4189 0 : if (fixed && i > base) {
4190 : isl_val *a, *b;
4191 0 : a = isl_multi_val_get_val(mv, i);
4192 0 : b = isl_multi_val_get_val(mv, base);
4193 0 : if (isl_val_ne(a, b))
4194 0 : fixed = 0;
4195 0 : isl_val_free(a);
4196 0 : isl_val_free(b);
4197 : }
4198 : }
4199 :
4200 0 : if (res < 0 || !gcd) {
4201 0 : isl_ast_build_free(build);
4202 0 : list = NULL;
4203 0 : } else if (i < n || fixed || isl_val_is_zero(gcd)) {
4204 0 : list = generate_shifted_component_from_list(domain,
4205 : order, n, build);
4206 : } else {
4207 0 : list = generate_shift_component(domain, order, n, gcd, mv,
4208 : build);
4209 : }
4210 :
4211 0 : isl_val_free(gcd);
4212 0 : isl_multi_val_free(mv);
4213 :
4214 0 : return list;
4215 : error:
4216 0 : isl_ast_build_free(build);
4217 0 : return NULL;
4218 : }
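
/* Editor's note: a worked instance of the stride detection above for the
 * example schedule
 *
 *	{ A[i] -> [2i] : 0 <= i < 10; B[i] -> [2i+1] : 0 <= i < 10 }
 *
 * at depth 0.  Comparing the A domain to itself gives differences of the
 * form 2k (stride 2, offset 0), while comparing it to the B domain gives
 * differences of the form 2k + 1 (stride 2, offset 1).  The gcd of the
 * strides is 2 and the offsets differ, so the schedule is rewritten by
 * generate_shift_component as described in the comment above.
 */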
4219 :
4220 : /* Store both "map" itself and its domain in the
4221 : * structure pointed to by *next and advance to the next array element.
4222 : */
4223 0 : static isl_stat extract_domain(__isl_take isl_map *map, void *user)
4224 : {
4225 0 : struct isl_set_map_pair **next = user;
4226 :
4227 0 : (*next)->map = isl_map_copy(map);
4228 0 : (*next)->set = isl_map_domain(map);
4229 0 : (*next)++;
4230 :
4231 0 : return isl_stat_ok;
4232 : }
4233 :
4234 : static int after_in_tree(__isl_keep isl_union_map *umap,
4235 : __isl_keep isl_schedule_node *node);
4236 :
4237 : /* Is any domain element of "umap" scheduled after any of
4238 : * the corresponding image elements by the tree rooted at
4239 : * the child of "node"?
4240 : */
4241 0 : static int after_in_child(__isl_keep isl_union_map *umap,
4242 : __isl_keep isl_schedule_node *node)
4243 : {
4244 : isl_schedule_node *child;
4245 : int after;
4246 :
4247 0 : child = isl_schedule_node_get_child(node, 0);
4248 0 : after = after_in_tree(umap, child);
4249 0 : isl_schedule_node_free(child);
4250 :
4251 0 : return after;
4252 : }
4253 :
4254 : /* Is any domain element of "umap" scheduled after any of
4255 : * the corresponding image elements by the tree rooted at
4256 : * the band node "node"?
4257 : *
4258 : * We first check if any domain element is scheduled after any
4259 : * of the corresponding image elements by the band node itself.
4260 : * If not, we restrict "umap" to those pairs of elements that
4261 : * are scheduled together by the band node and continue with
4262 : * the child of the band node.
4263 : * If there are no such pairs then the map passed to after_in_child
4264 : * will be empty causing it to return 0.
4265 : */
4266 0 : static int after_in_band(__isl_keep isl_union_map *umap,
4267 : __isl_keep isl_schedule_node *node)
4268 : {
4269 : isl_multi_union_pw_aff *mupa;
4270 : isl_union_map *partial, *test, *gt, *universe, *umap1, *umap2;
4271 : isl_union_set *domain, *range;
4272 : isl_space *space;
4273 : int empty;
4274 : int after;
4275 :
4276 0 : if (isl_schedule_node_band_n_member(node) == 0)
4277 0 : return after_in_child(umap, node);
4278 :
4279 0 : mupa = isl_schedule_node_band_get_partial_schedule(node);
4280 0 : space = isl_multi_union_pw_aff_get_space(mupa);
4281 0 : partial = isl_union_map_from_multi_union_pw_aff(mupa);
4282 0 : test = isl_union_map_copy(umap);
4283 0 : test = isl_union_map_apply_domain(test, isl_union_map_copy(partial));
4284 0 : test = isl_union_map_apply_range(test, isl_union_map_copy(partial));
4285 0 : gt = isl_union_map_from_map(isl_map_lex_gt(space));
4286 0 : test = isl_union_map_intersect(test, gt);
4287 0 : empty = isl_union_map_is_empty(test);
4288 0 : isl_union_map_free(test);
4289 :
4290 0 : if (empty < 0 || !empty) {
4291 0 : isl_union_map_free(partial);
4292 0 : return empty < 0 ? -1 : 1;
4293 : }
4294 :
4295 0 : universe = isl_union_map_universe(isl_union_map_copy(umap));
4296 0 : domain = isl_union_map_domain(isl_union_map_copy(universe));
4297 0 : range = isl_union_map_range(universe);
4298 0 : umap1 = isl_union_map_copy(partial);
4299 0 : umap1 = isl_union_map_intersect_domain(umap1, domain);
4300 0 : umap2 = isl_union_map_intersect_domain(partial, range);
4301 0 : test = isl_union_map_apply_range(umap1, isl_union_map_reverse(umap2));
4302 0 : test = isl_union_map_intersect(test, isl_union_map_copy(umap));
4303 0 : after = after_in_child(test, node);
4304 0 : isl_union_map_free(test);
4305 0 : return after;
4306 : }
4307 :
4308 : /* Is any domain element of "umap" scheduled after any of
4309 : * the corresponding image elements by the tree rooted at
4310 : * the context node "node"?
4311 : *
4312 : * The context constraints apply to the schedule domain,
4313 : * so we cannot apply them directly to "umap", which contains
4314 : * pairs of statement instances. Instead, we add them
4315 : * to the range of the prefix schedule for both domain and
4316 : * range of "umap".
4317 : */
4318 0 : static int after_in_context(__isl_keep isl_union_map *umap,
4319 : __isl_keep isl_schedule_node *node)
4320 : {
4321 : isl_union_map *prefix, *universe, *umap1, *umap2;
4322 : isl_union_set *domain, *range;
4323 : isl_set *context;
4324 : int after;
4325 :
4326 0 : umap = isl_union_map_copy(umap);
4327 0 : context = isl_schedule_node_context_get_context(node);
4328 0 : prefix = isl_schedule_node_get_prefix_schedule_union_map(node);
4329 0 : universe = isl_union_map_universe(isl_union_map_copy(umap));
4330 0 : domain = isl_union_map_domain(isl_union_map_copy(universe));
4331 0 : range = isl_union_map_range(universe);
4332 0 : umap1 = isl_union_map_copy(prefix);
4333 0 : umap1 = isl_union_map_intersect_domain(umap1, domain);
4334 0 : umap2 = isl_union_map_intersect_domain(prefix, range);
4335 0 : umap1 = isl_union_map_intersect_range(umap1,
4336 : isl_union_set_from_set(context));
4337 0 : umap1 = isl_union_map_apply_range(umap1, isl_union_map_reverse(umap2));
4338 0 : umap = isl_union_map_intersect(umap, umap1);
4339 :
4340 0 : after = after_in_child(umap, node);
4341 :
4342 0 : isl_union_map_free(umap);
4343 :
4344 0 : return after;
4345 : }
4346 :
4347 : /* Is any domain element of "umap" scheduled after any of
4348 : * the corresponding image elements by the tree rooted at
4349 : * the expansion node "node"?
4350 : *
4351 : * We apply the expansion to domain and range of "umap" and
4352 : * continue with its child.
4353 : */
4354 0 : static int after_in_expansion(__isl_keep isl_union_map *umap,
4355 : __isl_keep isl_schedule_node *node)
4356 : {
4357 : isl_union_map *expansion;
4358 : int after;
4359 :
4360 0 : expansion = isl_schedule_node_expansion_get_expansion(node);
4361 0 : umap = isl_union_map_copy(umap);
4362 0 : umap = isl_union_map_apply_domain(umap, isl_union_map_copy(expansion));
4363 0 : umap = isl_union_map_apply_range(umap, expansion);
4364 :
4365 0 : after = after_in_child(umap, node);
4366 :
4367 0 : isl_union_map_free(umap);
4368 :
4369 0 : return after;
4370 : }
4371 :
4372 : /* Is any domain element of "umap" scheduled after any of
4373 : * the corresponding image elements by the tree rooted at
4374 : * the extension node "node"?
4375 : *
4376 : * Since the extension node may add statement instances before or
4377 : * after the pairs of statement instances in "umap", we return 1
4378 : * to ensure that these pairs are not broken up.
4379 : */
4380 0 : static int after_in_extension(__isl_keep isl_union_map *umap,
4381 : __isl_keep isl_schedule_node *node)
4382 : {
4383 0 : return 1;
4384 : }
4385 :
4386 : /* Is any domain element of "umap" scheduled after any of
4387 : * the corresponding image elements by the tree rooted at
4388 : * the filter node "node"?
4389 : *
4390 : * We intersect domain and range of "umap" with the filter and
4391 : * continue with its child.
4392 : */
4393 0 : static int after_in_filter(__isl_keep isl_union_map *umap,
4394 : __isl_keep isl_schedule_node *node)
4395 : {
4396 : isl_union_set *filter;
4397 : int after;
4398 :
4399 0 : umap = isl_union_map_copy(umap);
4400 0 : filter = isl_schedule_node_filter_get_filter(node);
4401 0 : umap = isl_union_map_intersect_domain(umap, isl_union_set_copy(filter));
4402 0 : umap = isl_union_map_intersect_range(umap, filter);
4403 :
4404 0 : after = after_in_child(umap, node);
4405 :
4406 0 : isl_union_map_free(umap);
4407 :
4408 0 : return after;
4409 : }
4410 :
4411 : /* Is any domain element of "umap" scheduled after any of
4412 : * the corresponding image elements by the tree rooted at
4413 : * the set node "node"?
4414 : *
4415 : * This is only the case if this condition holds in any
4416 : * of the (filter) children of the set node.
4417 : * In particular, if the domain and the range of "umap"
4418 : * are contained in different children, then the condition
4419 : * does not hold.
4420 : */
4421 0 : static int after_in_set(__isl_keep isl_union_map *umap,
4422 : __isl_keep isl_schedule_node *node)
4423 : {
4424 : int i, n;
4425 :
4426 0 : n = isl_schedule_node_n_children(node);
4427 0 : for (i = 0; i < n; ++i) {
4428 : isl_schedule_node *child;
4429 : int after;
4430 :
4431 0 : child = isl_schedule_node_get_child(node, i);
4432 0 : after = after_in_tree(umap, child);
4433 0 : isl_schedule_node_free(child);
4434 :
4435 0 : if (after < 0 || after)
4436 0 : return after;
4437 : }
4438 :
4439 0 : return 0;
4440 : }
4441 :
4442 : /* Return the filter of child "i" of "node".
4443 : */
4444 0 : static __isl_give isl_union_set *child_filter(
4445 : __isl_keep isl_schedule_node *node, int i)
4446 : {
4447 : isl_schedule_node *child;
4448 : isl_union_set *filter;
4449 :
4450 0 : child = isl_schedule_node_get_child(node, i);
4451 0 : filter = isl_schedule_node_filter_get_filter(child);
4452 0 : isl_schedule_node_free(child);
4453 :
4454 0 : return filter;
4455 : }
4456 :
4457 : /* Is any domain element of "umap" scheduled after any of
4458 : * the corresponding image elements by the tree rooted at
4459 : * the sequence node "node"?
4460 : *
4461 : * This happens in particular if any domain element is
4462 : * contained in a later child than one containing a range element or
4463 : * if the condition holds within a given child in the sequence.
4464 : * The latter part of the condition is checked by after_in_set.
4465 : */
4466 0 : static int after_in_sequence(__isl_keep isl_union_map *umap,
4467 : __isl_keep isl_schedule_node *node)
4468 : {
4469 : int i, j, n;
4470 : isl_union_map *umap_i;
4471 0 : int empty, after = 0;
4472 :
4473 0 : n = isl_schedule_node_n_children(node);
4474 0 : for (i = 1; i < n; ++i) {
4475 : isl_union_set *filter_i;
4476 :
4477 0 : umap_i = isl_union_map_copy(umap);
4478 0 : filter_i = child_filter(node, i);
4479 0 : umap_i = isl_union_map_intersect_domain(umap_i, filter_i);
4480 0 : empty = isl_union_map_is_empty(umap_i);
4481 0 : if (empty < 0)
4482 0 : goto error;
4483 0 : if (empty) {
4484 0 : isl_union_map_free(umap_i);
4485 0 : continue;
4486 : }
4487 :
4488 0 : for (j = 0; j < i; ++j) {
4489 : isl_union_set *filter_j;
4490 : isl_union_map *umap_ij;
4491 :
4492 0 : umap_ij = isl_union_map_copy(umap_i);
4493 0 : filter_j = child_filter(node, j);
4494 0 : umap_ij = isl_union_map_intersect_range(umap_ij,
4495 : filter_j);
4496 0 : empty = isl_union_map_is_empty(umap_ij);
4497 0 : isl_union_map_free(umap_ij);
4498 :
4499 0 : if (empty < 0)
4500 0 : goto error;
4501 0 : if (!empty)
4502 0 : after = 1;
4503 0 : if (after)
4504 0 : break;
4505 : }
4506 :
4507 0 : isl_union_map_free(umap_i);
4508 0 : if (after)
4509 0 : break;
4510 : }
4511 :
4512 0 : if (after < 0 || after)
4513 0 : return after;
4514 :
4515 0 : return after_in_set(umap, node);
4516 : error:
4517 0 : isl_union_map_free(umap_i);
4518 0 : return -1;
4519 : }
4520 :
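/* Illustrative sketch, not part of the original file: the pairwise check
 * at the heart of after_in_sequence, on hand-built objects.  "umap"
 * relates a domain instance A[0] to an image instance B[0]; "filter_later"
 * and "filter_earlier" play the role of the filters of a later and an
 * earlier child.  Since the domain element lies in the later filter and
 * the image element in the earlier one, the intersections are non-empty,
 * so the pair would be considered "after".  All names and relations
 * below are made up for the example.
 */
static void example_after_in_sequence_check(isl_ctx *ctx)
{
	isl_union_map *umap;
	isl_union_set *filter_later, *filter_earlier;
	isl_bool empty;

	umap = isl_union_map_read_from_str(ctx, "{ A[0] -> B[0] }");
	filter_later = isl_union_set_read_from_str(ctx, "{ A[i] }");
	filter_earlier = isl_union_set_read_from_str(ctx, "{ B[i] }");

	umap = isl_union_map_intersect_domain(umap, filter_later);
	umap = isl_union_map_intersect_range(umap, filter_earlier);
	empty = isl_union_map_is_empty(umap);
	/* "empty" is isl_bool_false here, i.e., the pair is "after". */

	isl_union_map_free(umap);
}
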
4521 : /* Is any domain element of "umap" scheduled after any of
4522 : * the corresponding image elements by the tree rooted at "node"?
4523 : *
4524 : * If "umap" is empty, then clearly there is no such element.
4525 : * Otherwise, consider the different types of nodes separately.
4526 : */
4527 0 : static int after_in_tree(__isl_keep isl_union_map *umap,
4528 : __isl_keep isl_schedule_node *node)
4529 : {
4530 : int empty;
4531 : enum isl_schedule_node_type type;
4532 :
4533 0 : empty = isl_union_map_is_empty(umap);
4534 0 : if (empty < 0)
4535 0 : return -1;
4536 0 : if (empty)
4537 0 : return 0;
4538 0 : if (!node)
4539 0 : return -1;
4540 :
4541 0 : type = isl_schedule_node_get_type(node);
4542 0 : switch (type) {
4543 : case isl_schedule_node_error:
4544 0 : return -1;
4545 : case isl_schedule_node_leaf:
4546 0 : return 0;
4547 : case isl_schedule_node_band:
4548 0 : return after_in_band(umap, node);
4549 : case isl_schedule_node_domain:
4550 0 : isl_die(isl_schedule_node_get_ctx(node), isl_error_internal,
4551 : "unexpected internal domain node", return -1);
4552 : case isl_schedule_node_context:
4553 0 : return after_in_context(umap, node);
4554 : case isl_schedule_node_expansion:
4555 0 : return after_in_expansion(umap, node);
4556 : case isl_schedule_node_extension:
4557 0 : return after_in_extension(umap, node);
4558 : case isl_schedule_node_filter:
4559 0 : return after_in_filter(umap, node);
4560 : case isl_schedule_node_guard:
4561 : case isl_schedule_node_mark:
4562 0 : return after_in_child(umap, node);
4563 : case isl_schedule_node_set:
4564 0 : return after_in_set(umap, node);
4565 : case isl_schedule_node_sequence:
4566 0 : return after_in_sequence(umap, node);
4567 : }
4568 :
4569 0 : return 1;
4570 : }
4571 :
4572 : /* Is any domain element of "map1" scheduled after any domain
4573 : * element of "map2" by the subtree underneath the current band node,
4574 : * while at the same time being scheduled together by the current
4575 : * band node, i.e., by "map1" and "map2"?
4576 : *
4577 : * If the child of the current band node is a leaf, then
4578 : * no element can be scheduled after any other element.
4579 : *
4580 : * Otherwise, we construct a relation between domain elements
4581 : * of "map1" and domain elements of "map2" that are scheduled
4582 : * together and then check if the subtree underneath the current
4583 : * band node determines their relative order.
4584 : */
4585 0 : static int after_in_subtree(__isl_keep isl_ast_build *build,
4586 : __isl_keep isl_map *map1, __isl_keep isl_map *map2)
4587 : {
4588 : isl_schedule_node *node;
4589 : isl_map *map;
4590 : isl_union_map *umap;
4591 : int after;
4592 :
4593 0 : node = isl_ast_build_get_schedule_node(build);
4594 0 : if (!node)
4595 0 : return -1;
4596 0 : node = isl_schedule_node_child(node, 0);
4597 0 : if (isl_schedule_node_get_type(node) == isl_schedule_node_leaf) {
4598 0 : isl_schedule_node_free(node);
4599 0 : return 0;
4600 : }
4601 0 : map = isl_map_copy(map2);
4602 0 : map = isl_map_apply_domain(map, isl_map_copy(map1));
4603 0 : umap = isl_union_map_from_map(map);
4604 0 : after = after_in_tree(umap, node);
4605 0 : isl_union_map_free(umap);
4606 0 : isl_schedule_node_free(node);
4607 0 : return after;
4608 : }
4609 :
4610 : /* Internal data for any_scheduled_after.
4611 : *
4612 : * "build" is the build in which the AST is constructed.
4613 : * "depth" is the number of loops that have already been generated
4614 : * "group_coscheduled" is a local copy of options->ast_build_group_coscheduled
4615 : * "domain" is an array of set-map pairs corresponding to the different
4616 : * iteration domains. The set is the schedule domain, i.e., the domain
4617 : * of the inverse schedule, while the map is the inverse schedule itself.
4618 : */
4619 : struct isl_any_scheduled_after_data {
4620 : isl_ast_build *build;
4621 : int depth;
4622 : int group_coscheduled;
4623 : struct isl_set_map_pair *domain;
4624 : };
4625 :
4626 : /* Is any element of domain "i" scheduled after any element of domain "j"
4627 : * (for a common iteration of the first data->depth loops)?
4628 : *
4629 : * data->domain[i].set contains the domain of the inverse schedule
4630 : * for domain "i", i.e., elements in the schedule domain.
4631 : *
4632 : * If we are inside a band of a schedule tree and there is a pair
4633 : * of elements in the two domains that is scheduled together by
4634 : * the current band, then we check if any element of "i" may be scheduled
4635 : * after any element of "j" by the descendants of the band node.
4636 : *
4637 : * If data->group_coscheduled is set, then we also return 1 if there
4638 : * is any pair of elements in the two domains that are scheduled together.
4639 : */
4640 0 : static isl_bool any_scheduled_after(int i, int j, void *user)
4641 : {
4642 0 : struct isl_any_scheduled_after_data *data = user;
4643 0 : int dim = isl_set_dim(data->domain[i].set, isl_dim_set);
4644 : int pos;
4645 :
4646 0 : for (pos = data->depth; pos < dim; ++pos) {
4647 : int follows;
4648 :
4649 0 : follows = isl_set_follows_at(data->domain[i].set,
4650 0 : data->domain[j].set, pos);
4651 :
4652 0 : if (follows < -1)
4653 0 : return isl_bool_error;
4654 0 : if (follows > 0)
4655 0 : return isl_bool_true;
4656 0 : if (follows < 0)
4657 0 : return isl_bool_false;
4658 : }
4659 :
4660 0 : if (isl_ast_build_has_schedule_node(data->build)) {
4661 : int after;
4662 :
4663 0 : after = after_in_subtree(data->build, data->domain[i].map,
4664 0 : data->domain[j].map);
4665 0 : if (after < 0 || after)
4666 0 : return after;
4667 : }
4668 :
4669 0 : return data->group_coscheduled;
4670 : }
4671 :
4672 : /* Look for independent components at the current depth and generate code
4673 : * for each component separately. The resulting lists of grafts are
4674 : * merged in an attempt to combine grafts with identical guards.
4675 : *
4676 : * Code for two domains can be generated separately if all the elements
4677 : * of one domain are scheduled before (or together with) all the elements
4678 : * of the other domain. We therefore consider the graph with as nodes
4679 : * the domains and an edge between two nodes if any element of the first
4680 : * node is scheduled after any element of the second node.
4681 : * If the ast_build_group_coscheduled option is set, then we also add an edge if
4682 : * there is any pair of elements in the two domains that are scheduled
4683 : * together.
4684 : * Code is then generated (by generate_component)
4685 : * for each of the strongly connected components in this graph
4686 : * in their topological order.
4687 : *
4688 : * Since the test is performed on the domain of the inverse schedules of
4689 : * the different domains, we precompute these domains and store
4690 : * them in data.domain.
4691 : */
4692 0 : static __isl_give isl_ast_graft_list *generate_components(
4693 : __isl_take isl_union_map *executed, __isl_take isl_ast_build *build)
4694 : {
4695 : int i;
4696 0 : isl_ctx *ctx = isl_ast_build_get_ctx(build);
4697 0 : int n = isl_union_map_n_map(executed);
4698 : struct isl_any_scheduled_after_data data;
4699 : struct isl_set_map_pair *next;
4700 0 : struct isl_tarjan_graph *g = NULL;
4701 0 : isl_ast_graft_list *list = NULL;
4702 0 : int n_domain = 0;
4703 :
4704 0 : data.domain = isl_calloc_array(ctx, struct isl_set_map_pair, n);
4705 0 : if (!data.domain)
4706 0 : goto error;
4707 0 : n_domain = n;
4708 :
4709 0 : next = data.domain;
4710 0 : if (isl_union_map_foreach_map(executed, &extract_domain, &next) < 0)
4711 0 : goto error;
4712 :
4713 0 : if (!build)
4714 0 : goto error;
4715 0 : data.build = build;
4716 0 : data.depth = isl_ast_build_get_depth(build);
4717 0 : data.group_coscheduled = isl_options_get_ast_build_group_coscheduled(ctx);
4718 0 : g = isl_tarjan_graph_init(ctx, n, &any_scheduled_after, &data);
4719 0 : if (!g)
4720 0 : goto error;
4721 :
4722 0 : list = isl_ast_graft_list_alloc(ctx, 0);
4723 :
4724 0 : i = 0;
4725 0 : while (list && n) {
4726 : isl_ast_graft_list *list_c;
4727 0 : int first = i;
4728 :
4729 0 : if (g->order[i] == -1)
4730 0 : isl_die(ctx, isl_error_internal, "cannot happen",
4731 : goto error);
4732 0 : ++i; --n;
4733 0 : while (g->order[i] != -1) {
4734 0 : ++i; --n;
4735 : }
4736 :
4737 0 : list_c = generate_component(data.domain,
4738 0 : g->order + first, i - first,
4739 : isl_ast_build_copy(build));
4740 0 : list = isl_ast_graft_list_merge(list, list_c, build);
4741 :
4742 0 : ++i;
4743 : }
4744 :
4745 : if (0)
4746 0 : error: list = isl_ast_graft_list_free(list);
4747 0 : isl_tarjan_graph_free(g);
4748 0 : for (i = 0; i < n_domain; ++i) {
4749 0 : isl_map_free(data.domain[i].map);
4750 0 : isl_set_free(data.domain[i].set);
4751 : }
4752 0 : free(data.domain);
4753 0 : isl_union_map_free(executed);
4754 0 : isl_ast_build_free(build);
4755 :
4756 0 : return list;
4757 : }
4758 :
4759 : /* Generate code for the next level (and all inner levels).
4760 : *
4761 : * If "executed" is empty, i.e., no code needs to be generated,
4762 : * then we return an empty list.
4763 : *
4764 : * If we have already generated code for all loop levels, then we pass
4765 : * control to generate_inner_level.
4766 : *
4767 : * If "executed" lives in a single space, i.e., if code needs to be
4768 : * generated for a single domain, then there can only be a single
4769 : * component and we go directly to generate_shifted_component.
4770 : * Otherwise, we call generate_components to detect the components
4771 : * and to call generate_component on each of them separately.
4772 : */
4773 0 : static __isl_give isl_ast_graft_list *generate_next_level(
4774 : __isl_take isl_union_map *executed, __isl_take isl_ast_build *build)
4775 : {
4776 : int depth;
4777 :
4778 0 : if (!build || !executed)
4779 : goto error;
4780 :
4781 0 : if (isl_union_map_is_empty(executed)) {
4782 0 : isl_ctx *ctx = isl_ast_build_get_ctx(build);
4783 0 : isl_union_map_free(executed);
4784 0 : isl_ast_build_free(build);
4785 0 : return isl_ast_graft_list_alloc(ctx, 0);
4786 : }
4787 :
4788 0 : depth = isl_ast_build_get_depth(build);
4789 0 : if (depth >= isl_ast_build_dim(build, isl_dim_set))
4790 0 : return generate_inner_level(executed, build);
4791 :
4792 0 : if (isl_union_map_n_map(executed) == 1)
4793 0 : return generate_shifted_component(executed, build);
4794 :
4795 0 : return generate_components(executed, build);
4796 : error:
4797 0 : isl_union_map_free(executed);
4798 0 : isl_ast_build_free(build);
4799 0 : return NULL;
4800 : }
4801 :
4802 : /* Internal data structure used by isl_ast_build_node_from_schedule_map.
4803 : * internal, executed and build are the inputs to generate_code.
4804 : * list collects the output.
4805 : */
4806 : struct isl_generate_code_data {
4807 : int internal;
4808 : isl_union_map *executed;
4809 : isl_ast_build *build;
4810 :
4811 : isl_ast_graft_list *list;
4812 : };
4813 :
4814 : /* Given an inverse schedule in terms of the external build schedule, i.e.,
4815 : *
4816 : * [E -> S] -> D
4817 : *
4818 : * with E the external build schedule and S the additional schedule "space",
4819 : * reformulate the inverse schedule in terms of the internal schedule domain,
4820 : * i.e., return
4821 : *
4822 : * [I -> S] -> D
4823 : *
4824 : * We first obtain a mapping
4825 : *
4826 : * I -> E
4827 : *
4828 : * take the inverse and the product with S -> S, resulting in
4829 : *
4830 : * [E -> S] -> [I -> S]
4831 : *
4832 : * Applying this map to the domain of the input produces the desired result.
4833 : */
4834 0 : static __isl_give isl_union_map *internal_executed(
4835 : __isl_take isl_union_map *executed, __isl_keep isl_space *space,
4836 : __isl_keep isl_ast_build *build)
4837 : {
4838 : isl_map *id, *proj;
4839 :
4840 0 : proj = isl_ast_build_get_schedule_map(build);
4841 0 : proj = isl_map_reverse(proj);
4842 0 : space = isl_space_map_from_set(isl_space_copy(space));
4843 0 : id = isl_map_identity(space);
4844 0 : proj = isl_map_product(proj, id);
4845 0 : executed = isl_union_map_apply_domain(executed,
4846 : isl_union_map_from_map(proj));
4847 0 : return executed;
4848 : }
4849 :
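/* Illustrative sketch, not part of the original file: the reformulation
 * performed by internal_executed on concrete, made-up relations.
 * Here the internal schedule domain I[i] is related to the external one
 * E[e] by e = i + 1, standing in for the schedule map of the build,
 * and S[s] is the additional schedule space.  Applying the constructed
 * map to the domain of the inverse schedule turns [E -> S] -> D
 * into [I -> S] -> D.
 */
static __isl_give isl_union_map *example_internal_executed(isl_ctx *ctx)
{
	isl_map *proj, *id;
	isl_union_map *executed;

	executed = isl_union_map_read_from_str(ctx,
		"{ [E[e] -> S[s]] -> D[e, s] }");
	/* I -> E, playing the role of isl_ast_build_get_schedule_map. */
	proj = isl_map_read_from_str(ctx, "{ I[i] -> E[i + 1] }");
	proj = isl_map_reverse(proj);
	id = isl_map_read_from_str(ctx, "{ S[s] -> S[s] }");
	proj = isl_map_product(proj, id);

	/* Result: { [I[i] -> S[s]] -> D[i + 1, s] }. */
	return isl_union_map_apply_domain(executed,
		isl_union_map_from_map(proj));
}
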
4850 : /* Generate an AST that visits the elements in the range of data->executed
4851 : * in the relative order specified by the corresponding domain element(s)
4852 : * for those domain elements that belong to "set".
4853 : * Add the result to data->list.
4854 : *
4855 : * The caller ensures that "set" is a universe domain.
4856 : * "space" is the space of the additional part of the schedule.
4857 : * It is equal to the space of "set" if build->domain is parametric.
4858 : * Otherwise, it is equal to the range of the wrapped space of "set".
4859 : *
4860 : * If the build space is not parametric and
4861 : * if isl_ast_build_node_from_schedule_map
4862 : * was called from an outside user (data->internal not set), then
4863 : * the (inverse) schedule refers to the external build domain and needs to
4864 : * be transformed to refer to the internal build domain.
4865 : *
4866 : * If the build space is parametric, then we add some of the parameter
4867 : * constraints to the executed relation. Adding these constraints
4868 : * allows for an earlier detection of conflicts in some cases.
4869 : * However, we do not want to divide the executed relation into
4870 : * more disjuncts than necessary. We therefore approximate
4871 : * the constraints on the parameters by a single disjunct set.
4872 : *
4873 : * The build is extended to include the additional part of the schedule.
4874 : * If the original build space was not parametric, then the options
4875 : * in data->build refer only to the additional part of the schedule
4876 : * and they need to be adjusted to refer to the complete AST build
4877 : * domain.
4878 : *
4879 : * After having adjusted inverse schedule and build, we start generating
4880 : * code with the outer loop of the current code generation
4881 : * in generate_next_level.
4882 : *
4883 : * If the original build space was not parametric, we undo the embedding
4884 : * on the resulting isl_ast_node_list so that it can be used within
4885 : * the outer AST build.
4886 : */
4887 0 : static isl_stat generate_code_in_space(struct isl_generate_code_data *data,
4888 : __isl_take isl_set *set, __isl_take isl_space *space)
4889 : {
4890 : isl_union_map *executed;
4891 : isl_ast_build *build;
4892 : isl_ast_graft_list *list;
4893 : int embed;
4894 :
4895 0 : executed = isl_union_map_copy(data->executed);
4896 0 : executed = isl_union_map_intersect_domain(executed,
4897 : isl_union_set_from_set(set));
4898 :
4899 0 : embed = !isl_set_is_params(data->build->domain);
4900 0 : if (embed && !data->internal)
4901 0 : executed = internal_executed(executed, space, data->build);
4902 0 : if (!embed) {
4903 : isl_set *domain;
4904 0 : domain = isl_ast_build_get_domain(data->build);
4905 0 : domain = isl_set_from_basic_set(isl_set_simple_hull(domain));
4906 0 : executed = isl_union_map_intersect_params(executed, domain);
4907 : }
4908 :
4909 0 : build = isl_ast_build_copy(data->build);
4910 0 : build = isl_ast_build_product(build, space);
4911 :
4912 0 : list = generate_next_level(executed, build);
4913 :
4914 0 : list = isl_ast_graft_list_unembed(list, embed);
4915 :
4916 0 : data->list = isl_ast_graft_list_concat(data->list, list);
4917 :
4918 0 : return isl_stat_ok;
4919 : }
4920 :
4921 : /* Generate an AST that visits the elements in the range of data->executed
4922 : * in the relative order specified by the corresponding domain element(s)
4923 : * for those domain elements that belong to "set".
4924 : * Add the result to data->list.
4925 : *
4926 : * The caller ensures that "set" is a universe domain.
4927 : *
4928 : * If the build space S is not parametric, then the space of "set"
4929 : * needs to be a wrapped relation with S as domain. That is, it needs
4930 : * to be of the form
4931 : *
4932 : * [S -> T]
4933 : *
4934 : * Check this property and pass control to generate_code_in_space
4935 : * passing along T.
4936 : * If the build space is parametric, then T is the space of "set".
4937 : */
4938 0 : static isl_stat generate_code_set(__isl_take isl_set *set, void *user)
4939 : {
4940 0 : struct isl_generate_code_data *data = user;
4941 : isl_space *space, *build_space;
4942 : int is_domain;
4943 :
4944 0 : space = isl_set_get_space(set);
4945 :
4946 0 : if (isl_set_is_params(data->build->domain))
4947 0 : return generate_code_in_space(data, set, space);
4948 :
4949 0 : build_space = isl_ast_build_get_space(data->build, data->internal);
4950 0 : space = isl_space_unwrap(space);
4951 0 : is_domain = isl_space_is_domain(build_space, space);
4952 0 : isl_space_free(build_space);
4953 0 : space = isl_space_range(space);
4954 :
4955 0 : if (is_domain < 0)
4956 0 : goto error;
4957 0 : if (!is_domain)
4958 0 : isl_die(isl_set_get_ctx(set), isl_error_invalid,
4959 : "invalid nested schedule space", goto error);
4960 :
4961 0 : return generate_code_in_space(data, set, space);
4962 : error:
4963 0 : isl_set_free(set);
4964 0 : isl_space_free(space);
4965 0 : return isl_stat_error;
4966 : }
4967 :
4968 : /* Generate an AST that visits the elements in the range of "executed"
4969 : * in the relative order specified by the corresponding domain element(s).
4970 : *
4971 : * "build" is an isl_ast_build that has either been constructed by
4972 : * isl_ast_build_from_context or passed to a callback set by
4973 : * isl_ast_build_set_create_leaf.
4974 : * In the first case, the space of the isl_ast_build is typically
4975 : * a parametric space, although this is currently not enforced.
4976 : * In the second case, the space is never a parametric space.
4977 : * If the space S is not parametric, then the domain space(s) of "executed"
4978 : * need to be wrapped relations with S as domain.
4979 : *
4980 : * If the domain of "executed" consists of several spaces, then an AST
4981 : * is generated for each of them (in arbitrary order) and the results
4982 : * are concatenated.
4983 : *
4984 : * If "internal" is set, then the domain "S" above refers to the internal
4985 : * schedule domain representation. Otherwise, it refers to the external
4986 : * representation, as returned by isl_ast_build_get_schedule_space.
4987 : *
4988 : * We essentially run over all the spaces in the domain of "executed"
4989 : * and call generate_code_set on each of them.
4990 : */
4991 0 : static __isl_give isl_ast_graft_list *generate_code(
4992 : __isl_take isl_union_map *executed, __isl_take isl_ast_build *build,
4993 : int internal)
4994 : {
4995 : isl_ctx *ctx;
4996 0 : struct isl_generate_code_data data = { 0 };
4997 : isl_space *space;
4998 : isl_union_set *schedule_domain;
4999 : isl_union_map *universe;
5000 :
5001 0 : if (!build)
5002 0 : goto error;
5003 0 : space = isl_ast_build_get_space(build, 1);
5004 0 : space = isl_space_align_params(space,
5005 : isl_union_map_get_space(executed));
5006 0 : space = isl_space_align_params(space,
5007 : isl_union_map_get_space(build->options));
5008 0 : build = isl_ast_build_align_params(build, isl_space_copy(space));
5009 0 : executed = isl_union_map_align_params(executed, space);
5010 0 : if (!executed || !build)
5011 : goto error;
5012 :
5013 0 : ctx = isl_ast_build_get_ctx(build);
5014 :
5015 0 : data.internal = internal;
5016 0 : data.executed = executed;
5017 0 : data.build = build;
5018 0 : data.list = isl_ast_graft_list_alloc(ctx, 0);
5019 :
5020 0 : universe = isl_union_map_universe(isl_union_map_copy(executed));
5021 0 : schedule_domain = isl_union_map_domain(universe);
5022 0 : if (isl_union_set_foreach_set(schedule_domain, &generate_code_set,
5023 : &data) < 0)
5024 0 : data.list = isl_ast_graft_list_free(data.list);
5025 :
5026 0 : isl_union_set_free(schedule_domain);
5027 0 : isl_union_map_free(executed);
5028 :
5029 0 : isl_ast_build_free(build);
5030 0 : return data.list;
5031 : error:
5032 0 : isl_union_map_free(executed);
5033 0 : isl_ast_build_free(build);
5034 0 : return NULL;
5035 : }
5036 :
5037 : /* Generate an AST that visits the elements in the domain of "schedule"
5038 : * in the relative order specified by the corresponding image element(s).
5039 : *
5040 : * "build" is an isl_ast_build that has either been constructed by
5041 : * isl_ast_build_from_context or passed to a callback set by
5042 : * isl_ast_build_set_create_leaf.
5043 : * In the first case, the space of the isl_ast_build is typically
5044 : * a parametric space, although this is currently not enforced.
5045 : * In the second case, the space is never a parametric space.
5046 : * If the space S is not parametric, then the range space(s) of "schedule"
5047 : * need to be wrapped relations with S as domain.
5048 : *
5049 : * If the range of "schedule" consists of several spaces, then an AST
5050 : * is generated for each of them (in arbitrary order) and the results
5051 : * are concatenated.
5052 : *
5053 : * We first initialize the local copies of the relevant options.
5054 : * We do this here rather than when the isl_ast_build is created
5055 : * because the options may have changed between the construction
5056 : * of the isl_ast_build and the call to isl_generate_code.
5057 : *
5058 : * The main computation is performed on an inverse schedule (with
5059 : * the schedule domain in the domain and the elements to be executed
5060 : * in the range) called "executed".
5061 : */
5062 0 : __isl_give isl_ast_node *isl_ast_build_node_from_schedule_map(
5063 : __isl_keep isl_ast_build *build, __isl_take isl_union_map *schedule)
5064 : {
5065 : isl_ast_graft_list *list;
5066 : isl_ast_node *node;
5067 : isl_union_map *executed;
5068 :
5069 0 : build = isl_ast_build_copy(build);
5070 0 : build = isl_ast_build_set_single_valued(build, 0);
5071 0 : schedule = isl_union_map_coalesce(schedule);
5072 0 : schedule = isl_union_map_remove_redundancies(schedule);
5073 0 : executed = isl_union_map_reverse(schedule);
5074 0 : list = generate_code(executed, isl_ast_build_copy(build), 0);
5075 0 : node = isl_ast_node_from_graft_list(list, build);
5076 0 : isl_ast_build_free(build);
5077 :
5078 0 : return node;
5079 : }
5080 :
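/* Illustrative sketch, not part of the original file: typical use of
 * isl_ast_build_node_from_schedule_map starting from a parametric context.
 * The context set, the schedule map and the statement name are made up.
 */
static void example_node_from_schedule_map(isl_ctx *ctx)
{
	isl_set *context;
	isl_ast_build *build;
	isl_union_map *schedule;
	isl_ast_node *node;

	context = isl_set_read_from_str(ctx, "[n] -> { : n >= 0 }");
	build = isl_ast_build_from_context(context);
	schedule = isl_union_map_read_from_str(ctx,
		"[n] -> { S[i] -> [i] : 0 <= i < n }");
	node = isl_ast_build_node_from_schedule_map(build, schedule);

	/* "node" would typically be printed with an isl_printer in
	 * ISL_FORMAT_C or inspected further; here it is simply freed. */
	isl_ast_node_free(node);
	isl_ast_build_free(build);
}
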
5081 : /* The old name for isl_ast_build_node_from_schedule_map.
5082 : * It is being kept for backward compatibility, but
5083 : * it will be removed in the future.
5084 : */
5085 0 : __isl_give isl_ast_node *isl_ast_build_ast_from_schedule(
5086 : __isl_keep isl_ast_build *build, __isl_take isl_union_map *schedule)
5087 : {
5088 0 : return isl_ast_build_node_from_schedule_map(build, schedule);
5089 : }
5090 :
5091 : /* Generate an AST that visits the elements in the domain of "executed"
5092 : * in the relative order specified by the band node "node" and its descendants.
5093 : *
5094 : * The relation "executed" maps the outer generated loop iterators
5095 : * to the domain elements executed by those iterations.
5096 : *
5097 : * If the band is empty, we continue with its descendants.
5098 : * Otherwise, we extend the build and the inverse schedule with
5099 : * the additional space/partial schedule and continue generating
5100 : * an AST in generate_next_level.
5101 : * As soon as we have extended the inverse schedule with the additional
5102 : * partial schedule, we look for equalities that may exist between
5103 : * the old and the new part.
5104 : */
5105 0 : static __isl_give isl_ast_graft_list *build_ast_from_band(
5106 : __isl_take isl_ast_build *build, __isl_take isl_schedule_node *node,
5107 : __isl_take isl_union_map *executed)
5108 : {
5109 : isl_space *space;
5110 : isl_multi_union_pw_aff *extra;
5111 : isl_union_map *extra_umap;
5112 : isl_ast_graft_list *list;
5113 : unsigned n1, n2;
5114 :
5115 0 : if (!build || !node || !executed)
5116 : goto error;
5117 :
5118 0 : if (isl_schedule_node_band_n_member(node) == 0)
5119 0 : return build_ast_from_child(build, node, executed);
5120 :
5121 0 : extra = isl_schedule_node_band_get_partial_schedule(node);
5122 0 : extra = isl_multi_union_pw_aff_align_params(extra,
5123 : isl_ast_build_get_space(build, 1));
5124 0 : space = isl_multi_union_pw_aff_get_space(extra);
5125 :
5126 0 : extra_umap = isl_union_map_from_multi_union_pw_aff(extra);
5127 0 : extra_umap = isl_union_map_reverse(extra_umap);
5128 :
5129 0 : executed = isl_union_map_domain_product(executed, extra_umap);
5130 0 : executed = isl_union_map_detect_equalities(executed);
5131 :
5132 0 : n1 = isl_ast_build_dim(build, isl_dim_param);
5133 0 : build = isl_ast_build_product(build, space);
5134 0 : n2 = isl_ast_build_dim(build, isl_dim_param);
5135 0 : if (n2 > n1)
5136 0 : isl_die(isl_ast_build_get_ctx(build), isl_error_invalid,
5137 : "band node is not allowed to introduce new parameters",
5138 : build = isl_ast_build_free(build));
5139 0 : build = isl_ast_build_set_schedule_node(build, node);
5140 :
5141 0 : list = generate_next_level(executed, build);
5142 :
5143 0 : list = isl_ast_graft_list_unembed(list, 1);
5144 :
5145 0 : return list;
5146 : error:
5147 0 : isl_schedule_node_free(node);
5148 0 : isl_union_map_free(executed);
5149 0 : isl_ast_build_free(build);
5150 0 : return NULL;
5151 : }
5152 :
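/* Illustrative sketch, not part of the original file: the effect of
 * extending an inverse schedule with a band's (reversed) partial schedule,
 * as done in build_ast_from_band.  "executed" maps one outer schedule
 * dimension to statement instances and "partial" plays the role of the
 * reversed partial schedule; the domain product pairs the outer iterator
 * with the new band iterator.  All relations below are made up.
 */
static __isl_give isl_union_map *example_extend_with_band(isl_ctx *ctx)
{
	isl_union_map *executed, *partial;

	executed = isl_union_map_read_from_str(ctx,
		"{ [o] -> S[o, j] : 0 <= o < 10 and 0 <= j < 10 }");
	/* Reversed partial schedule: the band iterator is equal to j. */
	partial = isl_union_map_read_from_str(ctx, "{ [j] -> S[o, j] }");

	/* Result: { [[o] -> [j]] -> S[o, j] : 0 <= o < 10 and 0 <= j < 10 }. */
	executed = isl_union_map_domain_product(executed, partial);
	return isl_union_map_detect_equalities(executed);
}
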
5153 : /* Hoist a list of grafts (in practice containing a single graft)
5154 : * from "sub_build" (which includes extra context information)
5155 : * to "build".
5156 : *
5157 : * In particular, project out all additional parameters introduced
5158 : * by the context node from the enforced constraints and the guard
5159 : * of the single graft.
5160 : */
5161 0 : static __isl_give isl_ast_graft_list *hoist_out_of_context(
5162 : __isl_take isl_ast_graft_list *list, __isl_keep isl_ast_build *build,
5163 : __isl_keep isl_ast_build *sub_build)
5164 : {
5165 : isl_ast_graft *graft;
5166 : isl_basic_set *enforced;
5167 : isl_set *guard;
5168 : unsigned n_param, extra_param;
5169 :
5170 0 : if (!build || !sub_build)
5171 0 : return isl_ast_graft_list_free(list);
5172 :
5173 0 : n_param = isl_ast_build_dim(build, isl_dim_param);
5174 0 : extra_param = isl_ast_build_dim(sub_build, isl_dim_param);
5175 :
5176 0 : if (extra_param == n_param)
5177 0 : return list;
5178 :
5179 0 : extra_param -= n_param;
5180 0 : enforced = isl_ast_graft_list_extract_shared_enforced(list, sub_build);
5181 0 : enforced = isl_basic_set_project_out(enforced, isl_dim_param,
5182 : n_param, extra_param);
5183 0 : enforced = isl_basic_set_remove_unknown_divs(enforced);
5184 0 : guard = isl_ast_graft_list_extract_hoistable_guard(list, sub_build);
5185 0 : guard = isl_set_remove_divs_involving_dims(guard, isl_dim_param,
5186 : n_param, extra_param);
5187 0 : guard = isl_set_project_out(guard, isl_dim_param, n_param, extra_param);
5188 0 : guard = isl_set_compute_divs(guard);
5189 0 : graft = isl_ast_graft_alloc_from_children(list, guard, enforced,
5190 : build, sub_build);
5191 0 : list = isl_ast_graft_list_from_ast_graft(graft);
5192 :
5193 0 : return list;
5194 : }
5195 :
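/* Illustrative sketch, not part of the original file: projecting a
 * context-only parameter out of a guard, the core step of
 * hoist_out_of_context.  The outer build is assumed to know only "n";
 * "m" is an extra parameter introduced by a context node and is
 * therefore projected out before the guard is moved to the outer build.
 * The sets below are made up.
 */
static __isl_give isl_set *example_project_out_extra_param(isl_ctx *ctx)
{
	isl_set *guard;

	guard = isl_set_read_from_str(ctx,
		"[n, m] -> { [i] : 0 <= i < n and i < m }");
	/* Drop parameter "m" (position 1); since some value of "m" always
	 * satisfies i < m, the result is [n] -> { [i] : 0 <= i < n }. */
	guard = isl_set_project_out(guard, isl_dim_param, 1, 1);
	return isl_set_compute_divs(guard);
}
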
5196 : /* Generate an AST that visits the elements in the domain of "executed"
5197 : * in the relative order specified by the context node "node"
5198 : * and its descendants.
5199 : *
5200 : * The relation "executed" maps the outer generated loop iterators
5201 : * to the domain elements executed by those iterations.
5202 : *
5203 : * The context node may introduce additional parameters as well as
5204 : * constraints on the outer schedule dimensions or original parameters.
5205 : *
5206 : * We add the extra parameters to a new build and the context
5207 : * constraints to both the build and (as a single disjunct)
5208 : * to the domain of "executed". Since the context constraints
5209 : * are specified in terms of the input schedule, we first need
5210 : * to map them to the internal schedule domain.
5211 : *
5212 : * After constructing the AST from the descendants of "node",
5213 : * we combine the list of grafts into a single graft within
5214 : * the new build, in order to be able to exploit the additional
5215 : * context constraints during this combination.
5216 : *
5217 : * Additionally, if the current node is the outermost node in
5218 : * the schedule tree (apart from the root domain node), we generate
5219 : * all pending guards, again to be able to exploit the additional
5220 : * context constraints. We currently do not do this for internal
5221 : * context nodes since we may still want to hoist conditions
5222 : * to outer AST nodes.
5223 : *
5224 : * If the context node introduced any new parameters, then they
5225 : * are removed from the set of enforced constraints and guard
5226 : * in hoist_out_of_context.
5227 : */
5228 0 : static __isl_give isl_ast_graft_list *build_ast_from_context(
5229 : __isl_take isl_ast_build *build, __isl_take isl_schedule_node *node,
5230 : __isl_take isl_union_map *executed)
5231 : {
5232 : isl_set *context;
5233 : isl_space *space;
5234 : isl_multi_aff *internal2input;
5235 : isl_ast_build *sub_build;
5236 : isl_ast_graft_list *list;
5237 : int n, depth;
5238 :
5239 0 : depth = isl_schedule_node_get_tree_depth(node);
5240 0 : space = isl_ast_build_get_space(build, 1);
5241 0 : context = isl_schedule_node_context_get_context(node);
5242 0 : context = isl_set_align_params(context, space);
5243 0 : sub_build = isl_ast_build_copy(build);
5244 0 : space = isl_set_get_space(context);
5245 0 : sub_build = isl_ast_build_align_params(sub_build, space);
5246 0 : internal2input = isl_ast_build_get_internal2input(sub_build);
5247 0 : context = isl_set_preimage_multi_aff(context, internal2input);
5248 0 : sub_build = isl_ast_build_restrict_generated(sub_build,
5249 : isl_set_copy(context));
5250 0 : context = isl_set_from_basic_set(isl_set_simple_hull(context));
5251 0 : executed = isl_union_map_intersect_domain(executed,
5252 : isl_union_set_from_set(context));
5253 :
5254 0 : list = build_ast_from_child(isl_ast_build_copy(sub_build),
5255 : node, executed);
5256 0 : n = isl_ast_graft_list_n_ast_graft(list);
5257 0 : if (n < 0)
5258 0 : list = isl_ast_graft_list_free(list);
5259 :
5260 0 : list = isl_ast_graft_list_fuse(list, sub_build);
5261 0 : if (depth == 1)
5262 0 : list = isl_ast_graft_list_insert_pending_guard_nodes(list,
5263 : sub_build);
5264 0 : if (n >= 1)
5265 0 : list = hoist_out_of_context(list, build, sub_build);
5266 :
5267 0 : isl_ast_build_free(build);
5268 0 : isl_ast_build_free(sub_build);
5269 :
5270 0 : return list;
5271 : }
5272 :
5273 : /* Generate an AST that visits the elements in the domain of "executed"
5274 : * in the relative order specified by the expansion node "node" and
5275 : * its descendants.
5276 : *
5277 : * The relation "executed" maps the outer generated loop iterators
5278 : * to the domain elements executed by those iterations.
5279 : *
5280 : * We expand the domain elements by the expansion and
5281 : * continue with the descendants of the node.
5282 : */
5283 0 : static __isl_give isl_ast_graft_list *build_ast_from_expansion(
5284 : __isl_take isl_ast_build *build, __isl_take isl_schedule_node *node,
5285 : __isl_take isl_union_map *executed)
5286 : {
5287 : isl_union_map *expansion;
5288 : unsigned n1, n2;
5289 :
5290 0 : expansion = isl_schedule_node_expansion_get_expansion(node);
5291 0 : expansion = isl_union_map_align_params(expansion,
5292 : isl_union_map_get_space(executed));
5293 :
5294 0 : n1 = isl_union_map_dim(executed, isl_dim_param);
5295 0 : executed = isl_union_map_apply_range(executed, expansion);
5296 0 : n2 = isl_union_map_dim(executed, isl_dim_param);
5297 0 : if (n2 > n1)
5298 0 : isl_die(isl_ast_build_get_ctx(build), isl_error_invalid,
5299 : "expansion node is not allowed to introduce "
5300 : "new parameters", goto error);
5301 :
5302 0 : return build_ast_from_child(build, node, executed);
5303 : error:
5304 0 : isl_ast_build_free(build);
5305 0 : isl_schedule_node_free(node);
5306 0 : isl_union_map_free(executed);
5307 0 : return NULL;
5308 : }
5309 :
5310 : /* Generate an AST that visits the elements in the domain of "executed"
5311 : * in the relative order specified by the extension node "node" and
5312 : * its descendants.
5313 : *
5314 : * The relation "executed" maps the outer generated loop iterators
5315 : * to the domain elements executed by those iterations.
5316 : *
5317 : * Extend the inverse schedule with the extension applied to current
5318 : * set of generated constraints. Since the extension is formulated
5319 : * in terms of the input schedule, it first needs to be transformed
5320 : * to refer to the internal schedule.
5321 : */
5322 0 : static __isl_give isl_ast_graft_list *build_ast_from_extension(
5323 : __isl_take isl_ast_build *build, __isl_take isl_schedule_node *node,
5324 : __isl_take isl_union_map *executed)
5325 : {
5326 : isl_union_set *schedule_domain;
5327 : isl_union_map *extension;
5328 : isl_set *set;
5329 :
5330 0 : set = isl_ast_build_get_generated(build);
5331 0 : set = isl_set_from_basic_set(isl_set_simple_hull(set));
5332 0 : schedule_domain = isl_union_set_from_set(set);
5333 :
5334 0 : extension = isl_schedule_node_extension_get_extension(node);
5335 :
5336 0 : extension = isl_union_map_preimage_domain_multi_aff(extension,
5337 : isl_multi_aff_copy(build->internal2input));
5338 0 : extension = isl_union_map_intersect_domain(extension, schedule_domain);
5339 0 : extension = isl_ast_build_substitute_values_union_map_domain(build,
5340 : extension);
5341 0 : executed = isl_union_map_union(executed, extension);
5342 :
5343 0 : return build_ast_from_child(build, node, executed);
5344 : }
5345 :
5346 : /* Generate an AST that visits the elements in the domain of "executed"
5347 : * in the relative order specified by the filter node "node" and
5348 : * its descendants.
5349 : *
5350 : * The relation "executed" maps the outer generated loop iterators
5351 : * to the domain elements executed by those iterations.
5352 : *
5353 : * We simply intersect the iteration domain (i.e., the range of "executed")
5354 : * with the filter and continue with the descendants of the node,
5355 : * unless the resulting inverse schedule is empty, in which
5356 : * case we return an empty list.
5357 : *
5358 : * If the result of the intersection is equal to the original "executed"
5359 : * relation, then keep the original representation since the intersection
5360 : * may have unnecessarily broken up the relation into a greater number
5361 : * of disjuncts.
5362 : */
5363 0 : static __isl_give isl_ast_graft_list *build_ast_from_filter(
5364 : __isl_take isl_ast_build *build, __isl_take isl_schedule_node *node,
5365 : __isl_take isl_union_map *executed)
5366 : {
5367 : isl_ctx *ctx;
5368 : isl_union_set *filter;
5369 : isl_union_map *orig;
5370 : isl_ast_graft_list *list;
5371 : int empty;
5372 : isl_bool unchanged;
5373 : unsigned n1, n2;
5374 :
5375 0 : orig = isl_union_map_copy(executed);
5376 0 : if (!build || !node || !executed)
5377 : goto error;
5378 :
5379 0 : filter = isl_schedule_node_filter_get_filter(node);
5380 0 : filter = isl_union_set_align_params(filter,
5381 : isl_union_map_get_space(executed));
5382 0 : n1 = isl_union_map_dim(executed, isl_dim_param);
5383 0 : executed = isl_union_map_intersect_range(executed, filter);
5384 0 : n2 = isl_union_map_dim(executed, isl_dim_param);
5385 0 : if (n2 > n1)
5386 0 : isl_die(isl_ast_build_get_ctx(build), isl_error_invalid,
5387 : "filter node is not allowed to introduce "
5388 : "new parameters", goto error);
5389 :
5390 0 : unchanged = isl_union_map_is_subset(orig, executed);
5391 0 : empty = isl_union_map_is_empty(executed);
5392 0 : if (unchanged < 0 || empty < 0)
5393 : goto error;
5394 0 : if (unchanged) {
5395 0 : isl_union_map_free(executed);
5396 0 : return build_ast_from_child(build, node, orig);
5397 : }
5398 0 : isl_union_map_free(orig);
5399 0 : if (!empty)
5400 0 : return build_ast_from_child(build, node, executed);
5401 :
5402 0 : ctx = isl_ast_build_get_ctx(build);
5403 0 : list = isl_ast_graft_list_alloc(ctx, 0);
5404 0 : isl_ast_build_free(build);
5405 0 : isl_schedule_node_free(node);
5406 0 : isl_union_map_free(executed);
5407 0 : return list;
5408 : error:
5409 0 : isl_ast_build_free(build);
5410 0 : isl_schedule_node_free(node);
5411 0 : isl_union_map_free(executed);
5412 0 : isl_union_map_free(orig);
5413 0 : return NULL;
5414 : }
5415 :
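/* Illustrative sketch, not part of the original file: the filter
 * intersection and the "unchanged" test used in build_ast_from_filter,
 * on made-up relations.  Every image element of "executed" already
 * satisfies the filter, so the intersection removes nothing, the subset
 * test holds and the original representation would be kept.
 */
static void example_filter_intersection(isl_ctx *ctx)
{
	isl_union_map *executed, *orig;
	isl_union_set *filter;
	isl_bool unchanged;

	executed = isl_union_map_read_from_str(ctx,
		"{ [i] -> S[i] : 0 <= i < 10 }");
	orig = isl_union_map_copy(executed);
	filter = isl_union_set_read_from_str(ctx, "{ S[i] }");

	executed = isl_union_map_intersect_range(executed, filter);
	unchanged = isl_union_map_is_subset(orig, executed);
	/* "unchanged" is isl_bool_true here. */

	isl_union_map_free(orig);
	isl_union_map_free(executed);
}
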
5416 : /* Generate an AST that visits the elements in the domain of "executed"
5417 : * in the relative order specified by the guard node "node" and
5418 : * its descendants.
5419 : *
5420 : * The relation "executed" maps the outer generated loop iterators
5421 : * to the domain elements executed by those iterations.
5422 : *
5423 : * Ensure that the associated guard is enforced by the outer AST
5424 : * constructs by adding it to the guard of the graft.
5425 : * Since we know that we will enforce the guard, we can also include it
5426 : * in the generated constraints used to construct an AST for
5427 : * the descendant nodes.
5428 : */
5429 0 : static __isl_give isl_ast_graft_list *build_ast_from_guard(
5430 : __isl_take isl_ast_build *build, __isl_take isl_schedule_node *node,
5431 : __isl_take isl_union_map *executed)
5432 : {
5433 : isl_space *space;
5434 : isl_set *guard, *hoisted;
5435 : isl_basic_set *enforced;
5436 : isl_ast_build *sub_build;
5437 : isl_ast_graft *graft;
5438 : isl_ast_graft_list *list;
5439 : unsigned n1, n2;
5440 :
5441 0 : space = isl_ast_build_get_space(build, 1);
5442 0 : guard = isl_schedule_node_guard_get_guard(node);
5443 0 : n1 = isl_space_dim(space, isl_dim_param);
5444 0 : guard = isl_set_align_params(guard, space);
5445 0 : n2 = isl_set_dim(guard, isl_dim_param);
5446 0 : if (n2 > n1)
5447 0 : isl_die(isl_ast_build_get_ctx(build), isl_error_invalid,
5448 : "guard node is not allowed to introduce "
5449 : "new parameters", guard = isl_set_free(guard));
5450 0 : guard = isl_set_preimage_multi_aff(guard,
5451 : isl_multi_aff_copy(build->internal2input));
5452 0 : guard = isl_ast_build_specialize(build, guard);
5453 0 : guard = isl_set_gist(guard, isl_set_copy(build->generated));
5454 :
5455 0 : sub_build = isl_ast_build_copy(build);
5456 0 : sub_build = isl_ast_build_restrict_generated(sub_build,
5457 : isl_set_copy(guard));
5458 :
5459 0 : list = build_ast_from_child(isl_ast_build_copy(sub_build),
5460 : node, executed);
5461 :
5462 0 : hoisted = isl_ast_graft_list_extract_hoistable_guard(list, sub_build);
5463 0 : if (isl_set_n_basic_set(hoisted) > 1)
5464 0 : list = isl_ast_graft_list_gist_guards(list,
5465 : isl_set_copy(hoisted));
5466 0 : guard = isl_set_intersect(guard, hoisted);
5467 0 : enforced = extract_shared_enforced(list, build);
5468 0 : graft = isl_ast_graft_alloc_from_children(list, guard, enforced,
5469 : build, sub_build);
5470 :
5471 0 : isl_ast_build_free(sub_build);
5472 0 : isl_ast_build_free(build);
5473 0 : return isl_ast_graft_list_from_ast_graft(graft);
5474 : }
5475 :
5476 : /* Call the before_each_mark callback, if requested by the user.
5477 : *
5478 : * Return 0 on success and -1 on error.
5479 : *
5480 : * The caller is responsible for recording the current inverse schedule
5481 : * in "build".
5482 : */
5483 0 : static isl_stat before_each_mark(__isl_keep isl_id *mark,
5484 : __isl_keep isl_ast_build *build)
5485 : {
5486 0 : if (!build)
5487 0 : return isl_stat_error;
5488 0 : if (!build->before_each_mark)
5489 0 : return isl_stat_ok;
5490 0 : return build->before_each_mark(mark, build,
5491 : build->before_each_mark_user);
5492 : }
5493 :
5494 : /* Call the after_each_mark callback, if requested by the user.
5495 : *
5496 : * The caller is responsible for recording the current inverse schedule
5497 : * in "build".
5498 : */
5499 0 : static __isl_give isl_ast_graft *after_each_mark(
5500 : __isl_take isl_ast_graft *graft, __isl_keep isl_ast_build *build)
5501 : {
5502 0 : if (!graft || !build)
5503 0 : return isl_ast_graft_free(graft);
5504 0 : if (!build->after_each_mark)
5505 0 : return graft;
5506 0 : graft->node = build->after_each_mark(graft->node, build,
5507 : build->after_each_mark_user);
5508 0 : if (!graft->node)
5509 0 : return isl_ast_graft_free(graft);
5510 0 : return graft;
5511 : }
5512 :
5513 :
5514 : /* Generate an AST that visits the elements in the domain of "executed"
5515 : * in the relative order specified by the mark node "node" and
5516 : * its descendants.
5517 : *
5518 : * The relation "executed" maps the outer generated loop iterators
5519 : * to the domain elements executed by those iterations.
5520 : *
5521 : * Since we may be calling before_each_mark and after_each_mark
5522 : * callbacks, we record the current inverse schedule in the build.
5523 : *
5524 : * We generate an AST for the child of the mark node, combine
5525 : * the graft list into a single graft and then insert the mark
5526 : * in the AST of that single graft.
5527 : */
5528 0 : static __isl_give isl_ast_graft_list *build_ast_from_mark(
5529 : __isl_take isl_ast_build *build, __isl_take isl_schedule_node *node,
5530 : __isl_take isl_union_map *executed)
5531 : {
5532 : isl_id *mark;
5533 : isl_ast_graft *graft;
5534 : isl_ast_graft_list *list;
5535 : int n;
5536 :
5537 0 : build = isl_ast_build_set_executed(build, isl_union_map_copy(executed));
5538 :
5539 0 : mark = isl_schedule_node_mark_get_id(node);
5540 0 : if (before_each_mark(mark, build) < 0)
5541 0 : node = isl_schedule_node_free(node);
5542 :
5543 0 : list = build_ast_from_child(isl_ast_build_copy(build), node, executed);
5544 0 : list = isl_ast_graft_list_fuse(list, build);
5545 0 : n = isl_ast_graft_list_n_ast_graft(list);
5546 0 : if (n < 0)
5547 0 : list = isl_ast_graft_list_free(list);
5548 0 : if (n == 0) {
5549 0 : isl_id_free(mark);
5550 : } else {
5551 0 : graft = isl_ast_graft_list_get_ast_graft(list, 0);
5552 0 : graft = isl_ast_graft_insert_mark(graft, mark);
5553 0 : graft = after_each_mark(graft, build);
5554 0 : list = isl_ast_graft_list_set_ast_graft(list, 0, graft);
5555 : }
5556 0 : isl_ast_build_free(build);
5557 :
5558 0 : return list;
5559 : }
5560 :
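/* Illustrative sketch, not part of the original file: how a user of the
 * public API would hook into the mark handling above.  The callback names
 * are made up; the setters are the public isl_ast_build_set_before_each_mark
 * and isl_ast_build_set_after_each_mark.
 */
static isl_stat example_before_mark(__isl_keep isl_id *mark,
	__isl_keep isl_ast_build *build, void *user)
{
	/* Inspect "mark" (e.g., with isl_id_get_name) and return
	 * isl_stat_error to abort the AST generation if needed. */
	return isl_stat_ok;
}

static __isl_give isl_ast_node *example_after_mark(
	__isl_take isl_ast_node *node, __isl_keep isl_ast_build *build,
	void *user)
{
	/* Optionally annotate or replace the mark node before it is
	 * inserted into the generated AST. */
	return node;
}

static __isl_give isl_ast_build *example_register_mark_callbacks(
	__isl_take isl_ast_build *build)
{
	build = isl_ast_build_set_before_each_mark(build,
		&example_before_mark, NULL);
	build = isl_ast_build_set_after_each_mark(build,
		&example_after_mark, NULL);
	return build;
}
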
5561 : static __isl_give isl_ast_graft_list *build_ast_from_schedule_node(
5562 : __isl_take isl_ast_build *build, __isl_take isl_schedule_node *node,
5563 : __isl_take isl_union_map *executed);
5564 :
5565 : /* Generate an AST that visits the elements in the domain of "executed"
5566 : * in the relative order specified by the sequence (or set) node "node" and
5567 : * its descendants.
5568 : *
5569 : * The relation "executed" maps the outer generated loop iterators
5570 : * to the domain elements executed by those iterations.
5571 : *
5572 : * We simply generate an AST for each of the children and concatenate
5573 : * the results.
5574 : */
5575 0 : static __isl_give isl_ast_graft_list *build_ast_from_sequence(
5576 : __isl_take isl_ast_build *build, __isl_take isl_schedule_node *node,
5577 : __isl_take isl_union_map *executed)
5578 : {
5579 : int i, n;
5580 : isl_ctx *ctx;
5581 : isl_ast_graft_list *list;
5582 :
5583 0 : ctx = isl_ast_build_get_ctx(build);
5584 0 : list = isl_ast_graft_list_alloc(ctx, 0);
5585 :
5586 0 : n = isl_schedule_node_n_children(node);
5587 0 : for (i = 0; i < n; ++i) {
5588 : isl_schedule_node *child;
5589 : isl_ast_graft_list *list_i;
5590 :
5591 0 : child = isl_schedule_node_get_child(node, i);
5592 0 : list_i = build_ast_from_schedule_node(isl_ast_build_copy(build),
5593 : child, isl_union_map_copy(executed));
5594 0 : list = isl_ast_graft_list_concat(list, list_i);
5595 : }
5596 0 : isl_ast_build_free(build);
5597 0 : isl_schedule_node_free(node);
5598 0 : isl_union_map_free(executed);
5599 :
5600 0 : return list;
5601 : }
5602 :
5603 : /* Generate an AST that visits the elements in the domain of "executed"
5604 : * in the relative order specified by the node "node" and its descendants.
5605 : *
5606 : * The relation "executed" maps the outer generated loop iterators
5607 : * to the domain elements executed by those iterations.
5608 : *
5609 : * If the node is a leaf, then we pass control to generate_inner_level.
5610 : * Note that the current build does not refer to any band node, so
5611 : * that generate_inner_level will not try to visit the child of
5612 : * the leaf node.
5613 : *
5614 : * The other node types are handled in separate functions.
5615 : * Set nodes are currently treated in the same way as sequence nodes.
5616 : * The children of a set node may be executed in any order, so
5617 : * generating them in the order in which they appear is a valid choice.
5618 : */
5619 0 : static __isl_give isl_ast_graft_list *build_ast_from_schedule_node(
5620 : __isl_take isl_ast_build *build, __isl_take isl_schedule_node *node,
5621 : __isl_take isl_union_map *executed)
5622 : {
5623 : enum isl_schedule_node_type type;
5624 :
5625 0 : type = isl_schedule_node_get_type(node);
5626 :
5627 0 : switch (type) {
5628 : case isl_schedule_node_error:
5629 0 : goto error;
5630 : case isl_schedule_node_leaf:
5631 0 : isl_schedule_node_free(node);
5632 0 : return generate_inner_level(executed, build);
5633 : case isl_schedule_node_band:
5634 0 : return build_ast_from_band(build, node, executed);
5635 : case isl_schedule_node_context:
5636 0 : return build_ast_from_context(build, node, executed);
5637 : case isl_schedule_node_domain:
5638 0 : isl_die(isl_schedule_node_get_ctx(node), isl_error_unsupported,
5639 : "unexpected internal domain node", goto error);
5640 : case isl_schedule_node_expansion:
5641 0 : return build_ast_from_expansion(build, node, executed);
5642 : case isl_schedule_node_extension:
5643 0 : return build_ast_from_extension(build, node, executed);
5644 : case isl_schedule_node_filter:
5645 0 : return build_ast_from_filter(build, node, executed);
5646 : case isl_schedule_node_guard:
5647 0 : return build_ast_from_guard(build, node, executed);
5648 : case isl_schedule_node_mark:
5649 0 : return build_ast_from_mark(build, node, executed);
5650 : case isl_schedule_node_sequence:
5651 : case isl_schedule_node_set:
5652 0 : return build_ast_from_sequence(build, node, executed);
5653 : }
5654 :
5655 0 : isl_die(isl_ast_build_get_ctx(build), isl_error_internal,
5656 : "unhandled type", goto error);
5657 : error:
5658 0 : isl_union_map_free(executed);
5659 0 : isl_schedule_node_free(node);
5660 0 : isl_ast_build_free(build);
5661 :
5662 0 : return NULL;
5663 : }
5664 :
5665 : /* Generate an AST that visits the elements in the domain of "executed"
5666 : * in the relative order specified by the (single) child of "node" and
5667 : * its descendants.
5668 : *
5669 : * The relation "executed" maps the outer generated loop iterators
5670 : * to the domain elements executed by those iterations.
5671 : *
5672 : * This function is never called on a leaf, set or sequence node,
5673 : * so the node always has exactly one child.
5674 : */
5675 0 : static __isl_give isl_ast_graft_list *build_ast_from_child(
5676 : __isl_take isl_ast_build *build, __isl_take isl_schedule_node *node,
5677 : __isl_take isl_union_map *executed)
5678 : {
5679 0 : node = isl_schedule_node_child(node, 0);
5680 0 : return build_ast_from_schedule_node(build, node, executed);
5681 : }
5682 :
5683 : /* Generate an AST that visits the elements in the domain of the domain
5684 : * node "node" in the relative order specified by its descendants.
5685 : *
5686 : * An initial inverse schedule is created that maps a zero-dimensional
5687 : * schedule space to the node domain.
5688 : * The input "build" is assumed to have a parametric domain and
5689 : * is replaced by the same zero-dimensional schedule space.
5690 : *
5691 : * We also add some of the parameter constraints in the build domain
5692 : * to the executed relation. Adding these constraints
5693 : * allows for an earlier detection of conflicts in some cases.
5694 : * However, we do not want to divide the executed relation into
5695 : * more disjuncts than necessary. We therefore approximate
5696 : * the constraints on the parameters by a single disjunct set.
5697 : */
5698 0 : static __isl_give isl_ast_node *build_ast_from_domain(
5699 : __isl_take isl_ast_build *build, __isl_take isl_schedule_node *node)
5700 : {
5701 : isl_ctx *ctx;
5702 : isl_union_set *domain, *schedule_domain;
5703 : isl_union_map *executed;
5704 : isl_space *space;
5705 : isl_set *set;
5706 : isl_ast_graft_list *list;
5707 : isl_ast_node *ast;
5708 : int is_params;
5709 :
5710 0 : if (!build)
5711 0 : goto error;
5712 :
5713 0 : ctx = isl_ast_build_get_ctx(build);
5714 0 : space = isl_ast_build_get_space(build, 1);
5715 0 : is_params = isl_space_is_params(space);
5716 0 : isl_space_free(space);
5717 0 : if (is_params < 0)
5718 0 : goto error;
5719 0 : if (!is_params)
5720 0 : isl_die(ctx, isl_error_unsupported,
5721 : "expecting parametric initial context", goto error);
5722 :
5723 0 : domain = isl_schedule_node_domain_get_domain(node);
5724 0 : domain = isl_union_set_coalesce(domain);
5725 :
5726 0 : space = isl_union_set_get_space(domain);
5727 0 : space = isl_space_set_from_params(space);
5728 0 : build = isl_ast_build_product(build, space);
5729 :
5730 0 : set = isl_ast_build_get_domain(build);
5731 0 : set = isl_set_from_basic_set(isl_set_simple_hull(set));
5732 0 : schedule_domain = isl_union_set_from_set(set);
5733 :
5734 0 : executed = isl_union_map_from_domain_and_range(schedule_domain, domain);
5735 0 : list = build_ast_from_child(isl_ast_build_copy(build), node, executed);
5736 0 : ast = isl_ast_node_from_graft_list(list, build);
5737 0 : isl_ast_build_free(build);
5738 :
5739 0 : return ast;
5740 : error:
5741 0 : isl_schedule_node_free(node);
5742 0 : isl_ast_build_free(build);
5743 0 : return NULL;
5744 : }
5745 :
5746 : /* Generate an AST that visits the elements in the domain of "schedule"
5747 : * in the relative order specified by the schedule tree.
5748 : *
5749 : * "build" is an isl_ast_build that has been created using
5750 : * isl_ast_build_alloc or isl_ast_build_from_context based
5751 : * on a parametric set.
5752 : *
5753 : * The construction starts at the root node of the schedule,
5754 : * which is assumed to be a domain node.
5755 : */
5756 0 : __isl_give isl_ast_node *isl_ast_build_node_from_schedule(
5757 : __isl_keep isl_ast_build *build, __isl_take isl_schedule *schedule)
5758 : {
5759 : isl_ctx *ctx;
5760 : isl_schedule_node *node;
5761 :
5762 0 : if (!build || !schedule)
5763 : goto error;
5764 :
5765 0 : ctx = isl_ast_build_get_ctx(build);
5766 :
5767 0 : node = isl_schedule_get_root(schedule);
5768 0 : if (!node)
5769 0 : goto error;
5770 0 : isl_schedule_free(schedule);
5771 :
5772 0 : build = isl_ast_build_copy(build);
5773 0 : build = isl_ast_build_set_single_valued(build, 0);
5774 0 : if (isl_schedule_node_get_type(node) != isl_schedule_node_domain)
5775 0 : isl_die(ctx, isl_error_unsupported,
5776 : "expecting root domain node",
5777 : build = isl_ast_build_free(build));
5778 0 : return build_ast_from_domain(build, node);
5779 : error:
5780 0 : isl_schedule_free(schedule);
5781 0 : return NULL;
5782 : }
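
/* Illustrative sketch, not part of the original file: generating an AST
 * from a schedule tree through isl_ast_build_node_from_schedule.
 * The schedule tree in the string is made up; it consists of a domain
 * node with a single band child, and the flow-style YAML description
 * is assumed to be accepted by isl_schedule_read_from_str.
 */
static __isl_give isl_ast_node *example_node_from_schedule(isl_ctx *ctx)
{
	isl_schedule *schedule;
	isl_ast_build *build;
	isl_ast_node *node;

	schedule = isl_schedule_read_from_str(ctx,
		"{ domain: \"{ S[i] : 0 <= i < 10 }\", "
		"child: { schedule: \"[{ S[i] -> [(i)] }]\" } }");
	build = isl_ast_build_alloc(ctx);
	node = isl_ast_build_node_from_schedule(build, schedule);
	isl_ast_build_free(build);

	return node;
}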