Line data Source code
1 : /*
2 : * Copyright 2011 INRIA Saclay
3 : * Copyright 2012-2014 Ecole Normale Superieure
4 : * Copyright 2015-2016 Sven Verdoolaege
5 : * Copyright 2016 INRIA Paris
6 : * Copyright 2017 Sven Verdoolaege
7 : *
8 : * Use of this software is governed by the MIT license
9 : *
10 : * Written by Sven Verdoolaege, INRIA Saclay - Ile-de-France,
11 : * Parc Club Orsay Universite, ZAC des vignes, 4 rue Jacques Monod,
12 : * 91893 Orsay, France
13 : * and Ecole Normale Superieure, 45 rue d'Ulm, 75230 Paris, France
14 : * and Centre de Recherche Inria de Paris, 2 rue Simone Iff - Voie DQ12,
15 : * CS 42112, 75589 Paris Cedex 12, France
16 : */
17 :
18 : #include <isl_ctx_private.h>
19 : #include <isl_map_private.h>
20 : #include <isl_space_private.h>
21 : #include <isl_aff_private.h>
22 : #include <isl/hash.h>
23 : #include <isl/id.h>
24 : #include <isl/constraint.h>
25 : #include <isl/schedule.h>
26 : #include <isl_schedule_constraints.h>
27 : #include <isl/schedule_node.h>
28 : #include <isl_mat_private.h>
29 : #include <isl_vec_private.h>
30 : #include <isl/set.h>
31 : #include <isl_union_set_private.h>
32 : #include <isl_seq.h>
33 : #include <isl_tab.h>
34 : #include <isl_dim_map.h>
35 : #include <isl/map_to_basic_set.h>
36 : #include <isl_sort.h>
37 : #include <isl_options_private.h>
38 : #include <isl_tarjan.h>
39 : #include <isl_morph.h>
40 : #include <isl/ilp.h>
41 : #include <isl_val_private.h>
42 :
43 : /*
44 : * The scheduling algorithm implemented in this file was inspired by
45 : * Bondhugula et al., "Automatic Transformations for Communication-Minimized
46 : * Parallelization and Locality Optimization in the Polyhedral Model".
47 : *
48 : * For a detailed description of the variant implemented in isl,
49 : * see Verdoolaege and Janssens, "Scheduling for PPCG" (2017).
50 : */
51 :
52 :
53 : /* Internal information about a node that is used during the construction
54 : * of a schedule.
55 : * space represents the original space in which the domain lives;
56 : * that is, the space is not affected by compression
57 : * sched is a matrix representation of the schedule being constructed
58 : * for this node; if compressed is set, then this schedule is
59 : * defined over the compressed domain space
60 : * sched_map is an isl_map representation of the same (partial) schedule
61 : * sched_map may be NULL; if compressed is set, then this map
62 : * is defined over the uncompressed domain space
63 : * rank is the number of linearly independent rows in the linear part
64 : * of sched
65 : * the rows of "vmap" represent a change of basis for the node
66 : * variables; the first rank rows span the linear part of
67 : * the schedule rows; the remaining rows are linearly independent
68 : * the rows of "indep" represent linear combinations of the schedule
69 : * coefficients that are non-zero when the schedule coefficients are
70 : * linearly independent of previously computed schedule rows.
71 : * start is the first variable in the LP problem in the sequence that
72 : * represents the schedule coefficients of this node
73 : * nvar is the dimension of the (compressed) domain
74 : * nparam is the number of parameters or 0 if we are not constructing
75 : * a parametric schedule
76 : *
77 : * If compressed is set, then hull represents the constraints
78 : * that were used to derive the compression, while compress and
79 : * decompress map the original space to the compressed space and
80 : * vice versa.
81 : *
82 : * scc is the index of SCC (or WCC) this node belongs to
83 : *
84 : * "cluster" is only used inside extract_clusters and identifies
85 : * the cluster of SCCs that the node belongs to.
86 : *
87 : * coincident contains a boolean for each of the rows of the schedule,
88 : * indicating whether the corresponding scheduling dimension satisfies
89 : * the coincidence constraints in the sense that the corresponding
90 : * dependence distances are zero.
91 : *
92 : * If the schedule_treat_coalescing option is set, then
93 : * "sizes" contains the sizes of the (compressed) instance set
94 : * in each direction. If there is no fixed size in a given direction,
95 : * then the corresponding size value is set to infinity.
96 : * If the schedule_treat_coalescing option or the schedule_max_coefficient
97 : * option is set, then "max" contains the maximal values for
98 : * schedule coefficients of the (compressed) variables. If no bound
99 : * needs to be imposed on a particular variable, then the corresponding
100 : * value is negative.
101 : * If not NULL, then "bounds" contains a non-parametric set
102 : * in the compressed space that is bounded by the size in each direction.
103 : */
104 : struct isl_sched_node {
105 : isl_space *space; /* original domain space, unaffected by compression */
106 : int compressed; /* set if the domain has been compressed */
107 : isl_set *hull; /* constraints used to derive the compression */
108 : isl_multi_aff *compress; /* maps the original space to the compressed space */
109 : isl_multi_aff *decompress; /* maps the compressed space to the original space */
110 : isl_mat *sched; /* schedule matrix (over the compressed space if compressed) */
111 : isl_map *sched_map; /* same schedule as a map over the uncompressed space; may be NULL */
112 : int rank; /* number of linearly independent rows in the linear part of sched */
113 : isl_mat *indep; /* combinations that are non-zero for linearly independent coefficients */
114 : isl_mat *vmap; /* change of basis; the first rank rows span the schedule rows */
115 : int start; /* first LP variable holding this node's schedule coefficients */
116 : int nvar; /* dimension of the (compressed) domain */
117 : int nparam; /* number of parameters, or 0 for a non-parametric schedule */
118 :
119 : int scc; /* index of the SCC (or WCC) this node belongs to */
120 : int cluster; /* cluster of SCCs; only used inside extract_clusters */
121 :
122 : int *coincident; /* per schedule row: are the coincidence constraints satisfied? */
123 :
124 : isl_multi_val *sizes; /* size of the instance set in each direction (treat_coalescing) */
125 : isl_basic_set *bounds; /* if not NULL, set bounded by the size in each direction */
126 : isl_vec *max; /* maximal coefficient per variable; negative means no bound */
127 : };
128 :
129 0 : static int node_has_tuples(const void *entry, const void *val)
130 : {
131 0 : struct isl_sched_node *node = (struct isl_sched_node *)entry;
132 0 : isl_space *space = (isl_space *) val;
133 :
134 0 : return isl_space_has_equal_tuples(node->space, space);
135 : }
136 :
137 0 : static int node_scc_exactly(struct isl_sched_node *node, int scc)
138 : {
139 0 : return node->scc == scc;
140 : }
141 :
142 0 : static int node_scc_at_most(struct isl_sched_node *node, int scc)
143 : {
144 0 : return node->scc <= scc;
145 : }
146 :
147 0 : static int node_scc_at_least(struct isl_sched_node *node, int scc)
148 : {
149 0 : return node->scc >= scc;
150 : }
151 :
152 : /* An edge in the dependence graph. An edge may be used to
153 : * ensure validity of the generated schedule, to minimize the dependence
154 : * distance or both
155 : *
156 : * map is the dependence relation, with i -> j in the map if j depends on i
157 : * tagged_condition and tagged_validity contain the union of all tagged
158 : * condition or conditional validity dependence relations that
159 : * specialize the dependence relation "map"; that is,
160 : * if (i -> a) -> (j -> b) is an element of "tagged_condition"
161 : * or "tagged_validity", then i -> j is an element of "map".
162 : * If these fields are NULL, then they represent the empty relation.
163 : * src is the source node
164 : * dst is the sink node
165 : *
166 : * types is a bit vector containing the types of this edge.
167 : * validity is set if the edge is used to ensure correctness
168 : * coincidence is used to enforce zero dependence distances
169 : * proximity is set if the edge is used to minimize dependence distances
170 : * condition is set if the edge represents a condition
171 : * for a conditional validity schedule constraint
172 : * local can only be set for condition edges and indicates that
173 : * the dependence distance over the edge should be zero
174 : * conditional_validity is set if the edge is used to conditionally
175 : * ensure correctness
176 : *
177 : * For validity edges, start and end mark the sequence of inequality
178 : * constraints in the LP problem that encode the validity constraint
179 : * corresponding to this edge.
180 : *
181 : * During clustering, an edge may be marked "no_merge" if it should
182 : * not be used to merge clusters.
183 : * The weight is also only used during clustering and it is
184 : * an indication of how many schedule dimensions on either side
185 : * of the schedule constraints can be aligned.
186 : * If the weight is negative, then this means that this edge was postponed
187 : * by has_bounded_distances or any_no_merge. The original weight can
188 : * be retrieved by adding 1 + graph->max_weight, with "graph"
189 : * the graph containing this edge.
190 : */
191 : struct isl_sched_edge {
192 : isl_map *map; /* dependence relation: i -> j if j depends on i */
193 : isl_union_map *tagged_condition; /* tagged condition relations specializing map; NULL means empty */
194 : isl_union_map *tagged_validity; /* tagged conditional validity relations specializing map; NULL means empty */
195 :
196 : struct isl_sched_node *src; /* source node */
197 : struct isl_sched_node *dst; /* sink node */
198 :
199 : unsigned types; /* bit vector of edge types; bit (1 << type) is set for each type */
200 :
201 : int start; /* validity edges: start of the sequence of LP inequalities for this edge */
202 : int end; /* validity edges: end of that sequence of LP inequalities */
203 :
204 : int no_merge; /* clustering: this edge should not be used to merge clusters */
205 : int weight; /* clustering: alignable schedule dimensions; negative if postponed */
206 : };
207 :
208 : /* Is "edge" marked as being of type "type"?
209 : */
210 0 : static int is_type(struct isl_sched_edge *edge, enum isl_edge_type type)
211 : {
212 0 : return ISL_FL_ISSET(edge->types, 1 << type);
213 : }
214 :
215 : /* Mark "edge" as being of type "type".
216 : */
217 0 : static void set_type(struct isl_sched_edge *edge, enum isl_edge_type type)
218 : {
219 0 : ISL_FL_SET(edge->types, 1 << type);
220 0 : }
221 :
222 : /* No longer mark "edge" as being of type "type"?
223 : */
224 0 : static void clear_type(struct isl_sched_edge *edge, enum isl_edge_type type)
225 : {
226 0 : ISL_FL_CLR(edge->types, 1 << type);
227 0 : }
228 :
229 : /* Is "edge" marked as a validity edge?
230 : */
231 0 : static int is_validity(struct isl_sched_edge *edge)
232 : {
233 0 : return is_type(edge, isl_edge_validity);
234 : }
235 :
236 : /* Mark "edge" as a validity edge.
237 : */
238 0 : static void set_validity(struct isl_sched_edge *edge)
239 : {
240 0 : set_type(edge, isl_edge_validity);
241 0 : }
242 :
243 : /* Is "edge" marked as a proximity edge?
244 : */
245 0 : static int is_proximity(struct isl_sched_edge *edge)
246 : {
247 0 : return is_type(edge, isl_edge_proximity);
248 : }
249 :
250 : /* Is "edge" marked as a local edge?
251 : */
252 0 : static int is_local(struct isl_sched_edge *edge)
253 : {
254 0 : return is_type(edge, isl_edge_local);
255 : }
256 :
257 : /* Mark "edge" as a local edge.
258 : */
259 0 : static void set_local(struct isl_sched_edge *edge)
260 : {
261 0 : set_type(edge, isl_edge_local);
262 0 : }
263 :
264 : /* No longer mark "edge" as a local edge.
265 : */
266 0 : static void clear_local(struct isl_sched_edge *edge)
267 : {
268 0 : clear_type(edge, isl_edge_local);
269 0 : }
270 :
271 : /* Is "edge" marked as a coincidence edge?
272 : */
273 0 : static int is_coincidence(struct isl_sched_edge *edge)
274 : {
275 0 : return is_type(edge, isl_edge_coincidence);
276 : }
277 :
278 : /* Is "edge" marked as a condition edge?
279 : */
280 0 : static int is_condition(struct isl_sched_edge *edge)
281 : {
282 0 : return is_type(edge, isl_edge_condition);
283 : }
284 :
285 : /* Is "edge" marked as a conditional validity edge?
286 : */
287 0 : static int is_conditional_validity(struct isl_sched_edge *edge)
288 : {
289 0 : return is_type(edge, isl_edge_conditional_validity);
290 : }
291 :
/* Can several edges of the type of "edge" connect the same pair of nodes?
 *
 * This is the case for condition edges and conditional validity edges:
 * their dependence relations may be tagged and then one edge is added
 * for each pair of tags.
 */
static int is_multi_edge_type(struct isl_sched_edge *edge)
{
	if (is_condition(edge))
		return 1;

	return is_conditional_validity(edge) != 0;
}
303 :
304 : /* Internal information about the dependence graph used during
305 : * the construction of the schedule.
306 : *
307 : * intra_hmap is a cache, mapping dependence relations to their dual,
308 : * for dependences from a node to itself, possibly without
309 : * coefficients for the parameters
310 : * intra_hmap_param is a cache, mapping dependence relations to their dual,
311 : * for dependences from a node to itself, including coefficients
312 : * for the parameters
313 : * inter_hmap is a cache, mapping dependence relations to their dual,
314 : * for dependences between distinct nodes
315 : * if compression is involved then the key for these maps
316 : * is the original, uncompressed dependence relation, while
317 : * the value is the dual of the compressed dependence relation.
318 : *
319 : * n is the number of nodes
320 : * node is the list of nodes
321 : * maxvar is the maximal number of variables over all nodes
322 : * max_row is the allocated number of rows in the schedule
323 : * n_row is the current (maximal) number of linearly independent
324 : * rows in the node schedules
325 : * n_total_row is the current number of rows in the node schedules
326 : * band_start is the starting row in the node schedules of the current band
327 : * root is set to the original dependence graph from which this graph
328 : * is derived through splitting. If this graph is not the result of
329 : * splitting, then the root field points to the graph itself.
330 : *
331 : * sorted contains a list of node indices sorted according to the
332 : * SCC to which a node belongs
333 : *
334 : * n_edge is the number of edges
335 : * edge is the list of edges
336 : * max_edge contains the maximal number of edges of each type;
337 : * in particular, it contains the number of edges in the initial graph.
338 : * edge_table contains pointers into the edge array, hashed on the source
339 : * and sink spaces; there is one such table for each type;
340 : * a given edge may be referenced from more than one table
341 : * if the corresponding relation appears in more than one of the
342 : * sets of dependences; however, for each type there is only
343 : * a single edge between a given pair of source and sink space
344 : * in the entire graph
345 : *
346 : * node_table contains pointers into the node array, hashed on the space tuples
347 : *
348 : * region contains a list of variable sequences that should be non-trivial
349 : *
350 : * lp contains the (I)LP problem used to obtain new schedule rows
351 : *
352 : * src_scc and dst_scc are the source and sink SCCs of an edge with
353 : * conflicting constraints
354 : *
355 : * scc represents the number of components
356 : * weak is set if the components are weakly connected
357 : *
358 : * max_weight is used during clustering and represents the maximal
359 : * weight of the relevant proximity edges.
360 : */
361 : struct isl_sched_graph {
362 : isl_map_to_basic_set *intra_hmap; /* cache of duals for dependences from a node to itself */
363 : isl_map_to_basic_set *intra_hmap_param; /* same, including coefficients for the parameters */
364 : isl_map_to_basic_set *inter_hmap; /* cache of duals for dependences between distinct nodes */
365 :
366 : struct isl_sched_node *node; /* list of nodes */
367 : int n; /* number of nodes */
368 : int maxvar; /* maximal number of variables over all nodes */
369 : int max_row; /* allocated number of rows in the schedule */
370 : int n_row; /* current (maximal) number of linearly independent rows */
371 :
372 : int *sorted; /* node indices sorted according to the SCC of each node */
373 :
374 : int n_total_row; /* current number of rows in the node schedules */
375 : int band_start; /* starting row of the current band in the node schedules */
376 :
377 : struct isl_sched_graph *root; /* original graph; the graph itself if not obtained by splitting */
378 :
379 : struct isl_sched_edge *edge; /* list of edges */
380 : int n_edge; /* number of edges */
381 : int max_edge[isl_edge_last + 1]; /* maximal number of edges of each type */
382 : struct isl_hash_table *edge_table[isl_edge_last + 1]; /* per type: edges hashed on source and sink */
383 :
384 : struct isl_hash_table *node_table; /* nodes hashed on their space tuples */
385 : struct isl_trivial_region *region; /* variable sequences that should be non-trivial */
386 :
387 : isl_basic_set *lp; /* (I)LP problem used to obtain new schedule rows */
388 :
389 : int src_scc; /* source SCC of an edge with conflicting constraints */
390 : int dst_scc; /* sink SCC of an edge with conflicting constraints */
391 :
392 : int scc; /* number of components */
393 : int weak; /* set if the components are weakly connected */
394 :
395 : int max_weight; /* clustering: maximal weight of the relevant proximity edges */
396 : };
397 :
398 : /* Initialize node_table based on the list of nodes.
399 : */
400 0 : static int graph_init_table(isl_ctx *ctx, struct isl_sched_graph *graph)
401 : {
402 : int i;
403 :
404 0 : graph->node_table = isl_hash_table_alloc(ctx, graph->n);
405 0 : if (!graph->node_table)
406 0 : return -1;
407 :
408 0 : for (i = 0; i < graph->n; ++i) {
409 : struct isl_hash_table_entry *entry;
410 : uint32_t hash;
411 :
412 0 : hash = isl_space_get_tuple_hash(graph->node[i].space);
413 0 : entry = isl_hash_table_find(ctx, graph->node_table, hash,
414 : &node_has_tuples,
415 0 : graph->node[i].space, 1);
416 0 : if (!entry)
417 0 : return -1;
418 0 : entry->data = &graph->node[i];
419 : }
420 :
421 0 : return 0;
422 : }
423 :
424 : /* Return a pointer to the node that lives within the given space,
425 : * an invalid node if there is no such node, or NULL in case of error.
426 : */
427 0 : static struct isl_sched_node *graph_find_node(isl_ctx *ctx,
428 : struct isl_sched_graph *graph, __isl_keep isl_space *space)
429 : {
430 : struct isl_hash_table_entry *entry;
431 : uint32_t hash;
432 :
433 0 : if (!space)
434 0 : return NULL;
435 :
436 0 : hash = isl_space_get_tuple_hash(space);
437 0 : entry = isl_hash_table_find(ctx, graph->node_table, hash,
438 : &node_has_tuples, space, 0);
439 :
440 0 : return entry ? entry->data : graph->node + graph->n;
441 : }
442 :
443 : /* Is "node" a node in "graph"?
444 : */
445 0 : static int is_node(struct isl_sched_graph *graph,
446 : struct isl_sched_node *node)
447 : {
448 0 : return node && node >= &graph->node[0] && node < &graph->node[graph->n];
449 : }
450 :
451 0 : static int edge_has_src_and_dst(const void *entry, const void *val)
452 : {
453 0 : const struct isl_sched_edge *edge = entry;
454 0 : const struct isl_sched_edge *temp = val;
455 :
456 0 : return edge->src == temp->src && edge->dst == temp->dst;
457 : }
458 :
459 : /* Add the given edge to graph->edge_table[type].
460 : */
461 0 : static isl_stat graph_edge_table_add(isl_ctx *ctx,
462 : struct isl_sched_graph *graph, enum isl_edge_type type,
463 : struct isl_sched_edge *edge)
464 : {
465 : struct isl_hash_table_entry *entry;
466 : uint32_t hash;
467 :
468 0 : hash = isl_hash_init();
469 0 : hash = isl_hash_builtin(hash, edge->src);
470 0 : hash = isl_hash_builtin(hash, edge->dst);
471 0 : entry = isl_hash_table_find(ctx, graph->edge_table[type], hash,
472 : &edge_has_src_and_dst, edge, 1);
473 0 : if (!entry)
474 0 : return isl_stat_error;
475 0 : entry->data = edge;
476 :
477 0 : return isl_stat_ok;
478 : }
479 :
480 : /* Add "edge" to all relevant edge tables.
481 : * That is, for every type of the edge, add it to the corresponding table.
482 : */
483 0 : static isl_stat graph_edge_tables_add(isl_ctx *ctx,
484 : struct isl_sched_graph *graph, struct isl_sched_edge *edge)
485 : {
486 : enum isl_edge_type t;
487 :
488 0 : for (t = isl_edge_first; t <= isl_edge_last; ++t) {
489 0 : if (!is_type(edge, t))
490 0 : continue;
491 0 : if (graph_edge_table_add(ctx, graph, t, edge) < 0)
492 0 : return isl_stat_error;
493 : }
494 :
495 0 : return isl_stat_ok;
496 : }
497 :
498 : /* Allocate the edge_tables based on the maximal number of edges of
499 : * each type.
500 : */
501 0 : static int graph_init_edge_tables(isl_ctx *ctx, struct isl_sched_graph *graph)
502 : {
503 : int i;
504 :
505 0 : for (i = 0; i <= isl_edge_last; ++i) {
506 0 : graph->edge_table[i] = isl_hash_table_alloc(ctx,
507 : graph->max_edge[i]);
508 0 : if (!graph->edge_table[i])
509 0 : return -1;
510 : }
511 :
512 0 : return 0;
513 : }
514 :
515 : /* If graph->edge_table[type] contains an edge from the given source
516 : * to the given destination, then return the hash table entry of this edge.
517 : * Otherwise, return NULL.
518 : */
519 0 : static struct isl_hash_table_entry *graph_find_edge_entry(
520 : struct isl_sched_graph *graph,
521 : enum isl_edge_type type,
522 : struct isl_sched_node *src, struct isl_sched_node *dst)
523 : {
524 0 : isl_ctx *ctx = isl_space_get_ctx(src->space);
525 : uint32_t hash;
526 0 : struct isl_sched_edge temp = { .src = src, .dst = dst };
527 :
528 0 : hash = isl_hash_init();
529 0 : hash = isl_hash_builtin(hash, temp.src);
530 0 : hash = isl_hash_builtin(hash, temp.dst);
531 0 : return isl_hash_table_find(ctx, graph->edge_table[type], hash,
532 : &edge_has_src_and_dst, &temp, 0);
533 : }
534 :
535 :
536 : /* If graph->edge_table[type] contains an edge from the given source
537 : * to the given destination, then return this edge.
538 : * Otherwise, return NULL.
539 : */
540 0 : static struct isl_sched_edge *graph_find_edge(struct isl_sched_graph *graph,
541 : enum isl_edge_type type,
542 : struct isl_sched_node *src, struct isl_sched_node *dst)
543 : {
544 : struct isl_hash_table_entry *entry;
545 :
546 0 : entry = graph_find_edge_entry(graph, type, src, dst);
547 0 : if (!entry)
548 0 : return NULL;
549 :
550 0 : return entry->data;
551 : }
552 :
553 : /* Check whether the dependence graph has an edge of the given type
554 : * between the given two nodes.
555 : */
556 0 : static isl_bool graph_has_edge(struct isl_sched_graph *graph,
557 : enum isl_edge_type type,
558 : struct isl_sched_node *src, struct isl_sched_node *dst)
559 : {
560 : struct isl_sched_edge *edge;
561 : isl_bool empty;
562 :
563 0 : edge = graph_find_edge(graph, type, src, dst);
564 0 : if (!edge)
565 0 : return isl_bool_false;
566 :
567 0 : empty = isl_map_plain_is_empty(edge->map);
568 0 : if (empty < 0)
569 0 : return isl_bool_error;
570 :
571 0 : return !empty;
572 : }
573 :
574 : /* Look for any edge with the same src, dst and map fields as "model".
575 : *
576 : * Return the matching edge if one can be found.
577 : * Return "model" if no matching edge is found.
578 : * Return NULL on error.
579 : */
580 0 : static struct isl_sched_edge *graph_find_matching_edge(
581 : struct isl_sched_graph *graph, struct isl_sched_edge *model)
582 : {
583 : enum isl_edge_type i;
584 : struct isl_sched_edge *edge;
585 :
586 0 : for (i = isl_edge_first; i <= isl_edge_last; ++i) {
587 : int is_equal;
588 :
589 0 : edge = graph_find_edge(graph, i, model->src, model->dst);
590 0 : if (!edge)
591 0 : continue;
592 0 : is_equal = isl_map_plain_is_equal(model->map, edge->map);
593 0 : if (is_equal < 0)
594 0 : return NULL;
595 0 : if (is_equal)
596 0 : return edge;
597 : }
598 :
599 0 : return model;
600 : }
601 :
602 : /* Remove the given edge from all the edge_tables that refer to it.
603 : */
604 0 : static void graph_remove_edge(struct isl_sched_graph *graph,
605 : struct isl_sched_edge *edge)
606 : {
607 0 : isl_ctx *ctx = isl_map_get_ctx(edge->map);
608 : enum isl_edge_type i;
609 :
610 0 : for (i = isl_edge_first; i <= isl_edge_last; ++i) {
611 : struct isl_hash_table_entry *entry;
612 :
613 0 : entry = graph_find_edge_entry(graph, i, edge->src, edge->dst);
614 0 : if (!entry)
615 0 : continue;
616 0 : if (entry->data != edge)
617 0 : continue;
618 0 : isl_hash_table_remove(ctx, graph->edge_table[i], entry);
619 : }
620 0 : }
621 :
622 : /* Check whether the dependence graph has any edge
623 : * between the given two nodes.
624 : */
625 0 : static isl_bool graph_has_any_edge(struct isl_sched_graph *graph,
626 : struct isl_sched_node *src, struct isl_sched_node *dst)
627 : {
628 : enum isl_edge_type i;
629 : isl_bool r;
630 :
631 0 : for (i = isl_edge_first; i <= isl_edge_last; ++i) {
632 0 : r = graph_has_edge(graph, i, src, dst);
633 0 : if (r < 0 || r)
634 0 : return r;
635 : }
636 :
637 0 : return r;
638 : }
639 :
640 : /* Check whether the dependence graph has a validity edge
641 : * between the given two nodes.
642 : *
643 : * Conditional validity edges are essentially validity edges that
644 : * can be ignored if the corresponding condition edges are iteration private.
645 : * Here, we are only checking for the presence of validity
646 : * edges, so we need to consider the conditional validity edges too.
647 : * In particular, this function is used during the detection
648 : * of strongly connected components and we cannot ignore
649 : * conditional validity edges during this detection.
650 : */
651 0 : static isl_bool graph_has_validity_edge(struct isl_sched_graph *graph,
652 : struct isl_sched_node *src, struct isl_sched_node *dst)
653 : {
654 : isl_bool r;
655 :
656 0 : r = graph_has_edge(graph, isl_edge_validity, src, dst);
657 0 : if (r < 0 || r)
658 0 : return r;
659 :
660 0 : return graph_has_edge(graph, isl_edge_conditional_validity, src, dst);
661 : }
662 :
663 : /* Perform all the required memory allocations for a schedule graph "graph"
664 : * with "n_node" nodes and "n_edge" edge and initialize the corresponding
665 : * fields.
666 : */
667 0 : static isl_stat graph_alloc(isl_ctx *ctx, struct isl_sched_graph *graph,
668 : int n_node, int n_edge)
669 : {
670 : int i;
671 :
672 0 : graph->n = n_node;
673 0 : graph->n_edge = n_edge;
674 0 : graph->node = isl_calloc_array(ctx, struct isl_sched_node, graph->n);
675 0 : graph->sorted = isl_calloc_array(ctx, int, graph->n);
676 0 : graph->region = isl_alloc_array(ctx,
677 : struct isl_trivial_region, graph->n);
678 0 : graph->edge = isl_calloc_array(ctx,
679 : struct isl_sched_edge, graph->n_edge);
680 :
681 0 : graph->intra_hmap = isl_map_to_basic_set_alloc(ctx, 2 * n_edge);
682 0 : graph->intra_hmap_param = isl_map_to_basic_set_alloc(ctx, 2 * n_edge);
683 0 : graph->inter_hmap = isl_map_to_basic_set_alloc(ctx, 2 * n_edge);
684 :
685 0 : if (!graph->node || !graph->region || (graph->n_edge && !graph->edge) ||
686 0 : !graph->sorted)
687 0 : return isl_stat_error;
688 :
689 0 : for(i = 0; i < graph->n; ++i)
690 0 : graph->sorted[i] = i;
691 :
692 0 : return isl_stat_ok;
693 : }
694 :
695 : /* Free the memory associated to node "node" in "graph".
696 : * The "coincident" field is shared by nodes in a graph and its subgraph.
697 : * It therefore only needs to be freed for the original dependence graph,
698 : * i.e., one that is not the result of splitting.
699 : */
700 0 : static void clear_node(struct isl_sched_graph *graph,
701 : struct isl_sched_node *node)
702 : {
703 0 : isl_space_free(node->space);
704 0 : isl_set_free(node->hull);
705 0 : isl_multi_aff_free(node->compress);
706 0 : isl_multi_aff_free(node->decompress);
707 0 : isl_mat_free(node->sched);
708 0 : isl_map_free(node->sched_map);
709 0 : isl_mat_free(node->indep);
710 0 : isl_mat_free(node->vmap);
711 0 : if (graph->root == graph)
712 0 : free(node->coincident);
713 0 : isl_multi_val_free(node->sizes);
714 0 : isl_basic_set_free(node->bounds);
715 0 : isl_vec_free(node->max);
716 0 : }
717 :
718 0 : static void graph_free(isl_ctx *ctx, struct isl_sched_graph *graph)
719 : {
720 : int i;
721 :
722 0 : isl_map_to_basic_set_free(graph->intra_hmap);
723 0 : isl_map_to_basic_set_free(graph->intra_hmap_param);
724 0 : isl_map_to_basic_set_free(graph->inter_hmap);
725 :
726 0 : if (graph->node)
727 0 : for (i = 0; i < graph->n; ++i)
728 0 : clear_node(graph, &graph->node[i]);
729 0 : free(graph->node);
730 0 : free(graph->sorted);
731 0 : if (graph->edge)
732 0 : for (i = 0; i < graph->n_edge; ++i) {
733 0 : isl_map_free(graph->edge[i].map);
734 0 : isl_union_map_free(graph->edge[i].tagged_condition);
735 0 : isl_union_map_free(graph->edge[i].tagged_validity);
736 : }
737 0 : free(graph->edge);
738 0 : free(graph->region);
739 0 : for (i = 0; i <= isl_edge_last; ++i)
740 0 : isl_hash_table_free(ctx, graph->edge_table[i]);
741 0 : isl_hash_table_free(ctx, graph->node_table);
742 0 : isl_basic_set_free(graph->lp);
743 0 : }
744 :
745 : /* For each "set" on which this function is called, increment
746 : * graph->n by one and update graph->maxvar.
747 : */
748 0 : static isl_stat init_n_maxvar(__isl_take isl_set *set, void *user)
749 : {
750 0 : struct isl_sched_graph *graph = user;
751 0 : int nvar = isl_set_dim(set, isl_dim_set);
752 :
753 0 : graph->n++;
754 0 : if (nvar > graph->maxvar)
755 0 : graph->maxvar = nvar;
756 :
757 0 : isl_set_free(set);
758 :
759 0 : return isl_stat_ok;
760 : }
761 :
762 : /* Compute the number of rows that should be allocated for the schedule.
763 : * In particular, we need one row for each variable or one row
764 : * for each basic map in the dependences.
765 : * Note that it is practically impossible to exhaust both
766 : * the number of dependences and the number of variables.
767 : */
768 0 : static isl_stat compute_max_row(struct isl_sched_graph *graph,
769 : __isl_keep isl_schedule_constraints *sc)
770 : {
771 : int n_edge;
772 : isl_stat r;
773 : isl_union_set *domain;
774 :
775 0 : graph->n = 0;
776 0 : graph->maxvar = 0;
777 0 : domain = isl_schedule_constraints_get_domain(sc);
778 0 : r = isl_union_set_foreach_set(domain, &init_n_maxvar, graph);
779 0 : isl_union_set_free(domain);
780 0 : if (r < 0)
781 0 : return isl_stat_error;
782 0 : n_edge = isl_schedule_constraints_n_basic_map(sc);
783 0 : if (n_edge < 0)
784 0 : return isl_stat_error;
785 0 : graph->max_row = n_edge + graph->maxvar;
786 :
787 0 : return isl_stat_ok;
788 : }
789 :
790 : /* Does "bset" have any defining equalities for its set variables?
791 : */
792 0 : static isl_bool has_any_defining_equality(__isl_keep isl_basic_set *bset)
793 : {
794 : int i, n;
795 :
796 0 : if (!bset)
797 0 : return isl_bool_error;
798 :
799 0 : n = isl_basic_set_dim(bset, isl_dim_set);
800 0 : for (i = 0; i < n; ++i) {
801 : isl_bool has;
802 :
803 0 : has = isl_basic_set_has_defining_equality(bset, isl_dim_set, i,
804 : NULL);
805 0 : if (has < 0 || has)
806 0 : return has;
807 : }
808 :
809 0 : return isl_bool_false;
810 : }
811 :
812 : /* Set the entries of node->max to the value of the schedule_max_coefficient
813 : * option, if set.
814 : */
815 0 : static isl_stat set_max_coefficient(isl_ctx *ctx, struct isl_sched_node *node)
816 : {
817 : int max;
818 :
819 0 : max = isl_options_get_schedule_max_coefficient(ctx);
820 0 : if (max == -1)
821 0 : return isl_stat_ok;
822 :
823 0 : node->max = isl_vec_alloc(ctx, node->nvar);
824 0 : node->max = isl_vec_set_si(node->max, max);
825 0 : if (!node->max)
826 0 : return isl_stat_error;
827 :
828 0 : return isl_stat_ok;
829 : }
830 :
831 : /* Set the entries of node->max to the minimum of the schedule_max_coefficient
832 : * option (if set) and half of the minimum of the sizes in the other
833 : * dimensions. Round up when computing the half such that
834 : * if the minimum of the sizes is one, half of the size is taken to be one
835 : * rather than zero.
836 : * If the global minimum is unbounded (i.e., if both
837 : * the schedule_max_coefficient is not set and the sizes in the other
838 : * dimensions are unbounded), then store a negative value.
839 : * If the schedule coefficient is close to the size of the instance set
840 : * in another dimension, then the schedule may represent a loop
841 : * coalescing transformation (especially if the coefficient
842 : * in that other dimension is one). Forcing the coefficient to be
843 : * smaller than or equal to half the minimal size should avoid this
844 : * situation.
845 : */
846 0 : static isl_stat compute_max_coefficient(isl_ctx *ctx,
847 : struct isl_sched_node *node)
848 : {
849 : int max;
850 : int i, j;
851 : isl_vec *v;
852 :
853 0 : max = isl_options_get_schedule_max_coefficient(ctx);
854 0 : v = isl_vec_alloc(ctx, node->nvar);
855 0 : if (!v)
856 0 : return isl_stat_error;
857 :
858 0 : for (i = 0; i < node->nvar; ++i) {
859 0 : isl_int_set_si(v->el[i], max);
860 0 : isl_int_mul_si(v->el[i], v->el[i], 2);
861 : }
862 :
863 0 : for (i = 0; i < node->nvar; ++i) {
864 : isl_val *size;
865 :
866 0 : size = isl_multi_val_get_val(node->sizes, i);
867 0 : if (!size)
868 0 : goto error;
869 0 : if (!isl_val_is_int(size)) {
870 0 : isl_val_free(size);
871 0 : continue;
872 : }
873 0 : for (j = 0; j < node->nvar; ++j) {
874 0 : if (j == i)
875 0 : continue;
876 0 : if (isl_int_is_neg(v->el[j]) ||
877 0 : isl_int_gt(v->el[j], size->n))
878 0 : isl_int_set(v->el[j], size->n);
879 : }
880 0 : isl_val_free(size);
881 : }
882 :
883 0 : for (i = 0; i < node->nvar; ++i)
884 0 : isl_int_cdiv_q_ui(v->el[i], v->el[i], 2);
885 :
886 0 : node->max = v;
887 0 : return isl_stat_ok;
888 : error:
889 0 : isl_vec_free(v);
890 0 : return isl_stat_error;
891 : }
892 :
893 : /* Compute and return the size of "set" in dimension "dim".
894 : * The size is taken to be the difference in values for that variable
895 : * for fixed values of the other variables.
896 : * This assumes that "set" is convex.
897 : * In particular, the variable is first isolated from the other variables
898 : * in the range of a map
899 : *
900 : * [i_0, ..., i_dim-1, i_dim+1, ...] -> [i_dim]
901 : *
902 : * and then duplicated
903 : *
904 : * [i_0, ..., i_dim-1, i_dim+1, ...] -> [[i_dim] -> [i_dim']]
905 : *
906 : * The shared variables are then projected out and the maximal value
907 : * of i_dim' - i_dim is computed.
908 : */
909 0 : static __isl_give isl_val *compute_size(__isl_take isl_set *set, int dim)
910 : {
911 : isl_map *map;
912 : isl_local_space *ls;
913 : isl_aff *obj;
914 : isl_val *v;
915 :
916 0 : map = isl_set_project_onto_map(set, isl_dim_set, dim, 1);
917 0 : map = isl_map_project_out(map, isl_dim_in, dim, 1);
918 0 : map = isl_map_range_product(map, isl_map_copy(map));
919 0 : map = isl_set_unwrap(isl_map_range(map));
920 0 : set = isl_map_deltas(map);
921 0 : ls = isl_local_space_from_space(isl_set_get_space(set));
922 0 : obj = isl_aff_var_on_domain(ls, isl_dim_set, 0);
923 0 : v = isl_set_max_val(set, obj);
924 0 : isl_aff_free(obj);
925 0 : isl_set_free(set);
926 :
927 0 : return v;
928 : }
929 :
930 : /* Compute the size of the instance set "set" of "node", after compression,
931 : * as well as bounds on the corresponding coefficients, if needed.
932 : *
933 : * The sizes are needed when the schedule_treat_coalescing option is set.
934 : * The bounds are needed when the schedule_treat_coalescing option or
935 : * the schedule_max_coefficient option is set.
936 : *
937 : * If the schedule_treat_coalescing option is not set, then at most
938 : * the bounds need to be set and this is done in set_max_coefficient.
939 : * Otherwise, compress the domain if needed, compute the size
940 : * in each direction and store the results in node->size.
941 : * If the domain is not convex, then the sizes are computed
942 : * on a convex superset in order to avoid picking up sizes
943 : * that are valid for the individual disjuncts, but not for
944 : * the domain as a whole.
945 : * Finally, set the bounds on the coefficients based on the sizes
946 : * and the schedule_max_coefficient option in compute_max_coefficient.
947 : */
948 0 : static isl_stat compute_sizes_and_max(isl_ctx *ctx, struct isl_sched_node *node,
949 : __isl_take isl_set *set)
950 : {
951 : int j, n;
952 : isl_multi_val *mv;
953 :
954 0 : if (!isl_options_get_schedule_treat_coalescing(ctx)) {
955 0 : isl_set_free(set);
956 0 : return set_max_coefficient(ctx, node);
957 : }
958 :
959 0 : if (node->compressed)
960 0 : set = isl_set_preimage_multi_aff(set,
961 : isl_multi_aff_copy(node->decompress));
962 0 : set = isl_set_from_basic_set(isl_set_simple_hull(set));
963 0 : mv = isl_multi_val_zero(isl_set_get_space(set));
964 0 : n = isl_set_dim(set, isl_dim_set);
965 0 : for (j = 0; j < n; ++j) {
966 : isl_val *v;
967 :
968 0 : v = compute_size(isl_set_copy(set), j);
969 0 : mv = isl_multi_val_set_val(mv, j, v);
970 : }
971 0 : node->sizes = mv;
972 0 : isl_set_free(set);
973 0 : if (!node->sizes)
974 0 : return isl_stat_error;
975 0 : return compute_max_coefficient(ctx, node);
976 : }
977 :
978 : /* Add a new node to the graph representing the given instance set.
979 : * "nvar" is the (possibly compressed) number of variables and
980 : * may be smaller than then number of set variables in "set"
981 : * if "compressed" is set.
982 : * If "compressed" is set, then "hull" represents the constraints
983 : * that were used to derive the compression, while "compress" and
984 : * "decompress" map the original space to the compressed space and
985 : * vice versa.
986 : * If "compressed" is not set, then "hull", "compress" and "decompress"
987 : * should be NULL.
988 : *
989 : * Compute the size of the instance set and bounds on the coefficients,
990 : * if needed.
991 : */
992 0 : static isl_stat add_node(struct isl_sched_graph *graph,
993 : __isl_take isl_set *set, int nvar, int compressed,
994 : __isl_take isl_set *hull, __isl_take isl_multi_aff *compress,
995 : __isl_take isl_multi_aff *decompress)
996 : {
997 : int nparam;
998 : isl_ctx *ctx;
999 : isl_mat *sched;
1000 : isl_space *space;
1001 : int *coincident;
1002 : struct isl_sched_node *node;
1003 :
1004 0 : if (!set)
1005 0 : goto error;
1006 :
1007 0 : ctx = isl_set_get_ctx(set);
1008 0 : nparam = isl_set_dim(set, isl_dim_param);
1009 0 : if (!ctx->opt->schedule_parametric)
1010 0 : nparam = 0;
1011 0 : sched = isl_mat_alloc(ctx, 0, 1 + nparam + nvar);
1012 0 : node = &graph->node[graph->n];
1013 0 : graph->n++;
1014 0 : space = isl_set_get_space(set);
1015 0 : node->space = space;
1016 0 : node->nvar = nvar;
1017 0 : node->nparam = nparam;
1018 0 : node->sched = sched;
1019 0 : node->sched_map = NULL;
1020 0 : coincident = isl_calloc_array(ctx, int, graph->max_row);
1021 0 : node->coincident = coincident;
1022 0 : node->compressed = compressed;
1023 0 : node->hull = hull;
1024 0 : node->compress = compress;
1025 0 : node->decompress = decompress;
1026 0 : if (compute_sizes_and_max(ctx, node, set) < 0)
1027 0 : return isl_stat_error;
1028 :
1029 0 : if (!space || !sched || (graph->max_row && !coincident))
1030 0 : return isl_stat_error;
1031 0 : if (compressed && (!hull || !compress || !decompress))
1032 0 : return isl_stat_error;
1033 :
1034 0 : return isl_stat_ok;
1035 : error:
1036 0 : isl_set_free(set);
1037 0 : isl_set_free(hull);
1038 0 : isl_multi_aff_free(compress);
1039 0 : isl_multi_aff_free(decompress);
1040 0 : return isl_stat_error;
1041 : }
1042 :
1043 : /* Construct an identifier for node "node", which will represent "set".
1044 : * The name of the identifier is either "compressed" or
1045 : * "compressed_<name>", with <name> the name of the space of "set".
1046 : * The user pointer of the identifier points to "node".
1047 : */
1048 0 : static __isl_give isl_id *construct_compressed_id(__isl_keep isl_set *set,
1049 : struct isl_sched_node *node)
1050 : {
1051 : isl_bool has_name;
1052 : isl_ctx *ctx;
1053 : isl_id *id;
1054 : isl_printer *p;
1055 : const char *name;
1056 : char *id_name;
1057 :
1058 0 : has_name = isl_set_has_tuple_name(set);
1059 0 : if (has_name < 0)
1060 0 : return NULL;
1061 :
1062 0 : ctx = isl_set_get_ctx(set);
1063 0 : if (!has_name)
1064 0 : return isl_id_alloc(ctx, "compressed", node);
1065 :
1066 0 : p = isl_printer_to_str(ctx);
1067 0 : name = isl_set_get_tuple_name(set);
1068 0 : p = isl_printer_print_str(p, "compressed_");
1069 0 : p = isl_printer_print_str(p, name);
1070 0 : id_name = isl_printer_get_str(p);
1071 0 : isl_printer_free(p);
1072 :
1073 0 : id = isl_id_alloc(ctx, id_name, node);
1074 0 : free(id_name);
1075 :
1076 0 : return id;
1077 : }
1078 :
1079 : /* Add a new node to the graph representing the given set.
1080 : *
1081 : * If any of the set variables is defined by an equality, then
1082 : * we perform variable compression such that we can perform
1083 : * the scheduling on the compressed domain.
1084 : * In this case, an identifier is used that references the new node
1085 : * such that each compressed space is unique and
1086 : * such that the node can be recovered from the compressed space.
1087 : */
1088 0 : static isl_stat extract_node(__isl_take isl_set *set, void *user)
1089 : {
1090 : int nvar;
1091 : isl_bool has_equality;
1092 : isl_id *id;
1093 : isl_basic_set *hull;
1094 : isl_set *hull_set;
1095 : isl_morph *morph;
1096 : isl_multi_aff *compress, *decompress;
1097 0 : struct isl_sched_graph *graph = user;
1098 :
1099 0 : hull = isl_set_affine_hull(isl_set_copy(set));
1100 0 : hull = isl_basic_set_remove_divs(hull);
1101 0 : nvar = isl_set_dim(set, isl_dim_set);
1102 0 : has_equality = has_any_defining_equality(hull);
1103 :
1104 0 : if (has_equality < 0)
1105 0 : goto error;
1106 0 : if (!has_equality) {
1107 0 : isl_basic_set_free(hull);
1108 0 : return add_node(graph, set, nvar, 0, NULL, NULL, NULL);
1109 : }
1110 :
1111 0 : id = construct_compressed_id(set, &graph->node[graph->n]);
1112 0 : morph = isl_basic_set_variable_compression_with_id(hull,
1113 : isl_dim_set, id);
1114 0 : isl_id_free(id);
1115 0 : nvar = isl_morph_ran_dim(morph, isl_dim_set);
1116 0 : compress = isl_morph_get_var_multi_aff(morph);
1117 0 : morph = isl_morph_inverse(morph);
1118 0 : decompress = isl_morph_get_var_multi_aff(morph);
1119 0 : isl_morph_free(morph);
1120 :
1121 0 : hull_set = isl_set_from_basic_set(hull);
1122 0 : return add_node(graph, set, nvar, 1, hull_set, compress, decompress);
1123 : error:
1124 0 : isl_basic_set_free(hull);
1125 0 : isl_set_free(set);
1126 0 : return isl_stat_error;
1127 : }
1128 :
1129 : struct isl_extract_edge_data {
1130 : enum isl_edge_type type;
1131 : struct isl_sched_graph *graph;
1132 : };
1133 :
1134 : /* Merge edge2 into edge1, freeing the contents of edge2.
1135 : * Return 0 on success and -1 on failure.
1136 : *
1137 : * edge1 and edge2 are assumed to have the same value for the map field.
1138 : */
1139 0 : static int merge_edge(struct isl_sched_edge *edge1,
1140 : struct isl_sched_edge *edge2)
1141 : {
1142 0 : edge1->types |= edge2->types;
1143 0 : isl_map_free(edge2->map);
1144 :
1145 0 : if (is_condition(edge2)) {
1146 0 : if (!edge1->tagged_condition)
1147 0 : edge1->tagged_condition = edge2->tagged_condition;
1148 : else
1149 0 : edge1->tagged_condition =
1150 0 : isl_union_map_union(edge1->tagged_condition,
1151 : edge2->tagged_condition);
1152 : }
1153 :
1154 0 : if (is_conditional_validity(edge2)) {
1155 0 : if (!edge1->tagged_validity)
1156 0 : edge1->tagged_validity = edge2->tagged_validity;
1157 : else
1158 0 : edge1->tagged_validity =
1159 0 : isl_union_map_union(edge1->tagged_validity,
1160 : edge2->tagged_validity);
1161 : }
1162 :
1163 0 : if (is_condition(edge2) && !edge1->tagged_condition)
1164 0 : return -1;
1165 0 : if (is_conditional_validity(edge2) && !edge1->tagged_validity)
1166 0 : return -1;
1167 :
1168 0 : return 0;
1169 : }
1170 :
1171 : /* Insert dummy tags in domain and range of "map".
1172 : *
1173 : * In particular, if "map" is of the form
1174 : *
1175 : * A -> B
1176 : *
1177 : * then return
1178 : *
1179 : * [A -> dummy_tag] -> [B -> dummy_tag]
1180 : *
1181 : * where the dummy_tags are identical and equal to any dummy tags
1182 : * introduced by any other call to this function.
1183 : */
1184 0 : static __isl_give isl_map *insert_dummy_tags(__isl_take isl_map *map)
1185 : {
1186 : static char dummy;
1187 : isl_ctx *ctx;
1188 : isl_id *id;
1189 : isl_space *space;
1190 : isl_set *domain, *range;
1191 :
1192 0 : ctx = isl_map_get_ctx(map);
1193 :
1194 0 : id = isl_id_alloc(ctx, NULL, &dummy);
1195 0 : space = isl_space_params(isl_map_get_space(map));
1196 0 : space = isl_space_set_from_params(space);
1197 0 : space = isl_space_set_tuple_id(space, isl_dim_set, id);
1198 0 : space = isl_space_map_from_set(space);
1199 :
1200 0 : domain = isl_map_wrap(map);
1201 0 : range = isl_map_wrap(isl_map_universe(space));
1202 0 : map = isl_map_from_domain_and_range(domain, range);
1203 0 : map = isl_map_zip(map);
1204 :
1205 0 : return map;
1206 : }
1207 :
1208 : /* Given that at least one of "src" or "dst" is compressed, return
1209 : * a map between the spaces of these nodes restricted to the affine
1210 : * hull that was used in the compression.
1211 : */
1212 0 : static __isl_give isl_map *extract_hull(struct isl_sched_node *src,
1213 : struct isl_sched_node *dst)
1214 : {
1215 : isl_set *dom, *ran;
1216 :
1217 0 : if (src->compressed)
1218 0 : dom = isl_set_copy(src->hull);
1219 : else
1220 0 : dom = isl_set_universe(isl_space_copy(src->space));
1221 0 : if (dst->compressed)
1222 0 : ran = isl_set_copy(dst->hull);
1223 : else
1224 0 : ran = isl_set_universe(isl_space_copy(dst->space));
1225 :
1226 0 : return isl_map_from_domain_and_range(dom, ran);
1227 : }
1228 :
1229 : /* Intersect the domains of the nested relations in domain and range
1230 : * of "tagged" with "map".
1231 : */
1232 0 : static __isl_give isl_map *map_intersect_domains(__isl_take isl_map *tagged,
1233 : __isl_keep isl_map *map)
1234 : {
1235 : isl_set *set;
1236 :
1237 0 : tagged = isl_map_zip(tagged);
1238 0 : set = isl_map_wrap(isl_map_copy(map));
1239 0 : tagged = isl_map_intersect_domain(tagged, set);
1240 0 : tagged = isl_map_zip(tagged);
1241 0 : return tagged;
1242 : }
1243 :
1244 : /* Return a pointer to the node that lives in the domain space of "map",
1245 : * an invalid node if there is no such node, or NULL in case of error.
1246 : */
1247 0 : static struct isl_sched_node *find_domain_node(isl_ctx *ctx,
1248 : struct isl_sched_graph *graph, __isl_keep isl_map *map)
1249 : {
1250 : struct isl_sched_node *node;
1251 : isl_space *space;
1252 :
1253 0 : space = isl_space_domain(isl_map_get_space(map));
1254 0 : node = graph_find_node(ctx, graph, space);
1255 0 : isl_space_free(space);
1256 :
1257 0 : return node;
1258 : }
1259 :
1260 : /* Return a pointer to the node that lives in the range space of "map",
1261 : * an invalid node if there is no such node, or NULL in case of error.
1262 : */
1263 0 : static struct isl_sched_node *find_range_node(isl_ctx *ctx,
1264 : struct isl_sched_graph *graph, __isl_keep isl_map *map)
1265 : {
1266 : struct isl_sched_node *node;
1267 : isl_space *space;
1268 :
1269 0 : space = isl_space_range(isl_map_get_space(map));
1270 0 : node = graph_find_node(ctx, graph, space);
1271 0 : isl_space_free(space);
1272 :
1273 0 : return node;
1274 : }
1275 :
1276 : /* Refrain from adding a new edge based on "map".
1277 : * Instead, just free the map.
1278 : * "tagged" is either a copy of "map" with additional tags or NULL.
1279 : */
1280 0 : static isl_stat skip_edge(__isl_take isl_map *map, __isl_take isl_map *tagged)
1281 : {
1282 0 : isl_map_free(map);
1283 0 : isl_map_free(tagged);
1284 :
1285 0 : return isl_stat_ok;
1286 : }
1287 :
1288 : /* Add a new edge to the graph based on the given map
1289 : * and add it to data->graph->edge_table[data->type].
1290 : * If a dependence relation of a given type happens to be identical
1291 : * to one of the dependence relations of a type that was added before,
1292 : * then we don't create a new edge, but instead mark the original edge
1293 : * as also representing a dependence of the current type.
1294 : *
1295 : * Edges of type isl_edge_condition or isl_edge_conditional_validity
1296 : * may be specified as "tagged" dependence relations. That is, "map"
1297 : * may contain elements (i -> a) -> (j -> b), where i -> j denotes
1298 : * the dependence on iterations and a and b are tags.
1299 : * edge->map is set to the relation containing the elements i -> j,
1300 : * while edge->tagged_condition and edge->tagged_validity contain
1301 : * the union of all the "map" relations
1302 : * for which extract_edge is called that result in the same edge->map.
1303 : *
1304 : * If the source or the destination node is compressed, then
1305 : * intersect both "map" and "tagged" with the constraints that
1306 : * were used to construct the compression.
1307 : * This ensures that there are no schedule constraints defined
1308 : * outside of these domains, while the scheduler no longer has
1309 : * any control over those outside parts.
1310 : */
1311 0 : static isl_stat extract_edge(__isl_take isl_map *map, void *user)
1312 : {
1313 : isl_bool empty;
1314 0 : isl_ctx *ctx = isl_map_get_ctx(map);
1315 0 : struct isl_extract_edge_data *data = user;
1316 0 : struct isl_sched_graph *graph = data->graph;
1317 : struct isl_sched_node *src, *dst;
1318 : struct isl_sched_edge *edge;
1319 0 : isl_map *tagged = NULL;
1320 :
1321 0 : if (data->type == isl_edge_condition ||
1322 0 : data->type == isl_edge_conditional_validity) {
1323 0 : if (isl_map_can_zip(map)) {
1324 0 : tagged = isl_map_copy(map);
1325 0 : map = isl_set_unwrap(isl_map_domain(isl_map_zip(map)));
1326 : } else {
1327 0 : tagged = insert_dummy_tags(isl_map_copy(map));
1328 : }
1329 : }
1330 :
1331 0 : src = find_domain_node(ctx, graph, map);
1332 0 : dst = find_range_node(ctx, graph, map);
1333 :
1334 0 : if (!src || !dst)
1335 : goto error;
1336 0 : if (!is_node(graph, src) || !is_node(graph, dst))
1337 0 : return skip_edge(map, tagged);
1338 :
1339 0 : if (src->compressed || dst->compressed) {
1340 : isl_map *hull;
1341 0 : hull = extract_hull(src, dst);
1342 0 : if (tagged)
1343 0 : tagged = map_intersect_domains(tagged, hull);
1344 0 : map = isl_map_intersect(map, hull);
1345 : }
1346 :
1347 0 : empty = isl_map_plain_is_empty(map);
1348 0 : if (empty < 0)
1349 0 : goto error;
1350 0 : if (empty)
1351 0 : return skip_edge(map, tagged);
1352 :
1353 0 : graph->edge[graph->n_edge].src = src;
1354 0 : graph->edge[graph->n_edge].dst = dst;
1355 0 : graph->edge[graph->n_edge].map = map;
1356 0 : graph->edge[graph->n_edge].types = 0;
1357 0 : graph->edge[graph->n_edge].tagged_condition = NULL;
1358 0 : graph->edge[graph->n_edge].tagged_validity = NULL;
1359 0 : set_type(&graph->edge[graph->n_edge], data->type);
1360 0 : if (data->type == isl_edge_condition)
1361 0 : graph->edge[graph->n_edge].tagged_condition =
1362 0 : isl_union_map_from_map(tagged);
1363 0 : if (data->type == isl_edge_conditional_validity)
1364 0 : graph->edge[graph->n_edge].tagged_validity =
1365 0 : isl_union_map_from_map(tagged);
1366 :
1367 0 : edge = graph_find_matching_edge(graph, &graph->edge[graph->n_edge]);
1368 0 : if (!edge) {
1369 0 : graph->n_edge++;
1370 0 : return isl_stat_error;
1371 : }
1372 0 : if (edge == &graph->edge[graph->n_edge])
1373 0 : return graph_edge_table_add(ctx, graph, data->type,
1374 0 : &graph->edge[graph->n_edge++]);
1375 :
1376 0 : if (merge_edge(edge, &graph->edge[graph->n_edge]) < 0)
1377 0 : return isl_stat_error;
1378 :
1379 0 : return graph_edge_table_add(ctx, graph, data->type, edge);
1380 : error:
1381 0 : isl_map_free(map);
1382 0 : isl_map_free(tagged);
1383 0 : return isl_stat_error;
1384 : }
1385 :
1386 : /* Initialize the schedule graph "graph" from the schedule constraints "sc".
1387 : *
1388 : * The context is included in the domain before the nodes of
1389 : * the graphs are extracted in order to be able to exploit
1390 : * any possible additional equalities.
1391 : * Note that this intersection is only performed locally here.
1392 : */
1393 0 : static isl_stat graph_init(struct isl_sched_graph *graph,
1394 : __isl_keep isl_schedule_constraints *sc)
1395 : {
1396 : isl_ctx *ctx;
1397 : isl_union_set *domain;
1398 : isl_union_map *c;
1399 : struct isl_extract_edge_data data;
1400 : enum isl_edge_type i;
1401 : isl_stat r;
1402 :
1403 0 : if (!sc)
1404 0 : return isl_stat_error;
1405 :
1406 0 : ctx = isl_schedule_constraints_get_ctx(sc);
1407 :
1408 0 : domain = isl_schedule_constraints_get_domain(sc);
1409 0 : graph->n = isl_union_set_n_set(domain);
1410 0 : isl_union_set_free(domain);
1411 :
1412 0 : if (graph_alloc(ctx, graph, graph->n,
1413 : isl_schedule_constraints_n_map(sc)) < 0)
1414 0 : return isl_stat_error;
1415 :
1416 0 : if (compute_max_row(graph, sc) < 0)
1417 0 : return isl_stat_error;
1418 0 : graph->root = graph;
1419 0 : graph->n = 0;
1420 0 : domain = isl_schedule_constraints_get_domain(sc);
1421 0 : domain = isl_union_set_intersect_params(domain,
1422 : isl_schedule_constraints_get_context(sc));
1423 0 : r = isl_union_set_foreach_set(domain, &extract_node, graph);
1424 0 : isl_union_set_free(domain);
1425 0 : if (r < 0)
1426 0 : return isl_stat_error;
1427 0 : if (graph_init_table(ctx, graph) < 0)
1428 0 : return isl_stat_error;
1429 0 : for (i = isl_edge_first; i <= isl_edge_last; ++i) {
1430 0 : c = isl_schedule_constraints_get(sc, i);
1431 0 : graph->max_edge[i] = isl_union_map_n_map(c);
1432 0 : isl_union_map_free(c);
1433 0 : if (!c)
1434 0 : return isl_stat_error;
1435 : }
1436 0 : if (graph_init_edge_tables(ctx, graph) < 0)
1437 0 : return isl_stat_error;
1438 0 : graph->n_edge = 0;
1439 0 : data.graph = graph;
1440 0 : for (i = isl_edge_first; i <= isl_edge_last; ++i) {
1441 : isl_stat r;
1442 :
1443 0 : data.type = i;
1444 0 : c = isl_schedule_constraints_get(sc, i);
1445 0 : r = isl_union_map_foreach_map(c, &extract_edge, &data);
1446 0 : isl_union_map_free(c);
1447 0 : if (r < 0)
1448 0 : return isl_stat_error;
1449 : }
1450 :
1451 0 : return isl_stat_ok;
1452 : }
1453 :
1454 : /* Check whether there is any dependence from node[j] to node[i]
1455 : * or from node[i] to node[j].
1456 : */
1457 0 : static isl_bool node_follows_weak(int i, int j, void *user)
1458 : {
1459 : isl_bool f;
1460 0 : struct isl_sched_graph *graph = user;
1461 :
1462 0 : f = graph_has_any_edge(graph, &graph->node[j], &graph->node[i]);
1463 0 : if (f < 0 || f)
1464 0 : return f;
1465 0 : return graph_has_any_edge(graph, &graph->node[i], &graph->node[j]);
1466 : }
1467 :
1468 : /* Check whether there is a (conditional) validity dependence from node[j]
1469 : * to node[i], forcing node[i] to follow node[j].
1470 : */
1471 0 : static isl_bool node_follows_strong(int i, int j, void *user)
1472 : {
1473 0 : struct isl_sched_graph *graph = user;
1474 :
1475 0 : return graph_has_validity_edge(graph, &graph->node[j], &graph->node[i]);
1476 : }
1477 :
1478 : /* Use Tarjan's algorithm for computing the strongly connected components
1479 : * in the dependence graph only considering those edges defined by "follows".
1480 : */
1481 0 : static isl_stat detect_ccs(isl_ctx *ctx, struct isl_sched_graph *graph,
1482 : isl_bool (*follows)(int i, int j, void *user))
1483 : {
1484 : int i, n;
1485 0 : struct isl_tarjan_graph *g = NULL;
1486 :
1487 0 : g = isl_tarjan_graph_init(ctx, graph->n, follows, graph);
1488 0 : if (!g)
1489 0 : return isl_stat_error;
1490 :
1491 0 : graph->scc = 0;
1492 0 : i = 0;
1493 0 : n = graph->n;
1494 0 : while (n) {
1495 0 : while (g->order[i] != -1) {
1496 0 : graph->node[g->order[i]].scc = graph->scc;
1497 0 : --n;
1498 0 : ++i;
1499 : }
1500 0 : ++i;
1501 0 : graph->scc++;
1502 : }
1503 :
1504 0 : isl_tarjan_graph_free(g);
1505 :
1506 0 : return isl_stat_ok;
1507 : }
1508 :
1509 : /* Apply Tarjan's algorithm to detect the strongly connected components
1510 : * in the dependence graph.
1511 : * Only consider the (conditional) validity dependences and clear "weak".
1512 : */
1513 0 : static isl_stat detect_sccs(isl_ctx *ctx, struct isl_sched_graph *graph)
1514 : {
1515 0 : graph->weak = 0;
1516 0 : return detect_ccs(ctx, graph, &node_follows_strong);
1517 : }
1518 :
1519 : /* Apply Tarjan's algorithm to detect the (weakly) connected components
1520 : * in the dependence graph.
1521 : * Consider all dependences and set "weak".
1522 : */
1523 0 : static isl_stat detect_wccs(isl_ctx *ctx, struct isl_sched_graph *graph)
1524 : {
1525 0 : graph->weak = 1;
1526 0 : return detect_ccs(ctx, graph, &node_follows_weak);
1527 : }
1528 :
1529 0 : static int cmp_scc(const void *a, const void *b, void *data)
1530 : {
1531 0 : struct isl_sched_graph *graph = data;
1532 0 : const int *i1 = a;
1533 0 : const int *i2 = b;
1534 :
1535 0 : return graph->node[*i1].scc - graph->node[*i2].scc;
1536 : }
1537 :
1538 : /* Sort the elements of graph->sorted according to the corresponding SCCs.
1539 : */
1540 0 : static int sort_sccs(struct isl_sched_graph *graph)
1541 : {
1542 0 : return isl_sort(graph->sorted, graph->n, sizeof(int), &cmp_scc, graph);
1543 : }
1544 :
1545 : /* Return a non-parametric set in the compressed space of "node" that is
1546 : * bounded by the size in each direction
1547 : *
1548 : * { [x] : -S_i <= x_i <= S_i }
1549 : *
1550 : * If S_i is infinity in direction i, then there are no constraints
1551 : * in that direction.
1552 : *
1553 : * Cache the result in node->bounds.
1554 : */
1555 0 : static __isl_give isl_basic_set *get_size_bounds(struct isl_sched_node *node)
1556 : {
1557 : isl_space *space;
1558 : isl_basic_set *bounds;
1559 : int i;
1560 : unsigned nparam;
1561 :
1562 0 : if (node->bounds)
1563 0 : return isl_basic_set_copy(node->bounds);
1564 :
1565 0 : if (node->compressed)
1566 0 : space = isl_multi_aff_get_domain_space(node->decompress);
1567 : else
1568 0 : space = isl_space_copy(node->space);
1569 0 : nparam = isl_space_dim(space, isl_dim_param);
1570 0 : space = isl_space_drop_dims(space, isl_dim_param, 0, nparam);
1571 0 : bounds = isl_basic_set_universe(space);
1572 :
1573 0 : for (i = 0; i < node->nvar; ++i) {
1574 : isl_val *size;
1575 :
1576 0 : size = isl_multi_val_get_val(node->sizes, i);
1577 0 : if (!size)
1578 0 : return isl_basic_set_free(bounds);
1579 0 : if (!isl_val_is_int(size)) {
1580 0 : isl_val_free(size);
1581 0 : continue;
1582 : }
1583 0 : bounds = isl_basic_set_upper_bound_val(bounds, isl_dim_set, i,
1584 : isl_val_copy(size));
1585 0 : bounds = isl_basic_set_lower_bound_val(bounds, isl_dim_set, i,
1586 : isl_val_neg(size));
1587 : }
1588 :
1589 0 : node->bounds = isl_basic_set_copy(bounds);
1590 0 : return bounds;
1591 : }
1592 :
1593 : /* Drop some constraints from "delta" that could be exploited
1594 : * to construct loop coalescing schedules.
1595 : * In particular, drop those constraint that bound the difference
1596 : * to the size of the domain.
1597 : * First project out the parameters to improve the effectiveness.
1598 : */
1599 0 : static __isl_give isl_set *drop_coalescing_constraints(
1600 : __isl_take isl_set *delta, struct isl_sched_node *node)
1601 : {
1602 : unsigned nparam;
1603 : isl_basic_set *bounds;
1604 :
1605 0 : bounds = get_size_bounds(node);
1606 :
1607 0 : nparam = isl_set_dim(delta, isl_dim_param);
1608 0 : delta = isl_set_project_out(delta, isl_dim_param, 0, nparam);
1609 0 : delta = isl_set_remove_divs(delta);
1610 0 : delta = isl_set_plain_gist_basic_set(delta, bounds);
1611 0 : return delta;
1612 : }
1613 :
1614 : /* Given a dependence relation R from "node" to itself,
1615 : * construct the set of coefficients of valid constraints for elements
1616 : * in that dependence relation.
1617 : * In particular, the result contains tuples of coefficients
1618 : * c_0, c_n, c_x such that
1619 : *
1620 : * c_0 + c_n n + c_x y - c_x x >= 0 for each (x,y) in R
1621 : *
1622 : * or, equivalently,
1623 : *
1624 : * c_0 + c_n n + c_x d >= 0 for each d in delta R = { y - x | (x,y) in R }
1625 : *
1626 : * We choose here to compute the dual of delta R.
1627 : * Alternatively, we could have computed the dual of R, resulting
1628 : * in a set of tuples c_0, c_n, c_x, c_y, and then
1629 : * plugged in (c_0, c_n, c_x, -c_x).
1630 : *
1631 : * If "need_param" is set, then the resulting coefficients effectively
1632 : * include coefficients for the parameters c_n. Otherwise, they may
1633 : * have been projected out already.
1634 : * Since the constraints may be different for these two cases,
1635 : * they are stored in separate caches.
1636 : * In particular, if no parameter coefficients are required and
1637 : * the schedule_treat_coalescing option is set, then the parameters
1638 : * are projected out and some constraints that could be exploited
1639 : * to construct coalescing schedules are removed before the dual
1640 : * is computed.
1641 : *
1642 : * If "node" has been compressed, then the dependence relation
1643 : * is also compressed before the set of coefficients is computed.
1644 : */
1645 0 : static __isl_give isl_basic_set *intra_coefficients(
1646 : struct isl_sched_graph *graph, struct isl_sched_node *node,
1647 : __isl_take isl_map *map, int need_param)
1648 : {
1649 : isl_ctx *ctx;
1650 : isl_set *delta;
1651 : isl_map *key;
1652 : isl_basic_set *coef;
1653 : isl_maybe_isl_basic_set m;
1654 0 : isl_map_to_basic_set **hmap = &graph->intra_hmap;
1655 : int treat;
1656 :
1657 0 : if (!map)
1658 0 : return NULL;
1659 :
1660 0 : ctx = isl_map_get_ctx(map);
1661 0 : treat = !need_param && isl_options_get_schedule_treat_coalescing(ctx);
1662 0 : if (!treat)
1663 0 : hmap = &graph->intra_hmap_param;
1664 0 : m = isl_map_to_basic_set_try_get(*hmap, map);
1665 0 : if (m.valid < 0 || m.valid) {
1666 0 : isl_map_free(map);
1667 0 : return m.value;
1668 : }
1669 :
1670 0 : key = isl_map_copy(map);
1671 0 : if (node->compressed) {
1672 0 : map = isl_map_preimage_domain_multi_aff(map,
1673 : isl_multi_aff_copy(node->decompress));
1674 0 : map = isl_map_preimage_range_multi_aff(map,
1675 : isl_multi_aff_copy(node->decompress));
1676 : }
1677 0 : delta = isl_map_deltas(map);
1678 0 : if (treat)
1679 0 : delta = drop_coalescing_constraints(delta, node);
1680 0 : delta = isl_set_remove_divs(delta);
1681 0 : coef = isl_set_coefficients(delta);
1682 0 : *hmap = isl_map_to_basic_set_set(*hmap, key, isl_basic_set_copy(coef));
1683 :
1684 0 : return coef;
1685 : }
1686 :
1687 : /* Given a dependence relation R, construct the set of coefficients
1688 : * of valid constraints for elements in that dependence relation.
1689 : * In particular, the result contains tuples of coefficients
1690 : * c_0, c_n, c_x, c_y such that
1691 : *
1692 : * c_0 + c_n n + c_x x + c_y y >= 0 for each (x,y) in R
1693 : *
1694 : * If the source or destination nodes of "edge" have been compressed,
1695 : * then the dependence relation is also compressed before
1696 : * the set of coefficients is computed.
1697 : */
1698 0 : static __isl_give isl_basic_set *inter_coefficients(
1699 : struct isl_sched_graph *graph, struct isl_sched_edge *edge,
1700 : __isl_take isl_map *map)
1701 : {
1702 : isl_set *set;
1703 : isl_map *key;
1704 : isl_basic_set *coef;
1705 : isl_maybe_isl_basic_set m;
1706 :
1707 0 : m = isl_map_to_basic_set_try_get(graph->inter_hmap, map);
1708 0 : if (m.valid < 0 || m.valid) {
1709 0 : isl_map_free(map);
1710 0 : return m.value;
1711 : }
1712 :
1713 0 : key = isl_map_copy(map);
1714 0 : if (edge->src->compressed)
1715 0 : map = isl_map_preimage_domain_multi_aff(map,
1716 0 : isl_multi_aff_copy(edge->src->decompress));
1717 0 : if (edge->dst->compressed)
1718 0 : map = isl_map_preimage_range_multi_aff(map,
1719 0 : isl_multi_aff_copy(edge->dst->decompress));
1720 0 : set = isl_map_wrap(isl_map_remove_divs(map));
1721 0 : coef = isl_set_coefficients(set);
1722 0 : graph->inter_hmap = isl_map_to_basic_set_set(graph->inter_hmap, key,
1723 : isl_basic_set_copy(coef));
1724 :
1725 0 : return coef;
1726 : }
1727 :
1728 : /* Return the position of the coefficients of the variables in
1729 : * the coefficients constraints "coef".
1730 : *
1731 : * The space of "coef" is of the form
1732 : *
1733 : * { coefficients[[cst, params] -> S] }
1734 : *
1735 : * Return the position of S.
1736 : */
1737 0 : static int coef_var_offset(__isl_keep isl_basic_set *coef)
1738 : {
1739 : int offset;
1740 : isl_space *space;
1741 :
1742 0 : space = isl_space_unwrap(isl_basic_set_get_space(coef));
1743 0 : offset = isl_space_dim(space, isl_dim_in);
1744 0 : isl_space_free(space);
1745 :
1746 0 : return offset;
1747 : }
1748 :
1749 : /* Return the offset of the coefficient of the constant term of "node"
1750 : * within the (I)LP.
1751 : *
1752 : * Within each node, the coefficients have the following order:
1753 : * - positive and negative parts of c_i_x
1754 : * - c_i_n (if parametric)
1755 : * - c_i_0
1756 : */
1757 0 : static int node_cst_coef_offset(struct isl_sched_node *node)
1758 : {
1759 0 : return node->start + 2 * node->nvar + node->nparam;
1760 : }
1761 :
1762 : /* Return the offset of the coefficients of the parameters of "node"
1763 : * within the (I)LP.
1764 : *
1765 : * Within each node, the coefficients have the following order:
1766 : * - positive and negative parts of c_i_x
1767 : * - c_i_n (if parametric)
1768 : * - c_i_0
1769 : */
1770 0 : static int node_par_coef_offset(struct isl_sched_node *node)
1771 : {
1772 0 : return node->start + 2 * node->nvar;
1773 : }
1774 :
1775 : /* Return the offset of the coefficients of the variables of "node"
1776 : * within the (I)LP.
1777 : *
1778 : * Within each node, the coefficients have the following order:
1779 : * - positive and negative parts of c_i_x
1780 : * - c_i_n (if parametric)
1781 : * - c_i_0
1782 : */
1783 0 : static int node_var_coef_offset(struct isl_sched_node *node)
1784 : {
1785 0 : return node->start;
1786 : }
1787 :
1788 : /* Return the position of the pair of variables encoding
1789 : * coefficient "i" of "node".
1790 : *
1791 : * The order of these variable pairs is the opposite of
1792 : * that of the coefficients, with 2 variables per coefficient.
1793 : */
1794 0 : static int node_var_coef_pos(struct isl_sched_node *node, int i)
1795 : {
1796 0 : return node_var_coef_offset(node) + 2 * (node->nvar - 1 - i);
1797 : }
1798 :
1799 : /* Construct an isl_dim_map for mapping constraints on coefficients
1800 : * for "node" to the corresponding positions in graph->lp.
1801 : * "offset" is the offset of the coefficients for the variables
1802 : * in the input constraints.
1803 : * "s" is the sign of the mapping.
1804 : *
1805 : * The input constraints are given in terms of the coefficients
1806 : * (c_0, c_x) or (c_0, c_n, c_x).
1807 : * The mapping produced by this function essentially plugs in
1808 : * (0, c_i_x^+ - c_i_x^-) if s = 1 and
1809 : * (0, -c_i_x^+ + c_i_x^-) if s = -1 or
1810 : * (0, 0, c_i_x^+ - c_i_x^-) if s = 1 and
1811 : * (0, 0, -c_i_x^+ + c_i_x^-) if s = -1.
1812 : * In graph->lp, the c_i_x^- appear before their c_i_x^+ counterpart.
1813 : * Furthermore, the order of these pairs is the opposite of that
1814 : * of the corresponding coefficients.
1815 : *
1816 : * The caller can extend the mapping to also map the other coefficients
1817 : * (and therefore not plug in 0).
1818 : */
1819 0 : static __isl_give isl_dim_map *intra_dim_map(isl_ctx *ctx,
1820 : struct isl_sched_graph *graph, struct isl_sched_node *node,
1821 : int offset, int s)
1822 : {
1823 : int pos;
1824 : unsigned total;
1825 : isl_dim_map *dim_map;
1826 :
1827 0 : if (!node || !graph->lp)
1828 0 : return NULL;
1829 :
1830 0 : total = isl_basic_set_total_dim(graph->lp);
1831 0 : pos = node_var_coef_pos(node, 0);
1832 0 : dim_map = isl_dim_map_alloc(ctx, total);
1833 0 : isl_dim_map_range(dim_map, pos, -2, offset, 1, node->nvar, -s);
1834 0 : isl_dim_map_range(dim_map, pos + 1, -2, offset, 1, node->nvar, s);
1835 :
1836 0 : return dim_map;
1837 : }
1838 :
1839 : /* Construct an isl_dim_map for mapping constraints on coefficients
1840 : * for "src" (node i) and "dst" (node j) to the corresponding positions
1841 : * in graph->lp.
1842 : * "offset" is the offset of the coefficients for the variables of "src"
1843 : * in the input constraints.
1844 : * "s" is the sign of the mapping.
1845 : *
1846 : * The input constraints are given in terms of the coefficients
1847 : * (c_0, c_n, c_x, c_y).
1848 : * The mapping produced by this function essentially plugs in
1849 : * (c_j_0 - c_i_0, c_j_n - c_i_n,
1850 : * -(c_i_x^+ - c_i_x^-), c_j_x^+ - c_j_x^-) if s = 1 and
1851 : * (-c_j_0 + c_i_0, -c_j_n + c_i_n,
1852 : * c_i_x^+ - c_i_x^-, -(c_j_x^+ - c_j_x^-)) if s = -1.
1853 : * In graph->lp, the c_*^- appear before their c_*^+ counterpart.
1854 : * Furthermore, the order of these pairs is the opposite of that
1855 : * of the corresponding coefficients.
1856 : *
1857 : * The caller can further extend the mapping.
1858 : */
1859 0 : static __isl_give isl_dim_map *inter_dim_map(isl_ctx *ctx,
1860 : struct isl_sched_graph *graph, struct isl_sched_node *src,
1861 : struct isl_sched_node *dst, int offset, int s)
1862 : {
1863 : int pos;
1864 : unsigned total;
1865 : isl_dim_map *dim_map;
1866 :
1867 0 : if (!src || !dst || !graph->lp)
1868 0 : return NULL;
1869 :
1870 0 : total = isl_basic_set_total_dim(graph->lp);
1871 0 : dim_map = isl_dim_map_alloc(ctx, total);
1872 :
1873 0 : pos = node_cst_coef_offset(dst);
1874 0 : isl_dim_map_range(dim_map, pos, 0, 0, 0, 1, s);
1875 0 : pos = node_par_coef_offset(dst);
1876 0 : isl_dim_map_range(dim_map, pos, 1, 1, 1, dst->nparam, s);
1877 0 : pos = node_var_coef_pos(dst, 0);
1878 0 : isl_dim_map_range(dim_map, pos, -2, offset + src->nvar, 1,
1879 0 : dst->nvar, -s);
1880 0 : isl_dim_map_range(dim_map, pos + 1, -2, offset + src->nvar, 1,
1881 0 : dst->nvar, s);
1882 :
1883 0 : pos = node_cst_coef_offset(src);
1884 0 : isl_dim_map_range(dim_map, pos, 0, 0, 0, 1, -s);
1885 0 : pos = node_par_coef_offset(src);
1886 0 : isl_dim_map_range(dim_map, pos, 1, 1, 1, src->nparam, -s);
1887 0 : pos = node_var_coef_pos(src, 0);
1888 0 : isl_dim_map_range(dim_map, pos, -2, offset, 1, src->nvar, s);
1889 0 : isl_dim_map_range(dim_map, pos + 1, -2, offset, 1, src->nvar, -s);
1890 :
1891 0 : return dim_map;
1892 : }
1893 :
1894 : /* Add the constraints from "src" to "dst" using "dim_map",
1895 : * after making sure there is enough room in "dst" for the extra constraints.
1896 : */
1897 0 : static __isl_give isl_basic_set *add_constraints_dim_map(
1898 : __isl_take isl_basic_set *dst, __isl_take isl_basic_set *src,
1899 : __isl_take isl_dim_map *dim_map)
1900 : {
1901 : int n_eq, n_ineq;
1902 :
1903 0 : n_eq = isl_basic_set_n_equality(src);
1904 0 : n_ineq = isl_basic_set_n_inequality(src);
1905 0 : dst = isl_basic_set_extend_constraints(dst, n_eq, n_ineq);
1906 0 : dst = isl_basic_set_add_constraints_dim_map(dst, src, dim_map);
1907 0 : return dst;
1908 : }
1909 :
1910 : /* Add constraints to graph->lp that force validity for the given
1911 : * dependence from a node i to itself.
1912 : * That is, add constraints that enforce
1913 : *
1914 : * (c_i_0 + c_i_n n + c_i_x y) - (c_i_0 + c_i_n n + c_i_x x)
1915 : * = c_i_x (y - x) >= 0
1916 : *
1917 : * for each (x,y) in R.
1918 : * We obtain general constraints on coefficients (c_0, c_x)
1919 : * of valid constraints for (y - x) and then plug in (0, c_i_x^+ - c_i_x^-),
1920 : * where c_i_x = c_i_x^+ - c_i_x^-, with c_i_x^+ and c_i_x^- non-negative.
1921 : * In graph->lp, the c_i_x^- appear before their c_i_x^+ counterpart.
1922 : * Note that the result of intra_coefficients may also contain
1923 : * parameter coefficients c_n, in which case 0 is plugged in for them as well.
1924 : */
1925 0 : static isl_stat add_intra_validity_constraints(struct isl_sched_graph *graph,
1926 : struct isl_sched_edge *edge)
1927 : {
1928 : int offset;
1929 0 : isl_map *map = isl_map_copy(edge->map);
1930 0 : isl_ctx *ctx = isl_map_get_ctx(map);
1931 : isl_dim_map *dim_map;
1932 : isl_basic_set *coef;
1933 0 : struct isl_sched_node *node = edge->src;
1934 :
1935 0 : coef = intra_coefficients(graph, node, map, 0);
1936 :
1937 0 : offset = coef_var_offset(coef);
1938 :
1939 0 : if (!coef)
1940 0 : return isl_stat_error;
1941 :
1942 0 : dim_map = intra_dim_map(ctx, graph, node, offset, 1);
1943 0 : graph->lp = add_constraints_dim_map(graph->lp, coef, dim_map);
1944 :
1945 0 : return isl_stat_ok;
1946 : }
1947 :
1948 : /* Add constraints to graph->lp that force validity for the given
1949 : * dependence from node i to node j.
1950 : * That is, add constraints that enforce
1951 : *
1952 : * (c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x) >= 0
1953 : *
1954 : * for each (x,y) in R.
1955 : * We obtain general constraints on coefficients (c_0, c_n, c_x, c_y)
1956 : * of valid constraints for R and then plug in
1957 : * (c_j_0 - c_i_0, c_j_n - c_i_n, -(c_i_x^+ - c_i_x^-), c_j_x^+ - c_j_x^-),
1958 : * where c_* = c_*^+ - c_*^-, with c_*^+ and c_*^- non-negative.
1959 : * In graph->lp, the c_*^- appear before their c_*^+ counterpart.
1960 : */
1961 0 : static isl_stat add_inter_validity_constraints(struct isl_sched_graph *graph,
1962 : struct isl_sched_edge *edge)
1963 : {
1964 : int offset;
1965 : isl_map *map;
1966 : isl_ctx *ctx;
1967 : isl_dim_map *dim_map;
1968 : isl_basic_set *coef;
1969 0 : struct isl_sched_node *src = edge->src;
1970 0 : struct isl_sched_node *dst = edge->dst;
1971 :
1972 0 : if (!graph->lp)
1973 0 : return isl_stat_error;
1974 :
1975 0 : map = isl_map_copy(edge->map);
1976 0 : ctx = isl_map_get_ctx(map);
1977 0 : coef = inter_coefficients(graph, edge, map);
1978 :
1979 0 : offset = coef_var_offset(coef);
1980 :
1981 0 : if (!coef)
1982 0 : return isl_stat_error;
1983 :
1984 0 : dim_map = inter_dim_map(ctx, graph, src, dst, offset, 1);
1985 :
1986 0 : edge->start = graph->lp->n_ineq;
1987 0 : graph->lp = add_constraints_dim_map(graph->lp, coef, dim_map);
1988 0 : if (!graph->lp)
1989 0 : return isl_stat_error;
1990 0 : edge->end = graph->lp->n_ineq;
1991 :
1992 0 : return isl_stat_ok;
1993 : }
1994 :
1995 : /* Add constraints to graph->lp that bound the dependence distance for the given
1996 : * dependence from a node i to itself.
1997 : * If s = 1, we add the constraint
1998 : *
1999 : * c_i_x (y - x) <= m_0 + m_n n
2000 : *
2001 : * or
2002 : *
2003 : * -c_i_x (y - x) + m_0 + m_n n >= 0
2004 : *
2005 : * for each (x,y) in R.
2006 : * If s = -1, we add the constraint
2007 : *
2008 : * -c_i_x (y - x) <= m_0 + m_n n
2009 : *
2010 : * or
2011 : *
2012 : * c_i_x (y - x) + m_0 + m_n n >= 0
2013 : *
2014 : * for each (x,y) in R.
2015 : * We obtain general constraints on coefficients (c_0, c_n, c_x)
2016 : * of valid constraints for (y - x) and then plug in (m_0, m_n, -s * c_i_x),
2017 : * with each coefficient (except m_0) represented as a pair of non-negative
2018 : * coefficients.
2019 : *
2020 : *
2021 : * If "local" is set, then we add constraints
2022 : *
2023 : * c_i_x (y - x) <= 0
2024 : *
2025 : * or
2026 : *
2027 : * -c_i_x (y - x) <= 0
2028 : *
2029 : * instead, forcing the dependence distance to be (less than or) equal to 0.
2030 : * That is, we plug in (0, 0, -s * c_i_x),
2031 : * intra_coefficients is not required to have c_n in its result when
2032 : * "local" is set. If they are missing, then (0, -s * c_i_x) is plugged in.
2033 : * Note that dependences marked local are treated as validity constraints
2034 : * by add_all_validity_constraints and therefore also have
2035 : * their distances bounded by 0 from below.
2036 : */
2037 0 : static isl_stat add_intra_proximity_constraints(struct isl_sched_graph *graph,
2038 : struct isl_sched_edge *edge, int s, int local)
2039 : {
2040 : int offset;
2041 : unsigned nparam;
2042 0 : isl_map *map = isl_map_copy(edge->map);
2043 0 : isl_ctx *ctx = isl_map_get_ctx(map);
2044 : isl_dim_map *dim_map;
2045 : isl_basic_set *coef;
2046 0 : struct isl_sched_node *node = edge->src;
2047 :
2048 0 : coef = intra_coefficients(graph, node, map, !local);
2049 :
2050 0 : offset = coef_var_offset(coef);
2051 :
2052 0 : if (!coef)
2053 0 : return isl_stat_error;
2054 :
2055 0 : nparam = isl_space_dim(node->space, isl_dim_param);
2056 0 : dim_map = intra_dim_map(ctx, graph, node, offset, -s);
2057 :
2058 0 : if (!local) {
2059 0 : isl_dim_map_range(dim_map, 1, 0, 0, 0, 1, 1);
2060 0 : isl_dim_map_range(dim_map, 4, 2, 1, 1, nparam, -1);
2061 0 : isl_dim_map_range(dim_map, 5, 2, 1, 1, nparam, 1);
2062 : }
2063 0 : graph->lp = add_constraints_dim_map(graph->lp, coef, dim_map);
2064 :
2065 0 : return isl_stat_ok;
2066 : }
2067 :
2068 : /* Add constraints to graph->lp that bound the dependence distance for the given
2069 : * dependence from node i to node j.
2070 : * If s = 1, we add the constraint
2071 : *
2072 : * (c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x)
2073 : * <= m_0 + m_n n
2074 : *
2075 : * or
2076 : *
2077 : * -(c_j_0 + c_j_n n + c_j_x y) + (c_i_0 + c_i_n n + c_i_x x) +
2078 : * m_0 + m_n n >= 0
2079 : *
2080 : * for each (x,y) in R.
2081 : * If s = -1, we add the constraint
2082 : *
2083 : * -((c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x))
2084 : * <= m_0 + m_n n
2085 : *
2086 : * or
2087 : *
2088 : * (c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x) +
2089 : * m_0 + m_n n >= 0
2090 : *
2091 : * for each (x,y) in R.
2092 : * We obtain general constraints on coefficients (c_0, c_n, c_x, c_y)
2093 : * of valid constraints for R and then plug in
2094 : * (m_0 - s*c_j_0 + s*c_i_0, m_n - s*c_j_n + s*c_i_n,
2095 : * s*c_i_x, -s*c_j_x)
2096 : * with each coefficient (except m_0, c_*_0 and c_*_n)
2097 : * represented as a pair of non-negative coefficients.
2098 : *
2099 : *
2100 : * If "local" is set (and s = 1), then we add constraints
2101 : *
2102 : * (c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x) <= 0
2103 : *
2104 : * or
2105 : *
2106 : * -((c_j_0 + c_j_n n + c_j_x y) + (c_i_0 + c_i_n n + c_i_x x)) >= 0
2107 : *
2108 : * instead, forcing the dependence distance to be (less than or) equal to 0.
2109 : * That is, we plug in
2110 : * (-s*c_j_0 + s*c_i_0, -s*c_j_n + s*c_i_n, s*c_i_x, -s*c_j_x).
2111 : * Note that dependences marked local are treated as validity constraints
2112 : * by add_all_validity_constraints and therefore also have
2113 : * their distances bounded by 0 from below.
2114 : */
2115 0 : static isl_stat add_inter_proximity_constraints(struct isl_sched_graph *graph,
2116 : struct isl_sched_edge *edge, int s, int local)
2117 : {
2118 : int offset;
2119 : unsigned nparam;
2120 0 : isl_map *map = isl_map_copy(edge->map);
2121 0 : isl_ctx *ctx = isl_map_get_ctx(map);
2122 : isl_dim_map *dim_map;
2123 : isl_basic_set *coef;
2124 0 : struct isl_sched_node *src = edge->src;
2125 0 : struct isl_sched_node *dst = edge->dst;
2126 :
2127 0 : coef = inter_coefficients(graph, edge, map);
2128 :
2129 0 : offset = coef_var_offset(coef);
2130 :
2131 0 : if (!coef)
2132 0 : return isl_stat_error;
2133 :
2134 0 : nparam = isl_space_dim(src->space, isl_dim_param);
2135 0 : dim_map = inter_dim_map(ctx, graph, src, dst, offset, -s);
2136 :
2137 0 : if (!local) {
2138 0 : isl_dim_map_range(dim_map, 1, 0, 0, 0, 1, 1);
2139 0 : isl_dim_map_range(dim_map, 4, 2, 1, 1, nparam, -1);
2140 0 : isl_dim_map_range(dim_map, 5, 2, 1, 1, nparam, 1);
2141 : }
2142 :
2143 0 : graph->lp = add_constraints_dim_map(graph->lp, coef, dim_map);
2144 :
2145 0 : return isl_stat_ok;
2146 : }
2147 :
/* Does the dependence distance over "edge" need to be forced to zero?
 * This is the case for edges that are marked local and,
 * if "use_coincidence" is set, also for coincidence edges.
 */
static int force_zero(struct isl_sched_edge *edge, int use_coincidence)
{
	if (is_local(edge))
		return 1;
	if (!use_coincidence)
		return 0;
	return is_coincidence(edge) ? 1 : 0;
}
2157 :
2158 : /* Add all validity constraints to graph->lp.
2159 : *
2160 : * An edge that is forced to be local needs to have its dependence
2161 : * distances equal to zero. We take care of bounding them by 0 from below
2162 : * here. add_all_proximity_constraints takes care of bounding them by 0
2163 : * from above.
2164 : *
2165 : * If "use_coincidence" is set, then we treat coincidence edges as local edges.
2166 : * Otherwise, we ignore them.
2167 : */
2168 0 : static int add_all_validity_constraints(struct isl_sched_graph *graph,
2169 : int use_coincidence)
2170 : {
2171 : int i;
2172 :
2173 0 : for (i = 0; i < graph->n_edge; ++i) {
2174 0 : struct isl_sched_edge *edge = &graph->edge[i];
2175 : int zero;
2176 :
2177 0 : zero = force_zero(edge, use_coincidence);
2178 0 : if (!is_validity(edge) && !zero)
2179 0 : continue;
2180 0 : if (edge->src != edge->dst)
2181 0 : continue;
2182 0 : if (add_intra_validity_constraints(graph, edge) < 0)
2183 0 : return -1;
2184 : }
2185 :
2186 0 : for (i = 0; i < graph->n_edge; ++i) {
2187 0 : struct isl_sched_edge *edge = &graph->edge[i];
2188 : int zero;
2189 :
2190 0 : zero = force_zero(edge, use_coincidence);
2191 0 : if (!is_validity(edge) && !zero)
2192 0 : continue;
2193 0 : if (edge->src == edge->dst)
2194 0 : continue;
2195 0 : if (add_inter_validity_constraints(graph, edge) < 0)
2196 0 : return -1;
2197 : }
2198 :
2199 0 : return 0;
2200 : }
2201 :
2202 : /* Add constraints to graph->lp that bound the dependence distance
2203 : * for all dependence relations.
2204 : * If a given proximity dependence is identical to a validity
2205 : * dependence, then the dependence distance is already bounded
2206 : * from below (by zero), so we only need to bound the distance
2207 : * from above. (This includes the case of "local" dependences
2208 : * which are treated as validity dependence by add_all_validity_constraints.)
2209 : * Otherwise, we need to bound the distance both from above and from below.
2210 : *
2211 : * If "use_coincidence" is set, then we treat coincidence edges as local edges.
2212 : * Otherwise, we ignore them.
2213 : */
2214 0 : static int add_all_proximity_constraints(struct isl_sched_graph *graph,
2215 : int use_coincidence)
2216 : {
2217 : int i;
2218 :
2219 0 : for (i = 0; i < graph->n_edge; ++i) {
2220 0 : struct isl_sched_edge *edge = &graph->edge[i];
2221 : int zero;
2222 :
2223 0 : zero = force_zero(edge, use_coincidence);
2224 0 : if (!is_proximity(edge) && !zero)
2225 0 : continue;
2226 0 : if (edge->src == edge->dst &&
2227 0 : add_intra_proximity_constraints(graph, edge, 1, zero) < 0)
2228 0 : return -1;
2229 0 : if (edge->src != edge->dst &&
2230 0 : add_inter_proximity_constraints(graph, edge, 1, zero) < 0)
2231 0 : return -1;
2232 0 : if (is_validity(edge) || zero)
2233 0 : continue;
2234 0 : if (edge->src == edge->dst &&
2235 0 : add_intra_proximity_constraints(graph, edge, -1, 0) < 0)
2236 0 : return -1;
2237 0 : if (edge->src != edge->dst &&
2238 0 : add_inter_proximity_constraints(graph, edge, -1, 0) < 0)
2239 0 : return -1;
2240 : }
2241 :
2242 0 : return 0;
2243 : }
2244 :
2245 : /* Normalize the rows of "indep" such that all rows are lexicographically
2246 : * positive and such that each row contains as many final zeros as possible,
2247 : * given the choice for the previous rows.
2248 : * Do this by performing elementary row operations.
2249 : */
2250 0 : static __isl_give isl_mat *normalize_independent(__isl_take isl_mat *indep)
2251 : {
2252 0 : indep = isl_mat_reverse_gauss(indep);
2253 0 : indep = isl_mat_lexnonneg_rows(indep);
2254 0 : return indep;
2255 : }
2256 :
2257 : /* Compute a basis for the rows in the linear part of the schedule
2258 : * and extend this basis to a full basis. The remaining rows
2259 : * can then be used to force linear independence from the rows
2260 : * in the schedule.
2261 : *
2262 : * In particular, given the schedule rows S, we compute
2263 : *
2264 : * S = H Q
2265 : * S U = H
2266 : *
2267 : * with H the Hermite normal form of S. That is, all but the
2268 : * first rank columns of H are zero and so each row in S is
2269 : * a linear combination of the first rank rows of Q.
2270 : * The matrix Q can be used as a variable transformation
2271 : * that isolates the directions of S in the first rank rows.
2272 : * Transposing S U = H yields
2273 : *
2274 : * U^T S^T = H^T
2275 : *
2276 : * with all but the first rank rows of H^T zero.
2277 : * The last rows of U^T are therefore linear combinations
2278 : * of schedule coefficients that are all zero on schedule
2279 : * coefficients that are linearly dependent on the rows of S.
2280 : * At least one of these combinations is non-zero on
2281 : * linearly independent schedule coefficients.
2282 : * The rows are normalized to involve as few of the last
2283 : * coefficients as possible and to have a positive initial value.
2284 : */
2285 0 : static int node_update_vmap(struct isl_sched_node *node)
2286 : {
2287 : isl_mat *H, *U, *Q;
2288 0 : int n_row = isl_mat_rows(node->sched);
2289 :
2290 0 : H = isl_mat_sub_alloc(node->sched, 0, n_row,
2291 0 : 1 + node->nparam, node->nvar);
2292 :
2293 0 : H = isl_mat_left_hermite(H, 0, &U, &Q);
2294 0 : isl_mat_free(node->indep);
2295 0 : isl_mat_free(node->vmap);
2296 0 : node->vmap = Q;
2297 0 : node->indep = isl_mat_transpose(U);
2298 0 : node->rank = isl_mat_initial_non_zero_cols(H);
2299 0 : node->indep = isl_mat_drop_rows(node->indep, 0, node->rank);
2300 0 : node->indep = normalize_independent(node->indep);
2301 0 : isl_mat_free(H);
2302 :
2303 0 : if (!node->indep || !node->vmap || node->rank < 0)
2304 0 : return -1;
2305 0 : return 0;
2306 : }
2307 :
/* Does "edge" carry either the validity or
 * the conditional validity mark?
 */
static int is_any_validity(struct isl_sched_edge *edge)
{
	if (is_validity(edge))
		return 1;
	return is_conditional_validity(edge) ? 1 : 0;
}
2314 :
/* Return the number of times the constraints of "edge"
 * should be counted.
 *
 * The counts are as follows
 * validity		-> 1 (>= 0)
 * validity+proximity	-> 2 (>= 0 and upper bound)
 * proximity		-> 2 (lower and upper bound)
 * local(+any)		-> 2 (>= 0 and <= 0)
 *
 * An edge that is only marked conditional_validity counts
 * as zero because it is only checked afterwards.
 *
 * If "use_coincidence" is set, then coincidence edges are treated
 * as local edges.  Otherwise, they are ignored.
 */
static int edge_multiplicity(struct isl_sched_edge *edge, int use_coincidence)
{
	int bounded_twice;

	bounded_twice = is_proximity(edge) ||
			force_zero(edge, use_coincidence);
	if (bounded_twice)
		return 2;
	return is_validity(edge) ? 1 : 0;
}
2337 :
2338 : /* How many times should the constraints in "edge" be counted
2339 : * as a parametric intra-node constraint?
2340 : *
2341 : * Only proximity edges that are not forced zero need
2342 : * coefficient constraints that include coefficients for parameters.
2343 : * If the edge is also a validity edge, then only
2344 : * an upper bound is introduced. Otherwise, both lower and upper bounds
2345 : * are introduced.
2346 : */
2347 0 : static int parametric_intra_edge_multiplicity(struct isl_sched_edge *edge,
2348 : int use_coincidence)
2349 : {
2350 0 : if (edge->src != edge->dst)
2351 0 : return 0;
2352 0 : if (!is_proximity(edge))
2353 0 : return 0;
2354 0 : if (force_zero(edge, use_coincidence))
2355 0 : return 0;
2356 0 : if (is_validity(edge))
2357 0 : return 1;
2358 : else
2359 0 : return 2;
2360 : }
2361 :
2362 : /* Add "f" times the number of equality and inequality constraints of "bset"
2363 : * to "n_eq" and "n_ineq" and free "bset".
2364 : */
2365 0 : static isl_stat update_count(__isl_take isl_basic_set *bset,
2366 : int f, int *n_eq, int *n_ineq)
2367 : {
2368 0 : if (!bset)
2369 0 : return isl_stat_error;
2370 :
2371 0 : *n_eq += isl_basic_set_n_equality(bset);
2372 0 : *n_ineq += isl_basic_set_n_inequality(bset);
2373 0 : isl_basic_set_free(bset);
2374 :
2375 0 : return isl_stat_ok;
2376 : }
2377 :
2378 : /* Count the number of equality and inequality constraints
2379 : * that will be added for the given map.
2380 : *
2381 : * The edges that require parameter coefficients are counted separately.
2382 : *
2383 : * "use_coincidence" is set if we should take into account coincidence edges.
2384 : */
2385 0 : static isl_stat count_map_constraints(struct isl_sched_graph *graph,
2386 : struct isl_sched_edge *edge, __isl_take isl_map *map,
2387 : int *n_eq, int *n_ineq, int use_coincidence)
2388 : {
2389 : isl_map *copy;
2390 : isl_basic_set *coef;
2391 0 : int f = edge_multiplicity(edge, use_coincidence);
2392 0 : int fp = parametric_intra_edge_multiplicity(edge, use_coincidence);
2393 :
2394 0 : if (f == 0) {
2395 0 : isl_map_free(map);
2396 0 : return isl_stat_ok;
2397 : }
2398 :
2399 0 : if (edge->src != edge->dst) {
2400 0 : coef = inter_coefficients(graph, edge, map);
2401 0 : return update_count(coef, f, n_eq, n_ineq);
2402 : }
2403 :
2404 0 : if (fp > 0) {
2405 0 : copy = isl_map_copy(map);
2406 0 : coef = intra_coefficients(graph, edge->src, copy, 1);
2407 0 : if (update_count(coef, fp, n_eq, n_ineq) < 0)
2408 0 : goto error;
2409 : }
2410 :
2411 0 : if (f > fp) {
2412 0 : copy = isl_map_copy(map);
2413 0 : coef = intra_coefficients(graph, edge->src, copy, 0);
2414 0 : if (update_count(coef, f - fp, n_eq, n_ineq) < 0)
2415 0 : goto error;
2416 : }
2417 :
2418 0 : isl_map_free(map);
2419 0 : return isl_stat_ok;
2420 : error:
2421 0 : isl_map_free(map);
2422 0 : return isl_stat_error;
2423 : }
2424 :
2425 : /* Count the number of equality and inequality constraints
2426 : * that will be added to the main lp problem.
2427 : * We count as follows
2428 : * validity -> 1 (>= 0)
2429 : * validity+proximity -> 2 (>= 0 and upper bound)
2430 : * proximity -> 2 (lower and upper bound)
2431 : * local(+any) -> 2 (>= 0 and <= 0)
2432 : *
2433 : * If "use_coincidence" is set, then we treat coincidence edges as local edges.
2434 : * Otherwise, we ignore them.
2435 : */
2436 0 : static int count_constraints(struct isl_sched_graph *graph,
2437 : int *n_eq, int *n_ineq, int use_coincidence)
2438 : {
2439 : int i;
2440 :
2441 0 : *n_eq = *n_ineq = 0;
2442 0 : for (i = 0; i < graph->n_edge; ++i) {
2443 0 : struct isl_sched_edge *edge = &graph->edge[i];
2444 0 : isl_map *map = isl_map_copy(edge->map);
2445 :
2446 0 : if (count_map_constraints(graph, edge, map, n_eq, n_ineq,
2447 : use_coincidence) < 0)
2448 0 : return -1;
2449 : }
2450 :
2451 0 : return 0;
2452 : }
2453 :
2454 : /* Count the number of constraints that will be added by
2455 : * add_bound_constant_constraints to bound the values of the constant terms
2456 : * and increment *n_eq and *n_ineq accordingly.
2457 : *
2458 : * In practice, add_bound_constant_constraints only adds inequalities.
2459 : */
2460 0 : static isl_stat count_bound_constant_constraints(isl_ctx *ctx,
2461 : struct isl_sched_graph *graph, int *n_eq, int *n_ineq)
2462 : {
2463 0 : if (isl_options_get_schedule_max_constant_term(ctx) == -1)
2464 0 : return isl_stat_ok;
2465 :
2466 0 : *n_ineq += graph->n;
2467 :
2468 0 : return isl_stat_ok;
2469 : }
2470 :
2471 : /* Add constraints to bound the values of the constant terms in the schedule,
2472 : * if requested by the user.
2473 : *
2474 : * The maximal value of the constant terms is defined by the option
2475 : * "schedule_max_constant_term".
2476 : */
2477 0 : static isl_stat add_bound_constant_constraints(isl_ctx *ctx,
2478 : struct isl_sched_graph *graph)
2479 : {
2480 : int i, k;
2481 : int max;
2482 : int total;
2483 :
2484 0 : max = isl_options_get_schedule_max_constant_term(ctx);
2485 0 : if (max == -1)
2486 0 : return isl_stat_ok;
2487 :
2488 0 : total = isl_basic_set_dim(graph->lp, isl_dim_set);
2489 :
2490 0 : for (i = 0; i < graph->n; ++i) {
2491 0 : struct isl_sched_node *node = &graph->node[i];
2492 : int pos;
2493 :
2494 0 : k = isl_basic_set_alloc_inequality(graph->lp);
2495 0 : if (k < 0)
2496 0 : return isl_stat_error;
2497 0 : isl_seq_clr(graph->lp->ineq[k], 1 + total);
2498 0 : pos = node_cst_coef_offset(node);
2499 0 : isl_int_set_si(graph->lp->ineq[k][1 + pos], -1);
2500 0 : isl_int_set_si(graph->lp->ineq[k][0], max);
2501 : }
2502 :
2503 0 : return isl_stat_ok;
2504 : }
2505 :
2506 : /* Count the number of constraints that will be added by
2507 : * add_bound_coefficient_constraints and increment *n_eq and *n_ineq
2508 : * accordingly.
2509 : *
2510 : * In practice, add_bound_coefficient_constraints only adds inequalities.
2511 : */
2512 0 : static int count_bound_coefficient_constraints(isl_ctx *ctx,
2513 : struct isl_sched_graph *graph, int *n_eq, int *n_ineq)
2514 : {
2515 : int i;
2516 :
2517 0 : if (isl_options_get_schedule_max_coefficient(ctx) == -1 &&
2518 0 : !isl_options_get_schedule_treat_coalescing(ctx))
2519 0 : return 0;
2520 :
2521 0 : for (i = 0; i < graph->n; ++i)
2522 0 : *n_ineq += graph->node[i].nparam + 2 * graph->node[i].nvar;
2523 :
2524 0 : return 0;
2525 : }
2526 :
2527 : /* Add constraints to graph->lp that bound the values of
2528 : * the parameter schedule coefficients of "node" to "max" and
2529 : * the variable schedule coefficients to the corresponding entry
2530 : * in node->max.
2531 : * In either case, a negative value means that no bound needs to be imposed.
2532 : *
2533 : * For parameter coefficients, this amounts to adding a constraint
2534 : *
2535 : * c_n <= max
2536 : *
2537 : * i.e.,
2538 : *
2539 : * -c_n + max >= 0
2540 : *
2541 : * The variables coefficients are, however, not represented directly.
2542 : * Instead, the variable coefficients c_x are written as differences
2543 : * c_x = c_x^+ - c_x^-.
2544 : * That is,
2545 : *
2546 : * -max_i <= c_x_i <= max_i
2547 : *
2548 : * is encoded as
2549 : *
2550 : * -max_i <= c_x_i^+ - c_x_i^- <= max_i
2551 : *
2552 : * or
2553 : *
2554 : * -(c_x_i^+ - c_x_i^-) + max_i >= 0
2555 : * c_x_i^+ - c_x_i^- + max_i >= 0
2556 : */
2557 0 : static isl_stat node_add_coefficient_constraints(isl_ctx *ctx,
2558 : struct isl_sched_graph *graph, struct isl_sched_node *node, int max)
2559 : {
2560 : int i, j, k;
2561 : int total;
2562 : isl_vec *ineq;
2563 :
2564 0 : total = isl_basic_set_dim(graph->lp, isl_dim_set);
2565 :
2566 0 : for (j = 0; j < node->nparam; ++j) {
2567 : int dim;
2568 :
2569 0 : if (max < 0)
2570 0 : continue;
2571 :
2572 0 : k = isl_basic_set_alloc_inequality(graph->lp);
2573 0 : if (k < 0)
2574 0 : return isl_stat_error;
2575 0 : dim = 1 + node_par_coef_offset(node) + j;
2576 0 : isl_seq_clr(graph->lp->ineq[k], 1 + total);
2577 0 : isl_int_set_si(graph->lp->ineq[k][dim], -1);
2578 0 : isl_int_set_si(graph->lp->ineq[k][0], max);
2579 : }
2580 :
2581 0 : ineq = isl_vec_alloc(ctx, 1 + total);
2582 0 : ineq = isl_vec_clr(ineq);
2583 0 : if (!ineq)
2584 0 : return isl_stat_error;
2585 0 : for (i = 0; i < node->nvar; ++i) {
2586 0 : int pos = 1 + node_var_coef_pos(node, i);
2587 :
2588 0 : if (isl_int_is_neg(node->max->el[i]))
2589 0 : continue;
2590 :
2591 0 : isl_int_set_si(ineq->el[pos], 1);
2592 0 : isl_int_set_si(ineq->el[pos + 1], -1);
2593 0 : isl_int_set(ineq->el[0], node->max->el[i]);
2594 :
2595 0 : k = isl_basic_set_alloc_inequality(graph->lp);
2596 0 : if (k < 0)
2597 0 : goto error;
2598 0 : isl_seq_cpy(graph->lp->ineq[k], ineq->el, 1 + total);
2599 :
2600 0 : isl_seq_neg(ineq->el + pos, ineq->el + pos, 2);
2601 0 : k = isl_basic_set_alloc_inequality(graph->lp);
2602 0 : if (k < 0)
2603 0 : goto error;
2604 0 : isl_seq_cpy(graph->lp->ineq[k], ineq->el, 1 + total);
2605 :
2606 0 : isl_seq_clr(ineq->el + pos, 2);
2607 : }
2608 0 : isl_vec_free(ineq);
2609 :
2610 0 : return isl_stat_ok;
2611 : error:
2612 0 : isl_vec_free(ineq);
2613 0 : return isl_stat_error;
2614 : }
2615 :
2616 : /* Add constraints that bound the values of the variable and parameter
2617 : * coefficients of the schedule.
2618 : *
2619 : * The maximal value of the coefficients is defined by the option
2620 : * 'schedule_max_coefficient' and the entries in node->max.
2621 : * These latter entries are only set if either the schedule_max_coefficient
2622 : * option or the schedule_treat_coalescing option is set.
2623 : */
2624 0 : static isl_stat add_bound_coefficient_constraints(isl_ctx *ctx,
2625 : struct isl_sched_graph *graph)
2626 : {
2627 : int i;
2628 : int max;
2629 :
2630 0 : max = isl_options_get_schedule_max_coefficient(ctx);
2631 :
2632 0 : if (max == -1 && !isl_options_get_schedule_treat_coalescing(ctx))
2633 0 : return isl_stat_ok;
2634 :
2635 0 : for (i = 0; i < graph->n; ++i) {
2636 0 : struct isl_sched_node *node = &graph->node[i];
2637 :
2638 0 : if (node_add_coefficient_constraints(ctx, graph, node, max) < 0)
2639 0 : return isl_stat_error;
2640 : }
2641 :
2642 0 : return isl_stat_ok;
2643 : }
2644 :
2645 : /* Add a constraint to graph->lp that equates the value at position
2646 : * "sum_pos" to the sum of the "n" values starting at "first".
2647 : */
2648 0 : static isl_stat add_sum_constraint(struct isl_sched_graph *graph,
2649 : int sum_pos, int first, int n)
2650 : {
2651 : int i, k;
2652 : int total;
2653 :
2654 0 : total = isl_basic_set_dim(graph->lp, isl_dim_set);
2655 :
2656 0 : k = isl_basic_set_alloc_equality(graph->lp);
2657 0 : if (k < 0)
2658 0 : return isl_stat_error;
2659 0 : isl_seq_clr(graph->lp->eq[k], 1 + total);
2660 0 : isl_int_set_si(graph->lp->eq[k][1 + sum_pos], -1);
2661 0 : for (i = 0; i < n; ++i)
2662 0 : isl_int_set_si(graph->lp->eq[k][1 + first + i], 1);
2663 :
2664 0 : return isl_stat_ok;
2665 : }
2666 :
2667 : /* Add a constraint to graph->lp that equates the value at position
2668 : * "sum_pos" to the sum of the parameter coefficients of all nodes.
2669 : */
2670 0 : static isl_stat add_param_sum_constraint(struct isl_sched_graph *graph,
2671 : int sum_pos)
2672 : {
2673 : int i, j, k;
2674 : int total;
2675 :
2676 0 : total = isl_basic_set_dim(graph->lp, isl_dim_set);
2677 :
2678 0 : k = isl_basic_set_alloc_equality(graph->lp);
2679 0 : if (k < 0)
2680 0 : return isl_stat_error;
2681 0 : isl_seq_clr(graph->lp->eq[k], 1 + total);
2682 0 : isl_int_set_si(graph->lp->eq[k][1 + sum_pos], -1);
2683 0 : for (i = 0; i < graph->n; ++i) {
2684 0 : int pos = 1 + node_par_coef_offset(&graph->node[i]);
2685 :
2686 0 : for (j = 0; j < graph->node[i].nparam; ++j)
2687 0 : isl_int_set_si(graph->lp->eq[k][pos + j], 1);
2688 : }
2689 :
2690 0 : return isl_stat_ok;
2691 : }
2692 :
2693 : /* Add a constraint to graph->lp that equates the value at position
2694 : * "sum_pos" to the sum of the variable coefficients of all nodes.
2695 : */
2696 0 : static isl_stat add_var_sum_constraint(struct isl_sched_graph *graph,
2697 : int sum_pos)
2698 : {
2699 : int i, j, k;
2700 : int total;
2701 :
2702 0 : total = isl_basic_set_dim(graph->lp, isl_dim_set);
2703 :
2704 0 : k = isl_basic_set_alloc_equality(graph->lp);
2705 0 : if (k < 0)
2706 0 : return isl_stat_error;
2707 0 : isl_seq_clr(graph->lp->eq[k], 1 + total);
2708 0 : isl_int_set_si(graph->lp->eq[k][1 + sum_pos], -1);
2709 0 : for (i = 0; i < graph->n; ++i) {
2710 0 : struct isl_sched_node *node = &graph->node[i];
2711 0 : int pos = 1 + node_var_coef_offset(node);
2712 :
2713 0 : for (j = 0; j < 2 * node->nvar; ++j)
2714 0 : isl_int_set_si(graph->lp->eq[k][pos + j], 1);
2715 : }
2716 :
2717 0 : return isl_stat_ok;
2718 : }
2719 :
2720 : /* Construct an ILP problem for finding schedule coefficients
2721 : * that result in non-negative, but small dependence distances
2722 : * over all dependences.
2723 : * In particular, the dependence distances over proximity edges
2724 : * are bounded by m_0 + m_n n and we compute schedule coefficients
2725 : * with small values (preferably zero) of m_n and m_0.
2726 : *
2727 : * All variables of the ILP are non-negative. The actual coefficients
2728 : * may be negative, so each coefficient is represented as the difference
2729 : * of two non-negative variables. The negative part always appears
2730 : * immediately before the positive part.
2731 : * Other than that, the variables have the following order
2732 : *
2733 : * - sum of positive and negative parts of m_n coefficients
2734 : * - m_0
2735 : * - sum of all c_n coefficients
2736 : * (unconstrained when computing non-parametric schedules)
2737 : * - sum of positive and negative parts of all c_x coefficients
2738 : * - positive and negative parts of m_n coefficients
2739 : * - for each node
2740 : * - positive and negative parts of c_i_x, in opposite order
2741 : * - c_i_n (if parametric)
2742 : * - c_i_0
2743 : *
2744 : * The constraints are those from the edges plus two or three equalities
2745 : * to express the sums.
2746 : *
2747 : * If "use_coincidence" is set, then we treat coincidence edges as local edges.
2748 : * Otherwise, we ignore them.
2749 : */
2750 0 : static isl_stat setup_lp(isl_ctx *ctx, struct isl_sched_graph *graph,
2751 : int use_coincidence)
2752 : {
2753 : int i;
2754 : unsigned nparam;
2755 : unsigned total;
2756 : isl_space *space;
2757 : int parametric;
2758 : int param_pos;
2759 : int n_eq, n_ineq;
2760 :
2761 0 : parametric = ctx->opt->schedule_parametric;
2762 0 : nparam = isl_space_dim(graph->node[0].space, isl_dim_param);
2763 0 : param_pos = 4;
2764 0 : total = param_pos + 2 * nparam;
2765 0 : for (i = 0; i < graph->n; ++i) {
2766 0 : struct isl_sched_node *node = &graph->node[graph->sorted[i]];
2767 0 : if (node_update_vmap(node) < 0)
2768 0 : return isl_stat_error;
2769 0 : node->start = total;
2770 0 : total += 1 + node->nparam + 2 * node->nvar;
2771 : }
2772 :
2773 0 : if (count_constraints(graph, &n_eq, &n_ineq, use_coincidence) < 0)
2774 0 : return isl_stat_error;
2775 0 : if (count_bound_constant_constraints(ctx, graph, &n_eq, &n_ineq) < 0)
2776 0 : return isl_stat_error;
2777 0 : if (count_bound_coefficient_constraints(ctx, graph, &n_eq, &n_ineq) < 0)
2778 0 : return isl_stat_error;
2779 :
2780 0 : space = isl_space_set_alloc(ctx, 0, total);
2781 0 : isl_basic_set_free(graph->lp);
2782 0 : n_eq += 2 + parametric;
2783 :
2784 0 : graph->lp = isl_basic_set_alloc_space(space, 0, n_eq, n_ineq);
2785 :
2786 0 : if (add_sum_constraint(graph, 0, param_pos, 2 * nparam) < 0)
2787 0 : return isl_stat_error;
2788 0 : if (parametric && add_param_sum_constraint(graph, 2) < 0)
2789 0 : return isl_stat_error;
2790 0 : if (add_var_sum_constraint(graph, 3) < 0)
2791 0 : return isl_stat_error;
2792 0 : if (add_bound_constant_constraints(ctx, graph) < 0)
2793 0 : return isl_stat_error;
2794 0 : if (add_bound_coefficient_constraints(ctx, graph) < 0)
2795 0 : return isl_stat_error;
2796 0 : if (add_all_validity_constraints(graph, use_coincidence) < 0)
2797 0 : return isl_stat_error;
2798 0 : if (add_all_proximity_constraints(graph, use_coincidence) < 0)
2799 0 : return isl_stat_error;
2800 :
2801 0 : return isl_stat_ok;
2802 : }
2803 :
2804 : /* Analyze the conflicting constraint found by
2805 : * isl_tab_basic_set_non_trivial_lexmin. If it corresponds to the validity
2806 : * constraint of one of the edges between distinct nodes, living, moreover
2807 : * in distinct SCCs, then record the source and sink SCC as this may
2808 : * be a good place to cut between SCCs.
2809 : */
2810 0 : static int check_conflict(int con, void *user)
2811 : {
2812 : int i;
2813 0 : struct isl_sched_graph *graph = user;
2814 :
2815 0 : if (graph->src_scc >= 0)
2816 0 : return 0;
2817 :
2818 0 : con -= graph->lp->n_eq;
2819 :
2820 0 : if (con >= graph->lp->n_ineq)
2821 0 : return 0;
2822 :
2823 0 : for (i = 0; i < graph->n_edge; ++i) {
2824 0 : if (!is_validity(&graph->edge[i]))
2825 0 : continue;
2826 0 : if (graph->edge[i].src == graph->edge[i].dst)
2827 0 : continue;
2828 0 : if (graph->edge[i].src->scc == graph->edge[i].dst->scc)
2829 0 : continue;
2830 0 : if (graph->edge[i].start > con)
2831 0 : continue;
2832 0 : if (graph->edge[i].end <= con)
2833 0 : continue;
2834 0 : graph->src_scc = graph->edge[i].src->scc;
2835 0 : graph->dst_scc = graph->edge[i].dst->scc;
2836 : }
2837 :
2838 0 : return 0;
2839 : }
2840 :
2841 : /* Check whether the next schedule row of the given node needs to be
2842 : * non-trivial. Lower-dimensional domains may have some trivial rows,
2843 : * but as soon as the number of remaining required non-trivial rows
2844 : * is as large as the number or remaining rows to be computed,
2845 : * all remaining rows need to be non-trivial.
2846 : */
2847 0 : static int needs_row(struct isl_sched_graph *graph, struct isl_sched_node *node)
2848 : {
2849 0 : return node->nvar - node->rank >= graph->maxvar - graph->n_row;
2850 : }
2851 :
2852 : /* Construct a non-triviality region with triviality directions
2853 : * corresponding to the rows of "indep".
2854 : * The rows of "indep" are expressed in terms of the schedule coefficients c_i,
2855 : * while the triviality directions are expressed in terms of
2856 : * pairs of non-negative variables c^+_i - c^-_i, with c^-_i appearing
2857 : * before c^+_i. Furthermore,
2858 : * the pairs of non-negative variables representing the coefficients
2859 : * are stored in the opposite order.
2860 : */
2861 0 : static __isl_give isl_mat *construct_trivial(__isl_keep isl_mat *indep)
2862 : {
2863 : isl_ctx *ctx;
2864 : isl_mat *mat;
2865 : int i, j, n, n_var;
2866 :
2867 0 : if (!indep)
2868 0 : return NULL;
2869 :
2870 0 : ctx = isl_mat_get_ctx(indep);
2871 0 : n = isl_mat_rows(indep);
2872 0 : n_var = isl_mat_cols(indep);
2873 0 : mat = isl_mat_alloc(ctx, n, 2 * n_var);
2874 0 : if (!mat)
2875 0 : return NULL;
2876 0 : for (i = 0; i < n; ++i) {
2877 0 : for (j = 0; j < n_var; ++j) {
2878 0 : int nj = n_var - 1 - j;
2879 0 : isl_int_neg(mat->row[i][2 * nj], indep->row[i][j]);
2880 0 : isl_int_set(mat->row[i][2 * nj + 1], indep->row[i][j]);
2881 : }
2882 : }
2883 :
2884 0 : return mat;
2885 : }
2886 :
2887 : /* Solve the ILP problem constructed in setup_lp.
2888 : * For each node such that all the remaining rows of its schedule
2889 : * need to be non-trivial, we construct a non-triviality region.
2890 : * This region imposes that the next row is independent of previous rows.
2891 : * In particular, the non-triviality region enforces that at least
2892 : * one of the linear combinations in the rows of node->indep is non-zero.
2893 : */
2894 0 : static __isl_give isl_vec *solve_lp(isl_ctx *ctx, struct isl_sched_graph *graph)
2895 : {
2896 : int i;
2897 : isl_vec *sol;
2898 : isl_basic_set *lp;
2899 :
2900 0 : for (i = 0; i < graph->n; ++i) {
2901 0 : struct isl_sched_node *node = &graph->node[i];
2902 : isl_mat *trivial;
2903 :
2904 0 : graph->region[i].pos = node_var_coef_offset(node);
2905 0 : if (needs_row(graph, node))
2906 0 : trivial = construct_trivial(node->indep);
2907 : else
2908 0 : trivial = isl_mat_zero(ctx, 0, 0);
2909 0 : graph->region[i].trivial = trivial;
2910 : }
2911 0 : lp = isl_basic_set_copy(graph->lp);
2912 0 : sol = isl_tab_basic_set_non_trivial_lexmin(lp, 2, graph->n,
2913 : graph->region, &check_conflict, graph);
2914 0 : for (i = 0; i < graph->n; ++i)
2915 0 : isl_mat_free(graph->region[i].trivial);
2916 0 : return sol;
2917 : }
2918 :
2919 : /* Extract the coefficients for the variables of "node" from "sol".
2920 : *
2921 : * Each schedule coefficient c_i_x is represented as the difference
2922 : * between two non-negative variables c_i_x^+ - c_i_x^-.
2923 : * The c_i_x^- appear before their c_i_x^+ counterpart.
2924 : * Furthermore, the order of these pairs is the opposite of that
2925 : * of the corresponding coefficients.
2926 : *
2927 : * Return c_i_x = c_i_x^+ - c_i_x^-
2928 : */
2929 0 : static __isl_give isl_vec *extract_var_coef(struct isl_sched_node *node,
2930 : __isl_keep isl_vec *sol)
2931 : {
2932 : int i;
2933 : int pos;
2934 : isl_vec *csol;
2935 :
2936 0 : if (!sol)
2937 0 : return NULL;
2938 0 : csol = isl_vec_alloc(isl_vec_get_ctx(sol), node->nvar);
2939 0 : if (!csol)
2940 0 : return NULL;
2941 :
2942 0 : pos = 1 + node_var_coef_offset(node);
2943 0 : for (i = 0; i < node->nvar; ++i)
2944 0 : isl_int_sub(csol->el[node->nvar - 1 - i],
2945 : sol->el[pos + 2 * i + 1], sol->el[pos + 2 * i]);
2946 :
2947 0 : return csol;
2948 : }
2949 :
2950 : /* Update the schedules of all nodes based on the given solution
2951 : * of the LP problem.
2952 : * The new row is added to the current band.
2953 : * All possibly negative coefficients are encoded as a difference
2954 : * of two non-negative variables, so we need to perform the subtraction
2955 : * here.
2956 : *
2957 : * If coincident is set, then the caller guarantees that the new
2958 : * row satisfies the coincidence constraints.
2959 : */
2960 0 : static int update_schedule(struct isl_sched_graph *graph,
2961 : __isl_take isl_vec *sol, int coincident)
2962 : {
2963 : int i, j;
2964 0 : isl_vec *csol = NULL;
2965 :
2966 0 : if (!sol)
2967 0 : goto error;
2968 0 : if (sol->size == 0)
2969 0 : isl_die(sol->ctx, isl_error_internal,
2970 : "no solution found", goto error);
2971 0 : if (graph->n_total_row >= graph->max_row)
2972 0 : isl_die(sol->ctx, isl_error_internal,
2973 : "too many schedule rows", goto error);
2974 :
2975 0 : for (i = 0; i < graph->n; ++i) {
2976 0 : struct isl_sched_node *node = &graph->node[i];
2977 : int pos;
2978 0 : int row = isl_mat_rows(node->sched);
2979 :
2980 0 : isl_vec_free(csol);
2981 0 : csol = extract_var_coef(node, sol);
2982 0 : if (!csol)
2983 0 : goto error;
2984 :
2985 0 : isl_map_free(node->sched_map);
2986 0 : node->sched_map = NULL;
2987 0 : node->sched = isl_mat_add_rows(node->sched, 1);
2988 0 : if (!node->sched)
2989 0 : goto error;
2990 0 : pos = node_cst_coef_offset(node);
2991 0 : node->sched = isl_mat_set_element(node->sched,
2992 0 : row, 0, sol->el[1 + pos]);
2993 0 : pos = node_par_coef_offset(node);
2994 0 : for (j = 0; j < node->nparam; ++j)
2995 0 : node->sched = isl_mat_set_element(node->sched,
2996 0 : row, 1 + j, sol->el[1 + pos + j]);
2997 0 : for (j = 0; j < node->nvar; ++j)
2998 0 : node->sched = isl_mat_set_element(node->sched,
2999 0 : row, 1 + node->nparam + j, csol->el[j]);
3000 0 : node->coincident[graph->n_total_row] = coincident;
3001 : }
3002 0 : isl_vec_free(sol);
3003 0 : isl_vec_free(csol);
3004 :
3005 0 : graph->n_row++;
3006 0 : graph->n_total_row++;
3007 :
3008 0 : return 0;
3009 : error:
3010 0 : isl_vec_free(sol);
3011 0 : isl_vec_free(csol);
3012 0 : return -1;
3013 : }
3014 :
3015 : /* Convert row "row" of node->sched into an isl_aff living in "ls"
3016 : * and return this isl_aff.
3017 : */
3018 0 : static __isl_give isl_aff *extract_schedule_row(__isl_take isl_local_space *ls,
3019 : struct isl_sched_node *node, int row)
3020 : {
3021 : int j;
3022 : isl_int v;
3023 : isl_aff *aff;
3024 :
3025 0 : isl_int_init(v);
3026 :
3027 0 : aff = isl_aff_zero_on_domain(ls);
3028 0 : if (isl_mat_get_element(node->sched, row, 0, &v) < 0)
3029 0 : goto error;
3030 0 : aff = isl_aff_set_constant(aff, v);
3031 0 : for (j = 0; j < node->nparam; ++j) {
3032 0 : if (isl_mat_get_element(node->sched, row, 1 + j, &v) < 0)
3033 0 : goto error;
3034 0 : aff = isl_aff_set_coefficient(aff, isl_dim_param, j, v);
3035 : }
3036 0 : for (j = 0; j < node->nvar; ++j) {
3037 0 : if (isl_mat_get_element(node->sched, row,
3038 0 : 1 + node->nparam + j, &v) < 0)
3039 0 : goto error;
3040 0 : aff = isl_aff_set_coefficient(aff, isl_dim_in, j, v);
3041 : }
3042 :
3043 0 : isl_int_clear(v);
3044 :
3045 0 : return aff;
3046 : error:
3047 0 : isl_int_clear(v);
3048 0 : isl_aff_free(aff);
3049 0 : return NULL;
3050 : }
3051 :
3052 : /* Convert the "n" rows starting at "first" of node->sched into a multi_aff
3053 : * and return this multi_aff.
3054 : *
3055 : * The result is defined over the uncompressed node domain.
3056 : */
3057 0 : static __isl_give isl_multi_aff *node_extract_partial_schedule_multi_aff(
3058 : struct isl_sched_node *node, int first, int n)
3059 : {
3060 : int i;
3061 : isl_space *space;
3062 : isl_local_space *ls;
3063 : isl_aff *aff;
3064 : isl_multi_aff *ma;
3065 : int nrow;
3066 :
3067 0 : if (!node)
3068 0 : return NULL;
3069 0 : nrow = isl_mat_rows(node->sched);
3070 0 : if (node->compressed)
3071 0 : space = isl_multi_aff_get_domain_space(node->decompress);
3072 : else
3073 0 : space = isl_space_copy(node->space);
3074 0 : ls = isl_local_space_from_space(isl_space_copy(space));
3075 0 : space = isl_space_from_domain(space);
3076 0 : space = isl_space_add_dims(space, isl_dim_out, n);
3077 0 : ma = isl_multi_aff_zero(space);
3078 :
3079 0 : for (i = first; i < first + n; ++i) {
3080 0 : aff = extract_schedule_row(isl_local_space_copy(ls), node, i);
3081 0 : ma = isl_multi_aff_set_aff(ma, i - first, aff);
3082 : }
3083 :
3084 0 : isl_local_space_free(ls);
3085 :
3086 0 : if (node->compressed)
3087 0 : ma = isl_multi_aff_pullback_multi_aff(ma,
3088 : isl_multi_aff_copy(node->compress));
3089 :
3090 0 : return ma;
3091 : }
3092 :
3093 : /* Convert node->sched into a multi_aff and return this multi_aff.
3094 : *
3095 : * The result is defined over the uncompressed node domain.
3096 : */
3097 0 : static __isl_give isl_multi_aff *node_extract_schedule_multi_aff(
3098 : struct isl_sched_node *node)
3099 : {
3100 : int nrow;
3101 :
3102 0 : nrow = isl_mat_rows(node->sched);
3103 0 : return node_extract_partial_schedule_multi_aff(node, 0, nrow);
3104 : }
3105 :
3106 : /* Convert node->sched into a map and return this map.
3107 : *
3108 : * The result is cached in node->sched_map, which needs to be released
3109 : * whenever node->sched is updated.
3110 : * It is defined over the uncompressed node domain.
3111 : */
3112 0 : static __isl_give isl_map *node_extract_schedule(struct isl_sched_node *node)
3113 : {
3114 0 : if (!node->sched_map) {
3115 : isl_multi_aff *ma;
3116 :
3117 0 : ma = node_extract_schedule_multi_aff(node);
3118 0 : node->sched_map = isl_map_from_multi_aff(ma);
3119 : }
3120 :
3121 0 : return isl_map_copy(node->sched_map);
3122 : }
3123 :
3124 : /* Construct a map that can be used to update a dependence relation
3125 : * based on the current schedule.
3126 : * That is, construct a map expressing that source and sink
3127 : * are executed within the same iteration of the current schedule.
3128 : * This map can then be intersected with the dependence relation.
3129 : * This is not the most efficient way, but this shouldn't be a critical
3130 : * operation.
3131 : */
3132 0 : static __isl_give isl_map *specializer(struct isl_sched_node *src,
3133 : struct isl_sched_node *dst)
3134 : {
3135 : isl_map *src_sched, *dst_sched;
3136 :
3137 0 : src_sched = node_extract_schedule(src);
3138 0 : dst_sched = node_extract_schedule(dst);
3139 0 : return isl_map_apply_range(src_sched, isl_map_reverse(dst_sched));
3140 : }
3141 :
3142 : /* Intersect the domains of the nested relations in domain and range
3143 : * of "umap" with "map".
3144 : */
3145 0 : static __isl_give isl_union_map *intersect_domains(
3146 : __isl_take isl_union_map *umap, __isl_keep isl_map *map)
3147 : {
3148 : isl_union_set *uset;
3149 :
3150 0 : umap = isl_union_map_zip(umap);
3151 0 : uset = isl_union_set_from_set(isl_map_wrap(isl_map_copy(map)));
3152 0 : umap = isl_union_map_intersect_domain(umap, uset);
3153 0 : umap = isl_union_map_zip(umap);
3154 0 : return umap;
3155 : }
3156 :
3157 : /* Update the dependence relation of the given edge based
3158 : * on the current schedule.
3159 : * If the dependence is carried completely by the current schedule, then
3160 : * it is removed from the edge_tables. It is kept in the list of edges
3161 : * as otherwise all edge_tables would have to be recomputed.
3162 : *
3163 : * If the edge is of a type that can appear multiple times
3164 : * between the same pair of nodes, then it is added to
3165 : * the edge table (again). This prevents the situation
3166 : * where none of these edges is referenced from the edge table
3167 : * because the one that was referenced turned out to be empty and
3168 : * was therefore removed from the table.
3169 : */
3170 0 : static isl_stat update_edge(isl_ctx *ctx, struct isl_sched_graph *graph,
3171 : struct isl_sched_edge *edge)
3172 : {
3173 : int empty;
3174 : isl_map *id;
3175 :
3176 0 : id = specializer(edge->src, edge->dst);
3177 0 : edge->map = isl_map_intersect(edge->map, isl_map_copy(id));
3178 0 : if (!edge->map)
3179 0 : goto error;
3180 :
3181 0 : if (edge->tagged_condition) {
3182 0 : edge->tagged_condition =
3183 0 : intersect_domains(edge->tagged_condition, id);
3184 0 : if (!edge->tagged_condition)
3185 0 : goto error;
3186 : }
3187 0 : if (edge->tagged_validity) {
3188 0 : edge->tagged_validity =
3189 0 : intersect_domains(edge->tagged_validity, id);
3190 0 : if (!edge->tagged_validity)
3191 0 : goto error;
3192 : }
3193 :
3194 0 : empty = isl_map_plain_is_empty(edge->map);
3195 0 : if (empty < 0)
3196 0 : goto error;
3197 0 : if (empty) {
3198 0 : graph_remove_edge(graph, edge);
3199 0 : } else if (is_multi_edge_type(edge)) {
3200 0 : if (graph_edge_tables_add(ctx, graph, edge) < 0)
3201 0 : goto error;
3202 : }
3203 :
3204 0 : isl_map_free(id);
3205 0 : return isl_stat_ok;
3206 : error:
3207 0 : isl_map_free(id);
3208 0 : return isl_stat_error;
3209 : }
3210 :
3211 : /* Does the domain of "umap" intersect "uset"?
3212 : */
3213 0 : static int domain_intersects(__isl_keep isl_union_map *umap,
3214 : __isl_keep isl_union_set *uset)
3215 : {
3216 : int empty;
3217 :
3218 0 : umap = isl_union_map_copy(umap);
3219 0 : umap = isl_union_map_intersect_domain(umap, isl_union_set_copy(uset));
3220 0 : empty = isl_union_map_is_empty(umap);
3221 0 : isl_union_map_free(umap);
3222 :
3223 0 : return empty < 0 ? -1 : !empty;
3224 : }
3225 :
3226 : /* Does the range of "umap" intersect "uset"?
3227 : */
3228 0 : static int range_intersects(__isl_keep isl_union_map *umap,
3229 : __isl_keep isl_union_set *uset)
3230 : {
3231 : int empty;
3232 :
3233 0 : umap = isl_union_map_copy(umap);
3234 0 : umap = isl_union_map_intersect_range(umap, isl_union_set_copy(uset));
3235 0 : empty = isl_union_map_is_empty(umap);
3236 0 : isl_union_map_free(umap);
3237 :
3238 0 : return empty < 0 ? -1 : !empty;
3239 : }
3240 :
3241 : /* Are the condition dependences of "edge" local with respect to
3242 : * the current schedule?
3243 : *
3244 : * That is, are domain and range of the condition dependences mapped
3245 : * to the same point?
3246 : *
3247 : * In other words, is the condition false?
3248 : */
3249 0 : static int is_condition_false(struct isl_sched_edge *edge)
3250 : {
3251 : isl_union_map *umap;
3252 : isl_map *map, *sched, *test;
3253 : int empty, local;
3254 :
3255 0 : empty = isl_union_map_is_empty(edge->tagged_condition);
3256 0 : if (empty < 0 || empty)
3257 0 : return empty;
3258 :
3259 0 : umap = isl_union_map_copy(edge->tagged_condition);
3260 0 : umap = isl_union_map_zip(umap);
3261 0 : umap = isl_union_set_unwrap(isl_union_map_domain(umap));
3262 0 : map = isl_map_from_union_map(umap);
3263 :
3264 0 : sched = node_extract_schedule(edge->src);
3265 0 : map = isl_map_apply_domain(map, sched);
3266 0 : sched = node_extract_schedule(edge->dst);
3267 0 : map = isl_map_apply_range(map, sched);
3268 :
3269 0 : test = isl_map_identity(isl_map_get_space(map));
3270 0 : local = isl_map_is_subset(map, test);
3271 0 : isl_map_free(map);
3272 0 : isl_map_free(test);
3273 :
3274 0 : return local;
3275 : }
3276 :
3277 : /* For each conditional validity constraint that is adjacent
3278 : * to a condition with domain in condition_source or range in condition_sink,
3279 : * turn it into an unconditional validity constraint.
3280 : */
3281 0 : static int unconditionalize_adjacent_validity(struct isl_sched_graph *graph,
3282 : __isl_take isl_union_set *condition_source,
3283 : __isl_take isl_union_set *condition_sink)
3284 : {
3285 : int i;
3286 :
3287 0 : condition_source = isl_union_set_coalesce(condition_source);
3288 0 : condition_sink = isl_union_set_coalesce(condition_sink);
3289 :
3290 0 : for (i = 0; i < graph->n_edge; ++i) {
3291 : int adjacent;
3292 : isl_union_map *validity;
3293 :
3294 0 : if (!is_conditional_validity(&graph->edge[i]))
3295 0 : continue;
3296 0 : if (is_validity(&graph->edge[i]))
3297 0 : continue;
3298 :
3299 0 : validity = graph->edge[i].tagged_validity;
3300 0 : adjacent = domain_intersects(validity, condition_sink);
3301 0 : if (adjacent >= 0 && !adjacent)
3302 0 : adjacent = range_intersects(validity, condition_source);
3303 0 : if (adjacent < 0)
3304 0 : goto error;
3305 0 : if (!adjacent)
3306 0 : continue;
3307 :
3308 0 : set_validity(&graph->edge[i]);
3309 : }
3310 :
3311 0 : isl_union_set_free(condition_source);
3312 0 : isl_union_set_free(condition_sink);
3313 0 : return 0;
3314 : error:
3315 0 : isl_union_set_free(condition_source);
3316 0 : isl_union_set_free(condition_sink);
3317 0 : return -1;
3318 : }
3319 :
3320 : /* Update the dependence relations of all edges based on the current schedule
3321 : * and enforce conditional validity constraints that are adjacent
3322 : * to satisfied condition constraints.
3323 : *
3324 : * First check if any of the condition constraints are satisfied
3325 : * (i.e., not local to the outer schedule) and keep track of
3326 : * their domain and range.
3327 : * Then update all dependence relations (which removes the non-local
3328 : * constraints).
3329 : * Finally, if any condition constraints turned out to be satisfied,
3330 : * then turn all adjacent conditional validity constraints into
3331 : * unconditional validity constraints.
3332 : */
3333 0 : static int update_edges(isl_ctx *ctx, struct isl_sched_graph *graph)
3334 : {
3335 : int i;
3336 0 : int any = 0;
3337 : isl_union_set *source, *sink;
3338 :
3339 0 : source = isl_union_set_empty(isl_space_params_alloc(ctx, 0));
3340 0 : sink = isl_union_set_empty(isl_space_params_alloc(ctx, 0));
3341 0 : for (i = 0; i < graph->n_edge; ++i) {
3342 : int local;
3343 : isl_union_set *uset;
3344 : isl_union_map *umap;
3345 :
3346 0 : if (!is_condition(&graph->edge[i]))
3347 0 : continue;
3348 0 : if (is_local(&graph->edge[i]))
3349 0 : continue;
3350 0 : local = is_condition_false(&graph->edge[i]);
3351 0 : if (local < 0)
3352 0 : goto error;
3353 0 : if (local)
3354 0 : continue;
3355 :
3356 0 : any = 1;
3357 :
3358 0 : umap = isl_union_map_copy(graph->edge[i].tagged_condition);
3359 0 : uset = isl_union_map_domain(umap);
3360 0 : source = isl_union_set_union(source, uset);
3361 :
3362 0 : umap = isl_union_map_copy(graph->edge[i].tagged_condition);
3363 0 : uset = isl_union_map_range(umap);
3364 0 : sink = isl_union_set_union(sink, uset);
3365 : }
3366 :
3367 0 : for (i = 0; i < graph->n_edge; ++i) {
3368 0 : if (update_edge(ctx, graph, &graph->edge[i]) < 0)
3369 0 : goto error;
3370 : }
3371 :
3372 0 : if (any)
3373 0 : return unconditionalize_adjacent_validity(graph, source, sink);
3374 :
3375 0 : isl_union_set_free(source);
3376 0 : isl_union_set_free(sink);
3377 0 : return 0;
3378 : error:
3379 0 : isl_union_set_free(source);
3380 0 : isl_union_set_free(sink);
3381 0 : return -1;
3382 : }
3383 :
3384 0 : static void next_band(struct isl_sched_graph *graph)
3385 : {
3386 0 : graph->band_start = graph->n_total_row;
3387 0 : }
3388 :
3389 : /* Return the union of the universe domains of the nodes in "graph"
3390 : * that satisfy "pred".
3391 : */
3392 0 : static __isl_give isl_union_set *isl_sched_graph_domain(isl_ctx *ctx,
3393 : struct isl_sched_graph *graph,
3394 : int (*pred)(struct isl_sched_node *node, int data), int data)
3395 : {
3396 : int i;
3397 : isl_set *set;
3398 : isl_union_set *dom;
3399 :
3400 0 : for (i = 0; i < graph->n; ++i)
3401 0 : if (pred(&graph->node[i], data))
3402 0 : break;
3403 :
3404 0 : if (i >= graph->n)
3405 0 : isl_die(ctx, isl_error_internal,
3406 : "empty component", return NULL);
3407 :
3408 0 : set = isl_set_universe(isl_space_copy(graph->node[i].space));
3409 0 : dom = isl_union_set_from_set(set);
3410 :
3411 0 : for (i = i + 1; i < graph->n; ++i) {
3412 0 : if (!pred(&graph->node[i], data))
3413 0 : continue;
3414 0 : set = isl_set_universe(isl_space_copy(graph->node[i].space));
3415 0 : dom = isl_union_set_union(dom, isl_union_set_from_set(set));
3416 : }
3417 :
3418 0 : return dom;
3419 : }
3420 :
3421 : /* Return a list of unions of universe domains, where each element
3422 : * in the list corresponds to an SCC (or WCC) indexed by node->scc.
3423 : */
3424 0 : static __isl_give isl_union_set_list *extract_sccs(isl_ctx *ctx,
3425 : struct isl_sched_graph *graph)
3426 : {
3427 : int i;
3428 : isl_union_set_list *filters;
3429 :
3430 0 : filters = isl_union_set_list_alloc(ctx, graph->scc);
3431 0 : for (i = 0; i < graph->scc; ++i) {
3432 : isl_union_set *dom;
3433 :
3434 0 : dom = isl_sched_graph_domain(ctx, graph, &node_scc_exactly, i);
3435 0 : filters = isl_union_set_list_add(filters, dom);
3436 : }
3437 :
3438 0 : return filters;
3439 : }
3440 :
3441 : /* Return a list of two unions of universe domains, one for the SCCs up
3442 : * to and including graph->src_scc and another for the other SCCs.
3443 : */
3444 0 : static __isl_give isl_union_set_list *extract_split(isl_ctx *ctx,
3445 : struct isl_sched_graph *graph)
3446 : {
3447 : isl_union_set *dom;
3448 : isl_union_set_list *filters;
3449 :
3450 0 : filters = isl_union_set_list_alloc(ctx, 2);
3451 0 : dom = isl_sched_graph_domain(ctx, graph,
3452 : &node_scc_at_most, graph->src_scc);
3453 0 : filters = isl_union_set_list_add(filters, dom);
3454 0 : dom = isl_sched_graph_domain(ctx, graph,
3455 0 : &node_scc_at_least, graph->src_scc + 1);
3456 0 : filters = isl_union_set_list_add(filters, dom);
3457 :
3458 0 : return filters;
3459 : }
3460 :
3461 : /* Copy nodes that satisfy node_pred from the src dependence graph
3462 : * to the dst dependence graph.
3463 : */
3464 0 : static isl_stat copy_nodes(struct isl_sched_graph *dst,
3465 : struct isl_sched_graph *src,
3466 : int (*node_pred)(struct isl_sched_node *node, int data), int data)
3467 : {
3468 : int i;
3469 :
3470 0 : dst->n = 0;
3471 0 : for (i = 0; i < src->n; ++i) {
3472 : int j;
3473 :
3474 0 : if (!node_pred(&src->node[i], data))
3475 0 : continue;
3476 :
3477 0 : j = dst->n;
3478 0 : dst->node[j].space = isl_space_copy(src->node[i].space);
3479 0 : dst->node[j].compressed = src->node[i].compressed;
3480 0 : dst->node[j].hull = isl_set_copy(src->node[i].hull);
3481 0 : dst->node[j].compress =
3482 0 : isl_multi_aff_copy(src->node[i].compress);
3483 0 : dst->node[j].decompress =
3484 0 : isl_multi_aff_copy(src->node[i].decompress);
3485 0 : dst->node[j].nvar = src->node[i].nvar;
3486 0 : dst->node[j].nparam = src->node[i].nparam;
3487 0 : dst->node[j].sched = isl_mat_copy(src->node[i].sched);
3488 0 : dst->node[j].sched_map = isl_map_copy(src->node[i].sched_map);
3489 0 : dst->node[j].coincident = src->node[i].coincident;
3490 0 : dst->node[j].sizes = isl_multi_val_copy(src->node[i].sizes);
3491 0 : dst->node[j].bounds = isl_basic_set_copy(src->node[i].bounds);
3492 0 : dst->node[j].max = isl_vec_copy(src->node[i].max);
3493 0 : dst->n++;
3494 :
3495 0 : if (!dst->node[j].space || !dst->node[j].sched)
3496 0 : return isl_stat_error;
3497 0 : if (dst->node[j].compressed &&
3498 0 : (!dst->node[j].hull || !dst->node[j].compress ||
3499 0 : !dst->node[j].decompress))
3500 0 : return isl_stat_error;
3501 : }
3502 :
3503 0 : return isl_stat_ok;
3504 : }
3505 :
3506 : /* Copy non-empty edges that satisfy edge_pred from the src dependence graph
3507 : * to the dst dependence graph.
3508 : * If the source or destination node of the edge is not in the destination
3509 : * graph, then it must be a backward proximity edge and it should simply
3510 : * be ignored.
3511 : */
3512 0 : static isl_stat copy_edges(isl_ctx *ctx, struct isl_sched_graph *dst,
3513 : struct isl_sched_graph *src,
3514 : int (*edge_pred)(struct isl_sched_edge *edge, int data), int data)
3515 : {
3516 : int i;
3517 :
3518 0 : dst->n_edge = 0;
3519 0 : for (i = 0; i < src->n_edge; ++i) {
3520 0 : struct isl_sched_edge *edge = &src->edge[i];
3521 : isl_map *map;
3522 : isl_union_map *tagged_condition;
3523 : isl_union_map *tagged_validity;
3524 : struct isl_sched_node *dst_src, *dst_dst;
3525 :
3526 0 : if (!edge_pred(edge, data))
3527 0 : continue;
3528 :
3529 0 : if (isl_map_plain_is_empty(edge->map))
3530 0 : continue;
3531 :
3532 0 : dst_src = graph_find_node(ctx, dst, edge->src->space);
3533 0 : dst_dst = graph_find_node(ctx, dst, edge->dst->space);
3534 0 : if (!dst_src || !dst_dst)
3535 0 : return isl_stat_error;
3536 0 : if (!is_node(dst, dst_src) || !is_node(dst, dst_dst)) {
3537 0 : if (is_validity(edge) || is_conditional_validity(edge))
3538 0 : isl_die(ctx, isl_error_internal,
3539 : "backward (conditional) validity edge",
3540 : return isl_stat_error);
3541 0 : continue;
3542 : }
3543 :
3544 0 : map = isl_map_copy(edge->map);
3545 0 : tagged_condition = isl_union_map_copy(edge->tagged_condition);
3546 0 : tagged_validity = isl_union_map_copy(edge->tagged_validity);
3547 :
3548 0 : dst->edge[dst->n_edge].src = dst_src;
3549 0 : dst->edge[dst->n_edge].dst = dst_dst;
3550 0 : dst->edge[dst->n_edge].map = map;
3551 0 : dst->edge[dst->n_edge].tagged_condition = tagged_condition;
3552 0 : dst->edge[dst->n_edge].tagged_validity = tagged_validity;
3553 0 : dst->edge[dst->n_edge].types = edge->types;
3554 0 : dst->n_edge++;
3555 :
3556 0 : if (edge->tagged_condition && !tagged_condition)
3557 0 : return isl_stat_error;
3558 0 : if (edge->tagged_validity && !tagged_validity)
3559 0 : return isl_stat_error;
3560 :
3561 0 : if (graph_edge_tables_add(ctx, dst,
3562 0 : &dst->edge[dst->n_edge - 1]) < 0)
3563 0 : return isl_stat_error;
3564 : }
3565 :
3566 0 : return isl_stat_ok;
3567 : }
3568 :
3569 : /* Compute the maximal number of variables over all nodes.
3570 : * This is the maximal number of linearly independent schedule
3571 : * rows that we need to compute.
3572 : * Just in case we end up in a part of the dependence graph
3573 : * with only lower-dimensional domains, we make sure we will
3574 : * compute the required amount of extra linearly independent rows.
3575 : */
3576 0 : static int compute_maxvar(struct isl_sched_graph *graph)
3577 : {
3578 : int i;
3579 :
3580 0 : graph->maxvar = 0;
3581 0 : for (i = 0; i < graph->n; ++i) {
3582 0 : struct isl_sched_node *node = &graph->node[i];
3583 : int nvar;
3584 :
3585 0 : if (node_update_vmap(node) < 0)
3586 0 : return -1;
3587 0 : nvar = node->nvar + graph->n_row - node->rank;
3588 0 : if (nvar > graph->maxvar)
3589 0 : graph->maxvar = nvar;
3590 : }
3591 :
3592 0 : return 0;
3593 : }
3594 :
3595 : /* Extract the subgraph of "graph" that consists of the nodes satisfying
3596 : * "node_pred" and the edges satisfying "edge_pred" and store
3597 : * the result in "sub".
3598 : */
3599 0 : static isl_stat extract_sub_graph(isl_ctx *ctx, struct isl_sched_graph *graph,
3600 : int (*node_pred)(struct isl_sched_node *node, int data),
3601 : int (*edge_pred)(struct isl_sched_edge *edge, int data),
3602 : int data, struct isl_sched_graph *sub)
3603 : {
3604 0 : int i, n = 0, n_edge = 0;
3605 : int t;
3606 :
3607 0 : for (i = 0; i < graph->n; ++i)
3608 0 : if (node_pred(&graph->node[i], data))
3609 0 : ++n;
3610 0 : for (i = 0; i < graph->n_edge; ++i)
3611 0 : if (edge_pred(&graph->edge[i], data))
3612 0 : ++n_edge;
3613 0 : if (graph_alloc(ctx, sub, n, n_edge) < 0)
3614 0 : return isl_stat_error;
3615 0 : sub->root = graph->root;
3616 0 : if (copy_nodes(sub, graph, node_pred, data) < 0)
3617 0 : return isl_stat_error;
3618 0 : if (graph_init_table(ctx, sub) < 0)
3619 0 : return isl_stat_error;
3620 0 : for (t = 0; t <= isl_edge_last; ++t)
3621 0 : sub->max_edge[t] = graph->max_edge[t];
3622 0 : if (graph_init_edge_tables(ctx, sub) < 0)
3623 0 : return isl_stat_error;
3624 0 : if (copy_edges(ctx, sub, graph, edge_pred, data) < 0)
3625 0 : return isl_stat_error;
3626 0 : sub->n_row = graph->n_row;
3627 0 : sub->max_row = graph->max_row;
3628 0 : sub->n_total_row = graph->n_total_row;
3629 0 : sub->band_start = graph->band_start;
3630 :
3631 0 : return isl_stat_ok;
3632 : }
3633 :
3634 : static __isl_give isl_schedule_node *compute_schedule(isl_schedule_node *node,
3635 : struct isl_sched_graph *graph);
3636 : static __isl_give isl_schedule_node *compute_schedule_wcc(
3637 : isl_schedule_node *node, struct isl_sched_graph *graph);
3638 :
3639 : /* Compute a schedule for a subgraph of "graph". In particular, for
3640 : * the graph composed of nodes that satisfy node_pred and edges that
3641 : * that satisfy edge_pred.
3642 : * If the subgraph is known to consist of a single component, then wcc should
3643 : * be set and then we call compute_schedule_wcc on the constructed subgraph.
3644 : * Otherwise, we call compute_schedule, which will check whether the subgraph
3645 : * is connected.
3646 : *
3647 : * The schedule is inserted at "node" and the updated schedule node
3648 : * is returned.
3649 : */
3650 0 : static __isl_give isl_schedule_node *compute_sub_schedule(
3651 : __isl_take isl_schedule_node *node, isl_ctx *ctx,
3652 : struct isl_sched_graph *graph,
3653 : int (*node_pred)(struct isl_sched_node *node, int data),
3654 : int (*edge_pred)(struct isl_sched_edge *edge, int data),
3655 : int data, int wcc)
3656 : {
3657 0 : struct isl_sched_graph split = { 0 };
3658 :
3659 0 : if (extract_sub_graph(ctx, graph, node_pred, edge_pred, data,
3660 : &split) < 0)
3661 0 : goto error;
3662 :
3663 0 : if (wcc)
3664 0 : node = compute_schedule_wcc(node, &split);
3665 : else
3666 0 : node = compute_schedule(node, &split);
3667 :
3668 0 : graph_free(ctx, &split);
3669 0 : return node;
3670 : error:
3671 0 : graph_free(ctx, &split);
3672 0 : return isl_schedule_node_free(node);
3673 : }
3674 :
3675 0 : static int edge_scc_exactly(struct isl_sched_edge *edge, int scc)
3676 : {
3677 0 : return edge->src->scc == scc && edge->dst->scc == scc;
3678 : }
3679 :
3680 0 : static int edge_dst_scc_at_most(struct isl_sched_edge *edge, int scc)
3681 : {
3682 0 : return edge->dst->scc <= scc;
3683 : }
3684 :
3685 0 : static int edge_src_scc_at_least(struct isl_sched_edge *edge, int scc)
3686 : {
3687 0 : return edge->src->scc >= scc;
3688 : }
3689 :
3690 : /* Reset the current band by dropping all its schedule rows.
3691 : */
3692 0 : static isl_stat reset_band(struct isl_sched_graph *graph)
3693 : {
3694 : int i;
3695 : int drop;
3696 :
3697 0 : drop = graph->n_total_row - graph->band_start;
3698 0 : graph->n_total_row -= drop;
3699 0 : graph->n_row -= drop;
3700 :
3701 0 : for (i = 0; i < graph->n; ++i) {
3702 0 : struct isl_sched_node *node = &graph->node[i];
3703 :
3704 0 : isl_map_free(node->sched_map);
3705 0 : node->sched_map = NULL;
3706 :
3707 0 : node->sched = isl_mat_drop_rows(node->sched,
3708 0 : graph->band_start, drop);
3709 :
3710 0 : if (!node->sched)
3711 0 : return isl_stat_error;
3712 : }
3713 :
3714 0 : return isl_stat_ok;
3715 : }
3716 :
3717 : /* Split the current graph into two parts and compute a schedule for each
3718 : * part individually. In particular, one part consists of all SCCs up
3719 : * to and including graph->src_scc, while the other part contains the other
3720 : * SCCs. The split is enforced by a sequence node inserted at position "node"
3721 : * in the schedule tree. Return the updated schedule node.
3722 : * If either of these two parts consists of a sequence, then it is spliced
3723 : * into the sequence containing the two parts.
3724 : *
3725 : * The current band is reset. It would be possible to reuse
3726 : * the previously computed rows as the first rows in the next
3727 : * band, but recomputing them may result in better rows as we are looking
3728 : * at a smaller part of the dependence graph.
3729 : */
3730 0 : static __isl_give isl_schedule_node *compute_split_schedule(
3731 : __isl_take isl_schedule_node *node, struct isl_sched_graph *graph)
3732 : {
3733 : int is_seq;
3734 : isl_ctx *ctx;
3735 : isl_union_set_list *filters;
3736 :
3737 0 : if (!node)
3738 0 : return NULL;
3739 :
3740 0 : if (reset_band(graph) < 0)
3741 0 : return isl_schedule_node_free(node);
3742 :
3743 0 : next_band(graph);
3744 :
3745 0 : ctx = isl_schedule_node_get_ctx(node);
3746 0 : filters = extract_split(ctx, graph);
3747 0 : node = isl_schedule_node_insert_sequence(node, filters);
3748 0 : node = isl_schedule_node_child(node, 1);
3749 0 : node = isl_schedule_node_child(node, 0);
3750 :
3751 0 : node = compute_sub_schedule(node, ctx, graph,
3752 : &node_scc_at_least, &edge_src_scc_at_least,
3753 0 : graph->src_scc + 1, 0);
3754 0 : is_seq = isl_schedule_node_get_type(node) == isl_schedule_node_sequence;
3755 0 : node = isl_schedule_node_parent(node);
3756 0 : node = isl_schedule_node_parent(node);
3757 0 : if (is_seq)
3758 0 : node = isl_schedule_node_sequence_splice_child(node, 1);
3759 0 : node = isl_schedule_node_child(node, 0);
3760 0 : node = isl_schedule_node_child(node, 0);
3761 0 : node = compute_sub_schedule(node, ctx, graph,
3762 : &node_scc_at_most, &edge_dst_scc_at_most,
3763 : graph->src_scc, 0);
3764 0 : is_seq = isl_schedule_node_get_type(node) == isl_schedule_node_sequence;
3765 0 : node = isl_schedule_node_parent(node);
3766 0 : node = isl_schedule_node_parent(node);
3767 0 : if (is_seq)
3768 0 : node = isl_schedule_node_sequence_splice_child(node, 0);
3769 :
3770 0 : return node;
3771 : }
3772 :
3773 : /* Insert a band node at position "node" in the schedule tree corresponding
3774 : * to the current band in "graph". Mark the band node permutable
3775 : * if "permutable" is set.
3776 : * The partial schedules and the coincidence property are extracted
3777 : * from the graph nodes.
3778 : * Return the updated schedule node.
3779 : */
3780 0 : static __isl_give isl_schedule_node *insert_current_band(
3781 : __isl_take isl_schedule_node *node, struct isl_sched_graph *graph,
3782 : int permutable)
3783 : {
3784 : int i;
3785 : int start, end, n;
3786 : isl_multi_aff *ma;
3787 : isl_multi_pw_aff *mpa;
3788 : isl_multi_union_pw_aff *mupa;
3789 :
3790 0 : if (!node)
3791 0 : return NULL;
3792 :
3793 0 : if (graph->n < 1)
3794 0 : isl_die(isl_schedule_node_get_ctx(node), isl_error_internal,
3795 : "graph should have at least one node",
3796 : return isl_schedule_node_free(node));
3797 :
3798 0 : start = graph->band_start;
3799 0 : end = graph->n_total_row;
3800 0 : n = end - start;
3801 :
3802 0 : ma = node_extract_partial_schedule_multi_aff(&graph->node[0], start, n);
3803 0 : mpa = isl_multi_pw_aff_from_multi_aff(ma);
3804 0 : mupa = isl_multi_union_pw_aff_from_multi_pw_aff(mpa);
3805 :
3806 0 : for (i = 1; i < graph->n; ++i) {
3807 : isl_multi_union_pw_aff *mupa_i;
3808 :
3809 0 : ma = node_extract_partial_schedule_multi_aff(&graph->node[i],
3810 : start, n);
3811 0 : mpa = isl_multi_pw_aff_from_multi_aff(ma);
3812 0 : mupa_i = isl_multi_union_pw_aff_from_multi_pw_aff(mpa);
3813 0 : mupa = isl_multi_union_pw_aff_union_add(mupa, mupa_i);
3814 : }
3815 0 : node = isl_schedule_node_insert_partial_schedule(node, mupa);
3816 :
3817 0 : for (i = 0; i < n; ++i)
3818 0 : node = isl_schedule_node_band_member_set_coincident(node, i,
3819 0 : graph->node[0].coincident[start + i]);
3820 0 : node = isl_schedule_node_band_set_permutable(node, permutable);
3821 :
3822 0 : return node;
3823 : }
3824 :
3825 : /* Update the dependence relations based on the current schedule,
3826 : * add the current band to "node" and then continue with the computation
3827 : * of the next band.
3828 : * Return the updated schedule node.
3829 : */
3830 0 : static __isl_give isl_schedule_node *compute_next_band(
3831 : __isl_take isl_schedule_node *node,
3832 : struct isl_sched_graph *graph, int permutable)
3833 : {
3834 : isl_ctx *ctx;
3835 :
3836 0 : if (!node)
3837 0 : return NULL;
3838 :
3839 0 : ctx = isl_schedule_node_get_ctx(node);
3840 0 : if (update_edges(ctx, graph) < 0)
3841 0 : return isl_schedule_node_free(node);
3842 0 : node = insert_current_band(node, graph, permutable);
3843 0 : next_band(graph);
3844 :
3845 0 : node = isl_schedule_node_child(node, 0);
3846 0 : node = compute_schedule(node, graph);
3847 0 : node = isl_schedule_node_parent(node);
3848 :
3849 0 : return node;
3850 : }
3851 :
3852 : /* Add the constraints "coef" derived from an edge from "node" to itself
3853 : * to graph->lp in order to respect the dependences and to try and carry them.
3854 : * "pos" is the sequence number of the edge that needs to be carried.
3855 : * "coef" represents general constraints on coefficients (c_0, c_x)
3856 : * of valid constraints for (y - x) with x and y instances of the node.
3857 : *
3858 : * The constraints added to graph->lp need to enforce
3859 : *
3860 : * (c_j_0 + c_j_x y) - (c_j_0 + c_j_x x)
3861 : * = c_j_x (y - x) >= e_i
3862 : *
3863 : * for each (x,y) in the dependence relation of the edge.
3864 : * That is, (-e_i, c_j_x) needs to be plugged in for (c_0, c_x),
3865 : * taking into account that each coefficient in c_j_x is represented
3866 : * as a pair of non-negative coefficients.
3867 : */
3868 0 : static isl_stat add_intra_constraints(struct isl_sched_graph *graph,
3869 : struct isl_sched_node *node, __isl_take isl_basic_set *coef, int pos)
3870 : {
3871 : int offset;
3872 : isl_ctx *ctx;
3873 : isl_dim_map *dim_map;
3874 :
3875 0 : if (!coef)
3876 0 : return isl_stat_error;
3877 :
3878 0 : ctx = isl_basic_set_get_ctx(coef);
3879 0 : offset = coef_var_offset(coef);
3880 0 : dim_map = intra_dim_map(ctx, graph, node, offset, 1);
3881 0 : isl_dim_map_range(dim_map, 3 + pos, 0, 0, 0, 1, -1);
3882 0 : graph->lp = add_constraints_dim_map(graph->lp, coef, dim_map);
3883 :
3884 0 : return isl_stat_ok;
3885 : }
3886 :
3887 : /* Add the constraints "coef" derived from an edge from "src" to "dst"
3888 : * to graph->lp in order to respect the dependences and to try and carry them.
3889 : * "pos" is the sequence number of the edge that needs to be carried or
3890 : * -1 if no attempt should be made to carry the dependences.
3891 : * "coef" represents general constraints on coefficients (c_0, c_n, c_x, c_y)
3892 : * of valid constraints for (x, y) with x and y instances of "src" and "dst".
3893 : *
3894 : * The constraints added to graph->lp need to enforce
3895 : *
3896 : * (c_k_0 + c_k_n n + c_k_x y) - (c_j_0 + c_j_n n + c_j_x x) >= e_i
3897 : *
3898 : * for each (x,y) in the dependence relation of the edge or
3899 : *
3900 : * (c_k_0 + c_k_n n + c_k_x y) - (c_j_0 + c_j_n n + c_j_x x) >= 0
3901 : *
3902 : * if pos is -1.
3903 : * That is,
3904 : * (-e_i + c_k_0 - c_j_0, c_k_n - c_j_n, -c_j_x, c_k_x)
3905 : * or
3906 : * (c_k_0 - c_j_0, c_k_n - c_j_n, -c_j_x, c_k_x)
3907 : * needs to be plugged in for (c_0, c_n, c_x, c_y),
3908 : * taking into account that each coefficient in c_j_x and c_k_x is represented
3909 : * as a pair of non-negative coefficients.
3910 : */
3911 0 : static isl_stat add_inter_constraints(struct isl_sched_graph *graph,
3912 : struct isl_sched_node *src, struct isl_sched_node *dst,
3913 : __isl_take isl_basic_set *coef, int pos)
3914 : {
3915 : int offset;
3916 : isl_ctx *ctx;
3917 : isl_dim_map *dim_map;
3918 :
3919 0 : if (!coef)
3920 0 : return isl_stat_error;
3921 :
3922 0 : ctx = isl_basic_set_get_ctx(coef);
3923 0 : offset = coef_var_offset(coef);
3924 0 : dim_map = inter_dim_map(ctx, graph, src, dst, offset, 1);
3925 0 : if (pos >= 0)
3926 0 : isl_dim_map_range(dim_map, 3 + pos, 0, 0, 0, 1, -1);
3927 0 : graph->lp = add_constraints_dim_map(graph->lp, coef, dim_map);
3928 :
3929 0 : return isl_stat_ok;
3930 : }
3931 :
/* Data structure for keeping track of the data needed
 * to exploit non-trivial lineality spaces.
 *
 * "any_non_trivial" is true if there are any non-trivial lineality spaces.
 * If "any_non_trivial" is not true, then "equivalent" and "mask" may be NULL.
 * "equivalent" connects instances to other instances on the same line(s).
 * "mask" contains the domain spaces of "equivalent".
 * Any instance set not in "mask" does not have a non-trivial lineality space.
 */
struct isl_exploit_lineality_data {
	/* Is there any non-trivial lineality space? */
	isl_bool any_non_trivial;
	/* Relation between instances on the same line(s); may be NULL. */
	isl_union_map *equivalent;
	/* Domain spaces of "equivalent"; may be NULL. */
	isl_union_set *mask;
};
3946 :
/* Data structure collecting information used during the construction
 * of an LP for carrying dependences.
 *
 * "intra" is a sequence of coefficient constraints for intra-node edges.
 * "inter" is a sequence of coefficient constraints for inter-node edges.
 * "lineality" contains data used to exploit non-trivial lineality spaces.
 *
 * The contents are released by isl_carry_clear.
 */
struct isl_carry {
	/* Coefficient constraints for edges from a node to itself. */
	isl_basic_set_list *intra;
	/* Coefficient constraints for edges between different nodes. */
	isl_basic_set_list *inter;
	/* Bookkeeping for non-trivial lineality spaces. */
	struct isl_exploit_lineality_data lineality;
};
3959 :
3960 : /* Free all the data stored in "carry".
3961 : */
3962 0 : static void isl_carry_clear(struct isl_carry *carry)
3963 : {
3964 0 : isl_basic_set_list_free(carry->intra);
3965 0 : isl_basic_set_list_free(carry->inter);
3966 0 : isl_union_map_free(carry->lineality.equivalent);
3967 0 : isl_union_set_free(carry->lineality.mask);
3968 0 : }
3969 :
3970 : /* Return a pointer to the node in "graph" that lives in "space".
3971 : * If the requested node has been compressed, then "space"
3972 : * corresponds to the compressed space.
3973 : * The graph is assumed to have such a node.
3974 : * Return NULL in case of error.
3975 : *
3976 : * First try and see if "space" is the space of an uncompressed node.
3977 : * If so, return that node.
3978 : * Otherwise, "space" was constructed by construct_compressed_id and
3979 : * contains a user pointer pointing to the node in the tuple id.
3980 : * However, this node belongs to the original dependence graph.
3981 : * If "graph" is a subgraph of this original dependence graph,
3982 : * then the node with the same space still needs to be looked up
3983 : * in the current graph.
3984 : */
3985 0 : static struct isl_sched_node *graph_find_compressed_node(isl_ctx *ctx,
3986 : struct isl_sched_graph *graph, __isl_keep isl_space *space)
3987 : {
3988 : isl_id *id;
3989 : struct isl_sched_node *node;
3990 :
3991 0 : if (!space)
3992 0 : return NULL;
3993 :
3994 0 : node = graph_find_node(ctx, graph, space);
3995 0 : if (!node)
3996 0 : return NULL;
3997 0 : if (is_node(graph, node))
3998 0 : return node;
3999 :
4000 0 : id = isl_space_get_tuple_id(space, isl_dim_set);
4001 0 : node = isl_id_get_user(id);
4002 0 : isl_id_free(id);
4003 :
4004 0 : if (!node)
4005 0 : return NULL;
4006 :
4007 0 : if (!is_node(graph->root, node))
4008 0 : isl_die(ctx, isl_error_internal,
4009 : "space points to invalid node", return NULL);
4010 0 : if (graph != graph->root)
4011 0 : node = graph_find_node(ctx, graph, node->space);
4012 0 : if (!is_node(graph, node))
4013 0 : isl_die(ctx, isl_error_internal,
4014 : "unable to find node", return NULL);
4015 :
4016 0 : return node;
4017 : }
4018 :
/* Internal data structure for add_all_constraints.
 *
 * "ctx" is the isl_ctx passed to the node lookups performed
 * by the callbacks.
 * "graph" is the schedule constraint graph for which an LP problem
 * is being constructed.
 * "carry_inter" indicates whether inter-node edges should be carried.
 * "pos" is the position of the next edge that needs to be carried.
 *
 * Note that add_all_constraints initializes the first three fields
 * positionally, so their order should not be changed.
 */
struct isl_add_all_constraints_data {
	isl_ctx *ctx;
	struct isl_sched_graph *graph;
	int carry_inter;
	int pos;
};
4032 :
4033 : /* Add the constraints "coef" derived from an edge from a node to itself
4034 : * to data->graph->lp in order to respect the dependences and
4035 : * to try and carry them.
4036 : *
4037 : * The space of "coef" is of the form
4038 : *
4039 : * coefficients[[c_cst] -> S[c_x]]
4040 : *
4041 : * with S[c_x] the (compressed) space of the node.
4042 : * Extract the node from the space and call add_intra_constraints.
4043 : */
4044 0 : static isl_stat lp_add_intra(__isl_take isl_basic_set *coef, void *user)
4045 : {
4046 0 : struct isl_add_all_constraints_data *data = user;
4047 : isl_space *space;
4048 : struct isl_sched_node *node;
4049 :
4050 0 : space = isl_basic_set_get_space(coef);
4051 0 : space = isl_space_range(isl_space_unwrap(space));
4052 0 : node = graph_find_compressed_node(data->ctx, data->graph, space);
4053 0 : isl_space_free(space);
4054 0 : return add_intra_constraints(data->graph, node, coef, data->pos++);
4055 : }
4056 :
4057 : /* Add the constraints "coef" derived from an edge from a node j
4058 : * to a node k to data->graph->lp in order to respect the dependences and
4059 : * to try and carry them (provided data->carry_inter is set).
4060 : *
4061 : * The space of "coef" is of the form
4062 : *
4063 : * coefficients[[c_cst, c_n] -> [S_j[c_x] -> S_k[c_y]]]
4064 : *
4065 : * with S_j[c_x] and S_k[c_y] the (compressed) spaces of the nodes.
4066 : * Extract the nodes from the space and call add_inter_constraints.
4067 : */
4068 0 : static isl_stat lp_add_inter(__isl_take isl_basic_set *coef, void *user)
4069 : {
4070 0 : struct isl_add_all_constraints_data *data = user;
4071 : isl_space *space, *dom;
4072 : struct isl_sched_node *src, *dst;
4073 : int pos;
4074 :
4075 0 : space = isl_basic_set_get_space(coef);
4076 0 : space = isl_space_unwrap(isl_space_range(isl_space_unwrap(space)));
4077 0 : dom = isl_space_domain(isl_space_copy(space));
4078 0 : src = graph_find_compressed_node(data->ctx, data->graph, dom);
4079 0 : isl_space_free(dom);
4080 0 : space = isl_space_range(space);
4081 0 : dst = graph_find_compressed_node(data->ctx, data->graph, space);
4082 0 : isl_space_free(space);
4083 :
4084 0 : pos = data->carry_inter ? data->pos++ : -1;
4085 0 : return add_inter_constraints(data->graph, src, dst, coef, pos);
4086 : }
4087 :
4088 : /* Add constraints to graph->lp that force all (conditional) validity
4089 : * dependences to be respected and attempt to carry them.
4090 : * "intra" is the sequence of coefficient constraints for intra-node edges.
4091 : * "inter" is the sequence of coefficient constraints for inter-node edges.
4092 : * "carry_inter" indicates whether inter-node edges should be carried or
4093 : * only respected.
4094 : */
4095 0 : static isl_stat add_all_constraints(isl_ctx *ctx, struct isl_sched_graph *graph,
4096 : __isl_keep isl_basic_set_list *intra,
4097 : __isl_keep isl_basic_set_list *inter, int carry_inter)
4098 : {
4099 0 : struct isl_add_all_constraints_data data = { ctx, graph, carry_inter };
4100 :
4101 0 : data.pos = 0;
4102 0 : if (isl_basic_set_list_foreach(intra, &lp_add_intra, &data) < 0)
4103 0 : return isl_stat_error;
4104 0 : if (isl_basic_set_list_foreach(inter, &lp_add_inter, &data) < 0)
4105 0 : return isl_stat_error;
4106 0 : return isl_stat_ok;
4107 : }
4108 :
/* Internal data structure for count_all_constraints
 * for keeping track of the number of equality and inequality constraints.
 *
 * "n_eq" accumulates the number of equality constraints.
 * "n_ineq" accumulates the number of inequality constraints.
 */
struct isl_sched_count {
	int n_eq;
	int n_ineq;
};
4116 :
4117 : /* Add the number of equality and inequality constraints of "bset"
4118 : * to data->n_eq and data->n_ineq.
4119 : */
4120 0 : static isl_stat bset_update_count(__isl_take isl_basic_set *bset, void *user)
4121 : {
4122 0 : struct isl_sched_count *data = user;
4123 :
4124 0 : return update_count(bset, 1, &data->n_eq, &data->n_ineq);
4125 : }
4126 :
4127 : /* Count the number of equality and inequality constraints
4128 : * that will be added to the carry_lp problem.
4129 : * We count each edge exactly once.
4130 : * "intra" is the sequence of coefficient constraints for intra-node edges.
4131 : * "inter" is the sequence of coefficient constraints for inter-node edges.
4132 : */
4133 0 : static isl_stat count_all_constraints(__isl_keep isl_basic_set_list *intra,
4134 : __isl_keep isl_basic_set_list *inter, int *n_eq, int *n_ineq)
4135 : {
4136 : struct isl_sched_count data;
4137 :
4138 0 : data.n_eq = data.n_ineq = 0;
4139 0 : if (isl_basic_set_list_foreach(inter, &bset_update_count, &data) < 0)
4140 0 : return isl_stat_error;
4141 0 : if (isl_basic_set_list_foreach(intra, &bset_update_count, &data) < 0)
4142 0 : return isl_stat_error;
4143 :
4144 0 : *n_eq = data.n_eq;
4145 0 : *n_ineq = data.n_ineq;
4146 :
4147 0 : return isl_stat_ok;
4148 : }
4149 :
4150 : /* Construct an LP problem for finding schedule coefficients
4151 : * such that the schedule carries as many validity dependences as possible.
4152 : * In particular, for each dependence i, we bound the dependence distance
4153 : * from below by e_i, with 0 <= e_i <= 1 and then maximize the sum
4154 : * of all e_i's. Dependences with e_i = 0 in the solution are simply
4155 : * respected, while those with e_i > 0 (in practice e_i = 1) are carried.
4156 : * "intra" is the sequence of coefficient constraints for intra-node edges.
4157 : * "inter" is the sequence of coefficient constraints for inter-node edges.
4158 : * "n_edge" is the total number of edges.
4159 : * "carry_inter" indicates whether inter-node edges should be carried or
4160 : * only respected. That is, if "carry_inter" is not set, then
4161 : * no e_i variables are introduced for the inter-node edges.
4162 : *
4163 : * All variables of the LP are non-negative. The actual coefficients
4164 : * may be negative, so each coefficient is represented as the difference
4165 : * of two non-negative variables. The negative part always appears
4166 : * immediately before the positive part.
4167 : * Other than that, the variables have the following order
4168 : *
4169 : * - sum of (1 - e_i) over all edges
4170 : * - sum of all c_n coefficients
4171 : * (unconstrained when computing non-parametric schedules)
4172 : * - sum of positive and negative parts of all c_x coefficients
4173 : * - for each edge
4174 : * - e_i
4175 : * - for each node
4176 : * - positive and negative parts of c_i_x, in opposite order
4177 : * - c_i_n (if parametric)
4178 : * - c_i_0
4179 : *
4180 : * The constraints are those from the (validity) edges plus three equalities
4181 : * to express the sums and n_edge inequalities to express e_i <= 1.
4182 : */
4183 0 : static isl_stat setup_carry_lp(isl_ctx *ctx, struct isl_sched_graph *graph,
4184 : int n_edge, __isl_keep isl_basic_set_list *intra,
4185 : __isl_keep isl_basic_set_list *inter, int carry_inter)
4186 : {
4187 : int i;
4188 : int k;
4189 : isl_space *dim;
4190 : unsigned total;
4191 : int n_eq, n_ineq;
4192 :
4193 0 : total = 3 + n_edge;
4194 0 : for (i = 0; i < graph->n; ++i) {
4195 0 : struct isl_sched_node *node = &graph->node[graph->sorted[i]];
4196 0 : node->start = total;
4197 0 : total += 1 + node->nparam + 2 * node->nvar;
4198 : }
4199 :
4200 0 : if (count_all_constraints(intra, inter, &n_eq, &n_ineq) < 0)
4201 0 : return isl_stat_error;
4202 :
4203 0 : dim = isl_space_set_alloc(ctx, 0, total);
4204 0 : isl_basic_set_free(graph->lp);
4205 0 : n_eq += 3;
4206 0 : n_ineq += n_edge;
4207 0 : graph->lp = isl_basic_set_alloc_space(dim, 0, n_eq, n_ineq);
4208 0 : graph->lp = isl_basic_set_set_rational(graph->lp);
4209 :
4210 0 : k = isl_basic_set_alloc_equality(graph->lp);
4211 0 : if (k < 0)
4212 0 : return isl_stat_error;
4213 0 : isl_seq_clr(graph->lp->eq[k], 1 + total);
4214 0 : isl_int_set_si(graph->lp->eq[k][0], -n_edge);
4215 0 : isl_int_set_si(graph->lp->eq[k][1], 1);
4216 0 : for (i = 0; i < n_edge; ++i)
4217 0 : isl_int_set_si(graph->lp->eq[k][4 + i], 1);
4218 :
4219 0 : if (add_param_sum_constraint(graph, 1) < 0)
4220 0 : return isl_stat_error;
4221 0 : if (add_var_sum_constraint(graph, 2) < 0)
4222 0 : return isl_stat_error;
4223 :
4224 0 : for (i = 0; i < n_edge; ++i) {
4225 0 : k = isl_basic_set_alloc_inequality(graph->lp);
4226 0 : if (k < 0)
4227 0 : return isl_stat_error;
4228 0 : isl_seq_clr(graph->lp->ineq[k], 1 + total);
4229 0 : isl_int_set_si(graph->lp->ineq[k][4 + i], -1);
4230 0 : isl_int_set_si(graph->lp->ineq[k][0], 1);
4231 : }
4232 :
4233 0 : if (add_all_constraints(ctx, graph, intra, inter, carry_inter) < 0)
4234 0 : return isl_stat_error;
4235 :
4236 0 : return isl_stat_ok;
4237 : }
4238 :
4239 : static __isl_give isl_schedule_node *compute_component_schedule(
4240 : __isl_take isl_schedule_node *node, struct isl_sched_graph *graph,
4241 : int wcc);
4242 :
4243 : /* If the schedule_split_scaled option is set and if the linear
4244 : * parts of the scheduling rows for all nodes in the graphs have
4245 : * a non-trivial common divisor, then remove this
4246 : * common divisor from the linear part.
4247 : * Otherwise, insert a band node directly and continue with
4248 : * the construction of the schedule.
4249 : *
4250 : * If a non-trivial common divisor is found, then
4251 : * the linear part is reduced and the remainder is ignored.
4252 : * The pieces of the graph that are assigned different remainders
4253 : * form (groups of) strongly connected components within
4254 : * the scaled down band. If needed, they can therefore
4255 : * be ordered along this remainder in a sequence node.
4256 : * However, this ordering is not enforced here in order to allow
4257 : * the scheduler to combine some of the strongly connected components.
4258 : */
4259 0 : static __isl_give isl_schedule_node *split_scaled(
4260 : __isl_take isl_schedule_node *node, struct isl_sched_graph *graph)
4261 : {
4262 : int i;
4263 : int row;
4264 : isl_ctx *ctx;
4265 : isl_int gcd, gcd_i;
4266 :
4267 0 : if (!node)
4268 0 : return NULL;
4269 :
4270 0 : ctx = isl_schedule_node_get_ctx(node);
4271 0 : if (!ctx->opt->schedule_split_scaled)
4272 0 : return compute_next_band(node, graph, 0);
4273 0 : if (graph->n <= 1)
4274 0 : return compute_next_band(node, graph, 0);
4275 :
4276 0 : isl_int_init(gcd);
4277 0 : isl_int_init(gcd_i);
4278 :
4279 0 : isl_int_set_si(gcd, 0);
4280 :
4281 0 : row = isl_mat_rows(graph->node[0].sched) - 1;
4282 :
4283 0 : for (i = 0; i < graph->n; ++i) {
4284 0 : struct isl_sched_node *node = &graph->node[i];
4285 0 : int cols = isl_mat_cols(node->sched);
4286 :
4287 0 : isl_seq_gcd(node->sched->row[row] + 1, cols - 1, &gcd_i);
4288 0 : isl_int_gcd(gcd, gcd, gcd_i);
4289 : }
4290 :
4291 0 : isl_int_clear(gcd_i);
4292 :
4293 0 : if (isl_int_cmp_si(gcd, 1) <= 0) {
4294 0 : isl_int_clear(gcd);
4295 0 : return compute_next_band(node, graph, 0);
4296 : }
4297 :
4298 0 : for (i = 0; i < graph->n; ++i) {
4299 0 : struct isl_sched_node *node = &graph->node[i];
4300 :
4301 0 : isl_int_fdiv_q(node->sched->row[row][0],
4302 : node->sched->row[row][0], gcd);
4303 0 : isl_int_mul(node->sched->row[row][0],
4304 : node->sched->row[row][0], gcd);
4305 0 : node->sched = isl_mat_scale_down_row(node->sched, row, gcd);
4306 0 : if (!node->sched)
4307 0 : goto error;
4308 : }
4309 :
4310 0 : isl_int_clear(gcd);
4311 :
4312 0 : return compute_next_band(node, graph, 0);
4313 : error:
4314 0 : isl_int_clear(gcd);
4315 0 : return isl_schedule_node_free(node);
4316 : }
4317 :
4318 : /* Is the schedule row "sol" trivial on node "node"?
4319 : * That is, is the solution zero on the dimensions linearly independent of
4320 : * the previously found solutions?
4321 : * Return 1 if the solution is trivial, 0 if it is not and -1 on error.
4322 : *
4323 : * Each coefficient is represented as the difference between
4324 : * two non-negative values in "sol".
4325 : * We construct the schedule row s and check if it is linearly
4326 : * independent of previously computed schedule rows
4327 : * by computing T s, with T the linear combinations that are zero
4328 : * on linearly dependent schedule rows.
4329 : * If the result consists of all zeros, then the solution is trivial.
4330 : */
4331 0 : static int is_trivial(struct isl_sched_node *node, __isl_keep isl_vec *sol)
4332 : {
4333 : int trivial;
4334 : isl_vec *node_sol;
4335 :
4336 0 : if (!sol)
4337 0 : return -1;
4338 0 : if (node->nvar == node->rank)
4339 0 : return 0;
4340 :
4341 0 : node_sol = extract_var_coef(node, sol);
4342 0 : node_sol = isl_mat_vec_product(isl_mat_copy(node->indep), node_sol);
4343 0 : if (!node_sol)
4344 0 : return -1;
4345 :
4346 0 : trivial = isl_seq_first_non_zero(node_sol->el,
4347 0 : node->nvar - node->rank) == -1;
4348 :
4349 0 : isl_vec_free(node_sol);
4350 :
4351 0 : return trivial;
4352 : }
4353 :
4354 : /* Is the schedule row "sol" trivial on any node where it should
4355 : * not be trivial?
4356 : * Return 1 if any solution is trivial, 0 if they are not and -1 on error.
4357 : */
4358 0 : static int is_any_trivial(struct isl_sched_graph *graph,
4359 : __isl_keep isl_vec *sol)
4360 : {
4361 : int i;
4362 :
4363 0 : for (i = 0; i < graph->n; ++i) {
4364 0 : struct isl_sched_node *node = &graph->node[i];
4365 : int trivial;
4366 :
4367 0 : if (!needs_row(graph, node))
4368 0 : continue;
4369 0 : trivial = is_trivial(node, sol);
4370 0 : if (trivial < 0 || trivial)
4371 0 : return trivial;
4372 : }
4373 :
4374 0 : return 0;
4375 : }
4376 :
4377 : /* Does the schedule represented by "sol" perform loop coalescing on "node"?
4378 : * If so, return the position of the coalesced dimension.
4379 : * Otherwise, return node->nvar or -1 on error.
4380 : *
4381 : * In particular, look for pairs of coefficients c_i and c_j such that
4382 : * |c_j/c_i| > ceil(size_i/2), i.e., |c_j| > |c_i * ceil(size_i/2)|.
4383 : * If any such pair is found, then return i.
4384 : * If size_i is infinity, then no check on c_i needs to be performed.
4385 : */
4386 0 : static int find_node_coalescing(struct isl_sched_node *node,
4387 : __isl_keep isl_vec *sol)
4388 : {
4389 : int i, j;
4390 : isl_int max;
4391 : isl_vec *csol;
4392 :
4393 0 : if (node->nvar <= 1)
4394 0 : return node->nvar;
4395 :
4396 0 : csol = extract_var_coef(node, sol);
4397 0 : if (!csol)
4398 0 : return -1;
4399 0 : isl_int_init(max);
4400 0 : for (i = 0; i < node->nvar; ++i) {
4401 : isl_val *v;
4402 :
4403 0 : if (isl_int_is_zero(csol->el[i]))
4404 0 : continue;
4405 0 : v = isl_multi_val_get_val(node->sizes, i);
4406 0 : if (!v)
4407 0 : goto error;
4408 0 : if (!isl_val_is_int(v)) {
4409 0 : isl_val_free(v);
4410 0 : continue;
4411 : }
4412 0 : v = isl_val_div_ui(v, 2);
4413 0 : v = isl_val_ceil(v);
4414 0 : if (!v)
4415 0 : goto error;
4416 0 : isl_int_mul(max, v->n, csol->el[i]);
4417 0 : isl_val_free(v);
4418 :
4419 0 : for (j = 0; j < node->nvar; ++j) {
4420 0 : if (j == i)
4421 0 : continue;
4422 0 : if (isl_int_abs_gt(csol->el[j], max))
4423 0 : break;
4424 : }
4425 0 : if (j < node->nvar)
4426 0 : break;
4427 : }
4428 :
4429 0 : isl_int_clear(max);
4430 0 : isl_vec_free(csol);
4431 0 : return i;
4432 : error:
4433 0 : isl_int_clear(max);
4434 0 : isl_vec_free(csol);
4435 0 : return -1;
4436 : }
4437 :
4438 : /* Force the schedule coefficient at position "pos" of "node" to be zero
4439 : * in "tl".
4440 : * The coefficient is encoded as the difference between two non-negative
4441 : * variables. Force these two variables to have the same value.
4442 : */
4443 0 : static __isl_give isl_tab_lexmin *zero_out_node_coef(
4444 : __isl_take isl_tab_lexmin *tl, struct isl_sched_node *node, int pos)
4445 : {
4446 : int dim;
4447 : isl_ctx *ctx;
4448 : isl_vec *eq;
4449 :
4450 0 : ctx = isl_space_get_ctx(node->space);
4451 0 : dim = isl_tab_lexmin_dim(tl);
4452 0 : if (dim < 0)
4453 0 : return isl_tab_lexmin_free(tl);
4454 0 : eq = isl_vec_alloc(ctx, 1 + dim);
4455 0 : eq = isl_vec_clr(eq);
4456 0 : if (!eq)
4457 0 : return isl_tab_lexmin_free(tl);
4458 :
4459 0 : pos = 1 + node_var_coef_pos(node, pos);
4460 0 : isl_int_set_si(eq->el[pos], 1);
4461 0 : isl_int_set_si(eq->el[pos + 1], -1);
4462 0 : tl = isl_tab_lexmin_add_eq(tl, eq->el);
4463 0 : isl_vec_free(eq);
4464 :
4465 0 : return tl;
4466 : }
4467 :
4468 : /* Return the lexicographically smallest rational point in the basic set
4469 : * from which "tl" was constructed, double checking that this input set
4470 : * was not empty.
4471 : */
4472 0 : static __isl_give isl_vec *non_empty_solution(__isl_keep isl_tab_lexmin *tl)
4473 : {
4474 : isl_vec *sol;
4475 :
4476 0 : sol = isl_tab_lexmin_get_solution(tl);
4477 0 : if (!sol)
4478 0 : return NULL;
4479 0 : if (sol->size == 0)
4480 0 : isl_die(isl_vec_get_ctx(sol), isl_error_internal,
4481 : "error in schedule construction",
4482 : return isl_vec_free(sol));
4483 0 : return sol;
4484 : }
4485 :
4486 : /* Does the solution "sol" of the LP problem constructed by setup_carry_lp
4487 : * carry any of the "n_edge" groups of dependences?
4488 : * The value in the first position is the sum of (1 - e_i) over all "n_edge"
4489 : * edges, with 0 <= e_i <= 1 equal to 1 when the dependences represented
4490 : * by the edge are carried by the solution.
4491 : * If the sum of the (1 - e_i) is smaller than "n_edge" then at least
4492 : * one of those is carried.
4493 : *
4494 : * Note that despite the fact that the problem is solved using a rational
4495 : * solver, the solution is guaranteed to be integral.
4496 : * Specifically, the dependence distance lower bounds e_i (and therefore
4497 : * also their sum) are integers. See Lemma 5 of [1].
4498 : *
4499 : * Any potential denominator of the sum is cleared by this function.
4500 : * The denominator is not relevant for any of the other elements
4501 : * in the solution.
4502 : *
4503 : * [1] P. Feautrier, Some Efficient Solutions to the Affine Scheduling
4504 : * Problem, Part II: Multi-Dimensional Time.
4505 : * In Intl. Journal of Parallel Programming, 1992.
4506 : */
4507 0 : static int carries_dependences(__isl_keep isl_vec *sol, int n_edge)
4508 : {
4509 0 : isl_int_divexact(sol->el[1], sol->el[1], sol->el[0]);
4510 0 : isl_int_set_si(sol->el[0], 1);
4511 0 : return isl_int_cmp_si(sol->el[1], n_edge) < 0;
4512 : }
4513 :
4514 : /* Return the lexicographically smallest rational point in "lp",
4515 : * assuming that all variables are non-negative and performing some
4516 : * additional sanity checks.
4517 : * If "want_integral" is set, then compute the lexicographically smallest
4518 : * integer point instead.
4519 : * In particular, "lp" should not be empty by construction.
4520 : * Double check that this is the case.
4521 : * If dependences are not carried for any of the "n_edge" edges,
4522 : * then return an empty vector.
4523 : *
4524 : * If the schedule_treat_coalescing option is set and
4525 : * if the computed schedule performs loop coalescing on a given node,
4526 : * i.e., if it is of the form
4527 : *
4528 : * c_i i + c_j j + ...
4529 : *
4530 : * with |c_j/c_i| >= size_i, then force the coefficient c_i to be zero
4531 : * to cut out this solution. Repeat this process until no more loop
4532 : * coalescing occurs or until no more dependences can be carried.
4533 : * In the latter case, revert to the previously computed solution.
4534 : *
4535 : * If the caller requests an integral solution and if coalescing should
4536 : * be treated, then perform the coalescing treatment first as
4537 : * an integral solution computed before coalescing treatment
4538 : * would carry the same number of edges and would therefore probably
4539 : * also be coalescing.
4540 : *
4541 : * To allow the coalescing treatment to be performed first,
4542 : * the initial solution is allowed to be rational and it is only
4543 : * cut out (if needed) in the next iteration, if no coalescing measures
4544 : * were taken.
4545 : */
4546 0 : static __isl_give isl_vec *non_neg_lexmin(struct isl_sched_graph *graph,
4547 : __isl_take isl_basic_set *lp, int n_edge, int want_integral)
4548 : {
4549 : int i, pos, cut;
4550 : isl_ctx *ctx;
4551 : isl_tab_lexmin *tl;
4552 0 : isl_vec *sol = NULL, *prev;
4553 : int treat_coalescing;
4554 : int try_again;
4555 :
4556 0 : if (!lp)
4557 0 : return NULL;
4558 0 : ctx = isl_basic_set_get_ctx(lp);
4559 0 : treat_coalescing = isl_options_get_schedule_treat_coalescing(ctx);
4560 0 : tl = isl_tab_lexmin_from_basic_set(lp);
4561 :
4562 0 : cut = 0;
4563 : do {
4564 : int integral;
4565 :
4566 0 : try_again = 0;
4567 0 : if (cut)
4568 0 : tl = isl_tab_lexmin_cut_to_integer(tl);
4569 0 : prev = sol;
4570 0 : sol = non_empty_solution(tl);
4571 0 : if (!sol)
4572 0 : goto error;
4573 :
4574 0 : integral = isl_int_is_one(sol->el[0]);
4575 0 : if (!carries_dependences(sol, n_edge)) {
4576 0 : if (!prev)
4577 0 : prev = isl_vec_alloc(ctx, 0);
4578 0 : isl_vec_free(sol);
4579 0 : sol = prev;
4580 0 : break;
4581 : }
4582 0 : prev = isl_vec_free(prev);
4583 0 : cut = want_integral && !integral;
4584 0 : if (cut)
4585 0 : try_again = 1;
4586 0 : if (!treat_coalescing)
4587 0 : continue;
4588 0 : for (i = 0; i < graph->n; ++i) {
4589 0 : struct isl_sched_node *node = &graph->node[i];
4590 :
4591 0 : pos = find_node_coalescing(node, sol);
4592 0 : if (pos < 0)
4593 0 : goto error;
4594 0 : if (pos < node->nvar)
4595 0 : break;
4596 : }
4597 0 : if (i < graph->n) {
4598 0 : try_again = 1;
4599 0 : tl = zero_out_node_coef(tl, &graph->node[i], pos);
4600 0 : cut = 0;
4601 : }
4602 0 : } while (try_again);
4603 :
4604 0 : isl_tab_lexmin_free(tl);
4605 :
4606 0 : return sol;
4607 : error:
4608 0 : isl_tab_lexmin_free(tl);
4609 0 : isl_vec_free(prev);
4610 0 : isl_vec_free(sol);
4611 0 : return NULL;
4612 : }
4613 :
4614 : /* If "edge" is an edge from a node to itself, then add the corresponding
4615 : * dependence relation to "umap".
4616 : * If "node" has been compressed, then the dependence relation
4617 : * is also compressed first.
4618 : */
4619 0 : static __isl_give isl_union_map *add_intra(__isl_take isl_union_map *umap,
4620 : struct isl_sched_edge *edge)
4621 : {
4622 : isl_map *map;
4623 0 : struct isl_sched_node *node = edge->src;
4624 :
4625 0 : if (edge->src != edge->dst)
4626 0 : return umap;
4627 :
4628 0 : map = isl_map_copy(edge->map);
4629 0 : if (node->compressed) {
4630 0 : map = isl_map_preimage_domain_multi_aff(map,
4631 : isl_multi_aff_copy(node->decompress));
4632 0 : map = isl_map_preimage_range_multi_aff(map,
4633 : isl_multi_aff_copy(node->decompress));
4634 : }
4635 0 : umap = isl_union_map_add_map(umap, map);
4636 0 : return umap;
4637 : }
4638 :
4639 : /* If "edge" is an edge from a node to another node, then add the corresponding
4640 : * dependence relation to "umap".
4641 : * If the source or destination nodes of "edge" have been compressed,
4642 : * then the dependence relation is also compressed first.
4643 : */
4644 0 : static __isl_give isl_union_map *add_inter(__isl_take isl_union_map *umap,
4645 : struct isl_sched_edge *edge)
4646 : {
4647 : isl_map *map;
4648 :
4649 0 : if (edge->src == edge->dst)
4650 0 : return umap;
4651 :
4652 0 : map = isl_map_copy(edge->map);
4653 0 : if (edge->src->compressed)
4654 0 : map = isl_map_preimage_domain_multi_aff(map,
4655 0 : isl_multi_aff_copy(edge->src->decompress));
4656 0 : if (edge->dst->compressed)
4657 0 : map = isl_map_preimage_range_multi_aff(map,
4658 0 : isl_multi_aff_copy(edge->dst->decompress));
4659 0 : umap = isl_union_map_add_map(umap, map);
4660 0 : return umap;
4661 : }
4662 :
/* Internal data structure used by union_drop_coalescing_constraints
 * to collect bounds on all relevant statements.
 *
 * "ctx" is the isl_ctx that collect_bounds passes on to
 * graph_find_compressed_node when looking up the node of a set.
 * "graph" is the schedule constraint graph for which an LP problem
 * is being constructed.
 * "bounds" collects the bounds.
 *
 * Note that union_drop_coalescing_constraints initializes the first
 * two fields positionally, so the order of the fields matters.
 */
struct isl_collect_bounds_data {
	isl_ctx *ctx;
	struct isl_sched_graph *graph;
	isl_union_set *bounds;
};
4675 :
4676 : /* Add the size bounds for the node with instance deltas in "set"
4677 : * to data->bounds.
4678 : */
4679 0 : static isl_stat collect_bounds(__isl_take isl_set *set, void *user)
4680 : {
4681 0 : struct isl_collect_bounds_data *data = user;
4682 : struct isl_sched_node *node;
4683 : isl_space *space;
4684 : isl_set *bounds;
4685 :
4686 0 : space = isl_set_get_space(set);
4687 0 : isl_set_free(set);
4688 :
4689 0 : node = graph_find_compressed_node(data->ctx, data->graph, space);
4690 0 : isl_space_free(space);
4691 :
4692 0 : bounds = isl_set_from_basic_set(get_size_bounds(node));
4693 0 : data->bounds = isl_union_set_add_set(data->bounds, bounds);
4694 :
4695 0 : return isl_stat_ok;
4696 : }
4697 :
4698 : /* Drop some constraints from "delta" that could be exploited
4699 : * to construct loop coalescing schedules.
4700 : * In particular, drop those constraint that bound the difference
4701 : * to the size of the domain.
4702 : * Do this for each set/node in "delta" separately.
4703 : * The parameters are assumed to have been projected out by the caller.
4704 : */
4705 0 : static __isl_give isl_union_set *union_drop_coalescing_constraints(isl_ctx *ctx,
4706 : struct isl_sched_graph *graph, __isl_take isl_union_set *delta)
4707 : {
4708 0 : struct isl_collect_bounds_data data = { ctx, graph };
4709 :
4710 0 : data.bounds = isl_union_set_empty(isl_space_params_alloc(ctx, 0));
4711 0 : if (isl_union_set_foreach_set(delta, &collect_bounds, &data) < 0)
4712 0 : data.bounds = isl_union_set_free(data.bounds);
4713 0 : delta = isl_union_set_plain_gist(delta, data.bounds);
4714 :
4715 0 : return delta;
4716 : }
4717 :
4718 : /* Given a non-trivial lineality space "lineality", add the corresponding
4719 : * universe set to data->mask and add a map from elements to
4720 : * other elements along the lines in "lineality" to data->equivalent.
4721 : * If this is the first time this function gets called
4722 : * (data->any_non_trivial is still false), then set data->any_non_trivial and
4723 : * initialize data->mask and data->equivalent.
4724 : *
4725 : * In particular, if the lineality space is defined by equality constraints
4726 : *
4727 : * E x = 0
4728 : *
4729 : * then construct an affine mapping
4730 : *
4731 : * f : x -> E x
4732 : *
4733 : * and compute the equivalence relation of having the same image under f:
4734 : *
4735 : * { x -> x' : E x = E x' }
4736 : */
4737 0 : static isl_stat add_non_trivial_lineality(__isl_take isl_basic_set *lineality,
4738 : struct isl_exploit_lineality_data *data)
4739 : {
4740 : isl_mat *eq;
4741 : isl_space *space;
4742 : isl_set *univ;
4743 : isl_multi_aff *ma;
4744 : isl_multi_pw_aff *mpa;
4745 : isl_map *map;
4746 : int n;
4747 :
4748 0 : if (!lineality)
4749 0 : return isl_stat_error;
4750 0 : if (isl_basic_set_dim(lineality, isl_dim_div) != 0)
4751 0 : isl_die(isl_basic_set_get_ctx(lineality), isl_error_internal,
4752 : "local variables not allowed", goto error);
4753 :
4754 0 : space = isl_basic_set_get_space(lineality);
4755 0 : if (!data->any_non_trivial) {
4756 0 : data->equivalent = isl_union_map_empty(isl_space_copy(space));
4757 0 : data->mask = isl_union_set_empty(isl_space_copy(space));
4758 : }
4759 0 : data->any_non_trivial = isl_bool_true;
4760 :
4761 0 : univ = isl_set_universe(isl_space_copy(space));
4762 0 : data->mask = isl_union_set_add_set(data->mask, univ);
4763 :
4764 0 : eq = isl_basic_set_extract_equalities(lineality);
4765 0 : n = isl_mat_rows(eq);
4766 0 : eq = isl_mat_insert_zero_rows(eq, 0, 1);
4767 0 : eq = isl_mat_set_element_si(eq, 0, 0, 1);
4768 0 : space = isl_space_from_domain(space);
4769 0 : space = isl_space_add_dims(space, isl_dim_out, n);
4770 0 : ma = isl_multi_aff_from_aff_mat(space, eq);
4771 0 : mpa = isl_multi_pw_aff_from_multi_aff(ma);
4772 0 : map = isl_multi_pw_aff_eq_map(mpa, isl_multi_pw_aff_copy(mpa));
4773 0 : data->equivalent = isl_union_map_add_map(data->equivalent, map);
4774 :
4775 0 : isl_basic_set_free(lineality);
4776 0 : return isl_stat_ok;
4777 : error:
4778 0 : isl_basic_set_free(lineality);
4779 0 : return isl_stat_error;
4780 : }
4781 :
4782 : /* Check if the lineality space "set" is non-trivial (i.e., is not just
4783 : * the origin or, in other words, satisfies a number of equality constraints
4784 : * that is smaller than the dimension of the set).
4785 : * If so, extend data->mask and data->equivalent accordingly.
4786 : *
4787 : * The input should not have any local variables already, but
4788 : * isl_set_remove_divs is called to make sure it does not.
4789 : */
4790 0 : static isl_stat add_lineality(__isl_take isl_set *set, void *user)
4791 : {
4792 0 : struct isl_exploit_lineality_data *data = user;
4793 : isl_basic_set *hull;
4794 : int dim, n_eq;
4795 :
4796 0 : set = isl_set_remove_divs(set);
4797 0 : hull = isl_set_unshifted_simple_hull(set);
4798 0 : dim = isl_basic_set_dim(hull, isl_dim_set);
4799 0 : n_eq = isl_basic_set_n_equality(hull);
4800 0 : if (!hull)
4801 0 : return isl_stat_error;
4802 0 : if (dim != n_eq)
4803 0 : return add_non_trivial_lineality(hull, data);
4804 0 : isl_basic_set_free(hull);
4805 0 : return isl_stat_ok;
4806 : }
4807 :
4808 : /* Check if the difference set on intra-node schedule constraints "intra"
4809 : * has any non-trivial lineality space.
4810 : * If so, then extend the difference set to a difference set
4811 : * on equivalent elements. That is, if "intra" is
4812 : *
4813 : * { y - x : (x,y) \in V }
4814 : *
4815 : * and elements are equivalent if they have the same image under f,
4816 : * then return
4817 : *
4818 : * { y' - x' : (x,y) \in V and f(x) = f(x') and f(y) = f(y') }
4819 : *
4820 : * or, since f is linear,
4821 : *
4822 : * { y' - x' : (x,y) \in V and f(y - x) = f(y' - x') }
4823 : *
4824 : * The results of the search for non-trivial lineality spaces is stored
4825 : * in "data".
4826 : */
4827 0 : static __isl_give isl_union_set *exploit_intra_lineality(
4828 : __isl_take isl_union_set *intra,
4829 : struct isl_exploit_lineality_data *data)
4830 : {
4831 : isl_union_set *lineality;
4832 : isl_union_set *uset;
4833 :
4834 0 : data->any_non_trivial = isl_bool_false;
4835 0 : lineality = isl_union_set_copy(intra);
4836 0 : lineality = isl_union_set_combined_lineality_space(lineality);
4837 0 : if (isl_union_set_foreach_set(lineality, &add_lineality, data) < 0)
4838 0 : data->any_non_trivial = isl_bool_error;
4839 0 : isl_union_set_free(lineality);
4840 :
4841 0 : if (data->any_non_trivial < 0)
4842 0 : return isl_union_set_free(intra);
4843 0 : if (!data->any_non_trivial)
4844 0 : return intra;
4845 :
4846 0 : uset = isl_union_set_copy(intra);
4847 0 : intra = isl_union_set_subtract(intra, isl_union_set_copy(data->mask));
4848 0 : uset = isl_union_set_apply(uset, isl_union_map_copy(data->equivalent));
4849 0 : intra = isl_union_set_union(intra, uset);
4850 :
4851 0 : intra = isl_union_set_remove_divs(intra);
4852 :
4853 0 : return intra;
4854 : }
4855 :
4856 : /* If the difference set on intra-node schedule constraints was found to have
4857 : * any non-trivial lineality space by exploit_intra_lineality,
4858 : * as recorded in "data", then extend the inter-node
4859 : * schedule constraints "inter" to schedule constraints on equivalent elements.
4860 : * That is, if "inter" is V and
4861 : * elements are equivalent if they have the same image under f, then return
4862 : *
4863 : * { (x', y') : (x,y) \in V and f(x) = f(x') and f(y) = f(y') }
4864 : */
4865 0 : static __isl_give isl_union_map *exploit_inter_lineality(
4866 : __isl_take isl_union_map *inter,
4867 : struct isl_exploit_lineality_data *data)
4868 : {
4869 : isl_union_map *umap;
4870 :
4871 0 : if (data->any_non_trivial < 0)
4872 0 : return isl_union_map_free(inter);
4873 0 : if (!data->any_non_trivial)
4874 0 : return inter;
4875 :
4876 0 : umap = isl_union_map_copy(inter);
4877 0 : inter = isl_union_map_subtract_range(inter,
4878 : isl_union_set_copy(data->mask));
4879 0 : umap = isl_union_map_apply_range(umap,
4880 : isl_union_map_copy(data->equivalent));
4881 0 : inter = isl_union_map_union(inter, umap);
4882 0 : umap = isl_union_map_copy(inter);
4883 0 : inter = isl_union_map_subtract_domain(inter,
4884 : isl_union_set_copy(data->mask));
4885 0 : umap = isl_union_map_apply_range(isl_union_map_copy(data->equivalent),
4886 : umap);
4887 0 : inter = isl_union_map_union(inter, umap);
4888 :
4889 0 : inter = isl_union_map_remove_divs(inter);
4890 :
4891 0 : return inter;
4892 : }
4893 :
4894 : /* For each (conditional) validity edge in "graph",
4895 : * add the corresponding dependence relation using "add"
4896 : * to a collection of dependence relations and return the result.
4897 : * If "coincidence" is set, then coincidence edges are considered as well.
4898 : */
4899 0 : static __isl_give isl_union_map *collect_validity(struct isl_sched_graph *graph,
4900 : __isl_give isl_union_map *(*add)(__isl_take isl_union_map *umap,
4901 : struct isl_sched_edge *edge), int coincidence)
4902 : {
4903 : int i;
4904 : isl_space *space;
4905 : isl_union_map *umap;
4906 :
4907 0 : space = isl_space_copy(graph->node[0].space);
4908 0 : umap = isl_union_map_empty(space);
4909 :
4910 0 : for (i = 0; i < graph->n_edge; ++i) {
4911 0 : struct isl_sched_edge *edge = &graph->edge[i];
4912 :
4913 0 : if (!is_any_validity(edge) &&
4914 0 : (!coincidence || !is_coincidence(edge)))
4915 0 : continue;
4916 :
4917 0 : umap = add(umap, edge);
4918 : }
4919 :
4920 0 : return umap;
4921 : }
4922 :
4923 : /* Project out all parameters from "uset" and return the result.
4924 : */
4925 0 : static __isl_give isl_union_set *union_set_drop_parameters(
4926 : __isl_take isl_union_set *uset)
4927 : {
4928 : unsigned nparam;
4929 :
4930 0 : nparam = isl_union_set_dim(uset, isl_dim_param);
4931 0 : return isl_union_set_project_out(uset, isl_dim_param, 0, nparam);
4932 : }
4933 :
4934 : /* For each dependence relation on a (conditional) validity edge
4935 : * from a node to itself,
4936 : * construct the set of coefficients of valid constraints for elements
4937 : * in that dependence relation and collect the results.
4938 : * If "coincidence" is set, then coincidence edges are considered as well.
4939 : *
4940 : * In particular, for each dependence relation R, constraints
4941 : * on coefficients (c_0, c_x) are constructed such that
4942 : *
4943 : * c_0 + c_x d >= 0 for each d in delta R = { y - x | (x,y) in R }
4944 : *
4945 : * If the schedule_treat_coalescing option is set, then some constraints
4946 : * that could be exploited to construct coalescing schedules
4947 : * are removed before the dual is computed, but after the parameters
4948 : * have been projected out.
4949 : * The entire computation is essentially the same as that performed
4950 : * by intra_coefficients, except that it operates on multiple
4951 : * edges together and that the parameters are always projected out.
4952 : *
4953 : * Additionally, exploit any non-trivial lineality space
4954 : * in the difference set after removing coalescing constraints and
4955 : * store the results of the non-trivial lineality space detection in "data".
4956 : * The procedure is currently run unconditionally, but it is unlikely
4957 : * to find any non-trivial lineality spaces if no coalescing constraints
4958 : * have been removed.
4959 : *
4960 : * Note that if a dependence relation is a union of basic maps,
4961 : * then each basic map needs to be treated individually as it may only
4962 : * be possible to carry the dependences expressed by some of those
4963 : * basic maps and not all of them.
4964 : * The collected validity constraints are therefore not coalesced and
4965 : * it is assumed that they are not coalesced automatically.
4966 : * Duplicate basic maps can be removed, however.
4967 : * In particular, if the same basic map appears as a disjunct
4968 : * in multiple edges, then it only needs to be carried once.
4969 : */
4970 0 : static __isl_give isl_basic_set_list *collect_intra_validity(isl_ctx *ctx,
4971 : struct isl_sched_graph *graph, int coincidence,
4972 : struct isl_exploit_lineality_data *data)
4973 : {
4974 : isl_union_map *intra;
4975 : isl_union_set *delta;
4976 : isl_basic_set_list *list;
4977 :
4978 0 : intra = collect_validity(graph, &add_intra, coincidence);
4979 0 : delta = isl_union_map_deltas(intra);
4980 0 : delta = union_set_drop_parameters(delta);
4981 0 : delta = isl_union_set_remove_divs(delta);
4982 0 : if (isl_options_get_schedule_treat_coalescing(ctx))
4983 0 : delta = union_drop_coalescing_constraints(ctx, graph, delta);
4984 0 : delta = exploit_intra_lineality(delta, data);
4985 0 : list = isl_union_set_get_basic_set_list(delta);
4986 0 : isl_union_set_free(delta);
4987 :
4988 0 : return isl_basic_set_list_coefficients(list);
4989 : }
4990 :
4991 : /* For each dependence relation on a (conditional) validity edge
4992 : * from a node to some other node,
4993 : * construct the set of coefficients of valid constraints for elements
4994 : * in that dependence relation and collect the results.
4995 : * If "coincidence" is set, then coincidence edges are considered as well.
4996 : *
4997 : * In particular, for each dependence relation R, constraints
4998 : * on coefficients (c_0, c_n, c_x, c_y) are constructed such that
4999 : *
5000 : * c_0 + c_n n + c_x x + c_y y >= 0 for each (x,y) in R
5001 : *
5002 : * This computation is essentially the same as that performed
5003 : * by inter_coefficients, except that it operates on multiple
5004 : * edges together.
5005 : *
5006 : * Additionally, exploit any non-trivial lineality space
5007 : * that may have been discovered by collect_intra_validity
5008 : * (as stored in "data").
5009 : *
5010 : * Note that if a dependence relation is a union of basic maps,
5011 : * then each basic map needs to be treated individually as it may only
5012 : * be possible to carry the dependences expressed by some of those
5013 : * basic maps and not all of them.
5014 : * The collected validity constraints are therefore not coalesced and
5015 : * it is assumed that they are not coalesced automatically.
5016 : * Duplicate basic maps can be removed, however.
5017 : * In particular, if the same basic map appears as a disjunct
5018 : * in multiple edges, then it only needs to be carried once.
5019 : */
5020 0 : static __isl_give isl_basic_set_list *collect_inter_validity(
5021 : struct isl_sched_graph *graph, int coincidence,
5022 : struct isl_exploit_lineality_data *data)
5023 : {
5024 : isl_union_map *inter;
5025 : isl_union_set *wrap;
5026 : isl_basic_set_list *list;
5027 :
5028 0 : inter = collect_validity(graph, &add_inter, coincidence);
5029 0 : inter = exploit_inter_lineality(inter, data);
5030 0 : inter = isl_union_map_remove_divs(inter);
5031 0 : wrap = isl_union_map_wrap(inter);
5032 0 : list = isl_union_set_get_basic_set_list(wrap);
5033 0 : isl_union_set_free(wrap);
5034 0 : return isl_basic_set_list_coefficients(list);
5035 : }
5036 :
5037 : /* Construct an LP problem for finding schedule coefficients
5038 : * such that the schedule carries as many of the "n_edge" groups of
5039 : * dependences as possible based on the corresponding coefficient
5040 : * constraints and return the lexicographically smallest non-trivial solution.
5041 : * "intra" is the sequence of coefficient constraints for intra-node edges.
5042 : * "inter" is the sequence of coefficient constraints for inter-node edges.
5043 : * If "want_integral" is set, then compute an integral solution
5044 : * for the coefficients rather than using the numerators
5045 : * of a rational solution.
5046 : * "carry_inter" indicates whether inter-node edges should be carried or
5047 : * only respected.
5048 : *
5049 : * If none of the "n_edge" groups can be carried
5050 : * then return an empty vector.
5051 : */
5052 0 : static __isl_give isl_vec *compute_carrying_sol_coef(isl_ctx *ctx,
5053 : struct isl_sched_graph *graph, int n_edge,
5054 : __isl_keep isl_basic_set_list *intra,
5055 : __isl_keep isl_basic_set_list *inter, int want_integral,
5056 : int carry_inter)
5057 : {
5058 : isl_basic_set *lp;
5059 :
5060 0 : if (setup_carry_lp(ctx, graph, n_edge, intra, inter, carry_inter) < 0)
5061 0 : return NULL;
5062 :
5063 0 : lp = isl_basic_set_copy(graph->lp);
5064 0 : return non_neg_lexmin(graph, lp, n_edge, want_integral);
5065 : }
5066 :
5067 : /* Construct an LP problem for finding schedule coefficients
5068 : * such that the schedule carries as many of the validity dependences
5069 : * as possible and
5070 : * return the lexicographically smallest non-trivial solution.
5071 : * If "fallback" is set, then the carrying is performed as a fallback
5072 : * for the Pluto-like scheduler.
5073 : * If "coincidence" is set, then try and carry coincidence edges as well.
5074 : *
5075 : * The variable "n_edge" stores the number of groups that should be carried.
5076 : * If none of the "n_edge" groups can be carried
5077 : * then return an empty vector.
5078 : * If, moreover, "n_edge" is zero, then the LP problem does not even
5079 : * need to be constructed.
5080 : *
5081 : * If a fallback solution is being computed, then compute an integral solution
5082 : * for the coefficients rather than using the numerators
5083 : * of a rational solution.
5084 : *
5085 : * If a fallback solution is being computed, if there are any intra-node
5086 : * dependences, and if requested by the user, then first try
5087 : * to only carry those intra-node dependences.
5088 : * If this fails to carry any dependences, then try again
5089 : * with the inter-node dependences included.
5090 : */
5091 0 : static __isl_give isl_vec *compute_carrying_sol(isl_ctx *ctx,
5092 : struct isl_sched_graph *graph, int fallback, int coincidence)
5093 : {
5094 : int n_intra, n_inter;
5095 : int n_edge;
5096 0 : struct isl_carry carry = { 0 };
5097 : isl_vec *sol;
5098 :
5099 0 : carry.intra = collect_intra_validity(ctx, graph, coincidence,
5100 : &carry.lineality);
5101 0 : carry.inter = collect_inter_validity(graph, coincidence,
5102 : &carry.lineality);
5103 0 : if (!carry.intra || !carry.inter)
5104 : goto error;
5105 0 : n_intra = isl_basic_set_list_n_basic_set(carry.intra);
5106 0 : n_inter = isl_basic_set_list_n_basic_set(carry.inter);
5107 :
5108 0 : if (fallback && n_intra > 0 &&
5109 0 : isl_options_get_schedule_carry_self_first(ctx)) {
5110 0 : sol = compute_carrying_sol_coef(ctx, graph, n_intra,
5111 : carry.intra, carry.inter, fallback, 0);
5112 0 : if (!sol || sol->size != 0 || n_inter == 0) {
5113 0 : isl_carry_clear(&carry);
5114 0 : return sol;
5115 : }
5116 0 : isl_vec_free(sol);
5117 : }
5118 :
5119 0 : n_edge = n_intra + n_inter;
5120 0 : if (n_edge == 0) {
5121 0 : isl_carry_clear(&carry);
5122 0 : return isl_vec_alloc(ctx, 0);
5123 : }
5124 :
5125 0 : sol = compute_carrying_sol_coef(ctx, graph, n_edge,
5126 : carry.intra, carry.inter, fallback, 1);
5127 0 : isl_carry_clear(&carry);
5128 0 : return sol;
5129 : error:
5130 0 : isl_carry_clear(&carry);
5131 0 : return NULL;
5132 : }
5133 :
5134 : /* Construct a schedule row for each node such that as many validity dependences
5135 : * as possible are carried and then continue with the next band.
5136 : * If "fallback" is set, then the carrying is performed as a fallback
5137 : * for the Pluto-like scheduler.
5138 : * If "coincidence" is set, then try and carry coincidence edges as well.
5139 : *
5140 : * If there are no validity dependences, then no dependence can be carried and
5141 : * the procedure is guaranteed to fail. If there is more than one component,
5142 : * then try computing a schedule on each component separately
5143 : * to prevent or at least postpone this failure.
5144 : *
5145 : * If a schedule row is computed, then check that dependences are carried
5146 : * for at least one of the edges.
5147 : *
5148 : * If the computed schedule row turns out to be trivial on one or
5149 : * more nodes where it should not be trivial, then we throw it away
5150 : * and try again on each component separately.
5151 : *
5152 : * If there is only one component, then we accept the schedule row anyway,
5153 : * but we do not consider it as a complete row and therefore do not
5154 : * increment graph->n_row. Note that the ranks of the nodes that
5155 : * do get a non-trivial schedule part will get updated regardless and
5156 : * graph->maxvar is computed based on these ranks. The test for
5157 : * whether more schedule rows are required in compute_schedule_wcc
5158 : * is therefore not affected.
5159 : *
5160 : * Insert a band corresponding to the schedule row at position "node"
5161 : * of the schedule tree and continue with the construction of the schedule.
5162 : * This insertion and the continued construction is performed by split_scaled
5163 : * after optionally checking for non-trivial common divisors.
5164 : */
5165 0 : static __isl_give isl_schedule_node *carry(__isl_take isl_schedule_node *node,
5166 : struct isl_sched_graph *graph, int fallback, int coincidence)
5167 : {
5168 : int trivial;
5169 : isl_ctx *ctx;
5170 : isl_vec *sol;
5171 :
5172 0 : if (!node)
5173 0 : return NULL;
5174 :
5175 0 : ctx = isl_schedule_node_get_ctx(node);
5176 0 : sol = compute_carrying_sol(ctx, graph, fallback, coincidence);
5177 0 : if (!sol)
5178 0 : return isl_schedule_node_free(node);
5179 0 : if (sol->size == 0) {
5180 0 : isl_vec_free(sol);
5181 0 : if (graph->scc > 1)
5182 0 : return compute_component_schedule(node, graph, 1);
5183 0 : isl_die(ctx, isl_error_unknown, "unable to carry dependences",
5184 : return isl_schedule_node_free(node));
5185 : }
5186 :
5187 0 : trivial = is_any_trivial(graph, sol);
5188 0 : if (trivial < 0) {
5189 0 : sol = isl_vec_free(sol);
5190 0 : } else if (trivial && graph->scc > 1) {
5191 0 : isl_vec_free(sol);
5192 0 : return compute_component_schedule(node, graph, 1);
5193 : }
5194 :
5195 0 : if (update_schedule(graph, sol, 0) < 0)
5196 0 : return isl_schedule_node_free(node);
5197 0 : if (trivial)
5198 0 : graph->n_row--;
5199 :
5200 0 : return split_scaled(node, graph);
5201 : }
5202 :
5203 : /* Construct a schedule row for each node such that as many validity dependences
5204 : * as possible are carried and then continue with the next band.
5205 : * Do so as a fallback for the Pluto-like scheduler.
5206 : * If "coincidence" is set, then try and carry coincidence edges as well.
5207 : */
5208 0 : static __isl_give isl_schedule_node *carry_fallback(
5209 : __isl_take isl_schedule_node *node, struct isl_sched_graph *graph,
5210 : int coincidence)
5211 : {
5212 0 : return carry(node, graph, 1, coincidence);
5213 : }
5214 :
5215 : /* Construct a schedule row for each node such that as many validity dependences
5216 : * as possible are carried and then continue with the next band.
5217 : * Do so for the case where the Feautrier scheduler was selected
5218 : * by the user.
5219 : */
5220 0 : static __isl_give isl_schedule_node *carry_feautrier(
5221 : __isl_take isl_schedule_node *node, struct isl_sched_graph *graph)
5222 : {
5223 0 : return carry(node, graph, 0, 0);
5224 : }
5225 :
5226 : /* Construct a schedule row for each node such that as many validity dependences
5227 : * as possible are carried and then continue with the next band.
5228 : * Do so as a fallback for the Pluto-like scheduler.
5229 : */
5230 0 : static __isl_give isl_schedule_node *carry_dependences(
5231 : __isl_take isl_schedule_node *node, struct isl_sched_graph *graph)
5232 : {
5233 0 : return carry_fallback(node, graph, 0);
5234 : }
5235 :
5236 : /* Construct a schedule row for each node such that as many validity or
5237 : * coincidence dependences as possible are carried and
5238 : * then continue with the next band.
5239 : * Do so as a fallback for the Pluto-like scheduler.
5240 : */
5241 0 : static __isl_give isl_schedule_node *carry_coincidence(
5242 : __isl_take isl_schedule_node *node, struct isl_sched_graph *graph)
5243 : {
5244 0 : return carry_fallback(node, graph, 1);
5245 : }
5246 :
5247 : /* Topologically sort statements mapped to the same schedule iteration
5248 : * and add insert a sequence node in front of "node"
5249 : * corresponding to this order.
5250 : * If "initialized" is set, then it may be assumed that compute_maxvar
5251 : * has been called on the current band. Otherwise, call
5252 : * compute_maxvar if and before carry_dependences gets called.
5253 : *
5254 : * If it turns out to be impossible to sort the statements apart,
5255 : * because different dependences impose different orderings
5256 : * on the statements, then we extend the schedule such that
5257 : * it carries at least one more dependence.
5258 : */
5259 0 : static __isl_give isl_schedule_node *sort_statements(
5260 : __isl_take isl_schedule_node *node, struct isl_sched_graph *graph,
5261 : int initialized)
5262 : {
5263 : isl_ctx *ctx;
5264 : isl_union_set_list *filters;
5265 :
5266 0 : if (!node)
5267 0 : return NULL;
5268 :
5269 0 : ctx = isl_schedule_node_get_ctx(node);
5270 0 : if (graph->n < 1)
5271 0 : isl_die(ctx, isl_error_internal,
5272 : "graph should have at least one node",
5273 : return isl_schedule_node_free(node));
5274 :
5275 0 : if (graph->n == 1)
5276 0 : return node;
5277 :
5278 0 : if (update_edges(ctx, graph) < 0)
5279 0 : return isl_schedule_node_free(node);
5280 :
5281 0 : if (graph->n_edge == 0)
5282 0 : return node;
5283 :
5284 0 : if (detect_sccs(ctx, graph) < 0)
5285 0 : return isl_schedule_node_free(node);
5286 :
5287 0 : next_band(graph);
5288 0 : if (graph->scc < graph->n) {
5289 0 : if (!initialized && compute_maxvar(graph) < 0)
5290 0 : return isl_schedule_node_free(node);
5291 0 : return carry_dependences(node, graph);
5292 : }
5293 :
5294 0 : filters = extract_sccs(ctx, graph);
5295 0 : node = isl_schedule_node_insert_sequence(node, filters);
5296 :
5297 0 : return node;
5298 : }
5299 :
5300 : /* Are there any (non-empty) (conditional) validity edges in the graph?
5301 : */
5302 0 : static int has_validity_edges(struct isl_sched_graph *graph)
5303 : {
5304 : int i;
5305 :
5306 0 : for (i = 0; i < graph->n_edge; ++i) {
5307 : int empty;
5308 :
5309 0 : empty = isl_map_plain_is_empty(graph->edge[i].map);
5310 0 : if (empty < 0)
5311 0 : return -1;
5312 0 : if (empty)
5313 0 : continue;
5314 0 : if (is_any_validity(&graph->edge[i]))
5315 0 : return 1;
5316 : }
5317 :
5318 0 : return 0;
5319 : }
5320 :
5321 : /* Should we apply a Feautrier step?
5322 : * That is, did the user request the Feautrier algorithm and are
5323 : * there any validity dependences (left)?
5324 : */
5325 0 : static int need_feautrier_step(isl_ctx *ctx, struct isl_sched_graph *graph)
5326 : {
5327 0 : if (ctx->opt->schedule_algorithm != ISL_SCHEDULE_ALGORITHM_FEAUTRIER)
5328 0 : return 0;
5329 :
5330 0 : return has_validity_edges(graph);
5331 : }
5332 :
5333 : /* Compute a schedule for a connected dependence graph using Feautrier's
5334 : * multi-dimensional scheduling algorithm and return the updated schedule node.
5335 : *
5336 : * The original algorithm is described in [1].
5337 : * The main idea is to minimize the number of scheduling dimensions, by
5338 : * trying to satisfy as many dependences as possible per scheduling dimension.
5339 : *
5340 : * [1] P. Feautrier, Some Efficient Solutions to the Affine Scheduling
5341 : * Problem, Part II: Multi-Dimensional Time.
5342 : * In Intl. Journal of Parallel Programming, 1992.
5343 : */
5344 0 : static __isl_give isl_schedule_node *compute_schedule_wcc_feautrier(
5345 : isl_schedule_node *node, struct isl_sched_graph *graph)
5346 : {
5347 0 : return carry_feautrier(node, graph);
5348 : }
5349 :
5350 : /* Turn off the "local" bit on all (condition) edges.
5351 : */
5352 0 : static void clear_local_edges(struct isl_sched_graph *graph)
5353 : {
5354 : int i;
5355 :
5356 0 : for (i = 0; i < graph->n_edge; ++i)
5357 0 : if (is_condition(&graph->edge[i]))
5358 0 : clear_local(&graph->edge[i]);
5359 0 : }
5360 :
5361 : /* Does "graph" have both condition and conditional validity edges?
5362 : */
5363 0 : static int need_condition_check(struct isl_sched_graph *graph)
5364 : {
5365 : int i;
5366 0 : int any_condition = 0;
5367 0 : int any_conditional_validity = 0;
5368 :
5369 0 : for (i = 0; i < graph->n_edge; ++i) {
5370 0 : if (is_condition(&graph->edge[i]))
5371 0 : any_condition = 1;
5372 0 : if (is_conditional_validity(&graph->edge[i]))
5373 0 : any_conditional_validity = 1;
5374 : }
5375 :
5376 0 : return any_condition && any_conditional_validity;
5377 : }
5378 :
5379 : /* Does "graph" contain any coincidence edge?
5380 : */
5381 0 : static int has_any_coincidence(struct isl_sched_graph *graph)
5382 : {
5383 : int i;
5384 :
5385 0 : for (i = 0; i < graph->n_edge; ++i)
5386 0 : if (is_coincidence(&graph->edge[i]))
5387 0 : return 1;
5388 :
5389 0 : return 0;
5390 : }
5391 :
5392 : /* Extract the final schedule row as a map with the iteration domain
5393 : * of "node" as domain.
5394 : */
5395 0 : static __isl_give isl_map *final_row(struct isl_sched_node *node)
5396 : {
5397 : isl_multi_aff *ma;
5398 : int row;
5399 :
5400 0 : row = isl_mat_rows(node->sched) - 1;
5401 0 : ma = node_extract_partial_schedule_multi_aff(node, row, 1);
5402 0 : return isl_map_from_multi_aff(ma);
5403 : }
5404 :
5405 : /* Is the conditional validity dependence in the edge with index "edge_index"
5406 : * violated by the latest (i.e., final) row of the schedule?
5407 : * That is, is i scheduled after j
5408 : * for any conditional validity dependence i -> j?
5409 : */
5410 0 : static int is_violated(struct isl_sched_graph *graph, int edge_index)
5411 : {
5412 : isl_map *src_sched, *dst_sched, *map;
5413 0 : struct isl_sched_edge *edge = &graph->edge[edge_index];
5414 : int empty;
5415 :
5416 0 : src_sched = final_row(edge->src);
5417 0 : dst_sched = final_row(edge->dst);
5418 0 : map = isl_map_copy(edge->map);
5419 0 : map = isl_map_apply_domain(map, src_sched);
5420 0 : map = isl_map_apply_range(map, dst_sched);
5421 0 : map = isl_map_order_gt(map, isl_dim_in, 0, isl_dim_out, 0);
5422 0 : empty = isl_map_is_empty(map);
5423 0 : isl_map_free(map);
5424 :
5425 0 : if (empty < 0)
5426 0 : return -1;
5427 :
5428 0 : return !empty;
5429 : }
5430 :
5431 : /* Does "graph" have any satisfied condition edges that
5432 : * are adjacent to the conditional validity constraint with
5433 : * domain "conditional_source" and range "conditional_sink"?
5434 : *
5435 : * A satisfied condition is one that is not local.
5436 : * If a condition was forced to be local already (i.e., marked as local)
5437 : * then there is no need to check if it is in fact local.
5438 : *
5439 : * Additionally, mark all adjacent condition edges found as local.
5440 : */
5441 0 : static int has_adjacent_true_conditions(struct isl_sched_graph *graph,
5442 : __isl_keep isl_union_set *conditional_source,
5443 : __isl_keep isl_union_set *conditional_sink)
5444 : {
5445 : int i;
5446 0 : int any = 0;
5447 :
5448 0 : for (i = 0; i < graph->n_edge; ++i) {
5449 : int adjacent, local;
5450 : isl_union_map *condition;
5451 :
5452 0 : if (!is_condition(&graph->edge[i]))
5453 0 : continue;
5454 0 : if (is_local(&graph->edge[i]))
5455 0 : continue;
5456 :
5457 0 : condition = graph->edge[i].tagged_condition;
5458 0 : adjacent = domain_intersects(condition, conditional_sink);
5459 0 : if (adjacent >= 0 && !adjacent)
5460 0 : adjacent = range_intersects(condition,
5461 : conditional_source);
5462 0 : if (adjacent < 0)
5463 0 : return -1;
5464 0 : if (!adjacent)
5465 0 : continue;
5466 :
5467 0 : set_local(&graph->edge[i]);
5468 :
5469 0 : local = is_condition_false(&graph->edge[i]);
5470 0 : if (local < 0)
5471 0 : return -1;
5472 0 : if (!local)
5473 0 : any = 1;
5474 : }
5475 :
5476 0 : return any;
5477 : }
5478 :
5479 : /* Are there any violated conditional validity dependences with
5480 : * adjacent condition dependences that are not local with respect
5481 : * to the current schedule?
5482 : * That is, is the conditional validity constraint violated?
5483 : *
5484 : * Additionally, mark all those adjacent condition dependences as local.
5485 : * We also mark those adjacent condition dependences that were not marked
5486 : * as local before, but just happened to be local already. This ensures
5487 : * that they remain local if the schedule is recomputed.
5488 : *
5489 : * We first collect domain and range of all violated conditional validity
5490 : * dependences and then check if there are any adjacent non-local
5491 : * condition dependences.
5492 : */
5493 0 : static int has_violated_conditional_constraint(isl_ctx *ctx,
5494 : struct isl_sched_graph *graph)
5495 : {
5496 : int i;
5497 0 : int any = 0;
5498 : isl_union_set *source, *sink;
5499 :
5500 0 : source = isl_union_set_empty(isl_space_params_alloc(ctx, 0));
5501 0 : sink = isl_union_set_empty(isl_space_params_alloc(ctx, 0));
5502 0 : for (i = 0; i < graph->n_edge; ++i) {
5503 : isl_union_set *uset;
5504 : isl_union_map *umap;
5505 : int violated;
5506 :
5507 0 : if (!is_conditional_validity(&graph->edge[i]))
5508 0 : continue;
5509 :
5510 0 : violated = is_violated(graph, i);
5511 0 : if (violated < 0)
5512 0 : goto error;
5513 0 : if (!violated)
5514 0 : continue;
5515 :
5516 0 : any = 1;
5517 :
5518 0 : umap = isl_union_map_copy(graph->edge[i].tagged_validity);
5519 0 : uset = isl_union_map_domain(umap);
5520 0 : source = isl_union_set_union(source, uset);
5521 0 : source = isl_union_set_coalesce(source);
5522 :
5523 0 : umap = isl_union_map_copy(graph->edge[i].tagged_validity);
5524 0 : uset = isl_union_map_range(umap);
5525 0 : sink = isl_union_set_union(sink, uset);
5526 0 : sink = isl_union_set_coalesce(sink);
5527 : }
5528 :
5529 0 : if (any)
5530 0 : any = has_adjacent_true_conditions(graph, source, sink);
5531 :
5532 0 : isl_union_set_free(source);
5533 0 : isl_union_set_free(sink);
5534 0 : return any;
5535 : error:
5536 0 : isl_union_set_free(source);
5537 0 : isl_union_set_free(sink);
5538 0 : return -1;
5539 : }
5540 :
5541 : /* Examine the current band (the rows between graph->band_start and
5542 : * graph->n_total_row), deciding whether to drop it or add it to "node"
5543 : * and then continue with the computation of the next band, if any.
5544 : * If "initialized" is set, then it may be assumed that compute_maxvar
5545 : * has been called on the current band. Otherwise, call
5546 : * compute_maxvar if and before carry_dependences gets called.
5547 : *
5548 : * The caller keeps looking for a new row as long as
5549 : * graph->n_row < graph->maxvar. If the latest attempt to find
5550 : * such a row failed (i.e., we still have graph->n_row < graph->maxvar),
5551 : * then we either
5552 : * - split between SCCs and start over (assuming we found an interesting
5553 : * pair of SCCs between which to split)
5554 : * - continue with the next band (assuming the current band has at least
5555 : * one row)
5556 : * - if there is more than one SCC left, then split along all SCCs
5557 : * - if outer coincidence needs to be enforced, then try to carry as many
5558 : * validity or coincidence dependences as possible and
5559 : * continue with the next band
5560 : * - try to carry as many validity dependences as possible and
5561 : * continue with the next band
5562 : * In each case, we first insert a band node in the schedule tree
5563 : * if any rows have been computed.
5564 : *
5565 : * If the caller managed to complete the schedule and the current band
5566 : * is empty, then finish off by topologically
5567 : * sorting the statements based on the remaining dependences.
5568 : * If, on the other hand, the current band has at least one row,
5569 : * then continue with the next band. Note that this next band
5570 : * will necessarily be empty, but the graph may still be split up
5571 : * into weakly connected components before arriving back here.
5572 : */
5573 0 : static __isl_give isl_schedule_node *compute_schedule_finish_band(
5574 : __isl_take isl_schedule_node *node, struct isl_sched_graph *graph,
5575 : int initialized)
5576 : {
5577 : int empty;
5578 :
5579 0 : if (!node)
5580 0 : return NULL;
5581 :
5582 0 : empty = graph->n_total_row == graph->band_start;
5583 0 : if (graph->n_row < graph->maxvar) {
5584 : isl_ctx *ctx;
5585 :
5586 0 : ctx = isl_schedule_node_get_ctx(node);
5587 0 : if (!ctx->opt->schedule_maximize_band_depth && !empty)
5588 0 : return compute_next_band(node, graph, 1);
5589 0 : if (graph->src_scc >= 0)
5590 0 : return compute_split_schedule(node, graph);
5591 0 : if (!empty)
5592 0 : return compute_next_band(node, graph, 1);
5593 0 : if (graph->scc > 1)
5594 0 : return compute_component_schedule(node, graph, 1);
5595 0 : if (!initialized && compute_maxvar(graph) < 0)
5596 0 : return isl_schedule_node_free(node);
5597 0 : if (isl_options_get_schedule_outer_coincidence(ctx))
5598 0 : return carry_coincidence(node, graph);
5599 0 : return carry_dependences(node, graph);
5600 : }
5601 :
5602 0 : if (!empty)
5603 0 : return compute_next_band(node, graph, 1);
5604 0 : return sort_statements(node, graph, initialized);
5605 : }
5606 :
5607 : /* Construct a band of schedule rows for a connected dependence graph.
5608 : * The caller is responsible for determining the strongly connected
5609 : * components and calling compute_maxvar first.
5610 : *
5611 : * We try to find a sequence of as many schedule rows as possible that result
5612 : * in non-negative dependence distances (independent of the previous rows
5613 : * in the sequence, i.e., such that the sequence is tilable), with as
5614 : * many of the initial rows as possible satisfying the coincidence constraints.
5615 : * The computation stops if we can't find any more rows or if we have found
5616 : * all the rows we wanted to find.
5617 : *
5618 : * If ctx->opt->schedule_outer_coincidence is set, then we force the
5619 : * outermost dimension to satisfy the coincidence constraints. If this
5620 : * turns out to be impossible, we fall back on the general scheme above
5621 : * and try to carry as many dependences as possible.
5622 : *
5623 : * If "graph" contains both condition and conditional validity dependences,
5624 : * then we need to check that that the conditional schedule constraint
5625 : * is satisfied, i.e., there are no violated conditional validity dependences
5626 : * that are adjacent to any non-local condition dependences.
5627 : * If there are, then we mark all those adjacent condition dependences
5628 : * as local and recompute the current band. Those dependences that
5629 : * are marked local will then be forced to be local.
5630 : * The initial computation is performed with no dependences marked as local.
5631 : * If we are lucky, then there will be no violated conditional validity
5632 : * dependences adjacent to any non-local condition dependences.
5633 : * Otherwise, we mark some additional condition dependences as local and
5634 : * recompute. We continue this process until there are no violations left or
5635 : * until we are no longer able to compute a schedule.
5636 : * Since there are only a finite number of dependences,
5637 : * there will only be a finite number of iterations.
5638 : */
5639 0 : static isl_stat compute_schedule_wcc_band(isl_ctx *ctx,
5640 : struct isl_sched_graph *graph)
5641 : {
5642 : int has_coincidence;
5643 : int use_coincidence;
5644 0 : int force_coincidence = 0;
5645 : int check_conditional;
5646 :
5647 0 : if (sort_sccs(graph) < 0)
5648 0 : return isl_stat_error;
5649 :
5650 0 : clear_local_edges(graph);
5651 0 : check_conditional = need_condition_check(graph);
5652 0 : has_coincidence = has_any_coincidence(graph);
5653 :
5654 0 : if (ctx->opt->schedule_outer_coincidence)
5655 0 : force_coincidence = 1;
5656 :
5657 0 : use_coincidence = has_coincidence;
5658 0 : while (graph->n_row < graph->maxvar) {
5659 : isl_vec *sol;
5660 : int violated;
5661 : int coincident;
5662 :
5663 0 : graph->src_scc = -1;
5664 0 : graph->dst_scc = -1;
5665 :
5666 0 : if (setup_lp(ctx, graph, use_coincidence) < 0)
5667 0 : return isl_stat_error;
5668 0 : sol = solve_lp(ctx, graph);
5669 0 : if (!sol)
5670 0 : return isl_stat_error;
5671 0 : if (sol->size == 0) {
5672 0 : int empty = graph->n_total_row == graph->band_start;
5673 :
5674 0 : isl_vec_free(sol);
5675 0 : if (use_coincidence && (!force_coincidence || !empty)) {
5676 0 : use_coincidence = 0;
5677 0 : continue;
5678 : }
5679 0 : return isl_stat_ok;
5680 : }
5681 0 : coincident = !has_coincidence || use_coincidence;
5682 0 : if (update_schedule(graph, sol, coincident) < 0)
5683 0 : return isl_stat_error;
5684 :
5685 0 : if (!check_conditional)
5686 0 : continue;
5687 0 : violated = has_violated_conditional_constraint(ctx, graph);
5688 0 : if (violated < 0)
5689 0 : return isl_stat_error;
5690 0 : if (!violated)
5691 0 : continue;
5692 0 : if (reset_band(graph) < 0)
5693 0 : return isl_stat_error;
5694 0 : use_coincidence = has_coincidence;
5695 : }
5696 :
5697 0 : return isl_stat_ok;
5698 : }
5699 :
5700 : /* Compute a schedule for a connected dependence graph by considering
5701 : * the graph as a whole and return the updated schedule node.
5702 : *
5703 : * The actual schedule rows of the current band are computed by
5704 : * compute_schedule_wcc_band. compute_schedule_finish_band takes
5705 : * care of integrating the band into "node" and continuing
5706 : * the computation.
5707 : */
5708 0 : static __isl_give isl_schedule_node *compute_schedule_wcc_whole(
5709 : __isl_take isl_schedule_node *node, struct isl_sched_graph *graph)
5710 : {
5711 : isl_ctx *ctx;
5712 :
5713 0 : if (!node)
5714 0 : return NULL;
5715 :
5716 0 : ctx = isl_schedule_node_get_ctx(node);
5717 0 : if (compute_schedule_wcc_band(ctx, graph) < 0)
5718 0 : return isl_schedule_node_free(node);
5719 :
5720 0 : return compute_schedule_finish_band(node, graph, 1);
5721 : }
5722 :
/* Clustering information used by compute_schedule_wcc_clustering.
 */
struct isl_clustering {
	/* The number of SCCs in the original dependence graph. */
	int n;
	/* An array of "n" elements, each representing an SCC of
	 * the original dependence graph.  All entries in the same
	 * cluster have the same number of schedule rows.
	 */
	struct isl_sched_graph *scc;
	/* The merged clusters of SCCs after the clustering
	 * has completed.
	 */
	struct isl_sched_graph *cluster;
	/* Maps each SCC index to the cluster to which it belongs,
	 * where each cluster is represented by the index of the first
	 * SCC in the cluster.  Initially, each SCC belongs to a cluster
	 * containing only that SCC.
	 */
	int *scc_cluster;
	/* Temporary data used inside copy_partial.  For each SCC,
	 * the number of nodes in the SCC that have already been copied.
	 */
	int *scc_node;
	/* Used by merge_clusters_along_edge to keep track of
	 * which SCCs need to be merged.
	 */
	int *scc_in_merge;
};
5752 :
5753 : /* Initialize the clustering data structure "c" from "graph".
5754 : *
5755 : * In particular, allocate memory, extract the SCCs from "graph"
5756 : * into c->scc, initialize scc_cluster and construct
5757 : * a band of schedule rows for each SCC.
5758 : * Within each SCC, there is only one SCC by definition.
5759 : * Each SCC initially belongs to a cluster containing only that SCC.
5760 : */
5761 0 : static isl_stat clustering_init(isl_ctx *ctx, struct isl_clustering *c,
5762 : struct isl_sched_graph *graph)
5763 : {
5764 : int i;
5765 :
5766 0 : c->n = graph->scc;
5767 0 : c->scc = isl_calloc_array(ctx, struct isl_sched_graph, c->n);
5768 0 : c->cluster = isl_calloc_array(ctx, struct isl_sched_graph, c->n);
5769 0 : c->scc_cluster = isl_calloc_array(ctx, int, c->n);
5770 0 : c->scc_node = isl_calloc_array(ctx, int, c->n);
5771 0 : c->scc_in_merge = isl_calloc_array(ctx, int, c->n);
5772 0 : if (!c->scc || !c->cluster ||
5773 0 : !c->scc_cluster || !c->scc_node || !c->scc_in_merge)
5774 0 : return isl_stat_error;
5775 :
5776 0 : for (i = 0; i < c->n; ++i) {
5777 0 : if (extract_sub_graph(ctx, graph, &node_scc_exactly,
5778 0 : &edge_scc_exactly, i, &c->scc[i]) < 0)
5779 0 : return isl_stat_error;
5780 0 : c->scc[i].scc = 1;
5781 0 : if (compute_maxvar(&c->scc[i]) < 0)
5782 0 : return isl_stat_error;
5783 0 : if (compute_schedule_wcc_band(ctx, &c->scc[i]) < 0)
5784 0 : return isl_stat_error;
5785 0 : c->scc_cluster[i] = i;
5786 : }
5787 :
5788 0 : return isl_stat_ok;
5789 : }
5790 :
5791 : /* Free all memory allocated for "c".
5792 : */
5793 0 : static void clustering_free(isl_ctx *ctx, struct isl_clustering *c)
5794 : {
5795 : int i;
5796 :
5797 0 : if (c->scc)
5798 0 : for (i = 0; i < c->n; ++i)
5799 0 : graph_free(ctx, &c->scc[i]);
5800 0 : free(c->scc);
5801 0 : if (c->cluster)
5802 0 : for (i = 0; i < c->n; ++i)
5803 0 : graph_free(ctx, &c->cluster[i]);
5804 0 : free(c->cluster);
5805 0 : free(c->scc_cluster);
5806 0 : free(c->scc_node);
5807 0 : free(c->scc_in_merge);
5808 0 : }
5809 :
5810 : /* Should we refrain from merging the cluster in "graph" with
5811 : * any other cluster?
5812 : * In particular, is its current schedule band empty and incomplete.
5813 : */
5814 0 : static int bad_cluster(struct isl_sched_graph *graph)
5815 : {
5816 0 : return graph->n_row < graph->maxvar &&
5817 0 : graph->n_total_row == graph->band_start;
5818 : }
5819 :
5820 : /* Is "edge" a proximity edge with a non-empty dependence relation?
5821 : */
5822 0 : static isl_bool is_non_empty_proximity(struct isl_sched_edge *edge)
5823 : {
5824 0 : if (!is_proximity(edge))
5825 0 : return isl_bool_false;
5826 0 : return isl_bool_not(isl_map_plain_is_empty(edge->map));
5827 : }
5828 :
5829 : /* Return the index of an edge in "graph" that can be used to merge
5830 : * two clusters in "c".
5831 : * Return graph->n_edge if no such edge can be found.
5832 : * Return -1 on error.
5833 : *
5834 : * In particular, return a proximity edge between two clusters
5835 : * that is not marked "no_merge" and such that neither of the
5836 : * two clusters has an incomplete, empty band.
5837 : *
5838 : * If there are multiple such edges, then try and find the most
5839 : * appropriate edge to use for merging. In particular, pick the edge
5840 : * with the greatest weight. If there are multiple of those,
5841 : * then pick one with the shortest distance between
5842 : * the two cluster representatives.
5843 : */
5844 0 : static int find_proximity(struct isl_sched_graph *graph,
5845 : struct isl_clustering *c)
5846 : {
5847 0 : int i, best = graph->n_edge, best_dist, best_weight;
5848 :
5849 0 : for (i = 0; i < graph->n_edge; ++i) {
5850 0 : struct isl_sched_edge *edge = &graph->edge[i];
5851 : int dist, weight;
5852 : isl_bool prox;
5853 :
5854 0 : prox = is_non_empty_proximity(edge);
5855 0 : if (prox < 0)
5856 0 : return -1;
5857 0 : if (!prox)
5858 0 : continue;
5859 0 : if (edge->no_merge)
5860 0 : continue;
5861 0 : if (bad_cluster(&c->scc[edge->src->scc]) ||
5862 0 : bad_cluster(&c->scc[edge->dst->scc]))
5863 0 : continue;
5864 0 : dist = c->scc_cluster[edge->dst->scc] -
5865 0 : c->scc_cluster[edge->src->scc];
5866 0 : if (dist == 0)
5867 0 : continue;
5868 0 : weight = edge->weight;
5869 0 : if (best < graph->n_edge) {
5870 0 : if (best_weight > weight)
5871 0 : continue;
5872 0 : if (best_weight == weight && best_dist <= dist)
5873 0 : continue;
5874 : }
5875 0 : best = i;
5876 0 : best_dist = dist;
5877 0 : best_weight = weight;
5878 : }
5879 :
5880 0 : return best;
5881 : }
5882 :
/* Internal data structure used in mark_merge_sccs.
 */
struct isl_mark_merge_sccs_data {
	/* The dependence graph in which a strongly connected
	 * component is constructed.
	 */
	struct isl_sched_graph *graph;
	/* Maps each SCC index to the cluster to which it belongs. */
	int *scc_cluster;
	/* Indices (in graph->node) of the two nodes being merged. */
	int src;
	int dst;
};
5896 :
5897 : /* Check whether the cluster containing node "i" depends on the cluster
5898 : * containing node "j". If "i" and "j" belong to the same cluster,
5899 : * then they are taken to depend on each other to ensure that
5900 : * the resulting strongly connected component consists of complete
5901 : * clusters. Furthermore, if "i" and "j" are the two nodes that
5902 : * are being merged, then they are taken to depend on each other as well.
5903 : * Otherwise, check if there is a (conditional) validity dependence
5904 : * from node[j] to node[i], forcing node[i] to follow node[j].
5905 : */
5906 0 : static isl_bool cluster_follows(int i, int j, void *user)
5907 : {
5908 0 : struct isl_mark_merge_sccs_data *data = user;
5909 0 : struct isl_sched_graph *graph = data->graph;
5910 0 : int *scc_cluster = data->scc_cluster;
5911 :
5912 0 : if (data->src == i && data->dst == j)
5913 0 : return isl_bool_true;
5914 0 : if (data->src == j && data->dst == i)
5915 0 : return isl_bool_true;
5916 0 : if (scc_cluster[graph->node[i].scc] == scc_cluster[graph->node[j].scc])
5917 0 : return isl_bool_true;
5918 :
5919 0 : return graph_has_validity_edge(graph, &graph->node[j], &graph->node[i]);
5920 : }
5921 :
5922 : /* Mark all SCCs that belong to either of the two clusters in "c"
5923 : * connected by the edge in "graph" with index "edge", or to any
5924 : * of the intermediate clusters.
5925 : * The marking is recorded in c->scc_in_merge.
5926 : *
5927 : * The given edge has been selected for merging two clusters,
5928 : * meaning that there is at least a proximity edge between the two nodes.
5929 : * However, there may also be (indirect) validity dependences
5930 : * between the two nodes. When merging the two clusters, all clusters
5931 : * containing one or more of the intermediate nodes along the
5932 : * indirect validity dependences need to be merged in as well.
5933 : *
5934 : * First collect all such nodes by computing the strongly connected
5935 : * component (SCC) containing the two nodes connected by the edge, where
5936 : * the two nodes are considered to depend on each other to make
5937 : * sure they end up in the same SCC. Similarly, each node is considered
5938 : * to depend on every other node in the same cluster to ensure
5939 : * that the SCC consists of complete clusters.
5940 : *
5941 : * Then the original SCCs that contain any of these nodes are marked
5942 : * in c->scc_in_merge.
5943 : */
5944 0 : static isl_stat mark_merge_sccs(isl_ctx *ctx, struct isl_sched_graph *graph,
5945 : int edge, struct isl_clustering *c)
5946 : {
5947 : struct isl_mark_merge_sccs_data data;
5948 : struct isl_tarjan_graph *g;
5949 : int i;
5950 :
5951 0 : for (i = 0; i < c->n; ++i)
5952 0 : c->scc_in_merge[i] = 0;
5953 :
5954 0 : data.graph = graph;
5955 0 : data.scc_cluster = c->scc_cluster;
5956 0 : data.src = graph->edge[edge].src - graph->node;
5957 0 : data.dst = graph->edge[edge].dst - graph->node;
5958 :
5959 0 : g = isl_tarjan_graph_component(ctx, graph->n, data.dst,
5960 : &cluster_follows, &data);
5961 0 : if (!g)
5962 0 : goto error;
5963 :
5964 0 : i = g->op;
5965 0 : if (i < 3)
5966 0 : isl_die(ctx, isl_error_internal,
5967 : "expecting at least two nodes in component",
5968 : goto error);
5969 0 : if (g->order[--i] != -1)
5970 0 : isl_die(ctx, isl_error_internal,
5971 : "expecting end of component marker", goto error);
5972 :
5973 0 : for (--i; i >= 0 && g->order[i] != -1; --i) {
5974 0 : int scc = graph->node[g->order[i]].scc;
5975 0 : c->scc_in_merge[scc] = 1;
5976 : }
5977 :
5978 0 : isl_tarjan_graph_free(g);
5979 0 : return isl_stat_ok;
5980 : error:
5981 0 : isl_tarjan_graph_free(g);
5982 0 : return isl_stat_error;
5983 : }
5984 :
5985 : /* Construct the identifier "cluster_i".
5986 : */
5987 0 : static __isl_give isl_id *cluster_id(isl_ctx *ctx, int i)
5988 : {
5989 : char name[40];
5990 :
5991 0 : snprintf(name, sizeof(name), "cluster_%d", i);
5992 0 : return isl_id_alloc(ctx, name, NULL);
5993 : }
5994 :
5995 : /* Construct the space of the cluster with index "i" containing
5996 : * the strongly connected component "scc".
5997 : *
5998 : * In particular, construct a space called cluster_i with dimension equal
5999 : * to the number of schedule rows in the current band of "scc".
6000 : */
6001 0 : static __isl_give isl_space *cluster_space(struct isl_sched_graph *scc, int i)
6002 : {
6003 : int nvar;
6004 : isl_space *space;
6005 : isl_id *id;
6006 :
6007 0 : nvar = scc->n_total_row - scc->band_start;
6008 0 : space = isl_space_copy(scc->node[0].space);
6009 0 : space = isl_space_params(space);
6010 0 : space = isl_space_set_from_params(space);
6011 0 : space = isl_space_add_dims(space, isl_dim_set, nvar);
6012 0 : id = cluster_id(isl_space_get_ctx(space), i);
6013 0 : space = isl_space_set_tuple_id(space, isl_dim_set, id);
6014 :
6015 0 : return space;
6016 : }
6017 :
6018 : /* Collect the domain of the graph for merging clusters.
6019 : *
6020 : * In particular, for each cluster with first SCC "i", construct
6021 : * a set in the space called cluster_i with dimension equal
6022 : * to the number of schedule rows in the current band of the cluster.
6023 : */
6024 0 : static __isl_give isl_union_set *collect_domain(isl_ctx *ctx,
6025 : struct isl_sched_graph *graph, struct isl_clustering *c)
6026 : {
6027 : int i;
6028 : isl_space *space;
6029 : isl_union_set *domain;
6030 :
6031 0 : space = isl_space_params_alloc(ctx, 0);
6032 0 : domain = isl_union_set_empty(space);
6033 :
6034 0 : for (i = 0; i < graph->scc; ++i) {
6035 : isl_space *space;
6036 :
6037 0 : if (!c->scc_in_merge[i])
6038 0 : continue;
6039 0 : if (c->scc_cluster[i] != i)
6040 0 : continue;
6041 0 : space = cluster_space(&c->scc[i], i);
6042 0 : domain = isl_union_set_add_set(domain, isl_set_universe(space));
6043 : }
6044 :
6045 0 : return domain;
6046 : }
6047 :
6048 : /* Construct a map from the original instances to the corresponding
6049 : * cluster instance in the current bands of the clusters in "c".
6050 : */
6051 0 : static __isl_give isl_union_map *collect_cluster_map(isl_ctx *ctx,
6052 : struct isl_sched_graph *graph, struct isl_clustering *c)
6053 : {
6054 : int i, j;
6055 : isl_space *space;
6056 : isl_union_map *cluster_map;
6057 :
6058 0 : space = isl_space_params_alloc(ctx, 0);
6059 0 : cluster_map = isl_union_map_empty(space);
6060 0 : for (i = 0; i < graph->scc; ++i) {
6061 : int start, n;
6062 : isl_id *id;
6063 :
6064 0 : if (!c->scc_in_merge[i])
6065 0 : continue;
6066 :
6067 0 : id = cluster_id(ctx, c->scc_cluster[i]);
6068 0 : start = c->scc[i].band_start;
6069 0 : n = c->scc[i].n_total_row - start;
6070 0 : for (j = 0; j < c->scc[i].n; ++j) {
6071 : isl_multi_aff *ma;
6072 : isl_map *map;
6073 0 : struct isl_sched_node *node = &c->scc[i].node[j];
6074 :
6075 0 : ma = node_extract_partial_schedule_multi_aff(node,
6076 : start, n);
6077 0 : ma = isl_multi_aff_set_tuple_id(ma, isl_dim_out,
6078 : isl_id_copy(id));
6079 0 : map = isl_map_from_multi_aff(ma);
6080 0 : cluster_map = isl_union_map_add_map(cluster_map, map);
6081 : }
6082 0 : isl_id_free(id);
6083 : }
6084 :
6085 0 : return cluster_map;
6086 : }
6087 :
6088 : /* Add "umap" to the schedule constraints "sc" of all types of "edge"
6089 : * that are not isl_edge_condition or isl_edge_conditional_validity.
6090 : */
6091 0 : static __isl_give isl_schedule_constraints *add_non_conditional_constraints(
6092 : struct isl_sched_edge *edge, __isl_keep isl_union_map *umap,
6093 : __isl_take isl_schedule_constraints *sc)
6094 : {
6095 : enum isl_edge_type t;
6096 :
6097 0 : if (!sc)
6098 0 : return NULL;
6099 :
6100 0 : for (t = isl_edge_first; t <= isl_edge_last; ++t) {
6101 0 : if (t == isl_edge_condition ||
6102 : t == isl_edge_conditional_validity)
6103 0 : continue;
6104 0 : if (!is_type(edge, t))
6105 0 : continue;
6106 0 : sc = isl_schedule_constraints_add(sc, t,
6107 : isl_union_map_copy(umap));
6108 : }
6109 :
6110 0 : return sc;
6111 : }
6112 :
6113 : /* Add schedule constraints of types isl_edge_condition and
6114 : * isl_edge_conditional_validity to "sc" by applying "umap" to
6115 : * the domains of the wrapped relations in domain and range
6116 : * of the corresponding tagged constraints of "edge".
6117 : */
6118 0 : static __isl_give isl_schedule_constraints *add_conditional_constraints(
6119 : struct isl_sched_edge *edge, __isl_keep isl_union_map *umap,
6120 : __isl_take isl_schedule_constraints *sc)
6121 : {
6122 : enum isl_edge_type t;
6123 : isl_union_map *tagged;
6124 :
6125 0 : for (t = isl_edge_condition; t <= isl_edge_conditional_validity; ++t) {
6126 0 : if (!is_type(edge, t))
6127 0 : continue;
6128 0 : if (t == isl_edge_condition)
6129 0 : tagged = isl_union_map_copy(edge->tagged_condition);
6130 : else
6131 0 : tagged = isl_union_map_copy(edge->tagged_validity);
6132 0 : tagged = isl_union_map_zip(tagged);
6133 0 : tagged = isl_union_map_apply_domain(tagged,
6134 : isl_union_map_copy(umap));
6135 0 : tagged = isl_union_map_zip(tagged);
6136 0 : sc = isl_schedule_constraints_add(sc, t, tagged);
6137 0 : if (!sc)
6138 0 : return NULL;
6139 : }
6140 :
6141 0 : return sc;
6142 : }
6143 :
6144 : /* Given a mapping "cluster_map" from the original instances to
6145 : * the cluster instances, add schedule constraints on the clusters
6146 : * to "sc" corresponding to the original constraints represented by "edge".
6147 : *
6148 : * For non-tagged dependence constraints, the cluster constraints
6149 : * are obtained by applying "cluster_map" to the edge->map.
6150 : *
6151 : * For tagged dependence constraints, "cluster_map" needs to be applied
6152 : * to the domains of the wrapped relations in domain and range
6153 : * of the tagged dependence constraints. Pick out the mappings
6154 : * from these domains from "cluster_map" and construct their product.
6155 : * This mapping can then be applied to the pair of domains.
6156 : */
6157 0 : static __isl_give isl_schedule_constraints *collect_edge_constraints(
6158 : struct isl_sched_edge *edge, __isl_keep isl_union_map *cluster_map,
6159 : __isl_take isl_schedule_constraints *sc)
6160 : {
6161 : isl_union_map *umap;
6162 : isl_space *space;
6163 : isl_union_set *uset;
6164 : isl_union_map *umap1, *umap2;
6165 :
6166 0 : if (!sc)
6167 0 : return NULL;
6168 :
6169 0 : umap = isl_union_map_from_map(isl_map_copy(edge->map));
6170 0 : umap = isl_union_map_apply_domain(umap,
6171 : isl_union_map_copy(cluster_map));
6172 0 : umap = isl_union_map_apply_range(umap,
6173 : isl_union_map_copy(cluster_map));
6174 0 : sc = add_non_conditional_constraints(edge, umap, sc);
6175 0 : isl_union_map_free(umap);
6176 :
6177 0 : if (!sc || (!is_condition(edge) && !is_conditional_validity(edge)))
6178 0 : return sc;
6179 :
6180 0 : space = isl_space_domain(isl_map_get_space(edge->map));
6181 0 : uset = isl_union_set_from_set(isl_set_universe(space));
6182 0 : umap1 = isl_union_map_copy(cluster_map);
6183 0 : umap1 = isl_union_map_intersect_domain(umap1, uset);
6184 0 : space = isl_space_range(isl_map_get_space(edge->map));
6185 0 : uset = isl_union_set_from_set(isl_set_universe(space));
6186 0 : umap2 = isl_union_map_copy(cluster_map);
6187 0 : umap2 = isl_union_map_intersect_domain(umap2, uset);
6188 0 : umap = isl_union_map_product(umap1, umap2);
6189 :
6190 0 : sc = add_conditional_constraints(edge, umap, sc);
6191 :
6192 0 : isl_union_map_free(umap);
6193 0 : return sc;
6194 : }
6195 :
6196 : /* Given a mapping "cluster_map" from the original instances to
6197 : * the cluster instances, add schedule constraints on the clusters
6198 : * to "sc" corresponding to all edges in "graph" between nodes that
6199 : * belong to SCCs that are marked for merging in "scc_in_merge".
6200 : */
6201 0 : static __isl_give isl_schedule_constraints *collect_constraints(
6202 : struct isl_sched_graph *graph, int *scc_in_merge,
6203 : __isl_keep isl_union_map *cluster_map,
6204 : __isl_take isl_schedule_constraints *sc)
6205 : {
6206 : int i;
6207 :
6208 0 : for (i = 0; i < graph->n_edge; ++i) {
6209 0 : struct isl_sched_edge *edge = &graph->edge[i];
6210 :
6211 0 : if (!scc_in_merge[edge->src->scc])
6212 0 : continue;
6213 0 : if (!scc_in_merge[edge->dst->scc])
6214 0 : continue;
6215 0 : sc = collect_edge_constraints(edge, cluster_map, sc);
6216 : }
6217 :
6218 0 : return sc;
6219 : }
6220 :
6221 : /* Construct a dependence graph for scheduling clusters with respect
6222 : * to each other and store the result in "merge_graph".
6223 : * In particular, the nodes of the graph correspond to the schedule
6224 : * dimensions of the current bands of those clusters that have been
6225 : * marked for merging in "c".
6226 : *
6227 : * First construct an isl_schedule_constraints object for this domain
6228 : * by transforming the edges in "graph" to the domain.
6229 : * Then initialize a dependence graph for scheduling from these
6230 : * constraints.
6231 : */
6232 0 : static isl_stat init_merge_graph(isl_ctx *ctx, struct isl_sched_graph *graph,
6233 : struct isl_clustering *c, struct isl_sched_graph *merge_graph)
6234 : {
6235 : isl_union_set *domain;
6236 : isl_union_map *cluster_map;
6237 : isl_schedule_constraints *sc;
6238 : isl_stat r;
6239 :
6240 0 : domain = collect_domain(ctx, graph, c);
6241 0 : sc = isl_schedule_constraints_on_domain(domain);
6242 0 : if (!sc)
6243 0 : return isl_stat_error;
6244 0 : cluster_map = collect_cluster_map(ctx, graph, c);
6245 0 : sc = collect_constraints(graph, c->scc_in_merge, cluster_map, sc);
6246 0 : isl_union_map_free(cluster_map);
6247 :
6248 0 : r = graph_init(merge_graph, sc);
6249 :
6250 0 : isl_schedule_constraints_free(sc);
6251 :
6252 0 : return r;
6253 : }
6254 :
6255 : /* Compute the maximal number of remaining schedule rows that still need
6256 : * to be computed for the nodes that belong to clusters with the maximal
6257 : * dimension for the current band (i.e., the band that is to be merged).
6258 : * Only clusters that are about to be merged are considered.
6259 : * "maxvar" is the maximal dimension for the current band.
6260 : * "c" contains information about the clusters.
6261 : *
6262 : * Return the maximal number of remaining schedule rows or -1 on error.
6263 : */
6264 0 : static int compute_maxvar_max_slack(int maxvar, struct isl_clustering *c)
6265 : {
6266 : int i, j;
6267 : int max_slack;
6268 :
6269 0 : max_slack = 0;
6270 0 : for (i = 0; i < c->n; ++i) {
6271 : int nvar;
6272 : struct isl_sched_graph *scc;
6273 :
6274 0 : if (!c->scc_in_merge[i])
6275 0 : continue;
6276 0 : scc = &c->scc[i];
6277 0 : nvar = scc->n_total_row - scc->band_start;
6278 0 : if (nvar != maxvar)
6279 0 : continue;
6280 0 : for (j = 0; j < scc->n; ++j) {
6281 0 : struct isl_sched_node *node = &scc->node[j];
6282 : int slack;
6283 :
6284 0 : if (node_update_vmap(node) < 0)
6285 0 : return -1;
6286 0 : slack = node->nvar - node->rank;
6287 0 : if (slack > max_slack)
6288 0 : max_slack = slack;
6289 : }
6290 : }
6291 :
6292 0 : return max_slack;
6293 : }
6294 :
6295 : /* If there are any clusters where the dimension of the current band
6296 : * (i.e., the band that is to be merged) is smaller than "maxvar" and
6297 : * if there are any nodes in such a cluster where the number
6298 : * of remaining schedule rows that still need to be computed
6299 : * is greater than "max_slack", then return the smallest current band
6300 : * dimension of all these clusters. Otherwise return the original value
6301 : * of "maxvar". Return -1 in case of any error.
6302 : * Only clusters that are about to be merged are considered.
6303 : * "c" contains information about the clusters.
6304 : */
6305 0 : static int limit_maxvar_to_slack(int maxvar, int max_slack,
6306 : struct isl_clustering *c)
6307 : {
6308 : int i, j;
6309 :
6310 0 : for (i = 0; i < c->n; ++i) {
6311 : int nvar;
6312 : struct isl_sched_graph *scc;
6313 :
6314 0 : if (!c->scc_in_merge[i])
6315 0 : continue;
6316 0 : scc = &c->scc[i];
6317 0 : nvar = scc->n_total_row - scc->band_start;
6318 0 : if (nvar >= maxvar)
6319 0 : continue;
6320 0 : for (j = 0; j < scc->n; ++j) {
6321 0 : struct isl_sched_node *node = &scc->node[j];
6322 : int slack;
6323 :
6324 0 : if (node_update_vmap(node) < 0)
6325 0 : return -1;
6326 0 : slack = node->nvar - node->rank;
6327 0 : if (slack > max_slack) {
6328 0 : maxvar = nvar;
6329 0 : break;
6330 : }
6331 : }
6332 : }
6333 :
6334 0 : return maxvar;
6335 : }
6336 :
6337 : /* Adjust merge_graph->maxvar based on the number of remaining schedule rows
6338 : * that still need to be computed. In particular, if there is a node
6339 : * in a cluster where the dimension of the current band is smaller
6340 : * than merge_graph->maxvar, but the number of remaining schedule rows
6341 : * is greater than that of any node in a cluster with the maximal
6342 : * dimension for the current band (i.e., merge_graph->maxvar),
6343 : * then adjust merge_graph->maxvar to the (smallest) current band dimension
6344 : * of those clusters. Without this adjustment, the total number of
6345 : * schedule dimensions would be increased, resulting in a skewed view
6346 : * of the number of coincident dimensions.
6347 : * "c" contains information about the clusters.
6348 : *
6349 : * If the maximize_band_depth option is set and merge_graph->maxvar is reduced,
6350 : * then there is no point in attempting any merge since it will be rejected
6351 : * anyway. Set merge_graph->maxvar to zero in such cases.
6352 : */
6353 0 : static isl_stat adjust_maxvar_to_slack(isl_ctx *ctx,
6354 : struct isl_sched_graph *merge_graph, struct isl_clustering *c)
6355 : {
6356 : int max_slack, maxvar;
6357 :
6358 0 : max_slack = compute_maxvar_max_slack(merge_graph->maxvar, c);
6359 0 : if (max_slack < 0)
6360 0 : return isl_stat_error;
6361 0 : maxvar = limit_maxvar_to_slack(merge_graph->maxvar, max_slack, c);
6362 0 : if (maxvar < 0)
6363 0 : return isl_stat_error;
6364 :
6365 0 : if (maxvar < merge_graph->maxvar) {
6366 0 : if (isl_options_get_schedule_maximize_band_depth(ctx))
6367 0 : merge_graph->maxvar = 0;
6368 : else
6369 0 : merge_graph->maxvar = maxvar;
6370 : }
6371 :
6372 0 : return isl_stat_ok;
6373 : }
6374 :
6375 : /* Return the number of coincident dimensions in the current band of "graph",
6376 : * where the nodes of "graph" are assumed to be scheduled by a single band.
6377 : */
6378 0 : static int get_n_coincident(struct isl_sched_graph *graph)
6379 : {
6380 : int i;
6381 :
6382 0 : for (i = graph->band_start; i < graph->n_total_row; ++i)
6383 0 : if (!graph->node[0].coincident[i])
6384 0 : break;
6385 :
6386 0 : return i - graph->band_start;
6387 : }
6388 :
6389 : /* Should the clusters be merged based on the cluster schedule
6390 : * in the current (and only) band of "merge_graph", given that
6391 : * coincidence should be maximized?
6392 : *
6393 : * If the number of coincident schedule dimensions in the merged band
6394 : * would be less than the maximal number of coincident schedule dimensions
6395 : * in any of the merged clusters, then the clusters should not be merged.
6396 : */
6397 0 : static isl_bool ok_to_merge_coincident(struct isl_clustering *c,
6398 : struct isl_sched_graph *merge_graph)
6399 : {
6400 : int i;
6401 : int n_coincident;
6402 : int max_coincident;
6403 :
6404 0 : max_coincident = 0;
6405 0 : for (i = 0; i < c->n; ++i) {
6406 0 : if (!c->scc_in_merge[i])
6407 0 : continue;
6408 0 : n_coincident = get_n_coincident(&c->scc[i]);
6409 0 : if (n_coincident > max_coincident)
6410 0 : max_coincident = n_coincident;
6411 : }
6412 :
6413 0 : n_coincident = get_n_coincident(merge_graph);
6414 :
6415 0 : return n_coincident >= max_coincident;
6416 : }
6417 :
6418 : /* Return the transformation on "node" expressed by the current (and only)
6419 : * band of "merge_graph" applied to the clusters in "c".
6420 : *
6421 : * First find the representation of "node" in its SCC in "c" and
6422 : * extract the transformation expressed by the current band.
6423 : * Then extract the transformation applied by "merge_graph"
6424 : * to the cluster to which this SCC belongs.
6425 : * Combine the two to obtain the complete transformation on the node.
6426 : *
6427 : * Note that the range of the first transformation is an anonymous space,
6428 : * while the domain of the second is named "cluster_X". The range
6429 : * of the former therefore needs to be adjusted before the two
6430 : * can be combined.
6431 : */
6432 0 : static __isl_give isl_map *extract_node_transformation(isl_ctx *ctx,
6433 : struct isl_sched_node *node, struct isl_clustering *c,
6434 : struct isl_sched_graph *merge_graph)
6435 : {
6436 : struct isl_sched_node *scc_node, *cluster_node;
6437 : int start, n;
6438 : isl_id *id;
6439 : isl_space *space;
6440 : isl_multi_aff *ma, *ma2;
6441 :
6442 0 : scc_node = graph_find_node(ctx, &c->scc[node->scc], node->space);
6443 0 : if (scc_node && !is_node(&c->scc[node->scc], scc_node))
6444 0 : isl_die(ctx, isl_error_internal, "unable to find node",
6445 : return NULL);
6446 0 : start = c->scc[node->scc].band_start;
6447 0 : n = c->scc[node->scc].n_total_row - start;
6448 0 : ma = node_extract_partial_schedule_multi_aff(scc_node, start, n);
6449 0 : space = cluster_space(&c->scc[node->scc], c->scc_cluster[node->scc]);
6450 0 : cluster_node = graph_find_node(ctx, merge_graph, space);
6451 0 : if (cluster_node && !is_node(merge_graph, cluster_node))
6452 0 : isl_die(ctx, isl_error_internal, "unable to find cluster",
6453 : space = isl_space_free(space));
6454 0 : id = isl_space_get_tuple_id(space, isl_dim_set);
6455 0 : ma = isl_multi_aff_set_tuple_id(ma, isl_dim_out, id);
6456 0 : isl_space_free(space);
6457 0 : n = merge_graph->n_total_row;
6458 0 : ma2 = node_extract_partial_schedule_multi_aff(cluster_node, 0, n);
6459 0 : ma = isl_multi_aff_pullback_multi_aff(ma2, ma);
6460 :
6461 0 : return isl_map_from_multi_aff(ma);
6462 : }
6463 :
6464 : /* Give a set of distances "set", are they bounded by a small constant
6465 : * in direction "pos"?
6466 : * In practice, check if they are bounded by 2 by checking that there
6467 : * are no elements with a value greater than or equal to 3 or
6468 : * smaller than or equal to -3.
6469 : */
6470 0 : static isl_bool distance_is_bounded(__isl_keep isl_set *set, int pos)
6471 : {
6472 : isl_bool bounded;
6473 : isl_set *test;
6474 :
6475 0 : if (!set)
6476 0 : return isl_bool_error;
6477 :
6478 0 : test = isl_set_copy(set);
6479 0 : test = isl_set_lower_bound_si(test, isl_dim_set, pos, 3);
6480 0 : bounded = isl_set_is_empty(test);
6481 0 : isl_set_free(test);
6482 :
6483 0 : if (bounded < 0 || !bounded)
6484 0 : return bounded;
6485 :
6486 0 : test = isl_set_copy(set);
6487 0 : test = isl_set_upper_bound_si(test, isl_dim_set, pos, -3);
6488 0 : bounded = isl_set_is_empty(test);
6489 0 : isl_set_free(test);
6490 :
6491 0 : return bounded;
6492 : }
6493 :
6494 : /* Does the set "set" have a fixed (but possible parametric) value
6495 : * at dimension "pos"?
6496 : */
6497 0 : static isl_bool has_single_value(__isl_keep isl_set *set, int pos)
6498 : {
6499 : int n;
6500 : isl_bool single;
6501 :
6502 0 : if (!set)
6503 0 : return isl_bool_error;
6504 0 : set = isl_set_copy(set);
6505 0 : n = isl_set_dim(set, isl_dim_set);
6506 0 : set = isl_set_project_out(set, isl_dim_set, pos + 1, n - (pos + 1));
6507 0 : set = isl_set_project_out(set, isl_dim_set, 0, pos);
6508 0 : single = isl_set_is_singleton(set);
6509 0 : isl_set_free(set);
6510 :
6511 0 : return single;
6512 : }
6513 :
6514 : /* Does "map" have a fixed (but possible parametric) value
6515 : * at dimension "pos" of either its domain or its range?
6516 : */
6517 0 : static isl_bool has_singular_src_or_dst(__isl_keep isl_map *map, int pos)
6518 : {
6519 : isl_set *set;
6520 : isl_bool single;
6521 :
6522 0 : set = isl_map_domain(isl_map_copy(map));
6523 0 : single = has_single_value(set, pos);
6524 0 : isl_set_free(set);
6525 :
6526 0 : if (single < 0 || single)
6527 0 : return single;
6528 :
6529 0 : set = isl_map_range(isl_map_copy(map));
6530 0 : single = has_single_value(set, pos);
6531 0 : isl_set_free(set);
6532 :
6533 0 : return single;
6534 : }
6535 :
6536 : /* Does the edge "edge" from "graph" have bounded dependence distances
6537 : * in the merged graph "merge_graph" of a selection of clusters in "c"?
6538 : *
6539 : * Extract the complete transformations of the source and destination
6540 : * nodes of the edge, apply them to the edge constraints and
6541 : * compute the differences. Finally, check if these differences are bounded
6542 : * in each direction.
6543 : *
6544 : * If the dimension of the band is greater than the number of
6545 : * dimensions that can be expected to be optimized by the edge
6546 : * (based on its weight), then also allow the differences to be unbounded
6547 : * in the remaining dimensions, but only if either the source or
6548 : * the destination has a fixed value in that direction.
6549 : * This allows a statement that produces values that are used by
6550 : * several instances of another statement to be merged with that
6551 : * other statement.
6552 : * However, merging such clusters will introduce an inherently
6553 : * large proximity distance inside the merged cluster, meaning
6554 : * that proximity distances will no longer be optimized in
6555 : * subsequent merges. These merges are therefore only allowed
6556 : * after all other possible merges have been tried.
6557 : * The first time such a merge is encountered, the weight of the edge
6558 : * is replaced by a negative weight. The second time (i.e., after
6559 : * all merges over edges with a non-negative weight have been tried),
6560 : * the merge is allowed.
6561 : */
6562 0 : static isl_bool has_bounded_distances(isl_ctx *ctx, struct isl_sched_edge *edge,
6563 : struct isl_sched_graph *graph, struct isl_clustering *c,
6564 : struct isl_sched_graph *merge_graph)
6565 : {
6566 : int i, n, n_slack;
6567 : isl_bool bounded;
6568 : isl_map *map, *t;
6569 : isl_set *dist;
6570 :
6571 0 : map = isl_map_copy(edge->map);
6572 0 : t = extract_node_transformation(ctx, edge->src, c, merge_graph);
6573 0 : map = isl_map_apply_domain(map, t);
6574 0 : t = extract_node_transformation(ctx, edge->dst, c, merge_graph);
6575 0 : map = isl_map_apply_range(map, t);
6576 0 : dist = isl_map_deltas(isl_map_copy(map));
6577 :
6578 0 : bounded = isl_bool_true;
6579 0 : n = isl_set_dim(dist, isl_dim_set);
6580 0 : n_slack = n - edge->weight;
6581 0 : if (edge->weight < 0)
6582 0 : n_slack -= graph->max_weight + 1;
6583 0 : for (i = 0; i < n; ++i) {
6584 : isl_bool bounded_i, singular_i;
6585 :
6586 0 : bounded_i = distance_is_bounded(dist, i);
6587 0 : if (bounded_i < 0)
6588 0 : goto error;
6589 0 : if (bounded_i)
6590 0 : continue;
6591 0 : if (edge->weight >= 0)
6592 0 : bounded = isl_bool_false;
6593 0 : n_slack--;
6594 0 : if (n_slack < 0)
6595 0 : break;
6596 0 : singular_i = has_singular_src_or_dst(map, i);
6597 0 : if (singular_i < 0)
6598 0 : goto error;
6599 0 : if (singular_i)
6600 0 : continue;
6601 0 : bounded = isl_bool_false;
6602 0 : break;
6603 : }
6604 0 : if (!bounded && i >= n && edge->weight >= 0)
6605 0 : edge->weight -= graph->max_weight + 1;
6606 0 : isl_map_free(map);
6607 0 : isl_set_free(dist);
6608 :
6609 0 : return bounded;
6610 : error:
6611 0 : isl_map_free(map);
6612 0 : isl_set_free(dist);
6613 0 : return isl_bool_error;
6614 : }
6615 :
6616 : /* Should the clusters be merged based on the cluster schedule
6617 : * in the current (and only) band of "merge_graph"?
6618 : * "graph" is the original dependence graph, while "c" records
6619 : * which SCCs are involved in the latest merge.
6620 : *
6621 : * In particular, is there at least one proximity constraint
6622 : * that is optimized by the merge?
6623 : *
6624 : * A proximity constraint is considered to be optimized
6625 : * if the dependence distances are small.
6626 : */
6627 0 : static isl_bool ok_to_merge_proximity(isl_ctx *ctx,
6628 : struct isl_sched_graph *graph, struct isl_clustering *c,
6629 : struct isl_sched_graph *merge_graph)
6630 : {
6631 : int i;
6632 :
6633 0 : for (i = 0; i < graph->n_edge; ++i) {
6634 0 : struct isl_sched_edge *edge = &graph->edge[i];
6635 : isl_bool bounded;
6636 :
6637 0 : if (!is_proximity(edge))
6638 0 : continue;
6639 0 : if (!c->scc_in_merge[edge->src->scc])
6640 0 : continue;
6641 0 : if (!c->scc_in_merge[edge->dst->scc])
6642 0 : continue;
6643 0 : if (c->scc_cluster[edge->dst->scc] ==
6644 0 : c->scc_cluster[edge->src->scc])
6645 0 : continue;
6646 0 : bounded = has_bounded_distances(ctx, edge, graph, c,
6647 : merge_graph);
6648 0 : if (bounded < 0 || bounded)
6649 0 : return bounded;
6650 : }
6651 :
6652 0 : return isl_bool_false;
6653 : }
6654 :
6655 : /* Should the clusters be merged based on the cluster schedule
6656 : * in the current (and only) band of "merge_graph"?
6657 : * "graph" is the original dependence graph, while "c" records
6658 : * which SCCs are involved in the latest merge.
6659 : *
6660 : * If the current band is empty, then the clusters should not be merged.
6661 : *
6662 : * If the band depth should be maximized and the merge schedule
6663 : * is incomplete (meaning that the dimension of some of the schedule
6664 : * bands in the original schedule will be reduced), then the clusters
6665 : * should not be merged.
6666 : *
6667 : * If the schedule_maximize_coincidence option is set, then check that
6668 : * the number of coincident schedule dimensions is not reduced.
6669 : *
6670 : * Finally, only allow the merge if at least one proximity
6671 : * constraint is optimized.
6672 : */
6673 0 : static isl_bool ok_to_merge(isl_ctx *ctx, struct isl_sched_graph *graph,
6674 : struct isl_clustering *c, struct isl_sched_graph *merge_graph)
6675 : {
6676 0 : if (merge_graph->n_total_row == merge_graph->band_start)
6677 0 : return isl_bool_false;
6678 :
6679 0 : if (isl_options_get_schedule_maximize_band_depth(ctx) &&
6680 0 : merge_graph->n_total_row < merge_graph->maxvar)
6681 0 : return isl_bool_false;
6682 :
6683 0 : if (isl_options_get_schedule_maximize_coincidence(ctx)) {
6684 : isl_bool ok;
6685 :
6686 0 : ok = ok_to_merge_coincident(c, merge_graph);
6687 0 : if (ok < 0 || !ok)
6688 0 : return ok;
6689 : }
6690 :
6691 0 : return ok_to_merge_proximity(ctx, graph, c, merge_graph);
6692 : }
6693 :
6694 : /* Apply the schedule in "t_node" to the "n" rows starting at "first"
6695 : * of the schedule in "node" and return the result.
6696 : *
6697 : * That is, essentially compute
6698 : *
6699 : * T * N(first:first+n-1)
6700 : *
6701 : * taking into account the constant term and the parameter coefficients
6702 : * in "t_node".
6703 : */
6704 0 : static __isl_give isl_mat *node_transformation(isl_ctx *ctx,
6705 : struct isl_sched_node *t_node, struct isl_sched_node *node,
6706 : int first, int n)
6707 : {
6708 : int i, j;
6709 : isl_mat *t;
6710 : int n_row, n_col, n_param, n_var;
6711 :
6712 0 : n_param = node->nparam;
6713 0 : n_var = node->nvar;
6714 0 : n_row = isl_mat_rows(t_node->sched);
6715 0 : n_col = isl_mat_cols(node->sched);
6716 0 : t = isl_mat_alloc(ctx, n_row, n_col);
6717 0 : if (!t)
6718 0 : return NULL;
6719 0 : for (i = 0; i < n_row; ++i) {
6720 0 : isl_seq_cpy(t->row[i], t_node->sched->row[i], 1 + n_param);
6721 0 : isl_seq_clr(t->row[i] + 1 + n_param, n_var);
6722 0 : for (j = 0; j < n; ++j)
6723 0 : isl_seq_addmul(t->row[i],
6724 0 : t_node->sched->row[i][1 + n_param + j],
6725 0 : node->sched->row[first + j],
6726 0 : 1 + n_param + n_var);
6727 : }
6728 0 : return t;
6729 : }
6730 :
6731 : /* Apply the cluster schedule in "t_node" to the current band
6732 : * schedule of the nodes in "graph".
6733 : *
6734 : * In particular, replace the rows starting at band_start
6735 : * by the result of applying the cluster schedule in "t_node"
6736 : * to the original rows.
6737 : *
6738 : * The coincidence of the schedule is determined by the coincidence
6739 : * of the cluster schedule.
6740 : */
6741 0 : static isl_stat transform(isl_ctx *ctx, struct isl_sched_graph *graph,
6742 : struct isl_sched_node *t_node)
6743 : {
6744 : int i, j;
6745 : int n_new;
6746 : int start, n;
6747 :
6748 0 : start = graph->band_start;
6749 0 : n = graph->n_total_row - start;
6750 :
6751 0 : n_new = isl_mat_rows(t_node->sched);
6752 0 : for (i = 0; i < graph->n; ++i) {
6753 0 : struct isl_sched_node *node = &graph->node[i];
6754 : isl_mat *t;
6755 :
6756 0 : t = node_transformation(ctx, t_node, node, start, n);
6757 0 : node->sched = isl_mat_drop_rows(node->sched, start, n);
6758 0 : node->sched = isl_mat_concat(node->sched, t);
6759 0 : node->sched_map = isl_map_free(node->sched_map);
6760 0 : if (!node->sched)
6761 0 : return isl_stat_error;
6762 0 : for (j = 0; j < n_new; ++j)
6763 0 : node->coincident[start + j] = t_node->coincident[j];
6764 : }
6765 0 : graph->n_total_row -= n;
6766 0 : graph->n_row -= n;
6767 0 : graph->n_total_row += n_new;
6768 0 : graph->n_row += n_new;
6769 :
6770 0 : return isl_stat_ok;
6771 : }
6772 :
6773 : /* Merge the clusters marked for merging in "c" into a single
6774 : * cluster using the cluster schedule in the current band of "merge_graph".
6775 : * The representative SCC for the new cluster is the SCC with
6776 : * the smallest index.
6777 : *
6778 : * The current band schedule of each SCC in the new cluster is obtained
6779 : * by applying the schedule of the corresponding original cluster
6780 : * to the original band schedule.
6781 : * All SCCs in the new cluster have the same number of schedule rows.
6782 : */
6783 0 : static isl_stat merge(isl_ctx *ctx, struct isl_clustering *c,
6784 : struct isl_sched_graph *merge_graph)
6785 : {
6786 : int i;
6787 0 : int cluster = -1;
6788 : isl_space *space;
6789 :
6790 0 : for (i = 0; i < c->n; ++i) {
6791 : struct isl_sched_node *node;
6792 :
6793 0 : if (!c->scc_in_merge[i])
6794 0 : continue;
6795 0 : if (cluster < 0)
6796 0 : cluster = i;
6797 0 : space = cluster_space(&c->scc[i], c->scc_cluster[i]);
6798 0 : node = graph_find_node(ctx, merge_graph, space);
6799 0 : isl_space_free(space);
6800 0 : if (!node)
6801 0 : return isl_stat_error;
6802 0 : if (!is_node(merge_graph, node))
6803 0 : isl_die(ctx, isl_error_internal,
6804 : "unable to find cluster",
6805 : return isl_stat_error);
6806 0 : if (transform(ctx, &c->scc[i], node) < 0)
6807 0 : return isl_stat_error;
6808 0 : c->scc_cluster[i] = cluster;
6809 : }
6810 :
6811 0 : return isl_stat_ok;
6812 : }
6813 :
6814 : /* Try and merge the clusters of SCCs marked in c->scc_in_merge
6815 : * by scheduling the current cluster bands with respect to each other.
6816 : *
6817 : * Construct a dependence graph with a space for each cluster and
6818 : * with the coordinates of each space corresponding to the schedule
6819 : * dimensions of the current band of that cluster.
6820 : * Construct a cluster schedule in this cluster dependence graph and
6821 : * apply it to the current cluster bands if it is applicable
6822 : * according to ok_to_merge.
6823 : *
6824 : * If the number of remaining schedule dimensions in a cluster
6825 : * with a non-maximal current schedule dimension is greater than
6826 : * the number of remaining schedule dimensions in clusters
6827 : * with a maximal current schedule dimension, then restrict
6828 : * the number of rows to be computed in the cluster schedule
6829 : * to the minimal such non-maximal current schedule dimension.
6830 : * Do this by adjusting merge_graph.maxvar.
6831 : *
6832 : * Return isl_bool_true if the clusters have effectively been merged
6833 : * into a single cluster.
6834 : *
6835 : * Note that since the standard scheduling algorithm minimizes the maximal
6836 : * distance over proximity constraints, the proximity constraints between
6837 : * the merged clusters may not be optimized any further than what is
6838 : * sufficient to bring the distances within the limits of the internal
6839 : * proximity constraints inside the individual clusters.
6840 : * It may therefore make sense to perform an additional translation step
6841 : * to bring the clusters closer to each other, while maintaining
6842 : * the linear part of the merging schedule found using the standard
6843 : * scheduling algorithm.
6844 : */
6845 0 : static isl_bool try_merge(isl_ctx *ctx, struct isl_sched_graph *graph,
6846 : struct isl_clustering *c)
6847 : {
6848 0 : struct isl_sched_graph merge_graph = { 0 };
6849 : isl_bool merged;
6850 :
6851 0 : if (init_merge_graph(ctx, graph, c, &merge_graph) < 0)
6852 0 : goto error;
6853 :
6854 0 : if (compute_maxvar(&merge_graph) < 0)
6855 0 : goto error;
6856 0 : if (adjust_maxvar_to_slack(ctx, &merge_graph,c) < 0)
6857 0 : goto error;
6858 0 : if (compute_schedule_wcc_band(ctx, &merge_graph) < 0)
6859 0 : goto error;
6860 0 : merged = ok_to_merge(ctx, graph, c, &merge_graph);
6861 0 : if (merged && merge(ctx, c, &merge_graph) < 0)
6862 0 : goto error;
6863 :
6864 0 : graph_free(ctx, &merge_graph);
6865 0 : return merged;
6866 : error:
6867 0 : graph_free(ctx, &merge_graph);
6868 0 : return isl_bool_error;
6869 : }
6870 :
6871 : /* Is there any edge marked "no_merge" between two SCCs that are
6872 : * about to be merged (i.e., that are set in "scc_in_merge")?
6873 : * "merge_edge" is the proximity edge along which the clusters of SCCs
6874 : * are going to be merged.
6875 : *
6876 : * If there is any edge between two SCCs with a negative weight,
6877 : * while the weight of "merge_edge" is non-negative, then this
6878 : * means that the edge was postponed. "merge_edge" should then
6879 : * also be postponed since merging along the edge with negative weight should
6880 : * be postponed until all edges with non-negative weight have been tried.
6881 : * Replace the weight of "merge_edge" by a negative weight as well and
6882 : * tell the caller not to attempt a merge.
6883 : */
6884 0 : static int any_no_merge(struct isl_sched_graph *graph, int *scc_in_merge,
6885 : struct isl_sched_edge *merge_edge)
6886 : {
6887 : int i;
6888 :
6889 0 : for (i = 0; i < graph->n_edge; ++i) {
6890 0 : struct isl_sched_edge *edge = &graph->edge[i];
6891 :
6892 0 : if (!scc_in_merge[edge->src->scc])
6893 0 : continue;
6894 0 : if (!scc_in_merge[edge->dst->scc])
6895 0 : continue;
6896 0 : if (edge->no_merge)
6897 0 : return 1;
6898 0 : if (merge_edge->weight >= 0 && edge->weight < 0) {
6899 0 : merge_edge->weight -= graph->max_weight + 1;
6900 0 : return 1;
6901 : }
6902 : }
6903 :
6904 0 : return 0;
6905 : }
6906 :
6907 : /* Merge the two clusters in "c" connected by the edge in "graph"
6908 : * with index "edge" into a single cluster.
6909 : * If it turns out to be impossible to merge these two clusters,
6910 : * then mark the edge as "no_merge" such that it will not be
6911 : * considered again.
6912 : *
6913 : * First mark all SCCs that need to be merged. This includes the SCCs
6914 : * in the two clusters, but it may also include the SCCs
6915 : * of intermediate clusters.
6916 : * If there is already a no_merge edge between any pair of such SCCs,
6917 : * then simply mark the current edge as no_merge as well.
6918 : * Likewise, if any of those edges was postponed by has_bounded_distances,
6919 : * then postpone the current edge as well.
6920 : * Otherwise, try and merge the clusters and mark "edge" as "no_merge"
6921 : * if the clusters did not end up getting merged, unless the non-merge
6922 : * is due to the fact that the edge was postponed. This postponement
6923 : * can be recognized by a change in weight (from non-negative to negative).
6924 : */
6925 0 : static isl_stat merge_clusters_along_edge(isl_ctx *ctx,
6926 : struct isl_sched_graph *graph, int edge, struct isl_clustering *c)
6927 : {
6928 : isl_bool merged;
6929 0 : int edge_weight = graph->edge[edge].weight;
6930 :
6931 0 : if (mark_merge_sccs(ctx, graph, edge, c) < 0)
6932 0 : return isl_stat_error;
6933 :
6934 0 : if (any_no_merge(graph, c->scc_in_merge, &graph->edge[edge]))
6935 0 : merged = isl_bool_false;
6936 : else
6937 0 : merged = try_merge(ctx, graph, c);
6938 0 : if (merged < 0)
6939 0 : return isl_stat_error;
6940 0 : if (!merged && edge_weight == graph->edge[edge].weight)
6941 0 : graph->edge[edge].no_merge = 1;
6942 :
6943 0 : return isl_stat_ok;
6944 : }
6945 :
6946 : /* Does "node" belong to the cluster identified by "cluster"?
6947 : */
6948 0 : static int node_cluster_exactly(struct isl_sched_node *node, int cluster)
6949 : {
6950 0 : return node->cluster == cluster;
6951 : }
6952 :
6953 : /* Does "edge" connect two nodes belonging to the cluster
6954 : * identified by "cluster"?
6955 : */
6956 0 : static int edge_cluster_exactly(struct isl_sched_edge *edge, int cluster)
6957 : {
6958 0 : return edge->src->cluster == cluster && edge->dst->cluster == cluster;
6959 : }
6960 :
6961 : /* Swap the schedule of "node1" and "node2".
6962 : * Both nodes have been derived from the same node in a common parent graph.
6963 : * Since the "coincident" field is shared with that node
6964 : * in the parent graph, there is no need to also swap this field.
6965 : */
6966 0 : static void swap_sched(struct isl_sched_node *node1,
6967 : struct isl_sched_node *node2)
6968 : {
6969 : isl_mat *sched;
6970 : isl_map *sched_map;
6971 :
6972 0 : sched = node1->sched;
6973 0 : node1->sched = node2->sched;
6974 0 : node2->sched = sched;
6975 :
6976 0 : sched_map = node1->sched_map;
6977 0 : node1->sched_map = node2->sched_map;
6978 0 : node2->sched_map = sched_map;
6979 0 : }
6980 :
6981 : /* Copy the current band schedule from the SCCs that form the cluster
6982 : * with index "pos" to the actual cluster at position "pos".
6983 : * By construction, the index of the first SCC that belongs to the cluster
6984 : * is also "pos".
6985 : *
6986 : * The order of the nodes inside both the SCCs and the cluster
6987 : * is assumed to be same as the order in the original "graph".
6988 : *
6989 : * Since the SCC graphs will no longer be used after this function,
6990 : * the schedules are actually swapped rather than copied.
6991 : */
6992 0 : static isl_stat copy_partial(struct isl_sched_graph *graph,
6993 : struct isl_clustering *c, int pos)
6994 : {
6995 : int i, j;
6996 :
6997 0 : c->cluster[pos].n_total_row = c->scc[pos].n_total_row;
6998 0 : c->cluster[pos].n_row = c->scc[pos].n_row;
6999 0 : c->cluster[pos].maxvar = c->scc[pos].maxvar;
7000 0 : j = 0;
7001 0 : for (i = 0; i < graph->n; ++i) {
7002 : int k;
7003 : int s;
7004 :
7005 0 : if (graph->node[i].cluster != pos)
7006 0 : continue;
7007 0 : s = graph->node[i].scc;
7008 0 : k = c->scc_node[s]++;
7009 0 : swap_sched(&c->cluster[pos].node[j], &c->scc[s].node[k]);
7010 0 : if (c->scc[s].maxvar > c->cluster[pos].maxvar)
7011 0 : c->cluster[pos].maxvar = c->scc[s].maxvar;
7012 0 : ++j;
7013 : }
7014 :
7015 0 : return isl_stat_ok;
7016 : }
7017 :
7018 : /* Is there a (conditional) validity dependence from node[j] to node[i],
7019 : * forcing node[i] to follow node[j] or do the nodes belong to the same
7020 : * cluster?
7021 : */
7022 0 : static isl_bool node_follows_strong_or_same_cluster(int i, int j, void *user)
7023 : {
7024 0 : struct isl_sched_graph *graph = user;
7025 :
7026 0 : if (graph->node[i].cluster == graph->node[j].cluster)
7027 0 : return isl_bool_true;
7028 0 : return graph_has_validity_edge(graph, &graph->node[j], &graph->node[i]);
7029 : }
7030 :
7031 : /* Extract the merged clusters of SCCs in "graph", sort them, and
7032 : * store them in c->clusters. Update c->scc_cluster accordingly.
7033 : *
7034 : * First keep track of the cluster containing the SCC to which a node
7035 : * belongs in the node itself.
7036 : * Then extract the clusters into c->clusters, copying the current
7037 : * band schedule from the SCCs that belong to the cluster.
7038 : * Do this only once per cluster.
7039 : *
7040 : * Finally, topologically sort the clusters and update c->scc_cluster
7041 : * to match the new scc numbering. While the SCCs were originally
7042 : * sorted already, some SCCs that depend on some other SCCs may
7043 : * have been merged with SCCs that appear before these other SCCs.
7044 : * A reordering may therefore be required.
7045 : */
7046 0 : static isl_stat extract_clusters(isl_ctx *ctx, struct isl_sched_graph *graph,
7047 : struct isl_clustering *c)
7048 : {
7049 : int i;
7050 :
7051 0 : for (i = 0; i < graph->n; ++i)
7052 0 : graph->node[i].cluster = c->scc_cluster[graph->node[i].scc];
7053 :
7054 0 : for (i = 0; i < graph->scc; ++i) {
7055 0 : if (c->scc_cluster[i] != i)
7056 0 : continue;
7057 0 : if (extract_sub_graph(ctx, graph, &node_cluster_exactly,
7058 0 : &edge_cluster_exactly, i, &c->cluster[i]) < 0)
7059 0 : return isl_stat_error;
7060 0 : c->cluster[i].src_scc = -1;
7061 0 : c->cluster[i].dst_scc = -1;
7062 0 : if (copy_partial(graph, c, i) < 0)
7063 0 : return isl_stat_error;
7064 : }
7065 :
7066 0 : if (detect_ccs(ctx, graph, &node_follows_strong_or_same_cluster) < 0)
7067 0 : return isl_stat_error;
7068 0 : for (i = 0; i < graph->n; ++i)
7069 0 : c->scc_cluster[graph->node[i].scc] = graph->node[i].cluster;
7070 :
7071 0 : return isl_stat_ok;
7072 : }
7073 :
7074 : /* Compute weights on the proximity edges of "graph" that can
7075 : * be used by find_proximity to find the most appropriate
7076 : * proximity edge to use to merge two clusters in "c".
7077 : * The weights are also used by has_bounded_distances to determine
7078 : * whether the merge should be allowed.
7079 : * Store the maximum of the computed weights in graph->max_weight.
7080 : *
7081 : * The computed weight is a measure for the number of remaining schedule
7082 : * dimensions that can still be completely aligned.
7083 : * In particular, compute the number of equalities between
7084 : * input dimensions and output dimensions in the proximity constraints.
7085 : * The directions that are already handled by outer schedule bands
7086 : * are projected out prior to determining this number.
7087 : *
7088 : * Edges that will never be considered by find_proximity are ignored.
7089 : */
7090 0 : static isl_stat compute_weights(struct isl_sched_graph *graph,
7091 : struct isl_clustering *c)
7092 : {
7093 : int i;
7094 :
7095 0 : graph->max_weight = 0;
7096 :
7097 0 : for (i = 0; i < graph->n_edge; ++i) {
7098 0 : struct isl_sched_edge *edge = &graph->edge[i];
7099 0 : struct isl_sched_node *src = edge->src;
7100 0 : struct isl_sched_node *dst = edge->dst;
7101 : isl_basic_map *hull;
7102 : isl_bool prox;
7103 : int n_in, n_out;
7104 :
7105 0 : prox = is_non_empty_proximity(edge);
7106 0 : if (prox < 0)
7107 0 : return isl_stat_error;
7108 0 : if (!prox)
7109 0 : continue;
7110 0 : if (bad_cluster(&c->scc[edge->src->scc]) ||
7111 0 : bad_cluster(&c->scc[edge->dst->scc]))
7112 0 : continue;
7113 0 : if (c->scc_cluster[edge->dst->scc] ==
7114 0 : c->scc_cluster[edge->src->scc])
7115 0 : continue;
7116 :
7117 0 : hull = isl_map_affine_hull(isl_map_copy(edge->map));
7118 0 : hull = isl_basic_map_transform_dims(hull, isl_dim_in, 0,
7119 : isl_mat_copy(src->vmap));
7120 0 : hull = isl_basic_map_transform_dims(hull, isl_dim_out, 0,
7121 : isl_mat_copy(dst->vmap));
7122 0 : hull = isl_basic_map_project_out(hull,
7123 0 : isl_dim_in, 0, src->rank);
7124 0 : hull = isl_basic_map_project_out(hull,
7125 0 : isl_dim_out, 0, dst->rank);
7126 0 : hull = isl_basic_map_remove_divs(hull);
7127 0 : n_in = isl_basic_map_dim(hull, isl_dim_in);
7128 0 : n_out = isl_basic_map_dim(hull, isl_dim_out);
7129 0 : hull = isl_basic_map_drop_constraints_not_involving_dims(hull,
7130 : isl_dim_in, 0, n_in);
7131 0 : hull = isl_basic_map_drop_constraints_not_involving_dims(hull,
7132 : isl_dim_out, 0, n_out);
7133 0 : if (!hull)
7134 0 : return isl_stat_error;
7135 0 : edge->weight = isl_basic_map_n_equality(hull);
7136 0 : isl_basic_map_free(hull);
7137 :
7138 0 : if (edge->weight > graph->max_weight)
7139 0 : graph->max_weight = edge->weight;
7140 : }
7141 :
7142 0 : return isl_stat_ok;
7143 : }
7144 :
7145 : /* Call compute_schedule_finish_band on each of the clusters in "c"
7146 : * in their topological order. This order is determined by the scc
7147 : * fields of the nodes in "graph".
7148 : * Combine the results in a sequence expressing the topological order.
7149 : *
7150 : * If there is only one cluster left, then there is no need to introduce
7151 : * a sequence node. Also, in this case, the cluster necessarily contains
7152 : * the SCC at position 0 in the original graph and is therefore also
7153 : * stored in the first cluster of "c".
7154 : */
7155 0 : static __isl_give isl_schedule_node *finish_bands_clustering(
7156 : __isl_take isl_schedule_node *node, struct isl_sched_graph *graph,
7157 : struct isl_clustering *c)
7158 : {
7159 : int i;
7160 : isl_ctx *ctx;
7161 : isl_union_set_list *filters;
7162 :
7163 0 : if (graph->scc == 1)
7164 0 : return compute_schedule_finish_band(node, &c->cluster[0], 0);
7165 :
7166 0 : ctx = isl_schedule_node_get_ctx(node);
7167 :
7168 0 : filters = extract_sccs(ctx, graph);
7169 0 : node = isl_schedule_node_insert_sequence(node, filters);
7170 :
7171 0 : for (i = 0; i < graph->scc; ++i) {
7172 0 : int j = c->scc_cluster[i];
7173 0 : node = isl_schedule_node_child(node, i);
7174 0 : node = isl_schedule_node_child(node, 0);
7175 0 : node = compute_schedule_finish_band(node, &c->cluster[j], 0);
7176 0 : node = isl_schedule_node_parent(node);
7177 0 : node = isl_schedule_node_parent(node);
7178 : }
7179 :
7180 0 : return node;
7181 : }
7182 :
7183 : /* Compute a schedule for a connected dependence graph by first considering
7184 : * each strongly connected component (SCC) in the graph separately and then
7185 : * incrementally combining them into clusters.
7186 : * Return the updated schedule node.
7187 : *
7188 : * Initially, each cluster consists of a single SCC, each with its
7189 : * own band schedule. The algorithm then tries to merge pairs
7190 : * of clusters along a proximity edge until no more suitable
7191 : * proximity edges can be found. During this merging, the schedule
7192 : * is maintained in the individual SCCs.
7193 : * After the merging is completed, the full resulting clusters
7194 : * are extracted and in finish_bands_clustering,
7195 : * compute_schedule_finish_band is called on each of them to integrate
7196 : * the band into "node" and to continue the computation.
7197 : *
7198 : * compute_weights initializes the weights that are used by find_proximity.
7199 : */
7200 0 : static __isl_give isl_schedule_node *compute_schedule_wcc_clustering(
7201 : __isl_take isl_schedule_node *node, struct isl_sched_graph *graph)
7202 : {
7203 : isl_ctx *ctx;
7204 : struct isl_clustering c;
7205 : int i;
7206 :
7207 0 : ctx = isl_schedule_node_get_ctx(node);
7208 :
7209 0 : if (clustering_init(ctx, &c, graph) < 0)
7210 0 : goto error;
7211 :
7212 0 : if (compute_weights(graph, &c) < 0)
7213 0 : goto error;
7214 :
7215 : for (;;) {
7216 0 : i = find_proximity(graph, &c);
7217 0 : if (i < 0)
7218 0 : goto error;
7219 0 : if (i >= graph->n_edge)
7220 0 : break;
7221 0 : if (merge_clusters_along_edge(ctx, graph, i, &c) < 0)
7222 0 : goto error;
7223 0 : }
7224 :
7225 0 : if (extract_clusters(ctx, graph, &c) < 0)
7226 0 : goto error;
7227 :
7228 0 : node = finish_bands_clustering(node, graph, &c);
7229 :
7230 0 : clustering_free(ctx, &c);
7231 0 : return node;
7232 : error:
7233 0 : clustering_free(ctx, &c);
7234 0 : return isl_schedule_node_free(node);
7235 : }
7236 :
7237 : /* Compute a schedule for a connected dependence graph and return
7238 : * the updated schedule node.
7239 : *
7240 : * If Feautrier's algorithm is selected, we first recursively try to satisfy
7241 : * as many validity dependences as possible. When all validity dependences
7242 : * are satisfied we extend the schedule to a full-dimensional schedule.
7243 : *
7244 : * Call compute_schedule_wcc_whole or compute_schedule_wcc_clustering
7245 : * depending on whether the user has selected the option to try and
7246 : * compute a schedule for the entire (weakly connected) component first.
7247 : * If there is only a single strongly connected component (SCC), then
7248 : * there is no point in trying to combine SCCs
7249 : * in compute_schedule_wcc_clustering, so compute_schedule_wcc_whole
7250 : * is called instead.
7251 : */
7252 0 : static __isl_give isl_schedule_node *compute_schedule_wcc(
7253 : __isl_take isl_schedule_node *node, struct isl_sched_graph *graph)
7254 : {
7255 : isl_ctx *ctx;
7256 :
7257 0 : if (!node)
7258 0 : return NULL;
7259 :
7260 0 : ctx = isl_schedule_node_get_ctx(node);
7261 0 : if (detect_sccs(ctx, graph) < 0)
7262 0 : return isl_schedule_node_free(node);
7263 :
7264 0 : if (compute_maxvar(graph) < 0)
7265 0 : return isl_schedule_node_free(node);
7266 :
7267 0 : if (need_feautrier_step(ctx, graph))
7268 0 : return compute_schedule_wcc_feautrier(node, graph);
7269 :
7270 0 : if (graph->scc <= 1 || isl_options_get_schedule_whole_component(ctx))
7271 0 : return compute_schedule_wcc_whole(node, graph);
7272 : else
7273 0 : return compute_schedule_wcc_clustering(node, graph);
7274 : }
7275 :
7276 : /* Compute a schedule for each group of nodes identified by node->scc
7277 : * separately and then combine them in a sequence node (or as set node
7278 : * if graph->weak is set) inserted at position "node" of the schedule tree.
7279 : * Return the updated schedule node.
7280 : *
7281 : * If "wcc" is set then each of the groups belongs to a single
7282 : * weakly connected component in the dependence graph so that
7283 : * there is no need for compute_sub_schedule to look for weakly
7284 : * connected components.
7285 : *
7286 : * If a set node would be introduced and if the number of components
7287 : * is equal to the number of nodes, then check if the schedule
7288 : * is already complete. If so, a redundant set node would be introduced
7289 : * (without any further descendants) stating that the statements
7290 : * can be executed in arbitrary order, which is also expressed
7291 : * by the absence of any node. Refrain from inserting any nodes
7292 : * in this case and simply return.
7293 : */
7294 0 : static __isl_give isl_schedule_node *compute_component_schedule(
7295 : __isl_take isl_schedule_node *node, struct isl_sched_graph *graph,
7296 : int wcc)
7297 : {
7298 : int component;
7299 : isl_ctx *ctx;
7300 : isl_union_set_list *filters;
7301 :
7302 0 : if (!node)
7303 0 : return NULL;
7304 :
7305 0 : if (graph->weak && graph->scc == graph->n) {
7306 0 : if (compute_maxvar(graph) < 0)
7307 0 : return isl_schedule_node_free(node);
7308 0 : if (graph->n_row >= graph->maxvar)
7309 0 : return node;
7310 : }
7311 :
7312 0 : ctx = isl_schedule_node_get_ctx(node);
7313 0 : filters = extract_sccs(ctx, graph);
7314 0 : if (graph->weak)
7315 0 : node = isl_schedule_node_insert_set(node, filters);
7316 : else
7317 0 : node = isl_schedule_node_insert_sequence(node, filters);
7318 :
7319 0 : for (component = 0; component < graph->scc; ++component) {
7320 0 : node = isl_schedule_node_child(node, component);
7321 0 : node = isl_schedule_node_child(node, 0);
7322 0 : node = compute_sub_schedule(node, ctx, graph,
7323 : &node_scc_exactly,
7324 : &edge_scc_exactly, component, wcc);
7325 0 : node = isl_schedule_node_parent(node);
7326 0 : node = isl_schedule_node_parent(node);
7327 : }
7328 :
7329 0 : return node;
7330 : }
7331 :
7332 : /* Compute a schedule for the given dependence graph and insert it at "node".
7333 : * Return the updated schedule node.
7334 : *
7335 : * We first check if the graph is connected (through validity and conditional
7336 : * validity dependences) and, if not, compute a schedule
7337 : * for each component separately.
7338 : * If the schedule_serialize_sccs option is set, then we check for strongly
7339 : * connected components instead and compute a separate schedule for
7340 : * each such strongly connected component.
7341 : */
7342 0 : static __isl_give isl_schedule_node *compute_schedule(isl_schedule_node *node,
7343 : struct isl_sched_graph *graph)
7344 : {
7345 : isl_ctx *ctx;
7346 :
7347 0 : if (!node)
7348 0 : return NULL;
7349 :
7350 0 : ctx = isl_schedule_node_get_ctx(node);
7351 0 : if (isl_options_get_schedule_serialize_sccs(ctx)) {
7352 0 : if (detect_sccs(ctx, graph) < 0)
7353 0 : return isl_schedule_node_free(node);
7354 : } else {
7355 0 : if (detect_wccs(ctx, graph) < 0)
7356 0 : return isl_schedule_node_free(node);
7357 : }
7358 :
7359 0 : if (graph->scc > 1)
7360 0 : return compute_component_schedule(node, graph, 1);
7361 :
7362 0 : return compute_schedule_wcc(node, graph);
7363 : }
7364 :
7365 : /* Compute a schedule on sc->domain that respects the given schedule
7366 : * constraints.
7367 : *
7368 : * In particular, the schedule respects all the validity dependences.
7369 : * If the default isl scheduling algorithm is used, it tries to minimize
7370 : * the dependence distances over the proximity dependences.
7371 : * If Feautrier's scheduling algorithm is used, the proximity dependence
7372 : * distances are only minimized during the extension to a full-dimensional
7373 : * schedule.
7374 : *
7375 : * If there are any condition and conditional validity dependences,
7376 : * then the conditional validity dependences may be violated inside
7377 : * a tilable band, provided they have no adjacent non-local
7378 : * condition dependences.
7379 : */
7380 0 : __isl_give isl_schedule *isl_schedule_constraints_compute_schedule(
7381 : __isl_take isl_schedule_constraints *sc)
7382 : {
7383 0 : isl_ctx *ctx = isl_schedule_constraints_get_ctx(sc);
7384 0 : struct isl_sched_graph graph = { 0 };
7385 : isl_schedule *sched;
7386 : isl_schedule_node *node;
7387 : isl_union_set *domain;
7388 :
7389 0 : sc = isl_schedule_constraints_align_params(sc);
7390 :
7391 0 : domain = isl_schedule_constraints_get_domain(sc);
7392 0 : if (isl_union_set_n_set(domain) == 0) {
7393 0 : isl_schedule_constraints_free(sc);
7394 0 : return isl_schedule_from_domain(domain);
7395 : }
7396 :
7397 0 : if (graph_init(&graph, sc) < 0)
7398 0 : domain = isl_union_set_free(domain);
7399 :
7400 0 : node = isl_schedule_node_from_domain(domain);
7401 0 : node = isl_schedule_node_child(node, 0);
7402 0 : if (graph.n > 0)
7403 0 : node = compute_schedule(node, &graph);
7404 0 : sched = isl_schedule_node_get_schedule(node);
7405 0 : isl_schedule_node_free(node);
7406 :
7407 0 : graph_free(ctx, &graph);
7408 0 : isl_schedule_constraints_free(sc);
7409 :
7410 0 : return sched;
7411 : }
7412 :
7413 : /* Compute a schedule for the given union of domains that respects
7414 : * all the validity dependences and minimizes
7415 : * the dependence distances over the proximity dependences.
7416 : *
7417 : * This function is kept for backward compatibility.
7418 : */
7419 0 : __isl_give isl_schedule *isl_union_set_compute_schedule(
7420 : __isl_take isl_union_set *domain,
7421 : __isl_take isl_union_map *validity,
7422 : __isl_take isl_union_map *proximity)
7423 : {
7424 : isl_schedule_constraints *sc;
7425 :
7426 0 : sc = isl_schedule_constraints_on_domain(domain);
7427 0 : sc = isl_schedule_constraints_set_validity(sc, validity);
7428 0 : sc = isl_schedule_constraints_set_proximity(sc, proximity);
7429 :
7430 0 : return isl_schedule_constraints_compute_schedule(sc);
7431 : }
|