LCOV - code coverage report
Current view: top level - metalib_isl - isl_scheduler.c (source / functions) Hit Total Coverage
Test: 2018-10-31_point_maint_greina16.lcov Lines: 0 2823 0.0 %
Date: 2018-11-01 11:27:00 Functions: 0 229 0.0 %

          Line data    Source code
       1             : /*
       2             :  * Copyright 2011      INRIA Saclay
       3             :  * Copyright 2012-2014 Ecole Normale Superieure
       4             :  * Copyright 2015-2016 Sven Verdoolaege
       5             :  * Copyright 2016      INRIA Paris
       6             :  * Copyright 2017      Sven Verdoolaege
       7             :  *
       8             :  * Use of this software is governed by the MIT license
       9             :  *
      10             :  * Written by Sven Verdoolaege, INRIA Saclay - Ile-de-France,
      11             :  * Parc Club Orsay Universite, ZAC des vignes, 4 rue Jacques Monod,
      12             :  * 91893 Orsay, France
      13             :  * and Ecole Normale Superieure, 45 rue d'Ulm, 75230 Paris, France
      14             :  * and Centre de Recherche Inria de Paris, 2 rue Simone Iff - Voie DQ12,
      15             :  * CS 42112, 75589 Paris Cedex 12, France
      16             :  */
      17             : 
      18             : #include <isl_ctx_private.h>
      19             : #include <isl_map_private.h>
      20             : #include <isl_space_private.h>
      21             : #include <isl_aff_private.h>
      22             : #include <isl/hash.h>
      23             : #include <isl/id.h>
      24             : #include <isl/constraint.h>
      25             : #include <isl/schedule.h>
      26             : #include <isl_schedule_constraints.h>
      27             : #include <isl/schedule_node.h>
      28             : #include <isl_mat_private.h>
      29             : #include <isl_vec_private.h>
      30             : #include <isl/set.h>
      31             : #include <isl_union_set_private.h>
      32             : #include <isl_seq.h>
      33             : #include <isl_tab.h>
      34             : #include <isl_dim_map.h>
      35             : #include <isl/map_to_basic_set.h>
      36             : #include <isl_sort.h>
      37             : #include <isl_options_private.h>
      38             : #include <isl_tarjan.h>
      39             : #include <isl_morph.h>
      40             : #include <isl/ilp.h>
      41             : #include <isl_val_private.h>
      42             : 
      43             : /*
      44             :  * The scheduling algorithm implemented in this file was inspired by
      45             :  * Bondhugula et al., "Automatic Transformations for Communication-Minimized
      46             :  * Parallelization and Locality Optimization in the Polyhedral Model".
      47             :  *
      48             :  * For a detailed description of the variant implemented in isl,
      49             :  * see Verdoolaege and Janssens, "Scheduling for PPCG" (2017).
      50             :  */
      51             : 
      52             : 
      53             : /* Internal information about a node that is used during the construction
      54             :  * of a schedule.
      55             :  * space represents the original space in which the domain lives;
      56             :  *      that is, the space is not affected by compression
      57             :  * sched is a matrix representation of the schedule being constructed
      58             :  *      for this node; if compressed is set, then this schedule is
      59             :  *      defined over the compressed domain space
      60             :  * sched_map is an isl_map representation of the same (partial) schedule
      61             :  *      sched_map may be NULL; if compressed is set, then this map
      62             :  *      is defined over the uncompressed domain space
      63             :  * rank is the number of linearly independent rows in the linear part
      64             :  *      of sched
      65             :  * the rows of "vmap" represent a change of basis for the node
      66             :  *      variables; the first rank rows span the linear part of
      67             :  *      the schedule rows; the remaining rows are linearly independent
      68             :  * the rows of "indep" represent linear combinations of the schedule
      69             :  * coefficients that are non-zero when the schedule coefficients are
      70             :  * linearly independent of previously computed schedule rows.
      71             :  * start is the first variable in the LP problem in the sequence that
      72             :  *      represents the schedule coefficients of this node
      73             :  * nvar is the dimension of the (compressed) domain
      74             :  * nparam is the number of parameters or 0 if we are not constructing
      75             :  *      a parametric schedule
      76             :  *
      77             :  * If compressed is set, then hull represents the constraints
      78             :  * that were used to derive the compression, while compress and
      79             :  * decompress map the original space to the compressed space and
      80             :  * vice versa.
      81             :  *
      82             :  * scc is the index of SCC (or WCC) this node belongs to
      83             :  *
      84             :  * "cluster" is only used inside extract_clusters and identifies
      85             :  * the cluster of SCCs that the node belongs to.
      86             :  *
      87             :  * coincident contains a boolean for each of the rows of the schedule,
      88             :  * indicating whether the corresponding scheduling dimension satisfies
      89             :  * the coincidence constraints in the sense that the corresponding
      90             :  * dependence distances are zero.
      91             :  *
      92             :  * If the schedule_treat_coalescing option is set, then
      93             :  * "sizes" contains the sizes of the (compressed) instance set
      94             :  * in each direction.  If there is no fixed size in a given direction,
      95             :  * then the corresponding size value is set to infinity.
      96             :  * If the schedule_treat_coalescing option or the schedule_max_coefficient
      97             :  * option is set, then "max" contains the maximal values for
      98             :  * schedule coefficients of the (compressed) variables.  If no bound
      99             :  * needs to be imposed on a particular variable, then the corresponding
     100             :  * value is negative.
     101             :  * If not NULL, then "bounds" contains a non-parametric set
     102             :  * in the compressed space that is bounded by the size in each direction.
     103             :  */
     104             : struct isl_sched_node {
     105             :         isl_space *space; /* original (uncompressed) domain space */
     106             :         int     compressed; /* set if hull, compress and decompress are in use */
     107             :         isl_set *hull; /* constraints used to derive the compression */
     108             :         isl_multi_aff *compress; /* maps the original space to the compressed space */
     109             :         isl_multi_aff *decompress; /* maps the compressed space to the original space */
     110             :         isl_mat *sched; /* schedule under construction, in matrix form */
     111             :         isl_map *sched_map; /* the same (partial) schedule as an isl_map; may be NULL */
     112             :         int      rank; /* number of linearly independent rows in the linear part of sched */
     113             :         isl_mat *indep; /* combinations that are non-zero for linearly independent rows */
     114             :         isl_mat *vmap; /* change of basis for the node variables */
     115             :         int      start; /* first LP variable holding the schedule coefficients of this node */
     116             :         int      nvar; /* dimension of the (compressed) domain */
     117             :         int      nparam; /* number of parameters, or 0 for a non-parametric schedule */
     118             : 
     119             :         int      scc; /* index of the SCC (or WCC) this node belongs to */
     120             :         int      cluster; /* cluster of SCCs; only used inside extract_clusters */
     121             : 
     122             :         int     *coincident; /* per schedule row: are the coincidence constraints satisfied? */
     123             : 
     124             :         isl_multi_val *sizes; /* size of the instance set in each direction (possibly infinity) */
     125             :         isl_basic_set *bounds; /* if not NULL, non-parametric set bounded by the sizes */
     126             :         isl_vec *max; /* maximal schedule coefficient values; negative means no bound */
     127             : };
     128             : /* Hash table comparison callback: does the space of the node "entry" have the same tuples as the space "val"? */
     129           0 : static int node_has_tuples(const void *entry, const void *val)
     130             : {
     131           0 :         struct isl_sched_node *node = (struct isl_sched_node *)entry;
     132           0 :         isl_space *space = (isl_space *) val;
     133             : 
     134           0 :         return isl_space_has_equal_tuples(node->space, space);
     135             : }
     136             : /* Is the component index of "node" exactly equal to "scc"? */
     137           0 : static int node_scc_exactly(struct isl_sched_node *node, int scc)
     138             : {
     139           0 :         return node->scc == scc;
     140             : }
     141             : /* Is the component index of "node" at most "scc"? */
     142           0 : static int node_scc_at_most(struct isl_sched_node *node, int scc)
     143             : {
     144           0 :         return node->scc <= scc;
     145             : }
     146             : /* Is the component index of "node" at least "scc"? */
     147           0 : static int node_scc_at_least(struct isl_sched_node *node, int scc)
     148             : {
     149           0 :         return node->scc >= scc;
     150             : }
     151             : 
     152             : /* An edge in the dependence graph.  An edge may be used to
     153             :  * ensure validity of the generated schedule, to minimize the dependence
     154             :  * distance or both.
     155             :  *
     156             :  * map is the dependence relation, with i -> j in the map if j depends on i
     157             :  * tagged_condition and tagged_validity contain the union of all tagged
     158             :  *      condition or conditional validity dependence relations that
     159             :  *      specialize the dependence relation "map"; that is,
     160             :  *      if (i -> a) -> (j -> b) is an element of "tagged_condition"
     161             :  *      or "tagged_validity", then i -> j is an element of "map".
     162             :  *      If these fields are NULL, then they represent the empty relation.
     163             :  * src is the source node
     164             :  * dst is the sink node
     165             :  *
     166             :  * types is a bit vector containing the types of this edge.
     167             :  * validity is set if the edge is used to ensure correctness
     168             :  * coincidence is used to enforce zero dependence distances
     169             :  * proximity is set if the edge is used to minimize dependence distances
     170             :  * condition is set if the edge represents a condition
     171             :  *      for a conditional validity schedule constraint
     172             :  * local can only be set for condition edges and indicates that
     173             :  *      the dependence distance over the edge should be zero
     174             :  * conditional_validity is set if the edge is used to conditionally
     175             :  *      ensure correctness
     176             :  *
     177             :  * For validity edges, start and end mark the sequence of inequality
     178             :  * constraints in the LP problem that encode the validity constraint
     179             :  * corresponding to this edge.
     180             :  *
     181             :  * During clustering, an edge may be marked "no_merge" if it should
     182             :  * not be used to merge clusters.
     183             :  * The weight is also only used during clustering and it is
     184             :  * an indication of how many schedule dimensions on either side
     185             :  * of the schedule constraints can be aligned.
     186             :  * If the weight is negative, then this means that this edge was postponed
     187             :  * by has_bounded_distances or any_no_merge.  The original weight can
     188             :  * be retrieved by adding 1 + graph->max_weight, with "graph"
     189             :  * the graph containing this edge.
     190             :  */
     191             : struct isl_sched_edge {
     192             :         isl_map *map; /* dependence relation; i -> j is in the map if j depends on i */
     193             :         isl_union_map *tagged_condition; /* tagged condition relations; NULL means empty */
     194             :         isl_union_map *tagged_validity; /* tagged conditional validity relations; NULL means empty */
     195             : 
     196             :         struct isl_sched_node *src; /* source node */
     197             :         struct isl_sched_node *dst; /* sink node */
     198             : 
     199             :         unsigned types; /* bit vector of edge types; is_type tests bit "1 << type" */
     200             : 
     201             :         int start; /* validity edges: start of the LP inequality sequence for this edge */
     202             :         int end; /* validity edges: end of that LP inequality sequence */
     203             : 
     204             :         int no_merge; /* clustering only: edge should not be used to merge clusters */
     205             :         int weight; /* clustering only: alignable schedule dimensions; negative if postponed */
     206             : };
     207             : 
     208             : /* Is "edge" marked as being of type "type"?
     209             :  * That is, is bit "1 << type" set in the "types" bit vector of "edge"? */
     210           0 : static int is_type(struct isl_sched_edge *edge, enum isl_edge_type type)
     211             : {
     212           0 :         return ISL_FL_ISSET(edge->types, 1 << type);
     213             : }
     214             : 
     215             : /* Mark "edge" as being of type "type".
     216             :  * This applies ISL_FL_SET to edge->types with the mask "1 << type". */
     217           0 : static void set_type(struct isl_sched_edge *edge, enum isl_edge_type type)
     218             : {
     219           0 :         ISL_FL_SET(edge->types, 1 << type);
     220           0 : }
     221             : 
     222             : /* No longer mark "edge" as being of type "type".
     223             :  * This applies ISL_FL_CLR to edge->types with the mask "1 << type". */
     224           0 : static void clear_type(struct isl_sched_edge *edge, enum isl_edge_type type)
     225             : {
     226           0 :         ISL_FL_CLR(edge->types, 1 << type);
     227           0 : }
     228             : 
     229             : /* Is "edge" marked as a validity edge?
     230             :  * That is, is it of type isl_edge_validity? */
     231           0 : static int is_validity(struct isl_sched_edge *edge)
     232             : {
     233           0 :         return is_type(edge, isl_edge_validity);
     234             : }
     235             : 
     236             : /* Mark "edge" as a validity edge.
     237             :  * That is, add the type isl_edge_validity to its types. */
     238           0 : static void set_validity(struct isl_sched_edge *edge)
     239             : {
     240           0 :         set_type(edge, isl_edge_validity);
     241           0 : }
     242             : 
     243             : /* Is "edge" marked as a proximity edge?
     244             :  * That is, is it of type isl_edge_proximity? */
     245           0 : static int is_proximity(struct isl_sched_edge *edge)
     246             : {
     247           0 :         return is_type(edge, isl_edge_proximity);
     248             : }
     249             : 
     250             : /* Is "edge" marked as a local edge?
     251             :  * That is, is it of type isl_edge_local? */
     252           0 : static int is_local(struct isl_sched_edge *edge)
     253             : {
     254           0 :         return is_type(edge, isl_edge_local);
     255             : }
     256             : 
     257             : /* Mark "edge" as a local edge.
     258             :  * That is, add the type isl_edge_local to its types. */
     259           0 : static void set_local(struct isl_sched_edge *edge)
     260             : {
     261           0 :         set_type(edge, isl_edge_local);
     262           0 : }
     263             : 
     264             : /* No longer mark "edge" as a local edge.
     265             :  * That is, remove the type isl_edge_local from its types. */
     266           0 : static void clear_local(struct isl_sched_edge *edge)
     267             : {
     268           0 :         clear_type(edge, isl_edge_local);
     269           0 : }
     270             : 
     271             : /* Is "edge" marked as a coincidence edge?
     272             :  * That is, is it of type isl_edge_coincidence? */
     273           0 : static int is_coincidence(struct isl_sched_edge *edge)
     274             : {
     275           0 :         return is_type(edge, isl_edge_coincidence);
     276             : }
     277             : 
     278             : /* Is "edge" marked as a condition edge?
     279             :  * That is, is it of type isl_edge_condition? */
     280           0 : static int is_condition(struct isl_sched_edge *edge)
     281             : {
     282           0 :         return is_type(edge, isl_edge_condition);
     283             : }
     284             : 
     285             : /* Is "edge" marked as a conditional validity edge?
     286             :  * That is, is it of type isl_edge_conditional_validity? */
     287           0 : static int is_conditional_validity(struct isl_sched_edge *edge)
     288             : {
     289           0 :         return is_type(edge, isl_edge_conditional_validity);
     290             : }
     291             : 
     292             : /* Is "edge" of a type that can appear multiple times between
     293             :  * the same pair of nodes?
     294             :  *
     295             :  * Condition edges and conditional validity edges may have tagged
     296             :  * dependence relations, in which case an edge is added for each
     297             :  * pair of tags.
     298             :  * The result is non-zero if "edge" is a condition or a conditional validity edge. */
     299           0 : static int is_multi_edge_type(struct isl_sched_edge *edge)
     300             : {
     301           0 :         return is_condition(edge) || is_conditional_validity(edge);
     302             : }
     303             : 
     304             : /* Internal information about the dependence graph used during
     305             :  * the construction of the schedule.
     306             :  *
     307             :  * intra_hmap is a cache, mapping dependence relations to their dual,
     308             :  *      for dependences from a node to itself, possibly without
     309             :  *      coefficients for the parameters
     310             :  * intra_hmap_param is a cache, mapping dependence relations to their dual,
     311             :  *      for dependences from a node to itself, including coefficients
     312             :  *      for the parameters
     313             :  * inter_hmap is a cache, mapping dependence relations to their dual,
     314             :  *      for dependences between distinct nodes
     315             :  * if compression is involved then the key for these maps
     316             :  * is the original, uncompressed dependence relation, while
     317             :  * the value is the dual of the compressed dependence relation.
     318             :  *
     319             :  * n is the number of nodes
     320             :  * node is the list of nodes
     321             :  * maxvar is the maximal number of variables over all nodes
     322             :  * max_row is the allocated number of rows in the schedule
     323             :  * n_row is the current (maximal) number of linearly independent
     324             :  *      rows in the node schedules
     325             :  * n_total_row is the current number of rows in the node schedules
     326             :  * band_start is the starting row in the node schedules of the current band
     327             :  * root is set to the original dependence graph from which this graph
     328             :  *      is derived through splitting.  If this graph is not the result of
     329             :  *      splitting, then the root field points to the graph itself.
     330             :  *
     331             :  * sorted contains a list of node indices sorted according to the
     332             :  *      SCC to which a node belongs
     333             :  *
     334             :  * n_edge is the number of edges
     335             :  * edge is the list of edges
     336             :  * max_edge contains the maximal number of edges of each type;
     337             :  *      in particular, it contains the number of edges in the initial graph.
     338             :  * edge_table contains pointers into the edge array, hashed on the source
     339             :  *      and sink spaces; there is one such table for each type;
     340             :  *      a given edge may be referenced from more than one table
     341             :  *      if the corresponding relation appears in more than one of the
     342             :  *      sets of dependences; however, for each type there is only
     343             :  *      a single edge between a given pair of source and sink space
     344             :  *      in the entire graph
     345             :  *
     346             :  * node_table contains pointers into the node array, hashed on the space tuples
     347             :  *
     348             :  * region contains a list of variable sequences that should be non-trivial
     349             :  *
     350             :  * lp contains the (I)LP problem used to obtain new schedule rows
     351             :  *
     352             :  * src_scc and dst_scc are the source and sink SCCs of an edge with
     353             :  *      conflicting constraints
     354             :  *
     355             :  * scc represents the number of components
     356             :  * weak is set if the components are weakly connected
     357             :  *
     358             :  * max_weight is used during clustering and represents the maximal
     359             :  * weight of the relevant proximity edges.
     360             :  */
     361             : struct isl_sched_graph {
     362             :         isl_map_to_basic_set *intra_hmap; /* cache of duals for dependences from a node to itself */
     363             :         isl_map_to_basic_set *intra_hmap_param; /* same, including coefficients for the parameters */
     364             :         isl_map_to_basic_set *inter_hmap; /* cache of duals for dependences between distinct nodes */
     365             : 
     366             :         struct isl_sched_node *node; /* list of nodes */
     367             :         int n; /* number of nodes */
     368             :         int maxvar; /* maximal number of variables over all nodes */
     369             :         int max_row; /* allocated number of rows in the schedule */
     370             :         int n_row; /* current (maximal) number of linearly independent rows */
     371             : 
     372             :         int *sorted; /* node indices sorted according to their SCC */
     373             : 
     374             :         int n_total_row; /* current number of rows in the node schedules */
     375             :         int band_start; /* starting row of the current band */
     376             : 
     377             :         struct isl_sched_graph *root; /* original graph; the graph itself if it was not split off */
     378             : 
     379             :         struct isl_sched_edge *edge; /* list of edges */
     380             :         int n_edge; /* number of edges */
     381             :         int max_edge[isl_edge_last + 1]; /* maximal number of edges of each type */
     382             :         struct isl_hash_table *edge_table[isl_edge_last + 1]; /* one table per type, hashed on source and sink */
     383             : 
     384             :         struct isl_hash_table *node_table; /* pointers into "node", hashed on the space tuples */
     385             :         struct isl_trivial_region *region; /* variable sequences that should be non-trivial */
     386             : 
     387             :         isl_basic_set *lp; /* (I)LP problem used to obtain new schedule rows */
     388             : 
     389             :         int src_scc; /* source SCC of an edge with conflicting constraints */
     390             :         int dst_scc; /* sink SCC of an edge with conflicting constraints */
     391             : 
     392             :         int scc; /* number of components */
     393             :         int weak; /* set if the components are weakly connected */
     394             : 
     395             :         int max_weight; /* clustering only: maximal weight of the relevant proximity edges */
     396             : };
     397             : 
     398             : /* Initialize node_table based on the list of nodes.
     399             :  * Return 0 on success and -1 if the table or one of its entries cannot be obtained. */
     400           0 : static int graph_init_table(isl_ctx *ctx, struct isl_sched_graph *graph)
     401             : {
     402             :         int i;
     403             : 
     404           0 :         graph->node_table = isl_hash_table_alloc(ctx, graph->n);
     405           0 :         if (!graph->node_table)
     406           0 :                 return -1;
     407             : 
     408           0 :         for (i = 0; i < graph->n; ++i) {
     409             :                 struct isl_hash_table_entry *entry;
     410             :                 uint32_t hash;
     411             : 
     412           0 :                 hash = isl_space_get_tuple_hash(graph->node[i].space); /* nodes are keyed on the tuples of their space */
     413           0 :                 entry = isl_hash_table_find(ctx, graph->node_table, hash,
     414             :                                             &node_has_tuples,
     415           0 :                                             graph->node[i].space, 1);
     416           0 :                 if (!entry)
     417           0 :                         return -1;
     418           0 :                 entry->data = &graph->node[i]; /* the entry points into the graph->node array */
     419             :         }
     420             : 
     421           0 :         return 0;
     422             : }
     423             : 
     424             : /* Return a pointer to the node that lives within the given space,
     425             :  * an invalid node if there is no such node, or NULL in case of error.
     426             :  * The invalid node is graph->node + graph->n; NULL is returned if "space" is NULL. */
     427           0 : static struct isl_sched_node *graph_find_node(isl_ctx *ctx,
     428             :         struct isl_sched_graph *graph, __isl_keep isl_space *space)
     429             : {
     430             :         struct isl_hash_table_entry *entry;
     431             :         uint32_t hash;
     432             : 
     433           0 :         if (!space)
     434           0 :                 return NULL;
     435             : 
     436           0 :         hash = isl_space_get_tuple_hash(space);
     437           0 :         entry = isl_hash_table_find(ctx, graph->node_table, hash,
     438             :                                     &node_has_tuples, space, 0);
     439             : 
     440           0 :         return entry ? entry->data : graph->node + graph->n; /* one past the last node signals "not found" */
     441             : }
     442             : 
     443             : /* Is "node" a node in "graph"?
     444             :  * That is, is it a non-NULL pointer to one of the graph->n elements of graph->node? */
     445           0 : static int is_node(struct isl_sched_graph *graph,
     446             :         struct isl_sched_node *node)
     447             : {
     448           0 :         return node && node >= &graph->node[0] && node < &graph->node[graph->n];
     449             : }
     450             : /* Hash table comparison callback: do the edges "entry" and "val" have the same source and the same sink node? */
     451           0 : static int edge_has_src_and_dst(const void *entry, const void *val)
     452             : {
     453           0 :         const struct isl_sched_edge *edge = entry;
     454           0 :         const struct isl_sched_edge *temp = val;
     455             : 
     456           0 :         return edge->src == temp->src && edge->dst == temp->dst;
     457             : }
     458             : 
     459             : /* Add the given edge to graph->edge_table[type].
     460             :  * The hash is derived from the src and dst pointers of "edge". Return isl_stat_error if no entry can be obtained. */
     461           0 : static isl_stat graph_edge_table_add(isl_ctx *ctx,
     462             :         struct isl_sched_graph *graph, enum isl_edge_type type,
     463             :         struct isl_sched_edge *edge)
     464             : {
     465             :         struct isl_hash_table_entry *entry;
     466             :         uint32_t hash;
     467             : 
     468           0 :         hash = isl_hash_init();
     469           0 :         hash = isl_hash_builtin(hash, edge->src);
     470           0 :         hash = isl_hash_builtin(hash, edge->dst);
     471           0 :         entry = isl_hash_table_find(ctx, graph->edge_table[type], hash,
     472             :                                     &edge_has_src_and_dst, edge, 1);
     473           0 :         if (!entry)
     474           0 :                 return isl_stat_error;
     475           0 :         entry->data = edge; /* the entry refers to the edge itself, not to a copy */
     476             : 
     477           0 :         return isl_stat_ok;
     478             : }
     479             : 
     480             : /* Add "edge" to all relevant edge tables.
     481             :  * That is, for every type of the edge, add it to the corresponding table.
     482             :  * Return isl_stat_error as soon as one of the additions fails. */
     483           0 : static isl_stat graph_edge_tables_add(isl_ctx *ctx,
     484             :         struct isl_sched_graph *graph, struct isl_sched_edge *edge)
     485             : {
     486             :         enum isl_edge_type t;
     487             : 
     488           0 :         for (t = isl_edge_first; t <= isl_edge_last; ++t) {
     489           0 :                 if (!is_type(edge, t))
     490           0 :                         continue;
     491           0 :                 if (graph_edge_table_add(ctx, graph, t, edge) < 0)
     492           0 :                         return isl_stat_error;
     493             :         }
     494             : 
     495           0 :         return isl_stat_ok;
     496             : }
     497             : 
     498             : /* Allocate the edge_tables based on the maximal number of edges of
     499             :  * each type.
     500             :  * Return 0 on success and -1 as soon as an allocation fails. */
     501           0 : static int graph_init_edge_tables(isl_ctx *ctx, struct isl_sched_graph *graph)
     502             : {
     503             :         int i;
     504             : 
     505           0 :         for (i = 0; i <= isl_edge_last; ++i) {
     506           0 :                 graph->edge_table[i] = isl_hash_table_alloc(ctx,
     507             :                                                             graph->max_edge[i]);
     508           0 :                 if (!graph->edge_table[i])
     509           0 :                         return -1;
     510             :         }
     511             : 
     512           0 :         return 0;
     513             : }
     514             : 
     515             : /* If graph->edge_table[type] contains an edge from the given source
     516             :  * to the given destination, then return the hash table entry of this edge.
     517             :  * Otherwise, return NULL.
     518             :  * The lookup uses a temporary edge holding only "src" and "dst"; the isl_ctx is taken from src->space. */
     519           0 : static struct isl_hash_table_entry *graph_find_edge_entry(
     520             :         struct isl_sched_graph *graph,
     521             :         enum isl_edge_type type,
     522             :         struct isl_sched_node *src, struct isl_sched_node *dst)
     523             : {
     524           0 :         isl_ctx *ctx = isl_space_get_ctx(src->space);
     525             :         uint32_t hash;
     526           0 :         struct isl_sched_edge temp = { .src = src, .dst = dst };
     527             : 
     528           0 :         hash = isl_hash_init(); /* same hash construction as in graph_edge_table_add */
     529           0 :         hash = isl_hash_builtin(hash, temp.src);
     530           0 :         hash = isl_hash_builtin(hash, temp.dst);
     531           0 :         return isl_hash_table_find(ctx, graph->edge_table[type], hash,
     532             :                                     &edge_has_src_and_dst, &temp, 0);
     533             : }
     534             : 
     535             : 
     536             : /* If graph->edge_table[type] contains an edge from the given source
     537             :  * to the given destination, then return this edge.
     538             :  * Otherwise, return NULL.
     539             :  * The edge is the data of the entry returned by graph_find_edge_entry. */
     540           0 : static struct isl_sched_edge *graph_find_edge(struct isl_sched_graph *graph,
     541             :         enum isl_edge_type type,
     542             :         struct isl_sched_node *src, struct isl_sched_node *dst)
     543             : {
     544             :         struct isl_hash_table_entry *entry;
     545             : 
     546           0 :         entry = graph_find_edge_entry(graph, type, src, dst);
     547           0 :         if (!entry)
     548           0 :                 return NULL;
     549             : 
     550           0 :         return entry->data;
     551             : }
     552             : 
     553             : /* Check whether the dependence graph has an edge of the given type
     554             :  * between the given two nodes.
     555             :  * An edge for which isl_map_plain_is_empty holds does not count; isl_bool_error is returned if that test fails. */
     556           0 : static isl_bool graph_has_edge(struct isl_sched_graph *graph,
     557             :         enum isl_edge_type type,
     558             :         struct isl_sched_node *src, struct isl_sched_node *dst)
     559             : {
     560             :         struct isl_sched_edge *edge;
     561             :         isl_bool empty;
     562             : 
     563           0 :         edge = graph_find_edge(graph, type, src, dst);
     564           0 :         if (!edge)
     565           0 :                 return isl_bool_false;
     566             : 
     567           0 :         empty = isl_map_plain_is_empty(edge->map);
     568           0 :         if (empty < 0)
     569           0 :                 return isl_bool_error;
     570             : 
     571           0 :         return !empty; /* the error case was handled above, so "empty" is a plain boolean here */
     572             : }
     573             : 
     574             : /* Look, in the edge tables of all edge types, for an edge with the same
     575             :  * src and dst fields as "model" and a plainly equal map field.
     576             :  * Return the matching edge if one can be found.
     577             :  * Return "model" if no matching edge is found.
     578             :  * Return NULL on error.
     579             :  */
     580           0 : static struct isl_sched_edge *graph_find_matching_edge(
     581             :         struct isl_sched_graph *graph, struct isl_sched_edge *model)
     582             : {
     583             :         enum isl_edge_type i;
     584             :         struct isl_sched_edge *edge;
     585             : 
     586           0 :         for (i = isl_edge_first; i <= isl_edge_last; ++i) {
     587             :                 int is_equal;
     588             : 
     589           0 :                 edge = graph_find_edge(graph, i, model->src, model->dst);
     590           0 :                 if (!edge)
     591           0 :                         continue;
     592           0 :                 is_equal = isl_map_plain_is_equal(model->map, edge->map);
     593           0 :                 if (is_equal < 0)
     594           0 :                         return NULL;
     595           0 :                 if (is_equal)
     596           0 :                         return edge;
     597             :         }
     598             : 
     599           0 :         return model;
     600             : }
     601             : 
     602             : /* Remove the given edge from all the edge_tables that refer to it,
                     :  * i.e., from every table whose entry for edge->src and edge->dst
                     :  * stores "edge" itself.  Entries that store a different edge
                     :  * for the same pair of nodes are left alone.
                     :  * This function does not free "edge".
     603             :  */
     604           0 : static void graph_remove_edge(struct isl_sched_graph *graph,
     605             :         struct isl_sched_edge *edge)
     606             : {
     607           0 :         isl_ctx *ctx = isl_map_get_ctx(edge->map);
     608             :         enum isl_edge_type i;
     609             : 
     610           0 :         for (i = isl_edge_first; i <= isl_edge_last; ++i) {
     611             :                 struct isl_hash_table_entry *entry;
     612             : 
     613           0 :                 entry = graph_find_edge_entry(graph, i, edge->src, edge->dst);
     614           0 :                 if (!entry)
     615           0 :                         continue;
     616           0 :                 if (entry->data != edge)
     617           0 :                         continue;
     618           0 :                 isl_hash_table_remove(ctx, graph->edge_table[i], entry);
     619             :         }
     620           0 : }
     621             : 
     622             : /* Check whether the dependence graph has an edge of any type
     623             :  * from "src" to "dst".
                     :  * The edge types are tried in order and the search stops at
                     :  * the first type for which graph_has_edge reports an edge or an error.
     624             :  */
     625           0 : static isl_bool graph_has_any_edge(struct isl_sched_graph *graph,
     626             :         struct isl_sched_node *src, struct isl_sched_node *dst)
     627             : {
     628             :         enum isl_edge_type i;
     629             :         isl_bool r;
     630             : 
     631           0 :         for (i = isl_edge_first; i <= isl_edge_last; ++i) {
     632           0 :                 r = graph_has_edge(graph, i, src, dst);
     633           0 :                 if (r < 0 || r)
     634           0 :                         return r;
     635             :         }
     636             : 
     637           0 :         return r;
     638             : }
     639             : 
     640             : /* Check whether the dependence graph has a validity edge
     641             :  * from "src" to "dst".  An error from graph_has_edge is passed on.
     642             :  *
     643             :  * Conditional validity edges are essentially validity edges that
     644             :  * can be ignored if the corresponding condition edges are iteration private.
     645             :  * Here, we are only checking for the presence of validity
     646             :  * edges, so we need to consider the conditional validity edges too.
     647             :  * In particular, this function is used during the detection
     648             :  * of strongly connected components and we cannot ignore
     649             :  * conditional validity edges during this detection.
     650             :  */
     651           0 : static isl_bool graph_has_validity_edge(struct isl_sched_graph *graph,
     652             :         struct isl_sched_node *src, struct isl_sched_node *dst)
     653             : {
     654             :         isl_bool r;
     655             : 
     656           0 :         r = graph_has_edge(graph, isl_edge_validity, src, dst);
     657           0 :         if (r < 0 || r)
     658           0 :                 return r;
     659             : 
     660           0 :         return graph_has_edge(graph, isl_edge_conditional_validity, src, dst);
     661             : }
     662             : 
     663             : /* Perform all the required memory allocations for a schedule graph "graph"
     664             :  * with "n_node" nodes and "n_edge" edges and initialize the corresponding
     665             :  * fields.
                     :  *
                     :  * graph->n and graph->n_edge are set to "n_node" and "n_edge" and
                     :  * graph->sorted is initialized to the identity order 0, 1, ..., n - 1.
                     :  * Return isl_stat_error if the node, sorted or region array or
                     :  * (for non-zero "n_edge") the edge array could not be allocated.
                     :  * NOTE(review): the results of the three *_hmap allocations are
                     :  * not checked here; confirm that their users cope with NULL.
     666             :  */
     667           0 : static isl_stat graph_alloc(isl_ctx *ctx, struct isl_sched_graph *graph,
     668             :         int n_node, int n_edge)
     669             : {
     670             :         int i;
     671             : 
     672           0 :         graph->n = n_node;
     673           0 :         graph->n_edge = n_edge;
     674           0 :         graph->node = isl_calloc_array(ctx, struct isl_sched_node, graph->n);
     675           0 :         graph->sorted = isl_calloc_array(ctx, int, graph->n);
     676           0 :         graph->region = isl_alloc_array(ctx,
     677             :                                         struct isl_trivial_region, graph->n);
     678           0 :         graph->edge = isl_calloc_array(ctx,
     679             :                                         struct isl_sched_edge, graph->n_edge);
     680             : 
     681           0 :         graph->intra_hmap = isl_map_to_basic_set_alloc(ctx, 2 * n_edge);
     682           0 :         graph->intra_hmap_param = isl_map_to_basic_set_alloc(ctx, 2 * n_edge);
     683           0 :         graph->inter_hmap = isl_map_to_basic_set_alloc(ctx, 2 * n_edge);
     684             : 
     685           0 :         if (!graph->node || !graph->region || (graph->n_edge && !graph->edge) ||
     686           0 :             !graph->sorted)
     687           0 :                 return isl_stat_error;
     688             : 
     689           0 :         for(i = 0; i < graph->n; ++i)
     690           0 :                 graph->sorted[i] = i;
     691             : 
     692           0 :         return isl_stat_ok;
     693             : }
     694             : 
     695             : /* Free the memory held by the fields of node "node" in "graph".
                     :  * The node structure itself is not freed here.
     696             :  * The "coincident" field is shared by nodes in a graph and its subgraph.
     697             :  * It therefore only needs to be freed for the original dependence graph,
     698             :  * i.e., one that is not the result of splitting.
     699             :  */
     700           0 : static void clear_node(struct isl_sched_graph *graph,
     701             :         struct isl_sched_node *node)
     702             : {
     703           0 :         isl_space_free(node->space);
     704           0 :         isl_set_free(node->hull);
     705           0 :         isl_multi_aff_free(node->compress);
     706           0 :         isl_multi_aff_free(node->decompress);
     707           0 :         isl_mat_free(node->sched);
     708           0 :         isl_map_free(node->sched_map);
     709           0 :         isl_mat_free(node->indep);
     710           0 :         isl_mat_free(node->vmap);
     711           0 :         if (graph->root == graph) /* "graph" is its own root */
     712           0 :                 free(node->coincident);
     713           0 :         isl_multi_val_free(node->sizes);
     714           0 :         isl_basic_set_free(node->bounds);
     715           0 :         isl_vec_free(node->max);
     716           0 : }
     717             : /* Free everything that "graph" refers to: the three hash maps,
                     :  * the nodes (through clear_node) and the node array, the sorted array,
                     :  * the map, tagged_condition and tagged_validity fields of each edge
                     :  * and the edge array, the region array, the edge hash tables,
                     :  * the node hash table and graph->lp.
                     :  * The "graph" structure itself is not freed here.
                     :  */
     718           0 : static void graph_free(isl_ctx *ctx, struct isl_sched_graph *graph)
     719             : {
     720             :         int i;
     721             : 
     722           0 :         isl_map_to_basic_set_free(graph->intra_hmap);
     723           0 :         isl_map_to_basic_set_free(graph->intra_hmap_param);
     724           0 :         isl_map_to_basic_set_free(graph->inter_hmap);
     725             : 
     726           0 :         if (graph->node)
     727           0 :                 for (i = 0; i < graph->n; ++i)
     728           0 :                         clear_node(graph, &graph->node[i]);
     729           0 :         free(graph->node);
     730           0 :         free(graph->sorted);
     731           0 :         if (graph->edge)
     732           0 :                 for (i = 0; i < graph->n_edge; ++i) {
     733           0 :                         isl_map_free(graph->edge[i].map);
     734           0 :                         isl_union_map_free(graph->edge[i].tagged_condition);
     735           0 :                         isl_union_map_free(graph->edge[i].tagged_validity);
     736             :                 }
     737           0 :         free(graph->edge);
     738           0 :         free(graph->region);
     739           0 :         for (i = 0; i <= isl_edge_last; ++i)
     740           0 :                 isl_hash_table_free(ctx, graph->edge_table[i]);
     741           0 :         isl_hash_table_free(ctx, graph->node_table);
     742           0 :         isl_basic_set_free(graph->lp);
     743           0 : }
     744             : 
     745             : /* For each "set" on which this function is called, increment
     746             :  * graph->n by one and update graph->maxvar, i.e., raise it to
                     :  * the number of set dimensions of "set" if that number is larger.
                     :  * "user" points to the graph.  "set" is freed before returning.
     747             :  */
     748           0 : static isl_stat init_n_maxvar(__isl_take isl_set *set, void *user)
     749             : {
     750           0 :         struct isl_sched_graph *graph = user;
     751           0 :         int nvar = isl_set_dim(set, isl_dim_set);
     752             : 
     753           0 :         graph->n++;
     754           0 :         if (nvar > graph->maxvar)
     755           0 :                 graph->maxvar = nvar;
     756             : 
     757           0 :         isl_set_free(set);
     758             : 
     759           0 :         return isl_stat_ok;
     760             : }
     761             : 
     762             : /* Compute the number of rows that should be allocated for the schedule.
     763             :  * In particular, we need one row for each variable or one row
     764             :  * for each basic map in the dependences.
     765             :  * Note that it is practically impossible to exhaust both
     766             :  * the number of dependences and the number of variables.
                     :  *
                     :  * The result, the number of basic maps in "sc" plus graph->maxvar,
                     :  * is stored in graph->max_row.  As a side effect, graph->n and
                     :  * graph->maxvar are reset and recomputed from the domain of "sc".
                     :  * Return isl_stat_error if the domain cannot be traversed or
                     :  * if the number of basic maps cannot be determined.
     767             :  */
     768           0 : static isl_stat compute_max_row(struct isl_sched_graph *graph,
     769             :         __isl_keep isl_schedule_constraints *sc)
     770             : {
     771             :         int n_edge;
     772             :         isl_stat r;
     773             :         isl_union_set *domain;
     774             : 
     775           0 :         graph->n = 0;
     776           0 :         graph->maxvar = 0;
     777           0 :         domain = isl_schedule_constraints_get_domain(sc);
     778           0 :         r = isl_union_set_foreach_set(domain, &init_n_maxvar, graph);
     779           0 :         isl_union_set_free(domain);
     780           0 :         if (r < 0)
     781           0 :                 return isl_stat_error;
     782           0 :         n_edge = isl_schedule_constraints_n_basic_map(sc);
     783           0 :         if (n_edge < 0)
     784           0 :                 return isl_stat_error;
     785           0 :         graph->max_row = n_edge + graph->maxvar;
     786             : 
     787           0 :         return isl_stat_ok;
     788             : }
     789             : 
     790             : /* Does "bset" have any defining equalities for its set variables?
                     :  * The set variables are checked in order and the first positive
                     :  * answer or error is returned.
                     :  * Return isl_bool_error if "bset" is NULL.
     791             :  */
     792           0 : static isl_bool has_any_defining_equality(__isl_keep isl_basic_set *bset)
     793             : {
     794             :         int i, n;
     795             : 
     796           0 :         if (!bset)
     797           0 :                 return isl_bool_error;
     798             : 
     799           0 :         n = isl_basic_set_dim(bset, isl_dim_set);
     800           0 :         for (i = 0; i < n; ++i) {
     801             :                 isl_bool has;
     802             : 
     803           0 :                 has = isl_basic_set_has_defining_equality(bset, isl_dim_set, i,
     804             :                                                         NULL);
     805           0 :                 if (has < 0 || has)
     806           0 :                         return has;
     807             :         }
     808             : 
     809           0 :         return isl_bool_false;
     810             : }
     811             : 
     812             : /* Set the entries of node->max to the value of the schedule_max_coefficient
     813             :  * option, if set.
                     :  * An option value of -1 is treated as "not set"; node->max is then
                     :  * left untouched.  Otherwise, node->max is allocated with
                     :  * node->nvar entries, each set to the option value.
                     :  * Return isl_stat_error if node->max could not be constructed.
     814             :  */
     815           0 : static isl_stat set_max_coefficient(isl_ctx *ctx, struct isl_sched_node *node)
     816             : {
     817             :         int max;
     818             : 
     819           0 :         max = isl_options_get_schedule_max_coefficient(ctx);
     820           0 :         if (max == -1)
     821           0 :                 return isl_stat_ok;
     822             : 
     823           0 :         node->max = isl_vec_alloc(ctx, node->nvar);
     824           0 :         node->max = isl_vec_set_si(node->max, max);
     825           0 :         if (!node->max)
     826           0 :                 return isl_stat_error;
     827             : 
     828           0 :         return isl_stat_ok;
     829             : }
     830             : 
     831             : /* Set the entries of node->max to the minimum of the schedule_max_coefficient
     832             :  * option (if set) and half of the minimum of the sizes in the other
     833             :  * dimensions.  Round up when computing the half such that
     834             :  * if the minimum of the sizes is one, half of the size is taken to be one
     835             :  * rather than zero.
     836             :  * If the global minimum is unbounded (i.e., if both
     837             :  * the schedule_max_coefficient is not set and the sizes in the other
     838             :  * dimensions are unbounded), then store a negative value.
     839             :  * If the schedule coefficient is close to the size of the instance set
     840             :  * in another dimension, then the schedule may represent a loop
     841             :  * coalescing transformation (especially if the coefficient
     842             :  * in that other dimension is one).  Forcing the coefficient to be
     843             :  * smaller than or equal to half the minimal size should avoid this
     844             :  * situation.
                     :  *
                     :  * The entries are first set to twice the option value, then lowered
                     :  * to the sizes of the other dimensions and finally halved, so that
                     :  * the halving applies to the sizes and not to the option value.
                     :  * Return isl_stat_error if the vector cannot be allocated or
                     :  * if a size cannot be extracted from node->sizes.
     845             :  */
     846           0 : static isl_stat compute_max_coefficient(isl_ctx *ctx,
     847             :         struct isl_sched_node *node)
     848             : {
     849             :         int max;
     850             :         int i, j;
     851             :         isl_vec *v;
     852             : 
     853           0 :         max = isl_options_get_schedule_max_coefficient(ctx);
     854           0 :         v = isl_vec_alloc(ctx, node->nvar);
     855           0 :         if (!v)
     856           0 :                 return isl_stat_error;
     857             : 
     858           0 :         for (i = 0; i < node->nvar; ++i) {
     859           0 :                 isl_int_set_si(v->el[i], max);
     860           0 :                 isl_int_mul_si(v->el[i], v->el[i], 2); /* doubled here, halved again below */
     861             :         }
     862             : 
     863           0 :         for (i = 0; i < node->nvar; ++i) {
     864             :                 isl_val *size;
     865             : 
     866           0 :                 size = isl_multi_val_get_val(node->sizes, i);
     867           0 :                 if (!size)
     868           0 :                         goto error;
     869           0 :                 if (!isl_val_is_int(size)) { /* a non-integer size imposes no bound */
     870           0 :                         isl_val_free(size);
     871           0 :                         continue;
     872             :                 }
     873           0 :                 for (j = 0; j < node->nvar; ++j) {
     874           0 :                         if (j == i) /* only the other dimensions are bounded by size i */
     875           0 :                                 continue;
     876           0 :                         if (isl_int_is_neg(v->el[j]) ||
     877           0 :                             isl_int_gt(v->el[j], size->n))
     878           0 :                                 isl_int_set(v->el[j], size->n);
     879             :                 }
     880           0 :                 isl_val_free(size);
     881             :         }
     882             : 
     883           0 :         for (i = 0; i < node->nvar; ++i)
     884           0 :                 isl_int_cdiv_q_ui(v->el[i], v->el[i], 2); /* half, rounded up */
     885             : 
     886           0 :         node->max = v;
     887           0 :         return isl_stat_ok;
     888             : error:
     889           0 :         isl_vec_free(v);
     890           0 :         return isl_stat_error;
     891             : }
     892             : 
     893             : /* Compute and return the size of "set" in dimension "dim".
     894             :  * The size is taken to be the difference in values for that variable
     895             :  * for fixed values of the other variables.
     896             :  * This assumes that "set" is convex.
     897             :  * In particular, the variable is first isolated from the other variables
     898             :  * in the range of a map
     899             :  *
     900             :  *      [i_0, ..., i_dim-1, i_dim+1, ...] -> [i_dim]
     901             :  *
     902             :  * and then duplicated
     903             :  *
     904             :  *      [i_0, ..., i_dim-1, i_dim+1, ...] -> [[i_dim] -> [i_dim']]
     905             :  *
     906             :  * The shared variables are then projected out and the maximal value
     907             :  * of i_dim' - i_dim is computed.
                     :  *
                     :  * "set" is consumed by this function.  The returned value is
                     :  * the result of isl_set_max_val on the set of differences.
     908             :  */
     909           0 : static __isl_give isl_val *compute_size(__isl_take isl_set *set, int dim)
     910             : {
     911             :         isl_map *map;
     912             :         isl_local_space *ls;
     913             :         isl_aff *obj;
     914             :         isl_val *v;
     915             : 
     916           0 :         map = isl_set_project_onto_map(set, isl_dim_set, dim, 1);
     917           0 :         map = isl_map_project_out(map, isl_dim_in, dim, 1);
     918           0 :         map = isl_map_range_product(map, isl_map_copy(map));
     919           0 :         map = isl_set_unwrap(isl_map_range(map));
     920           0 :         set = isl_map_deltas(map);
     921           0 :         ls = isl_local_space_from_space(isl_set_get_space(set));
     922           0 :         obj = isl_aff_var_on_domain(ls, isl_dim_set, 0);
     923           0 :         v = isl_set_max_val(set, obj);
     924           0 :         isl_aff_free(obj);
     925           0 :         isl_set_free(set);
     926             : 
     927           0 :         return v;
     928             : }
     929             : 
     930             : /* Compute the size of the instance set "set" of "node", after compression,
     931             :  * as well as bounds on the corresponding coefficients, if needed.
     932             :  *
     933             :  * The sizes are needed when the schedule_treat_coalescing option is set.
     934             :  * The bounds are needed when the schedule_treat_coalescing option or
     935             :  * the schedule_max_coefficient option is set.
     936             :  *
     937             :  * If the schedule_treat_coalescing option is not set, then at most
     938             :  * the bounds need to be set and this is done in set_max_coefficient.
     939             :  * Otherwise, compress the domain if needed, compute the size
     940             :  * in each direction and store the results in node->sizes.
     941             :  * If the domain is not convex, then the sizes are computed
     942             :  * on a convex superset in order to avoid picking up sizes
     943             :  * that are valid for the individual disjuncts, but not for
     944             :  * the domain as a whole.
     945             :  * Finally, set the bounds on the coefficients based on the sizes
     946             :  * and the schedule_max_coefficient option in compute_max_coefficient.
                     :  *
                     :  * "set" is consumed on every path through this function.
     947             :  */
     948           0 : static isl_stat compute_sizes_and_max(isl_ctx *ctx, struct isl_sched_node *node,
     949             :         __isl_take isl_set *set)
     950             : {
     951             :         int j, n;
     952             :         isl_multi_val *mv;
     953             : 
     954           0 :         if (!isl_options_get_schedule_treat_coalescing(ctx)) {
     955           0 :                 isl_set_free(set);
     956           0 :                 return set_max_coefficient(ctx, node);
     957             :         }
     958             : 
     959           0 :         if (node->compressed)
     960           0 :                 set = isl_set_preimage_multi_aff(set,
     961             :                                         isl_multi_aff_copy(node->decompress));
     962           0 :         set = isl_set_from_basic_set(isl_set_simple_hull(set));
     963           0 :         mv = isl_multi_val_zero(isl_set_get_space(set));
     964           0 :         n = isl_set_dim(set, isl_dim_set);
     965           0 :         for (j = 0; j < n; ++j) {
     966             :                 isl_val *v;
     967             : 
     968           0 :                 v = compute_size(isl_set_copy(set), j);
     969           0 :                 mv = isl_multi_val_set_val(mv, j, v);
     970             :         }
     971           0 :         node->sizes = mv;
     972           0 :         isl_set_free(set);
     973           0 :         if (!node->sizes)
     974           0 :                 return isl_stat_error;
     975           0 :         return compute_max_coefficient(ctx, node);
     976             : }
     977             : 
     978             : /* Add a new node to the graph representing the given instance set.
     979             :  * "nvar" is the (possibly compressed) number of variables and
     980             :  * may be smaller than the number of set variables in "set"
     981             :  * if "compressed" is set.
     982             :  * If "compressed" is set, then "hull" represents the constraints
     983             :  * that were used to derive the compression, while "compress" and
     984             :  * "decompress" map the original space to the compressed space and
     985             :  * vice versa.
     986             :  * If "compressed" is not set, then "hull", "compress" and "decompress"
     987             :  * should be NULL.
     988             :  *
     989             :  * Compute the size of the instance set and bounds on the coefficients,
     990             :  * if needed.
                     :  *
                     :  * The new node is stored in graph->node[graph->n], after which
                     :  * graph->n is incremented.  The number of parameters of the node
                     :  * is taken to be zero if the schedule_parametric option is not set.
                     :  * All newly created objects are stored in the node before
                     :  * any allocation failure is reported.
                     :  * NOTE(review): presumably this is so that they are released
                     :  * together with the graph -- confirm against the callers.
     991             :  */
     992           0 : static isl_stat add_node(struct isl_sched_graph *graph,
     993             :         __isl_take isl_set *set, int nvar, int compressed,
     994             :         __isl_take isl_set *hull, __isl_take isl_multi_aff *compress,
     995             :         __isl_take isl_multi_aff *decompress)
     996             : {
     997             :         int nparam;
     998             :         isl_ctx *ctx;
     999             :         isl_mat *sched;
    1000             :         isl_space *space;
    1001             :         int *coincident;
    1002             :         struct isl_sched_node *node;
    1003             : 
    1004           0 :         if (!set)
    1005           0 :                 goto error;
    1006             : 
    1007           0 :         ctx = isl_set_get_ctx(set);
    1008           0 :         nparam = isl_set_dim(set, isl_dim_param);
    1009           0 :         if (!ctx->opt->schedule_parametric)
    1010           0 :                 nparam = 0;
    1011           0 :         sched = isl_mat_alloc(ctx, 0, 1 + nparam + nvar);
    1012           0 :         node = &graph->node[graph->n];
    1013           0 :         graph->n++;
    1014           0 :         space = isl_set_get_space(set);
    1015           0 :         node->space = space;
    1016           0 :         node->nvar = nvar;
    1017           0 :         node->nparam = nparam;
    1018           0 :         node->sched = sched;
    1019           0 :         node->sched_map = NULL;
    1020           0 :         coincident = isl_calloc_array(ctx, int, graph->max_row);
    1021           0 :         node->coincident = coincident;
    1022           0 :         node->compressed = compressed;
    1023           0 :         node->hull = hull;
    1024           0 :         node->compress = compress;
    1025           0 :         node->decompress = decompress;
    1026           0 :         if (compute_sizes_and_max(ctx, node, set) < 0)
    1027           0 :                 return isl_stat_error;
    1028             : 
    1029           0 :         if (!space || !sched || (graph->max_row && !coincident))
    1030           0 :                 return isl_stat_error;
    1031           0 :         if (compressed && (!hull || !compress || !decompress))
    1032           0 :                 return isl_stat_error;
    1033             : 
    1034           0 :         return isl_stat_ok;
    1035             : error:
    1036           0 :         isl_set_free(set);
    1037           0 :         isl_set_free(hull);
    1038           0 :         isl_multi_aff_free(compress);
    1039           0 :         isl_multi_aff_free(decompress);
    1040           0 :         return isl_stat_error;
    1041             : }
    1042             : 
    1043             : /* Construct an identifier for node "node", which will represent "set".
    1044             :  * The name of the identifier is either "compressed" or
    1045             :  * "compressed_<name>", with <name> the name of the space of "set".
    1046             :  * The user pointer of the identifier points to "node".
                     :  *
                     :  * The plain name "compressed" is used if "set" has no tuple name.
                     :  * Return NULL if it cannot be determined whether "set" has
                     :  * a tuple name.
    1047             :  */
    1048           0 : static __isl_give isl_id *construct_compressed_id(__isl_keep isl_set *set,
    1049             :         struct isl_sched_node *node)
    1050             : {
    1051             :         isl_bool has_name;
    1052             :         isl_ctx *ctx;
    1053             :         isl_id *id;
    1054             :         isl_printer *p;
    1055             :         const char *name;
    1056             :         char *id_name;
    1057             : 
    1058           0 :         has_name = isl_set_has_tuple_name(set);
    1059           0 :         if (has_name < 0)
    1060           0 :                 return NULL;
    1061             : 
    1062           0 :         ctx = isl_set_get_ctx(set);
    1063           0 :         if (!has_name)
    1064           0 :                 return isl_id_alloc(ctx, "compressed", node);
    1065             : 
    1066           0 :         p = isl_printer_to_str(ctx);
    1067           0 :         name = isl_set_get_tuple_name(set);
    1068           0 :         p = isl_printer_print_str(p, "compressed_");
    1069           0 :         p = isl_printer_print_str(p, name);
    1070           0 :         id_name = isl_printer_get_str(p);
    1071           0 :         isl_printer_free(p);
    1072             : 
    1073           0 :         id = isl_id_alloc(ctx, id_name, node);
    1074           0 :         free(id_name); /* NOTE(review): assumes isl_id_alloc keeps its own copy of the name -- confirm */
    1075             : 
    1076           0 :         return id;
    1077             : }
    1078             : 
    1079             : /* Add a new node to the graph representing the given set.
    1080             :  *
    1081             :  * If any of the set variables is defined by an equality, then
    1082             :  * we perform variable compression such that we can perform
    1083             :  * the scheduling on the compressed domain.
    1084             :  * In this case, an identifier is used that references the new node
    1085             :  * such that each compressed space is unique and
    1086             :  * such that the node can be recovered from the compressed space.
                     :  *
                     :  * "user" points to the graph.  The equalities are looked for
                     :  * in the affine hull of "set", after removal of its divs.
                     :  * Without such equalities, the node is added uncompressed with
                     :  * as many variables as "set" has set dimensions.
                     :  * Otherwise, the number of variables is the number of set dimensions
                     :  * in the range of the compression morph and the hull is passed
                     :  * along to add_node together with both directions of the morph.
    1087             :  */
    1088           0 : static isl_stat extract_node(__isl_take isl_set *set, void *user)
    1089             : {
    1090             :         int nvar;
    1091             :         isl_bool has_equality;
    1092             :         isl_id *id;
    1093             :         isl_basic_set *hull;
    1094             :         isl_set *hull_set;
    1095             :         isl_morph *morph;
    1096             :         isl_multi_aff *compress, *decompress;
    1097           0 :         struct isl_sched_graph *graph = user;
    1098             : 
    1099           0 :         hull = isl_set_affine_hull(isl_set_copy(set));
    1100           0 :         hull = isl_basic_set_remove_divs(hull);
    1101           0 :         nvar = isl_set_dim(set, isl_dim_set);
    1102           0 :         has_equality = has_any_defining_equality(hull);
    1103             : 
    1104           0 :         if (has_equality < 0)
    1105           0 :                 goto error;
    1106           0 :         if (!has_equality) {
    1107           0 :                 isl_basic_set_free(hull);
    1108           0 :                 return add_node(graph, set, nvar, 0, NULL, NULL, NULL);
    1109             :         }
    1110             : 
    1111           0 :         id = construct_compressed_id(set, &graph->node[graph->n]); /* the slot that add_node fills next */
    1112           0 :         morph = isl_basic_set_variable_compression_with_id(hull,
    1113             :                                                             isl_dim_set, id);
    1114           0 :         isl_id_free(id);
    1115           0 :         nvar = isl_morph_ran_dim(morph, isl_dim_set);
    1116           0 :         compress = isl_morph_get_var_multi_aff(morph);
    1117           0 :         morph = isl_morph_inverse(morph);
    1118           0 :         decompress = isl_morph_get_var_multi_aff(morph);
    1119           0 :         isl_morph_free(morph);
    1120             : 
    1121           0 :         hull_set = isl_set_from_basic_set(hull);
    1122           0 :         return add_node(graph, set, nvar, 1, hull_set, compress, decompress);
    1123             : error:
    1124           0 :         isl_basic_set_free(hull);
    1125           0 :         isl_set_free(set);
    1126           0 :         return isl_stat_error;
    1127             : }
    1128             : 
    1129             : struct isl_extract_edge_data {   /* user data handed to extract_edge */
    1130             :         enum isl_edge_type type;        /* type of the edges currently being extracted */
    1131             :         struct isl_sched_graph *graph;  /* graph that receives the edges */
    1132             : };
    1133             : 
    1134             : /* Merge edge2 into edge1, freeing the contents of edge2.
    1135             :  * Return 0 on success and -1 if a tagged relation contributed by edge2
    1136             :  * is NULL in edge1 after the merge.
    1137             :  * edge1 and edge2 are assumed to have the same value for the map field.
    1138             :  */
    1139           0 : static int merge_edge(struct isl_sched_edge *edge1,
    1140             :         struct isl_sched_edge *edge2)
    1141             : {
    1142           0 :         edge1->types |= edge2->types;   /* edge1 now also carries the types of edge2 */
    1143           0 :         isl_map_free(edge2->map);       /* same relation as edge1->map by assumption */
    1144             : 
    1145           0 :         if (is_condition(edge2)) {
    1146           0 :                 if (!edge1->tagged_condition)
    1147           0 :                         edge1->tagged_condition = edge2->tagged_condition;      /* take over the pointer */
    1148             :                 else
    1149           0 :                         edge1->tagged_condition =
    1150           0 :                                 isl_union_map_union(edge1->tagged_condition,
    1151             :                                                     edge2->tagged_condition);
    1152             :         }
    1153             : 
    1154           0 :         if (is_conditional_validity(edge2)) {
    1155           0 :                 if (!edge1->tagged_validity)
    1156           0 :                         edge1->tagged_validity = edge2->tagged_validity;        /* take over the pointer */
    1157             :                 else
    1158           0 :                         edge1->tagged_validity =
    1159           0 :                                 isl_union_map_union(edge1->tagged_validity,
    1160             :                                                     edge2->tagged_validity);
    1161             :         }
    1162             : 
    1163           0 :         if (is_condition(edge2) && !edge1->tagged_condition)    /* pointer taken over or union result was NULL */
    1164           0 :                 return -1;
    1165           0 :         if (is_conditional_validity(edge2) && !edge1->tagged_validity)  /* same check for the validity tags */
    1166           0 :                 return -1;
    1167             : 
    1168           0 :         return 0;
    1169             : }
    1170             : 
    1171             : /* Insert dummy tags in domain and range of "map".
    1172             :  *
    1173             :  * In particular, if "map" is of the form
    1174             :  *
    1175             :  *      A -> B
    1176             :  *
    1177             :  * then return
    1178             :  *
    1179             :  *      [A -> dummy_tag] -> [B -> dummy_tag]
    1180             :  *
    1181             :  * where the dummy_tags are identical and equal to any dummy tags
    1182             :  * introduced by any other call to this function.
    1183             :  */
    1184           0 : static __isl_give isl_map *insert_dummy_tags(__isl_take isl_map *map)
    1185             : {
    1186             :         static char dummy;      /* its address is the user pointer of the dummy id; static, so shared by all calls */
    1187             :         isl_ctx *ctx;
    1188             :         isl_id *id;
    1189             :         isl_space *space;
    1190             :         isl_set *domain, *range;
    1191             : 
    1192           0 :         ctx = isl_map_get_ctx(map);
    1193             : 
    1194           0 :         id = isl_id_alloc(ctx, NULL, &dummy);   /* unnamed id identified by &dummy */
    1195           0 :         space = isl_space_params(isl_map_get_space(map));       /* start from the parameter space of "map" */
    1196           0 :         space = isl_space_set_from_params(space);
    1197           0 :         space = isl_space_set_tuple_id(space, isl_dim_set, id); /* set space: dummy_tag */
    1198           0 :         space = isl_space_map_from_set(space);  /* dummy_tag -> dummy_tag */
    1199             : 
    1200           0 :         domain = isl_map_wrap(map);     /* [A -> B] */
    1201           0 :         range = isl_map_wrap(isl_map_universe(space));  /* [dummy_tag -> dummy_tag] */
    1202           0 :         map = isl_map_from_domain_and_range(domain, range);     /* [A -> B] -> [dummy_tag -> dummy_tag] */
    1203           0 :         map = isl_map_zip(map); /* [A -> dummy_tag] -> [B -> dummy_tag] */
    1204             : 
    1205           0 :         return map;
    1206             : }
    1207             : 
    1208             : /* Given that at least one of "src" or "dst" is compressed, return
    1209             :  * a map between the spaces of these nodes restricted to the affine
    1210             :  * hull that was used in the compression.
    1211             :  */
    1212           0 : static __isl_give isl_map *extract_hull(struct isl_sched_node *src,
    1213             :         struct isl_sched_node *dst)
    1214             : {
    1215             :         isl_set *dom, *ran;
    1216             : 
    1217           0 :         if (src->compressed)
    1218           0 :                 dom = isl_set_copy(src->hull);  /* restrict the source side to its hull */
    1219             :         else
    1220           0 :                 dom = isl_set_universe(isl_space_copy(src->space));     /* uncompressed: no restriction on this side */
    1221           0 :         if (dst->compressed)
    1222           0 :                 ran = isl_set_copy(dst->hull);  /* restrict the destination side to its hull */
    1223             :         else
    1224           0 :                 ran = isl_set_universe(isl_space_copy(dst->space));     /* uncompressed: no restriction on this side */
    1225             : 
    1226           0 :         return isl_map_from_domain_and_range(dom, ran);
    1227             : }
    1228             : 
    1229             : /* Intersect the domains of the nested relations in domain and range
    1230             :  * of "tagged" with "map".  "tagged" is consumed; "map" is only copied.
    1231             :  */
    1232           0 : static __isl_give isl_map *map_intersect_domains(__isl_take isl_map *tagged,
    1233             :         __isl_keep isl_map *map)
    1234             : {
    1235             :         isl_set *set;
    1236             : 
    1237           0 :         tagged = isl_map_zip(tagged);   /* bring the two nested domains together in the domain */
    1238           0 :         set = isl_map_wrap(isl_map_copy(map));  /* "map" as a wrapped set, matching that domain */
    1239           0 :         tagged = isl_map_intersect_domain(tagged, set);
    1240           0 :         tagged = isl_map_zip(tagged);   /* zip again to restore the original nesting */
    1241           0 :         return tagged;
    1242             : }
    1243             : 
    1244             : /* Return a pointer to the node that lives in the domain space of "map",
    1245             :  * an invalid node if there is no such node, or NULL in case of error.
    1246             :  */
    1247           0 : static struct isl_sched_node *find_domain_node(isl_ctx *ctx,
    1248             :         struct isl_sched_graph *graph, __isl_keep isl_map *map)
    1249             : {
    1250             :         struct isl_sched_node *node;
    1251             :         isl_space *space;
    1252             : 
    1253           0 :         space = isl_space_domain(isl_map_get_space(map));       /* works on a copy of the space of "map" */
    1254           0 :         node = graph_find_node(ctx, graph, space);      /* result is passed on unchanged */
    1255           0 :         isl_space_free(space);  /* the space was only needed for the lookup */
    1256             : 
    1257           0 :         return node;
    1258             : }
    1259             : 
    1260             : /* Return a pointer to the node that lives in the range space of "map",
    1261             :  * an invalid node if there is no such node, or NULL in case of error.
    1262             :  */
    1263           0 : static struct isl_sched_node *find_range_node(isl_ctx *ctx,
    1264             :         struct isl_sched_graph *graph, __isl_keep isl_map *map)
    1265             : {
    1266             :         struct isl_sched_node *node;
    1267             :         isl_space *space;
    1268             : 
    1269           0 :         space = isl_space_range(isl_map_get_space(map));        /* works on a copy of the space of "map" */
    1270           0 :         node = graph_find_node(ctx, graph, space);      /* result is passed on unchanged */
    1271           0 :         isl_space_free(space);  /* the space was only needed for the lookup */
    1272             : 
    1273           0 :         return node;
    1274             : }
    1275             : 
    1276             : /* Refrain from adding a new edge based on "map".
    1277             :  * Instead, just free the map and always return isl_stat_ok.
    1278             :  * "tagged" is either a copy of "map" with additional tags or NULL.
    1279             :  */
    1280           0 : static isl_stat skip_edge(__isl_take isl_map *map, __isl_take isl_map *tagged)
    1281             : {
    1282           0 :         isl_map_free(map);
    1283           0 :         isl_map_free(tagged);
    1284             : 
    1285           0 :         return isl_stat_ok;     /* skipping an edge is not an error */
    1286             : }
    1287             : 
    1288             : /* Add a new edge to the graph based on the given map
    1289             :  * and add it to data->graph->edge_table[data->type].
    1290             :  * If a dependence relation of a given type happens to be identical
    1291             :  * to one of the dependence relations of a type that was added before,
    1292             :  * then we don't create a new edge, but instead mark the original edge
    1293             :  * as also representing a dependence of the current type.
    1294             :  *
    1295             :  * Edges of type isl_edge_condition or isl_edge_conditional_validity
    1296             :  * may be specified as "tagged" dependence relations.  That is, "map"
    1297             :  * may contain elements (i -> a) -> (j -> b), where i -> j denotes
    1298             :  * the dependence on iterations and a and b are tags.
    1299             :  * edge->map is set to the relation containing the elements i -> j,
    1300             :  * while edge->tagged_condition and edge->tagged_validity contain
    1301             :  * the union of all the "map" relations
    1302             :  * for which extract_edge is called that result in the same edge->map.
    1303             :  *
    1304             :  * If the source or the destination node is compressed, then
    1305             :  * intersect both "map" and "tagged" with the constraints that
    1306             :  * were used to construct the compression.
    1307             :  * This ensures that there are no schedule constraints defined
    1308             :  * outside of these domains, while the scheduler no longer has
    1309             :  * any control over those outside parts.
    1310             :  */
    1311           0 : static isl_stat extract_edge(__isl_take isl_map *map, void *user)
    1312             : {
    1313             :         isl_bool empty;
    1314           0 :         isl_ctx *ctx = isl_map_get_ctx(map);
    1315           0 :         struct isl_extract_edge_data *data = user;
    1316           0 :         struct isl_sched_graph *graph = data->graph;
    1317             :         struct isl_sched_node *src, *dst;
    1318             :         struct isl_sched_edge *edge;
    1319           0 :         isl_map *tagged = NULL; /* stays NULL for edge types that carry no tags */
    1320             : 
    1321           0 :         if (data->type == isl_edge_condition ||
    1322           0 :             data->type == isl_edge_conditional_validity) {
    1323           0 :                 if (isl_map_can_zip(map)) {     /* "map" is of the tagged form (i -> a) -> (j -> b) */
    1324           0 :                         tagged = isl_map_copy(map);     /* keep the tagged relation */
    1325           0 :                         map = isl_set_unwrap(isl_map_domain(isl_map_zip(map))); /* reduce "map" to i -> j */
    1326             :                 } else {
    1327           0 :                         tagged = insert_dummy_tags(isl_map_copy(map));  /* untagged input: add dummy tags */
    1328             :                 }
    1329             :         }
    1330             : 
    1331           0 :         src = find_domain_node(ctx, graph, map);
    1332           0 :         dst = find_range_node(ctx, graph, map);
    1333             : 
    1334           0 :         if (!src || !dst)       /* NULL means the lookup itself failed */
    1335             :                 goto error;
    1336           0 :         if (!is_node(graph, src) || !is_node(graph, dst))       /* no node for one of the spaces: ignore this map */
    1337           0 :                 return skip_edge(map, tagged);
    1338             : 
    1339           0 :         if (src->compressed || dst->compressed) {
    1340             :                 isl_map *hull;
    1341           0 :                 hull = extract_hull(src, dst);
    1342           0 :                 if (tagged)
    1343           0 :                         tagged = map_intersect_domains(tagged, hull);   /* "hull" is only kept here */
    1344           0 :                 map = isl_map_intersect(map, hull);     /* last use of "hull" */
    1345             :         }
    1346             : 
    1347           0 :         empty = isl_map_plain_is_empty(map);
    1348           0 :         if (empty < 0)
    1349           0 :                 goto error;
    1350           0 :         if (empty)      /* a plainly empty relation does not get an edge */
    1351           0 :                 return skip_edge(map, tagged);
    1352             : 
    1353           0 :         graph->edge[graph->n_edge].src = src;   /* fill in the next free slot; n_edge is not incremented yet */
    1354           0 :         graph->edge[graph->n_edge].dst = dst;
    1355           0 :         graph->edge[graph->n_edge].map = map;
    1356           0 :         graph->edge[graph->n_edge].types = 0;
    1357           0 :         graph->edge[graph->n_edge].tagged_condition = NULL;
    1358           0 :         graph->edge[graph->n_edge].tagged_validity = NULL;
    1359           0 :         set_type(&graph->edge[graph->n_edge], data->type);
    1360           0 :         if (data->type == isl_edge_condition)
    1361           0 :                 graph->edge[graph->n_edge].tagged_condition =
    1362           0 :                                         isl_union_map_from_map(tagged);
    1363           0 :         if (data->type == isl_edge_conditional_validity)
    1364           0 :                 graph->edge[graph->n_edge].tagged_validity =
    1365           0 :                                         isl_union_map_from_map(tagged);
    1366             : 
    1367           0 :         edge = graph_find_matching_edge(graph, &graph->edge[graph->n_edge]);
    1368           0 :         if (!edge) {
    1369           0 :                 graph->n_edge++;        /* count the filled slot before failing; presumably so that graph cleanup releases it - confirm */
    1370           0 :                 return isl_stat_error;
    1371             :         }
    1372           0 :         if (edge == &graph->edge[graph->n_edge])        /* no earlier edge matches: the new slot becomes an edge */
    1373           0 :                 return graph_edge_table_add(ctx, graph, data->type,
    1374           0 :                                     &graph->edge[graph->n_edge++]);     /* post-increment marks the slot as used */
    1375             : 
    1376           0 :         if (merge_edge(edge, &graph->edge[graph->n_edge]) < 0)  /* fold the new slot into the earlier matching edge */
    1377           0 :                 return isl_stat_error;
    1378             : 
    1379           0 :         return graph_edge_table_add(ctx, graph, data->type, edge);      /* register the earlier edge under the current type too */
    1380             : error:
    1381           0 :         isl_map_free(map);
    1382           0 :         isl_map_free(tagged);
    1383           0 :         return isl_stat_error;
    1384             : }
    1385             : 
    1386             : /* Initialize the schedule graph "graph" from the schedule constraints "sc".
    1387             :  *
    1388             :  * The context is included in the domain before the nodes of
    1389             :  * the graphs are extracted in order to be able to exploit
    1390             :  * any possible additional equalities.
    1391             :  * Note that this intersection is only performed locally here.
    1392             :  */
    1393           0 : static isl_stat graph_init(struct isl_sched_graph *graph,
    1394             :         __isl_keep isl_schedule_constraints *sc)
    1395             : {
    1396             :         isl_ctx *ctx;
    1397             :         isl_union_set *domain;
    1398             :         isl_union_map *c;
    1399             :         struct isl_extract_edge_data data;
    1400             :         enum isl_edge_type i;
    1401             :         isl_stat r;
    1402             : 
    1403           0 :         if (!sc)
    1404           0 :                 return isl_stat_error;
    1405             : 
    1406           0 :         ctx = isl_schedule_constraints_get_ctx(sc);
    1407             : 
    1408           0 :         domain = isl_schedule_constraints_get_domain(sc);
    1409           0 :         graph->n = isl_union_set_n_set(domain); /* one node per set in the domain; used as the allocation size below */
    1410           0 :         isl_union_set_free(domain);
    1411             : 
    1412           0 :         if (graph_alloc(ctx, graph, graph->n,
    1413             :             isl_schedule_constraints_n_map(sc)) < 0)
    1414           0 :                 return isl_stat_error;
    1415             : 
    1416           0 :         if (compute_max_row(graph, sc) < 0)
    1417           0 :                 return isl_stat_error;
    1418           0 :         graph->root = graph;
    1419           0 :         graph->n = 0;   /* reset before extraction; presumably advanced as extract_node adds nodes - confirm */
    1420           0 :         domain = isl_schedule_constraints_get_domain(sc);
    1421           0 :         domain = isl_union_set_intersect_params(domain,
    1422             :                                     isl_schedule_constraints_get_context(sc));  /* local intersection with the context, see above */
    1423           0 :         r = isl_union_set_foreach_set(domain, &extract_node, graph);
    1424           0 :         isl_union_set_free(domain);
    1425           0 :         if (r < 0)
    1426           0 :                 return isl_stat_error;
    1427           0 :         if (graph_init_table(ctx, graph) < 0)
    1428           0 :                 return isl_stat_error;
    1429           0 :         for (i = isl_edge_first; i <= isl_edge_last; ++i) {     /* first pass: record the number of maps per edge type */
    1430           0 :                 c = isl_schedule_constraints_get(sc, i);
    1431           0 :                 graph->max_edge[i] = isl_union_map_n_map(c);
    1432           0 :                 isl_union_map_free(c);
    1433           0 :                 if (!c) /* NOTE(review): "c" is inspected (not dereferenced) after being freed; consider testing it before the free */
    1434           0 :                         return isl_stat_error;
    1435             :         }
    1436           0 :         if (graph_init_edge_tables(ctx, graph) < 0)
    1437           0 :                 return isl_stat_error;
    1438           0 :         graph->n_edge = 0;      /* extract_edge uses n_edge as the index of the next free edge slot */
    1439           0 :         data.graph = graph;
    1440           0 :         for (i = isl_edge_first; i <= isl_edge_last; ++i) {     /* second pass: extract the edges of each type */
    1441             :                 isl_stat r;     /* NOTE(review): shadows the outer "r" */
    1442             : 
    1443           0 :                 data.type = i;
    1444           0 :                 c = isl_schedule_constraints_get(sc, i);
    1445           0 :                 r = isl_union_map_foreach_map(c, &extract_edge, &data);
    1446           0 :                 isl_union_map_free(c);
    1447           0 :                 if (r < 0)
    1448           0 :                         return isl_stat_error;
    1449             :         }
    1450             : 
    1451           0 :         return isl_stat_ok;
    1452             : }
    1453             : 
    1454             : /* Check whether there is any dependence from node[j] to node[i]
    1455             :  * or from node[i] to node[j].  "user" is the schedule graph.
    1456             :  */
    1457           0 : static isl_bool node_follows_weak(int i, int j, void *user)
    1458             : {
    1459             :         isl_bool f;
    1460           0 :         struct isl_sched_graph *graph = user;
    1461             : 
    1462           0 :         f = graph_has_any_edge(graph, &graph->node[j], &graph->node[i]);
    1463           0 :         if (f < 0 || f) /* error or edge found: the opposite direction need not be checked */
    1464           0 :                 return f;
    1465           0 :         return graph_has_any_edge(graph, &graph->node[i], &graph->node[j]);
    1466             : }
    1467             : 
    1468             : /* Check whether there is a (conditional) validity dependence from node[j]
    1469             :  * to node[i], forcing node[i] to follow node[j].  "user" is the schedule graph.
    1470             :  */
    1471           0 : static isl_bool node_follows_strong(int i, int j, void *user)
    1472             : {
    1473           0 :         struct isl_sched_graph *graph = user;
    1474             : 
    1475           0 :         return graph_has_validity_edge(graph, &graph->node[j], &graph->node[i]);        /* note the order: from j to i */
    1476             : }
    1477             : 
    1478             : /* Use Tarjan's algorithm for computing the strongly connected components
    1479             :  * in the dependence graph only considering those edges defined by "follows".
    1480             :  */
    1481           0 : static isl_stat detect_ccs(isl_ctx *ctx, struct isl_sched_graph *graph,
    1482             :         isl_bool (*follows)(int i, int j, void *user))
    1483             : {
    1484             :         int i, n;
    1485           0 :         struct isl_tarjan_graph *g = NULL;
    1486             : 
    1487           0 :         g = isl_tarjan_graph_init(ctx, graph->n, follows, graph);      /* "graph" is the user pointer passed to "follows" */
    1488           0 :         if (!g)
    1489           0 :                 return isl_stat_error;
    1490             : 
    1491           0 :         graph->scc = 0; /* index of the component being read; ends up as the number of components */
    1492           0 :         i = 0;
    1493           0 :         n = graph->n;   /* number of nodes not yet assigned to a component */
    1494           0 :         while (n) {
    1495           0 :                 while (g->order[i] != -1) {     /* entries up to the next -1 are read as one component */
    1496           0 :                         graph->node[g->order[i]].scc = graph->scc;
    1497           0 :                         --n;
    1498           0 :                         ++i;
    1499             :                 }
    1500           0 :                 ++i;    /* step over the -1 that ended the component */
    1501           0 :                 graph->scc++;
    1502             :         }
    1503             : 
    1504           0 :         isl_tarjan_graph_free(g);
    1505             : 
    1506           0 :         return isl_stat_ok;
    1507             : }
    1508             : 
    1509             : /* Apply Tarjan's algorithm to detect the strongly connected components
    1510             :  * in the dependence graph.
    1511             :  * Only consider the (conditional) validity dependences and clear "weak".
    1512             :  */
    1513           0 : static isl_stat detect_sccs(isl_ctx *ctx, struct isl_sched_graph *graph)
    1514             : {
    1515           0 :         graph->weak = 0;        /* record that the components are not the weak ones */
    1516           0 :         return detect_ccs(ctx, graph, &node_follows_strong);
    1517             : }
    1518             : 
    1519             : /* Apply Tarjan's algorithm to detect the (weakly) connected components
    1520             :  * in the dependence graph.
    1521             :  * Consider all dependences and set "weak".
    1522             :  */
    1523           0 : static isl_stat detect_wccs(isl_ctx *ctx, struct isl_sched_graph *graph)
    1524             : {
    1525           0 :         graph->weak = 1;        /* record that the components are the weak ones */
    1526           0 :         return detect_ccs(ctx, graph, &node_follows_weak);
    1527             : }
    1528             : 
    1529           0 : static int cmp_scc(const void *a, const void *b, void *data)   /* comparison callback handed to isl_sort by sort_sccs */
    1530             : {
    1531           0 :         struct isl_sched_graph *graph = data;   /* "data" is the schedule graph */
    1532           0 :         const int *i1 = a;      /* "a" and "b" point to node indices */
    1533           0 :         const int *i2 = b;
    1534             : 
    1535           0 :         return graph->node[*i1].scc - graph->node[*i2].scc;     /* difference of the scc fields of the two nodes */
    1536             : }
    1537             : 
    1538             : /* Sort the elements of graph->sorted according to the corresponding SCCs.
    1539             :  * The return value is that of isl_sort. */
    1540           0 : static int sort_sccs(struct isl_sched_graph *graph)
    1541             : {
    1542           0 :         return isl_sort(graph->sorted, graph->n, sizeof(int), &cmp_scc, graph); /* graph->n elements of type int */
    1543             : }
    1544             : 
    1545             : /* Return a non-parametric set in the compressed space of "node" that is
    1546             :  * bounded by the size in each direction
    1547             :  *
    1548             :  *      { [x] : -S_i <= x_i <= S_i }
    1549             :  *
    1550             :  * If S_i is infinity in direction i, then there are no constraints
    1551             :  * in that direction.
    1552             :  *
    1553             :  * Cache the result in node->bounds.
    1554             :  */
    1555           0 : static __isl_give isl_basic_set *get_size_bounds(struct isl_sched_node *node)
    1556             : {
    1557             :         isl_space *space;
    1558             :         isl_basic_set *bounds;
    1559             :         int i;
    1560             :         unsigned nparam;
    1561             : 
    1562           0 :         if (node->bounds)       /* already computed by an earlier call */
    1563           0 :                 return isl_basic_set_copy(node->bounds);
    1564             : 
    1565           0 :         if (node->compressed)
    1566           0 :                 space = isl_multi_aff_get_domain_space(node->decompress);      /* domain of "decompress" serves as the compressed space */
    1567             :         else
    1568           0 :                 space = isl_space_copy(node->space);
    1569           0 :         nparam = isl_space_dim(space, isl_dim_param);
    1570           0 :         space = isl_space_drop_dims(space, isl_dim_param, 0, nparam);   /* drop all parameters: the result is non-parametric */
    1571           0 :         bounds = isl_basic_set_universe(space);
    1572             : 
    1573           0 :         for (i = 0; i < node->nvar; ++i) {
    1574             :                 isl_val *size;
    1575             : 
    1576           0 :                 size = isl_multi_val_get_val(node->sizes, i);
    1577           0 :                 if (!size)
    1578           0 :                         return isl_basic_set_free(bounds);      /* error: nothing is cached */
    1579           0 :                 if (!isl_val_is_int(size)) {    /* not an integer size (e.g., infinity): leave direction i unconstrained */
    1580           0 :                         isl_val_free(size);
    1581           0 :                         continue;
    1582             :                 }
    1583           0 :                 bounds = isl_basic_set_upper_bound_val(bounds, isl_dim_set, i,
    1584             :                                                         isl_val_copy(size));   /* x_i <= S_i */
    1585           0 :                 bounds = isl_basic_set_lower_bound_val(bounds, isl_dim_set, i,
    1586             :                                                         isl_val_neg(size));    /* x_i >= -S_i; last use of "size" */
    1587             :         }
    1588             : 
    1589           0 :         node->bounds = isl_basic_set_copy(bounds);     /* one reference goes to the cache, the other to the caller */
    1590           0 :         return bounds;
    1591             : }
    1592             : 
    1593             : /* Drop some constraints from "delta" that could be exploited
    1594             :  * to construct loop coalescing schedules.
    1595             :  * In particular, drop those constraints that bound the difference
    1596             :  * to the size of the domain.
    1597             :  * First project out the parameters to improve the effectiveness.
    1598             :  */
    1599           0 : static __isl_give isl_set *drop_coalescing_constraints(
    1600             :         __isl_take isl_set *delta, struct isl_sched_node *node)
    1601             : {
    1602             :         unsigned nparam;
    1603             :         isl_basic_set *bounds;
    1604             : 
    1605           0 :         bounds = get_size_bounds(node); /* the size bounds of "node", possibly from its cache */
    1606             : 
    1607           0 :         nparam = isl_set_dim(delta, isl_dim_param);
    1608           0 :         delta = isl_set_project_out(delta, isl_dim_param, 0, nparam);   /* project out all parameters */
    1609           0 :         delta = isl_set_remove_divs(delta);
    1610           0 :         delta = isl_set_plain_gist_basic_set(delta, bounds);    /* gist against the size bounds: this is the dropping step */
    1611           0 :         return delta;
    1612             : }
    1613             : 
    1614             : /* Given a dependence relation R from "node" to itself,
    1615             :  * construct the set of coefficients of valid constraints for elements
    1616             :  * in that dependence relation.
    1617             :  * In particular, the result contains tuples of coefficients
    1618             :  * c_0, c_n, c_x such that
    1619             :  *
    1620             :  *      c_0 + c_n n + c_x y - c_x x >= 0 for each (x,y) in R
    1621             :  *
    1622             :  * or, equivalently,
    1623             :  *
    1624             :  *      c_0 + c_n n + c_x d >= 0 for each d in delta R = { y - x | (x,y) in R }
    1625             :  *
    1626             :  * We choose here to compute the dual of delta R.
    1627             :  * Alternatively, we could have computed the dual of R, resulting
    1628             :  * in a set of tuples c_0, c_n, c_x, c_y, and then
    1629             :  * plugged in (c_0, c_n, c_x, -c_x).
    1630             :  *
    1631             :  * If "need_param" is set, then the resulting coefficients effectively
    1632             :  * include coefficients for the parameters c_n.  Otherwise, they may
    1633             :  * have been projected out already.
    1634             :  * Since the constraints may be different for these two cases,
    1635             :  * they are stored in separate caches.
    1636             :  * In particular, if no parameter coefficients are required and
    1637             :  * the schedule_treat_coalescing option is set, then the parameters
    1638             :  * are projected out and some constraints that could be exploited
    1639             :  * to construct coalescing schedules are removed before the dual
    1640             :  * is computed.
    1641             :  *
    1642             :  * If "node" has been compressed, then the dependence relation
    1643             :  * is also compressed before the set of coefficients is computed.
    1644             :  */
    1645           0 : static __isl_give isl_basic_set *intra_coefficients(
    1646             :         struct isl_sched_graph *graph, struct isl_sched_node *node,
    1647             :         __isl_take isl_map *map, int need_param)
    1648             : {
    1649             :         isl_ctx *ctx;
    1650             :         isl_set *delta;
    1651             :         isl_map *key;
    1652             :         isl_basic_set *coef;
    1653             :         isl_maybe_isl_basic_set m;
    1654           0 :         isl_map_to_basic_set **hmap = &graph->intra_hmap;      /* cache used when "treat" is set */
    1655             :         int treat;
    1656             : 
    1657           0 :         if (!map)
    1658           0 :                 return NULL;
    1659             : 
    1660           0 :         ctx = isl_map_get_ctx(map);
    1661           0 :         treat = !need_param && isl_options_get_schedule_treat_coalescing(ctx); /* apply the coalescing treatment? */
    1662           0 :         if (!treat)
    1663           0 :                 hmap = &graph->intra_hmap_param;       /* cache for results computed without the treatment */
    1664           0 :         m = isl_map_to_basic_set_try_get(*hmap, map);
    1665           0 :         if (m.valid < 0 || m.valid) {   /* error or cache hit: m.value is returned as is */
    1666           0 :                 isl_map_free(map);
    1667           0 :                 return m.value;
    1668             :         }
    1669             : 
    1670           0 :         key = isl_map_copy(map);        /* the cache is keyed on the relation as passed in, before compression */
    1671           0 :         if (node->compressed) {
    1672           0 :                 map = isl_map_preimage_domain_multi_aff(map,
    1673             :                                     isl_multi_aff_copy(node->decompress));
    1674           0 :                 map = isl_map_preimage_range_multi_aff(map,
    1675             :                                     isl_multi_aff_copy(node->decompress));     /* same "decompress" on both sides: R maps "node" to itself */
    1676             :         }
    1677           0 :         delta = isl_map_deltas(map);    /* delta R = { y - x | (x,y) in R } */
    1678           0 :         if (treat)
    1679           0 :                 delta = drop_coalescing_constraints(delta, node);
    1680           0 :         delta = isl_set_remove_divs(delta);
    1681           0 :         coef = isl_set_coefficients(delta);     /* the dual of delta R */
    1682           0 :         *hmap = isl_map_to_basic_set_set(*hmap, key, isl_basic_set_copy(coef)); /* remember the result for later calls */
    1683             : 
    1684           0 :         return coef;
    1685             : }
    1686             : 
    1687             : /* Given a dependence relation R, construct the set of coefficients
    1688             :  * of valid constraints for elements in that dependence relation.
    1689             :  * In particular, the result contains tuples of coefficients
    1690             :  * c_0, c_n, c_x, c_y such that
    1691             :  *
    1692             :  *      c_0 + c_n n + c_x x + c_y y >= 0 for each (x,y) in R
    1693             :  *
    1694             :  * If the source or destination nodes of "edge" have been compressed,
    1695             :  * then the dependence relation is also compressed before
    1696             :  * the set of coefficients is computed.
    1697             :  */
    1698           0 : static __isl_give isl_basic_set *inter_coefficients(
    1699             :         struct isl_sched_graph *graph, struct isl_sched_edge *edge,
    1700             :         __isl_take isl_map *map)
    1701             : {
    1702             :         isl_set *set;
    1703             :         isl_map *key;
    1704             :         isl_basic_set *coef;
    1705             :         isl_maybe_isl_basic_set m;
    1706             : 
    1707           0 :         m = isl_map_to_basic_set_try_get(graph->inter_hmap, map);
    1708           0 :         if (m.valid < 0 || m.valid) {
    1709           0 :                 isl_map_free(map);
    1710           0 :                 return m.value;
    1711             :         }
    1712             : 
    1713           0 :         key = isl_map_copy(map);
    1714           0 :         if (edge->src->compressed)
    1715           0 :                 map = isl_map_preimage_domain_multi_aff(map,
    1716           0 :                                     isl_multi_aff_copy(edge->src->decompress));
    1717           0 :         if (edge->dst->compressed)
    1718           0 :                 map = isl_map_preimage_range_multi_aff(map,
    1719           0 :                                     isl_multi_aff_copy(edge->dst->decompress));
    1720           0 :         set = isl_map_wrap(isl_map_remove_divs(map));
    1721           0 :         coef = isl_set_coefficients(set);
    1722           0 :         graph->inter_hmap = isl_map_to_basic_set_set(graph->inter_hmap, key,
    1723             :                                         isl_basic_set_copy(coef));
    1724             : 
    1725           0 :         return coef;
    1726             : }
    1727             : 
    1728             : /* Return the position of the coefficients of the variables in
    1729             :  * the coefficients constraints "coef".
    1730             :  *
    1731             :  * The space of "coef" is of the form
    1732             :  *
    1733             :  *      { coefficients[[cst, params] -> S] }
    1734             :  *
    1735             :  * Return the position of S.
    1736             :  */
    1737           0 : static int coef_var_offset(__isl_keep isl_basic_set *coef)
    1738             : {
    1739             :         int offset;
    1740             :         isl_space *space;
    1741             : 
    1742           0 :         space = isl_space_unwrap(isl_basic_set_get_space(coef));
    1743           0 :         offset = isl_space_dim(space, isl_dim_in);
    1744           0 :         isl_space_free(space);
    1745             : 
    1746           0 :         return offset;
    1747             : }
    1748             : 
    1749             : /* Return the offset of the coefficient of the constant term of "node"
    1750             :  * within the (I)LP.
    1751             :  *
    1752             :  * Within each node, the coefficients have the following order:
    1753             :  *      - positive and negative parts of c_i_x
    1754             :  *      - c_i_n (if parametric)
    1755             :  *      - c_i_0
    1756             :  */
    1757           0 : static int node_cst_coef_offset(struct isl_sched_node *node)
    1758             : {
    1759           0 :         return node->start + 2 * node->nvar + node->nparam;
    1760             : }
    1761             : 
    1762             : /* Return the offset of the coefficients of the parameters of "node"
    1763             :  * within the (I)LP.
    1764             :  *
    1765             :  * Within each node, the coefficients have the following order:
    1766             :  *      - positive and negative parts of c_i_x
    1767             :  *      - c_i_n (if parametric)
    1768             :  *      - c_i_0
    1769             :  */
    1770           0 : static int node_par_coef_offset(struct isl_sched_node *node)
    1771             : {
    1772           0 :         return node->start + 2 * node->nvar;
    1773             : }
    1774             : 
    1775             : /* Return the offset of the coefficients of the variables of "node"
    1776             :  * within the (I)LP.
    1777             :  *
    1778             :  * Within each node, the coefficients have the following order:
    1779             :  *      - positive and negative parts of c_i_x
    1780             :  *      - c_i_n (if parametric)
    1781             :  *      - c_i_0
    1782             :  */
    1783           0 : static int node_var_coef_offset(struct isl_sched_node *node)
    1784             : {
    1785           0 :         return node->start;
    1786             : }
    1787             : 
    1788             : /* Return the position of the pair of variables encoding
    1789             :  * coefficient "i" of "node".
    1790             :  *
    1791             :  * The order of these variable pairs is the opposite of
    1792             :  * that of the coefficients, with 2 variables per coefficient.
    1793             :  */
    1794           0 : static int node_var_coef_pos(struct isl_sched_node *node, int i)
    1795             : {
    1796           0 :         return node_var_coef_offset(node) + 2 * (node->nvar - 1 - i);
    1797             : }
    1798             : 
    1799             : /* Construct an isl_dim_map for mapping constraints on coefficients
    1800             :  * for "node" to the corresponding positions in graph->lp.
    1801             :  * "offset" is the offset of the coefficients for the variables
    1802             :  * in the input constraints.
    1803             :  * "s" is the sign of the mapping.
    1804             :  *
    1805             :  * The input constraints are given in terms of the coefficients
    1806             :  * (c_0, c_x) or (c_0, c_n, c_x).
    1807             :  * The mapping produced by this function essentially plugs in
    1808             :  * (0, c_i_x^+ - c_i_x^-) if s = 1 and
    1809             :  * (0, -c_i_x^+ + c_i_x^-) if s = -1 or
    1810             :  * (0, 0, c_i_x^+ - c_i_x^-) if s = 1 and
    1811             :  * (0, 0, -c_i_x^+ + c_i_x^-) if s = -1.
    1812             :  * In graph->lp, the c_i_x^- appear before their c_i_x^+ counterpart.
    1813             :  * Furthermore, the order of these pairs is the opposite of that
    1814             :  * of the corresponding coefficients.
    1815             :  *
    1816             :  * The caller can extend the mapping to also map the other coefficients
    1817             :  * (and therefore not plug in 0).
    1818             :  */
    1819           0 : static __isl_give isl_dim_map *intra_dim_map(isl_ctx *ctx,
    1820             :         struct isl_sched_graph *graph, struct isl_sched_node *node,
    1821             :         int offset, int s)
    1822             : {
    1823             :         int pos;
    1824             :         unsigned total;
    1825             :         isl_dim_map *dim_map;
    1826             : 
    1827           0 :         if (!node || !graph->lp)
    1828           0 :                 return NULL;
    1829             : 
    1830           0 :         total = isl_basic_set_total_dim(graph->lp);
    1831           0 :         pos = node_var_coef_pos(node, 0);
    1832           0 :         dim_map = isl_dim_map_alloc(ctx, total);
    1833           0 :         isl_dim_map_range(dim_map, pos, -2, offset, 1, node->nvar, -s);
    1834           0 :         isl_dim_map_range(dim_map, pos + 1, -2, offset, 1, node->nvar, s);
    1835             : 
    1836           0 :         return dim_map;
    1837             : }
    1838             : 
    1839             : /* Construct an isl_dim_map for mapping constraints on coefficients
    1840             :  * for "src" (node i) and "dst" (node j) to the corresponding positions
    1841             :  * in graph->lp.
    1842             :  * "offset" is the offset of the coefficients for the variables of "src"
    1843             :  * in the input constraints.
    1844             :  * "s" is the sign of the mapping.
    1845             :  *
    1846             :  * The input constraints are given in terms of the coefficients
    1847             :  * (c_0, c_n, c_x, c_y).
    1848             :  * The mapping produced by this function essentially plugs in
    1849             :  * (c_j_0 - c_i_0, c_j_n - c_i_n,
    1850             :  *  -(c_i_x^+ - c_i_x^-), c_j_x^+ - c_j_x^-) if s = 1 and
    1851             :  * (-c_j_0 + c_i_0, -c_j_n + c_i_n,
    1852             :  *  c_i_x^+ - c_i_x^-, -(c_j_x^+ - c_j_x^-)) if s = -1.
    1853             :  * In graph->lp, the c_*^- appear before their c_*^+ counterpart.
    1854             :  * Furthermore, the order of these pairs is the opposite of that
    1855             :  * of the corresponding coefficients.
    1856             :  *
    1857             :  * The caller can further extend the mapping.
    1858             :  */
    1859           0 : static __isl_give isl_dim_map *inter_dim_map(isl_ctx *ctx,
    1860             :         struct isl_sched_graph *graph, struct isl_sched_node *src,
    1861             :         struct isl_sched_node *dst, int offset, int s)
    1862             : {
    1863             :         int pos;
    1864             :         unsigned total;
    1865             :         isl_dim_map *dim_map;
    1866             : 
    1867           0 :         if (!src || !dst || !graph->lp)
    1868           0 :                 return NULL;
    1869             : 
    1870           0 :         total = isl_basic_set_total_dim(graph->lp);
    1871           0 :         dim_map = isl_dim_map_alloc(ctx, total);
    1872             : 
    1873           0 :         pos = node_cst_coef_offset(dst);
    1874           0 :         isl_dim_map_range(dim_map, pos, 0, 0, 0, 1, s);
    1875           0 :         pos = node_par_coef_offset(dst);
    1876           0 :         isl_dim_map_range(dim_map, pos, 1, 1, 1, dst->nparam, s);
    1877           0 :         pos = node_var_coef_pos(dst, 0);
    1878           0 :         isl_dim_map_range(dim_map, pos, -2, offset + src->nvar, 1,
    1879           0 :                           dst->nvar, -s);
    1880           0 :         isl_dim_map_range(dim_map, pos + 1, -2, offset + src->nvar, 1,
    1881           0 :                           dst->nvar, s);
    1882             : 
    1883           0 :         pos = node_cst_coef_offset(src);
    1884           0 :         isl_dim_map_range(dim_map, pos, 0, 0, 0, 1, -s);
    1885           0 :         pos = node_par_coef_offset(src);
    1886           0 :         isl_dim_map_range(dim_map, pos, 1, 1, 1, src->nparam, -s);
    1887           0 :         pos = node_var_coef_pos(src, 0);
    1888           0 :         isl_dim_map_range(dim_map, pos, -2, offset, 1, src->nvar, s);
    1889           0 :         isl_dim_map_range(dim_map, pos + 1, -2, offset, 1, src->nvar, -s);
    1890             : 
    1891           0 :         return dim_map;
    1892             : }
    1893             : 
    1894             : /* Add the constraints from "src" to "dst" using "dim_map",
    1895             :  * after making sure there is enough room in "dst" for the extra constraints.
    1896             :  */
    1897           0 : static __isl_give isl_basic_set *add_constraints_dim_map(
    1898             :         __isl_take isl_basic_set *dst, __isl_take isl_basic_set *src,
    1899             :         __isl_take isl_dim_map *dim_map)
    1900             : {
    1901             :         int n_eq, n_ineq;
    1902             : 
    1903           0 :         n_eq = isl_basic_set_n_equality(src);
    1904           0 :         n_ineq = isl_basic_set_n_inequality(src);
    1905           0 :         dst = isl_basic_set_extend_constraints(dst, n_eq, n_ineq);
    1906           0 :         dst = isl_basic_set_add_constraints_dim_map(dst, src, dim_map);
    1907           0 :         return dst;
    1908             : }
    1909             : 
    1910             : /* Add constraints to graph->lp that force validity for the given
    1911             :  * dependence from a node i to itself.
    1912             :  * That is, add constraints that enforce
    1913             :  *
    1914             :  *      (c_i_0 + c_i_n n + c_i_x y) - (c_i_0 + c_i_n n + c_i_x x)
    1915             :  *      = c_i_x (y - x) >= 0
    1916             :  *
    1917             :  * for each (x,y) in R.
    1918             :  * We obtain general constraints on coefficients (c_0, c_x)
    1919             :  * of valid constraints for (y - x) and then plug in (0, c_i_x^+ - c_i_x^-),
    1920             :  * where c_i_x = c_i_x^+ - c_i_x^-, with c_i_x^+ and c_i_x^- non-negative.
    1921             :  * In graph->lp, the c_i_x^- appear before their c_i_x^+ counterpart.
    1922             :  * Note that the result of intra_coefficients may also contain
    1923             :  * parameter coefficients c_n, in which case 0 is plugged in for them as well.
    1924             :  */
    1925           0 : static isl_stat add_intra_validity_constraints(struct isl_sched_graph *graph,
    1926             :         struct isl_sched_edge *edge)
    1927             : {
    1928             :         int offset;
    1929           0 :         isl_map *map = isl_map_copy(edge->map);
    1930           0 :         isl_ctx *ctx = isl_map_get_ctx(map);
    1931             :         isl_dim_map *dim_map;
    1932             :         isl_basic_set *coef;
    1933           0 :         struct isl_sched_node *node = edge->src;
    1934             : 
    1935           0 :         coef = intra_coefficients(graph, node, map, 0);
    1936             : 
    1937           0 :         offset = coef_var_offset(coef);
    1938             : 
    1939           0 :         if (!coef)
    1940           0 :                 return isl_stat_error;
    1941             : 
    1942           0 :         dim_map = intra_dim_map(ctx, graph, node, offset, 1);
    1943           0 :         graph->lp = add_constraints_dim_map(graph->lp, coef, dim_map);
    1944             : 
    1945           0 :         return isl_stat_ok;
    1946             : }
    1947             : 
    1948             : /* Add constraints to graph->lp that force validity for the given
    1949             :  * dependence from node i to node j.
    1950             :  * That is, add constraints that enforce
    1951             :  *
    1952             :  *      (c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x) >= 0
    1953             :  *
    1954             :  * for each (x,y) in R.
    1955             :  * We obtain general constraints on coefficients (c_0, c_n, c_x, c_y)
    1956             :  * of valid constraints for R and then plug in
    1957             :  * (c_j_0 - c_i_0, c_j_n - c_i_n, -(c_i_x^+ - c_i_x^-), c_j_x^+ - c_j_x^-),
    1958             :  * where c_* = c_*^+ - c_*^-, with c_*^+ and c_*^- non-negative.
    1959             :  * In graph->lp, the c_*^- appear before their c_*^+ counterpart.
    1960             :  */
    1961           0 : static isl_stat add_inter_validity_constraints(struct isl_sched_graph *graph,
    1962             :         struct isl_sched_edge *edge)
    1963             : {
    1964             :         int offset;
    1965             :         isl_map *map;
    1966             :         isl_ctx *ctx;
    1967             :         isl_dim_map *dim_map;
    1968             :         isl_basic_set *coef;
    1969           0 :         struct isl_sched_node *src = edge->src;
    1970           0 :         struct isl_sched_node *dst = edge->dst;
    1971             : 
    1972           0 :         if (!graph->lp)
    1973           0 :                 return isl_stat_error;
    1974             : 
    1975           0 :         map = isl_map_copy(edge->map);
    1976           0 :         ctx = isl_map_get_ctx(map);
    1977           0 :         coef = inter_coefficients(graph, edge, map);
    1978             : 
    1979           0 :         offset = coef_var_offset(coef);
    1980             : 
    1981           0 :         if (!coef)
    1982           0 :                 return isl_stat_error;
    1983             : 
    1984           0 :         dim_map = inter_dim_map(ctx, graph, src, dst, offset, 1);
    1985             : 
    1986           0 :         edge->start = graph->lp->n_ineq;
    1987           0 :         graph->lp = add_constraints_dim_map(graph->lp, coef, dim_map);
    1988           0 :         if (!graph->lp)
    1989           0 :                 return isl_stat_error;
    1990           0 :         edge->end = graph->lp->n_ineq;
    1991             : 
    1992           0 :         return isl_stat_ok;
    1993             : }
    1994             : 
    1995             : /* Add constraints to graph->lp that bound the dependence distance for the given
    1996             :  * dependence from a node i to itself.
    1997             :  * If s = 1, we add the constraint
    1998             :  *
    1999             :  *      c_i_x (y - x) <= m_0 + m_n n
    2000             :  *
    2001             :  * or
    2002             :  *
    2003             :  *      -c_i_x (y - x) + m_0 + m_n n >= 0
    2004             :  *
    2005             :  * for each (x,y) in R.
    2006             :  * If s = -1, we add the constraint
    2007             :  *
    2008             :  *      -c_i_x (y - x) <= m_0 + m_n n
    2009             :  *
    2010             :  * or
    2011             :  *
    2012             :  *      c_i_x (y - x) + m_0 + m_n n >= 0
    2013             :  *
    2014             :  * for each (x,y) in R.
    2015             :  * We obtain general constraints on coefficients (c_0, c_n, c_x)
    2016             :  * of valid constraints for (y - x) and then plug in (m_0, m_n, -s * c_i_x),
    2017             :  * with each coefficient (except m_0) represented as a pair of non-negative
    2018             :  * coefficients.
    2019             :  *
    2020             :  *
    2021             :  * If "local" is set, then we add constraints
    2022             :  *
    2023             :  *      c_i_x (y - x) <= 0
    2024             :  *
    2025             :  * or
    2026             :  *
    2027             :  *      -c_i_x (y - x) <= 0
    2028             :  *
    2029             :  * instead, forcing the dependence distance to be (less than or) equal to 0.
    2030             :  * That is, we plug in (0, 0, -s * c_i_x),
    2031             :  * intra_coefficients is not required to have c_n in its result when
    2032             :  * "local" is set.  If they are missing, then (0, -s * c_i_x) is plugged in.
    2033             :  * Note that dependences marked local are treated as validity constraints
    2034             :  * by add_all_validity_constraints and therefore also have
    2035             :  * their distances bounded by 0 from below.
    2036             :  */
    2037           0 : static isl_stat add_intra_proximity_constraints(struct isl_sched_graph *graph,
    2038             :         struct isl_sched_edge *edge, int s, int local)
    2039             : {
    2040             :         int offset;
    2041             :         unsigned nparam;
    2042           0 :         isl_map *map = isl_map_copy(edge->map);
    2043           0 :         isl_ctx *ctx = isl_map_get_ctx(map);
    2044             :         isl_dim_map *dim_map;
    2045             :         isl_basic_set *coef;
    2046           0 :         struct isl_sched_node *node = edge->src;
    2047             : 
    2048           0 :         coef = intra_coefficients(graph, node, map, !local);
    2049             : 
    2050           0 :         offset = coef_var_offset(coef);
    2051             : 
    2052           0 :         if (!coef)
    2053           0 :                 return isl_stat_error;
    2054             : 
    2055           0 :         nparam = isl_space_dim(node->space, isl_dim_param);
    2056           0 :         dim_map = intra_dim_map(ctx, graph, node, offset, -s);
    2057             : 
    2058           0 :         if (!local) {
    2059           0 :                 isl_dim_map_range(dim_map, 1, 0, 0, 0, 1, 1);
    2060           0 :                 isl_dim_map_range(dim_map, 4, 2, 1, 1, nparam, -1);
    2061           0 :                 isl_dim_map_range(dim_map, 5, 2, 1, 1, nparam, 1);
    2062             :         }
    2063           0 :         graph->lp = add_constraints_dim_map(graph->lp, coef, dim_map);
    2064             : 
    2065           0 :         return isl_stat_ok;
    2066             : }
    2067             : 
    2068             : /* Add constraints to graph->lp that bound the dependence distance for the given
    2069             :  * dependence from node i to node j.
    2070             :  * If s = 1, we add the constraint
    2071             :  *
    2072             :  *      (c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x)
    2073             :  *              <= m_0 + m_n n
    2074             :  *
    2075             :  * or
    2076             :  *
    2077             :  *      -(c_j_0 + c_j_n n + c_j_x y) + (c_i_0 + c_i_n n + c_i_x x) +
    2078             :  *              m_0 + m_n n >= 0
    2079             :  *
    2080             :  * for each (x,y) in R.
    2081             :  * If s = -1, we add the constraint
    2082             :  *
    2083             :  *      -((c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x))
    2084             :  *              <= m_0 + m_n n
    2085             :  *
    2086             :  * or
    2087             :  *
    2088             :  *      (c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x) +
    2089             :  *              m_0 + m_n n >= 0
    2090             :  *
    2091             :  * for each (x,y) in R.
    2092             :  * We obtain general constraints on coefficients (c_0, c_n, c_x, c_y)
    2093             :  * of valid constraints for R and then plug in
    2094             :  * (m_0 - s*c_j_0 + s*c_i_0, m_n - s*c_j_n + s*c_i_n,
    2095             :  *  s*c_i_x, -s*c_j_x)
    2096             :  * with each coefficient (except m_0, c_*_0 and c_*_n)
    2097             :  * represented as a pair of non-negative coefficients.
    2098             :  *
    2099             :  *
    2100             :  * If "local" is set (and s = 1), then we add constraints
    2101             :  *
    2102             :  *      (c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x) <= 0
    2103             :  *
    2104             :  * or
    2105             :  *
    2106             :  *      -((c_j_0 + c_j_n n + c_j_x y) + (c_i_0 + c_i_n n + c_i_x x)) >= 0
    2107             :  *
    2108             :  * instead, forcing the dependence distance to be (less than or) equal to 0.
    2109             :  * That is, we plug in
    2110             :  * (-s*c_j_0 + s*c_i_0, -s*c_j_n + s*c_i_n, s*c_i_x, -s*c_j_x).
    2111             :  * Note that dependences marked local are treated as validity constraints
    2112             :  * by add_all_validity_constraints and therefore also have
    2113             :  * their distances bounded by 0 from below.
    2114             :  */
    2115           0 : static isl_stat add_inter_proximity_constraints(struct isl_sched_graph *graph,
    2116             :         struct isl_sched_edge *edge, int s, int local)
    2117             : {
    2118             :         int offset;
    2119             :         unsigned nparam;
    2120           0 :         isl_map *map = isl_map_copy(edge->map);
    2121           0 :         isl_ctx *ctx = isl_map_get_ctx(map);
    2122             :         isl_dim_map *dim_map;
    2123             :         isl_basic_set *coef;
    2124           0 :         struct isl_sched_node *src = edge->src;
    2125           0 :         struct isl_sched_node *dst = edge->dst;
    2126             : 
    2127           0 :         coef = inter_coefficients(graph, edge, map);
    2128             : 
    2129           0 :         offset = coef_var_offset(coef);
    2130             : 
    2131           0 :         if (!coef)
    2132           0 :                 return isl_stat_error;
    2133             : 
    2134           0 :         nparam = isl_space_dim(src->space, isl_dim_param);
    2135           0 :         dim_map = inter_dim_map(ctx, graph, src, dst, offset, -s);
    2136             : 
    2137           0 :         if (!local) {
    2138           0 :                 isl_dim_map_range(dim_map, 1, 0, 0, 0, 1, 1);
    2139           0 :                 isl_dim_map_range(dim_map, 4, 2, 1, 1, nparam, -1);
    2140           0 :                 isl_dim_map_range(dim_map, 5, 2, 1, 1, nparam, 1);
    2141             :         }
    2142             : 
    2143           0 :         graph->lp = add_constraints_dim_map(graph->lp, coef, dim_map);
    2144             : 
    2145           0 :         return isl_stat_ok;
    2146             : }
    2147             : 
    2148             : /* Should the distance over "edge" be forced to zero?
    2149             :  * That is, is it marked as a local edge?
    2150             :  * If "use_coincidence" is set, then coincidence edges are treated
    2151             :  * as local edges.
    2152             :  */
    2153           0 : static int force_zero(struct isl_sched_edge *edge, int use_coincidence)
    2154             : {
    2155           0 :         return is_local(edge) || (use_coincidence && is_coincidence(edge));
    2156             : }
    2157             : 
    2158             : /* Add all validity constraints to graph->lp.
    2159             :  *
    2160             :  * An edge that is forced to be local needs to have its dependence
    2161             :  * distances equal to zero.  We take care of bounding them by 0 from below
    2162             :  * here.  add_all_proximity_constraints takes care of bounding them by 0
    2163             :  * from above.
    2164             :  *
    2165             :  * If "use_coincidence" is set, then we treat coincidence edges as local edges.
    2166             :  * Otherwise, we ignore them.
    2167             :  */
    2168           0 : static int add_all_validity_constraints(struct isl_sched_graph *graph,
    2169             :         int use_coincidence)
    2170             : {
    2171             :         int i;
    2172             : 
    2173           0 :         for (i = 0; i < graph->n_edge; ++i) {
    2174           0 :                 struct isl_sched_edge *edge = &graph->edge[i];
    2175             :                 int zero;
    2176             : 
    2177           0 :                 zero = force_zero(edge, use_coincidence);
    2178           0 :                 if (!is_validity(edge) && !zero)
    2179           0 :                         continue;
    2180           0 :                 if (edge->src != edge->dst)
    2181           0 :                         continue;
    2182           0 :                 if (add_intra_validity_constraints(graph, edge) < 0)
    2183           0 :                         return -1;
    2184             :         }
    2185             : 
    2186           0 :         for (i = 0; i < graph->n_edge; ++i) {
    2187           0 :                 struct isl_sched_edge *edge = &graph->edge[i];
    2188             :                 int zero;
    2189             : 
    2190           0 :                 zero = force_zero(edge, use_coincidence);
    2191           0 :                 if (!is_validity(edge) && !zero)
    2192           0 :                         continue;
    2193           0 :                 if (edge->src == edge->dst)
    2194           0 :                         continue;
    2195           0 :                 if (add_inter_validity_constraints(graph, edge) < 0)
    2196           0 :                         return -1;
    2197             :         }
    2198             : 
    2199           0 :         return 0;
    2200             : }
    2201             : 
    2202             : /* Add constraints to graph->lp that bound the dependence distance
    2203             :  * for all dependence relations.
    2204             :  * If a given proximity dependence is identical to a validity
    2205             :  * dependence, then the dependence distance is already bounded
    2206             :  * from below (by zero), so we only need to bound the distance
    2207             :  * from above.  (This includes the case of "local" dependences
    2208             :  * which are treated as validity dependence by add_all_validity_constraints.)
    2209             :  * Otherwise, we need to bound the distance both from above and from below.
    2210             :  *
    2211             :  * If "use_coincidence" is set, then we treat coincidence edges as local edges.
    2212             :  * Otherwise, we ignore them.
    2213             :  */
    2214           0 : static int add_all_proximity_constraints(struct isl_sched_graph *graph,
    2215             :         int use_coincidence)
    2216             : { /* Returns 0 on success and -1 as soon as one of the helpers fails. */
    2217             :         int i;
    2218             : 
    2219           0 :         for (i = 0; i < graph->n_edge; ++i) {
    2220           0 :                 struct isl_sched_edge *edge = &graph->edge[i];
    2221             :                 int zero;
    2222             : 
    2223           0 :                 zero = force_zero(edge, use_coincidence);
    2224           0 :                 if (!is_proximity(edge) && !zero) /* only proximity or forced-zero edges contribute */
    2225           0 :                         continue;
    2226           0 :                 if (edge->src == edge->dst && /* first call: sign 1, "zero" forwarded; intra-node case */
    2227           0 :                     add_intra_proximity_constraints(graph, edge, 1, zero) < 0)
    2228           0 :                         return -1;
    2229           0 :                 if (edge->src != edge->dst && /* same call for an edge between distinct nodes */
    2230           0 :                     add_inter_proximity_constraints(graph, edge, 1, zero) < 0)
    2231           0 :                         return -1;
    2232           0 :                 if (is_validity(edge) || zero) /* no second, opposite-sign call for these edges */
    2233           0 :                         continue;
    2234           0 :                 if (edge->src == edge->dst && /* second call: sign -1, never forced zero */
    2235           0 :                     add_intra_proximity_constraints(graph, edge, -1, 0) < 0)
    2236           0 :                         return -1;
    2237           0 :                 if (edge->src != edge->dst &&
    2238           0 :                     add_inter_proximity_constraints(graph, edge, -1, 0) < 0)
    2239           0 :                         return -1;
    2240             :         }
    2241             : 
    2242           0 :         return 0;
    2243             : }
    2244             : 
    2245             : /* Normalize the rows of "indep" such that all rows are lexicographically
    2246             :  * positive and such that each row contains as many final zeros as possible,
    2247             :  * given the choice for the previous rows.
    2248             :  * Do this by performing elementary row operations.
                     :  *
                     :  * The matrix is first passed through isl_mat_reverse_gauss and
                     :  * then through isl_mat_lexnonneg_rows; the result of the latter
                     :  * is returned to the caller.
    2249             :  */
    2250           0 : static __isl_give isl_mat *normalize_independent(__isl_take isl_mat *indep)
    2251             : {
    2252           0 :         indep = isl_mat_reverse_gauss(indep);
    2253           0 :         indep = isl_mat_lexnonneg_rows(indep);
    2254           0 :         return indep;
    2255             : }
    2256             : 
    2257             : /* Compute a basis for the rows in the linear part of the schedule
    2258             :  * and extend this basis to a full basis.  The remaining rows
    2259             :  * can then be used to force linear independence from the rows
    2260             :  * in the schedule.
    2261             :  *
    2262             :  * In particular, given the schedule rows S, we compute
    2263             :  *
    2264             :  *      S   = H Q
    2265             :  *      S U = H
    2266             :  *
    2267             :  * with H the Hermite normal form of S.  That is, all but the
    2268             :  * first rank columns of H are zero and so each row in S is
    2269             :  * a linear combination of the first rank rows of Q.
    2270             :  * The matrix Q can be used as a variable transformation
    2271             :  * that isolates the directions of S in the first rank rows.
    2272             :  * Transposing S U = H yields
    2273             :  *
    2274             :  *      U^T S^T = H^T
    2275             :  *
    2276             :  * with all but the first rank rows of H^T zero.
    2277             :  * The last rows of U^T are therefore linear combinations
    2278             :  * of schedule coefficients that are all zero on schedule
    2279             :  * coefficients that are linearly dependent on the rows of S.
    2280             :  * At least one of these combinations is non-zero on
    2281             :  * linearly independent schedule coefficients.
    2282             :  * The rows are normalized to involve as few of the last
    2283             :  * coefficients as possible and to have a positive initial value.
                     :  *
                     :  * The previous node->indep and node->vmap are freed and replaced
                     :  * (node->vmap becomes Q) and node->rank is set from
                     :  * isl_mat_initial_non_zero_cols(H).
                     :  * Return 0 on success and -1 if node->indep or node->vmap is NULL
                     :  * or node->rank is negative afterwards.
    2284             :  */
    2285           0 : static int node_update_vmap(struct isl_sched_node *node)
    2286             : {
    2287             :         isl_mat *H, *U, *Q;
    2288           0 :         int n_row = isl_mat_rows(node->sched);
    2289             : 
    2290           0 :         H = isl_mat_sub_alloc(node->sched, 0, n_row, /* presumably the node->nvar variable columns, */
    2291           0 :                               1 + node->nparam, node->nvar); /* after the constant and parameter columns -- TODO confirm */
    2292             : 
    2293           0 :         H = isl_mat_left_hermite(H, 0, &U, &Q);
    2294           0 :         isl_mat_free(node->indep);
    2295           0 :         isl_mat_free(node->vmap);
    2296           0 :         node->vmap = Q;
    2297           0 :         node->indep = isl_mat_transpose(U);
    2298           0 :         node->rank = isl_mat_initial_non_zero_cols(H);
    2299           0 :         node->indep = isl_mat_drop_rows(node->indep, 0, node->rank); /* keep only the rows of U^T after the first rank */
    2300           0 :         node->indep = normalize_independent(node->indep);
    2301           0 :         isl_mat_free(H);
    2302             : 
    2303           0 :         if (!node->indep || !node->vmap || node->rank < 0) /* errors are only checked once, at the end */
    2304           0 :                 return -1;
    2305           0 :         return 0;
    2306             : }
    2307             : 
    2308             : /* Is "edge" marked as a validity or a conditional validity edge?
                     :  * The result is non-zero if either is_validity or
                     :  * is_conditional_validity holds for "edge".
    2309             :  */
    2310           0 : static int is_any_validity(struct isl_sched_edge *edge)
    2311             : {
    2312           0 :         return is_validity(edge) || is_conditional_validity(edge);
    2313             : }
    2314             : 
    2315             : /* How many times should we count the constraints in "edge"?
    2316             :  *
    2317             :  * We count as follows
    2318             :  * validity             -> 1 (>= 0)
    2319             :  * validity+proximity   -> 2 (>= 0 and upper bound)
    2320             :  * proximity            -> 2 (lower and upper bound)
    2321             :  * local(+any)          -> 2 (>= 0 and <= 0)
    2322             :  *
    2323             :  * If an edge is only marked conditional_validity then it counts
    2324             :  * as zero since it is only checked afterwards.
    2325             :  *
    2326             :  * If "use_coincidence" is set, then we treat coincidence edges as local edges.
    2327             :  * Otherwise, we ignore them.
    2328             :  */
    2329           0 : static int edge_multiplicity(struct isl_sched_edge *edge, int use_coincidence)
    2330             : {
    2331           0 :         if (is_proximity(edge) || force_zero(edge, use_coincidence)) /* the three "2" rows of the table above */
    2332           0 :                 return 2;
    2333           0 :         if (is_validity(edge)) /* validity only */
    2334           0 :                 return 1;
    2335           0 :         return 0; /* any other edge is not counted */
    2336             : }
    2337             : 
    2338             : /* How many times should the constraints in "edge" be counted
    2339             :  * as a parametric intra-node constraint?
    2340             :  *
    2341             :  * Only proximity edges that are not forced zero need
    2342             :  * coefficient constraints that include coefficients for parameters.
    2343             :  * If the edge is also a validity edge, then only
    2344             :  * an upper bound is introduced.  Otherwise, both lower and upper bounds
    2345             :  * are introduced.
                     :  *
                     :  * Edges between distinct nodes always count as zero here.
                     :  * "use_coincidence" is passed on to force_zero.
    2346             :  */
    2347           0 : static int parametric_intra_edge_multiplicity(struct isl_sched_edge *edge,
    2348             :         int use_coincidence)
    2349             : {
    2350           0 :         if (edge->src != edge->dst) /* not an intra-node edge */
    2351           0 :                 return 0;
    2352           0 :         if (!is_proximity(edge))
    2353           0 :                 return 0;
    2354           0 :         if (force_zero(edge, use_coincidence))
    2355           0 :                 return 0;
    2356           0 :         if (is_validity(edge)) /* upper bound only */
    2357           0 :                 return 1;
    2358             :         else
    2359           0 :                 return 2; /* lower and upper bound */
    2360             : }
    2361             : 
    2362             : /* Add "f" times the number of equality and inequality constraints of "bset"
    2363             :  * to "n_eq" and "n_ineq" and free "bset".
                     :  *
                     :  * Callers pass the multiplicity of the edge as "f"; both counts
                     :  * are scaled by it, as stated above.
                     :  * Return isl_stat_error if "bset" is NULL (the counters are then
                     :  * left untouched) and isl_stat_ok otherwise.
    2364             :  */
    2365           0 : static isl_stat update_count(__isl_take isl_basic_set *bset,
    2366             :         int f, int *n_eq, int *n_ineq)
    2367             : {
    2368           0 :         if (!bset)
    2369           0 :                 return isl_stat_error;
    2370             : 
    2371           0 :         *n_eq += f * isl_basic_set_n_equality(bset);
    2372           0 :         *n_ineq += f * isl_basic_set_n_inequality(bset);
    2373           0 :         isl_basic_set_free(bset);
    2374             : 
    2375           0 :         return isl_stat_ok;
    2376             : }
    2377             : 
    2378             : /* Count the number of equality and inequality constraints
    2379             :  * that will be added for the given map.
    2380             :  *
    2381             :  * The edges that require parameter coefficients are counted separately.
    2382             :  *
    2383             :  * "use_coincidence" is set if we should take into account coincidence edges.
                     :  *
                     :  * "map" is consumed on every path.  The counts are accumulated
                     :  * into *n_eq and *n_ineq through update_count.
                     :  * Return isl_stat_error if any update_count call fails.
    2384             :  */
    2385           0 : static isl_stat count_map_constraints(struct isl_sched_graph *graph,
    2386             :         struct isl_sched_edge *edge, __isl_take isl_map *map,
    2387             :         int *n_eq, int *n_ineq, int use_coincidence)
    2388             : {
    2389             :         isl_map *copy;
    2390             :         isl_basic_set *coef;
    2391           0 :         int f = edge_multiplicity(edge, use_coincidence);
    2392           0 :         int fp = parametric_intra_edge_multiplicity(edge, use_coincidence);
    2393             : 
    2394           0 :         if (f == 0) { /* edge contributes no constraints */
    2395           0 :                 isl_map_free(map);
    2396           0 :                 return isl_stat_ok;
    2397             :         }
    2398             : 
    2399           0 :         if (edge->src != edge->dst) { /* inter-node edge: fp is 0 here, so only f matters */
    2400           0 :                 coef = inter_coefficients(graph, edge, map); /* "map" is not freed here; presumably consumed by the callee */
    2401           0 :                 return update_count(coef, f, n_eq, n_ineq);
    2402             :         }
    2403             : 
    2404           0 :         if (fp > 0) { /* part counted with last argument 1 (presumably: with parameter coefficients) */
    2405           0 :                 copy = isl_map_copy(map);
    2406           0 :                 coef = intra_coefficients(graph, edge->src, copy, 1);
    2407           0 :                 if (update_count(coef, fp, n_eq, n_ineq) < 0)
    2408           0 :                         goto error;
    2409             :         }
    2410             : 
    2411           0 :         if (f > fp) { /* remaining f - fp counted with last argument 0 */
    2412           0 :                 copy = isl_map_copy(map);
    2413           0 :                 coef = intra_coefficients(graph, edge->src, copy, 0);
    2414           0 :                 if (update_count(coef, f - fp, n_eq, n_ineq) < 0)
    2415           0 :                         goto error;
    2416             :         }
    2417             : 
    2418           0 :         isl_map_free(map);
    2419           0 :         return isl_stat_ok;
    2420             : error:
    2421           0 :         isl_map_free(map);
    2422           0 :         return isl_stat_error;
    2423             : }
    2424             : 
    2425             : /* Count the number of equality and inequality constraints
    2426             :  * that will be added to the main lp problem.
    2427             :  * We count as follows
    2428             :  * validity             -> 1 (>= 0)
    2429             :  * validity+proximity   -> 2 (>= 0 and upper bound)
    2430             :  * proximity            -> 2 (lower and upper bound)
    2431             :  * local(+any)          -> 2 (>= 0 and <= 0)
    2432             :  *
    2433             :  * If "use_coincidence" is set, then we treat coincidence edges as local edges.
    2434             :  * Otherwise, we ignore them.
                     :  *
                     :  * *n_eq and *n_ineq are reset to zero before counting.
                     :  * Return 0 on success and -1 if counting fails for some edge.
    2435             :  */
    2436           0 : static int count_constraints(struct isl_sched_graph *graph,
    2437             :         int *n_eq, int *n_ineq, int use_coincidence)
    2438             : {
    2439             :         int i;
    2440             : 
    2441           0 :         *n_eq = *n_ineq = 0;
    2442           0 :         for (i = 0; i < graph->n_edge; ++i) {
    2443           0 :                 struct isl_sched_edge *edge = &graph->edge[i];
    2444           0 :                 isl_map *map = isl_map_copy(edge->map); /* the copy is consumed by count_map_constraints */
    2445             : 
    2446           0 :                 if (count_map_constraints(graph, edge, map, n_eq, n_ineq,
    2447             :                                             use_coincidence) < 0)
    2448           0 :                         return -1;
    2449             :         }
    2450             : 
    2451           0 :         return 0;
    2452             : }
    2453             : 
    2454             : /* Count the number of constraints that will be added by
    2455             :  * add_bound_constant_constraints to bound the values of the constant terms
    2456             :  * and increment *n_eq and *n_ineq accordingly.
    2457             :  *
    2458             :  * In practice, add_bound_constant_constraints only adds inequalities.
                     :  * Consequently *n_eq is never modified here, and the function
                     :  * always returns isl_stat_ok.
    2459             :  */
    2460           0 : static isl_stat count_bound_constant_constraints(isl_ctx *ctx,
    2461             :         struct isl_sched_graph *graph, int *n_eq, int *n_ineq)
    2462             : {
    2463           0 :         if (isl_options_get_schedule_max_constant_term(ctx) == -1) /* -1: no bound requested */
    2464           0 :                 return isl_stat_ok;
    2465             : 
    2466           0 :         *n_ineq += graph->n; /* one inequality per node */
    2467             : 
    2468           0 :         return isl_stat_ok;
    2469             : }
    2470             : 
    2471             : /* Add constraints to bound the values of the constant terms in the schedule,
    2472             :  * if requested by the user.
    2473             :  *
    2474             :  * The maximal value of the constant terms is defined by the option
    2475             :  * "schedule_max_constant_term".
                     :  * A value of -1 for this option means that no constraints are added.
                     :  *
                     :  * Return isl_stat_error if an inequality cannot be allocated
                     :  * in graph->lp and isl_stat_ok otherwise.
    2476             :  */
    2477           0 : static isl_stat add_bound_constant_constraints(isl_ctx *ctx,
    2478             :         struct isl_sched_graph *graph)
    2479             : {
    2480             :         int i, k;
    2481             :         int max;
    2482             :         int total;
    2483             : 
    2484           0 :         max = isl_options_get_schedule_max_constant_term(ctx);
    2485           0 :         if (max == -1)
    2486           0 :                 return isl_stat_ok;
    2487             : 
    2488           0 :         total = isl_basic_set_dim(graph->lp, isl_dim_set);
    2489             : 
    2490           0 :         for (i = 0; i < graph->n; ++i) {
    2491           0 :                 struct isl_sched_node *node = &graph->node[i];
    2492             :                 int pos;
    2493             : 
    2494           0 :                 k = isl_basic_set_alloc_inequality(graph->lp);
    2495           0 :                 if (k < 0)
    2496           0 :                         return isl_stat_error;
    2497           0 :                 isl_seq_clr(graph->lp->ineq[k], 1 + total);
    2498           0 :                 pos = node_cst_coef_offset(node);
    2499           0 :                 isl_int_set_si(graph->lp->ineq[k][1 + pos], -1); /* -c + max >= 0, with c the variable at "pos" */
    2500           0 :                 isl_int_set_si(graph->lp->ineq[k][0], max);
    2501             :         }
    2502             : 
    2503           0 :         return isl_stat_ok;
    2504             : }
    2505             : 
    2506             : /* Count the number of constraints that will be added by
    2507             :  * add_bound_coefficient_constraints and increment *n_eq and *n_ineq
    2508             :  * accordingly.
    2509             :  *
    2510             :  * In practice, add_bound_coefficient_constraints only adds inequalities.
                     :  * *n_eq is therefore never modified here and the return value
                     :  * is always 0.
    2511             :  */
    2512           0 : static int count_bound_coefficient_constraints(isl_ctx *ctx,
    2513             :         struct isl_sched_graph *graph, int *n_eq, int *n_ineq)
    2514             : {
    2515             :         int i;
    2516             : 
    2517           0 :         if (isl_options_get_schedule_max_coefficient(ctx) == -1 && /* same test as in add_bound_coefficient_constraints */
    2518           0 :             !isl_options_get_schedule_treat_coalescing(ctx))
    2519           0 :                 return 0;
    2520             : 
    2521           0 :         for (i = 0; i < graph->n; ++i) /* at most one inequality per parameter and two per variable */
    2522           0 :                 *n_ineq += graph->node[i].nparam + 2 * graph->node[i].nvar;
    2523             : 
    2524           0 :         return 0;
    2525             : }
    2526             : 
    2527             : /* Add constraints to graph->lp that bound the values of
    2528             :  * the parameter schedule coefficients of "node" to "max" and
    2529             :  * the variable schedule coefficients to the corresponding entry
    2530             :  * in node->max.
    2531             :  * In either case, a negative value means that no bound needs to be imposed.
    2532             :  *
    2533             :  * For parameter coefficients, this amounts to adding a constraint
    2534             :  *
    2535             :  *      c_n <= max
    2536             :  *
    2537             :  * i.e.,
    2538             :  *
    2539             :  *      -c_n + max >= 0
    2540             :  *
    2541             :  * The variable coefficients are, however, not represented directly.
    2542             :  * Instead, the variable coefficients c_x are written as differences
    2543             :  * c_x = c_x^+ - c_x^-.
    2544             :  * That is,
    2545             :  *
    2546             :  *      -max_i <= c_x_i <= max_i
    2547             :  *
    2548             :  * is encoded as
    2549             :  *
    2550             :  *      -max_i <= c_x_i^+ - c_x_i^- <= max_i
    2551             :  *
    2552             :  * or
    2553             :  *
    2554             :  *      -(c_x_i^+ - c_x_i^-) + max_i >= 0
    2555             :  *      c_x_i^+ - c_x_i^- + max_i >= 0
                     :  *
                     :  * Return isl_stat_error if the scratch vector or an inequality
                     :  * cannot be allocated and isl_stat_ok otherwise.
    2556             :  */
    2557           0 : static isl_stat node_add_coefficient_constraints(isl_ctx *ctx,
    2558             :         struct isl_sched_graph *graph, struct isl_sched_node *node, int max)
    2559             : {
    2560             :         int i, j, k;
    2561             :         int total;
    2562             :         isl_vec *ineq;
    2563             : 
    2564           0 :         total = isl_basic_set_dim(graph->lp, isl_dim_set);
    2565             : 
    2566           0 :         for (j = 0; j < node->nparam; ++j) {
    2567             :                 int dim;
    2568             : 
    2569           0 :                 if (max < 0) /* no bound on the parameter coefficients */
    2570           0 :                         continue;
    2571             : 
    2572           0 :                 k = isl_basic_set_alloc_inequality(graph->lp);
    2573           0 :                 if (k < 0)
    2574           0 :                         return isl_stat_error;
    2575           0 :                 dim = 1 + node_par_coef_offset(node) + j;
    2576           0 :                 isl_seq_clr(graph->lp->ineq[k], 1 + total);
    2577           0 :                 isl_int_set_si(graph->lp->ineq[k][dim], -1); /* -c_n + max >= 0 */
    2578           0 :                 isl_int_set_si(graph->lp->ineq[k][0], max);
    2579             :         }
    2580             : 
    2581           0 :         ineq = isl_vec_alloc(ctx, 1 + total); /* scratch row, reused for every variable */
    2582           0 :         ineq = isl_vec_clr(ineq);
    2583           0 :         if (!ineq)
    2584           0 :                 return isl_stat_error;
    2585           0 :         for (i = 0; i < node->nvar; ++i) {
    2586           0 :                 int pos = 1 + node_var_coef_pos(node, i);
    2587             : 
    2588           0 :                 if (isl_int_is_neg(node->max->el[i])) /* negative entry: no bound for this variable */
    2589           0 :                         continue;
    2590             : 
    2591           0 :                 isl_int_set_si(ineq->el[pos], 1); /* the pair at pos, pos + 1 gets +1, -1 */
    2592           0 :                 isl_int_set_si(ineq->el[pos + 1], -1);
    2593           0 :                 isl_int_set(ineq->el[0], node->max->el[i]);
    2594             : 
    2595           0 :                 k = isl_basic_set_alloc_inequality(graph->lp);
    2596           0 :                 if (k < 0)
    2597           0 :                         goto error;
    2598           0 :                 isl_seq_cpy(graph->lp->ineq[k], ineq->el, 1 + total);
    2599             : 
    2600           0 :                 isl_seq_neg(ineq->el + pos, ineq->el + pos, 2); /* flip the pair for the opposite bound; constant unchanged */
    2601           0 :                 k = isl_basic_set_alloc_inequality(graph->lp);
    2602           0 :                 if (k < 0)
    2603           0 :                         goto error;
    2604           0 :                 isl_seq_cpy(graph->lp->ineq[k], ineq->el, 1 + total);
    2605             : 
    2606           0 :                 isl_seq_clr(ineq->el + pos, 2); /* reset the pair before the next variable */
    2607             :         }
    2608           0 :         isl_vec_free(ineq);
    2609             : 
    2610           0 :         return isl_stat_ok;
    2611             : error:
    2612           0 :         isl_vec_free(ineq);
    2613           0 :         return isl_stat_error;
    2614             : }
    2615             : 
    2616             : /* Add constraints that bound the values of the variable and parameter
    2617             :  * coefficients of the schedule.
    2618             :  *
    2619             :  * The maximal value of the coefficients is defined by the option
    2620             :  * 'schedule_max_coefficient' and the entries in node->max.
    2621             :  * These latter entries are only set if either the schedule_max_coefficient
    2622             :  * option or the schedule_treat_coalescing option is set.
                     :  *
                     :  * Return isl_stat_error if adding the constraints fails for
                     :  * some node and isl_stat_ok otherwise.
    2623             :  */
    2624           0 : static isl_stat add_bound_coefficient_constraints(isl_ctx *ctx,
    2625             :         struct isl_sched_graph *graph)
    2626             : {
    2627             :         int i;
    2628             :         int max;
    2629             : 
    2630           0 :         max = isl_options_get_schedule_max_coefficient(ctx);
    2631             : 
    2632           0 :         if (max == -1 && !isl_options_get_schedule_treat_coalescing(ctx)) /* neither option set: nothing to add */
    2633           0 :                 return isl_stat_ok;
    2634             : 
    2635           0 :         for (i = 0; i < graph->n; ++i) {
    2636           0 :                 struct isl_sched_node *node = &graph->node[i];
    2637             : 
    2638           0 :                 if (node_add_coefficient_constraints(ctx, graph, node, max) < 0)
    2639           0 :                         return isl_stat_error;
    2640             :         }
    2641             : 
    2642           0 :         return isl_stat_ok;
    2643             : }
    2644             : 
    2645             : /* Add a constraint to graph->lp that equates the value at position
    2646             :  * "sum_pos" to the sum of the "n" values starting at "first".
                     :  *
                     :  * Return isl_stat_error if the equality cannot be allocated and
                     :  * isl_stat_ok otherwise.
    2647             :  */
    2648           0 : static isl_stat add_sum_constraint(struct isl_sched_graph *graph,
    2649             :         int sum_pos, int first, int n)
    2650             : {
    2651             :         int i, k;
    2652             :         int total;
    2653             : 
    2654           0 :         total = isl_basic_set_dim(graph->lp, isl_dim_set);
    2655             : 
    2656           0 :         k = isl_basic_set_alloc_equality(graph->lp);
    2657           0 :         if (k < 0)
    2658           0 :                 return isl_stat_error;
    2659           0 :         isl_seq_clr(graph->lp->eq[k], 1 + total);
    2660           0 :         isl_int_set_si(graph->lp->eq[k][1 + sum_pos], -1); /* -x[sum_pos] + sum of the n values = 0 */
    2661           0 :         for (i = 0; i < n; ++i)
    2662           0 :                 isl_int_set_si(graph->lp->eq[k][1 + first + i], 1);
    2663             : 
    2664           0 :         return isl_stat_ok;
    2665             : }
    2666             : 
    2667             : /* Add a constraint to graph->lp that equates the value at position
    2668             :  * "sum_pos" to the sum of the parameter coefficients of all nodes.
                     :  *
                     :  * Return isl_stat_error if the equality cannot be allocated and
                     :  * isl_stat_ok otherwise.
    2669             :  */
    2670           0 : static isl_stat add_param_sum_constraint(struct isl_sched_graph *graph,
    2671             :         int sum_pos)
    2672             : {
    2673             :         int i, j, k;
    2674             :         int total;
    2675             : 
    2676           0 :         total = isl_basic_set_dim(graph->lp, isl_dim_set);
    2677             : 
    2678           0 :         k = isl_basic_set_alloc_equality(graph->lp);
    2679           0 :         if (k < 0)
    2680           0 :                 return isl_stat_error;
    2681           0 :         isl_seq_clr(graph->lp->eq[k], 1 + total);
    2682           0 :         isl_int_set_si(graph->lp->eq[k][1 + sum_pos], -1);
    2683           0 :         for (i = 0; i < graph->n; ++i) {
    2684           0 :                 int pos = 1 + node_par_coef_offset(&graph->node[i]);
    2685             : 
    2686           0 :                 for (j = 0; j < graph->node[i].nparam; ++j) /* the nparam entries starting at "pos" */
    2687           0 :                         isl_int_set_si(graph->lp->eq[k][pos + j], 1);
    2688             :         }
    2689             : 
    2690           0 :         return isl_stat_ok;
    2691             : }
    2692             : 
    2693             : /* Add a constraint to graph->lp that equates the value at position
    2694             :  * "sum_pos" to the sum of the variable coefficients of all nodes.
                     :  *
                     :  * For each node, all 2 * nvar entries starting at
                     :  * node_var_coef_offset are included in the sum.
                     :  * Return isl_stat_error if the equality cannot be allocated and
                     :  * isl_stat_ok otherwise.
    2695             :  */
    2696           0 : static isl_stat add_var_sum_constraint(struct isl_sched_graph *graph,
    2697             :         int sum_pos)
    2698             : {
    2699             :         int i, j, k;
    2700             :         int total;
    2701             : 
    2702           0 :         total = isl_basic_set_dim(graph->lp, isl_dim_set);
    2703             : 
    2704           0 :         k = isl_basic_set_alloc_equality(graph->lp);
    2705           0 :         if (k < 0)
    2706           0 :                 return isl_stat_error;
    2707           0 :         isl_seq_clr(graph->lp->eq[k], 1 + total);
    2708           0 :         isl_int_set_si(graph->lp->eq[k][1 + sum_pos], -1);
    2709           0 :         for (i = 0; i < graph->n; ++i) {
    2710           0 :                 struct isl_sched_node *node = &graph->node[i];
    2711           0 :                 int pos = 1 + node_var_coef_offset(node);
    2712             : 
    2713           0 :                 for (j = 0; j < 2 * node->nvar; ++j) /* two LP variables per schedule variable */
    2714           0 :                         isl_int_set_si(graph->lp->eq[k][pos + j], 1);
    2715             :         }
    2716             : 
    2717           0 :         return isl_stat_ok;
    2718             : }
    2719             : 
    2720             : /* Construct an ILP problem for finding schedule coefficients
    2721             :  * that result in non-negative, but small dependence distances
    2722             :  * over all dependences.
    2723             :  * In particular, the dependence distances over proximity edges
    2724             :  * are bounded by m_0 + m_n n and we compute schedule coefficients
    2725             :  * with small values (preferably zero) of m_n and m_0.
    2726             :  *
    2727             :  * All variables of the ILP are non-negative.  The actual coefficients
    2728             :  * may be negative, so each coefficient is represented as the difference
    2729             :  * of two non-negative variables.  The negative part always appears
    2730             :  * immediately before the positive part.
    2731             :  * Other than that, the variables have the following order
    2732             :  *
    2733             :  *      - sum of positive and negative parts of m_n coefficients
    2734             :  *      - m_0
    2735             :  *      - sum of all c_n coefficients
    2736             :  *              (unconstrained when computing non-parametric schedules)
    2737             :  *      - sum of positive and negative parts of all c_x coefficients
    2738             :  *      - positive and negative parts of m_n coefficients
    2739             :  *      - for each node
    2740             :  *              - positive and negative parts of c_i_x, in opposite order
    2741             :  *              - c_i_n (if parametric)
    2742             :  *              - c_i_0
    2743             :  *
    2744             :  * The constraints are those from the edges plus two or three equalities
    2745             :  * to express the sums.
    2746             :  *
    2747             :  * If "use_coincidence" is set, then we treat coincidence edges as local edges.
    2748             :  * Otherwise, we ignore them.
                     :  *
                     :  * Any previous graph->lp is freed and replaced.  As a side effect,
                     :  * node_update_vmap is called on every node and node->start is set
                     :  * to the position of the first LP variable of the node.
                     :  * Return isl_stat_error as soon as any step fails.
    2749             :  */
    2750           0 : static isl_stat setup_lp(isl_ctx *ctx, struct isl_sched_graph *graph,
    2751             :         int use_coincidence)
    2752             : {
    2753             :         int i;
    2754             :         unsigned nparam;
    2755             :         unsigned total;
    2756             :         isl_space *space;
    2757             :         int parametric;
    2758             :         int param_pos;
    2759             :         int n_eq, n_ineq;
    2760             : 
    2761           0 :         parametric = ctx->opt->schedule_parametric;
    2762           0 :         nparam = isl_space_dim(graph->node[0].space, isl_dim_param);
    2763           0 :         param_pos = 4; /* after the four leading variables (three sums and m_0) listed above */
    2764           0 :         total = param_pos + 2 * nparam; /* two LP variables per m_n coefficient */
    2765           0 :         for (i = 0; i < graph->n; ++i) { /* nodes are laid out in graph->sorted order */
    2766           0 :                 struct isl_sched_node *node = &graph->node[graph->sorted[i]];
    2767           0 :                 if (node_update_vmap(node) < 0)
    2768           0 :                         return isl_stat_error;
    2769           0 :                 node->start = total;
    2770           0 :                 total += 1 + node->nparam + 2 * node->nvar; /* c_i_0, c_i_n and both parts of c_i_x */
    2771             :         }
    2772             : 
    2773           0 :         if (count_constraints(graph, &n_eq, &n_ineq, use_coincidence) < 0) /* resets n_eq and n_ineq first */
    2774           0 :                 return isl_stat_error;
    2775           0 :         if (count_bound_constant_constraints(ctx, graph, &n_eq, &n_ineq) < 0)
    2776           0 :                 return isl_stat_error;
    2777           0 :         if (count_bound_coefficient_constraints(ctx, graph, &n_eq, &n_ineq) < 0)
    2778           0 :                 return isl_stat_error;
    2779             : 
    2780           0 :         space = isl_space_set_alloc(ctx, 0, total);
    2781           0 :         isl_basic_set_free(graph->lp);
    2782           0 :         n_eq += 2 + parametric; /* the sum equalities added below; the c_n sum only if parametric */
    2783             : 
    2784           0 :         graph->lp = isl_basic_set_alloc_space(space, 0, n_eq, n_ineq);
    2785             : 
    2786           0 :         if (add_sum_constraint(graph, 0, param_pos, 2 * nparam) < 0) /* variable 0: sum of the m_n parts */
    2787           0 :                 return isl_stat_error;
    2788           0 :         if (parametric && add_param_sum_constraint(graph, 2) < 0) /* variable 2: sum of all c_n */
    2789           0 :                 return isl_stat_error;
    2790           0 :         if (add_var_sum_constraint(graph, 3) < 0) /* variable 3: sum of all c_x parts */
    2791           0 :                 return isl_stat_error;
    2792           0 :         if (add_bound_constant_constraints(ctx, graph) < 0)
    2793           0 :                 return isl_stat_error;
    2794           0 :         if (add_bound_coefficient_constraints(ctx, graph) < 0)
    2795           0 :                 return isl_stat_error;
    2796           0 :         if (add_all_validity_constraints(graph, use_coincidence) < 0)
    2797           0 :                 return isl_stat_error;
    2798           0 :         if (add_all_proximity_constraints(graph, use_coincidence) < 0)
    2799           0 :                 return isl_stat_error;
    2800             : 
    2801           0 :         return isl_stat_ok;
    2802             : }
    2803             : 
    2804             : /* Analyze the conflicting constraint found by
    2805             :  * isl_tab_basic_set_non_trivial_lexmin.  If it corresponds to the validity
    2806             :  * constraint of one of the edges between distinct nodes, living, moreover
    2807             :  * in distinct SCCs, then record the source and sink SCC as this may
    2808             :  * be a good place to cut between SCCs.
                     :  *
                     :  * "user" is the struct isl_sched_graph.  Nothing is done if
                     :  * graph->src_scc has already been set (is non-negative).
                     :  * The return value is always 0.
    2809             :  */
    2810           0 : static int check_conflict(int con, void *user)
    2811             : {
    2812             :         int i;
    2813           0 :         struct isl_sched_graph *graph = user;
    2814             : 
    2815           0 :         if (graph->src_scc >= 0) /* a conflict has already been recorded */
    2816           0 :                 return 0;
    2817             : 
    2818           0 :         con -= graph->lp->n_eq; /* presumably turns "con" into an inequality index (equalities numbered first) -- TODO confirm */
    2819             : 
    2820           0 :         if (con >= graph->lp->n_ineq)
    2821           0 :                 return 0;
    2822             : 
    2823           0 :         for (i = 0; i < graph->n_edge; ++i) {
    2824           0 :                 if (!is_validity(&graph->edge[i]))
    2825           0 :                         continue;
    2826           0 :                 if (graph->edge[i].src == graph->edge[i].dst)
    2827           0 :                         continue;
    2828           0 :                 if (graph->edge[i].src->scc == graph->edge[i].dst->scc)
    2829           0 :                         continue;
    2830           0 :                 if (graph->edge[i].start > con) /* only edges with start <= con < end match */
    2831           0 :                         continue;
    2832           0 :                 if (graph->edge[i].end <= con)
    2833           0 :                         continue;
    2834           0 :                 graph->src_scc = graph->edge[i].src->scc; /* no break: a later matching edge would overwrite */
    2835           0 :                 graph->dst_scc = graph->edge[i].dst->scc;
    2836             :         }
    2837             : 
    2838           0 :         return 0;
    2839             : }
    2840             : 
    2841             : /* Check whether the next schedule row of the given node needs to be
    2842             :  * non-trivial.  Lower-dimensional domains may have some trivial rows,
    2843             :  * but as soon as the number of remaining required non-trivial rows
    2844             :  * is as large as the number of remaining rows to be computed,
    2845             :  * all remaining rows need to be non-trivial.
                     :  *
                     :  * That is, return 1 if node->nvar - node->rank is greater than or
                     :  * equal to graph->maxvar - graph->n_row, and 0 otherwise.
    2846             :  */
    2847           0 : static int needs_row(struct isl_sched_graph *graph, struct isl_sched_node *node)
    2848             : {
    2849           0 :         return node->nvar - node->rank >= graph->maxvar - graph->n_row;
    2850             : }
    2851             : 
    2852             : /* Construct a non-triviality region with triviality directions
    2853             :  * corresponding to the rows of "indep".
    2854             :  * The rows of "indep" are expressed in terms of the schedule coefficients c_i,
    2855             :  * while the triviality directions are expressed in terms of
    2856             :  * pairs of non-negative variables c^+_i - c^-_i, with c^-_i appearing
    2857             :  * before c^+_i.  Furthermore,
    2858             :  * the pairs of non-negative variables representing the coefficients
    2859             :  * are stored in the opposite order.
                     :  *
                     :  * The result has as many rows as "indep" and twice as many columns.
                     :  * Column j of "indep" is stored negated in column 2 * (n_var - 1 - j)
                     :  * and unchanged in column 2 * (n_var - 1 - j) + 1 of the result.
                     :  * Return NULL if "indep" is NULL or if the matrix cannot be allocated.
    2860             :  */
    2861           0 : static __isl_give isl_mat *construct_trivial(__isl_keep isl_mat *indep)
    2862             : {
    2863             :         isl_ctx *ctx;
    2864             :         isl_mat *mat;
    2865             :         int i, j, n, n_var;
    2866             : 
    2867           0 :         if (!indep)
    2868           0 :                 return NULL;
    2869             : 
    2870           0 :         ctx = isl_mat_get_ctx(indep);
    2871           0 :         n = isl_mat_rows(indep);
    2872           0 :         n_var = isl_mat_cols(indep);
    2873           0 :         mat = isl_mat_alloc(ctx, n, 2 * n_var);
    2874           0 :         if (!mat)
    2875           0 :                 return NULL;
    2876           0 :         for (i = 0; i < n; ++i) {
    2877           0 :                 for (j = 0; j < n_var; ++j) {
    2878           0 :                         int nj = n_var - 1 - j; /* reversed pair position */
    2879           0 :                         isl_int_neg(mat->row[i][2 * nj], indep->row[i][j]);
    2880           0 :                         isl_int_set(mat->row[i][2 * nj + 1], indep->row[i][j]);
    2881             :                 }
    2882             :         }
    2883             : 
    2884           0 :         return mat;
    2885             : }
    2886             : 
    2887             : /* Solve the ILP problem constructed in setup_lp.
    2888             :  * For each node such that all the remaining rows of its schedule
    2889             :  * need to be non-trivial, we construct a non-triviality region.
    2890             :  * This region imposes that the next row is independent of previous rows.
    2891             :  * In particular, the non-triviality region enforces that at least
    2892             :  * one of the linear combinations in the rows of node->indep is non-zero.
                     :  *
                     :  * For the other nodes, an empty (0 x 0) triviality matrix is used.
                     :  * The matrices are stored in graph->region for the duration of the call
                     :  * to isl_tab_basic_set_non_trivial_lexmin and are freed again afterwards.
                     :  * The solver is given a copy of graph->lp and is passed check_conflict,
                     :  * with "graph" as user pointer, as callback.
                     :  * The result of the solver is returned as is.
                     :  * NOTE(review): a NULL result of construct_trivial is not checked here;
                     :  * it is stored in the region unchanged.
    2893             :  */
    2894           0 : static __isl_give isl_vec *solve_lp(isl_ctx *ctx, struct isl_sched_graph *graph)
    2895             : {
    2896             :         int i;
    2897             :         isl_vec *sol;
    2898             :         isl_basic_set *lp;
    2899             : 
    2900           0 :         for (i = 0; i < graph->n; ++i) {
    2901           0 :                 struct isl_sched_node *node = &graph->node[i];
    2902             :                 isl_mat *trivial;
    2903             : 
    2904           0 :                 graph->region[i].pos = node_var_coef_offset(node);
    2905           0 :                 if (needs_row(graph, node))
    2906           0 :                         trivial = construct_trivial(node->indep);
    2907             :                 else
    2908           0 :                         trivial = isl_mat_zero(ctx, 0, 0);
    2909           0 :                 graph->region[i].trivial = trivial;
    2910             :         }
    2911           0 :         lp = isl_basic_set_copy(graph->lp);
    2912           0 :         sol = isl_tab_basic_set_non_trivial_lexmin(lp, 2, graph->n,
    2913             :                                        graph->region, &check_conflict, graph);
    2914           0 :         for (i = 0; i < graph->n; ++i)
    2915           0 :                 isl_mat_free(graph->region[i].trivial);
    2916           0 :         return sol;
    2917             : }
    2918             : 
    2919             : /* Extract the coefficients for the variables of "node" from "sol".
    2920             :  *
    2921             :  * Each schedule coefficient c_i_x is represented as the difference
    2922             :  * between two non-negative variables c_i_x^+ - c_i_x^-.
    2923             :  * The c_i_x^- appear before their c_i_x^+ counterpart.
    2924             :  * Furthermore, the order of these pairs is the opposite of that
    2925             :  * of the corresponding coefficients.
    2926             :  *
    2927             :  * Return c_i_x = c_i_x^+ - c_i_x^-
                     :  *
                     :  * The result is a newly allocated vector of size node->nvar.
                     :  * Return NULL if "sol" is NULL or if the result cannot be allocated.
                     :  * NOTE(review): the extra 1 in "pos" presumably skips a leading element
                     :  * of "sol" that is not a coefficient; confirm against the solver.
    2928             :  */
    2929           0 : static __isl_give isl_vec *extract_var_coef(struct isl_sched_node *node,
    2930             :         __isl_keep isl_vec *sol)
    2931             : {
    2932             :         int i;
    2933             :         int pos;
    2934             :         isl_vec *csol;
    2935             : 
    2936           0 :         if (!sol)
    2937           0 :                 return NULL;
    2938           0 :         csol = isl_vec_alloc(isl_vec_get_ctx(sol), node->nvar);
    2939           0 :         if (!csol)
    2940           0 :                 return NULL;
    2941             : 
    2942           0 :         pos = 1 + node_var_coef_offset(node);
    2943           0 :         for (i = 0; i < node->nvar; ++i) /* pair i fills element nvar - 1 - i */
    2944           0 :                 isl_int_sub(csol->el[node->nvar - 1 - i],
    2945             :                             sol->el[pos + 2 * i + 1], sol->el[pos + 2 * i]);
    2946             : 
    2947           0 :         return csol;
    2948             : }
    2949             : 
    2950             : /* Update the schedules of all nodes based on the given solution
    2951             :  * of the LP problem.
    2952             :  * The new row is added to the current band.
    2953             :  * All possibly negative coefficients are encoded as a difference
    2954             :  * of two non-negative variables, so we need to perform the subtraction
    2955             :  * here.
    2956             :  *
    2957             :  * If coincident is set, then the caller guarantees that the new
    2958             :  * row satisfies the coincidence constraints.
                     :  *
                     :  * For each node, a row is appended to node->sched holding the constant
                     :  * term, the parameter coefficients and the variable coefficients
                     :  * (in that order), and the cached node->sched_map is dropped.
                     :  * "sol" is freed on both the success and the error path.
                     :  * Return 0 on success and -1 on error, in particular if "sol" is NULL
                     :  * or of size zero, or if graph->n_total_row has reached graph->max_row.
                     :  * NOTE(review): the results of the isl_mat_set_element calls are not
                     :  * checked inside this function.
    2959             :  */
    2960           0 : static int update_schedule(struct isl_sched_graph *graph,
    2961             :         __isl_take isl_vec *sol, int coincident)
    2962             : {
    2963             :         int i, j;
    2964           0 :         isl_vec *csol = NULL;
    2965             : 
    2966           0 :         if (!sol)
    2967           0 :                 goto error;
    2968           0 :         if (sol->size == 0)
    2969           0 :                 isl_die(sol->ctx, isl_error_internal,
    2970             :                         "no solution found", goto error);
    2971           0 :         if (graph->n_total_row >= graph->max_row)
    2972           0 :                 isl_die(sol->ctx, isl_error_internal,
    2973             :                         "too many schedule rows", goto error);
    2974             : 
    2975           0 :         for (i = 0; i < graph->n; ++i) {
    2976           0 :                 struct isl_sched_node *node = &graph->node[i];
    2977             :                 int pos;
    2978           0 :                 int row = isl_mat_rows(node->sched); /* index of the new row */
    2979             : 
    2980           0 :                 isl_vec_free(csol); /* release the vector of the previous node */
    2981           0 :                 csol = extract_var_coef(node, sol);
    2982           0 :                 if (!csol)
    2983           0 :                         goto error;
    2984             : 
    2985           0 :                 isl_map_free(node->sched_map); /* cache is about to become stale */
    2986           0 :                 node->sched_map = NULL;
    2987           0 :                 node->sched = isl_mat_add_rows(node->sched, 1);
    2988           0 :                 if (!node->sched)
    2989           0 :                         goto error;
    2990           0 :                 pos = node_cst_coef_offset(node);
    2991           0 :                 node->sched = isl_mat_set_element(node->sched,
    2992           0 :                                         row, 0, sol->el[1 + pos]);
    2993           0 :                 pos = node_par_coef_offset(node);
    2994           0 :                 for (j = 0; j < node->nparam; ++j)
    2995           0 :                         node->sched = isl_mat_set_element(node->sched,
    2996           0 :                                         row, 1 + j, sol->el[1 + pos + j]);
    2997           0 :                 for (j = 0; j < node->nvar; ++j)
    2998           0 :                         node->sched = isl_mat_set_element(node->sched,
    2999           0 :                                         row, 1 + node->nparam + j, csol->el[j]);
    3000           0 :                 node->coincident[graph->n_total_row] = coincident;
    3001             :         }
    3002           0 :         isl_vec_free(sol);
    3003           0 :         isl_vec_free(csol);
    3004             : 
    3005           0 :         graph->n_row++;
    3006           0 :         graph->n_total_row++;
    3007             : 
    3008           0 :         return 0;
    3009             : error:
    3010           0 :         isl_vec_free(sol);
    3011           0 :         isl_vec_free(csol);
    3012           0 :         return -1;
    3013             : }
    3014             : 
    3015             : /* Convert row "row" of node->sched into an isl_aff living in "ls"
    3016             :  * and return this isl_aff.
                     :  *
                     :  * Column 0 of the row holds the constant term, the next node->nparam
                     :  * columns hold the parameter coefficients and the following node->nvar
                     :  * columns hold the coefficients of the input dimensions.
                     :  * "ls" is handed over to isl_aff_zero_on_domain.
                     :  * If any of the elements cannot be read, then the partially
                     :  * constructed isl_aff is freed and NULL is returned.
    3017             :  */
    3018           0 : static __isl_give isl_aff *extract_schedule_row(__isl_take isl_local_space *ls,
    3019             :         struct isl_sched_node *node, int row)
    3020             : {
    3021             :         int j;
    3022             :         isl_int v;
    3023             :         isl_aff *aff;
    3024             : 
    3025           0 :         isl_int_init(v);
    3026             : 
    3027           0 :         aff = isl_aff_zero_on_domain(ls);
    3028           0 :         if (isl_mat_get_element(node->sched, row, 0, &v) < 0)
    3029           0 :                 goto error;
    3030           0 :         aff = isl_aff_set_constant(aff, v);
    3031           0 :         for (j = 0; j < node->nparam; ++j) {
    3032           0 :                 if (isl_mat_get_element(node->sched, row, 1 + j, &v) < 0)
    3033           0 :                         goto error;
    3034           0 :                 aff = isl_aff_set_coefficient(aff, isl_dim_param, j, v);
    3035             :         }
    3036           0 :         for (j = 0; j < node->nvar; ++j) {
    3037           0 :                 if (isl_mat_get_element(node->sched, row,
    3038           0 :                                         1 + node->nparam + j, &v) < 0)
    3039           0 :                         goto error;
    3040           0 :                 aff = isl_aff_set_coefficient(aff, isl_dim_in, j, v);
    3041             :         }
    3042             : 
    3043           0 :         isl_int_clear(v);
    3044             : 
    3045           0 :         return aff;
    3046             : error:
    3047           0 :         isl_int_clear(v);
    3048           0 :         isl_aff_free(aff);
    3049           0 :         return NULL;
    3050             : }
    3051             : 
    3052             : /* Convert the "n" rows starting at "first" of node->sched into a multi_aff
    3053             :  * and return this multi_aff.
    3054             :  *
    3055             :  * The result is defined over the uncompressed node domain.
                     :  *
                     :  * The multi_aff is first built over the domain space of node->decompress
                     :  * if node->compressed is set and over node->space otherwise.
                     :  * For a compressed node, it is then pulled back through node->compress.
                     :  * Output dimension i - first is set to the isl_aff extracted from row i.
                     :  * Return NULL if "node" is NULL.
                     :  * NOTE(review): "nrow" is assigned but never used in this function;
                     :  * "first" and "n" are not checked against the number of rows here.
    3056             :  */
    3057           0 : static __isl_give isl_multi_aff *node_extract_partial_schedule_multi_aff(
    3058             :         struct isl_sched_node *node, int first, int n)
    3059             : {
    3060             :         int i;
    3061             :         isl_space *space;
    3062             :         isl_local_space *ls;
    3063             :         isl_aff *aff;
    3064             :         isl_multi_aff *ma;
    3065             :         int nrow;
    3066             : 
    3067           0 :         if (!node)
    3068           0 :                 return NULL;
    3069           0 :         nrow = isl_mat_rows(node->sched);
    3070           0 :         if (node->compressed)
    3071           0 :                 space = isl_multi_aff_get_domain_space(node->decompress);
    3072             :         else
    3073           0 :                 space = isl_space_copy(node->space);
    3074           0 :         ls = isl_local_space_from_space(isl_space_copy(space));
    3075           0 :         space = isl_space_from_domain(space);
    3076           0 :         space = isl_space_add_dims(space, isl_dim_out, n);
    3077           0 :         ma = isl_multi_aff_zero(space);
    3078             : 
    3079           0 :         for (i = first; i < first + n; ++i) {
    3080           0 :                 aff = extract_schedule_row(isl_local_space_copy(ls), node, i);
    3081           0 :                 ma = isl_multi_aff_set_aff(ma, i - first, aff);
    3082             :         }
    3083             : 
    3084           0 :         isl_local_space_free(ls);
    3085             : 
    3086           0 :         if (node->compressed)
    3087           0 :                 ma = isl_multi_aff_pullback_multi_aff(ma,
    3088             :                                         isl_multi_aff_copy(node->compress));
    3089             : 
    3090           0 :         return ma;
    3091             : }
    3092             : 
    3093             : /* Convert node->sched into a multi_aff and return this multi_aff.
    3094             :  *
    3095             :  * The result is defined over the uncompressed node domain.
                     :  *
                     :  * All rows of node->sched are converted, by calling
                     :  * node_extract_partial_schedule_multi_aff on the range starting at row 0.
                     :  * NOTE(review): "node" is dereferenced here without a NULL check.
    3096             :  */
    3097           0 : static __isl_give isl_multi_aff *node_extract_schedule_multi_aff(
    3098             :         struct isl_sched_node *node)
    3099             : {
    3100             :         int nrow;
    3101             : 
    3102           0 :         nrow = isl_mat_rows(node->sched);
    3103           0 :         return node_extract_partial_schedule_multi_aff(node, 0, nrow);
    3104             : }
    3105             : 
    3106             : /* Convert node->sched into a map and return this map.
    3107             :  *
    3108             :  * The result is cached in node->sched_map, which needs to be released
    3109             :  * whenever node->sched is updated.
    3110             :  * It is defined over the uncompressed node domain.
                     :  *
                     :  * The cache is only filled if node->sched_map is NULL.
                     :  * The caller always receives a copy of the cached map.
    3111             :  */
    3112           0 : static __isl_give isl_map *node_extract_schedule(struct isl_sched_node *node)
    3113             : {
    3114           0 :         if (!node->sched_map) {
    3115             :                 isl_multi_aff *ma;
    3116             : 
    3117           0 :                 ma = node_extract_schedule_multi_aff(node);
    3118           0 :                 node->sched_map = isl_map_from_multi_aff(ma);
    3119             :         }
    3120             : 
    3121           0 :         return isl_map_copy(node->sched_map);
    3122             : }
    3123             : 
    3124             : /* Construct a map that can be used to update a dependence relation
    3125             :  * based on the current schedule.
    3126             :  * That is, construct a map expressing that source and sink
    3127             :  * are executed within the same iteration of the current schedule.
    3128             :  * This map can then be intersected with the dependence relation.
    3129             :  * This is not the most efficient way, but this shouldn't be a critical
    3130             :  * operation.
                     :  *
                     :  * The map is obtained by composing the schedule of "src"
                     :  * with the reverse of the schedule of "dst".
    3131             :  */
    3132           0 : static __isl_give isl_map *specializer(struct isl_sched_node *src,
    3133             :         struct isl_sched_node *dst)
    3134             : {
    3135             :         isl_map *src_sched, *dst_sched;
    3136             : 
    3137           0 :         src_sched = node_extract_schedule(src);
    3138           0 :         dst_sched = node_extract_schedule(dst);
    3139           0 :         return isl_map_apply_range(src_sched, isl_map_reverse(dst_sched));
    3140             : }
    3141             : 
    3142             : /* Intersect the domains of the nested relations in domain and range
    3143             :  * of "umap" with "map".
                     :  *
                     :  * "umap" is first zipped, its domain is then intersected with
                     :  * the wrapped copy of "map" and finally it is zipped a second time.
                     :  * NOTE(review): this relies on isl_union_map_zip bringing the nested
                     :  * domains together in the domain and on the second zip restoring
                     :  * the original nesting; confirm against the isl documentation.
                     :  * "umap" is consumed, while only a copy of "map" is used.
    3144             :  */
    3145           0 : static __isl_give isl_union_map *intersect_domains(
    3146             :         __isl_take isl_union_map *umap, __isl_keep isl_map *map)
    3147             : {
    3148             :         isl_union_set *uset;
    3149             : 
    3150           0 :         umap = isl_union_map_zip(umap);
    3151           0 :         uset = isl_union_set_from_set(isl_map_wrap(isl_map_copy(map)));
    3152           0 :         umap = isl_union_map_intersect_domain(umap, uset);
    3153           0 :         umap = isl_union_map_zip(umap);
    3154           0 :         return umap;
    3155             : }
    3156             : 
    3157             : /* Update the dependence relation of the given edge based
    3158             :  * on the current schedule.
    3159             :  * If the dependence is carried completely by the current schedule, then
    3160             :  * it is removed from the edge_tables.  It is kept in the list of edges
    3161             :  * as otherwise all edge_tables would have to be recomputed.
    3162             :  *
    3163             :  * If the edge is of a type that can appear multiple times
    3164             :  * between the same pair of nodes, then it is added to
    3165             :  * the edge table (again).  This prevents the situation
    3166             :  * where none of these edges is referenced from the edge table
    3167             :  * because the one that was referenced turned out to be empty and
    3168             :  * was therefore removed from the table.
                     :  *
                     :  * The relation of the edge is intersected with the result of
                     :  * specializer() and the same restriction is applied to the tagged
                     :  * condition and tagged validity relations, if the edge has any.
                     :  * Whether the dependence is carried completely is determined
                     :  * by calling isl_map_plain_is_empty on the updated relation.
                     :  * Return isl_stat_ok on success and isl_stat_error on failure.
    3169             :  */
    3170           0 : static isl_stat update_edge(isl_ctx *ctx, struct isl_sched_graph *graph,
    3171             :         struct isl_sched_edge *edge)
    3172             : {
    3173             :         int empty;
    3174             :         isl_map *id;
    3175             : 
    3176           0 :         id = specializer(edge->src, edge->dst);
    3177           0 :         edge->map = isl_map_intersect(edge->map, isl_map_copy(id));
    3178           0 :         if (!edge->map)
    3179           0 :                 goto error;
    3180             : 
    3181           0 :         if (edge->tagged_condition) {
    3182           0 :                 edge->tagged_condition =
    3183           0 :                         intersect_domains(edge->tagged_condition, id);
    3184           0 :                 if (!edge->tagged_condition)
    3185           0 :                         goto error;
    3186             :         }
    3187           0 :         if (edge->tagged_validity) {
    3188           0 :                 edge->tagged_validity =
    3189           0 :                         intersect_domains(edge->tagged_validity, id);
    3190           0 :                 if (!edge->tagged_validity)
    3191           0 :                         goto error;
    3192             :         }
    3193             : 
    3194           0 :         empty = isl_map_plain_is_empty(edge->map);
    3195           0 :         if (empty < 0)
    3196           0 :                 goto error;
    3197           0 :         if (empty) {
    3198           0 :                 graph_remove_edge(graph, edge);
    3199           0 :         } else if (is_multi_edge_type(edge)) {
    3200           0 :                 if (graph_edge_tables_add(ctx, graph, edge) < 0)
    3201           0 :                         goto error;
    3202             :         }
    3203             : 
    3204           0 :         isl_map_free(id);
    3205           0 :         return isl_stat_ok;
    3206             : error:
    3207           0 :         isl_map_free(id);
    3208           0 :         return isl_stat_error;
    3209             : }
    3210             : 
    3211             : /* Does the domain of "umap" intersect "uset"?
                     :  *
                     :  * The test is performed on copies, so neither argument is modified.
                     :  * Return 1 if the intersection is non-empty, 0 if it is empty and
                     :  * -1 on error.
    3212             :  */
    3213           0 : static int domain_intersects(__isl_keep isl_union_map *umap,
    3214             :         __isl_keep isl_union_set *uset)
    3215             : {
    3216             :         int empty;
    3217             : 
    3218           0 :         umap = isl_union_map_copy(umap);
    3219           0 :         umap = isl_union_map_intersect_domain(umap, isl_union_set_copy(uset));
    3220           0 :         empty = isl_union_map_is_empty(umap);
    3221           0 :         isl_union_map_free(umap);
    3222             : 
    3223           0 :         return empty < 0 ? -1 : !empty;
    3224             : }
    3225             : 
    3226             : /* Does the range of "umap" intersect "uset"?
                     :  *
                     :  * The test is performed on copies, so neither argument is modified.
                     :  * Return 1 if the intersection is non-empty, 0 if it is empty and
                     :  * -1 on error.
    3227             :  */
    3228           0 : static int range_intersects(__isl_keep isl_union_map *umap,
    3229             :         __isl_keep isl_union_set *uset)
    3230             : {
    3231             :         int empty;
    3232             : 
    3233           0 :         umap = isl_union_map_copy(umap);
    3234           0 :         umap = isl_union_map_intersect_range(umap, isl_union_set_copy(uset));
    3235           0 :         empty = isl_union_map_is_empty(umap);
    3236           0 :         isl_union_map_free(umap);
    3237             : 
    3238           0 :         return empty < 0 ? -1 : !empty;
    3239             : }
    3240             : 
    3241             : /* Are the condition dependences of "edge" local with respect to
    3242             :  * the current schedule?
    3243             :  *
    3244             :  * That is, are domain and range of the condition dependences mapped
    3245             :  * to the same point?
    3246             :  *
    3247             :  * In other words, is the condition false?
                     :  *
                     :  * If the tagged condition relation is empty, then the result of
                     :  * the emptiness test is returned directly.
                     :  * Otherwise, a single map is extracted from the tagged condition
                     :  * through a zip/domain/unwrap sequence (presumably dropping the tags),
                     :  * the schedules of the source and sink node are applied to its domain
                     :  * and range and the result is checked to be a subset of
                     :  * the identity relation.
                     :  * Return a positive value if the condition is false, 0 if it is not and
                     :  * a negative value on error.
    3248             :  */
    3249           0 : static int is_condition_false(struct isl_sched_edge *edge)
    3250             : {
    3251             :         isl_union_map *umap;
    3252             :         isl_map *map, *sched, *test;
    3253             :         int empty, local;
    3254             : 
    3255           0 :         empty = isl_union_map_is_empty(edge->tagged_condition);
    3256           0 :         if (empty < 0 || empty)
    3257           0 :                 return empty;
    3258             : 
    3259           0 :         umap = isl_union_map_copy(edge->tagged_condition);
    3260           0 :         umap = isl_union_map_zip(umap);
    3261           0 :         umap = isl_union_set_unwrap(isl_union_map_domain(umap));
    3262           0 :         map = isl_map_from_union_map(umap);
    3263             : 
    3264           0 :         sched = node_extract_schedule(edge->src);
    3265           0 :         map = isl_map_apply_domain(map, sched);
    3266           0 :         sched = node_extract_schedule(edge->dst);
    3267           0 :         map = isl_map_apply_range(map, sched);
    3268             : 
    3269           0 :         test = isl_map_identity(isl_map_get_space(map));
    3270           0 :         local = isl_map_is_subset(map, test);
    3271           0 :         isl_map_free(map);
    3272           0 :         isl_map_free(test);
    3273             : 
    3274           0 :         return local;
    3275             : }
    3276             : 
    3277             : /* For each conditional validity constraint that is adjacent
    3278             :  * to a condition with domain in condition_source or range in condition_sink,
    3279             :  * turn it into an unconditional validity constraint.
                     :  *
                     :  * Only edges that are conditional validity edges and not already
                     :  * validity edges are considered.  Such an edge is adjacent if the domain
                     :  * of its tagged validity relation intersects "condition_sink" or
                     :  * if its range intersects "condition_source".
                     :  * Both sets are coalesced first and are freed before returning.
                     :  * Return 0 on success and -1 on error.
    3280             :  */
    3281           0 : static int unconditionalize_adjacent_validity(struct isl_sched_graph *graph,
    3282             :         __isl_take isl_union_set *condition_source,
    3283             :         __isl_take isl_union_set *condition_sink)
    3284             : {
    3285             :         int i;
    3286             : 
    3287           0 :         condition_source = isl_union_set_coalesce(condition_source);
    3288           0 :         condition_sink = isl_union_set_coalesce(condition_sink);
    3289             : 
    3290           0 :         for (i = 0; i < graph->n_edge; ++i) {
    3291             :                 int adjacent;
    3292             :                 isl_union_map *validity;
    3293             : 
    3294           0 :                 if (!is_conditional_validity(&graph->edge[i]))
    3295           0 :                         continue;
    3296           0 :                 if (is_validity(&graph->edge[i]))
    3297           0 :                         continue;
    3298             : 
    3299           0 :                 validity = graph->edge[i].tagged_validity;
    3300           0 :                 adjacent = domain_intersects(validity, condition_sink);
    3301           0 :                 if (adjacent >= 0 && !adjacent) /* only test the range if needed */
    3302           0 :                         adjacent = range_intersects(validity, condition_source);
    3303           0 :                 if (adjacent < 0)
    3304           0 :                         goto error;
    3305           0 :                 if (!adjacent)
    3306           0 :                         continue;
    3307             : 
    3308           0 :                 set_validity(&graph->edge[i]);
    3309             :         }
    3310             : 
    3311           0 :         isl_union_set_free(condition_source);
    3312           0 :         isl_union_set_free(condition_sink);
    3313           0 :         return 0;
    3314             : error:
    3315           0 :         isl_union_set_free(condition_source);
    3316           0 :         isl_union_set_free(condition_sink);
    3317           0 :         return -1;
    3318             : }
    3319             : 
    3320             : /* Update the dependence relations of all edges based on the current schedule
    3321             :  * and enforce conditional validity constraints that are adjacent
    3322             :  * to satisfied condition constraints.
    3323             :  *
    3324             :  * First check if any of the condition constraints are satisfied
    3325             :  * (i.e., not local to the outer schedule) and keep track of
    3326             :  * their domain and range.
    3327             :  * Then update all dependence relations (which removes the non-local
    3328             :  * constraints).
    3329             :  * Finally, if any condition constraints turned out to be satisfied,
    3330             :  * then turn all adjacent conditional validity constraints into
    3331             :  * unconditional validity constraints.
                     :  *
                     :  * Condition edges for which is_local() holds are skipped in the first step.
                     :  * "source" and "sink" collect the domains and ranges of the tagged
                     :  * condition relations of the satisfied condition edges.
                     :  * They are handed over to unconditionalize_adjacent_validity
                     :  * if any such edge was found and are freed here otherwise.
                     :  * Return 0 on success and -1 on error.
    3332             :  */
    3333           0 : static int update_edges(isl_ctx *ctx, struct isl_sched_graph *graph)
    3334             : {
    3335             :         int i;
    3336           0 :         int any = 0;
    3337             :         isl_union_set *source, *sink;
    3338             : 
    3339           0 :         source = isl_union_set_empty(isl_space_params_alloc(ctx, 0));
    3340           0 :         sink = isl_union_set_empty(isl_space_params_alloc(ctx, 0));
    3341           0 :         for (i = 0; i < graph->n_edge; ++i) {
    3342             :                 int local;
    3343             :                 isl_union_set *uset;
    3344             :                 isl_union_map *umap;
    3345             : 
    3346           0 :                 if (!is_condition(&graph->edge[i]))
    3347           0 :                         continue;
    3348           0 :                 if (is_local(&graph->edge[i]))
    3349           0 :                         continue;
    3350           0 :                 local = is_condition_false(&graph->edge[i]);
    3351           0 :                 if (local < 0)
    3352           0 :                         goto error;
    3353           0 :                 if (local)
    3354           0 :                         continue;
    3355             : 
    3356           0 :                 any = 1; /* at least one condition is satisfied */
    3357             : 
    3358           0 :                 umap = isl_union_map_copy(graph->edge[i].tagged_condition);
    3359           0 :                 uset = isl_union_map_domain(umap);
    3360           0 :                 source = isl_union_set_union(source, uset);
    3361             : 
    3362           0 :                 umap = isl_union_map_copy(graph->edge[i].tagged_condition);
    3363           0 :                 uset = isl_union_map_range(umap);
    3364           0 :                 sink = isl_union_set_union(sink, uset);
    3365             :         }
    3366             : 
    3367           0 :         for (i = 0; i < graph->n_edge; ++i) {
    3368           0 :                 if (update_edge(ctx, graph, &graph->edge[i]) < 0)
    3369           0 :                         goto error;
    3370             :         }
    3371             : 
    3372           0 :         if (any)
    3373           0 :                 return unconditionalize_adjacent_validity(graph, source, sink);
    3374             : 
    3375           0 :         isl_union_set_free(source);
    3376           0 :         isl_union_set_free(sink);
    3377           0 :         return 0;
    3378             : error:
    3379           0 :         isl_union_set_free(source);
    3380           0 :         isl_union_set_free(sink);
    3381           0 :         return -1;
    3382             : }
    3383             : 
    3384           0 : static void next_band(struct isl_sched_graph *graph)
    3385             : {
    3386           0 :         graph->band_start = graph->n_total_row;
    3387           0 : }
    3388             : 
    3389             : /* Return the union of the universe domains of the nodes in "graph"
    3390             :  * that satisfy "pred".
    3391             :  */
    3392           0 : static __isl_give isl_union_set *isl_sched_graph_domain(isl_ctx *ctx,
    3393             :         struct isl_sched_graph *graph,
    3394             :         int (*pred)(struct isl_sched_node *node, int data), int data)
    3395             : {
    3396             :         int i;
    3397             :         isl_set *set;
    3398             :         isl_union_set *dom;
    3399             : 
    3400           0 :         for (i = 0; i < graph->n; ++i)
    3401           0 :                 if (pred(&graph->node[i], data))
    3402           0 :                         break;
    3403             : 
    3404           0 :         if (i >= graph->n)
    3405           0 :                 isl_die(ctx, isl_error_internal,
    3406             :                         "empty component", return NULL);
    3407             : 
    3408           0 :         set = isl_set_universe(isl_space_copy(graph->node[i].space));
    3409           0 :         dom = isl_union_set_from_set(set);
    3410             : 
    3411           0 :         for (i = i + 1; i < graph->n; ++i) {
    3412           0 :                 if (!pred(&graph->node[i], data))
    3413           0 :                         continue;
    3414           0 :                 set = isl_set_universe(isl_space_copy(graph->node[i].space));
    3415           0 :                 dom = isl_union_set_union(dom, isl_union_set_from_set(set));
    3416             :         }
    3417             : 
    3418           0 :         return dom;
    3419             : }
    3420             : 
    3421             : /* Return a list of unions of universe domains, where each element
    3422             :  * in the list corresponds to an SCC (or WCC) indexed by node->scc.
    3423             :  */
    3424           0 : static __isl_give isl_union_set_list *extract_sccs(isl_ctx *ctx,
    3425             :         struct isl_sched_graph *graph)
    3426             : {
    3427             :         int i;
    3428             :         isl_union_set_list *filters;
    3429             : 
    3430           0 :         filters = isl_union_set_list_alloc(ctx, graph->scc);
    3431           0 :         for (i = 0; i < graph->scc; ++i) {
    3432             :                 isl_union_set *dom;
    3433             : 
    3434           0 :                 dom = isl_sched_graph_domain(ctx, graph, &node_scc_exactly, i);
    3435           0 :                 filters = isl_union_set_list_add(filters, dom);
    3436             :         }
    3437             : 
    3438           0 :         return filters;
    3439             : }
    3440             : 
    3441             : /* Return a list of two unions of universe domains, one for the SCCs up
    3442             :  * to and including graph->src_scc and another for the other SCCs.
    3443             :  */
    3444           0 : static __isl_give isl_union_set_list *extract_split(isl_ctx *ctx,
    3445             :         struct isl_sched_graph *graph)
    3446             : {
    3447             :         isl_union_set *dom;
    3448             :         isl_union_set_list *filters;
    3449             : 
    3450           0 :         filters = isl_union_set_list_alloc(ctx, 2);
    3451           0 :         dom = isl_sched_graph_domain(ctx, graph,
    3452             :                                         &node_scc_at_most, graph->src_scc);
    3453           0 :         filters = isl_union_set_list_add(filters, dom);
    3454           0 :         dom = isl_sched_graph_domain(ctx, graph,
    3455           0 :                                         &node_scc_at_least, graph->src_scc + 1);
    3456           0 :         filters = isl_union_set_list_add(filters, dom);
    3457             : 
    3458           0 :         return filters;
    3459             : }
    3460             : 
    3461             : /* Copy nodes that satisfy node_pred from the src dependence graph
    3462             :  * to the dst dependence graph.
    3463             :  */
    3464           0 : static isl_stat copy_nodes(struct isl_sched_graph *dst,
    3465             :         struct isl_sched_graph *src,
    3466             :         int (*node_pred)(struct isl_sched_node *node, int data), int data)
    3467             : {
    3468             :         int i;
    3469             : 
    3470           0 :         dst->n = 0;
    3471           0 :         for (i = 0; i < src->n; ++i) {
    3472             :                 int j;
    3473             : 
    3474           0 :                 if (!node_pred(&src->node[i], data))
    3475           0 :                         continue;
    3476             : 
    3477           0 :                 j = dst->n;
    3478           0 :                 dst->node[j].space = isl_space_copy(src->node[i].space);
    3479           0 :                 dst->node[j].compressed = src->node[i].compressed;
    3480           0 :                 dst->node[j].hull = isl_set_copy(src->node[i].hull);
    3481           0 :                 dst->node[j].compress =
    3482           0 :                         isl_multi_aff_copy(src->node[i].compress);
    3483           0 :                 dst->node[j].decompress =
    3484           0 :                         isl_multi_aff_copy(src->node[i].decompress);
    3485           0 :                 dst->node[j].nvar = src->node[i].nvar;
    3486           0 :                 dst->node[j].nparam = src->node[i].nparam;
    3487           0 :                 dst->node[j].sched = isl_mat_copy(src->node[i].sched);
    3488           0 :                 dst->node[j].sched_map = isl_map_copy(src->node[i].sched_map);
    3489           0 :                 dst->node[j].coincident = src->node[i].coincident;
    3490           0 :                 dst->node[j].sizes = isl_multi_val_copy(src->node[i].sizes);
    3491           0 :                 dst->node[j].bounds = isl_basic_set_copy(src->node[i].bounds);
    3492           0 :                 dst->node[j].max = isl_vec_copy(src->node[i].max);
    3493           0 :                 dst->n++;
    3494             : 
    3495           0 :                 if (!dst->node[j].space || !dst->node[j].sched)
    3496           0 :                         return isl_stat_error;
    3497           0 :                 if (dst->node[j].compressed &&
    3498           0 :                     (!dst->node[j].hull || !dst->node[j].compress ||
    3499           0 :                      !dst->node[j].decompress))
    3500           0 :                         return isl_stat_error;
    3501             :         }
    3502             : 
    3503           0 :         return isl_stat_ok;
    3504             : }
    3505             : 
    3506             : /* Copy non-empty edges that satisfy edge_pred from the src dependence graph
    3507             :  * to the dst dependence graph.
    3508             :  * If the source or destination node of the edge is not in the destination
    3509             :  * graph, then it must be a backward proximity edge and it should simply
    3510             :  * be ignored.
    3511             :  */
    3512           0 : static isl_stat copy_edges(isl_ctx *ctx, struct isl_sched_graph *dst,
    3513             :         struct isl_sched_graph *src,
    3514             :         int (*edge_pred)(struct isl_sched_edge *edge, int data), int data)
    3515             : {
    3516             :         int i;
    3517             : 
    3518           0 :         dst->n_edge = 0;
    3519           0 :         for (i = 0; i < src->n_edge; ++i) {
    3520           0 :                 struct isl_sched_edge *edge = &src->edge[i];
    3521             :                 isl_map *map;
    3522             :                 isl_union_map *tagged_condition;
    3523             :                 isl_union_map *tagged_validity;
    3524             :                 struct isl_sched_node *dst_src, *dst_dst;
    3525             : 
    3526           0 :                 if (!edge_pred(edge, data))
    3527           0 :                         continue;
    3528             : 
    3529           0 :                 if (isl_map_plain_is_empty(edge->map))
    3530           0 :                         continue;
    3531             : 
    3532           0 :                 dst_src = graph_find_node(ctx, dst, edge->src->space);
    3533           0 :                 dst_dst = graph_find_node(ctx, dst, edge->dst->space);
    3534           0 :                 if (!dst_src || !dst_dst)
    3535           0 :                         return isl_stat_error;
    3536           0 :                 if (!is_node(dst, dst_src) || !is_node(dst, dst_dst)) {
    3537           0 :                         if (is_validity(edge) || is_conditional_validity(edge))
    3538           0 :                                 isl_die(ctx, isl_error_internal,
    3539             :                                         "backward (conditional) validity edge",
    3540             :                                         return isl_stat_error);
    3541           0 :                         continue;
    3542             :                 }
    3543             : 
    3544           0 :                 map = isl_map_copy(edge->map);
    3545           0 :                 tagged_condition = isl_union_map_copy(edge->tagged_condition);
    3546           0 :                 tagged_validity = isl_union_map_copy(edge->tagged_validity);
    3547             : 
    3548           0 :                 dst->edge[dst->n_edge].src = dst_src;
    3549           0 :                 dst->edge[dst->n_edge].dst = dst_dst;
    3550           0 :                 dst->edge[dst->n_edge].map = map;
    3551           0 :                 dst->edge[dst->n_edge].tagged_condition = tagged_condition;
    3552           0 :                 dst->edge[dst->n_edge].tagged_validity = tagged_validity;
    3553           0 :                 dst->edge[dst->n_edge].types = edge->types;
    3554           0 :                 dst->n_edge++;
    3555             : 
    3556           0 :                 if (edge->tagged_condition && !tagged_condition)
    3557           0 :                         return isl_stat_error;
    3558           0 :                 if (edge->tagged_validity && !tagged_validity)
    3559           0 :                         return isl_stat_error;
    3560             : 
    3561           0 :                 if (graph_edge_tables_add(ctx, dst,
    3562           0 :                                             &dst->edge[dst->n_edge - 1]) < 0)
    3563           0 :                         return isl_stat_error;
    3564             :         }
    3565             : 
    3566           0 :         return isl_stat_ok;
    3567             : }
    3568             : 
    3569             : /* Compute the maximal number of variables over all nodes.
    3570             :  * This is the maximal number of linearly independent schedule
    3571             :  * rows that we need to compute.
    3572             :  * Just in case we end up in a part of the dependence graph
    3573             :  * with only lower-dimensional domains, we make sure we will
    3574             :  * compute the required amount of extra linearly independent rows.
    3575             :  */
    3576           0 : static int compute_maxvar(struct isl_sched_graph *graph)
    3577             : {
    3578             :         int i;
    3579             : 
    3580           0 :         graph->maxvar = 0;
    3581           0 :         for (i = 0; i < graph->n; ++i) {
    3582           0 :                 struct isl_sched_node *node = &graph->node[i];
    3583             :                 int nvar;
    3584             : 
    3585           0 :                 if (node_update_vmap(node) < 0)
    3586           0 :                         return -1;
    3587           0 :                 nvar = node->nvar + graph->n_row - node->rank;
    3588           0 :                 if (nvar > graph->maxvar)
    3589           0 :                         graph->maxvar = nvar;
    3590             :         }
    3591             : 
    3592           0 :         return 0;
    3593             : }
    3594             : 
    3595             : /* Extract the subgraph of "graph" that consists of the nodes satisfying
    3596             :  * "node_pred" and the edges satisfying "edge_pred" and store
    3597             :  * the result in "sub".
    3598             :  */
    3599           0 : static isl_stat extract_sub_graph(isl_ctx *ctx, struct isl_sched_graph *graph,
    3600             :         int (*node_pred)(struct isl_sched_node *node, int data),
    3601             :         int (*edge_pred)(struct isl_sched_edge *edge, int data),
    3602             :         int data, struct isl_sched_graph *sub)
    3603             : {
    3604           0 :         int i, n = 0, n_edge = 0;
    3605             :         int t;
    3606             : 
    3607           0 :         for (i = 0; i < graph->n; ++i)
    3608           0 :                 if (node_pred(&graph->node[i], data))
    3609           0 :                         ++n;
    3610           0 :         for (i = 0; i < graph->n_edge; ++i)
    3611           0 :                 if (edge_pred(&graph->edge[i], data))
    3612           0 :                         ++n_edge;
    3613           0 :         if (graph_alloc(ctx, sub, n, n_edge) < 0)
    3614           0 :                 return isl_stat_error;
    3615           0 :         sub->root = graph->root;
    3616           0 :         if (copy_nodes(sub, graph, node_pred, data) < 0)
    3617           0 :                 return isl_stat_error;
    3618           0 :         if (graph_init_table(ctx, sub) < 0)
    3619           0 :                 return isl_stat_error;
    3620           0 :         for (t = 0; t <= isl_edge_last; ++t)
    3621           0 :                 sub->max_edge[t] = graph->max_edge[t];
    3622           0 :         if (graph_init_edge_tables(ctx, sub) < 0)
    3623           0 :                 return isl_stat_error;
    3624           0 :         if (copy_edges(ctx, sub, graph, edge_pred, data) < 0)
    3625           0 :                 return isl_stat_error;
    3626           0 :         sub->n_row = graph->n_row;
    3627           0 :         sub->max_row = graph->max_row;
    3628           0 :         sub->n_total_row = graph->n_total_row;
    3629           0 :         sub->band_start = graph->band_start;
    3630             : 
    3631           0 :         return isl_stat_ok;
    3632             : }
    3633             : 
    3634             : static __isl_give isl_schedule_node *compute_schedule(isl_schedule_node *node,
    3635             :         struct isl_sched_graph *graph);
    3636             : static __isl_give isl_schedule_node *compute_schedule_wcc(
    3637             :         isl_schedule_node *node, struct isl_sched_graph *graph);
    3638             : 
    3639             : /* Compute a schedule for a subgraph of "graph".  In particular, for
    3640             :  * the graph composed of nodes that satisfy node_pred and edges that
    3641             :  * that satisfy edge_pred.
    3642             :  * If the subgraph is known to consist of a single component, then wcc should
    3643             :  * be set and then we call compute_schedule_wcc on the constructed subgraph.
    3644             :  * Otherwise, we call compute_schedule, which will check whether the subgraph
    3645             :  * is connected.
    3646             :  *
    3647             :  * The schedule is inserted at "node" and the updated schedule node
    3648             :  * is returned.
    3649             :  */
    3650           0 : static __isl_give isl_schedule_node *compute_sub_schedule(
    3651             :         __isl_take isl_schedule_node *node, isl_ctx *ctx,
    3652             :         struct isl_sched_graph *graph,
    3653             :         int (*node_pred)(struct isl_sched_node *node, int data),
    3654             :         int (*edge_pred)(struct isl_sched_edge *edge, int data),
    3655             :         int data, int wcc)
    3656             : {
    3657           0 :         struct isl_sched_graph split = { 0 };
    3658             : 
    3659           0 :         if (extract_sub_graph(ctx, graph, node_pred, edge_pred, data,
    3660             :                                 &split) < 0)
    3661           0 :                 goto error;
    3662             : 
    3663           0 :         if (wcc)
    3664           0 :                 node = compute_schedule_wcc(node, &split);
    3665             :         else
    3666           0 :                 node = compute_schedule(node, &split);
    3667             : 
    3668           0 :         graph_free(ctx, &split);
    3669           0 :         return node;
    3670             : error:
    3671           0 :         graph_free(ctx, &split);
    3672           0 :         return isl_schedule_node_free(node);
    3673             : }
    3674             : 
    3675           0 : static int edge_scc_exactly(struct isl_sched_edge *edge, int scc)
    3676             : {
    3677           0 :         return edge->src->scc == scc && edge->dst->scc == scc;
    3678             : }
    3679             : 
    3680           0 : static int edge_dst_scc_at_most(struct isl_sched_edge *edge, int scc)
    3681             : {
    3682           0 :         return edge->dst->scc <= scc;
    3683             : }
    3684             : 
    3685           0 : static int edge_src_scc_at_least(struct isl_sched_edge *edge, int scc)
    3686             : {
    3687           0 :         return edge->src->scc >= scc;
    3688             : }
    3689             : 
    3690             : /* Reset the current band by dropping all its schedule rows.
    3691             :  */
    3692           0 : static isl_stat reset_band(struct isl_sched_graph *graph)
    3693             : {
    3694             :         int i;
    3695             :         int drop;
    3696             : 
    3697           0 :         drop = graph->n_total_row - graph->band_start;
    3698           0 :         graph->n_total_row -= drop;
    3699           0 :         graph->n_row -= drop;
    3700             : 
    3701           0 :         for (i = 0; i < graph->n; ++i) {
    3702           0 :                 struct isl_sched_node *node = &graph->node[i];
    3703             : 
    3704           0 :                 isl_map_free(node->sched_map);
    3705           0 :                 node->sched_map = NULL;
    3706             : 
    3707           0 :                 node->sched = isl_mat_drop_rows(node->sched,
    3708           0 :                                                 graph->band_start, drop);
    3709             : 
    3710           0 :                 if (!node->sched)
    3711           0 :                         return isl_stat_error;
    3712             :         }
    3713             : 
    3714           0 :         return isl_stat_ok;
    3715             : }
    3716             : 
    3717             : /* Split the current graph into two parts and compute a schedule for each
    3718             :  * part individually.  In particular, one part consists of all SCCs up
    3719             :  * to and including graph->src_scc, while the other part contains the other
    3720             :  * SCCs.  The split is enforced by a sequence node inserted at position "node"
    3721             :  * in the schedule tree.  Return the updated schedule node.
    3722             :  * If either of these two parts consists of a sequence, then it is spliced
    3723             :  * into the sequence containing the two parts.
    3724             :  *
    3725             :  * The current band is reset. It would be possible to reuse
    3726             :  * the previously computed rows as the first rows in the next
    3727             :  * band, but recomputing them may result in better rows as we are looking
    3728             :  * at a smaller part of the dependence graph.
    3729             :  */
    3730           0 : static __isl_give isl_schedule_node *compute_split_schedule(
    3731             :         __isl_take isl_schedule_node *node, struct isl_sched_graph *graph)
    3732             : {
    3733             :         int is_seq;
    3734             :         isl_ctx *ctx;
    3735             :         isl_union_set_list *filters;
    3736             : 
    3737           0 :         if (!node)
    3738           0 :                 return NULL;
    3739             : 
    3740           0 :         if (reset_band(graph) < 0)
    3741           0 :                 return isl_schedule_node_free(node);
    3742             : 
    3743           0 :         next_band(graph);
    3744             : 
    3745           0 :         ctx = isl_schedule_node_get_ctx(node);
    3746           0 :         filters = extract_split(ctx, graph);
    3747           0 :         node = isl_schedule_node_insert_sequence(node, filters);
    3748           0 :         node = isl_schedule_node_child(node, 1);
    3749           0 :         node = isl_schedule_node_child(node, 0);
    3750             : 
    3751           0 :         node = compute_sub_schedule(node, ctx, graph,
    3752             :                                 &node_scc_at_least, &edge_src_scc_at_least,
    3753           0 :                                 graph->src_scc + 1, 0);
    3754           0 :         is_seq = isl_schedule_node_get_type(node) == isl_schedule_node_sequence;
    3755           0 :         node = isl_schedule_node_parent(node);
    3756           0 :         node = isl_schedule_node_parent(node);
    3757           0 :         if (is_seq)
    3758           0 :                 node = isl_schedule_node_sequence_splice_child(node, 1);
    3759           0 :         node = isl_schedule_node_child(node, 0);
    3760           0 :         node = isl_schedule_node_child(node, 0);
    3761           0 :         node = compute_sub_schedule(node, ctx, graph,
    3762             :                                 &node_scc_at_most, &edge_dst_scc_at_most,
    3763             :                                 graph->src_scc, 0);
    3764           0 :         is_seq = isl_schedule_node_get_type(node) == isl_schedule_node_sequence;
    3765           0 :         node = isl_schedule_node_parent(node);
    3766           0 :         node = isl_schedule_node_parent(node);
    3767           0 :         if (is_seq)
    3768           0 :                 node = isl_schedule_node_sequence_splice_child(node, 0);
    3769             : 
    3770           0 :         return node;
    3771             : }
    3772             : 
    3773             : /* Insert a band node at position "node" in the schedule tree corresponding
    3774             :  * to the current band in "graph".  Mark the band node permutable
    3775             :  * if "permutable" is set.
    3776             :  * The partial schedules and the coincidence property are extracted
    3777             :  * from the graph nodes.
    3778             :  * Return the updated schedule node.
    3779             :  */
    3780           0 : static __isl_give isl_schedule_node *insert_current_band(
    3781             :         __isl_take isl_schedule_node *node, struct isl_sched_graph *graph,
    3782             :         int permutable)
    3783             : {
    3784             :         int i;
    3785             :         int start, end, n;
    3786             :         isl_multi_aff *ma;
    3787             :         isl_multi_pw_aff *mpa;
    3788             :         isl_multi_union_pw_aff *mupa;
    3789             : 
    3790           0 :         if (!node)
    3791           0 :                 return NULL;
    3792             : 
    3793           0 :         if (graph->n < 1)
    3794           0 :                 isl_die(isl_schedule_node_get_ctx(node), isl_error_internal,
    3795             :                         "graph should have at least one node",
    3796             :                         return isl_schedule_node_free(node));
    3797             : 
    3798           0 :         start = graph->band_start;
    3799           0 :         end = graph->n_total_row;
    3800           0 :         n = end - start;
    3801             : 
    3802           0 :         ma = node_extract_partial_schedule_multi_aff(&graph->node[0], start, n);
    3803           0 :         mpa = isl_multi_pw_aff_from_multi_aff(ma);
    3804           0 :         mupa = isl_multi_union_pw_aff_from_multi_pw_aff(mpa);
    3805             : 
    3806           0 :         for (i = 1; i < graph->n; ++i) {
    3807             :                 isl_multi_union_pw_aff *mupa_i;
    3808             : 
    3809           0 :                 ma = node_extract_partial_schedule_multi_aff(&graph->node[i],
    3810             :                                                                 start, n);
    3811           0 :                 mpa = isl_multi_pw_aff_from_multi_aff(ma);
    3812           0 :                 mupa_i = isl_multi_union_pw_aff_from_multi_pw_aff(mpa);
    3813           0 :                 mupa = isl_multi_union_pw_aff_union_add(mupa, mupa_i);
    3814             :         }
    3815           0 :         node = isl_schedule_node_insert_partial_schedule(node, mupa);
    3816             : 
    3817           0 :         for (i = 0; i < n; ++i)
    3818           0 :                 node = isl_schedule_node_band_member_set_coincident(node, i,
    3819           0 :                                         graph->node[0].coincident[start + i]);
    3820           0 :         node = isl_schedule_node_band_set_permutable(node, permutable);
    3821             : 
    3822           0 :         return node;
    3823             : }
    3824             : 
    3825             : /* Update the dependence relations based on the current schedule,
    3826             :  * add the current band to "node" and then continue with the computation
    3827             :  * of the next band.
    3828             :  * Return the updated schedule node.
    3829             :  */
    3830           0 : static __isl_give isl_schedule_node *compute_next_band(
    3831             :         __isl_take isl_schedule_node *node,
    3832             :         struct isl_sched_graph *graph, int permutable)
    3833             : {
    3834             :         isl_ctx *ctx;
    3835             : 
    3836           0 :         if (!node)
    3837           0 :                 return NULL;
    3838             : 
    3839           0 :         ctx = isl_schedule_node_get_ctx(node);
    3840           0 :         if (update_edges(ctx, graph) < 0)
    3841           0 :                 return isl_schedule_node_free(node);
    3842           0 :         node = insert_current_band(node, graph, permutable);
    3843           0 :         next_band(graph);
    3844             : 
    3845           0 :         node = isl_schedule_node_child(node, 0);
    3846           0 :         node = compute_schedule(node, graph);
    3847           0 :         node = isl_schedule_node_parent(node);
    3848             : 
    3849           0 :         return node;
    3850             : }
    3851             : 
    3852             : /* Add the constraints "coef" derived from an edge from "node" to itself
    3853             :  * to graph->lp in order to respect the dependences and to try and carry them.
    3854             :  * "pos" is the sequence number of the edge that needs to be carried.
    3855             :  * "coef" represents general constraints on coefficients (c_0, c_x)
    3856             :  * of valid constraints for (y - x) with x and y instances of the node.
    3857             :  *
    3858             :  * The constraints added to graph->lp need to enforce
    3859             :  *
    3860             :  *      (c_j_0 + c_j_x y) - (c_j_0 + c_j_x x)
    3861             :  *      = c_j_x (y - x) >= e_i
    3862             :  *
    3863             :  * for each (x,y) in the dependence relation of the edge.
    3864             :  * That is, (-e_i, c_j_x) needs to be plugged in for (c_0, c_x),
    3865             :  * taking into account that each coefficient in c_j_x is represented
    3866             :  * as a pair of non-negative coefficients.
    3867             :  */
    3868           0 : static isl_stat add_intra_constraints(struct isl_sched_graph *graph,
    3869             :         struct isl_sched_node *node, __isl_take isl_basic_set *coef, int pos)
    3870             : {
    3871             :         int offset;
    3872             :         isl_ctx *ctx;
    3873             :         isl_dim_map *dim_map;
    3874             : 
    3875           0 :         if (!coef)
    3876           0 :                 return isl_stat_error;
    3877             : 
    3878           0 :         ctx = isl_basic_set_get_ctx(coef);
    3879           0 :         offset = coef_var_offset(coef);
    3880           0 :         dim_map = intra_dim_map(ctx, graph, node, offset, 1);
    3881           0 :         isl_dim_map_range(dim_map, 3 + pos, 0, 0, 0, 1, -1);
    3882           0 :         graph->lp = add_constraints_dim_map(graph->lp, coef, dim_map);
    3883             : 
    3884           0 :         return isl_stat_ok;
    3885             : }
    3886             : 
    3887             : /* Add the constraints "coef" derived from an edge from "src" to "dst"
    3888             :  * to graph->lp in order to respect the dependences and to try and carry them.
    3889             :  * "pos" is the sequence number of the edge that needs to be carried or
    3890             :  * -1 if no attempt should be made to carry the dependences.
    3891             :  * "coef" represents general constraints on coefficients (c_0, c_n, c_x, c_y)
    3892             :  * of valid constraints for (x, y) with x and y instances of "src" and "dst".
    3893             :  *
    3894             :  * The constraints added to graph->lp need to enforce
    3895             :  *
    3896             :  *      (c_k_0 + c_k_n n + c_k_x y) - (c_j_0 + c_j_n n + c_j_x x) >= e_i
    3897             :  *
    3898             :  * for each (x,y) in the dependence relation of the edge or
    3899             :  *
    3900             :  *      (c_k_0 + c_k_n n + c_k_x y) - (c_j_0 + c_j_n n + c_j_x x) >= 0
    3901             :  *
    3902             :  * if pos is -1.
    3903             :  * That is,
    3904             :  * (-e_i + c_k_0 - c_j_0, c_k_n - c_j_n, -c_j_x, c_k_x)
    3905             :  * or
    3906             :  * (c_k_0 - c_j_0, c_k_n - c_j_n, -c_j_x, c_k_x)
    3907             :  * needs to be plugged in for (c_0, c_n, c_x, c_y),
    3908             :  * taking into account that each coefficient in c_j_x and c_k_x is represented
    3909             :  * as a pair of non-negative coefficients.
    3910             :  */
    3911           0 : static isl_stat add_inter_constraints(struct isl_sched_graph *graph,
    3912             :         struct isl_sched_node *src, struct isl_sched_node *dst,
    3913             :         __isl_take isl_basic_set *coef, int pos)
    3914             : {
    3915             :         int offset;
    3916             :         isl_ctx *ctx;
    3917             :         isl_dim_map *dim_map;
    3918             : 
    3919           0 :         if (!coef)
    3920           0 :                 return isl_stat_error; /* the only error reported by this function */
    3921             : 
    3922           0 :         ctx = isl_basic_set_get_ctx(coef);
    3923           0 :         offset = coef_var_offset(coef); /* helper defined elsewhere; presumably where the c_x part of "coef" starts */
    3924           0 :         dim_map = inter_dim_map(ctx, graph, src, dst, offset, 1);
    3925           0 :         if (pos >= 0) /* only a carried edge gets an e_i term */
    3926           0 :                 isl_dim_map_range(dim_map, 3 + pos, 0, 0, 0, 1, -1); /* -e_i in the c_0 position; e_i is LP variable 3 + pos */
    3927           0 :         graph->lp = add_constraints_dim_map(graph->lp, coef, dim_map); /* "coef" is not used or freed after this call */
    3928             : 
    3929           0 :         return isl_stat_ok; /* graph->lp itself is not inspected here */
    3930             : }
    3931             : 
    3932             : /* Data structure for keeping track of the data needed
    3933             :  * to exploit non-trivial lineality spaces.
    3934             :  *
    3935             :  * "any_non_trivial" is true if there are any non-trivial lineality spaces.
    3936             :  * If "any_non_trivial" is not true, then "equivalent" and "mask" may be NULL.
    3937             :  * "equivalent" connects instances to other instances on the same line(s).
    3938             :  * "mask" contains the domain spaces of "equivalent".
    3939             :  * Any instance set not in "mask" does not have a non-trivial lineality space.
    3940             :  */
    3941             : struct isl_exploit_lineality_data {
    3942             :         isl_bool any_non_trivial; /* guards the validity of the two fields below */
    3943             :         isl_union_map *equivalent; /* may be NULL; freed by isl_carry_clear */
    3944             :         isl_union_set *mask; /* may be NULL; freed by isl_carry_clear */
    3945             : };
    3946             : 
    3947             : /* Data structure collecting information used during the construction
    3948             :  * of an LP for carrying dependences.
    3949             :  *
    3950             :  * "intra" is a sequence of coefficient constraints for intra-node edges.
    3951             :  * "inter" is a sequence of coefficient constraints for inter-node edges.
    3952             :  * "lineality" contains data used to exploit non-trivial lineality spaces.
    3953             :  */
    3954             : struct isl_carry {
    3955             :         isl_basic_set_list *intra; /* freed by isl_carry_clear */
    3956             :         isl_basic_set_list *inter; /* freed by isl_carry_clear */
    3957             :         struct isl_exploit_lineality_data lineality; /* embedded by value; its pointer fields are freed by isl_carry_clear */
    3958             : };
    3959             : 
    3960             : /* Free all the data stored in "carry" (the structure itself is not freed).
    3961             :  */
    3962           0 : static void isl_carry_clear(struct isl_carry *carry)
    3963             : {
    3964           0 :         isl_basic_set_list_free(carry->intra);
    3965           0 :         isl_basic_set_list_free(carry->inter);
    3966           0 :         isl_union_map_free(carry->lineality.equivalent); /* may be NULL according to the struct documentation */
    3967           0 :         isl_union_set_free(carry->lineality.mask); /* may be NULL according to the struct documentation */
    3968           0 : }
    3969             : 
    3970             : /* Return a pointer to the node in "graph" that lives in "space".
    3971             :  * If the requested node has been compressed, then "space"
    3972             :  * corresponds to the compressed space.
    3973             :  * The graph is assumed to have such a node.
    3974             :  * Return NULL in case of error.
    3975             :  *
    3976             :  * First try and see if "space" is the space of an uncompressed node.
    3977             :  * If so, return that node.
    3978             :  * Otherwise, "space" was constructed by construct_compressed_id and
    3979             :  * contains a user pointer pointing to the node in the tuple id.
    3980             :  * However, this node belongs to the original dependence graph.
    3981             :  * If "graph" is a subgraph of this original dependence graph,
    3982             :  * then the node with the same space still needs to be looked up
    3983             :  * in the current graph.
    3984             :  */
    3985           0 : static struct isl_sched_node *graph_find_compressed_node(isl_ctx *ctx,
    3986             :         struct isl_sched_graph *graph, __isl_keep isl_space *space)
    3987             : {
    3988             :         isl_id *id;
    3989             :         struct isl_sched_node *node;
    3990             : 
    3991           0 :         if (!space)
    3992           0 :                 return NULL;
    3993             : 
    3994           0 :         node = graph_find_node(ctx, graph, space);
    3995           0 :         if (!node)
    3996           0 :                 return NULL; /* lookup itself failed */
    3997           0 :         if (is_node(graph, node))
    3998           0 :                 return node; /* "space" is the space of an uncompressed node */
    3999             : 
    4000           0 :         id = isl_space_get_tuple_id(space, isl_dim_set);
    4001           0 :         node = isl_id_get_user(id); /* user pointer stored in the tuple id of a compressed space */
    4002           0 :         isl_id_free(id);
    4003             : 
    4004           0 :         if (!node)
    4005           0 :                 return NULL;
    4006             : 
    4007           0 :         if (!is_node(graph->root, node)) /* the pointer must refer to a node of the original graph */
    4008           0 :                 isl_die(ctx, isl_error_internal,
    4009             :                         "space points to invalid node", return NULL);
    4010           0 :         if (graph != graph->root) /* subgraph: look the node up again by its own space */
    4011           0 :                 node = graph_find_node(ctx, graph, node->space);
    4012           0 :         if (!is_node(graph, node))
    4013           0 :                 isl_die(ctx, isl_error_internal,
    4014             :                         "unable to find node", return NULL);
    4015             : 
    4016           0 :         return node;
    4017             : }
    4018             : 
    4019             : /* Internal data structure for add_all_constraints.
    4020             :  *
    4021             :  * "graph" is the schedule constraint graph for which an LP problem
    4022             :  * is being constructed.
    4023             :  * "carry_inter" indicates whether inter-node edges should be carried.
    4024             :  * "pos" is the position of the next edge that needs to be carried.
    4025             :  */
    4026             : struct isl_add_all_constraints_data {
    4027             :         isl_ctx *ctx; /* context handed to graph_find_compressed_node by the callbacks */
    4028             :         struct isl_sched_graph *graph;
    4029             :         int carry_inter;
    4030             :         int pos; /* incremented each time an edge is given an e_i variable */
    4031             : };
    4032             : 
    4033             : /* Add the constraints "coef" derived from an edge from a node to itself
    4034             :  * to data->graph->lp in order to respect the dependences and
    4035             :  * to try and carry them.
    4036             :  *
    4037             :  * The space of "coef" is of the form
    4038             :  *
    4039             :  *      coefficients[[c_cst] -> S[c_x]]
    4040             :  *
    4041             :  * with S[c_x] the (compressed) space of the node.
    4042             :  * Extract the node from the space and call add_intra_constraints.
    4043             :  */
    4044           0 : static isl_stat lp_add_intra(__isl_take isl_basic_set *coef, void *user)
    4045             : {
    4046           0 :         struct isl_add_all_constraints_data *data = user;
    4047             :         isl_space *space;
    4048             :         struct isl_sched_node *node;
    4049             : 
    4050           0 :         space = isl_basic_set_get_space(coef);
    4051           0 :         space = isl_space_range(isl_space_unwrap(space)); /* keep only S[c_x] */
    4052           0 :         node = graph_find_compressed_node(data->ctx, data->graph, space); /* NULL on error; not checked here, passed on as is */
    4053           0 :         isl_space_free(space);
    4054           0 :         return add_intra_constraints(data->graph, node, coef, data->pos++); /* every intra-node edge takes the next position */
    4055             : }
    4056             : 
    4057             : /* Add the constraints "coef" derived from an edge from a node j
    4058             :  * to a node k to data->graph->lp in order to respect the dependences and
    4059             :  * to try and carry them (provided data->carry_inter is set).
    4060             :  *
    4061             :  * The space of "coef" is of the form
    4062             :  *
    4063             :  *      coefficients[[c_cst, c_n] -> [S_j[c_x] -> S_k[c_y]]]
    4064             :  *
    4065             :  * with S_j[c_x] and S_k[c_y] the (compressed) spaces of the nodes.
    4066             :  * Extract the nodes from the space and call add_inter_constraints.
    4067             :  */
    4068           0 : static isl_stat lp_add_inter(__isl_take isl_basic_set *coef, void *user)
    4069             : {
    4070           0 :         struct isl_add_all_constraints_data *data = user;
    4071             :         isl_space *space, *dom;
    4072             :         struct isl_sched_node *src, *dst;
    4073             :         int pos;
    4074             : 
    4075           0 :         space = isl_basic_set_get_space(coef);
    4076           0 :         space = isl_space_unwrap(isl_space_range(isl_space_unwrap(space))); /* S_j[c_x] -> S_k[c_y] */
    4077           0 :         dom = isl_space_domain(isl_space_copy(space)); /* S_j[c_x] */
    4078           0 :         src = graph_find_compressed_node(data->ctx, data->graph, dom);
    4079           0 :         isl_space_free(dom);
    4080           0 :         space = isl_space_range(space); /* S_k[c_y] */
    4081           0 :         dst = graph_find_compressed_node(data->ctx, data->graph, space);
    4082           0 :         isl_space_free(space);
    4083             : 
    4084           0 :         pos = data->carry_inter ? data->pos++ : -1; /* -1: respect the edge without introducing an e_i */
    4085           0 :         return add_inter_constraints(data->graph, src, dst, coef, pos); /* "src" and "dst" are passed on unchecked */
    4086             : }
    4087             : 
    4088             : /* Add constraints to graph->lp that force all (conditional) validity
    4089             :  * dependences to be respected and attempt to carry them.
    4090             :  * "intra" is the sequence of coefficient constraints for intra-node edges.
    4091             :  * "inter" is the sequence of coefficient constraints for inter-node edges.
    4092             :  * "carry_inter" indicates whether inter-node edges should be carried or
    4093             :  * only respected.
    4094             :  */
    4095           0 : static isl_stat add_all_constraints(isl_ctx *ctx, struct isl_sched_graph *graph,
    4096             :         __isl_keep isl_basic_set_list *intra,
    4097             :         __isl_keep isl_basic_set_list *inter, int carry_inter)
    4098             : {
    4099           0 :         struct isl_add_all_constraints_data data = { ctx, graph, carry_inter }; /* "pos" is assigned explicitly below */
    4100             : 
    4101           0 :         data.pos = 0;
    4102           0 :         if (isl_basic_set_list_foreach(intra, &lp_add_intra, &data) < 0) /* intra-node edges take the first positions */
    4103           0 :                 return isl_stat_error;
    4104           0 :         if (isl_basic_set_list_foreach(inter, &lp_add_inter, &data) < 0) /* continues from the position reached above */
    4105           0 :                 return isl_stat_error;
    4106           0 :         return isl_stat_ok;
    4107             : }
    4108             : 
    4109             : /* Internal data structure for count_all_constraints
    4110             :  * for keeping track of the number of equality and inequality constraints.
    4111             :  */
    4112             : struct isl_sched_count {
    4113             :         int n_eq; /* running total of equality constraints */
    4114             :         int n_ineq; /* running total of inequality constraints */
    4115             : };
    4116             : 
    4117             : /* Add the number of equality and inequality constraints of "bset"
    4118             :  * to data->n_eq and data->n_ineq.
    4119             :  * "user" points to a struct isl_sched_count.
    4120             :  */
    4121           0 : static isl_stat bset_update_count(__isl_take isl_basic_set *bset, void *user)
    4122             : {
    4123           0 :         struct isl_sched_count *data = user;
    4124             : 
    4125           0 :         return update_count(bset, 1, &data->n_eq, &data->n_ineq); /* update_count is defined elsewhere; "bset" is not touched afterwards */
    4126             : }
    4126             : 
    4127             : /* Count the number of equality and inequality constraints
    4128             :  * that will be added to the carry_lp problem.
    4129             :  * We count each edge exactly once.
    4130             :  * "intra" is the sequence of coefficient constraints for intra-node edges.
    4131             :  * "inter" is the sequence of coefficient constraints for inter-node edges.
    4132             :  */
    4133           0 : static isl_stat count_all_constraints(__isl_keep isl_basic_set_list *intra,
    4134             :         __isl_keep isl_basic_set_list *inter, int *n_eq, int *n_ineq)
    4135             : {
    4136             :         struct isl_sched_count data;
    4137             : 
    4138           0 :         data.n_eq = data.n_ineq = 0;
    4139           0 :         if (isl_basic_set_list_foreach(inter, &bset_update_count, &data) < 0)
    4140           0 :                 return isl_stat_error; /* *n_eq and *n_ineq are left untouched on error */
    4141           0 :         if (isl_basic_set_list_foreach(intra, &bset_update_count, &data) < 0)
    4142           0 :                 return isl_stat_error;
    4143             : 
    4144           0 :         *n_eq = data.n_eq; /* results are only written on success */
    4145           0 :         *n_ineq = data.n_ineq;
    4146             : 
    4147           0 :         return isl_stat_ok;
    4148             : }
    4149             : 
    4150             : /* Construct an LP problem for finding schedule coefficients
    4151             :  * such that the schedule carries as many validity dependences as possible.
    4152             :  * In particular, for each dependence i, we bound the dependence distance
    4153             :  * from below by e_i, with 0 <= e_i <= 1 and then maximize the sum
    4154             :  * of all e_i's.  Dependences with e_i = 0 in the solution are simply
    4155             :  * respected, while those with e_i > 0 (in practice e_i = 1) are carried.
    4156             :  * "intra" is the sequence of coefficient constraints for intra-node edges.
    4157             :  * "inter" is the sequence of coefficient constraints for inter-node edges.
    4158             :  * "n_edge" is the total number of edges.
    4159             :  * "carry_inter" indicates whether inter-node edges should be carried or
    4160             :  * only respected.  That is, if "carry_inter" is not set, then
    4161             :  * no e_i variables are introduced for the inter-node edges.
    4162             :  *
    4163             :  * All variables of the LP are non-negative.  The actual coefficients
    4164             :  * may be negative, so each coefficient is represented as the difference
    4165             :  * of two non-negative variables.  The negative part always appears
    4166             :  * immediately before the positive part.
    4167             :  * Other than that, the variables have the following order
    4168             :  *
    4169             :  *      - sum of (1 - e_i) over all edges
    4170             :  *      - sum of all c_n coefficients
    4171             :  *              (unconstrained when computing non-parametric schedules)
    4172             :  *      - sum of positive and negative parts of all c_x coefficients
    4173             :  *      - for each edge
    4174             :  *              - e_i
    4175             :  *      - for each node
    4176             :  *              - positive and negative parts of c_i_x, in opposite order
    4177             :  *              - c_i_n (if parametric)
    4178             :  *              - c_i_0
    4179             :  *
    4180             :  * The constraints are those from the (validity) edges plus three equalities
    4181             :  * to express the sums and n_edge inequalities to express e_i <= 1.
    4182             :  */
    4183           0 : static isl_stat setup_carry_lp(isl_ctx *ctx, struct isl_sched_graph *graph,
    4184             :         int n_edge, __isl_keep isl_basic_set_list *intra,
    4185             :         __isl_keep isl_basic_set_list *inter, int carry_inter)
    4186             : {
    4187             :         int i;
    4188             :         int k;
    4189             :         isl_space *dim;
    4190             :         unsigned total;
    4191             :         int n_eq, n_ineq;
    4192             : 
    4193           0 :         total = 3 + n_edge; /* the three sum variables followed by one e_i per edge */
    4194           0 :         for (i = 0; i < graph->n; ++i) {
    4195           0 :                 struct isl_sched_node *node = &graph->node[graph->sorted[i]]; /* nodes are laid out in graph->sorted order */
    4196           0 :                 node->start = total; /* first LP variable belonging to this node */
    4197           0 :                 total += 1 + node->nparam + 2 * node->nvar; /* c_i_0, c_i_n and two variables per c_i_x coefficient */
    4198             :         }
    4199             : 
    4200           0 :         if (count_all_constraints(intra, inter, &n_eq, &n_ineq) < 0)
    4201           0 :                 return isl_stat_error;
    4202             : 
    4203           0 :         dim = isl_space_set_alloc(ctx, 0, total);
    4204           0 :         isl_basic_set_free(graph->lp); /* discard any previously constructed LP */
    4205           0 :         n_eq += 3; /* the three sum equalities */
    4206           0 :         n_ineq += n_edge; /* one e_i <= 1 inequality per edge */
    4207           0 :         graph->lp = isl_basic_set_alloc_space(dim, 0, n_eq, n_ineq);
    4208           0 :         graph->lp = isl_basic_set_set_rational(graph->lp);
    4209             : 
    4210           0 :         k = isl_basic_set_alloc_equality(graph->lp);
    4211           0 :         if (k < 0)
    4212           0 :                 return isl_stat_error;
    4213           0 :         isl_seq_clr(graph->lp->eq[k], 1 + total); /* column 0 is the constant term, so variable v sits in column 1 + v */
    4214           0 :         isl_int_set_si(graph->lp->eq[k][0], -n_edge); /* -n_edge + (variable 0) + sum of e_i = 0 */
    4215           0 :         isl_int_set_si(graph->lp->eq[k][1], 1);
    4216           0 :         for (i = 0; i < n_edge; ++i)
    4217           0 :                 isl_int_set_si(graph->lp->eq[k][4 + i], 1); /* column 4 + i holds e_i (variable 3 + i) */
    4218             : 
    4219           0 :         if (add_param_sum_constraint(graph, 1) < 0) /* variable 1: sum of all c_n coefficients */
    4220           0 :                 return isl_stat_error;
    4221           0 :         if (add_var_sum_constraint(graph, 2) < 0) /* variable 2: sum of the parts of all c_x coefficients */
    4222           0 :                 return isl_stat_error;
    4223             : 
    4224           0 :         for (i = 0; i < n_edge; ++i) {
    4225           0 :                 k = isl_basic_set_alloc_inequality(graph->lp);
    4226           0 :                 if (k < 0)
    4227           0 :                         return isl_stat_error;
    4228           0 :                 isl_seq_clr(graph->lp->ineq[k], 1 + total);
    4229           0 :                 isl_int_set_si(graph->lp->ineq[k][4 + i], -1); /* 1 - e_i >= 0 */
    4230           0 :                 isl_int_set_si(graph->lp->ineq[k][0], 1);
    4231             :         }
    4232             : 
    4233           0 :         if (add_all_constraints(ctx, graph, intra, inter, carry_inter) < 0) /* finally the constraints of the edges themselves */
    4234           0 :                 return isl_stat_error;
    4235             : 
    4236           0 :         return isl_stat_ok;
    4237             : }
    4238             : 
    4239             : static __isl_give isl_schedule_node *compute_component_schedule(
    4240             :         __isl_take isl_schedule_node *node, struct isl_sched_graph *graph,
    4241             :         int wcc); /* forward declaration only; the definition lies outside this part of the listing */
    4242             : 
    4243             : /* If the schedule_split_scaled option is set and if the linear
    4244             :  * parts of the scheduling rows for all nodes in the graphs have
    4245             :  * a non-trivial common divisor, then remove this
    4246             :  * common divisor from the linear part.
    4247             :  * Otherwise, insert a band node directly and continue with
    4248             :  * the construction of the schedule.
    4249             :  *
    4250             :  * If a non-trivial common divisor is found, then
    4251             :  * the linear part is reduced and the remainder is ignored.
    4252             :  * The pieces of the graph that are assigned different remainders
    4253             :  * form (groups of) strongly connected components within
    4254             :  * the scaled down band.  If needed, they can therefore
    4255             :  * be ordered along this remainder in a sequence node.
    4256             :  * However, this ordering is not enforced here in order to allow
    4257             :  * the scheduler to combine some of the strongly connected components.
    4258             :  */
    4259           0 : static __isl_give isl_schedule_node *split_scaled(
    4260             :         __isl_take isl_schedule_node *node, struct isl_sched_graph *graph)
    4261             : {
    4262             :         int i;
    4263             :         int row;
    4264             :         isl_ctx *ctx;
    4265             :         isl_int gcd, gcd_i;
    4266             : 
    4267           0 :         if (!node)
    4268           0 :                 return NULL;
    4269             : 
    4270           0 :         ctx = isl_schedule_node_get_ctx(node);
    4271           0 :         if (!ctx->opt->schedule_split_scaled)
    4272           0 :                 return compute_next_band(node, graph, 0); /* option not set: continue without scaling */
    4273           0 :         if (graph->n <= 1)
    4274           0 :                 return compute_next_band(node, graph, 0); /* at most one node: no scaling is attempted */
    4275             : 
    4276           0 :         isl_int_init(gcd);
    4277           0 :         isl_int_init(gcd_i);
    4278             : 
    4279           0 :         isl_int_set_si(gcd, 0); /* 0 is the neutral starting value for the running gcd */
    4280             : 
    4281           0 :         row = isl_mat_rows(graph->node[0].sched) - 1; /* index of the last schedule row, taken from the first node */
    4282             : 
    4283           0 :         for (i = 0; i < graph->n; ++i) {
    4284           0 :                 struct isl_sched_node *node = &graph->node[i]; /* NOTE: shadows the isl_schedule_node parameter "node" inside this loop */
    4285           0 :                 int cols = isl_mat_cols(node->sched);
    4286             : 
    4287           0 :                 isl_seq_gcd(node->sched->row[row] + 1, cols - 1, &gcd_i); /* gcd of the row without its column 0 */
    4288           0 :                 isl_int_gcd(gcd, gcd, gcd_i); /* accumulate over all nodes */
    4289             :         }
    4290             : 
    4291           0 :         isl_int_clear(gcd_i);
    4292             : 
    4293           0 :         if (isl_int_cmp_si(gcd, 1) <= 0) { /* no non-trivial common divisor */
    4294           0 :                 isl_int_clear(gcd);
    4295           0 :                 return compute_next_band(node, graph, 0);
    4296             :         }
    4297             : 
    4298           0 :         for (i = 0; i < graph->n; ++i) {
    4299           0 :                 struct isl_sched_node *node = &graph->node[i]; /* NOTE: shadows the isl_schedule_node parameter "node" inside this loop */
    4300             : 
    4301           0 :                 isl_int_fdiv_q(node->sched->row[row][0],
    4302             :                                node->sched->row[row][0], gcd); /* floor division of column 0 by gcd ... */
    4303           0 :                 isl_int_mul(node->sched->row[row][0],
    4304             :                             node->sched->row[row][0], gcd); /* ... then multiply back: the remainder is dropped */
    4305           0 :                 node->sched = isl_mat_scale_down_row(node->sched, row, gcd); /* divide the row by gcd */
    4306           0 :                 if (!node->sched)
    4307           0 :                         goto error;
    4308             :         }
    4309             : 
    4310           0 :         isl_int_clear(gcd);
    4311             : 
    4312           0 :         return compute_next_band(node, graph, 0);
    4313             : error:
    4314           0 :         isl_int_clear(gcd);
    4315           0 :         return isl_schedule_node_free(node); /* here "node" is the schedule node parameter again */
    4316             : }
    4317             : 
    4318             : /* Is the schedule row "sol" trivial on node "node"?
    4319             :  * That is, is the solution zero on the dimensions linearly independent of
    4320             :  * the previously found solutions?
    4321             :  * Return 1 if the solution is trivial, 0 if it is not and -1 on error.
    4322             :  *
    4323             :  * Each coefficient is represented as the difference between
    4324             :  * two non-negative values in "sol".
    4325             :  * We construct the schedule row s and check if it is linearly
    4326             :  * independent of previously computed schedule rows
    4327             :  * by computing T s, with T the linear combinations that are zero
    4328             :  * on linearly dependent schedule rows.
    4329             :  * If the result consists of all zeros, then the solution is trivial.
    4330             :  */
    4331           0 : static int is_trivial(struct isl_sched_node *node, __isl_keep isl_vec *sol)
    4332             : {
    4333             :         int trivial;
    4334             :         isl_vec *node_sol;
    4335             : 
    4336           0 :         if (!sol)
    4337           0 :                 return -1;
    4338           0 :         if (node->nvar == node->rank)
    4339           0 :                 return 0; /* nvar - rank entries are tested below; with none left, report non-trivial */
    4340             : 
    4341           0 :         node_sol = extract_var_coef(node, sol); /* the schedule row s of this node */
    4342           0 :         node_sol = isl_mat_vec_product(isl_mat_copy(node->indep), node_sol); /* T s, with T = node->indep */
    4343           0 :         if (!node_sol)
    4344           0 :                 return -1;
    4345             : 
    4346           0 :         trivial = isl_seq_first_non_zero(node_sol->el,
    4347           0 :                                         node->nvar - node->rank) == -1; /* -1 means no non-zero entry among the first nvar - rank */
    4348             : 
    4349           0 :         isl_vec_free(node_sol);
    4350             : 
    4351           0 :         return trivial;
    4352             : }
    4353             : 
    4354             : /* Is the schedule row "sol" trivial on any node where it should
    4355             :  * not be trivial (i.e., on any node for which needs_row holds)?
    4356             :  * Return 1 if it is trivial on some such node, 0 if it is trivial on none of them and -1 on error.
    4357             :  */
    4358           0 : static int is_any_trivial(struct isl_sched_graph *graph,
    4359             :         __isl_keep isl_vec *sol)
    4360             : {
    4361             :         int i;
    4362             : 
    4363           0 :         for (i = 0; i < graph->n; ++i) {
    4364           0 :                 struct isl_sched_node *node = &graph->node[i];
    4365             :                 int trivial;
    4366             : 
    4367           0 :                 if (!needs_row(graph, node))
    4368           0 :                         continue; /* this node is not checked */
    4369           0 :                 trivial = is_trivial(node, sol);
    4370           0 :                 if (trivial < 0 || trivial)
    4371           0 :                         return trivial; /* stop at the first error or trivial node */
    4372             :         }
    4373             : 
    4374           0 :         return 0;
    4375             : }
    4376             : 
    4377             : /* Does the schedule represented by "sol" perform loop coalescing on "node"?
    4378             :  * If so, return the position of the coalesced dimension.
    4379             :  * Otherwise, return node->nvar or -1 on error.
    4380             :  *
    4381             :  * In particular, look for pairs of coefficients c_i and c_j such that
    4382             :  * |c_j/c_i| > ceil(size_i/2), i.e., |c_j| > |c_i * ceil(size_i/2)|.
    4383             :  * If any such pair is found, then return i.
    4384             :  * If size_i is infinity or c_i is zero, then no check on c_i needs to be performed.
    4385             :  */
    4386           0 : static int find_node_coalescing(struct isl_sched_node *node,
    4387             :         __isl_keep isl_vec *sol)
    4388             : {
    4389             :         int i, j;
    4390             :         isl_int max;
    4391             :         isl_vec *csol;
    4392             : 
    4393           0 :         if (node->nvar <= 1)
    4394           0 :                 return node->nvar; /* a pair of coefficients is needed */
    4395             : 
    4396           0 :         csol = extract_var_coef(node, sol);
    4397           0 :         if (!csol)
    4398           0 :                 return -1;
    4399           0 :         isl_int_init(max);
    4400           0 :         for (i = 0; i < node->nvar; ++i) {
    4401             :                 isl_val *v;
    4402             : 
    4403           0 :                 if (isl_int_is_zero(csol->el[i]))
    4404           0 :                         continue; /* c_i is zero: skip */
    4405           0 :                 v = isl_multi_val_get_val(node->sizes, i); /* size_i */
    4406           0 :                 if (!v)
    4407           0 :                         goto error;
    4408           0 :                 if (!isl_val_is_int(v)) { /* size_i is not an integer (e.g., infinity): skip */
    4409           0 :                         isl_val_free(v);
    4410           0 :                         continue;
    4411             :                 }
    4412           0 :                 v = isl_val_div_ui(v, 2);
    4413           0 :                 v = isl_val_ceil(v); /* ceil(size_i/2) */
    4414           0 :                 if (!v)
    4415           0 :                         goto error;
    4416           0 :                 isl_int_mul(max, v->n, csol->el[i]); /* max = ceil(size_i/2) * c_i */
    4417           0 :                 isl_val_free(v);
    4418             : 
    4419           0 :                 for (j = 0; j < node->nvar; ++j) {
    4420           0 :                         if (j == i)
    4421           0 :                                 continue;
    4422           0 :                         if (isl_int_abs_gt(csol->el[j], max)) /* |c_j| > |max| */
    4423           0 :                                 break;
    4424             :                 }
    4425           0 :                 if (j < node->nvar) /* the inner loop stopped early: pair found */
    4426           0 :                         break;
    4427             :         }
    4428             : 
    4429           0 :         isl_int_clear(max);
    4430           0 :         isl_vec_free(csol);
    4431           0 :         return i; /* equals node->nvar when no pair was found */
    4432             : error:
    4433           0 :         isl_int_clear(max);
    4434           0 :         isl_vec_free(csol);
    4435           0 :         return -1;
    4436             : }
    4437             : 
    4438             : /* Force the schedule coefficient at position "pos" of "node" to be zero
    4439             :  * in "tl".
    4440             :  * The coefficient is encoded as the difference between two non-negative
    4441             :  * variables.  Force these two variables to have the same value.
    4442             :  */
    4443           0 : static __isl_give isl_tab_lexmin *zero_out_node_coef(
    4444             :         __isl_take isl_tab_lexmin *tl, struct isl_sched_node *node, int pos)
    4445             : {
    4446             :         int dim;
    4447             :         isl_ctx *ctx;
    4448             :         isl_vec *eq;
    4449             : 
    4450           0 :         ctx = isl_space_get_ctx(node->space);
    4451           0 :         dim = isl_tab_lexmin_dim(tl);
    4452           0 :         if (dim < 0)
    4453           0 :                 return isl_tab_lexmin_free(tl);
    4454           0 :         eq = isl_vec_alloc(ctx, 1 + dim); /* one extra entry in front of the "dim" variables */
    4455           0 :         eq = isl_vec_clr(eq);
    4456           0 :         if (!eq)
    4457           0 :                 return isl_tab_lexmin_free(tl);
    4458             : 
    4459           0 :         pos = 1 + node_var_coef_pos(node, pos); /* helper defined elsewhere; presumably the first variable of the pair, shifted past entry 0 */
    4460           0 :         isl_int_set_si(eq->el[pos], 1); /* (variable at pos) - (variable at pos + 1) = 0 */
    4461           0 :         isl_int_set_si(eq->el[pos + 1], -1);
    4462           0 :         tl = isl_tab_lexmin_add_eq(tl, eq->el);
    4463           0 :         isl_vec_free(eq);
    4464             : 
    4465           0 :         return tl;
    4466             : }
    4467             : 
    4468             : /* Return the lexicographically smallest rational point in the basic set
    4469             :  * from which "tl" was constructed, double checking that this input set
    4470             :  * was not empty.
    4471             :  */
    4472           0 : static __isl_give isl_vec *non_empty_solution(__isl_keep isl_tab_lexmin *tl)
    4473             : {
    4474             :         isl_vec *sol;
    4475             : 
    4476           0 :         sol = isl_tab_lexmin_get_solution(tl);
    4477           0 :         if (!sol)
    4478           0 :                 return NULL;
    4479           0 :         if (sol->size == 0)
    4480           0 :                 isl_die(isl_vec_get_ctx(sol), isl_error_internal,
    4481             :                         "error in schedule construction",
    4482             :                         return isl_vec_free(sol));
    4483           0 :         return sol;
    4484             : }
    4485             : 
    4486             : /* Does the solution "sol" of the LP problem constructed by setup_carry_lp
    4487             :  * carry any of the "n_edge" groups of dependences?
    4488             :  * The value in the first position is the sum of (1 - e_i) over all "n_edge"
    4489             :  * edges, with 0 <= e_i <= 1 equal to 1 when the dependences represented
    4490             :  * by the edge are carried by the solution.
    4491             :  * If the sum of the (1 - e_i) is smaller than "n_edge" then at least
    4492             :  * one of those is carried.
    4493             :  *
    4494             :  * Note that despite the fact that the problem is solved using a rational
    4495             :  * solver, the solution is guaranteed to be integral.
    4496             :  * Specifically, the dependence distance lower bounds e_i (and therefore
    4497             :  * also their sum) are integers.  See Lemma 5 of [1].
    4498             :  *
    4499             :  * Any potential denominator of the sum is cleared by this function.
    4500             :  * The denominator is not relevant for any of the other elements
    4501             :  * in the solution.
    4502             :  *
    4503             :  * [1] P. Feautrier, Some Efficient Solutions to the Affine Scheduling
    4504             :  *     Problem, Part II: Multi-Dimensional Time.
    4505             :  *     In Intl. Journal of Parallel Programming, 1992.
    4506             :  */
    4507           0 : static int carries_dependences(__isl_keep isl_vec *sol, int n_edge)
    4508             : {
    4509           0 :         isl_int_divexact(sol->el[1], sol->el[1], sol->el[0]);
    4510           0 :         isl_int_set_si(sol->el[0], 1);
    4511           0 :         return isl_int_cmp_si(sol->el[1], n_edge) < 0;
    4512             : }
    4513             : 
    4514             : /* Return the lexicographically smallest rational point in "lp",
    4515             :  * assuming that all variables are non-negative and performing some
    4516             :  * additional sanity checks.
    4517             :  * If "want_integral" is set, then compute the lexicographically smallest
    4518             :  * integer point instead.
    4519             :  * In particular, "lp" should not be empty by construction.
    4520             :  * Double check that this is the case.
    4521             :  * If dependences are not carried for any of the "n_edge" edges,
    4522             :  * then return an empty vector.
    4523             :  *
    4524             :  * If the schedule_treat_coalescing option is set and
    4525             :  * if the computed schedule performs loop coalescing on a given node,
    4526             :  * i.e., if it is of the form
    4527             :  *
    4528             :  *      c_i i + c_j j + ...
    4529             :  *
    4530             :  * with |c_j/c_i| >= size_i, then force the coefficient c_i to be zero
    4531             :  * to cut out this solution.  Repeat this process until no more loop
    4532             :  * coalescing occurs or until no more dependences can be carried.
    4533             :  * In the latter case, revert to the previously computed solution.
    4534             :  *
    4535             :  * If the caller requests an integral solution and if coalescing should
    4536             :  * be treated, then perform the coalescing treatment first as
    4537             :  * an integral solution computed before coalescing treatment
    4538             :  * would carry the same number of edges and would therefore probably
    4539             :  * also be coalescing.
    4540             :  *
    4541             :  * To allow the coalescing treatment to be performed first,
    4542             :  * the initial solution is allowed to be rational and it is only
    4543             :  * cut out (if needed) in the next iteration, if no coalescing measures
    4544             :  * were taken.
    4545             :  */
    4546           0 : static __isl_give isl_vec *non_neg_lexmin(struct isl_sched_graph *graph,
    4547             :         __isl_take isl_basic_set *lp, int n_edge, int want_integral)
    4548             : {
    4549             :         int i, pos, cut;
    4550             :         isl_ctx *ctx;
    4551             :         isl_tab_lexmin *tl;
    4552           0 :         isl_vec *sol = NULL, *prev;
    4553             :         int treat_coalescing;
    4554             :         int try_again;
    4555             : 
    4556           0 :         if (!lp)
    4557           0 :                 return NULL;
    4558           0 :         ctx = isl_basic_set_get_ctx(lp);
    4559           0 :         treat_coalescing = isl_options_get_schedule_treat_coalescing(ctx);
    4560           0 :         tl = isl_tab_lexmin_from_basic_set(lp);
    4561             : 
    4562           0 :         cut = 0;
    4563             :         do {
    4564             :                 int integral;
    4565             : 
    4566           0 :                 try_again = 0;
    4567           0 :                 if (cut)
    4568           0 :                         tl = isl_tab_lexmin_cut_to_integer(tl);
    4569           0 :                 prev = sol;
    4570           0 :                 sol = non_empty_solution(tl);
    4571           0 :                 if (!sol)
    4572           0 :                         goto error;
    4573             : 
    4574           0 :                 integral = isl_int_is_one(sol->el[0]);
    4575           0 :                 if (!carries_dependences(sol, n_edge)) {
    4576           0 :                         if (!prev)
    4577           0 :                                 prev = isl_vec_alloc(ctx, 0);
    4578           0 :                         isl_vec_free(sol);
    4579           0 :                         sol = prev;
    4580           0 :                         break;
    4581             :                 }
    4582           0 :                 prev = isl_vec_free(prev);
    4583           0 :                 cut = want_integral && !integral;
    4584           0 :                 if (cut)
    4585           0 :                         try_again = 1;
    4586           0 :                 if (!treat_coalescing)
    4587           0 :                         continue;
    4588           0 :                 for (i = 0; i < graph->n; ++i) {
    4589           0 :                         struct isl_sched_node *node = &graph->node[i];
    4590             : 
    4591           0 :                         pos = find_node_coalescing(node, sol);
    4592           0 :                         if (pos < 0)
    4593           0 :                                 goto error;
    4594           0 :                         if (pos < node->nvar)
    4595           0 :                                 break;
    4596             :                 }
    4597           0 :                 if (i < graph->n) {
    4598           0 :                         try_again = 1;
    4599           0 :                         tl = zero_out_node_coef(tl, &graph->node[i], pos);
    4600           0 :                         cut = 0;
    4601             :                 }
    4602           0 :         } while (try_again);
    4603             : 
    4604           0 :         isl_tab_lexmin_free(tl);
    4605             : 
    4606           0 :         return sol;
    4607             : error:
    4608           0 :         isl_tab_lexmin_free(tl);
    4609           0 :         isl_vec_free(prev);
    4610           0 :         isl_vec_free(sol);
    4611           0 :         return NULL;
    4612             : }
    4613             : 
    4614             : /* If "edge" is an edge from a node to itself, then add the corresponding
    4615             :  * dependence relation to "umap".
    4616             :  * If "node" has been compressed, then the dependence relation
    4617             :  * is also compressed first.
    4618             :  */
    4619           0 : static __isl_give isl_union_map *add_intra(__isl_take isl_union_map *umap,
    4620             :         struct isl_sched_edge *edge)
    4621             : {
    4622             :         isl_map *map;
    4623           0 :         struct isl_sched_node *node = edge->src;
    4624             : 
    4625           0 :         if (edge->src != edge->dst)
    4626           0 :                 return umap;
    4627             : 
    4628           0 :         map = isl_map_copy(edge->map);
    4629           0 :         if (node->compressed) {
    4630           0 :                 map = isl_map_preimage_domain_multi_aff(map,
    4631             :                                     isl_multi_aff_copy(node->decompress));
    4632           0 :                 map = isl_map_preimage_range_multi_aff(map,
    4633             :                                     isl_multi_aff_copy(node->decompress));
    4634             :         }
    4635           0 :         umap = isl_union_map_add_map(umap, map);
    4636           0 :         return umap;
    4637             : }
    4638             : 
    4639             : /* If "edge" is an edge from a node to another node, then add the corresponding
    4640             :  * dependence relation to "umap".
    4641             :  * If the source or destination nodes of "edge" have been compressed,
    4642             :  * then the dependence relation is also compressed first.
    4643             :  */
    4644           0 : static __isl_give isl_union_map *add_inter(__isl_take isl_union_map *umap,
    4645             :         struct isl_sched_edge *edge)
    4646             : {
    4647             :         isl_map *map;
    4648             : 
    4649           0 :         if (edge->src == edge->dst)
    4650           0 :                 return umap;
    4651             : 
    4652           0 :         map = isl_map_copy(edge->map);
    4653           0 :         if (edge->src->compressed)
    4654           0 :                 map = isl_map_preimage_domain_multi_aff(map,
    4655           0 :                                     isl_multi_aff_copy(edge->src->decompress));
    4656           0 :         if (edge->dst->compressed)
    4657           0 :                 map = isl_map_preimage_range_multi_aff(map,
    4658           0 :                                     isl_multi_aff_copy(edge->dst->decompress));
    4659           0 :         umap = isl_union_map_add_map(umap, map);
    4660           0 :         return umap;
    4661             : }
    4662             : 
    4663             : /* Internal data structure used by union_drop_coalescing_constraints
    4664             :  * to collect bounds on all relevant statements.
    4665             :  *
    4666             :  * "ctx" is handed to graph_find_compressed_node by collect_bounds.
    4667             :  * "graph" is the schedule constraint graph for which an LP problem
    4668             :  * is being constructed.  "bounds" collects the bounds.
    4669             :  */
    4670             : struct isl_collect_bounds_data {
    4671             :         isl_ctx *ctx;
    4672             :         struct isl_sched_graph *graph;
    4673             :         isl_union_set *bounds;
    4674             : };
    4675             : 
    4676             : /* Add the size bounds for the node with instance deltas in "set"
    4677             :  * to data->bounds.
    4678             :  */
    4679           0 : static isl_stat collect_bounds(__isl_take isl_set *set, void *user)
    4680             : {
    4681           0 :         struct isl_collect_bounds_data *data = user;
    4682             :         struct isl_sched_node *node;
    4683             :         isl_space *space;
    4684             :         isl_set *bounds;
    4685             : 
    4686           0 :         space = isl_set_get_space(set);
    4687           0 :         isl_set_free(set);
    4688             : 
    4689           0 :         node = graph_find_compressed_node(data->ctx, data->graph, space);
    4690           0 :         isl_space_free(space);
    4691             : 
    4692           0 :         bounds = isl_set_from_basic_set(get_size_bounds(node));
    4693           0 :         data->bounds = isl_union_set_add_set(data->bounds, bounds);
    4694             : 
    4695           0 :         return isl_stat_ok;
    4696             : }
    4697             : 
    4698             : /* Drop some constraints from "delta" that could be exploited
    4699             :  * to construct loop coalescing schedules.
    4700             :  * In particular, drop those constraint that bound the difference
    4701             :  * to the size of the domain.
    4702             :  * Do this for each set/node in "delta" separately.
    4703             :  * The parameters are assumed to have been projected out by the caller.
    4704             :  */
    4705           0 : static __isl_give isl_union_set *union_drop_coalescing_constraints(isl_ctx *ctx,
    4706             :         struct isl_sched_graph *graph, __isl_take isl_union_set *delta)
    4707             : {
    4708           0 :         struct isl_collect_bounds_data data = { ctx, graph };
    4709             : 
    4710           0 :         data.bounds = isl_union_set_empty(isl_space_params_alloc(ctx, 0));
    4711           0 :         if (isl_union_set_foreach_set(delta, &collect_bounds, &data) < 0)
    4712           0 :                 data.bounds = isl_union_set_free(data.bounds);
    4713           0 :         delta = isl_union_set_plain_gist(delta, data.bounds);
    4714             : 
    4715           0 :         return delta;
    4716             : }
    4717             : 
    4718             : /* Given a non-trivial lineality space "lineality", add the corresponding
    4719             :  * universe set to data->mask and add a map from elements to
    4720             :  * other elements along the lines in "lineality" to data->equivalent.
    4721             :  * If this is the first time this function gets called
    4722             :  * (data->any_non_trivial is still false), then set data->any_non_trivial and
    4723             :  * initialize data->mask and data->equivalent.
    4724             :  *
    4725             :  * In particular, if the lineality space is defined by equality constraints
    4726             :  *
    4727             :  *      E x = 0
    4728             :  *
    4729             :  * then construct an affine mapping
    4730             :  *
    4731             :  *      f : x -> E x
    4732             :  *
    4733             :  * and compute the equivalence relation of having the same image under f:
    4734             :  *
    4735             :  *      { x -> x' : E x = E x' }
    4736             :  */
    4737           0 : static isl_stat add_non_trivial_lineality(__isl_take isl_basic_set *lineality,
    4738             :         struct isl_exploit_lineality_data *data)
    4739             : {
    4740             :         isl_mat *eq;
    4741             :         isl_space *space;
    4742             :         isl_set *univ;
    4743             :         isl_multi_aff *ma;
    4744             :         isl_multi_pw_aff *mpa;
    4745             :         isl_map *map;
    4746             :         int n;
    4747             : 
    4748           0 :         if (!lineality)
    4749           0 :                 return isl_stat_error;
    4750           0 :         if (isl_basic_set_dim(lineality, isl_dim_div) != 0)
    4751           0 :                 isl_die(isl_basic_set_get_ctx(lineality), isl_error_internal,
    4752             :                         "local variables not allowed", goto error);
    4753             : 
    4754           0 :         space = isl_basic_set_get_space(lineality);
    4755           0 :         if (!data->any_non_trivial) {
    4756           0 :                 data->equivalent = isl_union_map_empty(isl_space_copy(space));
    4757           0 :                 data->mask = isl_union_set_empty(isl_space_copy(space));
    4758             :         }
    4759           0 :         data->any_non_trivial = isl_bool_true;
    4760             : 
    4761           0 :         univ = isl_set_universe(isl_space_copy(space));
    4762           0 :         data->mask = isl_union_set_add_set(data->mask, univ);
    4763             : 
    4764           0 :         eq = isl_basic_set_extract_equalities(lineality);
    4765           0 :         n = isl_mat_rows(eq);
    4766           0 :         eq = isl_mat_insert_zero_rows(eq, 0, 1);
    4767           0 :         eq = isl_mat_set_element_si(eq, 0, 0, 1);
    4768           0 :         space = isl_space_from_domain(space);
    4769           0 :         space = isl_space_add_dims(space, isl_dim_out, n);
    4770           0 :         ma = isl_multi_aff_from_aff_mat(space, eq);
    4771           0 :         mpa = isl_multi_pw_aff_from_multi_aff(ma);
    4772           0 :         map = isl_multi_pw_aff_eq_map(mpa, isl_multi_pw_aff_copy(mpa));
    4773           0 :         data->equivalent = isl_union_map_add_map(data->equivalent, map);
    4774             : 
    4775           0 :         isl_basic_set_free(lineality);
    4776           0 :         return isl_stat_ok;
    4777             : error:
    4778           0 :         isl_basic_set_free(lineality);
    4779           0 :         return isl_stat_error;
    4780             : }
    4781             : 
    4782             : /* Check if the lineality space "set" is non-trivial (i.e., is not just
    4783             :  * the origin or, in other words, satisfies a number of equality constraints
    4784             :  * that is smaller than the dimension of the set).
    4785             :  * If so, extend data->mask and data->equivalent accordingly.
    4786             :  *
    4787             :  * The input should not have any local variables already, but
    4788             :  * isl_set_remove_divs is called to make sure it does not.
    4789             :  */
    4790           0 : static isl_stat add_lineality(__isl_take isl_set *set, void *user)
    4791             : {
    4792           0 :         struct isl_exploit_lineality_data *data = user;
    4793             :         isl_basic_set *hull;
    4794             :         int dim, n_eq;
    4795             : 
    4796           0 :         set = isl_set_remove_divs(set);
    4797           0 :         hull = isl_set_unshifted_simple_hull(set);
    4798           0 :         dim = isl_basic_set_dim(hull, isl_dim_set);
    4799           0 :         n_eq = isl_basic_set_n_equality(hull);
    4800           0 :         if (!hull)
    4801           0 :                 return isl_stat_error;
    4802           0 :         if (dim != n_eq)
    4803           0 :                 return add_non_trivial_lineality(hull, data);
    4804           0 :         isl_basic_set_free(hull);
    4805           0 :         return isl_stat_ok;
    4806             : }
    4807             : 
    4808             : /* Check if the difference set on intra-node schedule constraints "intra"
    4809             :  * has any non-trivial lineality space.
    4810             :  * If so, then extend the difference set to a difference set
    4811             :  * on equivalent elements.  That is, if "intra" is
    4812             :  *
    4813             :  *      { y - x : (x,y) \in V }
    4814             :  *
    4815             :  * and elements are equivalent if they have the same image under f,
    4816             :  * then return
    4817             :  *
    4818             :  *      { y' - x' : (x,y) \in V and f(x) = f(x') and f(y) = f(y') }
    4819             :  *
    4820             :  * or, since f is linear,
    4821             :  *
    4822             :  *      { y' - x' : (x,y) \in V and f(y - x) = f(y' - x') }
    4823             :  *
    4824             :  * The results of the search for non-trivial lineality spaces is stored
    4825             :  * in "data".
    4826             :  */
    4827           0 : static __isl_give isl_union_set *exploit_intra_lineality(
    4828             :         __isl_take isl_union_set *intra,
    4829             :         struct isl_exploit_lineality_data *data)
    4830             : {
    4831             :         isl_union_set *lineality;
    4832             :         isl_union_set *uset;
    4833             : 
    4834           0 :         data->any_non_trivial = isl_bool_false;
    4835           0 :         lineality = isl_union_set_copy(intra);
    4836           0 :         lineality = isl_union_set_combined_lineality_space(lineality);
    4837           0 :         if (isl_union_set_foreach_set(lineality, &add_lineality, data) < 0)
    4838           0 :                 data->any_non_trivial = isl_bool_error;
    4839           0 :         isl_union_set_free(lineality);
    4840             : 
    4841           0 :         if (data->any_non_trivial < 0)
    4842           0 :                 return isl_union_set_free(intra);
    4843           0 :         if (!data->any_non_trivial)
    4844           0 :                 return intra;
    4845             : 
    4846           0 :         uset = isl_union_set_copy(intra);
    4847           0 :         intra = isl_union_set_subtract(intra, isl_union_set_copy(data->mask));
    4848           0 :         uset = isl_union_set_apply(uset, isl_union_map_copy(data->equivalent));
    4849           0 :         intra = isl_union_set_union(intra, uset);
    4850             : 
    4851           0 :         intra = isl_union_set_remove_divs(intra);
    4852             : 
    4853           0 :         return intra;
    4854             : }
    4855             : 
    4856             : /* If the difference set on intra-node schedule constraints was found to have
    4857             :  * any non-trivial lineality space by exploit_intra_lineality,
    4858             :  * as recorded in "data", then extend the inter-node
    4859             :  * schedule constraints "inter" to schedule constraints on equivalent elements.
    4860             :  * That is, if "inter" is V and
    4861             :  * elements are equivalent if they have the same image under f, then return
    4862             :  *
    4863             :  *      { (x', y') : (x,y) \in V and f(x) = f(x') and f(y) = f(y') }
    4864             :  */
    4865           0 : static __isl_give isl_union_map *exploit_inter_lineality(
    4866             :         __isl_take isl_union_map *inter,
    4867             :         struct isl_exploit_lineality_data *data)
    4868             : {
    4869             :         isl_union_map *umap;
    4870             : 
    4871           0 :         if (data->any_non_trivial < 0)
    4872           0 :                 return isl_union_map_free(inter);
    4873           0 :         if (!data->any_non_trivial)
    4874           0 :                 return inter;
    4875             : 
    4876           0 :         umap = isl_union_map_copy(inter);
    4877           0 :         inter = isl_union_map_subtract_range(inter,
    4878             :                                 isl_union_set_copy(data->mask));
    4879           0 :         umap = isl_union_map_apply_range(umap,
    4880             :                                 isl_union_map_copy(data->equivalent));
    4881           0 :         inter = isl_union_map_union(inter, umap);
    4882           0 :         umap = isl_union_map_copy(inter);
    4883           0 :         inter = isl_union_map_subtract_domain(inter,
    4884             :                                 isl_union_set_copy(data->mask));
    4885           0 :         umap = isl_union_map_apply_range(isl_union_map_copy(data->equivalent),
    4886             :                                 umap);
    4887           0 :         inter = isl_union_map_union(inter, umap);
    4888             : 
    4889           0 :         inter = isl_union_map_remove_divs(inter);
    4890             : 
    4891           0 :         return inter;
    4892             : }
    4893             : 
    4894             : /* For each (conditional) validity edge in "graph",
    4895             :  * add the corresponding dependence relation using "add"
    4896             :  * to a collection of dependence relations and return the result.
    4897             :  * If "coincidence" is set, then coincidence edges are considered as well.
    4898             :  */
    4899           0 : static __isl_give isl_union_map *collect_validity(struct isl_sched_graph *graph,
    4900             :         __isl_give isl_union_map *(*add)(__isl_take isl_union_map *umap,
    4901             :                 struct isl_sched_edge *edge), int coincidence)
    4902             : {
    4903             :         int i;
    4904             :         isl_space *space;
    4905             :         isl_union_map *umap;
    4906             : 
    4907           0 :         space = isl_space_copy(graph->node[0].space);
    4908           0 :         umap = isl_union_map_empty(space);
    4909             : 
    4910           0 :         for (i = 0; i < graph->n_edge; ++i) {
    4911           0 :                 struct isl_sched_edge *edge = &graph->edge[i];
    4912             : 
    4913           0 :                 if (!is_any_validity(edge) &&
    4914           0 :                     (!coincidence || !is_coincidence(edge)))
    4915           0 :                         continue;
    4916             : 
    4917           0 :                 umap = add(umap, edge);
    4918             :         }
    4919             : 
    4920           0 :         return umap;
    4921             : }
    4922             : 
    4923             : /* Project out all parameters from "uset" and return the result.
    4924             :  */
    4925           0 : static __isl_give isl_union_set *union_set_drop_parameters(
    4926             :         __isl_take isl_union_set *uset)
    4927             : {
    4928             :         unsigned nparam;
    4929             : 
    4930           0 :         nparam = isl_union_set_dim(uset, isl_dim_param);
    4931           0 :         return isl_union_set_project_out(uset, isl_dim_param, 0, nparam);
    4932             : }
    4933             : 
    4934             : /* For each dependence relation on a (conditional) validity edge
    4935             :  * from a node to itself,
    4936             :  * construct the set of coefficients of valid constraints for elements
    4937             :  * in that dependence relation and collect the results.
    4938             :  * If "coincidence" is set, then coincidence edges are considered as well.
    4939             :  *
    4940             :  * In particular, for each dependence relation R, constraints
    4941             :  * on coefficients (c_0, c_x) are constructed such that
    4942             :  *
    4943             :  *      c_0 + c_x d >= 0 for each d in delta R = { y - x | (x,y) in R }
    4944             :  *
    4945             :  * If the schedule_treat_coalescing option is set, then some constraints
    4946             :  * that could be exploited to construct coalescing schedules
    4947             :  * are removed before the dual is computed, but after the parameters
    4948             :  * have been projected out.
    4949             :  * The entire computation is essentially the same as that performed
    4950             :  * by intra_coefficients, except that it operates on multiple
    4951             :  * edges together and that the parameters are always projected out.
    4952             :  *
    4953             :  * Additionally, exploit any non-trivial lineality space
    4954             :  * in the difference set after removing coalescing constraints and
    4955             :  * store the results of the non-trivial lineality space detection in "data".
    4956             :  * The procedure is currently run unconditionally, but it is unlikely
    4957             :  * to find any non-trivial lineality spaces if no coalescing constraints
    4958             :  * have been removed.
    4959             :  *
    4960             :  * Note that if a dependence relation is a union of basic maps,
    4961             :  * then each basic map needs to be treated individually as it may only
    4962             :  * be possible to carry the dependences expressed by some of those
    4963             :  * basic maps and not all of them.
    4964             :  * The collected validity constraints are therefore not coalesced and
    4965             :  * it is assumed that they are not coalesced automatically.
    4966             :  * Duplicate basic maps can be removed, however.
    4967             :  * In particular, if the same basic map appears as a disjunct
    4968             :  * in multiple edges, then it only needs to be carried once.
    4969             :  */
    4970           0 : static __isl_give isl_basic_set_list *collect_intra_validity(isl_ctx *ctx,
    4971             :         struct isl_sched_graph *graph, int coincidence,
    4972             :         struct isl_exploit_lineality_data *data)
    4973             : {
    4974             :         isl_union_map *intra;
    4975             :         isl_union_set *delta;
    4976             :         isl_basic_set_list *list;
    4977             :
    4978           0 :         intra = collect_validity(graph, &add_intra, coincidence); /* edges selected through add_intra */
    4979           0 :         delta = isl_union_map_deltas(intra); /* continue on the difference sets */
    4980           0 :         delta = union_set_drop_parameters(delta);
    4981           0 :         delta = isl_union_set_remove_divs(delta);
    4982           0 :         if (isl_options_get_schedule_treat_coalescing(ctx)) /* only when this option is set */
    4983           0 :                 delta = union_drop_coalescing_constraints(ctx, graph, delta);
    4984           0 :         delta = exploit_intra_lineality(delta, data); /* detection results end up in "data" */
    4985           0 :         list = isl_union_set_get_basic_set_list(delta);
    4986           0 :         isl_union_set_free(delta); /* only "list" is needed from here on */
    4987             :
    4988           0 :         return isl_basic_set_list_coefficients(list); /* "list" is handed over to this call */
    4989             : }
    4990             : 
    4991             : /* For each dependence relation on a (conditional) validity edge
    4992             :  * from a node to some other node,
    4993             :  * construct the set of coefficients of valid constraints for elements
    4994             :  * in that dependence relation and collect the results.
    4995             :  * If "coincidence" is set, then coincidence edges are considered as well.
    4996             :  *
    4997             :  * In particular, for each dependence relation R, constraints
    4998             :  * on coefficients (c_0, c_n, c_x, c_y) are constructed such that
    4999             :  *
    5000             :  *      c_0 + c_n n + c_x x + c_y y >= 0 for each (x,y) in R
    5001             :  *
    5002             :  * This computation is essentially the same as that performed
    5003             :  * by inter_coefficients, except that it operates on multiple
    5004             :  * edges together.
    5005             :  *
    5006             :  * Additionally, exploit any non-trivial lineality space
    5007             :  * that may have been discovered by collect_intra_validity
    5008             :  * (as stored in "data").
    5009             :  *
    5010             :  * Note that if a dependence relation is a union of basic maps,
    5011             :  * then each basic map needs to be treated individually as it may only
    5012             :  * be possible to carry the dependences expressed by some of those
    5013             :  * basic maps and not all of them.
    5014             :  * The collected validity constraints are therefore not coalesced and
    5015             :  * it is assumed that they are not coalesced automatically.
    5016             :  * Duplicate basic maps can be removed, however.
    5017             :  * In particular, if the same basic map appears as a disjunct
    5018             :  * in multiple edges, then it only needs to be carried once.
    5019             :  */
    5020           0 : static __isl_give isl_basic_set_list *collect_inter_validity(
    5021             :         struct isl_sched_graph *graph, int coincidence,
    5022             :         struct isl_exploit_lineality_data *data)
    5023             : {
    5024             :         isl_union_map *inter;
    5025             :         isl_union_set *wrap;
    5026             :         isl_basic_set_list *list;
    5027             :
    5028           0 :         inter = collect_validity(graph, &add_inter, coincidence); /* edges selected through add_inter */
    5029           0 :         inter = exploit_inter_lineality(inter, data); /* uses what collect_intra_validity stored in "data" */
    5030           0 :         inter = isl_union_map_remove_divs(inter);
    5031           0 :         wrap = isl_union_map_wrap(inter); /* relations become sets over (x,y) pairs */
    5032           0 :         list = isl_union_set_get_basic_set_list(wrap);
    5033           0 :         isl_union_set_free(wrap); /* only "list" is needed from here on */
    5034           0 :         return isl_basic_set_list_coefficients(list); /* "list" is handed over to this call */
    5035             : }
    5036             : 
    5037             : /* Construct an LP problem for finding schedule coefficients
    5038             :  * such that the schedule carries as many of the "n_edge" groups of
    5039             :  * dependences as possible based on the corresponding coefficient
    5040             :  * constraints and return the lexicographically smallest non-trivial solution.
    5041             :  * "intra" is the sequence of coefficient constraints for intra-node edges.
    5042             :  * "inter" is the sequence of coefficient constraints for inter-node edges.
    5043             :  * If "want_integral" is set, then compute an integral solution
    5044             :  * for the coefficients rather than using the numerators
    5045             :  * of a rational solution.
    5046             :  * "carry_inter" indicates whether inter-node edges should be carried or
    5047             :  * only respected.
    5048             :  *
    5049             :  * If none of the "n_edge" groups can be carried
    5050             :  * then return an empty vector.  Return NULL if setting up the LP problem fails.
    5051             :  */
    5052           0 : static __isl_give isl_vec *compute_carrying_sol_coef(isl_ctx *ctx,
    5053             :         struct isl_sched_graph *graph, int n_edge,
    5054             :         __isl_keep isl_basic_set_list *intra,
    5055             :         __isl_keep isl_basic_set_list *inter, int want_integral,
    5056             :         int carry_inter)
    5057             : {
    5058             :         isl_basic_set *lp;
    5059             :
    5060           0 :         if (setup_carry_lp(ctx, graph, n_edge, intra, inter, carry_inter) < 0)
    5061           0 :                 return NULL;
    5062             :
    5063           0 :         lp = isl_basic_set_copy(graph->lp); /* the solver gets a copy; graph->lp is kept */
    5064           0 :         return non_neg_lexmin(graph, lp, n_edge, want_integral);
    5065             : }
    5066             : 
    5067             : /* Construct an LP problem for finding schedule coefficients
    5068             :  * such that the schedule carries as many of the validity dependences
    5069             :  * as possible and
    5070             :  * return the lexicographically smallest non-trivial solution.
    5071             :  * If "fallback" is set, then the carrying is performed as a fallback
    5072             :  * for the Pluto-like scheduler.
    5073             :  * If "coincidence" is set, then try and carry coincidence edges as well.
    5074             :  *
    5075             :  * The variable "n_edge" stores the number of groups that should be carried.
    5076             :  * If none of the "n_edge" groups can be carried
    5077             :  * then return an empty vector.  Return NULL on error.
    5078             :  * If, moreover, "n_edge" is zero, then the LP problem does not even
    5079             :  * need to be constructed.
    5080             :  *
    5081             :  * If a fallback solution is being computed, then compute an integral solution
    5082             :  * for the coefficients rather than using the numerators
    5083             :  * of a rational solution.
    5084             :  *
    5085             :  * If a fallback solution is being computed, if there are any intra-node
    5086             :  * dependences, and if requested by the user, then first try
    5087             :  * to only carry those intra-node dependences.
    5088             :  * If this fails to carry any dependences, then try again
    5089             :  * with the inter-node dependences included.
    5090             :  */
    5091           0 : static __isl_give isl_vec *compute_carrying_sol(isl_ctx *ctx,
    5092             :         struct isl_sched_graph *graph, int fallback, int coincidence)
    5093             : {
    5094             :         int n_intra, n_inter;
    5095             :         int n_edge;
    5096           0 :         struct isl_carry carry = { 0 };
    5097             :         isl_vec *sol;
    5098             :
    5099           0 :         carry.intra = collect_intra_validity(ctx, graph, coincidence,
    5100             :                                                 &carry.lineality); /* lineality data is stored here ... */
    5101           0 :         carry.inter = collect_inter_validity(graph, coincidence,
    5102             :                                                 &carry.lineality); /* ... and exploited here */
    5103           0 :         if (!carry.intra || !carry.inter)
    5104             :                 goto error;
    5105           0 :         n_intra = isl_basic_set_list_n_basic_set(carry.intra);
    5106           0 :         n_inter = isl_basic_set_list_n_basic_set(carry.inter);
    5107             :
    5108           0 :         if (fallback && n_intra > 0 &&
    5109           0 :             isl_options_get_schedule_carry_self_first(ctx)) { /* the "requested by the user" part */
    5110           0 :                 sol = compute_carrying_sol_coef(ctx, graph, n_intra,
    5111             :                                 carry.intra, carry.inter, fallback, 0); /* want_integral = fallback, carry_inter = 0 */
    5112           0 :                 if (!sol || sol->size != 0 || n_inter == 0) { /* error, success, or nothing else to try */
    5113           0 :                         isl_carry_clear(&carry);
    5114           0 :                         return sol;
    5115             :                 }
    5116           0 :                 isl_vec_free(sol); /* drop the empty solution before the second attempt */
    5117             :         }
    5118             :
    5119           0 :         n_edge = n_intra + n_inter;
    5120           0 :         if (n_edge == 0) {
    5121           0 :                 isl_carry_clear(&carry);
    5122           0 :                 return isl_vec_alloc(ctx, 0); /* empty vector, no LP problem constructed */
    5123             :         }
    5124             :
    5125           0 :         sol = compute_carrying_sol_coef(ctx, graph, n_edge,
    5126             :                                 carry.intra, carry.inter, fallback, 1); /* want_integral = fallback, carry_inter = 1 */
    5127           0 :         isl_carry_clear(&carry);
    5128           0 :         return sol;
    5129             : error:
    5130           0 :         isl_carry_clear(&carry);
    5131           0 :         return NULL;
    5132             : }
    5133             : 
    5134             : /* Construct a schedule row for each node such that as many validity dependences
    5135             :  * as possible are carried and then continue with the next band.
    5136             :  * If "fallback" is set, then the carrying is performed as a fallback
    5137             :  * for the Pluto-like scheduler.
    5138             :  * If "coincidence" is set, then try and carry coincidence edges as well.
    5139             :  *
    5140             :  * If there are no validity dependences, then no dependence can be carried and
    5141             :  * the procedure is guaranteed to fail.  If there is more than one component,
    5142             :  * then try computing a schedule on each component separately
    5143             :  * to prevent or at least postpone this failure.
    5144             :  *
    5145             :  * If a schedule row is computed, then check that dependences are carried
    5146             :  * for at least one of the edges.
    5147             :  *
    5148             :  * If the computed schedule row turns out to be trivial on one or
    5149             :  * more nodes where it should not be trivial, then we throw it away
    5150             :  * and try again on each component separately.
    5151             :  *
    5152             :  * If there is only one component, then we accept the schedule row anyway,
    5153             :  * but we do not consider it as a complete row and therefore do not
    5154             :  * increment graph->n_row.  Note that the ranks of the nodes that
    5155             :  * do get a non-trivial schedule part will get updated regardless and
    5156             :  * graph->maxvar is computed based on these ranks.  The test for
    5157             :  * whether more schedule rows are required in compute_schedule_wcc
    5158             :  * is therefore not affected.
    5159             :  *
    5160             :  * Insert a band corresponding to the schedule row at position "node"
    5161             :  * of the schedule tree and continue with the construction of the schedule.
    5162             :  * This insertion and the continued construction is performed by split_scaled
    5163             :  * after optionally checking for non-trivial common divisors.
    5164             :  */
    5165           0 : static __isl_give isl_schedule_node *carry(__isl_take isl_schedule_node *node,
    5166             :         struct isl_sched_graph *graph, int fallback, int coincidence)
    5167             : {
    5168             :         int trivial;
    5169             :         isl_ctx *ctx;
    5170             :         isl_vec *sol;
    5171             :
    5172           0 :         if (!node)
    5173           0 :                 return NULL;
    5174             :
    5175           0 :         ctx = isl_schedule_node_get_ctx(node);
    5176           0 :         sol = compute_carrying_sol(ctx, graph, fallback, coincidence);
    5177           0 :         if (!sol)
    5178           0 :                 return isl_schedule_node_free(node);
    5179           0 :         if (sol->size == 0) { /* empty vector: nothing could be carried */
    5180           0 :                 isl_vec_free(sol);
    5181           0 :                 if (graph->scc > 1)
    5182           0 :                         return compute_component_schedule(node, graph, 1);
    5183           0 :                 isl_die(ctx, isl_error_unknown, "unable to carry dependences",
    5184             :                         return isl_schedule_node_free(node));
    5185             :         }
    5186             :
    5187           0 :         trivial = is_any_trivial(graph, sol);
    5188           0 :         if (trivial < 0) {
    5189           0 :                 sol = isl_vec_free(sol); /* presumably leaves "sol" NULL so that update_schedule fails below; confirm */
    5190           0 :         } else if (trivial && graph->scc > 1) {
    5191           0 :                 isl_vec_free(sol);
    5192           0 :                 return compute_component_schedule(node, graph, 1);
    5193             :         }
    5194             :
    5195           0 :         if (update_schedule(graph, sol, 0) < 0)
    5196           0 :                 return isl_schedule_node_free(node);
    5197           0 :         if (trivial)
    5198           0 :                 graph->n_row--; /* single component: row accepted but not counted as complete */
    5199             :
    5200           0 :         return split_scaled(node, graph);
    5201             : }
    5202             : 
    5203             : /* Construct a schedule row for each node such that as many validity dependences
    5204             :  * as possible are carried and then continue with the next band.
    5205             :  * Do so as a fallback for the Pluto-like scheduler.
    5206             :  * If "coincidence" is set, then try and carry coincidence edges as well.
    5207             :  */
    5208           0 : static __isl_give isl_schedule_node *carry_fallback(
    5209             :         __isl_take isl_schedule_node *node, struct isl_sched_graph *graph,
    5210             :         int coincidence)
    5211             : {
    5212           0 :         return carry(node, graph, 1, coincidence); /* fallback = 1 */
    5213             : }
    5214             : 
    5215             : /* Construct a schedule row for each node such that as many validity dependences
    5216             :  * as possible are carried and then continue with the next band.
    5217             :  * Do so for the case where the Feautrier scheduler was selected
    5218             :  * by the user.  No attempt is made to carry coincidence edges.
    5219             :  */
    5220           0 : static __isl_give isl_schedule_node *carry_feautrier(
    5221             :         __isl_take isl_schedule_node *node, struct isl_sched_graph *graph)
    5222             : {
    5223           0 :         return carry(node, graph, 0, 0); /* fallback = 0, coincidence = 0 */
    5224             : }
    5225             : 
    5226             : /* Construct a schedule row for each node such that as many validity dependences
    5227             :  * as possible are carried and then continue with the next band.
    5228             :  * Do so as a fallback for the Pluto-like scheduler, without trying to carry coincidence edges.
    5229             :  */
    5230           0 : static __isl_give isl_schedule_node *carry_dependences(
    5231             :         __isl_take isl_schedule_node *node, struct isl_sched_graph *graph)
    5232             : {
    5233           0 :         return carry_fallback(node, graph, 0); /* coincidence = 0 */
    5234             : }
    5235             : 
    5236             : /* Construct a schedule row for each node such that as many validity or
    5237             :  * coincidence dependences as possible are carried and
    5238             :  * then continue with the next band.
    5239             :  * Do so as a fallback for the Pluto-like scheduler.
    5240             :  */
    5241           0 : static __isl_give isl_schedule_node *carry_coincidence(
    5242             :         __isl_take isl_schedule_node *node, struct isl_sched_graph *graph)
    5243             : {
    5244           0 :         return carry_fallback(node, graph, 1); /* coincidence = 1 */
    5245             : }
    5246             : 
    5247             : /* Topologically sort statements mapped to the same schedule iteration
    5248             :  * and insert a sequence node in front of "node"
    5249             :  * corresponding to this order.
    5250             :  * If "initialized" is set, then it may be assumed that compute_maxvar
    5251             :  * has been called on the current band.  Otherwise, call
    5252             :  * compute_maxvar if and before carry_dependences gets called.
    5253             :  *
    5254             :  * If it turns out to be impossible to sort the statements apart,
    5255             :  * because different dependences impose different orderings
    5256             :  * on the statements, then we extend the schedule such that
    5257             :  * it carries at least one more dependence.
    5258             :  */
    5259           0 : static __isl_give isl_schedule_node *sort_statements(
    5260             :         __isl_take isl_schedule_node *node, struct isl_sched_graph *graph,
    5261             :         int initialized)
    5262             : {
    5263             :         isl_ctx *ctx;
    5264             :         isl_union_set_list *filters;
    5265             :
    5266           0 :         if (!node)
    5267           0 :                 return NULL;
    5268             :
    5269           0 :         ctx = isl_schedule_node_get_ctx(node);
    5270           0 :         if (graph->n < 1)
    5271           0 :                 isl_die(ctx, isl_error_internal,
    5272             :                         "graph should have at least one node",
    5273             :                         return isl_schedule_node_free(node));
    5274             :
    5275           0 :         if (graph->n == 1) /* a single statement needs no sequence node */
    5276           0 :                 return node;
    5277             :
    5278           0 :         if (update_edges(ctx, graph) < 0)
    5279           0 :                 return isl_schedule_node_free(node);
    5280             :
    5281           0 :         if (graph->n_edge == 0) /* no edges after update_edges: "node" is returned unchanged */
    5282           0 :                 return node;
    5283             :
    5284           0 :         if (detect_sccs(ctx, graph) < 0)
    5285           0 :                 return isl_schedule_node_free(node);
    5286             :
    5287           0 :         next_band(graph);
    5288           0 :         if (graph->scc < graph->n) { /* fewer components than nodes: cannot sort all statements apart */
    5289           0 :                 if (!initialized && compute_maxvar(graph) < 0)
    5290           0 :                         return isl_schedule_node_free(node);
    5291           0 :                 return carry_dependences(node, graph);
    5292             :         }
    5293             :
    5294           0 :         filters = extract_sccs(ctx, graph);
    5295           0 :         node = isl_schedule_node_insert_sequence(node, filters);
    5296             :
    5297           0 :         return node;
    5298             : }
    5299             : 
    5300             : /* Are there any (non-empty) (conditional) validity edges in the graph?
    5301             :  */
    5302           0 : static int has_validity_edges(struct isl_sched_graph *graph)
    5303             : {
    5304             :         int i;
    5305             :
    5306           0 :         for (i = 0; i < graph->n_edge; ++i) {
    5307             :                 int empty;
    5308             :
    5309           0 :                 empty = isl_map_plain_is_empty(graph->edge[i].map);
    5310           0 :                 if (empty < 0)
    5311           0 :                         return -1; /* the emptiness test itself failed */
    5312           0 :                 if (empty)
    5313           0 :                         continue; /* empty edges do not count */
    5314           0 :                 if (is_any_validity(&graph->edge[i]))
    5315           0 :                         return 1; /* first match is enough */
    5316             :         }
    5317             :
    5318           0 :         return 0;
    5319             : }
    5320             : 
    5321             : /* Should we apply a Feautrier step?
    5322             :  * That is, did the user request the Feautrier algorithm and are
    5323             :  * there any validity dependences (left)?
    5324             :  */
    5325           0 : static int need_feautrier_step(isl_ctx *ctx, struct isl_sched_graph *graph)
    5326             : {
    5327           0 :         if (ctx->opt->schedule_algorithm != ISL_SCHEDULE_ALGORITHM_FEAUTRIER)
    5328           0 :                 return 0; /* a different algorithm was selected */
    5329             :
    5330           0 :         return has_validity_edges(graph); /* 1, 0 or -1 (error), passed on unchanged */
    5331             : }
    5332             : 
    5333             : /* Compute a schedule for a connected dependence graph using Feautrier's
    5334             :  * multi-dimensional scheduling algorithm and return the updated schedule node.
    5335             :  *
    5336             :  * The original algorithm is described in [1].
    5337             :  * The main idea is to minimize the number of scheduling dimensions, by
    5338             :  * trying to satisfy as many dependences as possible per scheduling dimension.
    5339             :  *
    5340             :  * [1] P. Feautrier, Some Efficient Solutions to the Affine Scheduling
    5341             :  *     Problem, Part II: Multi-Dimensional Time.
    5342             :  *     In Intl. Journal of Parallel Programming, 1992.
    5343             :  */
    5344           0 : static __isl_give isl_schedule_node *compute_schedule_wcc_feautrier(
    5345             :         __isl_take isl_schedule_node *node, struct isl_sched_graph *graph)
    5346             : {
    5347           0 :         return carry_feautrier(node, graph); /* "node" is handed over to carry_feautrier */
    5348             : }
    5349             : 
    5350             : /* Turn off the "local" bit on all (condition) edges.
    5351             :  */
    5352           0 : static void clear_local_edges(struct isl_sched_graph *graph)
    5353             : {
    5354             :         int i;
    5355             :
    5356           0 :         for (i = 0; i < graph->n_edge; ++i)
    5357           0 :                 if (is_condition(&graph->edge[i])) /* edges that are not condition edges are left untouched */
    5358           0 :                         clear_local(&graph->edge[i]);
    5359           0 : }
    5360             : 
    5361             : /* Does "graph" have both condition and conditional validity edges?
    5362             :  */
    5363           0 : static int need_condition_check(struct isl_sched_graph *graph)
    5364             : {
    5365             :         int i;
    5366           0 :         int any_condition = 0;
    5367           0 :         int any_conditional_validity = 0;
    5368             :
    5369           0 :         for (i = 0; i < graph->n_edge; ++i) { /* the two kinds may be found on different edges */
    5370           0 :                 if (is_condition(&graph->edge[i]))
    5371           0 :                         any_condition = 1;
    5372           0 :                 if (is_conditional_validity(&graph->edge[i]))
    5373           0 :                         any_conditional_validity = 1;
    5374             :         }
    5375             :
    5376           0 :         return any_condition && any_conditional_validity;
    5377             : }
    5378             : 
    5379             : /* Does "graph" contain any coincidence edge?
    5380             :  */
    5381           0 : static int has_any_coincidence(struct isl_sched_graph *graph)
    5382             : {
    5383             :         int i;
    5384             :
    5385           0 :         for (i = 0; i < graph->n_edge; ++i)
    5386           0 :                 if (is_coincidence(&graph->edge[i])) /* note: unlike has_validity_edges, no emptiness test here */
    5387           0 :                         return 1;
    5388             :
    5389           0 :         return 0;
    5390             : }
    5391             : 
    5392             : /* Extract the final schedule row as a map with the iteration domain
    5393             :  * of "node" as domain.
    5394             :  */
    5395           0 : static __isl_give isl_map *final_row(struct isl_sched_node *node)
    5396             : {
    5397             :         isl_multi_aff *ma;
    5398             :         int row;
    5399             :
    5400           0 :         row = isl_mat_rows(node->sched) - 1; /* index of the last row of node->sched */
    5401           0 :         ma = node_extract_partial_schedule_multi_aff(node, row, 1); /* presumably 1 row starting at "row"; confirm */
    5402           0 :         return isl_map_from_multi_aff(ma);
    5403             : }
    5404             : 
    5405             : /* Is the conditional validity dependence in the edge with index "edge_index"
    5406             :  * violated by the latest (i.e., final) row of the schedule?
    5407             :  * That is, is i scheduled after j
    5408             :  * for any conditional validity dependence i -> j?
    5409             :  */
    5410           0 : static int is_violated(struct isl_sched_graph *graph, int edge_index)
    5411             : {
    5412             :         isl_map *src_sched, *dst_sched, *map;
    5413           0 :         struct isl_sched_edge *edge = &graph->edge[edge_index];
    5414             :         int empty;
    5415             :
    5416           0 :         src_sched = final_row(edge->src);
    5417           0 :         dst_sched = final_row(edge->dst);
    5418           0 :         map = isl_map_copy(edge->map); /* work on a copy; edge->map is kept */
    5419           0 :         map = isl_map_apply_domain(map, src_sched); /* sources replaced by their final schedule row */
    5420           0 :         map = isl_map_apply_range(map, dst_sched); /* sinks replaced by their final schedule row */
    5421           0 :         map = isl_map_order_gt(map, isl_dim_in, 0, isl_dim_out, 0); /* keep pairs with source value > sink value */
    5422           0 :         empty = isl_map_is_empty(map);
    5423           0 :         isl_map_free(map);
    5424             :
    5425           0 :         if (empty < 0)
    5426           0 :                 return -1; /* error */
    5427             :
    5428           0 :         return !empty; /* any remaining pair is a violation */
    5429             : }
    5430             : 
    5431             : /* Does "graph" have any satisfied condition edges that
    5432             :  * are adjacent to the conditional validity constraint with
    5433             :  * domain "conditional_source" and range "conditional_sink"?
    5434             :  *
    5435             :  * A satisfied condition is one that is not local.
    5436             :  * If a condition was forced to be local already (i.e., marked as local)
    5437             :  * then there is no need to check if it is in fact local.
    5438             :  *
    5439             :  * Additionally, mark all adjacent condition edges found as local.
    5440             :  */
    5441           0 : static int has_adjacent_true_conditions(struct isl_sched_graph *graph,
    5442             :         __isl_keep isl_union_set *conditional_source,
    5443             :         __isl_keep isl_union_set *conditional_sink)
    5444             : {
    5445             :         int i;
    5446           0 :         int any = 0;
    5447             :
    5448           0 :         for (i = 0; i < graph->n_edge; ++i) {
    5449             :                 int adjacent, local;
    5450             :                 isl_union_map *condition;
    5451             :
    5452           0 :                 if (!is_condition(&graph->edge[i]))
    5453           0 :                         continue;
    5454           0 :                 if (is_local(&graph->edge[i])) /* already marked local: nothing to check */
    5455           0 :                         continue;
    5456             :
    5457           0 :                 condition = graph->edge[i].tagged_condition; /* borrowed pointer, not freed here */
    5458           0 :                 adjacent = domain_intersects(condition, conditional_sink); /* domain of condition against the sinks */
    5459           0 :                 if (adjacent >= 0 && !adjacent) /* only if that found nothing (and did not fail) */
    5460           0 :                         adjacent = range_intersects(condition,
    5461             :                                                         conditional_source); /* range of condition against the sources */
    5462           0 :                 if (adjacent < 0)
    5463           0 :                         return -1;
    5464           0 :                 if (!adjacent)
    5465           0 :                         continue;
    5466             :
    5467           0 :                 set_local(&graph->edge[i]); /* marked before, and regardless of, the test below */
    5468             :
    5469           0 :                 local = is_condition_false(&graph->edge[i]);
    5470           0 :                 if (local < 0)
    5471           0 :                         return -1;
    5472           0 :                 if (!local)
    5473           0 :                         any = 1; /* keep looping so that all adjacent edges get marked */
    5474             :         }
    5475             :
    5476           0 :         return any;
    5477             : }
    5478             : 
    5479             : /* Are there any violated conditional validity dependences with
    5480             :  * adjacent condition dependences that are not local with respect
    5481             :  * to the current schedule?
    5482             :  * That is, is the conditional validity constraint violated?
    5483             :  *
    5484             :  * Additionally, mark all those adjacent condition dependences as local.
    5485             :  * We also mark those adjacent condition dependences that were not marked
    5486             :  * as local before, but just happened to be local already.  This ensures
    5487             :  * that they remain local if the schedule is recomputed.
    5488             :  *
    5489             :  * We first collect domain and range of all violated conditional validity
    5490             :  * dependences and then check if there are any adjacent non-local
    5491             :  * condition dependences.
    5492             :  */
    5493           0 : static int has_violated_conditional_constraint(isl_ctx *ctx,
    5494             :         struct isl_sched_graph *graph)
    5495             : {
    5496             :         int i;
    5497           0 :         int any = 0;
    5498             :         isl_union_set *source, *sink;
    5499             :
    5500           0 :         source = isl_union_set_empty(isl_space_params_alloc(ctx, 0)); /* start from empty sets ... */
    5501           0 :         sink = isl_union_set_empty(isl_space_params_alloc(ctx, 0)); /* ... and accumulate in the loop */
    5502           0 :         for (i = 0; i < graph->n_edge; ++i) {
    5503             :                 isl_union_set *uset;
    5504             :                 isl_union_map *umap;
    5505             :                 int violated;
    5506             :
    5507           0 :                 if (!is_conditional_validity(&graph->edge[i]))
    5508           0 :                         continue;
    5509             :
    5510           0 :                 violated = is_violated(graph, i);
    5511           0 :                 if (violated < 0)
    5512           0 :                         goto error;
    5513           0 :                 if (!violated)
    5514           0 :                         continue;
    5515             :
    5516           0 :                 any = 1; /* at least one violated conditional validity edge */
    5517             :
    5518           0 :                 umap = isl_union_map_copy(graph->edge[i].tagged_validity);
    5519           0 :                 uset = isl_union_map_domain(umap); /* sources of the violated edge */
    5520           0 :                 source = isl_union_set_union(source, uset);
    5521           0 :                 source = isl_union_set_coalesce(source);
    5522             :
    5523           0 :                 umap = isl_union_map_copy(graph->edge[i].tagged_validity); /* fresh copy for the range */
    5524           0 :                 uset = isl_union_map_range(umap); /* sinks of the violated edge */
    5525           0 :                 sink = isl_union_set_union(sink, uset);
    5526           0 :                 sink = isl_union_set_coalesce(sink);
    5527             :         }
    5528             :
    5529           0 :         if (any)
    5530           0 :                 any = has_adjacent_true_conditions(graph, source, sink); /* may reset "any" to 0 or to -1 on error */
    5531             :
    5532           0 :         isl_union_set_free(source);
    5533           0 :         isl_union_set_free(sink);
    5534           0 :         return any;
    5535             : error:
    5536           0 :         isl_union_set_free(source);
    5537           0 :         isl_union_set_free(sink);
    5538           0 :         return -1;
    5539             : }
    5540             : 
    5541             : /* Examine the current band (the rows between graph->band_start and
    5542             :  * graph->n_total_row), deciding whether to drop it or add it to "node"
    5543             :  * and then continue with the computation of the next band, if any.
    5544             :  * If "initialized" is set, then it may be assumed that compute_maxvar
    5545             :  * has been called on the current band.  Otherwise, call
    5546             :  * compute_maxvar if and before carry_dependences gets called.
    5547             :  *
    5548             :  * The caller keeps looking for a new row as long as
    5549             :  * graph->n_row < graph->maxvar.  If the latest attempt to find
    5550             :  * such a row failed (i.e., we still have graph->n_row < graph->maxvar),
    5551             :  * then we either
    5552             :  * - split between SCCs and start over (assuming we found an interesting
    5553             :  *      pair of SCCs between which to split)
    5554             :  * - continue with the next band (assuming the current band has at least
    5555             :  *      one row)
    5556             :  * - if there is more than one SCC left, then split along all SCCs
    5557             :  * - if outer coincidence needs to be enforced, then try to carry as many
    5558             :  *      validity or coincidence dependences as possible and
    5559             :  *      continue with the next band
    5560             :  * - try to carry as many validity dependences as possible and
    5561             :  *      continue with the next band
    5562             :  * In each case, we first insert a band node in the schedule tree
    5563             :  * if any rows have been computed.
    5564             :  *
    5565             :  * If the caller managed to complete the schedule and the current band
    5566             :  * is empty, then finish off by topologically
    5567             :  * sorting the statements based on the remaining dependences.
    5568             :  * If, on the other hand, the current band has at least one row,
    5569             :  * then continue with the next band.  Note that this next band
    5570             :  * will necessarily be empty, but the graph may still be split up
    5571             :  * into weakly connected components before arriving back here.
    5572             :  */
    5573           0 : static __isl_give isl_schedule_node *compute_schedule_finish_band(
    5574             :         __isl_take isl_schedule_node *node, struct isl_sched_graph *graph,
    5575             :         int initialized)
    5576             : {
    5577             :         int empty;
    5578             : 
    5579           0 :         if (!node)
    5580           0 :                 return NULL;
    5581             : 
    5582           0 :         empty = graph->n_total_row == graph->band_start;  /* the current band has no rows */
    5583           0 :         if (graph->n_row < graph->maxvar) {  /* the caller stopped before reaching maxvar rows */
    5584             :                 isl_ctx *ctx;
    5585             : 
    5586           0 :                 ctx = isl_schedule_node_get_ctx(node);
    5587           0 :                 if (!ctx->opt->schedule_maximize_band_depth && !empty)  /* not maximizing depth: close a non-empty band right away */
    5588           0 :                         return compute_next_band(node, graph, 1);
    5589           0 :                 if (graph->src_scc >= 0)  /* a pair of SCCs to split between has been recorded */
    5590           0 :                         return compute_split_schedule(node, graph);
    5591           0 :                 if (!empty)
    5592           0 :                         return compute_next_band(node, graph, 1);
    5593           0 :                 if (graph->scc > 1)
    5594           0 :                         return compute_component_schedule(node, graph, 1);
    5595           0 :                 if (!initialized && compute_maxvar(graph) < 0)  /* compute_maxvar is only called if the caller has not done so */
    5596           0 :                         return isl_schedule_node_free(node);  /* presumably yields NULL after freeing "node" - TODO confirm */
    5597           0 :                 if (isl_options_get_schedule_outer_coincidence(ctx))
    5598           0 :                         return carry_coincidence(node, graph);
    5599           0 :                 return carry_dependences(node, graph);
    5600             :         }
    5601             : 
    5602           0 :         if (!empty)  /* schedule complete, but the band still has to be inserted */
    5603           0 :                 return compute_next_band(node, graph, 1);
    5604           0 :         return sort_statements(node, graph, initialized);  /* schedule complete and band empty */
    5605             : }
    5606             : 
    5607             : /* Construct a band of schedule rows for a connected dependence graph.
    5608             :  * The caller is responsible for determining the strongly connected
    5609             :  * components and calling compute_maxvar first.
    5610             :  *
    5611             :  * We try to find a sequence of as many schedule rows as possible that result
    5612             :  * in non-negative dependence distances (independent of the previous rows
    5613             :  * in the sequence, i.e., such that the sequence is tilable), with as
    5614             :  * many of the initial rows as possible satisfying the coincidence constraints.
    5615             :  * The computation stops if we can't find any more rows or if we have found
    5616             :  * all the rows we wanted to find.
    5617             :  *
    5618             :  * If ctx->opt->schedule_outer_coincidence is set, then we force the
    5619             :  * outermost dimension to satisfy the coincidence constraints.  If this
    5620             :  * turns out to be impossible, we fall back on the general scheme above
    5621             :  * and try to carry as many dependences as possible.
    5622             :  *
    5623             :  * If "graph" contains both condition and conditional validity dependences,
    5624             :  * then we need to check that the conditional schedule constraint
    5625             :  * is satisfied, i.e., there are no violated conditional validity dependences
    5626             :  * that are adjacent to any non-local condition dependences.
    5627             :  * If there are, then we mark all those adjacent condition dependences
    5628             :  * as local and recompute the current band.  Those dependences that
    5629             :  * are marked local will then be forced to be local.
    5630             :  * The initial computation is performed with no dependences marked as local.
    5631             :  * If we are lucky, then there will be no violated conditional validity
    5632             :  * dependences adjacent to any non-local condition dependences.
    5633             :  * Otherwise, we mark some additional condition dependences as local and
    5634             :  * recompute.  We continue this process until there are no violations left or
    5635             :  * until we are no longer able to compute a schedule.
    5636             :  * Since there are only a finite number of dependences,
    5637             :  * there will only be a finite number of iterations.
    5638             :  */
    5639           0 : static isl_stat compute_schedule_wcc_band(isl_ctx *ctx,
    5640             :         struct isl_sched_graph *graph)
    5641             : {
    5642             :         int has_coincidence;
    5643             :         int use_coincidence;
    5644           0 :         int force_coincidence = 0;
    5645             :         int check_conditional;
    5646             : 
    5647           0 :         if (sort_sccs(graph) < 0)
    5648           0 :                 return isl_stat_error;
    5649             : 
    5650           0 :         clear_local_edges(graph);  /* the first attempt runs with no dependences marked local */
    5651           0 :         check_conditional = need_condition_check(graph);
    5652           0 :         has_coincidence = has_any_coincidence(graph);
    5653             : 
    5654           0 :         if (ctx->opt->schedule_outer_coincidence)
    5655           0 :                 force_coincidence = 1;
    5656             : 
    5657           0 :         use_coincidence = has_coincidence;  /* start out enforcing coincidence whenever there is any */
    5658           0 :         while (graph->n_row < graph->maxvar) {
    5659             :                 isl_vec *sol;
    5660             :                 int violated;
    5661             :                 int coincident;
    5662             : 
    5663           0 :                 graph->src_scc = -1;  /* forget any split pair from a previous iteration */
    5664           0 :                 graph->dst_scc = -1;
    5665             : 
    5666           0 :                 if (setup_lp(ctx, graph, use_coincidence) < 0)
    5667           0 :                         return isl_stat_error;
    5668           0 :                 sol = solve_lp(ctx, graph);
    5669           0 :                 if (!sol)
    5670           0 :                         return isl_stat_error;
    5671           0 :                 if (sol->size == 0) {  /* empty vector: no further row could be found */
    5672           0 :                         int empty = graph->n_total_row == graph->band_start;
    5673             : 
    5674           0 :                         isl_vec_free(sol);
    5675           0 :                         if (use_coincidence && (!force_coincidence || !empty)) {  /* retry without coincidence, unless it is forced on a still-empty band */
    5676           0 :                                 use_coincidence = 0;
    5677           0 :                                 continue;
    5678             :                         }
    5679           0 :                         return isl_stat_ok;  /* not an error: the band is left as it is, possibly incomplete */
    5680             :                 }
    5681           0 :                 coincident = !has_coincidence || use_coincidence;  /* nothing to violate, or coincidence was enforced for this row */
    5682           0 :                 if (update_schedule(graph, sol, coincident) < 0)  /* "sol" is not freed here; presumably update_schedule takes it - TODO confirm */
    5683           0 :                         return isl_stat_error;
    5684             : 
    5685           0 :                 if (!check_conditional)
    5686           0 :                         continue;
    5687           0 :                 violated = has_violated_conditional_constraint(ctx, graph);
    5688           0 :                 if (violated < 0)
    5689           0 :                         return isl_stat_error;
    5690           0 :                 if (!violated)
    5691           0 :                         continue;
    5692           0 :                 if (reset_band(graph) < 0)  /* violation found: the current band is recomputed */
    5693           0 :                         return isl_stat_error;
    5694           0 :                 use_coincidence = has_coincidence;  /* the recomputation starts with coincidence enforced again */
    5695             :         }
    5696             : 
    5697           0 :         return isl_stat_ok;
    5698             : }
    5699             : 
    5700             : /* Compute a schedule for a connected dependence graph by considering
    5701             :  * the graph as a whole and return the updated schedule node.
    5702             :  *
    5703             :  * The actual schedule rows of the current band are computed by
    5704             :  * compute_schedule_wcc_band.  compute_schedule_finish_band takes
    5705             :  * care of integrating the band into "node" and continuing
    5706             :  * the computation.
    5707             :  */
    5708           0 : static __isl_give isl_schedule_node *compute_schedule_wcc_whole(
    5709             :         __isl_take isl_schedule_node *node, struct isl_sched_graph *graph)
    5710             : {
    5711             :         isl_ctx *ctx;
    5712             : 
    5713           0 :         if (!node)
    5714           0 :                 return NULL;
    5715             : 
    5716           0 :         ctx = isl_schedule_node_get_ctx(node);
    5717           0 :         if (compute_schedule_wcc_band(ctx, graph) < 0)
    5718           0 :                 return isl_schedule_node_free(node);  /* error: "node" is released rather than returned */
    5719             : 
    5720           0 :         return compute_schedule_finish_band(node, graph, 1);  /* 1: compute_maxvar is assumed to have been called already */
    5721             : }
    5722             : 
    5723             : /* Clustering information used by compute_schedule_wcc_clustering.
    5724             :  *
    5725             :  * "n" is the number of SCCs in the original dependence graph
    5726             :  * "scc" is an array of "n" elements, each representing an SCC
    5727             :  * of the original dependence graph.  All entries in the same cluster
    5728             :  * have the same number of schedule rows.
    5729             :  * "scc_cluster" maps each SCC index to the cluster to which it belongs,
    5730             :  * where each cluster is represented by the index of the first SCC
    5731             :  * in the cluster.  Initially, each SCC belongs to a cluster containing
    5732             :  * only that SCC.
    5733             :  *
    5734             :  * "scc_in_merge" is used by merge_clusters_along_edge to keep
    5735             :  * track of which SCCs need to be merged.
    5736             :  *
    5737             :  * "cluster" contains the merged clusters of SCCs after the clustering
    5738             :  * has completed.
    5739             :  *
    5740             :  * "scc_node" is a temporary data structure used inside copy_partial.
    5741             :  * For each SCC, it keeps track of the number of nodes in the SCC
    5742             :  * that have already been copied.
    5743             :  */
    5744             : struct isl_clustering {
    5745             :         int n;  /* number of SCCs in the original dependence graph */
    5746             :         struct isl_sched_graph *scc;  /* "n" graphs, one per SCC */
    5747             :         struct isl_sched_graph *cluster;  /* merged clusters, available once clustering has completed */
    5748             :         int *scc_cluster;  /* SCC index -> index of the first SCC in its cluster */
    5749             :         int *scc_node;  /* scratch for copy_partial: nodes already copied, per SCC */
    5750             :         int *scc_in_merge;  /* per-SCC flag: part of the merge currently being considered */
    5751             : };
    5752             : 
    5753             : /* Initialize the clustering data structure "c" from "graph".
    5754             :  *
    5755             :  * In particular, allocate memory, extract the SCCs from "graph"
    5756             :  * into c->scc, initialize scc_cluster and construct
    5757             :  * a band of schedule rows for each SCC.
    5758             :  * Within each SCC, there is only one SCC by definition.
    5759             :  * Each SCC initially belongs to a cluster containing only that SCC.
    5760             :  */
    5761           0 : static isl_stat clustering_init(isl_ctx *ctx, struct isl_clustering *c,
    5762             :         struct isl_sched_graph *graph)
    5763             : {
    5764             :         int i;
    5765             : 
    5766           0 :         c->n = graph->scc;
    5767           0 :         c->scc = isl_calloc_array(ctx, struct isl_sched_graph, c->n);
    5768           0 :         c->cluster = isl_calloc_array(ctx, struct isl_sched_graph, c->n);
    5769           0 :         c->scc_cluster = isl_calloc_array(ctx, int, c->n);
    5770           0 :         c->scc_node = isl_calloc_array(ctx, int, c->n);
    5771           0 :         c->scc_in_merge = isl_calloc_array(ctx, int, c->n);
    5772           0 :         if (!c->scc || !c->cluster ||
    5773           0 :             !c->scc_cluster || !c->scc_node || !c->scc_in_merge)
    5774           0 :                 return isl_stat_error;  /* nothing is freed here; presumably the caller runs clustering_free - TODO confirm */
    5775             : 
    5776           0 :         for (i = 0; i < c->n; ++i) {
    5777           0 :                 if (extract_sub_graph(ctx, graph, &node_scc_exactly,
    5778           0 :                                         &edge_scc_exactly, i, &c->scc[i]) < 0)
    5779           0 :                         return isl_stat_error;
    5780           0 :                 c->scc[i].scc = 1;  /* the extracted sub-graph consists of a single SCC */
    5781           0 :                 if (compute_maxvar(&c->scc[i]) < 0)
    5782           0 :                         return isl_stat_error;
    5783           0 :                 if (compute_schedule_wcc_band(ctx, &c->scc[i]) < 0)  /* compute_maxvar has just been called, as this function requires */
    5784           0 :                         return isl_stat_error;
    5785           0 :                 c->scc_cluster[i] = i;  /* each SCC starts out as its own cluster */
    5786             :         }
    5787             : 
    5788           0 :         return isl_stat_ok;
    5789             : }
    5790             : 
    5791             : /* Free all memory allocated for "c".
    5792             :  */
    5793           0 : static void clustering_free(isl_ctx *ctx, struct isl_clustering *c)
    5794             : {
    5795             :         int i;
    5796             : 
    5797           0 :         if (c->scc)  /* may be NULL if the allocation in clustering_init failed */
    5798           0 :                 for (i = 0; i < c->n; ++i)
    5799           0 :                         graph_free(ctx, &c->scc[i]);
    5800           0 :         free(c->scc);
    5801           0 :         if (c->cluster)  /* same NULL guard as for c->scc */
    5802           0 :                 for (i = 0; i < c->n; ++i)
    5803           0 :                         graph_free(ctx, &c->cluster[i]);
    5804           0 :         free(c->cluster);
    5805           0 :         free(c->scc_cluster);
    5806           0 :         free(c->scc_node);
    5807           0 :         free(c->scc_in_merge);  /* "c" itself is not freed and its pointers are left dangling */
    5808           0 : }
    5809             : 
    5810             : /* Should we refrain from merging the cluster in "graph" with
    5811             :  * any other cluster?
    5812             :  * In particular, is its current schedule band empty and incomplete?
    5813             :  */
    5814           0 : static int bad_cluster(struct isl_sched_graph *graph)
    5815             : {
    5816           0 :         return graph->n_row < graph->maxvar &&  /* incomplete: fewer rows than maxvar */
    5817           0 :                 graph->n_total_row == graph->band_start;  /* empty: no rows in the current band */
    5818             : }
    5819             : 
    5820             : /* Is "edge" a proximity edge with a non-empty dependence relation?
    5821             :  */
    5822           0 : static isl_bool is_non_empty_proximity(struct isl_sched_edge *edge)
    5823             : {
    5824           0 :         if (!is_proximity(edge))
    5825           0 :                 return isl_bool_false;
    5826           0 :         return isl_bool_not(isl_map_plain_is_empty(edge->map));  /* presumably an error value passes through isl_bool_not unchanged (find_proximity tests for < 0) - TODO confirm */
    5827             : }
    5828             : 
    5829             : /* Return the index of an edge in "graph" that can be used to merge
    5830             :  * two clusters in "c".
    5831             :  * Return graph->n_edge if no such edge can be found.
    5832             :  * Return -1 on error.
    5833             :  *
    5834             :  * In particular, return a proximity edge between two clusters
    5835             :  * that is not marked "no_merge" and such that neither of the
    5836             :  * two clusters has an incomplete, empty band.
    5837             :  *
    5838             :  * If there are multiple such edges, then try and find the most
    5839             :  * appropriate edge to use for merging.  In particular, pick the edge
    5840             :  * with the greatest weight.  If there are multiple of those,
    5841             :  * then pick one with the shortest distance between
    5842             :  * the two cluster representatives.
    5843             :  */
    5844           0 : static int find_proximity(struct isl_sched_graph *graph,
    5845             :         struct isl_clustering *c)
    5846             : {
    5847           0 :         int i, best = graph->n_edge, best_dist, best_weight;  /* best == n_edge means "none yet"; best_dist/best_weight are only read once best has been set */
    5848             : 
    5849           0 :         for (i = 0; i < graph->n_edge; ++i) {
    5850           0 :                 struct isl_sched_edge *edge = &graph->edge[i];
    5851             :                 int dist, weight;
    5852             :                 isl_bool prox;
    5853             : 
    5854           0 :                 prox = is_non_empty_proximity(edge);
    5855           0 :                 if (prox < 0)
    5856           0 :                         return -1;
    5857           0 :                 if (!prox)
    5858           0 :                         continue;
    5859           0 :                 if (edge->no_merge)
    5860           0 :                         continue;
    5861           0 :                 if (bad_cluster(&c->scc[edge->src->scc]) ||  /* looks at the SCC graphs of the two end points */
    5862           0 :                     bad_cluster(&c->scc[edge->dst->scc]))
    5863           0 :                         continue;
    5864           0 :                 dist = c->scc_cluster[edge->dst->scc] -
    5865           0 :                         c->scc_cluster[edge->src->scc];
    5866           0 :                 if (dist == 0)  /* both ends already belong to the same cluster */
    5867           0 :                         continue;
    5868           0 :                 weight = edge->weight;
    5869           0 :                 if (best < graph->n_edge) {  /* only compare once a candidate exists */
    5870           0 :                         if (best_weight > weight)
    5871           0 :                                 continue;
    5872           0 :                         if (best_weight == weight && best_dist <= dist)  /* NOTE(review): dist is signed, so a negative value beats any positive one - confirm this is intended */
    5873           0 :                                 continue;
    5874             :                 }
    5875           0 :                 best = i;
    5876           0 :                 best_dist = dist;
    5877           0 :                 best_weight = weight;
    5878             :         }
    5879             : 
    5880           0 :         return best;  /* still graph->n_edge if no suitable edge was found */
    5881             : }
    5882             : 
    5883             : /* Internal data structure used in mark_merge_sccs.
    5884             :  *
    5885             :  * "graph" is the dependence graph in which a strongly connected
    5886             :  * component is constructed.
    5887             :  * "scc_cluster" maps each SCC index to the cluster to which it belongs.
    5888             :  * "src" and "dst" are the indices of the nodes that are being merged.
    5889             :  */
    5890             : struct isl_mark_merge_sccs_data {
    5891             :         struct isl_sched_graph *graph;  /* dependence graph in which the component is constructed */
    5892             :         int *scc_cluster;  /* SCC index -> cluster index */
    5893             :         int src;  /* index in graph->node of one of the two nodes being merged */
    5894             :         int dst;  /* index in graph->node of the other node being merged */
    5895             : };
    5896             : 
    5897             : /* Check whether the cluster containing node "i" depends on the cluster
    5898             :  * containing node "j".  If "i" and "j" belong to the same cluster,
    5899             :  * then they are taken to depend on each other to ensure that
    5900             :  * the resulting strongly connected component consists of complete
    5901             :  * clusters.  Furthermore, if "i" and "j" are the two nodes that
    5902             :  * are being merged, then they are taken to depend on each other as well.
    5903             :  * Otherwise, check if there is a (conditional) validity dependence
    5904             :  * from node[j] to node[i], forcing node[i] to follow node[j].
    5905             :  */
    5906           0 : static isl_bool cluster_follows(int i, int j, void *user)
    5907             : {
    5908           0 :         struct isl_mark_merge_sccs_data *data = user;
    5909           0 :         struct isl_sched_graph *graph = data->graph;
    5910           0 :         int *scc_cluster = data->scc_cluster;
    5911             : 
    5912           0 :         if (data->src == i && data->dst == j)  /* the two nodes being merged follow each other ... */
    5913           0 :                 return isl_bool_true;
    5914           0 :         if (data->src == j && data->dst == i)  /* ... in both directions */
    5915           0 :                 return isl_bool_true;
    5916           0 :         if (scc_cluster[graph->node[i].scc] == scc_cluster[graph->node[j].scc])  /* same cluster */
    5917           0 :                 return isl_bool_true;
    5918             : 
    5919           0 :         return graph_has_validity_edge(graph, &graph->node[j], &graph->node[i]);  /* note the order: edge from node[j] to node[i] */
    5920             : }
    5921             : 
    5922             : /* Mark all SCCs that belong to either of the two clusters in "c"
    5923             :  * connected by the edge in "graph" with index "edge", or to any
    5924             :  * of the intermediate clusters.
    5925             :  * The marking is recorded in c->scc_in_merge.
    5926             :  *
    5927             :  * The given edge has been selected for merging two clusters,
    5928             :  * meaning that there is at least a proximity edge between the two nodes.
    5929             :  * However, there may also be (indirect) validity dependences
    5930             :  * between the two nodes.  When merging the two clusters, all clusters
    5931             :  * containing one or more of the intermediate nodes along the
    5932             :  * indirect validity dependences need to be merged in as well.
    5933             :  *
    5934             :  * First collect all such nodes by computing the strongly connected
    5935             :  * component (SCC) containing the two nodes connected by the edge, where
    5936             :  * the two nodes are considered to depend on each other to make
    5937             :  * sure they end up in the same SCC.  Similarly, each node is considered
    5938             :  * to depend on every other node in the same cluster to ensure
    5939             :  * that the SCC consists of complete clusters.
    5940             :  *
    5941             :  * Then the original SCCs that contain any of these nodes are marked
    5942             :  * in c->scc_in_merge.
    5943             :  */
    5944           0 : static isl_stat mark_merge_sccs(isl_ctx *ctx, struct isl_sched_graph *graph,
    5945             :         int edge, struct isl_clustering *c)
    5946             : {
    5947             :         struct isl_mark_merge_sccs_data data;
    5948             :         struct isl_tarjan_graph *g;
    5949             :         int i;
    5950             : 
    5951           0 :         for (i = 0; i < c->n; ++i)  /* clear the marks left by any previous call */
    5952           0 :                 c->scc_in_merge[i] = 0;
    5953             : 
    5954           0 :         data.graph = graph;
    5955           0 :         data.scc_cluster = c->scc_cluster;
    5956           0 :         data.src = graph->edge[edge].src - graph->node;  /* pointer difference = index of the node in graph->node */
    5957           0 :         data.dst = graph->edge[edge].dst - graph->node;
    5958             : 
    5959           0 :         g = isl_tarjan_graph_component(ctx, graph->n, data.dst,
    5960             :                                         &cluster_follows, &data);
    5961           0 :         if (!g)
    5962           0 :                 goto error;  /* g is NULL here, so the free below is handed a NULL pointer */
    5963             : 
    5964           0 :         i = g->op;  /* presumably the number of entries written to g->order - TODO confirm */
    5965           0 :         if (i < 3)  /* two nodes plus the -1 end marker */
    5966           0 :                 isl_die(ctx, isl_error_internal,
    5967             :                         "expecting at least two nodes in component",
    5968             :                         goto error);
    5969           0 :         if (g->order[--i] != -1)
    5970           0 :                 isl_die(ctx, isl_error_internal,
    5971             :                         "expecting end of component marker", goto error);
    5972             : 
    5973           0 :         for (--i; i >= 0 && g->order[i] != -1; --i) {  /* walk back to the previous -1 marker or the start of the array */
    5974           0 :                 int scc = graph->node[g->order[i]].scc;
    5975           0 :                 c->scc_in_merge[scc] = 1;
    5976             :         }
    5977             : 
    5978           0 :         isl_tarjan_graph_free(g);
    5979           0 :         return isl_stat_ok;
    5980             : error:
    5981           0 :         isl_tarjan_graph_free(g);
    5982           0 :         return isl_stat_error;
    5983             : }
    5984             : 
    5985             : /* Construct the identifier "cluster_i".
    5986             :  */
    5987           0 : static __isl_give isl_id *cluster_id(isl_ctx *ctx, int i)
    5988             : {
    5989             :         char name[40];  /* room for "cluster_" plus the decimal form of an int; snprintf truncates in any case */
    5990             : 
    5991           0 :         snprintf(name, sizeof(name), "cluster_%d", i);
    5992           0 :         return isl_id_alloc(ctx, name, NULL);  /* no user pointer is attached to the id */
    5993             : }
    5994             : 
    5995             : /* Construct the space of the cluster with index "i" containing
    5996             :  * the strongly connected component "scc".
    5997             :  *
    5998             :  * In particular, construct a space called cluster_i with dimension equal
    5999             :  * to the number of schedule rows in the current band of "scc".
    6000             :  */
    6001           0 : static __isl_give isl_space *cluster_space(struct isl_sched_graph *scc, int i)
    6002             : {
    6003             :         int nvar;
    6004             :         isl_space *space;
    6005             :         isl_id *id;
    6006             : 
    6007           0 :         nvar = scc->n_total_row - scc->band_start;  /* number of rows in the current band */
    6008           0 :         space = isl_space_copy(scc->node[0].space);  /* start from a copy of the first node's space */
    6009           0 :         space = isl_space_params(space);
    6010           0 :         space = isl_space_set_from_params(space);
    6011           0 :         space = isl_space_add_dims(space, isl_dim_set, nvar);
    6012           0 :         id = cluster_id(isl_space_get_ctx(space), i);
    6013           0 :         space = isl_space_set_tuple_id(space, isl_dim_set, id);  /* names the set tuple "cluster_i" */
    6014             : 
    6015           0 :         return space;
    6016             : }
    6017             : 
    6018             : /* Collect the domain of the graph for merging clusters.
    6019             :  *
    6020             :  * In particular, for each cluster with first SCC "i", construct
    6021             :  * a set in the space called cluster_i with dimension equal
    6022             :  * to the number of schedule rows in the current band of the cluster.
    6023             :  */
    6024           0 : static __isl_give isl_union_set *collect_domain(isl_ctx *ctx,
    6025             :         struct isl_sched_graph *graph, struct isl_clustering *c)
    6026             : {
    6027             :         int i;
    6028             :         isl_space *space;
    6029             :         isl_union_set *domain;
    6030             : 
    6031           0 :         space = isl_space_params_alloc(ctx, 0);
    6032           0 :         domain = isl_union_set_empty(space);
    6033             : 
    6034           0 :         for (i = 0; i < graph->scc; ++i) {
    6035             :                 isl_space *space;  /* shadows the function-level "space" */
    6036             : 
    6037           0 :                 if (!c->scc_in_merge[i])  /* SCC is not part of the merge */
    6038           0 :                         continue;
    6039           0 :                 if (c->scc_cluster[i] != i)  /* only the first SCC of a cluster contributes a set */
    6040           0 :                         continue;
    6041           0 :                 space = cluster_space(&c->scc[i], i);
    6042           0 :                 domain = isl_union_set_add_set(domain, isl_set_universe(space));
    6043             :         }
    6044             : 
    6045           0 :         return domain;
    6046             : }
    6047             : 
    6048             : /* Construct a map from the original instances to the corresponding
    6049             :  * cluster instance in the current bands of the clusters in "c".
    6050             :  */
    6051           0 : static __isl_give isl_union_map *collect_cluster_map(isl_ctx *ctx,
    6052             :         struct isl_sched_graph *graph, struct isl_clustering *c)
    6053             : {
    6054             :         int i, j;
    6055             :         isl_space *space;
    6056             :         isl_union_map *cluster_map;
    6057             : 
    6058           0 :         space = isl_space_params_alloc(ctx, 0);
    6059           0 :         cluster_map = isl_union_map_empty(space);
    6060           0 :         for (i = 0; i < graph->scc; ++i) {
    6061             :                 int start, n;
    6062             :                 isl_id *id;
    6063             : 
    6064           0 :                 if (!c->scc_in_merge[i])
    6065           0 :                         continue;
    6066             : 
    6067           0 :                 id = cluster_id(ctx, c->scc_cluster[i]);
    6068           0 :                 start = c->scc[i].band_start;
    6069           0 :                 n = c->scc[i].n_total_row - start;
    6070           0 :                 for (j = 0; j < c->scc[i].n; ++j) {
    6071             :                         isl_multi_aff *ma;
    6072             :                         isl_map *map;
    6073           0 :                         struct isl_sched_node *node = &c->scc[i].node[j];
    6074             : 
    6075           0 :                         ma = node_extract_partial_schedule_multi_aff(node,
    6076             :                                                                     start, n);
    6077           0 :                         ma = isl_multi_aff_set_tuple_id(ma, isl_dim_out,
    6078             :                                                             isl_id_copy(id));
    6079           0 :                         map = isl_map_from_multi_aff(ma);
    6080           0 :                         cluster_map = isl_union_map_add_map(cluster_map, map);
    6081             :                 }
    6082           0 :                 isl_id_free(id);
    6083             :         }
    6084             : 
    6085           0 :         return cluster_map;
    6086             : }
    6087             : 
    6088             : /* Add "umap" to the schedule constraints "sc" of all types of "edge"
    6089             :  * that are not isl_edge_condition or isl_edge_conditional_validity.
    6090             :  */
    6091           0 : static __isl_give isl_schedule_constraints *add_non_conditional_constraints(
    6092             :         struct isl_sched_edge *edge, __isl_keep isl_union_map *umap,
    6093             :         __isl_take isl_schedule_constraints *sc)
    6094             : {
    6095             :         enum isl_edge_type t;
    6096             : 
    6097           0 :         if (!sc)
    6098           0 :                 return NULL;
    6099             : 
    6100           0 :         for (t = isl_edge_first; t <= isl_edge_last; ++t) {
    6101           0 :                 if (t == isl_edge_condition ||
    6102             :                     t == isl_edge_conditional_validity)
    6103           0 :                         continue;
    6104           0 :                 if (!is_type(edge, t))
    6105           0 :                         continue;
    6106           0 :                 sc = isl_schedule_constraints_add(sc, t,
    6107             :                                                     isl_union_map_copy(umap));
    6108             :         }
    6109             : 
    6110           0 :         return sc;
    6111             : }
    6112             : 
    6113             : /* Add schedule constraints of types isl_edge_condition and
    6114             :  * isl_edge_conditional_validity to "sc" by applying "umap" to
    6115             :  * the domains of the wrapped relations in domain and range
    6116             :  * of the corresponding tagged constraints of "edge".
    6117             :  */
    6118           0 : static __isl_give isl_schedule_constraints *add_conditional_constraints(
    6119             :         struct isl_sched_edge *edge, __isl_keep isl_union_map *umap,
    6120             :         __isl_take isl_schedule_constraints *sc)
    6121             : {
    6122             :         enum isl_edge_type t;
    6123             :         isl_union_map *tagged;
    6124             : 
    6125           0 :         for (t = isl_edge_condition; t <= isl_edge_conditional_validity; ++t) {
    6126           0 :                 if (!is_type(edge, t))
    6127           0 :                         continue;
    6128           0 :                 if (t == isl_edge_condition)
    6129           0 :                         tagged = isl_union_map_copy(edge->tagged_condition);
    6130             :                 else
    6131           0 :                         tagged = isl_union_map_copy(edge->tagged_validity);
    6132           0 :                 tagged = isl_union_map_zip(tagged);
    6133           0 :                 tagged = isl_union_map_apply_domain(tagged,
    6134             :                                         isl_union_map_copy(umap));
    6135           0 :                 tagged = isl_union_map_zip(tagged);
    6136           0 :                 sc = isl_schedule_constraints_add(sc, t, tagged);
    6137           0 :                 if (!sc)
    6138           0 :                         return NULL;
    6139             :         }
    6140             : 
    6141           0 :         return sc;
    6142             : }
    6143             : 
    6144             : /* Given a mapping "cluster_map" from the original instances to
    6145             :  * the cluster instances, add schedule constraints on the clusters
    6146             :  * to "sc" corresponding to the original constraints represented by "edge".
    6147             :  *
    6148             :  * For non-tagged dependence constraints, the cluster constraints
    6149             :  * are obtained by applying "cluster_map" to the edge->map.
    6150             :  *
    6151             :  * For tagged dependence constraints, "cluster_map" needs to be applied
    6152             :  * to the domains of the wrapped relations in domain and range
    6153             :  * of the tagged dependence constraints.  Pick out the mappings
    6154             :  * from these domains from "cluster_map" and construct their product.
    6155             :  * This mapping can then be applied to the pair of domains.
    6156             :  */
    6157           0 : static __isl_give isl_schedule_constraints *collect_edge_constraints(
    6158             :         struct isl_sched_edge *edge, __isl_keep isl_union_map *cluster_map,
    6159             :         __isl_take isl_schedule_constraints *sc)
    6160             : {
    6161             :         isl_union_map *umap;
    6162             :         isl_space *space;
    6163             :         isl_union_set *uset;
    6164             :         isl_union_map *umap1, *umap2;
    6165             : 
    6166           0 :         if (!sc)
    6167           0 :                 return NULL;
    6168             : 
    6169           0 :         umap = isl_union_map_from_map(isl_map_copy(edge->map));
    6170           0 :         umap = isl_union_map_apply_domain(umap,
    6171             :                                 isl_union_map_copy(cluster_map));
    6172           0 :         umap = isl_union_map_apply_range(umap,
    6173             :                                 isl_union_map_copy(cluster_map));
    6174           0 :         sc = add_non_conditional_constraints(edge, umap, sc);
    6175           0 :         isl_union_map_free(umap);
    6176             : 
    6177           0 :         if (!sc || (!is_condition(edge) && !is_conditional_validity(edge)))
    6178           0 :                 return sc;
    6179             : 
    6180           0 :         space = isl_space_domain(isl_map_get_space(edge->map));
    6181           0 :         uset = isl_union_set_from_set(isl_set_universe(space));
    6182           0 :         umap1 = isl_union_map_copy(cluster_map);
    6183           0 :         umap1 = isl_union_map_intersect_domain(umap1, uset);
    6184           0 :         space = isl_space_range(isl_map_get_space(edge->map));
    6185           0 :         uset = isl_union_set_from_set(isl_set_universe(space));
    6186           0 :         umap2 = isl_union_map_copy(cluster_map);
    6187           0 :         umap2 = isl_union_map_intersect_domain(umap2, uset);
    6188           0 :         umap = isl_union_map_product(umap1, umap2);
    6189             : 
    6190           0 :         sc = add_conditional_constraints(edge, umap, sc);
    6191             : 
    6192           0 :         isl_union_map_free(umap);
    6193           0 :         return sc;
    6194             : }
    6195             : 
    6196             : /* Given a mapping "cluster_map" from the original instances to
    6197             :  * the cluster instances, add schedule constraints on the clusters
    6198             :  * to "sc" corresponding to all edges in "graph" between nodes that
    6199             :  * belong to SCCs that are marked for merging in "scc_in_merge".
    6200             :  */
    6201           0 : static __isl_give isl_schedule_constraints *collect_constraints(
    6202             :         struct isl_sched_graph *graph, int *scc_in_merge,
    6203             :         __isl_keep isl_union_map *cluster_map,
    6204             :         __isl_take isl_schedule_constraints *sc)
    6205             : {
    6206             :         int i;
    6207             : 
    6208           0 :         for (i = 0; i < graph->n_edge; ++i) {
    6209           0 :                 struct isl_sched_edge *edge = &graph->edge[i];
    6210             : 
    6211           0 :                 if (!scc_in_merge[edge->src->scc])
    6212           0 :                         continue;
    6213           0 :                 if (!scc_in_merge[edge->dst->scc])
    6214           0 :                         continue;
    6215           0 :                 sc = collect_edge_constraints(edge, cluster_map, sc);
    6216             :         }
    6217             : 
    6218           0 :         return sc;
    6219             : }
    6220             : 
    6221             : /* Construct a dependence graph for scheduling clusters with respect
    6222             :  * to each other and store the result in "merge_graph".
    6223             :  * In particular, the nodes of the graph correspond to the schedule
    6224             :  * dimensions of the current bands of those clusters that have been
    6225             :  * marked for merging in "c".
    6226             :  *
    6227             :  * First construct an isl_schedule_constraints object for this domain
    6228             :  * by transforming the edges in "graph" to the domain.
    6229             :  * Then initialize a dependence graph for scheduling from these
    6230             :  * constraints.
    6231             :  */
    6232           0 : static isl_stat init_merge_graph(isl_ctx *ctx, struct isl_sched_graph *graph,
    6233             :         struct isl_clustering *c, struct isl_sched_graph *merge_graph)
    6234             : {
    6235             :         isl_union_set *domain;
    6236             :         isl_union_map *cluster_map;
    6237             :         isl_schedule_constraints *sc;
    6238             :         isl_stat r;
    6239             : 
    6240           0 :         domain = collect_domain(ctx, graph, c);
    6241           0 :         sc = isl_schedule_constraints_on_domain(domain);
    6242           0 :         if (!sc)
    6243           0 :                 return isl_stat_error;
    6244           0 :         cluster_map = collect_cluster_map(ctx, graph, c);
    6245           0 :         sc = collect_constraints(graph, c->scc_in_merge, cluster_map, sc);
    6246           0 :         isl_union_map_free(cluster_map);
    6247             : 
    6248           0 :         r = graph_init(merge_graph, sc);
    6249             : 
    6250           0 :         isl_schedule_constraints_free(sc);
    6251             : 
    6252           0 :         return r;
    6253             : }
    6254             : 
    6255             : /* Compute the maximal number of remaining schedule rows that still need
    6256             :  * to be computed for the nodes that belong to clusters with the maximal
    6257             :  * dimension for the current band (i.e., the band that is to be merged).
    6258             :  * Only clusters that are about to be merged are considered.
    6259             :  * "maxvar" is the maximal dimension for the current band.
    6260             :  * "c" contains information about the clusters.
    6261             :  *
    6262             :  * Return the maximal number of remaining schedule rows or -1 on error.
    6263             :  */
    6264           0 : static int compute_maxvar_max_slack(int maxvar, struct isl_clustering *c)
    6265             : {
    6266             :         int i, j;
    6267             :         int max_slack;
    6268             : 
    6269           0 :         max_slack = 0;
    6270           0 :         for (i = 0; i < c->n; ++i) {
    6271             :                 int nvar;
    6272             :                 struct isl_sched_graph *scc;
    6273             : 
    6274           0 :                 if (!c->scc_in_merge[i])
    6275           0 :                         continue;
    6276           0 :                 scc = &c->scc[i];
    6277           0 :                 nvar = scc->n_total_row - scc->band_start;
    6278           0 :                 if (nvar != maxvar)
    6279           0 :                         continue;
    6280           0 :                 for (j = 0; j < scc->n; ++j) {
    6281           0 :                         struct isl_sched_node *node = &scc->node[j];
    6282             :                         int slack;
    6283             : 
    6284           0 :                         if (node_update_vmap(node) < 0)
    6285           0 :                                 return -1;
    6286           0 :                         slack = node->nvar - node->rank;
    6287           0 :                         if (slack > max_slack)
    6288           0 :                                 max_slack = slack;
    6289             :                 }
    6290             :         }
    6291             : 
    6292           0 :         return max_slack;
    6293             : }
    6294             : 
    6295             : /* If there are any clusters where the dimension of the current band
    6296             :  * (i.e., the band that is to be merged) is smaller than "maxvar" and
    6297             :  * if there are any nodes in such a cluster where the number
    6298             :  * of remaining schedule rows that still need to be computed
    6299             :  * is greater than "max_slack", then return the smallest current band
    6300             :  * dimension of all these clusters.  Otherwise return the original value
    6301             :  * of "maxvar".  Return -1 in case of any error.
    6302             :  * Only clusters that are about to be merged are considered.
    6303             :  * "c" contains information about the clusters.
    6304             :  */
    6305           0 : static int limit_maxvar_to_slack(int maxvar, int max_slack,
    6306             :         struct isl_clustering *c)
    6307             : {
    6308             :         int i, j;
    6309             : 
    6310           0 :         for (i = 0; i < c->n; ++i) {
    6311             :                 int nvar;
    6312             :                 struct isl_sched_graph *scc;
    6313             : 
    6314           0 :                 if (!c->scc_in_merge[i])
    6315           0 :                         continue;
    6316           0 :                 scc = &c->scc[i];
    6317           0 :                 nvar = scc->n_total_row - scc->band_start;
    6318           0 :                 if (nvar >= maxvar)
    6319           0 :                         continue;
    6320           0 :                 for (j = 0; j < scc->n; ++j) {
    6321           0 :                         struct isl_sched_node *node = &scc->node[j];
    6322             :                         int slack;
    6323             : 
    6324           0 :                         if (node_update_vmap(node) < 0)
    6325           0 :                                 return -1;
    6326           0 :                         slack = node->nvar - node->rank;
    6327           0 :                         if (slack > max_slack) {
    6328           0 :                                 maxvar = nvar;
    6329           0 :                                 break;
    6330             :                         }
    6331             :                 }
    6332             :         }
    6333             : 
    6334           0 :         return maxvar;
    6335             : }
    6336             : 
    6337             : /* Adjust merge_graph->maxvar based on the number of remaining schedule rows
    6338             :  * that still need to be computed.  In particular, if there is a node
    6339             :  * in a cluster where the dimension of the current band is smaller
    6340             :  * than merge_graph->maxvar, but the number of remaining schedule rows
    6341             :  * is greater than that of any node in a cluster with the maximal
    6342             :  * dimension for the current band (i.e., merge_graph->maxvar),
    6343             :  * then adjust merge_graph->maxvar to the (smallest) current band dimension
    6344             :  * of those clusters.  Without this adjustment, the total number of
    6345             :  * schedule dimensions would be increased, resulting in a skewed view
    6346             :  * of the number of coincident dimensions.
    6347             :  * "c" contains information about the clusters.
    6348             :  *
    6349             :  * If the maximize_band_depth option is set and merge_graph->maxvar is reduced,
    6350             :  * then there is no point in attempting any merge since it will be rejected
    6351             :  * anyway.  Set merge_graph->maxvar to zero in such cases.
    6352             :  */
    6353           0 : static isl_stat adjust_maxvar_to_slack(isl_ctx *ctx,
    6354             :         struct isl_sched_graph *merge_graph, struct isl_clustering *c)
    6355             : {
    6356             :         int max_slack, maxvar;
    6357             : 
    6358           0 :         max_slack = compute_maxvar_max_slack(merge_graph->maxvar, c);
    6359           0 :         if (max_slack < 0)
    6360           0 :                 return isl_stat_error;
    6361           0 :         maxvar = limit_maxvar_to_slack(merge_graph->maxvar, max_slack, c);
    6362           0 :         if (maxvar < 0)
    6363           0 :                 return isl_stat_error;
    6364             : 
    6365           0 :         if (maxvar < merge_graph->maxvar) {
    6366           0 :                 if (isl_options_get_schedule_maximize_band_depth(ctx))
    6367           0 :                         merge_graph->maxvar = 0;
    6368             :                 else
    6369           0 :                         merge_graph->maxvar = maxvar;
    6370             :         }
    6371             : 
    6372           0 :         return isl_stat_ok;
    6373             : }
    6374             : 
    6375             : /* Return the number of coincident dimensions in the current band of "graph",
    6376             :  * where the nodes of "graph" are assumed to be scheduled by a single band.
    6377             :  */
    6378           0 : static int get_n_coincident(struct isl_sched_graph *graph)
    6379             : {
    6380             :         int i;
    6381             : 
    6382           0 :         for (i = graph->band_start; i < graph->n_total_row; ++i)
    6383           0 :                 if (!graph->node[0].coincident[i])
    6384           0 :                         break;
    6385             : 
    6386           0 :         return i - graph->band_start;
    6387             : }
    6388             : 
    6389             : /* Should the clusters be merged based on the cluster schedule
    6390             :  * in the current (and only) band of "merge_graph", given that
    6391             :  * coincidence should be maximized?
    6392             :  *
    6393             :  * If the number of coincident schedule dimensions in the merged band
    6394             :  * would be less than the maximal number of coincident schedule dimensions
    6395             :  * in any of the merged clusters, then the clusters should not be merged.
    6396             :  */
    6397           0 : static isl_bool ok_to_merge_coincident(struct isl_clustering *c,
    6398             :         struct isl_sched_graph *merge_graph)
    6399             : {
    6400             :         int i;
    6401             :         int n_coincident;
    6402             :         int max_coincident;
    6403             : 
    6404           0 :         max_coincident = 0;
    6405           0 :         for (i = 0; i < c->n; ++i) {
    6406           0 :                 if (!c->scc_in_merge[i])
    6407           0 :                         continue;
    6408           0 :                 n_coincident = get_n_coincident(&c->scc[i]);
    6409           0 :                 if (n_coincident > max_coincident)
    6410           0 :                         max_coincident = n_coincident;
    6411             :         }
    6412             : 
    6413           0 :         n_coincident = get_n_coincident(merge_graph);
    6414             : 
    6415           0 :         return n_coincident >= max_coincident;
    6416             : }
    6417             : 
    6418             : /* Return the transformation on "node" expressed by the current (and only)
    6419             :  * band of "merge_graph" applied to the clusters in "c".
    6420             :  *
    6421             :  * First find the representation of "node" in its SCC in "c" and
    6422             :  * extract the transformation expressed by the current band.
    6423             :  * Then extract the transformation applied by "merge_graph"
    6424             :  * to the cluster to which this SCC belongs.
    6425             :  * Combine the two to obtain the complete transformation on the node.
    6426             :  *
    6427             :  * Note that the range of the first transformation is an anonymous space,
    6428             :  * while the domain of the second is named "cluster_X".  The range
    6429             :  * of the former therefore needs to be adjusted before the two
    6430             :  * can be combined.
    6431             :  */
    6432           0 : static __isl_give isl_map *extract_node_transformation(isl_ctx *ctx,
    6433             :         struct isl_sched_node *node, struct isl_clustering *c,
    6434             :         struct isl_sched_graph *merge_graph)
    6435             : {
    6436             :         struct isl_sched_node *scc_node, *cluster_node;
    6437             :         int start, n;
    6438             :         isl_id *id;
    6439             :         isl_space *space;
    6440             :         isl_multi_aff *ma, *ma2;
    6441             : 
    6442           0 :         scc_node = graph_find_node(ctx, &c->scc[node->scc], node->space);
    6443           0 :         if (scc_node && !is_node(&c->scc[node->scc], scc_node))
    6444           0 :                 isl_die(ctx, isl_error_internal, "unable to find node",
    6445             :                         return NULL);
    6446           0 :         start = c->scc[node->scc].band_start;
    6447           0 :         n = c->scc[node->scc].n_total_row - start;
    6448           0 :         ma = node_extract_partial_schedule_multi_aff(scc_node, start, n);
    6449           0 :         space = cluster_space(&c->scc[node->scc], c->scc_cluster[node->scc]);
    6450           0 :         cluster_node = graph_find_node(ctx, merge_graph, space);
    6451           0 :         if (cluster_node && !is_node(merge_graph, cluster_node))
    6452           0 :                 isl_die(ctx, isl_error_internal, "unable to find cluster",
    6453             :                         space = isl_space_free(space));
    6454           0 :         id = isl_space_get_tuple_id(space, isl_dim_set);
    6455           0 :         ma = isl_multi_aff_set_tuple_id(ma, isl_dim_out, id);
    6456           0 :         isl_space_free(space);
    6457           0 :         n = merge_graph->n_total_row;
    6458           0 :         ma2 = node_extract_partial_schedule_multi_aff(cluster_node, 0, n);
    6459           0 :         ma = isl_multi_aff_pullback_multi_aff(ma2, ma);
    6460             : 
    6461           0 :         return isl_map_from_multi_aff(ma);
    6462             : }
    6463             : 
    6464             : /* Give a set of distances "set", are they bounded by a small constant
    6465             :  * in direction "pos"?
    6466             :  * In practice, check if they are bounded by 2 by checking that there
    6467             :  * are no elements with a value greater than or equal to 3 or
    6468             :  * smaller than or equal to -3.
    6469             :  */
    6470           0 : static isl_bool distance_is_bounded(__isl_keep isl_set *set, int pos)
    6471             : {
    6472             :         isl_bool bounded;
    6473             :         isl_set *test;
    6474             : 
    6475           0 :         if (!set)
    6476           0 :                 return isl_bool_error;
    6477             : 
    6478           0 :         test = isl_set_copy(set);
    6479           0 :         test = isl_set_lower_bound_si(test, isl_dim_set, pos, 3);
    6480           0 :         bounded = isl_set_is_empty(test);
    6481           0 :         isl_set_free(test);
    6482             : 
    6483           0 :         if (bounded < 0 || !bounded)
    6484           0 :                 return bounded;
    6485             : 
    6486           0 :         test = isl_set_copy(set);
    6487           0 :         test = isl_set_upper_bound_si(test, isl_dim_set, pos, -3);
    6488           0 :         bounded = isl_set_is_empty(test);
    6489           0 :         isl_set_free(test);
    6490             : 
    6491           0 :         return bounded;
    6492             : }
    6493             : 
    6494             : /* Does the set "set" have a fixed (but possible parametric) value
    6495             :  * at dimension "pos"?
    6496             :  */
    6497           0 : static isl_bool has_single_value(__isl_keep isl_set *set, int pos)
    6498             : {
    6499             :         int n;
    6500             :         isl_bool single;
    6501             : 
    6502           0 :         if (!set)
    6503           0 :                 return isl_bool_error;
    6504           0 :         set = isl_set_copy(set);
    6505           0 :         n = isl_set_dim(set, isl_dim_set);
    6506           0 :         set = isl_set_project_out(set, isl_dim_set, pos + 1, n - (pos + 1));
    6507           0 :         set = isl_set_project_out(set, isl_dim_set, 0, pos);
    6508           0 :         single = isl_set_is_singleton(set);
    6509           0 :         isl_set_free(set);
    6510             : 
    6511           0 :         return single;
    6512             : }
    6513             : 
    6514             : /* Does "map" have a fixed (but possible parametric) value
    6515             :  * at dimension "pos" of either its domain or its range?
    6516             :  */
    6517           0 : static isl_bool has_singular_src_or_dst(__isl_keep isl_map *map, int pos)
    6518             : {
    6519             :         isl_set *set;
    6520             :         isl_bool single;
    6521             : 
    6522           0 :         set = isl_map_domain(isl_map_copy(map));
    6523           0 :         single = has_single_value(set, pos);
    6524           0 :         isl_set_free(set);
    6525             : 
    6526           0 :         if (single < 0 || single)
    6527           0 :                 return single;
    6528             : 
    6529           0 :         set = isl_map_range(isl_map_copy(map));
    6530           0 :         single = has_single_value(set, pos);
    6531           0 :         isl_set_free(set);
    6532             : 
    6533           0 :         return single;
    6534             : }
    6535             : 
    6536             : /* Does the edge "edge" from "graph" have bounded dependence distances
    6537             :  * in the merged graph "merge_graph" of a selection of clusters in "c"?
    6538             :  *
    6539             :  * Extract the complete transformations of the source and destination
    6540             :  * nodes of the edge, apply them to the edge constraints and
    6541             :  * compute the differences.  Finally, check if these differences are bounded
    6542             :  * in each direction.
    6543             :  *
    6544             :  * If the dimension of the band is greater than the number of
    6545             :  * dimensions that can be expected to be optimized by the edge
    6546             :  * (based on its weight), then also allow the differences to be unbounded
    6547             :  * in the remaining dimensions, but only if either the source or
    6548             :  * the destination has a fixed value in that direction.
    6549             :  * This allows a statement that produces values that are used by
    6550             :  * several instances of another statement to be merged with that
    6551             :  * other statement.
    6552             :  * However, merging such clusters will introduce an inherently
    6553             :  * large proximity distance inside the merged cluster, meaning
    6554             :  * that proximity distances will no longer be optimized in
    6555             :  * subsequent merges.  These merges are therefore only allowed
    6556             :  * after all other possible merges have been tried.
    6557             :  * The first time such a merge is encountered, the weight of the edge
    6558             :  * is replaced by a negative weight.  The second time (i.e., after
    6559             :  * all merges over edges with a non-negative weight have been tried),
    6560             :  * the merge is allowed.
    6561             :  */
    6562           0 : static isl_bool has_bounded_distances(isl_ctx *ctx, struct isl_sched_edge *edge,
    6563             :         struct isl_sched_graph *graph, struct isl_clustering *c,
    6564             :         struct isl_sched_graph *merge_graph)
    6565             : {
    6566             :         int i, n, n_slack;
    6567             :         isl_bool bounded;
    6568             :         isl_map *map, *t;
    6569             :         isl_set *dist;
    6570             : 
    6571           0 :         map = isl_map_copy(edge->map);
    6572           0 :         t = extract_node_transformation(ctx, edge->src, c, merge_graph);
    6573           0 :         map = isl_map_apply_domain(map, t);
    6574           0 :         t = extract_node_transformation(ctx, edge->dst, c, merge_graph);
    6575           0 :         map = isl_map_apply_range(map, t);
    6576           0 :         dist = isl_map_deltas(isl_map_copy(map));
    6577             : 
    6578           0 :         bounded = isl_bool_true;
    6579           0 :         n = isl_set_dim(dist, isl_dim_set);
    6580           0 :         n_slack = n - edge->weight;
    6581           0 :         if (edge->weight < 0)
    6582           0 :                 n_slack -= graph->max_weight + 1;
    6583           0 :         for (i = 0; i < n; ++i) {
    6584             :                 isl_bool bounded_i, singular_i;
    6585             : 
    6586           0 :                 bounded_i = distance_is_bounded(dist, i);
    6587           0 :                 if (bounded_i < 0)
    6588           0 :                         goto error;
    6589           0 :                 if (bounded_i)
    6590           0 :                         continue;
    6591           0 :                 if (edge->weight >= 0)
    6592           0 :                         bounded = isl_bool_false;
    6593           0 :                 n_slack--;
    6594           0 :                 if (n_slack < 0)
    6595           0 :                         break;
    6596           0 :                 singular_i = has_singular_src_or_dst(map, i);
    6597           0 :                 if (singular_i < 0)
    6598           0 :                         goto error;
    6599           0 :                 if (singular_i)
    6600           0 :                         continue;
    6601           0 :                 bounded = isl_bool_false;
    6602           0 :                 break;
    6603             :         }
    6604           0 :         if (!bounded && i >= n && edge->weight >= 0)
    6605           0 :                 edge->weight -= graph->max_weight + 1;
    6606           0 :         isl_map_free(map);
    6607           0 :         isl_set_free(dist);
    6608             : 
    6609           0 :         return bounded;
    6610             : error:
    6611           0 :         isl_map_free(map);
    6612           0 :         isl_set_free(dist);
    6613           0 :         return isl_bool_error;
    6614             : }
    6615             : 
    6616             : /* Should the clusters be merged based on the cluster schedule
    6617             :  * in the current (and only) band of "merge_graph"?
    6618             :  * "graph" is the original dependence graph, while "c" records
    6619             :  * which SCCs are involved in the latest merge.
    6620             :  *
    6621             :  * In particular, is there at least one proximity constraint
    6622             :  * that is optimized by the merge?
    6623             :  *
    6624             :  * A proximity constraint is considered to be optimized
    6625             :  * if the dependence distances are small.
    6626             :  */
    6627           0 : static isl_bool ok_to_merge_proximity(isl_ctx *ctx,
    6628             :         struct isl_sched_graph *graph, struct isl_clustering *c,
    6629             :         struct isl_sched_graph *merge_graph)
    6630             : {
    6631             :         int i;
    6632             : 
    6633           0 :         for (i = 0; i < graph->n_edge; ++i) {
    6634           0 :                 struct isl_sched_edge *edge = &graph->edge[i];
    6635             :                 isl_bool bounded;
    6636             : 
    6637           0 :                 if (!is_proximity(edge))
    6638           0 :                         continue;
    6639           0 :                 if (!c->scc_in_merge[edge->src->scc])
    6640           0 :                         continue;
    6641           0 :                 if (!c->scc_in_merge[edge->dst->scc])
    6642           0 :                         continue;
    6643           0 :                 if (c->scc_cluster[edge->dst->scc] ==
    6644           0 :                     c->scc_cluster[edge->src->scc])
    6645           0 :                         continue;
    6646           0 :                 bounded = has_bounded_distances(ctx, edge, graph, c,
    6647             :                                                 merge_graph);
    6648           0 :                 if (bounded < 0 || bounded)
    6649           0 :                         return bounded;
    6650             :         }
    6651             : 
    6652           0 :         return isl_bool_false;
    6653             : }
    6654             : 
    6655             : /* Should the clusters be merged based on the cluster schedule
    6656             :  * in the current (and only) band of "merge_graph"?
    6657             :  * "graph" is the original dependence graph, while "c" records
    6658             :  * which SCCs are involved in the latest merge.
    6659             :  *
    6660             :  * If the current band is empty, then the clusters should not be merged.
    6661             :  *
    6662             :  * If the band depth should be maximized and the merge schedule
    6663             :  * is incomplete (meaning that the dimension of some of the schedule
    6664             :  * bands in the original schedule will be reduced), then the clusters
    6665             :  * should not be merged.
    6666             :  *
    6667             :  * If the schedule_maximize_coincidence option is set, then check that
    6668             :  * the number of coincident schedule dimensions is not reduced.
    6669             :  *
    6670             :  * Finally, only allow the merge if at least one proximity
    6671             :  * constraint is optimized.
    6672             :  */
    6673           0 : static isl_bool ok_to_merge(isl_ctx *ctx, struct isl_sched_graph *graph,
    6674             :         struct isl_clustering *c, struct isl_sched_graph *merge_graph)
    6675             : {
    6676           0 :         if (merge_graph->n_total_row == merge_graph->band_start)
    6677           0 :                 return isl_bool_false;
    6678             : 
    6679           0 :         if (isl_options_get_schedule_maximize_band_depth(ctx) &&
    6680           0 :             merge_graph->n_total_row < merge_graph->maxvar)
    6681           0 :                 return isl_bool_false;
    6682             : 
    6683           0 :         if (isl_options_get_schedule_maximize_coincidence(ctx)) {
    6684             :                 isl_bool ok;
    6685             : 
    6686           0 :                 ok = ok_to_merge_coincident(c, merge_graph);
    6687           0 :                 if (ok < 0 || !ok)
    6688           0 :                         return ok;
    6689             :         }
    6690             : 
    6691           0 :         return ok_to_merge_proximity(ctx, graph, c, merge_graph);
    6692             : }
    6693             : 
    6694             : /* Apply the schedule in "t_node" to the "n" rows starting at "first"
    6695             :  * of the schedule in "node" and return the result.
    6696             :  *
    6697             :  * That is, essentially compute
    6698             :  *
    6699             :  *      T * N(first:first+n-1)
    6700             :  *
    6701             :  * taking into account the constant term and the parameter coefficients
    6702             :  * in "t_node".
    6703             :  */
    6704           0 : static __isl_give isl_mat *node_transformation(isl_ctx *ctx,
    6705             :         struct isl_sched_node *t_node, struct isl_sched_node *node,
    6706             :         int first, int n)
    6707             : {
    6708             :         int i, j;
    6709             :         isl_mat *t;
    6710             :         int n_row, n_col, n_param, n_var;
    6711             : 
    6712           0 :         n_param = node->nparam;
    6713           0 :         n_var = node->nvar;
    6714           0 :         n_row = isl_mat_rows(t_node->sched);
    6715           0 :         n_col = isl_mat_cols(node->sched);
    6716           0 :         t = isl_mat_alloc(ctx, n_row, n_col);
    6717           0 :         if (!t)
    6718           0 :                 return NULL;
    6719           0 :         for (i = 0; i < n_row; ++i) {
    6720           0 :                 isl_seq_cpy(t->row[i], t_node->sched->row[i], 1 + n_param);
    6721           0 :                 isl_seq_clr(t->row[i] + 1 + n_param, n_var);
    6722           0 :                 for (j = 0; j < n; ++j)
    6723           0 :                         isl_seq_addmul(t->row[i],
    6724           0 :                                         t_node->sched->row[i][1 + n_param + j],
    6725           0 :                                         node->sched->row[first + j],
    6726           0 :                                         1 + n_param + n_var);
    6727             :         }
    6728           0 :         return t;
    6729             : }
    6730             : 
    6731             : /* Apply the cluster schedule in "t_node" to the current band
    6732             :  * schedule of the nodes in "graph".
    6733             :  *
    6734             :  * In particular, replace the rows starting at band_start
    6735             :  * by the result of applying the cluster schedule in "t_node"
    6736             :  * to the original rows.
    6737             :  *
    6738             :  * The coincidence of the schedule is determined by the coincidence
    6739             :  * of the cluster schedule.
    6740             :  */
    6741           0 : static isl_stat transform(isl_ctx *ctx, struct isl_sched_graph *graph,
    6742             :         struct isl_sched_node *t_node)
    6743             : {
    6744             :         int i, j;
    6745             :         int n_new;
    6746             :         int start, n;
    6747             : 
    6748           0 :         start = graph->band_start;
    6749           0 :         n = graph->n_total_row - start;
    6750             : 
    6751           0 :         n_new = isl_mat_rows(t_node->sched);
    6752           0 :         for (i = 0; i < graph->n; ++i) {
    6753           0 :                 struct isl_sched_node *node = &graph->node[i];
    6754             :                 isl_mat *t;
    6755             : 
    6756           0 :                 t = node_transformation(ctx, t_node, node, start, n);
    6757           0 :                 node->sched = isl_mat_drop_rows(node->sched, start, n);
    6758           0 :                 node->sched = isl_mat_concat(node->sched, t);
    6759           0 :                 node->sched_map = isl_map_free(node->sched_map);
    6760           0 :                 if (!node->sched)
    6761           0 :                         return isl_stat_error;
    6762           0 :                 for (j = 0; j < n_new; ++j)
    6763           0 :                         node->coincident[start + j] = t_node->coincident[j];
    6764             :         }
    6765           0 :         graph->n_total_row -= n;
    6766           0 :         graph->n_row -= n;
    6767           0 :         graph->n_total_row += n_new;
    6768           0 :         graph->n_row += n_new;
    6769             : 
    6770           0 :         return isl_stat_ok;
    6771             : }
    6772             : 
    6773             : /* Merge the clusters marked for merging in "c" into a single
    6774             :  * cluster using the cluster schedule in the current band of "merge_graph".
    6775             :  * The representative SCC for the new cluster is the SCC with
    6776             :  * the smallest index.
    6777             :  *
    6778             :  * The current band schedule of each SCC in the new cluster is obtained
    6779             :  * by applying the schedule of the corresponding original cluster
    6780             :  * to the original band schedule.
    6781             :  * All SCCs in the new cluster have the same number of schedule rows.
    6782             :  */
    6783           0 : static isl_stat merge(isl_ctx *ctx, struct isl_clustering *c,
    6784             :         struct isl_sched_graph *merge_graph)
    6785             : {
    6786             :         int i;
    6787           0 :         int cluster = -1;
    6788             :         isl_space *space;
    6789             : 
    6790           0 :         for (i = 0; i < c->n; ++i) {
    6791             :                 struct isl_sched_node *node;
    6792             : 
    6793           0 :                 if (!c->scc_in_merge[i])
    6794           0 :                         continue;
    6795           0 :                 if (cluster < 0)
    6796           0 :                         cluster = i;
    6797           0 :                 space = cluster_space(&c->scc[i], c->scc_cluster[i]);
    6798           0 :                 node = graph_find_node(ctx, merge_graph, space);
    6799           0 :                 isl_space_free(space);
    6800           0 :                 if (!node)
    6801           0 :                         return isl_stat_error;
    6802           0 :                 if (!is_node(merge_graph, node))
    6803           0 :                         isl_die(ctx, isl_error_internal,
    6804             :                                 "unable to find cluster",
    6805             :                                 return isl_stat_error);
    6806           0 :                 if (transform(ctx, &c->scc[i], node) < 0)
    6807           0 :                         return isl_stat_error;
    6808           0 :                 c->scc_cluster[i] = cluster;
    6809             :         }
    6810             : 
    6811           0 :         return isl_stat_ok;
    6812             : }
    6813             : 
    6814             : /* Try and merge the clusters of SCCs marked in c->scc_in_merge
    6815             :  * by scheduling the current cluster bands with respect to each other.
    6816             :  *
    6817             :  * Construct a dependence graph with a space for each cluster and
    6818             :  * with the coordinates of each space corresponding to the schedule
    6819             :  * dimensions of the current band of that cluster.
    6820             :  * Construct a cluster schedule in this cluster dependence graph and
    6821             :  * apply it to the current cluster bands if it is applicable
    6822             :  * according to ok_to_merge.
    6823             :  *
    6824             :  * If the number of remaining schedule dimensions in a cluster
    6825             :  * with a non-maximal current schedule dimension is greater than
    6826             :  * the number of remaining schedule dimensions in clusters
    6827             :  * with a maximal current schedule dimension, then restrict
    6828             :  * the number of rows to be computed in the cluster schedule
    6829             :  * to the minimal such non-maximal current schedule dimension.
    6830             :  * Do this by adjusting merge_graph.maxvar.
    6831             :  *
    6832             :  * Return isl_bool_true if the clusters have effectively been merged
    6833             :  * into a single cluster.
    6834             :  *
    6835             :  * Note that since the standard scheduling algorithm minimizes the maximal
    6836             :  * distance over proximity constraints, the proximity constraints between
    6837             :  * the merged clusters may not be optimized any further than what is
    6838             :  * sufficient to bring the distances within the limits of the internal
    6839             :  * proximity constraints inside the individual clusters.
    6840             :  * It may therefore make sense to perform an additional translation step
    6841             :  * to bring the clusters closer to each other, while maintaining
    6842             :  * the linear part of the merging schedule found using the standard
    6843             :  * scheduling algorithm.
    6844             :  */
    6845           0 : static isl_bool try_merge(isl_ctx *ctx, struct isl_sched_graph *graph,
    6846             :         struct isl_clustering *c)
    6847             : {
    6848           0 :         struct isl_sched_graph merge_graph = { 0 };
    6849             :         isl_bool merged;
    6850             : 
    6851           0 :         if (init_merge_graph(ctx, graph, c, &merge_graph) < 0)
    6852           0 :                 goto error;
    6853             : 
    6854           0 :         if (compute_maxvar(&merge_graph) < 0)
    6855           0 :                 goto error;
    6856           0 :         if (adjust_maxvar_to_slack(ctx, &merge_graph,c) < 0)
    6857           0 :                 goto error;
    6858           0 :         if (compute_schedule_wcc_band(ctx, &merge_graph) < 0)
    6859           0 :                 goto error;
    6860           0 :         merged = ok_to_merge(ctx, graph, c, &merge_graph);
    6861           0 :         if (merged && merge(ctx, c, &merge_graph) < 0)
    6862           0 :                 goto error;
    6863             : 
    6864           0 :         graph_free(ctx, &merge_graph);
    6865           0 :         return merged;
    6866             : error:
    6867           0 :         graph_free(ctx, &merge_graph);
    6868           0 :         return isl_bool_error;
    6869             : }
    6870             : 
    6871             : /* Is there any edge marked "no_merge" between two SCCs that are
    6872             :  * about to be merged (i.e., that are set in "scc_in_merge")?
    6873             :  * "merge_edge" is the proximity edge along which the clusters of SCCs
    6874             :  * are going to be merged.
    6875             :  *
    6876             :  * If there is any edge between two SCCs with a negative weight,
    6877             :  * while the weight of "merge_edge" is non-negative, then this
    6878             :  * means that the edge was postponed.  "merge_edge" should then
    6879             :  * also be postponed since merging along the edge with negative weight should
    6880             :  * be postponed until all edges with non-negative weight have been tried.
    6881             :  * Replace the weight of "merge_edge" by a negative weight as well and
    6882             :  * tell the caller not to attempt a merge.
    6883             :  */
    6884           0 : static int any_no_merge(struct isl_sched_graph *graph, int *scc_in_merge,
    6885             :         struct isl_sched_edge *merge_edge)
    6886             : {
    6887             :         int i;
    6888             : 
    6889           0 :         for (i = 0; i < graph->n_edge; ++i) {
    6890           0 :                 struct isl_sched_edge *edge = &graph->edge[i];
    6891             : 
    6892           0 :                 if (!scc_in_merge[edge->src->scc])
    6893           0 :                         continue;
    6894           0 :                 if (!scc_in_merge[edge->dst->scc])
    6895           0 :                         continue;
    6896           0 :                 if (edge->no_merge)
    6897           0 :                         return 1;
    6898           0 :                 if (merge_edge->weight >= 0 && edge->weight < 0) {
    6899           0 :                         merge_edge->weight -= graph->max_weight + 1;
    6900           0 :                         return 1;
    6901             :                 }
    6902             :         }
    6903             : 
    6904           0 :         return 0;
    6905             : }
    6906             : 
    6907             : /* Merge the two clusters in "c" connected by the edge in "graph"
    6908             :  * with index "edge" into a single cluster.
    6909             :  * If it turns out to be impossible to merge these two clusters,
    6910             :  * then mark the edge as "no_merge" such that it will not be
    6911             :  * considered again.
    6912             :  *
    6913             :  * First mark all SCCs that need to be merged.  This includes the SCCs
    6914             :  * in the two clusters, but it may also include the SCCs
    6915             :  * of intermediate clusters.
    6916             :  * If there is already a no_merge edge between any pair of such SCCs,
    6917             :  * then simply mark the current edge as no_merge as well.
    6918             :  * Likewise, if any of those edges was postponed by has_bounded_distances,
    6919             :  * then postpone the current edge as well.
    6920             :  * Otherwise, try and merge the clusters and mark "edge" as "no_merge"
    6921             :  * if the clusters did not end up getting merged, unless the non-merge
    6922             :  * is due to the fact that the edge was postponed.  This postponement
    6923             :  * can be recognized by a change in weight (from non-negative to negative).
    6924             :  */
    6925           0 : static isl_stat merge_clusters_along_edge(isl_ctx *ctx,
    6926             :         struct isl_sched_graph *graph, int edge, struct isl_clustering *c)
    6927             : {
    6928             :         isl_bool merged;
    6929           0 :         int edge_weight = graph->edge[edge].weight;
    6930             : 
    6931           0 :         if (mark_merge_sccs(ctx, graph, edge, c) < 0)
    6932           0 :                 return isl_stat_error;
    6933             : 
    6934           0 :         if (any_no_merge(graph, c->scc_in_merge, &graph->edge[edge]))
    6935           0 :                 merged = isl_bool_false;
    6936             :         else
    6937           0 :                 merged = try_merge(ctx, graph, c);
    6938           0 :         if (merged < 0)
    6939           0 :                 return isl_stat_error;
    6940           0 :         if (!merged && edge_weight == graph->edge[edge].weight)
    6941           0 :                 graph->edge[edge].no_merge = 1;
    6942             : 
    6943           0 :         return isl_stat_ok;
    6944             : }
    6945             : 
    6946             : /* Does "node" belong to the cluster identified by "cluster"?
    6947             :  */
    6948           0 : static int node_cluster_exactly(struct isl_sched_node *node, int cluster)
    6949             : {
    6950           0 :         return node->cluster == cluster;
    6951             : }
    6952             : 
    6953             : /* Does "edge" connect two nodes belonging to the cluster
    6954             :  * identified by "cluster"?
    6955             :  */
    6956           0 : static int edge_cluster_exactly(struct isl_sched_edge *edge, int cluster)
    6957             : {
    6958           0 :         return edge->src->cluster == cluster && edge->dst->cluster == cluster;
    6959             : }
    6960             : 
    6961             : /* Swap the schedule of "node1" and "node2".
    6962             :  * Both nodes have been derived from the same node in a common parent graph.
    6963             :  * Since the "coincident" field is shared with that node
    6964             :  * in the parent graph, there is no need to also swap this field.
    6965             :  */
    6966           0 : static void swap_sched(struct isl_sched_node *node1,
    6967             :         struct isl_sched_node *node2)
    6968             : {
    6969             :         isl_mat *sched;
    6970             :         isl_map *sched_map;
    6971             : 
    6972           0 :         sched = node1->sched;
    6973           0 :         node1->sched = node2->sched;
    6974           0 :         node2->sched = sched;
    6975             : 
    6976           0 :         sched_map = node1->sched_map;
    6977           0 :         node1->sched_map = node2->sched_map;
    6978           0 :         node2->sched_map = sched_map;
    6979           0 : }
    6980             : 
    6981             : /* Copy the current band schedule from the SCCs that form the cluster
    6982             :  * with index "pos" to the actual cluster at position "pos".
    6983             :  * By construction, the index of the first SCC that belongs to the cluster
    6984             :  * is also "pos".
    6985             :  *
    6986             :  * The order of the nodes inside both the SCCs and the cluster
    6987             :  * is assumed to be same as the order in the original "graph".
    6988             :  *
    6989             :  * Since the SCC graphs will no longer be used after this function,
    6990             :  * the schedules are actually swapped rather than copied.
    6991             :  */
    6992           0 : static isl_stat copy_partial(struct isl_sched_graph *graph,
    6993             :         struct isl_clustering *c, int pos)
    6994             : {
    6995             :         int i, j;
    6996             : 
    6997           0 :         c->cluster[pos].n_total_row = c->scc[pos].n_total_row;
    6998           0 :         c->cluster[pos].n_row = c->scc[pos].n_row;
    6999           0 :         c->cluster[pos].maxvar = c->scc[pos].maxvar;
    7000           0 :         j = 0;
    7001           0 :         for (i = 0; i < graph->n; ++i) {
    7002             :                 int k;
    7003             :                 int s;
    7004             : 
    7005           0 :                 if (graph->node[i].cluster != pos)
    7006           0 :                         continue;
    7007           0 :                 s = graph->node[i].scc;
    7008           0 :                 k = c->scc_node[s]++;
    7009           0 :                 swap_sched(&c->cluster[pos].node[j], &c->scc[s].node[k]);
    7010           0 :                 if (c->scc[s].maxvar > c->cluster[pos].maxvar)
    7011           0 :                         c->cluster[pos].maxvar = c->scc[s].maxvar;
    7012           0 :                 ++j;
    7013             :         }
    7014             : 
    7015           0 :         return isl_stat_ok;
    7016             : }
    7017             : 
    7018             : /* Is there a (conditional) validity dependence from node[j] to node[i],
    7019             :  * forcing node[i] to follow node[j] or do the nodes belong to the same
    7020             :  * cluster?
    7021             :  */
    7022           0 : static isl_bool node_follows_strong_or_same_cluster(int i, int j, void *user)
    7023             : {
    7024           0 :         struct isl_sched_graph *graph = user;
    7025             : 
    7026           0 :         if (graph->node[i].cluster == graph->node[j].cluster)
    7027           0 :                 return isl_bool_true;
    7028           0 :         return graph_has_validity_edge(graph, &graph->node[j], &graph->node[i]);
    7029             : }
    7030             : 
    7031             : /* Extract the merged clusters of SCCs in "graph", sort them, and
    7032             :  * store them in c->clusters.  Update c->scc_cluster accordingly.
    7033             :  *
    7034             :  * First keep track of the cluster containing the SCC to which a node
    7035             :  * belongs in the node itself.
    7036             :  * Then extract the clusters into c->clusters, copying the current
    7037             :  * band schedule from the SCCs that belong to the cluster.
    7038             :  * Do this only once per cluster.
    7039             :  *
    7040             :  * Finally, topologically sort the clusters and update c->scc_cluster
    7041             :  * to match the new scc numbering.  While the SCCs were originally
    7042             :  * sorted already, some SCCs that depend on some other SCCs may
    7043             :  * have been merged with SCCs that appear before these other SCCs.
    7044             :  * A reordering may therefore be required.
    7045             :  */
    7046           0 : static isl_stat extract_clusters(isl_ctx *ctx, struct isl_sched_graph *graph,
    7047             :         struct isl_clustering *c)
    7048             : {
    7049             :         int i;
    7050             : 
    7051           0 :         for (i = 0; i < graph->n; ++i)
    7052           0 :                 graph->node[i].cluster = c->scc_cluster[graph->node[i].scc];
    7053             : 
    7054           0 :         for (i = 0; i < graph->scc; ++i) {
    7055           0 :                 if (c->scc_cluster[i] != i)
    7056           0 :                         continue;
    7057           0 :                 if (extract_sub_graph(ctx, graph, &node_cluster_exactly,
    7058           0 :                                 &edge_cluster_exactly, i, &c->cluster[i]) < 0)
    7059           0 :                         return isl_stat_error;
    7060           0 :                 c->cluster[i].src_scc = -1;
    7061           0 :                 c->cluster[i].dst_scc = -1;
    7062           0 :                 if (copy_partial(graph, c, i) < 0)
    7063           0 :                         return isl_stat_error;
    7064             :         }
    7065             : 
    7066           0 :         if (detect_ccs(ctx, graph, &node_follows_strong_or_same_cluster) < 0)
    7067           0 :                 return isl_stat_error;
    7068           0 :         for (i = 0; i < graph->n; ++i)
    7069           0 :                 c->scc_cluster[graph->node[i].scc] = graph->node[i].cluster;
    7070             : 
    7071           0 :         return isl_stat_ok;
    7072             : }
    7073             : 
    7074             : /* Compute weights on the proximity edges of "graph" that can
    7075             :  * be used by find_proximity to find the most appropriate
    7076             :  * proximity edge to use to merge two clusters in "c".
    7077             :  * The weights are also used by has_bounded_distances to determine
    7078             :  * whether the merge should be allowed.
    7079             :  * Store the maximum of the computed weights in graph->max_weight.
    7080             :  *
    7081             :  * The computed weight is a measure for the number of remaining schedule
    7082             :  * dimensions that can still be completely aligned.
    7083             :  * In particular, compute the number of equalities between
    7084             :  * input dimensions and output dimensions in the proximity constraints.
    7085             :  * The directions that are already handled by outer schedule bands
    7086             :  * are projected out prior to determining this number.
    7087             :  *
    7088             :  * Edges that will never be considered by find_proximity are ignored.
                     :  * Concretely, an edge is skipped (and its weight left untouched)
                     :  * if is_non_empty_proximity does not hold for it, if bad_cluster
                     :  * holds for the SCC of either end point, or if both end points
                     :  * are already assigned to the same cluster in c->scc_cluster.
                     :  *
                     :  * Return isl_stat_ok on success and isl_stat_error if
                     :  * is_non_empty_proximity reports an error or if the hull
                     :  * ends up being NULL.
    7089             :  */
    7090           0 : static isl_stat compute_weights(struct isl_sched_graph *graph,
    7091             :         struct isl_clustering *c)
    7092             : {
    7093             :         int i;
    7094             : 
    7095           0 :         graph->max_weight = 0; /* running maximum, updated per edge below */
    7096             : 
    7097           0 :         for (i = 0; i < graph->n_edge; ++i) {
    7098           0 :                 struct isl_sched_edge *edge = &graph->edge[i];
    7099           0 :                 struct isl_sched_node *src = edge->src;
    7100           0 :                 struct isl_sched_node *dst = edge->dst;
    7101             :                 isl_basic_map *hull;
    7102             :                 isl_bool prox;
    7103             :                 int n_in, n_out;
    7104             : 
    7105           0 :                 prox = is_non_empty_proximity(edge);
    7106           0 :                 if (prox < 0) /* negative isl_bool signals an error */
    7107           0 :                         return isl_stat_error;
    7108           0 :                 if (!prox)
    7109           0 :                         continue;
    7110           0 :                 if (bad_cluster(&c->scc[edge->src->scc]) ||
    7111           0 :                     bad_cluster(&c->scc[edge->dst->scc]))
    7112           0 :                         continue; /* either end point lies in a bad cluster */
    7113           0 :                 if (c->scc_cluster[edge->dst->scc] ==
    7114           0 :                     c->scc_cluster[edge->src->scc])
    7115           0 :                         continue; /* both ends already in the same cluster */
    7116             : 
    7117           0 :                 hull = isl_map_affine_hull(isl_map_copy(edge->map)); /* works on a copy of edge->map */
    7118           0 :                 hull = isl_basic_map_transform_dims(hull, isl_dim_in, 0,
    7119             :                                                     isl_mat_copy(src->vmap));
    7120           0 :                 hull = isl_basic_map_transform_dims(hull, isl_dim_out, 0,
    7121             :                                                     isl_mat_copy(dst->vmap));
    7122           0 :                 hull = isl_basic_map_project_out(hull,
    7123           0 :                                                 isl_dim_in, 0, src->rank); /* drop the first src->rank input dims */
    7124           0 :                 hull = isl_basic_map_project_out(hull,
    7125           0 :                                                 isl_dim_out, 0, dst->rank); /* drop the first dst->rank output dims */
    7126           0 :                 hull = isl_basic_map_remove_divs(hull);
    7127           0 :                 n_in = isl_basic_map_dim(hull, isl_dim_in);
    7128           0 :                 n_out = isl_basic_map_dim(hull, isl_dim_out);
    7129           0 :                 hull = isl_basic_map_drop_constraints_not_involving_dims(hull,
    7130             :                                                         isl_dim_in, 0, n_in); /* keep only constraints involving some input dim */
    7131           0 :                 hull = isl_basic_map_drop_constraints_not_involving_dims(hull,
    7132             :                                                         isl_dim_out, 0, n_out); /* ... and some output dim */
    7133           0 :                 if (!hull) /* NOTE(review): single check for the whole chain; presumably each isl call propagates a NULL input - confirm */
    7134           0 :                         return isl_stat_error;
    7135           0 :                 edge->weight = isl_basic_map_n_equality(hull); /* weight = number of equalities left in hull */
    7136           0 :                 isl_basic_map_free(hull);
    7137             : 
    7138           0 :                 if (edge->weight > graph->max_weight)
    7139           0 :                         graph->max_weight = edge->weight;
    7140             :         }
    7141             : 
    7142           0 :         return isl_stat_ok;
    7143             : }
    7144             : 
    7145             : /* Call compute_schedule_finish_band on each of the clusters in "c"
    7146             :  * in their topological order.  This order is determined by the scc
    7147             :  * fields of the nodes in "graph".
    7148             :  * Combine the results in a sequence expressing the topological order.
    7149             :  *
    7150             :  * If there is only one cluster left, then there is no need to introduce
    7151             :  * a sequence node.  Also, in this case, the cluster necessarily contains
    7152             :  * the SCC at position 0 in the original graph and is therefore also
    7153             :  * stored in the first cluster of "c".
                     :  *
                     :  * The single-cluster case is detected by graph->scc being equal to 1.
                     :  * Otherwise, position i of the sequence is finished using
                     :  * the cluster at index c->scc_cluster[i].
                     :  * Return the node that results from these calls.
    7154             :  */
    7155           0 : static __isl_give isl_schedule_node *finish_bands_clustering(
    7156             :         __isl_take isl_schedule_node *node, struct isl_sched_graph *graph,
    7157             :         struct isl_clustering *c)
    7158             : {
    7159             :         int i;
    7160             :         isl_ctx *ctx;
    7161             :         isl_union_set_list *filters;
    7162             : 
    7163           0 :         if (graph->scc == 1) /* single cluster: no sequence node needed */
    7164           0 :                 return compute_schedule_finish_band(node, &c->cluster[0], 0);
    7165             : 
    7166           0 :         ctx = isl_schedule_node_get_ctx(node);
    7167             : 
    7168           0 :         filters = extract_sccs(ctx, graph);
    7169           0 :         node = isl_schedule_node_insert_sequence(node, filters); /* sequence built from the extracted filters */
    7170             : 
    7171           0 :         for (i = 0; i < graph->scc; ++i) {
    7172           0 :                 int j = c->scc_cluster[i]; /* index into c->cluster for position i */
    7173           0 :                 node = isl_schedule_node_child(node, i); /* descend to child i ... */
    7174           0 :                 node = isl_schedule_node_child(node, 0); /* ... and then to its child 0 */
    7175           0 :                 node = compute_schedule_finish_band(node, &c->cluster[j], 0);
    7176           0 :                 node = isl_schedule_node_parent(node); /* move back up the two levels */
    7177           0 :                 node = isl_schedule_node_parent(node);
    7178             :         }
    7179             : 
    7180           0 :         return node;
    7181             : }
    7182             : 
    7183             : /* Compute a schedule for a connected dependence graph by first considering
    7184             :  * each strongly connected component (SCC) in the graph separately and then
    7185             :  * incrementally combining them into clusters.
    7186             :  * Return the updated schedule node.
    7187             :  *
    7188             :  * Initially, each cluster consists of a single SCC, each with its
    7189             :  * own band schedule.  The algorithm then tries to merge pairs
    7190             :  * of clusters along a proximity edge until no more suitable
    7191             :  * proximity edges can be found.  During this merging, the schedule
    7192             :  * is maintained in the individual SCCs.
    7193             :  * After the merging is completed, the full resulting clusters
    7194             :  * are extracted and in finish_bands_clustering,
    7195             :  * compute_schedule_finish_band is called on each of them to integrate
    7196             :  * the band into "node" and to continue the computation.
    7197             :  *
    7198             :  * compute_weights initializes the weights that are used by find_proximity.
                     :  *
                     :  * If any of the steps before finish_bands_clustering fails,
                     :  * then the clustering and "node" are freed and the result of
                     :  * isl_schedule_node_free is returned.
    7199             :  */
    7200           0 : static __isl_give isl_schedule_node *compute_schedule_wcc_clustering(
    7201             :         __isl_take isl_schedule_node *node, struct isl_sched_graph *graph)
    7202             : {
    7203             :         isl_ctx *ctx;
    7204             :         struct isl_clustering c;
    7205             :         int i;
    7206             : 
    7207           0 :         ctx = isl_schedule_node_get_ctx(node);
    7208             : 
    7209           0 :         if (clustering_init(ctx, &c, graph) < 0)
    7210           0 :                 goto error;
    7211             : 
    7212           0 :         if (compute_weights(graph, &c) < 0)
    7213           0 :                 goto error;
    7214             : 
    7215             :         for (;;) { /* merge until find_proximity yields no usable edge */
    7216           0 :                 i = find_proximity(graph, &c);
    7217           0 :                 if (i < 0) /* negative result is treated as an error */
    7218           0 :                         goto error;
    7219           0 :                 if (i >= graph->n_edge) /* index past the last edge ends the merging */
    7220           0 :                         break;
    7221           0 :                 if (merge_clusters_along_edge(ctx, graph, i, &c) < 0)
    7222           0 :                         goto error;
    7223           0 :         }
    7224             : 
    7225           0 :         if (extract_clusters(ctx, graph, &c) < 0)
    7226           0 :                 goto error;
    7227             : 
    7228           0 :         node = finish_bands_clustering(node, graph, &c);
    7229             : 
    7230           0 :         clustering_free(ctx, &c);
    7231           0 :         return node;
    7232             : error: /* NOTE(review): also reached when clustering_init fails, so clustering_free presumably copes with a partially initialized c - confirm */
    7233           0 :         clustering_free(ctx, &c);
    7234           0 :         return isl_schedule_node_free(node);
    7235             : }
    7236             : 
    7237             : /* Compute a schedule for a connected dependence graph and return
    7238             :  * the updated schedule node.
    7239             :  *
    7240             :  * If Feautrier's algorithm is selected, we first recursively try to satisfy
    7241             :  * as many validity dependences as possible. When all validity dependences
    7242             :  * are satisfied we extend the schedule to a full-dimensional schedule.
    7243             :  *
    7244             :  * Call compute_schedule_wcc_whole or compute_schedule_wcc_clustering
    7245             :  * depending on whether the user has selected the option to try and
    7246             :  * compute a schedule for the entire (weakly connected) component first.
    7247             :  * If there is only a single strongly connected component (SCC), then
    7248             :  * there is no point in trying to combine SCCs
    7249             :  * in compute_schedule_wcc_clustering, so compute_schedule_wcc_whole
    7250             :  * is called instead.
                     :  *
                     :  * A NULL "node" results in a NULL return value.
                     :  * If detect_sccs or compute_maxvar fails, then "node" is freed and
                     :  * the result of isl_schedule_node_free is returned.
    7251             :  */
    7252           0 : static __isl_give isl_schedule_node *compute_schedule_wcc(
    7253             :         __isl_take isl_schedule_node *node, struct isl_sched_graph *graph)
    7254             : {
    7255             :         isl_ctx *ctx;
    7256             : 
    7257           0 :         if (!node)
    7258           0 :                 return NULL;
    7259             : 
    7260           0 :         ctx = isl_schedule_node_get_ctx(node);
    7261           0 :         if (detect_sccs(ctx, graph) < 0) /* graph->scc is consulted below */
    7262           0 :                 return isl_schedule_node_free(node);
    7263             : 
    7264           0 :         if (compute_maxvar(graph) < 0)
    7265           0 :                 return isl_schedule_node_free(node);
    7266             : 
    7267           0 :         if (need_feautrier_step(ctx, graph)) /* Feautrier path takes precedence over the choice below */
    7268           0 :                 return compute_schedule_wcc_feautrier(node, graph);
    7269             : 
    7270           0 :         if (graph->scc <= 1 || isl_options_get_schedule_whole_component(ctx)) /* at most one SCC, or whole-component option set */
    7271           0 :                 return compute_schedule_wcc_whole(node, graph);
    7272             :         else
    7273           0 :                 return compute_schedule_wcc_clustering(node, graph);
    7274             : }
    7275             : 
    7276             : /* Compute a schedule for each group of nodes identified by node->scc
    7277             :  * separately and then combine them in a sequence node (or as set node
    7278             :  * if graph->weak is set) inserted at position "node" of the schedule tree.
    7279             :  * Return the updated schedule node.
    7280             :  *
    7281             :  * If "wcc" is set then each of the groups belongs to a single
    7282             :  * weakly connected component in the dependence graph so that
    7283             :  * there is no need for compute_sub_schedule to look for weakly
    7284             :  * connected components.
    7285             :  *
    7286             :  * If a set node would be introduced and if the number of components
    7287             :  * is equal to the number of nodes, then check if the schedule
    7288             :  * is already complete.  If so, a redundant set node would be introduced
    7289             :  * (without any further descendants) stating that the statements
    7290             :  * can be executed in arbitrary order, which is also expressed
    7291             :  * by the absence of any node.  Refrain from inserting any nodes
    7292             :  * in this case and simply return.
                     :  *
                     :  * The completeness check compares graph->n_row with graph->maxvar
                     :  * after calling compute_maxvar.
                     :  * A NULL "node" results in a NULL return value.
                     :  * If compute_maxvar fails, then "node" is freed and
                     :  * the result of isl_schedule_node_free is returned.
    7293             :  */
    7294           0 : static __isl_give isl_schedule_node *compute_component_schedule(
    7295             :         __isl_take isl_schedule_node *node, struct isl_sched_graph *graph,
    7296             :         int wcc)
    7297             : {
    7298             :         int component;
    7299             :         isl_ctx *ctx;
    7300             :         isl_union_set_list *filters;
    7301             : 
    7302           0 :         if (!node)
    7303           0 :                 return NULL;
    7304             : 
    7305           0 :         if (graph->weak && graph->scc == graph->n) { /* set node case with one component per node */
    7306           0 :                 if (compute_maxvar(graph) < 0)
    7307           0 :                         return isl_schedule_node_free(node);
    7308           0 :                 if (graph->n_row >= graph->maxvar) /* schedule already complete: insert nothing */
    7309           0 :                         return node;
    7310             :         }
    7311             : 
    7312           0 :         ctx = isl_schedule_node_get_ctx(node);
    7313           0 :         filters = extract_sccs(ctx, graph);
    7314           0 :         if (graph->weak)
    7315           0 :                 node = isl_schedule_node_insert_set(node, filters);
    7316             :         else
    7317           0 :                 node = isl_schedule_node_insert_sequence(node, filters);
    7318             : 
    7319           0 :         for (component = 0; component < graph->scc; ++component) {
    7320           0 :                 node = isl_schedule_node_child(node, component); /* descend to this component's child ... */
    7321           0 :                 node = isl_schedule_node_child(node, 0); /* ... and then to its child 0 */
    7322           0 :                 node = compute_sub_schedule(node, ctx, graph,
    7323             :                                     &node_scc_exactly,
    7324             :                                     &edge_scc_exactly, component, wcc); /* restricted to this component via the two predicates */
    7325           0 :                 node = isl_schedule_node_parent(node); /* move back up the two levels */
    7326           0 :                 node = isl_schedule_node_parent(node);
    7327             :         }
    7328             : 
    7329           0 :         return node;
    7330             : }
    7331             : 
    7332             : /* Compute a schedule for the given dependence graph and insert it at "node".
    7333             :  * Return the updated schedule node.
    7334             :  *
    7335             :  * We first check if the graph is connected (through validity and conditional
    7336             :  * validity dependences) and, if not, compute a schedule
    7337             :  * for each component separately.
    7338             :  * If the schedule_serialize_sccs option is set, then we check for strongly
    7339             :  * connected components instead and compute a separate schedule for
    7340             :  * each such strongly connected component.
                     :  *
                     :  * If more than one component is found, then compute_component_schedule
                     :  * is called with its "wcc" argument set to 1.
                     :  * Otherwise, compute_schedule_wcc is called on the entire graph.
                     :  * A NULL "node" results in a NULL return value.
                     :  * If the component detection fails, then "node" is freed and
                     :  * the result of isl_schedule_node_free is returned.
                     :  *
                     :  * NOTE(review): "node" is freed on the error paths, so it is
                     :  * effectively taken by this function, even though the parameter
                     :  * does not carry an __isl_take annotation - confirm and annotate.
    7341             :  */
    7342           0 : static __isl_give isl_schedule_node *compute_schedule(isl_schedule_node *node,
    7343             :         struct isl_sched_graph *graph)
    7344             : {
    7345             :         isl_ctx *ctx;
    7346             : 
    7347           0 :         if (!node)
    7348           0 :                 return NULL;
    7349             : 
    7350           0 :         ctx = isl_schedule_node_get_ctx(node);
    7351           0 :         if (isl_options_get_schedule_serialize_sccs(ctx)) {
    7352           0 :                 if (detect_sccs(ctx, graph) < 0) /* components are SCCs in this case */
    7353           0 :                         return isl_schedule_node_free(node);
    7354             :         } else {
    7355           0 :                 if (detect_wccs(ctx, graph) < 0) /* components are weakly connected components */
    7356           0 :                         return isl_schedule_node_free(node);
    7357             :         }
    7358             : 
    7359           0 :         if (graph->scc > 1) /* several components: schedule each one separately */
    7360           0 :                 return compute_component_schedule(node, graph, 1);
    7361             : 
    7362           0 :         return compute_schedule_wcc(node, graph);
    7363             : }
    7364             : 
    7365             : /* Compute a schedule on sc->domain that respects the given schedule
    7366             :  * constraints.
    7367             :  *
    7368             :  * In particular, the schedule respects all the validity dependences.
    7369             :  * If the default isl scheduling algorithm is used, it tries to minimize
    7370             :  * the dependence distances over the proximity dependences.
    7371             :  * If Feautrier's scheduling algorithm is used, the proximity dependence
    7372             :  * distances are only minimized during the extension to a full-dimensional
    7373             :  * schedule.
    7374             :  *
    7375             :  * If there are any condition and conditional validity dependences,
    7376             :  * then the conditional validity dependences may be violated inside
    7377             :  * a tilable band, provided they have no adjacent non-local
    7378             :  * condition dependences.
                     :  *
                     :  * "sc" is taken by this function and is freed on every return path.
                     :  * If isl_union_set_n_set reports zero sets for the domain, then
                     :  * the schedule is constructed directly from the domain, without
                     :  * building a dependence graph.
                     :  * If graph_init fails, then the domain is freed and the remaining
                     :  * calls operate on the result of isl_union_set_free
                     :  * (presumably NULL, such that a NULL schedule is returned - confirm).
    7379             :  */
    7380           0 : __isl_give isl_schedule *isl_schedule_constraints_compute_schedule(
    7381             :         __isl_take isl_schedule_constraints *sc)
    7382             : {
    7383           0 :         isl_ctx *ctx = isl_schedule_constraints_get_ctx(sc); /* fetched up front; needed by graph_free after the work on sc */
    7384           0 :         struct isl_sched_graph graph = { 0 }; /* zero-initialized, so graph.n is 0 until graph_init sets it */
    7385             :         isl_schedule *sched;
    7386             :         isl_schedule_node *node;
    7387             :         isl_union_set *domain;
    7388             : 
    7389           0 :         sc = isl_schedule_constraints_align_params(sc);
    7390             : 
    7391           0 :         domain = isl_schedule_constraints_get_domain(sc);
    7392           0 :         if (isl_union_set_n_set(domain) == 0) { /* no sets in the domain: nothing to schedule */
    7393           0 :                 isl_schedule_constraints_free(sc);
    7394           0 :                 return isl_schedule_from_domain(domain);
    7395             :         }
    7396             : 
    7397           0 :         if (graph_init(&graph, sc) < 0)
    7398           0 :                 domain = isl_union_set_free(domain); /* continue with the freed result instead of returning early */
    7399             : 
    7400           0 :         node = isl_schedule_node_from_domain(domain);
    7401           0 :         node = isl_schedule_node_child(node, 0); /* position below the domain node */
    7402           0 :         if (graph.n > 0) /* only run the scheduler if the graph has nodes */
    7403           0 :                 node = compute_schedule(node, &graph);
    7404           0 :         sched = isl_schedule_node_get_schedule(node);
    7405           0 :         isl_schedule_node_free(node);
    7406             : 
    7407           0 :         graph_free(ctx, &graph);
    7408           0 :         isl_schedule_constraints_free(sc);
    7409             : 
    7410           0 :         return sched;
    7411             : }
    7412             : 
    7413             : /* Compute a schedule for the given union of domains that respects
    7414             :  * all the validity dependences and minimizes
    7415             :  * the dependence distances over the proximity dependences.
    7416             :  *
    7417             :  * This function is kept for backward compatibility.
                     :  *
                     :  * The three arguments are taken by this function.  They are combined
                     :  * into a schedule constraints object, which is then handed
                     :  * to isl_schedule_constraints_compute_schedule.
                     :  * The result of that call is returned unchanged.
    7418             :  */
    7419           0 : __isl_give isl_schedule *isl_union_set_compute_schedule(
    7420             :         __isl_take isl_union_set *domain,
    7421             :         __isl_take isl_union_map *validity,
    7422             :         __isl_take isl_union_map *proximity)
    7423             : {
    7424             :         isl_schedule_constraints *sc;
    7425             : 
    7426           0 :         sc = isl_schedule_constraints_on_domain(domain); /* start from constraints on the domain only */
    7427           0 :         sc = isl_schedule_constraints_set_validity(sc, validity);
    7428           0 :         sc = isl_schedule_constraints_set_proximity(sc, proximity);
    7429             : 
    7430           0 :         return isl_schedule_constraints_compute_schedule(sc);
    7431             : }

Generated by: LCOV version 1.12