From 96cc0c38a827ac197099498cbce8191c3a2e1e35 Mon Sep 17 00:00:00 2001 From: Cem Keylan Date: Thu, 23 Sep 2021 17:03:37 +0300 Subject: tsort: add new tool --- .gitignore | 1 + Makefile | 14 +- usr.bin/tsort/CVS/Entries | 4 + usr.bin/tsort/CVS/Repository | 1 + usr.bin/tsort/CVS/Root | 1 + usr.bin/tsort/Makefile | 9 + usr.bin/tsort/tsort.1 | 167 +++++++ usr.bin/tsort/tsort.c | 1014 ++++++++++++++++++++++++++++++++++++++++++ 8 files changed, 1209 insertions(+), 2 deletions(-) create mode 100644 usr.bin/tsort/CVS/Entries create mode 100644 usr.bin/tsort/CVS/Repository create mode 100644 usr.bin/tsort/CVS/Root create mode 100644 usr.bin/tsort/Makefile create mode 100644 usr.bin/tsort/tsort.1 create mode 100644 usr.bin/tsort/tsort.c diff --git a/.gitignore b/.gitignore index 17e1976..d519b23 100644 --- a/.gitignore +++ b/.gitignore @@ -11,5 +11,6 @@ /patch /pax /signify +/tsort /lib/libc/hash/*hl.c config.mk diff --git a/Makefile b/Makefile index 4cc5547..769f113 100644 --- a/Makefile +++ b/Makefile @@ -17,7 +17,8 @@ BIN = \ md5 \ patch \ pax \ - signify + signify \ + tsort LIB = lib/libbsd.a LIBOBJ = \ @@ -74,7 +75,8 @@ MAN = \ bin/pax/cpio.1 \ bin/pax/pax.1 \ bin/pax/tar.1 \ - usr.bin/signify/signify.1 + usr.bin/signify/signify.1 \ + usr.bin/tsort/tsort.1 MANDOCLIBS = ${LIB} GREPLIBS = ${LIB} @@ -313,6 +315,14 @@ BINOBJ += ${SIGNIFYOBJ} signify: ${SIGNIFYOBJ} ${LIB} ${CC} ${LDFLAGS} -o $@ ${SIGNIFYOBJ} ${LIB} +# ------------------------------------------------------------------------------ +# tsort +TSORTOBJ = \ + usr.bin/tsort/tsort.o +BINOBJ += ${TSORTOBJ} +tsort: ${TSORTOBJ} ${LIB} + ${CC} ${LDFLAGS} -o $@ ${TSORTOBJ} ${LIB} + # ------------------------------------------------------------------------------ # hash helpers HELPER = lib/libc/hash/helper.c diff --git a/usr.bin/tsort/CVS/Entries b/usr.bin/tsort/CVS/Entries new file mode 100644 index 0000000..528f3f2 --- /dev/null +++ b/usr.bin/tsort/CVS/Entries @@ -0,0 +1,4 @@ +/Makefile/1.7/Thu Sep 23 13:56:23 2021// +/tsort.1/1.24/Thu Sep 23 13:56:23 2021// +/tsort.c/1.37/Thu Sep 23 13:56:23 2021// +D diff --git a/usr.bin/tsort/CVS/Repository b/usr.bin/tsort/CVS/Repository new file mode 100644 index 0000000..3079881 --- /dev/null +++ b/usr.bin/tsort/CVS/Repository @@ -0,0 +1 @@ +src/usr.bin/tsort diff --git a/usr.bin/tsort/CVS/Root b/usr.bin/tsort/CVS/Root new file mode 100644 index 0000000..3811072 --- /dev/null +++ b/usr.bin/tsort/CVS/Root @@ -0,0 +1 @@ +/cvs diff --git a/usr.bin/tsort/Makefile b/usr.bin/tsort/Makefile new file mode 100644 index 0000000..0c57770 --- /dev/null +++ b/usr.bin/tsort/Makefile @@ -0,0 +1,9 @@ +# $OpenBSD: Makefile,v 1.7 2017/07/09 21:23:19 espie Exp $ + +PROG = tsort + +CDIAGFLAGS = -Wall -Wno-char-subscripts -Wstrict-prototypes -pedantic -W +DPADD += ${LIBUTIL} +LDADD += -lutil + +.include diff --git a/usr.bin/tsort/tsort.1 b/usr.bin/tsort/tsort.1 new file mode 100644 index 0000000..56f5fc3 --- /dev/null +++ b/usr.bin/tsort/tsort.1 @@ -0,0 +1,167 @@ +.\" $OpenBSD: tsort.1,v 1.24 2019/04/23 18:13:11 schwarze Exp $ +.\" $NetBSD: tsort.1,v 1.6 1996/01/17 20:37:49 mycroft Exp $ +.\" +.\" Copyright (c) 1990, 1993, 1994 +.\" The Regents of the University of California. All rights reserved. +.\" +.\" This manual is derived from one contributed to Berkeley by +.\" Michael Rendell of Memorial University of Newfoundland. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" 3. Neither the name of the University nor the names of its contributors +.\" may be used to endorse or promote products derived from this software +.\" without specific prior written permission. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" @(#)tsort.1 8.3 (Berkeley) 4/1/94 +.\" +.Dd $Mdocdate: April 23 2019 $ +.Dt TSORT 1 +.Os +.Sh NAME +.Nm tsort +.Nd topological sort of a directed graph +.Sh SYNOPSIS +.Nm tsort +.Op Fl flqrvw +.Op Fl h Ar file +.Op Ar file +.Sh DESCRIPTION +.Nm tsort +takes a list of pairs of node names representing directed arcs in +a graph and prints the nodes in topological order on standard output. +That is, the input describes a partial ordering relation, from which +.Nm +computes a total order compatible with this partial ordering. +.Pp +Input is taken from the named +.Ar file , +or from standard input if no file +is given. +.Pp +Node names in the input are separated by white space and there must +be an even number of node names. +.Pp +Presence of a node in a graph can be represented by an arc from the node +to itself. +This is useful when a node is not connected to any other nodes. +.Pp +If the graph contains a cycle (and therefore cannot be properly sorted), +one of the arcs in the cycle is ignored and the sort continues. +Cycles are reported on standard error. +.Pp +The options are as follows: +.Bl -tag -width Ds +.It Fl f +Resolve ambiguities by selecting nodes based on the order of appearance +of the first component of the pairs. +.It Fl h Ar file +Use +.Ar file , +which holds an ordered list of nodes, to resolve ambiguities. +In case of duplicates, the first entry is chosen. +.It Fl l +Search for and display the longest cycle. +Can take a very long time, as it may need to solve an NP-complete problem. +.It Fl q +Do not display informational messages about cycles. +This is primarily intended for building libraries, where optimal ordering +is not critical, and cycles occur often. +.It Fl r +Reverse the ordering relation. +.It Fl v +Inform on the exact number of edges broken while breaking cycles. +If a hints file was used, inform on seen nodes absent from that file. +.It Fl w +Exit with exit code the number of cycles +.Nm +had to break. +.El +.Sh EXIT STATUS +.Ex -std tsort +.Sh EXAMPLES +Faced with the input: +.Bd -literal -offset indent +a b +b c +b d +d f +c e +.Ed +.Pp +.Nm +outputs: +.Bd -literal -offset indent +a +b +c +e +d +f +.Ed +.Pp +which is one total ordering compatible with the individual relations. +There is no unicity, another compatible total ordering would be: +.Bd -literal -offset indent +a +b +c +d +e +f +.Ed +.Pp +.Nm +is commonly used to analyze dependencies and find a correct build order +in a static way, whereas +.Xr make 1 +accomplishes the same task in a dynamic way. +.Sh SEE ALSO +.Xr ar 1 , +.Xr lorder 1 , +.Xr make 1 +.Rs +.%A Donald E. Knuth +.%B The Art of Computer Programming +.%V Vol. 1 +.%P pp. 258\(en268 +.%D 1973 +.Re +.Sh STANDARDS +The +.Nm +utility is compliant with the +.St -p1003.1-2008 +specification. +.Pp +The flags +.Op Fl fhlqrvw +are extensions to that specification. +.Sh HISTORY +A +.Nm +command appeared in +.At v7 . +This +.Nm tsort +command was completely rewritten by Marc Espie for +.Ox , +to finally use the well-known optimal algorithms for topological sorting. diff --git a/usr.bin/tsort/tsort.c b/usr.bin/tsort/tsort.c new file mode 100644 index 0000000..3e0e1ab --- /dev/null +++ b/usr.bin/tsort/tsort.c @@ -0,0 +1,1014 @@ +/* $OpenBSD: tsort.c,v 1.37 2019/07/11 17:28:32 mestre Exp $ */ +/* ex:ts=8 sw=4: + * + * Copyright (c) 1999-2004 Marc Espie + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* The complexity of topological sorting is O(e), where e is the + * size of input. While reading input, vertices have to be identified, + * thus add the complexity of e keys retrieval among v keys using + * an appropriate data structure. This program uses open double hashing + * for that purpose. See Knuth for the expected complexity of double + * hashing (Brent variation should probably be used if v << e, as a user + * option). + * + * The algorithm used for longest cycle reporting is accurate, but somewhat + * expensive. It may need to build all free paths of the graph (a free + * path is a path that never goes twice through the same node), whose + * number can be as high as O(2^e). Usually, the number of free paths is + * much smaller though. This program's author does not believe that a + * significantly better worst-case complexity algorithm exists. + * + * In case of a hints file, the set of minimal nodes is maintained as a + * heap. The resulting complexity is O(e+v log v) for the worst case. + * The average should actually be near O(e). + * + * If the hints file is incomplete, there is some extra complexity incurred + * by make_transparent, which does propagate order values to unmarked + * nodes. In the worst case, make_transparent is O(e u), + * where u is the number of originally unmarked nodes. + * In practice, it is much faster. + * + * The simple topological sort algorithm detects cycles. This program + * goes further, breaking cycles through the use of simple heuristics. + * Each cycle break checks the whole set of nodes, hence if c cycles break + * are needed, this is an extra cost of O(c v). + * + * Possible heuristics are as follows: + * - break cycle at node with lowest number of predecessors (default case), + * - break longest cycle at node with lowest number of predecessors, + * - break cycle at next node from the hints file. + * + * Except for the hints file case, which sets an explicit constraint on + * which cycle to break, those heuristics locally result in the smallest + * number of broken edges. + * + * Those are admittedly greedy strategies, as is the selection of the next + * node from the hints file amongst equivalent candidates that is used for + * `stable' topological sorting. + */ + +#ifdef __GNUC__ +#define UNUSED __attribute__((unused)) +#else +#define UNUSED +#endif + +struct node; + +/* The set of arcs from a given node is stored as a linked list. */ +struct link { + struct link *next; + struct node *node; +}; + +#define NO_ORDER UINT_MAX + +struct node { + unsigned int refs; /* Number of arcs left, coming into this node. + * Note that nodes with a null count can't + * be part of cycles. */ + unsigned int order; /* Order of nodes according to a hint file. */ + + struct link *arcs; /* List of forward arcs. */ + + /* Cycle detection algorithms build a free path of nodes. */ + struct node *from; /* Previous node in the current path. */ + struct link *traverse; /* Next link to traverse when backtracking. */ + unsigned int mark; /* Mark processed nodes in cycle discovery. */ + + char k[1]; /* Name of this node. */ +}; + +#define HASH_START 9 + +struct array { + unsigned int entries; + struct node **t; +}; + +static void nodes_init(struct ohash *); +static struct node *node_lookup(struct ohash *, const char *, const char *); +static __dead void usage(void); +static struct node *new_node(const char *, const char *); + +static unsigned int read_pairs(FILE *, struct ohash *, int, + const char *, unsigned int, int); +static void split_nodes(struct ohash *, struct array *, struct array *); +static void make_transparent(struct ohash *); +static void insert_arc(struct node *, struct node *); + +#ifdef DEBUG +static void dump_node(struct node *); +static void dump_array(struct array *); +static void dump_hash(struct ohash *); +#endif +static unsigned int read_hints(FILE *, struct ohash *, int, + const char *, unsigned int); +static struct node *find_smallest_node(struct array *); +static struct node *find_good_cycle_break(struct array *); +static void print_cycle(struct array *); +static struct node *find_cycle_from(struct node *, struct array *); +static struct node *find_predecessor(struct array *, struct node *); +static unsigned int traverse_node(struct node *, unsigned int, struct array *); +static struct node *find_longest_cycle(struct array *, struct array *); +static struct node *find_normal_cycle(struct array *, struct array *); + +static void heap_down(struct array *, unsigned int); +static void heapify(struct array *, int); +static struct node *dequeue(struct array *); +static void enqueue(struct array *, struct node *); + + + +static void *hash_calloc(size_t, size_t, void *); +static void hash_free(void *, void *); +static void* entry_alloc(size_t, void *); +static void *ereallocarray(void *, size_t, size_t); +static void *emem(void *); +#define DEBUG_TRAVERSE 0 +static struct ohash_info node_info = { + offsetof(struct node, k), NULL, hash_calloc, hash_free, entry_alloc }; +static void parse_args(int, char *[], struct ohash *); +static int tsort(struct ohash *); + +static int quiet_flag, long_flag, + warn_flag, hints_flag, verbose_flag; + + +int main(int, char *[]); + +/*** + *** Memory handling. + ***/ + +static void * +emem(void *p) +{ + if (p) + return p; + else + errx(1, "Memory exhausted"); +} + +static void * +hash_calloc(size_t n, size_t s, void *u UNUSED) +{ + return emem(calloc(n, s)); +} + +static void +hash_free(void *p, void *u UNUSED) +{ + free(p); +} + +static void * +entry_alloc(size_t s, void *u UNUSED) +{ + return ereallocarray(NULL, 1, s); +} + +static void * +ereallocarray(void *p, size_t n, size_t s) +{ + return emem(reallocarray(p, n, s)); +} + + +/*** + *** Hash table. + ***/ + +/* Inserting and finding nodes in the hash structure. + * We handle interval strings for efficiency wrt fgetln. */ +static struct node * +new_node(const char *start, const char *end) +{ + struct node *n; + + n = ohash_create_entry(&node_info, start, &end); + n->from = NULL; + n->arcs = NULL; + n->refs = 0; + n->mark = 0; + n->order = NO_ORDER; + n->traverse = NULL; + return n; +} + + +static void +nodes_init(struct ohash *h) +{ + ohash_init(h, HASH_START, &node_info); +} + +static struct node * +node_lookup(struct ohash *h, const char *start, const char *end) +{ + unsigned int i; + struct node * n; + + i = ohash_qlookupi(h, start, &end); + + n = ohash_find(h, i); + if (n == NULL) + n = ohash_insert(h, i, new_node(start, end)); + return n; +} + +#ifdef DEBUG +static void +dump_node(struct node *n) +{ + struct link *l; + + if (n->refs == 0) + return; + printf("%s (%u/%u): ", n->k, n->order, n->refs); + for (l = n->arcs; l != NULL; l = l->next) + if (n->refs != 0) + printf("%s(%u/%u) ", l->node->k, l->node->order, l->node->refs); + putchar('\n'); +} + +static void +dump_array(struct array *a) +{ + unsigned int i; + + for (i = 0; i < a->entries; i++) + dump_node(a->t[i]); +} + +static void +dump_hash(struct ohash *h) +{ + unsigned int i; + struct node *n; + + for (n = ohash_first(h, &i); n != NULL; n = ohash_next(h, &i)) + dump_node(n); +} +#endif + +/*** + *** Reading data. + ***/ + +static void +insert_arc(struct node *a, struct node *b) +{ + struct link *l; + + /* Check that this arc is not already present. */ + for (l = a->arcs; l != NULL; l = l->next) { + if (l->node == b) + return; + } + b->refs++; + l = ereallocarray(NULL, 1, sizeof(struct link)); + l->node = b; + l->next = a->arcs; + a->arcs = l; +} + +static unsigned int +read_pairs(FILE *f, struct ohash *h, int reverse, const char *name, + unsigned int order, int hint) +{ + int toggle; + struct node *a; + size_t size; + char *str; + + toggle = 1; + a = NULL; + + while ((str = fgetln(f, &size)) != NULL) { + char *sentinel; + + sentinel = str + size; + for (;;) { + char *e; + + while (str < sentinel && + isspace((unsigned char)*str)) + str++; + if (str == sentinel) + break; + for (e = str; + e < sentinel && !isspace((unsigned char)*e); e++) + continue; + if (toggle) { + a = node_lookup(h, str, e); + if (a->order == NO_ORDER && hint) + a->order = order++; + } else { + struct node *b; + + b = node_lookup(h, str, e); + assert(a != NULL); + if (b != a) { + if (reverse) + insert_arc(b, a); + else + insert_arc(a, b); + } + } + toggle = !toggle; + str = e; + } + } + if (toggle == 0) + errx(1, "odd number of node names in %s", name); + if (!feof(f)) + err(1, "error reading %s", name); + return order; +} + +static unsigned int +read_hints(FILE *f, struct ohash *h, int quiet, const char *name, + unsigned int order) +{ + char *str; + size_t size; + + while ((str = fgetln(f, &size)) != NULL) { + char *sentinel; + + sentinel = str + size; + for (;;) { + char *e; + struct node *a; + + while (str < sentinel && isspace((unsigned char)*str)) + str++; + if (str == sentinel) + break; + for (e = str; + e < sentinel && !isspace((unsigned char)*e); e++) + continue; + a = node_lookup(h, str, e); + if (a->order != NO_ORDER) { + if (!quiet) + warnx( + "duplicate node %s in hints file %s", + a->k, name); + } else + a->order = order++; + str = e; + } + } + if (!feof(f)) + err(1, "error reading %s", name); + return order; +} + + +/*** + *** Standard heap handling routines. + ***/ + +static void +heap_down(struct array *h, unsigned int i) +{ + unsigned int j; + struct node *swap; + + for (; (j=2*i+1) < h->entries; i = j) { + if (j+1 < h->entries && h->t[j+1]->order < h->t[j]->order) + j++; + if (h->t[i]->order <= h->t[j]->order) + break; + swap = h->t[i]; + h->t[i] = h->t[j]; + h->t[j] = swap; + } +} + +static void +heapify(struct array *h, int verbose) +{ + unsigned int i; + + for (i = h->entries; i != 0;) { + if (h->t[--i]->order == NO_ORDER && verbose) + warnx("node %s absent from hints file", h->t[i]->k); + heap_down(h, i); + } +} + +#define DEQUEUE(h) ( hints_flag ? dequeue(h) : (h)->t[--(h)->entries] ) + +static struct node * +dequeue(struct array *h) +{ + struct node *n; + + if (h->entries == 0) + n = NULL; + else { + n = h->t[0]; + if (--h->entries != 0) { + h->t[0] = h->t[h->entries]; + heap_down(h, 0); + } + } + return n; +} + +#define ENQUEUE(h, n) do { \ + if (hints_flag) \ + enqueue((h), (n)); \ + else \ + (h)->t[(h)->entries++] = (n); \ + } while(0); + +static void +enqueue(struct array *h, struct node *n) +{ + unsigned int i, j; + struct node *swap; + + h->t[h->entries++] = n; + for (i = h->entries-1; i > 0; i = j) { + j = (i-1)/2; + if (h->t[j]->order < h->t[i]->order) + break; + swap = h->t[j]; + h->t[j] = h->t[i]; + h->t[i] = swap; + } +} + +/* Nodes without order should not hinder direct dependencies. + * Iterate until no nodes are left. + */ +static void +make_transparent(struct ohash *hash) +{ + struct node *n; + unsigned int i; + struct link *l; + int adjusted; + int bad; + unsigned int min; + + /* first try to solve complete nodes */ + do { + adjusted = 0; + bad = 0; + for (n = ohash_first(hash, &i); n != NULL; + n = ohash_next(hash, &i)) { + if (n->order == NO_ORDER) { + min = NO_ORDER; + + for (l = n->arcs; l != NULL; l = l->next) { + /* unsolved node -> delay resolution */ + if (l->node->order == NO_ORDER) { + bad = 1; + break; + } else if (l->node->order < min) + min = l->node->order; + } + if (min < NO_ORDER && l == NULL) { + n->order = min; + adjusted = 1; + } + } + } + + } while (adjusted); + + /* then, if incomplete nodes are left, do them */ + if (bad) do { + adjusted = 0; + for (n = ohash_first(hash, &i); n != NULL; + n = ohash_next(hash, &i)) + if (n->order == NO_ORDER) + for (l = n->arcs; l != NULL; l = l->next) + if (l->node->order < n->order) { + n->order = l->node->order; + adjusted = 1; + } + } while (adjusted); +} + + +/*** + *** Search through hash array for nodes. + ***/ + +/* Split nodes into unrefed nodes/live nodes. */ +static void +split_nodes(struct ohash *hash, struct array *heap, struct array *remaining) +{ + + struct node *n; + unsigned int i; + + heap->t = ereallocarray(NULL, ohash_entries(hash), + sizeof(struct node *)); + remaining->t = ereallocarray(NULL, ohash_entries(hash), + sizeof(struct node *)); + heap->entries = 0; + remaining->entries = 0; + + for (n = ohash_first(hash, &i); n != NULL; n = ohash_next(hash, &i)) { + if (n->refs == 0) + heap->t[heap->entries++] = n; + else + remaining->t[remaining->entries++] = n; + } +} + +/* Good point to break a cycle: live node with as few refs as possible. */ +static struct node * +find_good_cycle_break(struct array *h) +{ + unsigned int i; + unsigned int best; + struct node *u; + + best = UINT_MAX; + u = NULL; + + assert(h->entries != 0); + for (i = 0; i < h->entries; i++) { + struct node *n = h->t[i]; + /* No need to look further. */ + if (n->refs == 1) + return n; + if (n->refs != 0 && n->refs < best) { + best = n->refs; + u = n; + } + } + assert(u != NULL); + return u; +} + +/* Retrieve the node with the smallest order. */ +static struct node * +find_smallest_node(struct array *h) +{ + unsigned int i; + unsigned int best; + struct node *u; + + best = UINT_MAX; + u = NULL; + + assert(h->entries != 0); + for (i = 0; i < h->entries; i++) { + struct node *n = h->t[i]; + if (n->refs != 0 && n->order < best) { + best = n->order; + u = n; + } + } + assert(u != NULL); + return u; +} + + +/*** + *** Graph algorithms. + ***/ + +/* Explore the nodes reachable from i to find a cycle, store it in c. + * This may fail. */ +static struct node * +find_cycle_from(struct node *i, struct array *c) +{ + struct node *n; + + n = i; + /* XXX Previous cycle findings may have left this pointer non-null. */ + i->from = NULL; + + for (;;) { + /* Note that all marks are reversed before this code exits. */ + n->mark = 1; + if (n->traverse) + n->traverse = n->traverse->next; + else + n->traverse = n->arcs; + /* Skip over dead nodes. */ + while (n->traverse && n->traverse->node->refs == 0) + n->traverse = n->traverse->next; + if (n->traverse) { + struct node *go = n->traverse->node; + + if (go->mark) { + c->entries = 0; + for (; n != NULL && n != go; n = n->from) { + c->t[c->entries++] = n; + n->mark = 0; + } + for (; n != NULL; n = n->from) + n->mark = 0; + c->t[c->entries++] = go; + return go; + } else { + go->from = n; + n = go; + } + } else { + n->mark = 0; + n = n->from; + if (n == NULL) + return NULL; + } + } +} + +/* Find a live predecessor of node n. This is a slow routine, as it needs + * to go through the whole array, but it is not needed often. + */ +static struct node * +find_predecessor(struct array *a, struct node *n) +{ + unsigned int i; + + for (i = 0; i < a->entries; i++) { + struct node *m; + + m = a->t[i]; + if (m->refs != 0) { + struct link *l; + + for (l = m->arcs; l != NULL; l = l->next) + if (l->node == n) + return m; + } + } + assert(1 == 0); + return NULL; +} + +/* Traverse all strongly connected components reachable from node n. + Start numbering them at o. Return the maximum order reached. + Update the largest cycle found so far. + */ +static unsigned int +traverse_node(struct node *n, unsigned int o, struct array *c) +{ + unsigned int min, max; + + n->from = NULL; + min = o; + max = ++o; + + for (;;) { + n->mark = o; + if (DEBUG_TRAVERSE) + printf("%s(%u) ", n->k, n->mark); + /* Find next arc to explore. */ + if (n->traverse) + n->traverse = n->traverse->next; + else + n->traverse = n->arcs; + /* Skip over dead nodes. */ + while (n->traverse && n->traverse->node->refs == 0) + n->traverse = n->traverse->next; + /* If arc left. */ + if (n->traverse) { + struct node *go; + + go = n->traverse->node; + /* Optimisation: if go->mark < min, we already + * visited this strongly-connected component in + * a previous pass. Hence, this can yield no new + * cycle. */ + + /* Not part of the current path: go for it. */ + if (go->mark == 0 || go->mark == min) { + go->from = n; + n = go; + o++; + if (o > max) + max = o; + /* Part of the current path: check cycle length. */ + } else if (go->mark > min) { + if (DEBUG_TRAVERSE) + printf("%d\n", o - go->mark + 1); + if (o - go->mark + 1 > c->entries) { + struct node *t; + unsigned int i; + + c->entries = o - go->mark + 1; + i = 0; + c->t[i++] = go; + for (t = n; t != go; t = t->from) + c->t[i++] = t; + } + } + + /* No arc left: backtrack. */ + } else { + n->mark = min; + n = n->from; + if (!n) + return max; + o--; + } + } +} + +static void +print_cycle(struct array *c) +{ + unsigned int i; + + /* Printing in reverse order, since cycle discoveries finds reverse + * edges. */ + for (i = c->entries; i != 0;) { + i--; + warnx("%s", c->t[i]->k); + } +} + +static struct node * +find_longest_cycle(struct array *h, struct array *c) +{ + unsigned int i; + unsigned int o; + unsigned int best; + struct node *n; + static int notfirst = 0; + + assert(h->entries != 0); + + /* No cycle found yet. */ + c->entries = 0; + + /* Reset the set of marks, except the first time around. */ + if (notfirst) { + for (i = 0; i < h->entries; i++) + h->t[i]->mark = 0; + } else + notfirst = 1; + + o = 0; + + /* Traverse the array. Each unmarked, live node heralds a + * new set of strongly connected components. */ + for (i = 0; i < h->entries; i++) { + n = h->t[i]; + if (n->refs != 0 && n->mark == 0) { + /* Each call to traverse_node uses a separate + * interval of numbers to mark nodes. */ + o++; + o = traverse_node(n, o, c); + } + } + + assert(c->entries != 0); + n = c->t[0]; + best = n->refs; + for (i = 0; i < c->entries; i++) { + if (c->t[i]->refs < best) { + n = c->t[i]; + best = n->refs; + } + } + return n; +} + +static struct node * +find_normal_cycle(struct array *h, struct array *c) +{ + struct node *b, *n; + + if (hints_flag) + n = find_smallest_node(h); + else + n = find_good_cycle_break(h); + while ((b = find_cycle_from(n, c)) == NULL) + n = find_predecessor(h, n); + return b; +} + + +#define plural(n) ((n) > 1 ? "s" : "") + +static void +parse_args(int argc, char *argv[], struct ohash *pairs) +{ + int c; + unsigned int order; + int reverse_flag; + const char **files; + int i, j; + + i = 0; + + reverse_flag = quiet_flag = long_flag = + warn_flag = hints_flag = verbose_flag = 0; + /* argc is good enough, as we start at argv[1] */ + files = ereallocarray(NULL, argc, sizeof (char *)); + while ((c = getopt(argc, argv, "h:flqrvw")) != -1) { + switch(c) { + case 'h': + files[i++] = optarg; + hints_flag = 1; + break; + /*FALLTHRU*/ + case 'f': + hints_flag = 2; + break; + case 'l': + long_flag = 1; + break; + case 'q': + quiet_flag = 1; + break; + case 'r': + reverse_flag = 1; + break; + case 'v': + verbose_flag = 1; + break; + case 'w': + warn_flag = 1; + break; + default: + usage(); + } + } + + argc -= optind; + argv += optind; + + switch(argc) { + case 1: + files[i++] = argv[0]; + break; + case 0: + break; + default: + usage(); + } + + files[i] = NULL; + + nodes_init(pairs); + order = 0; + + for (j = 0; j != i-argc; j++) { + FILE *f; + + f = fopen(files[j], "r"); + if (f == NULL) + err(1, "Can't open hint file %s", files[i]); + order = read_hints(f, pairs, quiet_flag, files[i], order); + fclose(f); + } + free(files); + + if (argc == 1) { + FILE *f; + + f = fopen(argv[0], "r"); + if (f == NULL) + err(1, "Can't open file %s", argv[0]); + order = read_pairs(f, pairs, reverse_flag, argv[0], order, + hints_flag == 2); + fclose(f); + } else { + order = read_pairs(stdin, pairs, reverse_flag, "stdin", + order, hints_flag == 2); + } +} + +static int +tsort(struct ohash *pairs) +{ + struct array aux; /* Unrefed nodes/cycle reporting. */ + struct array remaining; + unsigned int broken_arcs, broken_cycles; + unsigned int left; + + broken_arcs = 0; + broken_cycles = 0; + + if (hints_flag) + make_transparent(pairs); + split_nodes(pairs, &aux, &remaining); + ohash_delete(pairs); + + if (hints_flag) + heapify(&aux, verbose_flag); + + left = remaining.entries + aux.entries; + while (left != 0) { + + /* Standard topological sort. */ + while (aux.entries) { + struct link *l; + struct node *n; + + n = DEQUEUE(&aux); + printf("%s\n", n->k); + left--; + /* We can't free nodes, as we don't know which + * entry we can remove in the hash table. We + * rely on refs == 0 to recognize live nodes. + * Decrease ref count of live nodes, enter new + * candidates into the unrefed list. */ + for (l = n->arcs; l != NULL; l = l->next) + if (l->node->refs != 0 && + --l->node->refs == 0) { + ENQUEUE(&aux, l->node); + } + } + /* There are still cycles to break. */ + if (left != 0) { + struct node *n; + + broken_cycles++; + /* XXX Simple cycle detection and long cycle + * detection are mutually exclusive. */ + + if (long_flag) + n = find_longest_cycle(&remaining, &aux); + else + n = find_normal_cycle(&remaining, &aux); + + if (!quiet_flag) { + warnx("cycle in data"); + print_cycle(&aux); + } + + if (verbose_flag) + warnx("%u edge%s broken", n->refs, + plural(n->refs)); + broken_arcs += n->refs; + n->refs = 0; + /* Reinitialization, cycle reporting uses aux. */ + aux.t[0] = n; + aux.entries = 1; + } + } + if (verbose_flag && broken_cycles != 0) + warnx("%u cycle%s broken, for a total of %u edge%s", + broken_cycles, plural(broken_cycles), + broken_arcs, plural(broken_arcs)); + if (warn_flag) + return (broken_cycles < 256 ? broken_cycles : 255); + else + return (0); +} + +int +main(int argc, char *argv[]) +{ + struct ohash pairs; + + if (pledge("stdio rpath", NULL) == -1) + err(1, "pledge"); + + parse_args(argc, argv, &pairs); + + if (pledge("stdio", NULL) == -1) + err(1, "pledge"); + + return tsort(&pairs); +} + + +extern char *__progname; + +static void +usage(void) +{ + fprintf(stderr, "Usage: %s [-flqrvw] [-h file] [file]\n", __progname); + exit(1); +} -- cgit v1.2.3