Reputation: 1464
I have a custom Sort function applied to a struct. The full code is here on play.golang.org.
type Stmt struct {
Name string
After []string
}
func sortStmts(stmts []Stmt) []Stmt {
sort.Sort(ByAfter(stmts))
return stmts
}
type ByAfter []Stmt
func (a ByAfter) Len() int { return len(a) }
func (a ByAfter) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
func (a ByAfter) Less(i, j int) bool {
isLess := true
//fmt.Printf("%s.%+v is being compared with %s.%+v\n", a[i].Name, a[i].After, a[j].Name, a[j].After)
for _, v := range a[i].After {
if a[j].Name == v {
isLess = false
break
}
}
if isLess {
//fmt.Printf("%s.%+v is Before %s.%+v\n", a[i].Name, a[i].After, a[j].Name, a[j].After)
} else {
//fmt.Printf("%s.%+v is After %s.%+v\n", a[i].Name, a[i].After, a[j].Name, a[j].After)
}
return isLess
}
My purpose is to automatically create a set of sql create statements ordered properly so that dependent tables come first.
So if Stmt{Name: "user_role", After: []string{"user", "role"} }
is present, then in the ordered list user_role
should be after both user
and role
.
This seemed to work fine for a bit until we started adding more values. Only then did I go in and check and realize that I might have just got accidentally lucky the first time, but there really isn't any consistency.
Is there something I'm doing wrong in the sort function that the results are random. I'm particularly interested in seeing why the "role" item is not coming before the "user_role" item even though I've designated it user_role as coming after role.
Upvotes: 0
Views: 1339
Reputation: 777
As mentioned by Anonymous, you need a topological sort for this. Tarjan's strongly connected components algorithm has the property that SCCs are returned in reverse topological sort order. This means it can be used as a topological sort algorithm.
Here is an implementation of Tarjan's algorithm (runnable here and originally posted by me to the golang-nuts list) based on the pseudocode at wikipedia (more generally implemented here but using essentially the same underlying code):
package main
import (
"fmt"
"log"
)
type Stmt struct {
Name string
After []string
}
func main() {
stmts := []Stmt{
{Name: "app", After: []string{"app_user"}},
{Name: "billingplan", After: []string{}},
{Name: "campaign", After: []string{"app_user"}},
{Name: "campaign_app", After: []string{"campaign", "app"}},
{Name: "campaign_ip", After: []string{"campaign", "ip"}},
{Name: "campaign_operator", After: []string{"campaign", "operator"}},
{Name: "campaign_sponsor", After: []string{"campaign", "sponsor"}},
{Name: "campaign_subscriberfilter", After: []string{"campaign", "subscriber_filters"}},
{Name: "campaign_url", After: []string{"campaign", "url"}},
{Name: "contentpartner", After: []string{"app_user"}},
{Name: "filter_criteria", After: []string{"campaign", "subscriber_filters"}},
{Name: "ip", After: []string{"app_user"}},
{Name: "mobile_registered", After: []string{"campaign", "app"}},
{Name: "operator", After: []string{}},
{Name: "passwords", After: []string{"app_user"}},
{Name: "publish_package", After: []string{}},
{Name: "role", After: []string{}},
{Name: "passwords", After: []string{"app_user"}},
{Name: "sponsor", After: []string{"app_user"}},
{Name: "subscriber_dbs", After: []string{}},
{Name: "subscriber_filters", After: []string{"subscriber_dbs"}},
{Name: "timezone", After: []string{}},
{Name: "url", After: []string{"app_user"}},
{Name: "app_user", After: []string{}},
{Name: "user_role", After: []string{"app_user", "role"}},
}
g := make(graph)
for _, s := range stmts {
g[s.Name] = after(s.After)
}
sorted, err := topoSort(g)
if err != nil {
log.Fatalf("could not sort: %v", err)
}
for _, s := range sorted {
fmt.Println(s)
}
}
func topoSort(g graph) ([]string, error) {
sccs := tarjanSCC(g)
sorted := make([]string, len(sccs))
for i, s := range sccs {
if len(s) != 1 {
return nil, fmt.Errorf("found directed cycle: %q", s)
}
sorted[i] = s[0]
}
return sorted, nil
}
// graph is an edge list representation of a directed graph.
type graph map[string]set
// set is an string set.
type set map[string]struct{}
func after(i []string) set {
if len(i) == 0 {
return nil
}
s := make(set)
for _, v := range i {
s[v] = struct{}{}
}
return s
}
// tarjanSCC returns a the strongly connected components of the
// directed graph g.
func tarjanSCC(g graph) [][]string {
t := tarjan{
g: g,
indexTable: make(map[string]int, len(g)),
lowLink: make(map[string]int, len(g)),
onStack: make(map[string]bool, len(g)),
}
for v := range t.g {
if t.indexTable[v] == 0 {
t.strongconnect(v)
}
}
return t.sccs
}
// tarjan implements Tarjan's strongly connected component finding
// algorithm. The implementation is from the pseudocode at
//
// http://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm
//
type tarjan struct {
g graph
index int
indexTable map[string]int
lowLink map[string]int
onStack map[string]bool
stack []string
sccs [][]string
}
// strongconnect is the strongconnect function described in the
// wikipedia article.
func (t *tarjan) strongconnect(v string) {
// Set the depth index for v to the smallest unused index.
t.index++
t.indexTable[v] = t.index
t.lowLink[v] = t.index
t.stack = append(t.stack, v)
t.onStack[v] = true
// Consider successors of v.
for w := range t.g[v] {
if t.indexTable[w] == 0 {
// Successor w has not yet been visited; recur on it.
t.strongconnect(w)
t.lowLink[v] = min(t.lowLink[v], t.lowLink[w])
} else if t.onStack[w] {
// Successor w is in stack s and hence in the current SCC.
t.lowLink[v] = min(t.lowLink[v], t.indexTable[w])
}
}
// If v is a root node, pop the stack and generate an SCC.
if t.lowLink[v] == t.indexTable[v] {
// Start a new strongly connected component.
var (
scc []string
w string
)
for {
w, t.stack = t.stack[len(t.stack)-1], t.stack[:len(t.stack)-1]
t.onStack[w] = false
// Add w to current strongly connected component.
scc = append(scc, w)
if w == v {
break
}
}
// Output the current strongly connected component.
t.sccs = append(t.sccs, scc)
}
}
func min(a, b int) int {
if a < b {
return a
}
return b
}
Note that running this code repeatedly will not result in the same strict ordering of output since a number of the paths are not definitively orderable relative to each other (this won't show up in the playground since the results are cached - you can see this though by wrapping the call to tarjanSCC).
Although it may be easier to directly implement a topological sort, by using Tarjan's SCC algorithm, we are able to find the cause of a sort failure, for example here (cf with the same data here).
Upvotes: 1
Reputation: 58300
Your "Less" function is not transitive. That is, if A < B and B < C, then it must also hold that A < C.
You can't specify a partial order with a regular sort function and get sorted output. Instead, you need to implement a topological sort.
Here's a simple implementation on your data (except that I removed the duplicate "passwords" entry).
package main
import "fmt"
type Stmt struct {
Name string
After []string
}
func topSort(ss []Stmt) []string {
after := map[string][]string{} // things that must come after
counts := map[string]int{} // number unsatified preconditions
zc := map[string]struct{}{} // things with zero count
for _, s := range ss {
for _, a := range s.After {
after[a] = append(after[a], s.Name)
}
counts[s.Name] = len(s.After)
if len(s.After) == 0 {
zc[s.Name] = struct{}{}
}
}
r := []string{}
for len(zc) > 0 {
for n := range zc {
r = append(r, n)
for _, a := range after[n] {
counts[a]--
if counts[a] == 0 {
zc[a] = struct{}{}
}
}
delete(zc, n)
}
}
return r
}
func main() {
stmts := []Stmt{
{Name: "app", After: []string{"app_user"}},
{Name: "billingplan", After: []string{}},
{Name: "campaign", After: []string{"app_user"}},
{Name: "campaign_app", After: []string{"campaign", "app"}},
{Name: "campaign_ip", After: []string{"campaign", "ip"}},
{Name: "campaign_operator", After: []string{"campaign", "operator"}},
{Name: "campaign_sponsor", After: []string{"campaign", "sponsor"}},
{Name: "campaign_subscriberfilter", After: []string{"campaign", "subscriber_filters"}},
{Name: "campaign_url", After: []string{"campaign", "url"}},
{Name: "contentpartner", After: []string{"app_user"}},
{Name: "filter_criteria", After: []string{"campaign", "subscriber_filters"}},
{Name: "ip", After: []string{"app_user"}},
{Name: "mobile_registered", After: []string{"campaign", "app"}},
{Name: "operator", After: []string{}},
{Name: "passwords", After: []string{"app_user"}},
{Name: "publish_package", After: []string{}},
{Name: "role", After: []string{}},
{Name: "sponsor", After: []string{"app_user"}},
{Name: "subscriber_dbs", After: []string{}},
{Name: "subscriber_filters", After: []string{"subscriber_dbs"}},
{Name: "timezone", After: []string{}},
{Name: "url", After: []string{"app_user"}},
{Name: "app_user", After: []string{}},
{Name: "user_role", After: []string{"app_user", "role"}},
}
r := topSort(stmts)
for _, s := range r {
fmt.Println(s)
}
}
Upvotes: 7
Reputation: 1464
This question was answered here: https://groups.google.com/forum/#!topic/golang-nuts/C_7JY1f3cSc. It was a more detailed and specific answer that I was able to immediately use. Requested the person to update it here but he/she hasn't ... so putting it myself.
Since I had a tarjan lying around. Here is the topological sort for that data:
http://play.golang.org/p/SHagFMvuhl
On Thu, 2015-03-12 at 22:47 -0700, Sathish VJ wrote:
In my case, I did check and there is that transitive nature in the dependencies.
One specific thing I'm noting when I print out the debug statements is that some comparisons are never happening. For example, user_role is never getting compared to role as of now. Though at one point when there were lesser elements, it was.
sort.Sort will not make all comparisons. That would result in O(n^2) time.
Upvotes: 0