triedawgfoldtest.perl is a Perl program for analysing some transformations of tries: optimisation (finding equivalent nodes and merging them, thus turning a tree into an acyclic directed graph) and folding (replacing unbranching chains of edges and nodes with multi-character edges). It performs these transformations both singly and combined (in all orders). It then reports the number of nodes, the number of edges, and the total number of characters in edge names for each variation. With the --dot option, it also outputs the resulting graphs in DOT format (see Graphviz).
Example: Given the input file digits containing these ten lines:
zero one two three four five six seven eight nine
it produces this report:
=== digits === 38 total nodes 10 nodes with no edges 28 nodes with only single-character edges 0 nodes with only multi-character edges 0 nodes with both single- and multi-character edges 37 total edges 37 single-character edges 0 multi-character edges 37 total edge characters === digits_opt === 24 total nodes 1 nodes with no edges 23 nodes with only single-character edges 0 nodes with only multi-character edges 0 nodes with both single- and multi-character edges 32 total edges 32 single-character edges 0 multi-character edges 32 total edge characters === digits_fold === 14 total nodes 10 nodes with no edges 0 nodes with only single-character edges 3 nodes with only multi-character edges 1 nodes with both single- and multi-character edges 13 total edges 3 single-character edges 10 multi-character edges 37 total edge characters === digits_opt_fold === 8 total nodes 1 nodes with no edges 3 nodes with only single-character edges 2 nodes with only multi-character edges 2 nodes with both single- and multi-character edges 16 total edges 8 single-character edges 8 multi-character edges 32 total edge characters === digits_fold_opt === 5 total nodes 1 nodes with no edges 0 nodes with only single-character edges 3 nodes with only multi-character edges 1 nodes with both single- and multi-character edges 13 total edges 3 single-character edges 10 multi-character edges 37 total edge characters
The DOT files produced with the --dot option are shown below together with their SVG renditions.
digits.dot | |
---|---|
digraph { rankdir=LR; node [shape=circle]; 0 [style=filled]; 0->1 [label=e]; 0->2 [label=f]; 0->3 [label=n]; 0->4 [label=o]; 0->5 [label=s]; 0->6 [label=t]; 0->7 [label=z]; 1->8 [label=i]; 2->9 [label=i]; 2->10 [label=o]; 3->11 [label=i]; 4->12 [label=n]; 5->13 [label=e]; 5->14 [label=i]; 6->15 [label=h]; 6->16 [label=w]; 7->17 [label=e]; 8->18 [label=g]; 9->19 [label=v]; 10->20 [label=u]; 11->21 [label=n]; 12->22 [label=e]; 13->23 [label=v]; 14->24 [label=x]; 15->25 [label=r]; 16->26 [label=o]; 17->27 [label=r]; 18->28 [label=h]; 19->29 [label=e]; 20->30 [label=r]; 21->31 [label=e]; 22 [shape=doublecircle]; 23->32 [label=e]; 24 [shape=doublecircle]; 25->33 [label=e]; 26 [shape=doublecircle]; 27->34 [label=o]; 28->35 [label=t]; 29 [shape=doublecircle]; 30 [shape=doublecircle]; 31 [shape=doublecircle]; 32->36 [label=n]; 33->37 [label=e]; 34 [shape=doublecircle]; 35 [shape=doublecircle]; 36 [shape=doublecircle]; 37 [shape=doublecircle]; } |
|
digits_opt.dot | |
digraph { rankdir=LR; node [shape=circle]; 0 [style=filled]; 0->1 [label=e]; 0->2 [label=f]; 0->3 [label=n]; 0->4 [label=o]; 0->5 [label=s]; 0->6 [label=t]; 0->7 [label=z]; 1->8 [label=i]; 2->9 [label=i]; 2->10 [label=o]; 3->4 [label=i]; 4->11 [label=n]; 5->12 [label=e]; 5->13 [label=i]; 6->14 [label=h]; 6->15 [label=w]; 7->16 [label=e]; 8->17 [label=g]; 9->11 [label=v]; 10->18 [label=u]; 11->19 [label=e]; 12->20 [label=v]; 13->19 [label=x]; 14->21 [label=r]; 15->19 [label=o]; 16->15 [label=r]; 17->22 [label=h]; 18->19 [label=r]; 19 [shape=doublecircle]; 20->23 [label=e]; 21->11 [label=e]; 22->19 [label=t]; 23->19 [label=n]; } |
|
digits_fold.dot | |
digraph { rankdir=LR; node [shape=circle]; 0 [style=filled]; 0->1 [label=eight]; 0->2 [label=f]; 0->3 [label=nine]; 0->4 [label=one]; 0->5 [label=s]; 0->6 [label=t]; 0->7 [label=zero]; 1 [shape=doublecircle]; 2->8 [label=ive]; 2->9 [label=our]; 3 [shape=doublecircle]; 4 [shape=doublecircle]; 5->10 [label=even]; 5->11 [label=ix]; 6->12 [label=hree]; 6->13 [label=wo]; 7 [shape=doublecircle]; 8 [shape=doublecircle]; 9 [shape=doublecircle]; 10 [shape=doublecircle]; 11 [shape=doublecircle]; 12 [shape=doublecircle]; 13 [shape=doublecircle]; } |
|
digits_opt_fold.dot | |
digraph { rankdir=LR; node [shape=circle]; 0 [style=filled]; 0->1 [label=eight]; 0->2 [label=f]; 0->3 [label=ni]; 0->3 [label=o]; 0->4 [label=s]; 0->5 [label=t]; 0->6 [label=zer]; 1 [shape=doublecircle]; 2->7 [label=iv]; 2->1 [label=our]; 3->7 [label=n]; 4->1 [label=even]; 4->1 [label=ix]; 5->7 [label=hre]; 5->6 [label=w]; 6->1 [label=o]; 7->1 [label=e]; } |
|
digits_fold_opt.dot | |
digraph { rankdir=LR; node [shape=circle]; 0 [style=filled]; 0->1 [label=eight]; 0->2 [label=f]; 0->1 [label=nine]; 0->1 [label=one]; 0->3 [label=s]; 0->4 [label=t]; 0->1 [label=zero]; 1 [shape=doublecircle]; 2->1 [label=ive]; 2->1 [label=our]; 3->1 [label=even]; 3->1 [label=ix]; 4->1 [label=hree]; 4->1 [label=wo]; } |