triedawgfoldtest.perl is a Perl program for analysing some transformations of tries: optimisation (finding equivalent nodes and merging them, thus turning a tree into an acyclic directed graph) and folding (replacing unbranching chains of edges and nodes with multi-character edges). It performs these transformations both singly and combined (in all orders). It then reports the number of nodes, the number of edges, and the total number of characters in edge names for each variation. With the --dot option, it also outputs the resulting graphs in DOT format (see Graphviz).
Example: Given the input file digits containing these ten lines:
zero one two three four five six seven eight nine
it produces this report:
=== digits ===
38 total nodes
10 nodes with no edges
28 nodes with only single-character edges
0 nodes with only multi-character edges
0 nodes with both single- and multi-character edges
37 total edges
37 single-character edges
0 multi-character edges
37 total edge characters
=== digits_opt ===
24 total nodes
1 nodes with no edges
23 nodes with only single-character edges
0 nodes with only multi-character edges
0 nodes with both single- and multi-character edges
32 total edges
32 single-character edges
0 multi-character edges
32 total edge characters
=== digits_fold ===
14 total nodes
10 nodes with no edges
0 nodes with only single-character edges
3 nodes with only multi-character edges
1 nodes with both single- and multi-character edges
13 total edges
3 single-character edges
10 multi-character edges
37 total edge characters
=== digits_opt_fold ===
8 total nodes
1 nodes with no edges
3 nodes with only single-character edges
2 nodes with only multi-character edges
2 nodes with both single- and multi-character edges
16 total edges
8 single-character edges
8 multi-character edges
32 total edge characters
=== digits_fold_opt ===
5 total nodes
1 nodes with no edges
0 nodes with only single-character edges
3 nodes with only multi-character edges
1 nodes with both single- and multi-character edges
13 total edges
3 single-character edges
10 multi-character edges
37 total edge characters
The DOT files produced with the --dot option are shown below together with their SVG renditions.
| digits.dot | |
|---|---|
digraph {
rankdir=LR;
node [shape=circle];
0 [style=filled];
0->1 [label=e];
0->2 [label=f];
0->3 [label=n];
0->4 [label=o];
0->5 [label=s];
0->6 [label=t];
0->7 [label=z];
1->8 [label=i];
2->9 [label=i];
2->10 [label=o];
3->11 [label=i];
4->12 [label=n];
5->13 [label=e];
5->14 [label=i];
6->15 [label=h];
6->16 [label=w];
7->17 [label=e];
8->18 [label=g];
9->19 [label=v];
10->20 [label=u];
11->21 [label=n];
12->22 [label=e];
13->23 [label=v];
14->24 [label=x];
15->25 [label=r];
16->26 [label=o];
17->27 [label=r];
18->28 [label=h];
19->29 [label=e];
20->30 [label=r];
21->31 [label=e];
22 [shape=doublecircle];
23->32 [label=e];
24 [shape=doublecircle];
25->33 [label=e];
26 [shape=doublecircle];
27->34 [label=o];
28->35 [label=t];
29 [shape=doublecircle];
30 [shape=doublecircle];
31 [shape=doublecircle];
32->36 [label=n];
33->37 [label=e];
34 [shape=doublecircle];
35 [shape=doublecircle];
36 [shape=doublecircle];
37 [shape=doublecircle];
} |
|
| digits_opt.dot | |
digraph {
rankdir=LR;
node [shape=circle];
0 [style=filled];
0->1 [label=e];
0->2 [label=f];
0->3 [label=n];
0->4 [label=o];
0->5 [label=s];
0->6 [label=t];
0->7 [label=z];
1->8 [label=i];
2->9 [label=i];
2->10 [label=o];
3->4 [label=i];
4->11 [label=n];
5->12 [label=e];
5->13 [label=i];
6->14 [label=h];
6->15 [label=w];
7->16 [label=e];
8->17 [label=g];
9->11 [label=v];
10->18 [label=u];
11->19 [label=e];
12->20 [label=v];
13->19 [label=x];
14->21 [label=r];
15->19 [label=o];
16->15 [label=r];
17->22 [label=h];
18->19 [label=r];
19 [shape=doublecircle];
20->23 [label=e];
21->11 [label=e];
22->19 [label=t];
23->19 [label=n];
} |
|
| digits_fold.dot | |
digraph {
rankdir=LR;
node [shape=circle];
0 [style=filled];
0->1 [label=eight];
0->2 [label=f];
0->3 [label=nine];
0->4 [label=one];
0->5 [label=s];
0->6 [label=t];
0->7 [label=zero];
1 [shape=doublecircle];
2->8 [label=ive];
2->9 [label=our];
3 [shape=doublecircle];
4 [shape=doublecircle];
5->10 [label=even];
5->11 [label=ix];
6->12 [label=hree];
6->13 [label=wo];
7 [shape=doublecircle];
8 [shape=doublecircle];
9 [shape=doublecircle];
10 [shape=doublecircle];
11 [shape=doublecircle];
12 [shape=doublecircle];
13 [shape=doublecircle];
} |
|
| digits_opt_fold.dot | |
digraph {
rankdir=LR;
node [shape=circle];
0 [style=filled];
0->1 [label=eight];
0->2 [label=f];
0->3 [label=ni];
0->3 [label=o];
0->4 [label=s];
0->5 [label=t];
0->6 [label=zer];
1 [shape=doublecircle];
2->7 [label=iv];
2->1 [label=our];
3->7 [label=n];
4->1 [label=even];
4->1 [label=ix];
5->7 [label=hre];
5->6 [label=w];
6->1 [label=o];
7->1 [label=e];
} |
|
| digits_fold_opt.dot | |
digraph {
rankdir=LR;
node [shape=circle];
0 [style=filled];
0->1 [label=eight];
0->2 [label=f];
0->1 [label=nine];
0->1 [label=one];
0->3 [label=s];
0->4 [label=t];
0->1 [label=zero];
1 [shape=doublecircle];
2->1 [label=ive];
2->1 [label=our];
3->1 [label=even];
3->1 [label=ix];
4->1 [label=hree];
4->1 [label=wo];
} |
|