[antlr-interest] Help with compressing trees and custom nodes

Eric researcher0x00 at gmail.com
Fri Mar 16 06:42:50 PDT 2012


Hi Todd,


This is just a suggestion and not the only possible answer.

The easiest way for me to work with AST transformations is to just view the
AST as an n-ary tree and call the tree API directly.

Here are some example methods I use. This is C# code for the ANTLR 3 C#
version; you will obviously need to translate to your target language.

        private static void ReplaceNode(CommonTree node, int tokenId,
string text)
        {
            // This method does not set token StatIndex, token StopIndex,
token Line, or token charPostionInLine
            ITreeAdaptor adaptor = new CommonTreeAdaptor();
            CommonTree parent = (CommonTree)node.Parent;
            CommonToken newToken = new CommonToken(tokenId);
            CommonTree newNode = (CommonTree)adaptor.Create(newToken, text);
            int index = node.ChildIndex;
            parent.DeleteChild(index);
            parent.InsertChild(index, newNode);
            Debug.Assert(newToken.Type == tokenId);
            Debug.Assert(newNode.Text == text);
            Debug.Assert(newNode.Parent != null);
            Debug.Assert(newNode.ChildIndex == index);
        }

       private static void CreateEpsilonRule(CommonTree rules, TokenMap
tokenMap)
        {
            int ruleTokenId = tokenMap.GetId("RULE");
            int idTokenId = tokenMap.GetId("ID");
            int alternativesTokenId = tokenMap.GetId("ALTERNATIVES");
            int alternativeTokenId = tokenMap.GetId("ALT");
            int stringLiteralTokenId = tokenMap.GetId("STRING_LITERAL");
            int ruleRefTokenId = tokenMap.GetId("RULE_REF");
            ITreeAdaptor adaptor = new CommonTreeAdaptor();
            epsilonRuleMade = true;
            string rule1Name = "EPSILON";
            CommonToken newRule1Token = new CommonToken(ruleTokenId);
            CommonTree newRule = (CommonTree)adaptor.Create(newRule1Token,
"RULE");
            rules.AddChild(newRule);
            CommonToken rule1IdToken = new CommonToken(idTokenId);
            CommonTree rule1IdNode =
(CommonTree)adaptor.Create(rule1IdToken, rule1Name);
            newRule.AddChild(rule1IdNode);
            CommonToken alternatesToken = new
CommonToken(alternativesTokenId, "ALTERNATIVES");
            CommonTree alternatesNode =
(CommonTree)adaptor.Create(alternatesToken);
            newRule.AddChild(alternatesNode);
            CommonToken alternateToken = new
CommonToken(alternativeTokenId, "ALT");
            CommonTree alternateNode =
(CommonTree)adaptor.Create(alternateToken);
            alternatesNode.AddChild(alternateNode);
            CommonToken charLiteralToken = new
CommonToken(stringLiteralTokenId, string.Empty);
            CommonTree charLiteralNode =
(CommonTree)adaptor.Create(charLiteralToken);
            alternateNode.AddChild(charLiteralNode);
            newRule.FreshenParentAndChildIndexesDeeply();
        }

        private static void FindRuleRefs(CommonTree node,
SortedList<string, List<CommonTree>> ruleRefs, int ruleRefTokenId)
        {
            if (node.Type == ruleRefTokenId)
            {
                List<CommonTree> ruleRefList;
                if (!ruleRefs.TryGetValue(node.Text, out ruleRefList))
                {
                    ruleRefList = new List<CommonTree>();
                    ruleRefs[node.Text] = ruleRefList;
                }
                ruleRefList.Add(node);
            }
            if (node.Children != null)
            {
                int childCount = node.Children.Count;
                for (int index = 0; index < childCount; index++)
                {
                    CommonTree visitNode = (CommonTree)node.Children[index];
                    FindRuleRefs(visitNode, ruleRefs, ruleRefTokenId);
                }
            }
        }

        private static void ChangeToken(CommonTree node, int tokenId,
string text)
        {
            // This method does not perserve token StatIndex, token
StopIndex, token Line, or token charPostionInLine
            ITreeAdaptor adaptor = new CommonTreeAdaptor();
            CommonTree parent = (CommonTree)node.Parent;
            CommonToken newToken = new CommonToken(tokenId);
            CommonTree newNode = (CommonTree)adaptor.Create(newToken, text);
            int index = node.ChildIndex;
            if (node.Children != null)
            {
                // Note: <Node>.Children is read only, so newNode.Children
= node.Children; is not allowed.
                newNode.AddChildren(node.Children);
            }
            parent.DeleteChild(index);
            parent.InsertChild(index, newNode);
            Debug.Assert(newToken.Type == tokenId);
            Debug.Assert(newNode.Text == text);
            Debug.Assert(newNode.Parent != null);
            Debug.Assert(newNode.ChildIndex == index);
            if (newNode.Children != null)
            {
                Debug.Assert(newNode.Children.Count == node.Children.Count);
            }
        }

And here is the TokenMap class used by the methods

    public class TokenMap
    {
        private SortedList<int, string> ids = new SortedList<int, string>();
        private SortedList<string, int> names = new SortedList<string,
int>();
        private string tokenFilename;
        public int NextId
        {
            get
            {
                int value = ids.Count;
                string name;
                while (this.ids.TryGetValue(value, out name))
                {
                    value++;
                }
                return value;
            }
        }
        public string this[int tokenType]
        {
            get
            {
                string result = string.Empty;
                if (!(ids.TryGetValue(tokenType, out result)))
                {
                    //Debug.Fail("How did we get here?");
                }
                return result;
            }
        }
        public TokenMap(string tokenFilename)
        {
            Debug.Assert(!string.IsNullOrWhiteSpace(tokenFilename));
            this.tokenFilename = tokenFilename;
            using (StreamReader streamReader = new
StreamReader(this.tokenFilename))
            {
                string line = streamReader.ReadLine();
                while (line != null)
                {
                    string[] tokenParts = line.Split('=');
                    string name = string.Empty;
                    string indexText = string.Empty;
                    if (tokenParts.Length == 2)
                    {
                        name = tokenParts[0];
                        indexText = tokenParts[1];
                    }
                    else
                    {
                        // If more than one '=' then split on last '='
                        // i.e. '>='=5
                        //  '=='=6
                        int lastEqualPos = line.LastIndexOf('=');
                        name = line.Substring(0, lastEqualPos - 1);
                        int length = line.Length - (lastEqualPos + 1);
                        indexText = line.Substring(lastEqualPos + 1,
length);
                    }
                    int index = -1;
                    if (int.TryParse(indexText, out index))
                    {
                        if (!name.StartsWith("T__"))
                        {
                            if (!ids.ContainsKey(index))
                            {
                                ids.Add(index, name);
                            }
                            //else
                            //{
                            //    Console.WriteLine("Duplicate key: " +
index);
                            //    Debug.Fail("How did we get here?");
                            //}
                        }
                        else
                        {
                            // Do nothing.
                        }
                    }
                    else
                    {
                        Console.WriteLine("Unable to parse to int: '" +
indexText + "'");
                        Debug.Fail("How did we get here?");
                    }
                    line = streamReader.ReadLine();
                }
            }
            BuildNamesIndex();
        }
        private void BuildNamesIndex()
        {
            this.names.Clear();
            foreach (KeyValuePair<int, string> kvp in this.ids)
            {
                this.names.Add(kvp.Value, kvp.Key);
            }
        }
        public bool ContainsKey(int tokenType)
        {
            return ids.ContainsKey(tokenType);
        }
        public int GetId(string name)
        {
            int value = -1;
            if (!this.names.TryGetValue(name, out value))
            {
                value = -1;
            }
            return value;
        }
        public string ToListing()
        {
            StringBuilder lines = new StringBuilder();
            lines.AppendLine("Token map for " + this.tokenFilename);
            lines.AppendLine("Id  Name");
            foreach (KeyValuePair<int, string> kvp in this.ids)
            {
                string line = string.Format("{0,3} {1}", kvp.Key,
kvp.Value);
                lines.AppendLine(line);
            }
            return lines.ToString();
        }
    }

 Hope that helps, Eric




On Thu, Mar 15, 2012 at 2:02 PM, Todd Nine <tnine at apigee.com> wrote:

> Hi guys.  I'm new to antlr and have a question regarding tree compressions.
>  My ultimate goal is to use my grammar to create an AST.  From this AST I
> then will utilize the visitor pattern to walk the tree and evaluate my
> results for our Cassandra query engine.  I'm having issues with my tree
> having a lot of additional nodes that don't have 2 children due to operator
> precedence.  My questions are below.
>
> 1. How can I create a different node class for each element?  For instance,
> && ad || need their own nodes, as well as 'NOT' 'within' etc
>
> 2. How can I collapse tree elements that only have 1 child.  I.E turn orexp
> -> andexp -> notexp -> andexp into just an andexp node
>
>
> I've read all the documentation here, but I have a few things that aren't
> clear.
>
> http://www.antlr.org/wiki/display/ANTLR3/Tree+construction
>
> When creating my own node classes, it's not clear to me if I need to
> subclass an antlr node class, or just create any class.  Are there any
> examples on both compression and creating custom classes?
>
>
> Thanks,
>
> Todd
>
> List: http://www.antlr.org/mailman/listinfo/antlr-interest
> Unsubscribe:
> http://www.antlr.org/mailman/options/antlr-interest/your-email-address
>


More information about the antlr-interest mailing list