[antlr-interest] Help with compressing trees and custom nodes
Eric
researcher0x00 at gmail.com
Fri Mar 16 06:42:50 PDT 2012
Hi Todd,
This is just a suggestion and not the only possible answer.
The easiest way for me to work with AST transformations is to just view the
AST as an n-ary tree and call the tree API directly.
Here are some example methods I use. This is C# code for the ANTLR 3 C#
version; you will obviously need to translate to your target language.
private static void ReplaceNode(CommonTree node, int tokenId,
string text)
{
// This method does not set token StatIndex, token StopIndex,
token Line, or token charPostionInLine
ITreeAdaptor adaptor = new CommonTreeAdaptor();
CommonTree parent = (CommonTree)node.Parent;
CommonToken newToken = new CommonToken(tokenId);
CommonTree newNode = (CommonTree)adaptor.Create(newToken, text);
int index = node.ChildIndex;
parent.DeleteChild(index);
parent.InsertChild(index, newNode);
Debug.Assert(newToken.Type == tokenId);
Debug.Assert(newNode.Text == text);
Debug.Assert(newNode.Parent != null);
Debug.Assert(newNode.ChildIndex == index);
}
private static void CreateEpsilonRule(CommonTree rules, TokenMap
tokenMap)
{
int ruleTokenId = tokenMap.GetId("RULE");
int idTokenId = tokenMap.GetId("ID");
int alternativesTokenId = tokenMap.GetId("ALTERNATIVES");
int alternativeTokenId = tokenMap.GetId("ALT");
int stringLiteralTokenId = tokenMap.GetId("STRING_LITERAL");
int ruleRefTokenId = tokenMap.GetId("RULE_REF");
ITreeAdaptor adaptor = new CommonTreeAdaptor();
epsilonRuleMade = true;
string rule1Name = "EPSILON";
CommonToken newRule1Token = new CommonToken(ruleTokenId);
CommonTree newRule = (CommonTree)adaptor.Create(newRule1Token,
"RULE");
rules.AddChild(newRule);
CommonToken rule1IdToken = new CommonToken(idTokenId);
CommonTree rule1IdNode =
(CommonTree)adaptor.Create(rule1IdToken, rule1Name);
newRule.AddChild(rule1IdNode);
CommonToken alternatesToken = new
CommonToken(alternativesTokenId, "ALTERNATIVES");
CommonTree alternatesNode =
(CommonTree)adaptor.Create(alternatesToken);
newRule.AddChild(alternatesNode);
CommonToken alternateToken = new
CommonToken(alternativeTokenId, "ALT");
CommonTree alternateNode =
(CommonTree)adaptor.Create(alternateToken);
alternatesNode.AddChild(alternateNode);
CommonToken charLiteralToken = new
CommonToken(stringLiteralTokenId, string.Empty);
CommonTree charLiteralNode =
(CommonTree)adaptor.Create(charLiteralToken);
alternateNode.AddChild(charLiteralNode);
newRule.FreshenParentAndChildIndexesDeeply();
}
private static void FindRuleRefs(CommonTree node,
SortedList<string, List<CommonTree>> ruleRefs, int ruleRefTokenId)
{
if (node.Type == ruleRefTokenId)
{
List<CommonTree> ruleRefList;
if (!ruleRefs.TryGetValue(node.Text, out ruleRefList))
{
ruleRefList = new List<CommonTree>();
ruleRefs[node.Text] = ruleRefList;
}
ruleRefList.Add(node);
}
if (node.Children != null)
{
int childCount = node.Children.Count;
for (int index = 0; index < childCount; index++)
{
CommonTree visitNode = (CommonTree)node.Children[index];
FindRuleRefs(visitNode, ruleRefs, ruleRefTokenId);
}
}
}
private static void ChangeToken(CommonTree node, int tokenId,
string text)
{
// This method does not perserve token StatIndex, token
StopIndex, token Line, or token charPostionInLine
ITreeAdaptor adaptor = new CommonTreeAdaptor();
CommonTree parent = (CommonTree)node.Parent;
CommonToken newToken = new CommonToken(tokenId);
CommonTree newNode = (CommonTree)adaptor.Create(newToken, text);
int index = node.ChildIndex;
if (node.Children != null)
{
// Note: <Node>.Children is read only, so newNode.Children
= node.Children; is not allowed.
newNode.AddChildren(node.Children);
}
parent.DeleteChild(index);
parent.InsertChild(index, newNode);
Debug.Assert(newToken.Type == tokenId);
Debug.Assert(newNode.Text == text);
Debug.Assert(newNode.Parent != null);
Debug.Assert(newNode.ChildIndex == index);
if (newNode.Children != null)
{
Debug.Assert(newNode.Children.Count == node.Children.Count);
}
}
And here is the TokenMap class used by the methods
public class TokenMap
{
private SortedList<int, string> ids = new SortedList<int, string>();
private SortedList<string, int> names = new SortedList<string,
int>();
private string tokenFilename;
public int NextId
{
get
{
int value = ids.Count;
string name;
while (this.ids.TryGetValue(value, out name))
{
value++;
}
return value;
}
}
public string this[int tokenType]
{
get
{
string result = string.Empty;
if (!(ids.TryGetValue(tokenType, out result)))
{
//Debug.Fail("How did we get here?");
}
return result;
}
}
public TokenMap(string tokenFilename)
{
Debug.Assert(!string.IsNullOrWhiteSpace(tokenFilename));
this.tokenFilename = tokenFilename;
using (StreamReader streamReader = new
StreamReader(this.tokenFilename))
{
string line = streamReader.ReadLine();
while (line != null)
{
string[] tokenParts = line.Split('=');
string name = string.Empty;
string indexText = string.Empty;
if (tokenParts.Length == 2)
{
name = tokenParts[0];
indexText = tokenParts[1];
}
else
{
// If more than one '=' then split on last '='
// i.e. '>='=5
// '=='=6
int lastEqualPos = line.LastIndexOf('=');
name = line.Substring(0, lastEqualPos - 1);
int length = line.Length - (lastEqualPos + 1);
indexText = line.Substring(lastEqualPos + 1,
length);
}
int index = -1;
if (int.TryParse(indexText, out index))
{
if (!name.StartsWith("T__"))
{
if (!ids.ContainsKey(index))
{
ids.Add(index, name);
}
//else
//{
// Console.WriteLine("Duplicate key: " +
index);
// Debug.Fail("How did we get here?");
//}
}
else
{
// Do nothing.
}
}
else
{
Console.WriteLine("Unable to parse to int: '" +
indexText + "'");
Debug.Fail("How did we get here?");
}
line = streamReader.ReadLine();
}
}
BuildNamesIndex();
}
private void BuildNamesIndex()
{
this.names.Clear();
foreach (KeyValuePair<int, string> kvp in this.ids)
{
this.names.Add(kvp.Value, kvp.Key);
}
}
public bool ContainsKey(int tokenType)
{
return ids.ContainsKey(tokenType);
}
public int GetId(string name)
{
int value = -1;
if (!this.names.TryGetValue(name, out value))
{
value = -1;
}
return value;
}
public string ToListing()
{
StringBuilder lines = new StringBuilder();
lines.AppendLine("Token map for " + this.tokenFilename);
lines.AppendLine("Id Name");
foreach (KeyValuePair<int, string> kvp in this.ids)
{
string line = string.Format("{0,3} {1}", kvp.Key,
kvp.Value);
lines.AppendLine(line);
}
return lines.ToString();
}
}
Hope that helps, Eric
On Thu, Mar 15, 2012 at 2:02 PM, Todd Nine <tnine at apigee.com> wrote:
> Hi guys. I'm new to antlr and have a question regarding tree compressions.
> My ultimate goal is to use my grammar to create an AST. From this AST I
> then will utilize the visitor pattern to walk the tree and evaluate my
> results for our Cassandra query engine. I'm having issues with my tree
> having a lot of additional nodes that don't have 2 children due to operator
> precedence. My questions are below.
>
> 1. How can I create a different node class for each element? For instance,
> && ad || need their own nodes, as well as 'NOT' 'within' etc
>
> 2. How can I collapse tree elements that only have 1 child. I.E turn orexp
> -> andexp -> notexp -> andexp into just an andexp node
>
>
> I've read all the documentation here, but I have a few things that aren't
> clear.
>
> http://www.antlr.org/wiki/display/ANTLR3/Tree+construction
>
> When creating my own node classes, it's not clear to me if I need to
> subclass an antlr node class, or just create any class. Are there any
> examples on both compression and creating custom classes?
>
>
> Thanks,
>
> Todd
>
> List: http://www.antlr.org/mailman/listinfo/antlr-interest
> Unsubscribe:
> http://www.antlr.org/mailman/options/antlr-interest/your-email-address
>
More information about the antlr-interest
mailing list