Reputation: 547
I need help building a parent-child URL list from full URLs, possibly using recursion. For example, below are URLs in a flat structure. I have to parse them into a parent-child form with nested levels of children,
i.e. into a list of URLs with a child-parent relationship.
/// <summary>
/// One node of the site-map tree described in the question: a page plus its nested child pages.
/// </summary>
public class HtmlSiteMap
{
/// <summary>URL of the page this node stands for.</summary>
public string Url { get; set; }
/// <summary>Title of the page; in the expected output this is the last path segment of <see cref="Url"/>.</summary>
public string PageTitle { get; set; }
/// <summary>Depth of the node in the tree; the expected output uses 0 for top-level pages.</summary>
public int UrlLevel { get; set; }
/// <summary>Child nodes. Never initialized by this class, so it is null until a caller assigns it.</summary>
public List<HtmlSiteMap> Childrens { get; set; }
}
My expected output:
{
Url: https://domain/UrlA,
PageTitle : UrlA,
UrlLevel : 0 ,
Childrens : {
Url : https://domain/UrlA/UrlA_L1,
PageTitle : UrlA_L1,
UrlLevel : 1,
Childrens : {
Url : https://domain/UrlA/UrlA_L1/UrlA_L1_L2,
PageTitle : UrlA_L1_L2,
UrlLevel : 2,
Childrens : null
},
{
Url : https://domain/UrlA/UrlA_L1/UrlA2_L1_L2,
PageTitle : UrlA2_L1_L2,
UrlLevel : 2,
Childrens : null
}
},
Url: https://domain/UrlB,
PageTitle : UrlB,
UrlLevel : 0 ,
Childrens : {
Url : https://domain/UrlB/UrlB_L1,
PageTitle : UrlB_L1,
UrlLevel : 1,
Childrens : {
Url : https://domain/UrlB/UrlB_L1/UrlB_L1_L2,
PageTitle : UrlB_L1_L2,
UrlLevel : 2,
Childrens : null
}
}
.................
.................
..................
}
I have tried to achieve this by splitting the URLs and processing them recursively, but I was unable to get a result. Your help will be appreciated.
Upvotes: 0
Views: 288
Reputation: 523
Here is another solution that works in the reverse direction. It operates on a pre-sorted list, constructing nodes from the roots down to the leaves. This code should be more readable and understandable for beginners in recursion.
/// <summary>
/// Demo entry point: turns a flat list of URLs into a tree of <c>HtmlSiteMap</c> nodes
/// and prints every root node.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    var input = new[]
    {
        new Uri("https://domain/UrlA/UrlA_L1/UrlA_L1_L2"),
        new Uri("https://domain/UrlA/UrlA_L1/UrlA2_L1_L2"),
        new Uri("https://domain/UrlB/UrlB_L1/UrlB_L1_L2"),
        new Uri("https://domain/UrlC/UrlC_L1/UrlC_L1_L2"),
        new Uri("https://domain/UrlD/UrlD_L1/UrlD_L1_L2/UrlD_L1_L2_L3"),
        new Uri("https://domain/UrlE/UrlE_L1/UrlE_L1_L2/UrlE_L1_L2_L3"),
        new Uri("https://domain/UrlF/UrlF_L1/UrlF_L1_L2/UrlF_L1_L2_L3")
    };

    // Ordinal ordering keeps the result independent of the machine's culture.
    // RemoveEmptyEntries drops the empty segments produced by the leading slash,
    // by a trailing slash and by a bare root URL, so no node gets an empty title.
    var rows = input
        .Select(u => u.AbsolutePath)
        .OrderBy(path => path, StringComparer.Ordinal)
        .Select(path => path.Split(new[] { '/' }, StringSplitOptions.RemoveEmptyEntries))
        .ToList();

    // GetLeftPart(Authority) yields "scheme://host[:port]", so a non-default port is kept.
    // Only the first distinct authority is used: all inputs are assumed to belong to one site.
    var prefix = input
        .Select(u => u.GetLeftPart(UriPartial.Authority) + "/")
        .Distinct()
        .FirstOrDefault();

    var rootNodes = new List<HtmlSiteMap>();
    ProcessNodes(rootNodes, rows, prefix, 0);

    foreach (var node in rootNodes)
    {
        Console.WriteLine(node.ToString());
    }
}
/// <summary>
/// Creates the nodes of one tree level from the given path rows and recurses into the
/// remaining segments of each row to build the deeper levels.
/// </summary>
/// <param name="children">List that receives the nodes created for this level.</param>
/// <param name="rows">
/// One entry per URL, holding the path segments that are still unprocessed.
/// Rows sharing the same first segment become one node; they do not have to be adjacent.
/// </param>
/// <param name="prefix">Text prepended to a segment to form the node URL (callers pass it with a trailing "/").</param>
/// <param name="level">Value stored in <c>UrlLevel</c> for every node created by this call.</param>
static void ProcessNodes(List<HtmlSiteMap> children, List<string[]> rows, string prefix, int level)
{
    if (rows.Count == 0)
    {
        return;
    }

    // A row with no segments left means its URL ended at the parent node.
    // GroupBy keeps groups in order of first appearance, so sorted input stays sorted.
    var groups = rows
        .Where(parts => parts.Length > 0)
        .GroupBy(parts => parts[0]);

    foreach (var group in groups)
    {
        var node = new HtmlSiteMap
        {
            Url = prefix + group.Key,
            PageTitle = group.Key,
            UrlLevel = level
        };
        children.Add(node);

        // Every row of the group, including the first one, contributes its tail
        // to this node's subtree and to no other node.
        var subRows = group
            .Where(parts => parts.Length > 1)
            .Select(parts => parts.Skip(1).ToArray())
            .ToList();

        ProcessNodes(node.Children, subRows, prefix + group.Key + "/", level + 1);
    }
}
/// <summary>
/// Site-map tree node: a page URL, its title, its depth and its child pages.
/// </summary>
public class HtmlSiteMap
{
    /// <summary>URL of the page.</summary>
    public string Url { get; set; }

    /// <summary>Title of the page.</summary>
    public string PageTitle { get; set; }

    /// <summary>Depth of the node; also the number of spaces used to indent it in <see cref="ToString"/>.</summary>
    public int UrlLevel { get; set; }

    /// <summary>Child nodes; starts out as an empty list. (Called Childrens in the question.)</summary>
    public List<HtmlSiteMap> Children { get; set; } = new List<HtmlSiteMap>();

    /// <summary>
    /// Renders this node followed by all of its descendants as indented text.
    /// A node without children is marked with a "-" line.
    /// </summary>
    /// <returns>The multi-line text representation, ending with a line break.</returns>
    public override string ToString()
    {
        var indent = new string(' ', UrlLevel);
        var text = new StringBuilder()
            .AppendLine($"{indent}Url: {Url},")
            .AppendLine($"{indent}PageTitle: {PageTitle},")
            .AppendLine($"{indent}UrlLevel: {UrlLevel},")
            .AppendLine($"{indent}Children:");

        if (Children.Count > 0)
        {
            // Each child renders itself at its own indentation level.
            Children.ForEach(child => text.AppendLine(child.ToString()));
        }
        else
        {
            text.AppendLine($"{indent}-");
        }

        return text.ToString();
    }
}
Upvotes: 1
Reputation: 2299
A basic version (without error handling) may look like this:
/// <summary>
/// Demo entry point: builds a parent-child tree from a flat list of URL strings and prints it.
/// </summary>
/// <remarks>
/// A URL is attached to the node whose URL equals its own URL minus the last path segment.
/// That parent must appear earlier in the input; otherwise the URL becomes a root node.
/// </remarks>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    var input = new List<string>
    {
        "https://domain/UrlA/",
        "https://domain/UrlA/UrlA_L1/",
        "https://domain/UrlA/UrlA_L1/UrlA_L1_L2",
        "https://domain/UrlA/UrlA_L1/UrlA2_L1_L2",
        "https://domain/UrlB",
        "https://domain/UrlB/UrlB_L1",
        "https://domain/UrlB/UrlB_L1/UrlB_L1_L2",
        "https://domain/UrlC/UrlC_L1/UrlC_L1_L2",
        "https://domain/UrlD/UrlD_L1/UrlD_L1_L2/UrlD_L1_L2_L3",
        "https://domain/UrlE/UrlE_L1/UrlE_L1_L2/UrlE_L1_L2_L3",
        "https://domain/UrlF/UrlF_L1/UrlF_L1_L2/UrlF_L1_L2_L3"
    };

    var output = new List<HtmlSiteMap>();

    // Every node created so far, keyed by its URL without trailing slashes.
    // Looking parents up by URL instead of by title keeps equally named segments
    // in different branches apart and avoids rescanning the tree for each URL.
    var nodesByUrl = new Dictionary<string, HtmlSiteMap>(StringComparer.Ordinal);

    foreach (var url in input)
    {
        var key = url.TrimEnd('/');
        var lastSlash = key.LastIndexOf('/');
        if (lastSlash < 0)
        {
            // No separator at all: there is neither a title nor a parent to derive.
            continue;
        }

        var current = new HtmlSiteMap
        {
            Url = url,
            PageTitle = key.Substring(lastSlash + 1)
        };

        var parentKey = key.Substring(0, lastSlash);
        if (nodesByUrl.TryGetValue(parentKey, out var parent))
        {
            current.UrlLevel = parent.UrlLevel + 1;
            parent.Children.Add(current);
        }
        else
        {
            // Roots are level 0, matching the expected output in the question.
            current.UrlLevel = 0;
            output.Add(current);
        }

        // When the same URL occurs twice, the first node stays the parent for later children.
        nodesByUrl.TryAdd(key, current);
    }

    foreach (var current in output)
    {
        Console.WriteLine(current.ToString());
    }

    // Keeps the console window open until Enter is pressed.
    Console.ReadLine();
}
/// <summary>
/// A page in the site-map tree together with the pages nested below it.
/// </summary>
public class HtmlSiteMap
{
    /// <summary>Creates a node with an empty child list.</summary>
    public HtmlSiteMap()
    {
        Children = new List<HtmlSiteMap>();
    }

    /// <summary>URL of the page.</summary>
    public string Url { get; set; }

    /// <summary>Title of the page.</summary>
    public string PageTitle { get; set; }

    /// <summary>Depth of the node; <see cref="ToString"/> indents by this many spaces.</summary>
    public int UrlLevel { get; set; }

    /// <summary>Pages directly below this one.</summary>
    public List<HtmlSiteMap> Children { get; set; }

    /// <summary>
    /// Produces an indented, multi-line dump of this node and, recursively, of its children.
    /// </summary>
    /// <returns>The dump, terminated by a line break; "-" stands for "no children".</returns>
    public override string ToString()
    {
        var pad = new string(' ', UrlLevel);
        var buffer = new StringBuilder();

        buffer.Append(pad).AppendLine($"Url: {Url},");
        buffer.Append(pad).AppendLine($"PageTitle: {PageTitle},");
        buffer.Append(pad).AppendLine($"UrlLevel: {UrlLevel},");
        buffer.Append(pad).AppendLine("Children:");

        if (Children.Count == 0)
        {
            buffer.Append(pad).AppendLine("-");
            return buffer.ToString();
        }

        for (var i = 0; i < Children.Count; i++)
        {
            // The child dump already ends with a line break; AppendLine adds a separating blank line.
            buffer.AppendLine(Children[i].ToString());
        }

        return buffer.ToString();
    }
}
Upvotes: 1