MikMark
MikMark

Reputation: 547

C#: retrieve parent-child URLs from full URLs recursively

I need help building a parent-child URL list from full URLs, possibly using recursion. For example, below are URLs in a flat structure. I have to parse them into parent-child form, with nested levels of children,

into a list of URLs with a child-parent relationship.

// Site-map node as declared in the question: one URL together with the nodes nested under it.
public class HtmlSiteMap
{
  // Full address of the page (the expected output below shows absolute URLs here).
  public string Url { get; set; }
  // Display name; in the expected output below this is the last path segment of Url.
  public string PageTitle { get; set; }
  // Depth of the node; the expected output below uses 0 for top-level pages.
  public int UrlLevel { get; set; }
  // Nested child nodes. Not initialized here, so it stays null until assigned
  // (the expected output below shows null for leaves).
  public List<HtmlSiteMap> Childrens { get; set; }
}

my expected output

{
Url: https://domain/UrlA,
PageTitle : UrlA,
UrlLevel : 0 ,
Childrens : { 
      Url : https://domain/UrlA/UrlA_L1,
      PageTitle : UrlA_L1,
      UrlLevel : 1,
      Childrens : { 
                   Url : https://domain/UrlA/UrlA_L1/UrlA_L1_L2,
                   PageTitle : UrlA_L1_L2,
                   UrlLevel : 2,
                   Childrens : null
                  },
                  { 
                   Url : https://domain/UrlA/UrlA_L1/UrlA2_L1_L2,
                   PageTitle : UrlA2_L1_L2,
                   UrlLevel : 2,
                   Childrens : null
                  }
   },
Url: https://domain/UrlB,
PageTitle : UrlB,
UrlLevel : 0 ,
Childrens : {
             Url : https://domain/UrlB/UrlB_L1,
             PageTitle : UrlB_L1,
             UrlLevel : 1,
             Childrens : {
                         Url : https://domain/UrlB/UrlB_L1/UrlB_L1_L2,
                          PageTitle : UrlB_L1_L2,
                          UrlLevel : 2,
                          Childrens : null
                         }
            }
.................
.................
..................
}

I have tried to achieve this by splitting the URLs and recursing, but I was unable to get a result. Your help will be appreciated.

Upvotes: 0

Views: 288

Answers (2)

Alexey Rumyantsev
Alexey Rumyantsev

Reputation: 523

Here is another solution that works in the reverse direction. It operates on a pre-sorted list, constructing nodes from the roots to the leaves. This code should be more readable and understandable for beginners in recursion.

// Demo entry point: builds the tree for a fixed set of URLs and prints every root node.
static void Main(string[] args)
{
    // Flat sample input.
    Uri[] input =
    {
        new Uri("https://domain/UrlA/UrlA_L1/UrlA_L1_L2"),
        new Uri("https://domain/UrlA/UrlA_L1/UrlA2_L1_L2"),
        new Uri("https://domain/UrlB/UrlB_L1/UrlB_L1_L2"),
        new Uri("https://domain/UrlC/UrlC_L1/UrlC_L1_L2"),
        new Uri("https://domain/UrlD/UrlD_L1/UrlD_L1_L2/UrlD_L1_L2_L3"),
        new Uri("https://domain/UrlE/UrlE_L1/UrlE_L1_L2/UrlE_L1_L2_L3"),
        new Uri("https://domain/UrlF/UrlF_L1/UrlF_L1_L2/UrlF_L1_L2_L3")
    };

    // Drop the leading '/' of each path, sort the paths, then break each one into its segments.
    var sortedPaths = input
        .Select(uri => uri.AbsolutePath.Substring(1))
        .OrderBy(path => path);
    var rows = sortedPaths
        .Select(path => path.Split('/'))
        .ToList();

    // Scheme and host of the first URL become the prefix of every generated Url.
    var baseUrl = input
        .Select(uri => $"{uri.Scheme}://{uri.Host}/")
        .Distinct()
        .FirstOrDefault();

    var rootNodes = new List<HtmlSiteMap>();
    ProcessNodes(rootNodes, rows, baseUrl, 0);

    rootNodes.ForEach(root => Console.WriteLine(root.ToString()));
}

/// <summary>
/// Builds one level of the site-map tree from path rows and recurses into the deeper levels.
/// </summary>
/// <param name="children">List that receives the nodes created for this level.</param>
/// <param name="rows">
/// Remaining path segments of each URL. Rows that share the same first segment must be
/// adjacent (the caller sorts them), because a new node is started whenever the first
/// segment differs from the previous row's.
/// </param>
/// <param name="prefix">Text prepended to a segment to form the node's Url; expected to end with "/".</param>
/// <param name="level">Value stored in UrlLevel for the nodes created at this level.</param>
static void ProcessNodes(List<HtmlSiteMap> children, List<string[]> rows, string prefix, int level)
{
    if (rows.Count == 0)
        return;

    HtmlSiteMap currentNode = null;
    // Tails (everything after the first segment) of the rows that belong to currentNode.
    var subRows = new List<string[]>();
    foreach (var parts in rows)
    {
        // A row with no segments left means the URL ended at the parent level.
        if (parts.Length == 0)
            continue;

        if (currentNode == null || currentNode.PageTitle != parts[0])
        {
            // The first segment changed: finish the previous node with ONLY its own tails
            // before starting a new one. The current row's tail must not be in subRows yet,
            // otherwise it would be attached to the previous node and then discarded.
            if (currentNode != null)
                ProcessNodes(currentNode.Children, subRows, prefix + currentNode.PageTitle + "/", level + 1);

            currentNode = new HtmlSiteMap
            {
                Url = prefix + parts[0],
                PageTitle = parts[0],
                UrlLevel = level
            };
            children.Add(currentNode);
            subRows.Clear();
        }

        // Record this row's tail under the node it actually belongs to.
        subRows.Add(parts.Skip(1).ToArray());
    }

    // Flush the tails collected for the last node of this level.
    if (currentNode != null && subRows.Count > 0)
        ProcessNodes(currentNode.Children, subRows, prefix + currentNode.PageTitle + "/", level + 1);
}

/// <summary>
/// One node of the site-map tree: a URL, its title, its depth and the nodes nested under it.
/// </summary>
public class HtmlSiteMap
{
    public string Url { get; set; }
    public string PageTitle { get; set; }
    public int UrlLevel { get; set; }
    // Called Children here; the question named this property Childrens.
    // Starts out as an empty list, so callers can add to it right away.
    public List<HtmlSiteMap> Children { get; set; } = new List<HtmlSiteMap>();

    /// <summary>
    /// Renders this node and, recursively, its children. Every line of a node is
    /// indented by UrlLevel spaces; a node without children prints "-" instead.
    /// </summary>
    // Formatting taken over from the previous answer.
    public override string ToString()
    {
        var indent = new string(' ', UrlLevel);
        var text = new StringBuilder()
            .Append(indent).AppendLine($"Url: {Url},")
            .Append(indent).AppendLine($"PageTitle: {PageTitle},")
            .Append(indent).AppendLine($"UrlLevel: {UrlLevel},")
            .Append(indent).AppendLine("Children:");

        if (Children.Count != 0)
        {
            // Each child's text already ends with a line break; AppendLine adds one more.
            Children.ForEach(child => text.AppendLine(child.ToString()));
        }
        else
        {
            text.Append(indent).AppendLine("-");
        }

        return text.ToString();
    }
}

Upvotes: 1

Miamy
Miamy

Reputation: 2299

A base version (without errors handling) may look like this:

   // Demo entry point: turns a flat list of URLs into a tree of HtmlSiteMap nodes and prints it.
   // Each URL is walked segment by segment from the host downwards; a node is looked up among
   // the children of the node for the preceding segment and created when it does not exist yet.
   // As a result:
   //  - ancestors that are not listed explicitly (e.g. UrlC and UrlC_L1) are still created,
   //  - equal titles in different branches are never confused with each other,
   //  - top-level pages get UrlLevel 0, as in the question's expected output,
   //  - generated Url values carry no trailing slash.
   static void Main(string[] args)
    {
        var input = new List<string>
        {
            "https://domain/UrlA/",
            "https://domain/UrlA/UrlA_L1/",
            "https://domain/UrlA/UrlA_L1/UrlA_L1_L2",
            "https://domain/UrlA/UrlA_L1/UrlA2_L1_L2",
            "https://domain/UrlB",
            "https://domain/UrlB/UrlB_L1",
            "https://domain/UrlB/UrlB_L1/UrlB_L1_L2",
            "https://domain/UrlC/UrlC_L1/UrlC_L1_L2",
            "https://domain/UrlD/UrlD_L1/UrlD_L1_L2/UrlD_L1_L2_L3",
            "https://domain/UrlE/UrlE_L1/UrlE_L1_L2/UrlE_L1_L2_L3",
            "https://domain/UrlF/UrlF_L1/UrlF_L1_L2/UrlF_L1_L2_L3"
        };

        var output = new List<HtmlSiteMap>();

        // After splitting on "/", parts[0] is the scheme ("https:") and parts[1] the host;
        // page segments start at this index.
        const int firstPathPart = 2;

        // Returns the node titled `name` among `siblings`, or null when there is none.
        static HtmlSiteMap FindChild(List<HtmlSiteMap> siblings, string name)
        {
            foreach (var item in siblings)
            {
                if (item.PageTitle == name)
                {
                    return item;
                }
            }
            return null;
        }

        foreach (var url in input)
        {
            var parts = url.Split(@"/", StringSplitOptions.RemoveEmptyEntries);
            if (parts.Length <= firstPathPart)
            {
                // Only scheme and host (or less): there is no page to add.
                continue;
            }

            // Rebuild the address step by step so every created node gets its own Url.
            var nodeUrl = parts[0] + "//" + parts[1];
            var siblings = output;

            for (var i = firstPathPart; i < parts.Length; i++)
            {
                nodeUrl += "/" + parts[i];

                var node = FindChild(siblings, parts[i]);
                if (node == null)
                {
                    node = new HtmlSiteMap
                    {
                        Url = nodeUrl,
                        PageTitle = parts[i],
                        UrlLevel = i - firstPathPart
                    };
                    siblings.Add(node);
                }

                // Descend: the next segment is searched among this node's children.
                siblings = node.Children;
            }
        }


        foreach (var current in output)
        {
            Console.WriteLine(current.ToString());
        }
        Console.ReadLine();
    }


    /// <summary>
    /// Tree node holding a page's URL, title and depth plus the pages nested below it.
    /// </summary>
    public class HtmlSiteMap
    {
        public string Url { get; set; }
        public string PageTitle { get; set; }
        public int UrlLevel { get; set; }
        public List<HtmlSiteMap> Children { get; set; }

        /// <summary>Creates a node with an empty child list.</summary>
        public HtmlSiteMap() => Children = new List<HtmlSiteMap>();

        /// <summary>
        /// Produces a multi-line dump of this node followed by its children (recursively).
        /// The node's own lines are indented by UrlLevel spaces; "-" marks a node with no children.
        /// </summary>
        public override string ToString()
        {
            var pad = new string(' ', UrlLevel);
            var text = new StringBuilder();

            var header = new[]
            {
                $"Url: {Url},",
                $"PageTitle: {PageTitle},",
                $"UrlLevel: {UrlLevel},",
                "Children:"
            };
            foreach (var line in header)
            {
                text.AppendLine(pad + line);
            }

            if (Children.Count > 0)
            {
                // A child's dump already ends with a line break; AppendLine adds another one.
                foreach (var child in Children)
                {
                    text.AppendLine(child.ToString());
                }
                return text.ToString();
            }

            text.AppendLine(pad + "-");
            return text.ToString();
        }
    }

Upvotes: 1

Related Questions