Glory
Glory

Reputation: 45

matching characters in strings in visual C#

I'm working on visual C# to calculate the word error rate I have one textbox for the refrence which is the correct sentance and one for the hypothesis which is wrong one.

in order to calculate WER I need to calculate : substitution : the word that has been changed which was my first question Insert : the words that had been inserted in the sentence Deleted: the words that had been deleted from the original sentence

For EX:

refrence: This is a NPL program. hypothesis: it is an NPL cool.

it: substitution is: correct an :substitution NPL:correct program: deleted cool: inserted

I tried the algorithm that dasblinkenlight proposed ( thank you so much by the way ) I worked but there is a runtime error I couldn't figure it out, in line

int x=  Compute(buffer[j], buffer_ref[i]);

Index was outside the bounds of the array.

and here is my code :

using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Windows.Forms;

namespace WindowsFormsApplication1
{
    public partial class Form1 : Form
    {
        string [] hyp = new string[20];
        string [] refrence = new string[20];
        string [] Anser= new string[20];
        string[] buffer = new string[20];
        string[] buffer_ref = new string[20];
        int count = 0; // number of words 
        string ref2=" " ;
        string hyp2 = " ";
        string Anser2 = " ";
        string buffer2 = " ";

        int corecct_c=0;
        int corecct_d = 0;
        int corecct_i = 0;

        //====================================================================

        public Form1()
        {
            InitializeComponent();
            for (int i = 0; i <= 19; ++i)
            {
                hyp[i] = null;
                buffer[i] = null;
            }
        }

        private void textBox2_TextChanged(object sender, EventArgs e)
        {
            refrence = this.textBox2.Text.Split(' ');
            buffer_ref = this.textBox2.Text.Split(' ');


        }

        private void textBox1_TextChanged(object sender, EventArgs e)
        {
            hyp = this.textBox1.Text.Split(' ');
            buffer = this.textBox1.Text.Split(' ');
            //hyp = this.textBox1.Text;
            // fname1.Add(this.textBox1.Text);


        }

        public void correct(string[] R)
        {

            for (int i = 0; (i <= 19) && (R[i] != "."); ++i)
            {

                if (buffer[i] == refrence[i])
                { buffer[i] = "0";
                buffer_ref[i] = "0";
                    corecct_c = corecct_c + 1;
                    Anser[i] = "C";
                }
            }

        }

        // function that compute 2 strings
        public static int Compute(string s, string t)
        {
            int n = s.Length;
            int m = t.Length;
            int[,] d = new int[n + 1, m + 1];

            // Step 1
            if (n == 0)
            {
                return m;
            }

            if (m == 0)
            {
                return n;
            }

            // Step 2
            for (int i = 0; i <= n; d[i, 0] = i++)
            {
            }

            for (int j = 0; j <= m; d[0, j] = j++)
            {
            }

            // Step 3
            for (int i = 1; i <= n; i++)
            {
                //Step 4
                for (int j = 1; j <= m; j++)
                {
                    // Step 5
                    int cost = (t[j - 1] == s[i - 1]) ? 0 : 1;

                    // Step 6
                    d[i, j] = Math.Min(
                        Math.Min(d[i - 1, j] + 1, d[i, j - 1] + 1),
                        d[i - 1, j - 1] + cost);
                }
            }
            // Step 7
            return d[n, m];
        }


        public void sub(){

            for (int j = 0;j<=19;j++) 
         {
             if (buffer[j].IndexOf("0") != -1)
             {


                 for (int i = 0; i <= 19; i++)
                 {

                     if (buffer_ref[j].IndexOf("0") != -1)
                     {

                       int x=  Compute(buffer[j], buffer_ref[i]);
                       if (x > 3)
                       {
                           buffer[j] = "0";
                           Anser[j] = "S";

                       }


                     }//end if

                 } 

             }//end if 


        }//end for 

        }// end fun

        private void button1_Click(object sender, EventArgs e)
        {


            correct(refrence);
            sub();
            for (int i = 0; (i <= 19) && (refrence[i] != "."); ++i)
            {
                //loop intialize 
                ref2 = ref2 + " " + refrence[i];
                hyp2 = hyp2 + " " + hyp[i];
                Anser2 = Anser2 + " " + Anser[i];
                buffer2 = buffer2 + " " + buffer[i];
                count++;
                            }

            listBox1.Items.Add(" Refrence :" + ref2);
            listBox1.Items.Add(" HYp :" + hyp2);
            listBox1.Items.Add(" Anser:" + Anser2);
            listBox1.Items.Add(" buffer:" + buffer2);
            listBox1.Items.Add(count);

        } 




        private void Form1_Load(object sender, EventArgs e)
        {

        }

        private void label1_Click(object sender, EventArgs e)
        {

        }



        private void button2_Click(object sender, EventArgs e)
        {

        }

        private void label2_Click(object sender, EventArgs e)
        {

        }

        private void listBox1_SelectedIndexChanged(object sender, EventArgs e)
        {

        }

    }
}

can you help me please ?

Upvotes: 3

Views: 956

Answers (2)

Sergey Kalinichenko
Sergey Kalinichenko

Reputation: 726699

There is a built-in way to test if two lines are identical, but there is no built-in way to tell if two lines are similar. You need to implement an algorithm that measures string similarity, such as the Levenshtein Distance - a very common Edit Distance algorithm. Lines with small edit distance can be declared similar depending on some threshold specific to your requirements.

Upvotes: 5

Carra
Carra

Reputation: 17964

You'll need to use an algorithm that compares the "distance" between two strings:

The closeness of a match is measured in terms of the number of primitive operations necessary to convert the string into an exact match. This number is called the edit distance between the string and the pattern. The usual primitive operations are:

insertion: cot → coat
deletion: coat → cot
substitution: coat → cost

Upvotes: 2

Related Questions