MATLAB: save arabic string in .txt file

Question

I am working on a steganography project involving Arabic characters and have written a MATLAB function called 'stegoText'. The function reads two UTF-8 encoded text files, 'secretmessage.txt' and 'covermessage.txt', and performs the following steps:

Converts Arabic characters to binary using a mapping table of UNICODE values to binary
Converts binary to DNA
Converts DNA to RNA by replacing T with U and adding a start and stop codon
Performs Huffman compression on the RNA sequence
Embeds the encoded string into an Arabic cover message
Writes the result to 'stego.txt' using UTF-16LE encoding

The function works well but the resulting 'stego.txt' file appears disordered. How can I fix this?

Here's how they differ between what I see on MATLAB and in the .txt file:

Here is the MATLAB code:

function stego = stegoText() 

file_path = 'C:\Users\Charbel\Desktop\secretmessage.txt';
secret = fileread(file_path);

file_path = 'C:\Users\Charbel\Desktop\covermessage.txt';
cover = fileread(file_path);

% Part A: Dealing with the arabic secret message 

% Step 1: Convert from Arabic to Binary

% Mapping table for arabic characters to binary using UNICODE

mappingTable = table();

mappingTable.Arabic = ['ن'; 'ح'; 'ط'; 'ف'; 'ش'; 'ا'; 'ي'; 'ر'; 'و'; 'ك'; 'د'; 'ت'; 'ز'; 'ع'; ...
    'م'; 'ص'; 'ج'; 'ه'; 'س'; 'ب'; 'ذ'; 'ض'; 'غ'; 'ظ'; 'ث'; 'ق'; 'خ'; 'ل'; ...
    ' '; '،'; '؛'; '.'; '9'; '8'; '7'; '4'; '1'; '6'; '0'; '2'; '5'; '3'; '؟'; 'َ'; 'ِ'; 'ؤ'; ...
    'ُ'; 'ة'; 'ى'; 'ْ'; '٠'; '١'; '٣'; '٤'; '٥'; '٦'; '٧'; '٨'; '٩'; ...
    'ء'; 'ئ'; ':'; '!'; '٢'];

mappingTable.Binary = ['000000'; '000001'; '000010'; '000011'; '000100'; '000101'; '000110'; '000111'; ...
    '001000'; '001001'; '001010'; '001011'; '001100'; '001101'; '001110'; '001111'; '010000'; '010001'; ...
    '010010'; '010011'; '010100'; '010101'; '010110'; '010111'; '011000'; '011001'; '011010'; '011011'; ...
    '011100'; '011101'; '011110'; '011111'; '100000'; '100001'; '100010'; '100011'; '100100'; '100101'; ...
    '100110'; '100111'; '101000'; '101001'; '101010'; '101011'; '101100'; '101101'; '101110'; '101111'; ...
    '110000'; '110001'; '110010'; '110011'; '110101'; '110110'; '110111'; '111000'; '111001'; ...
    '111010'; '111011'; '111100'; '111100'; '111101'; '111111'; '110100'];

secretBinary = '';

for i = 1:length(secret)
    index = find(mappingTable.Arabic == secret(i));
    binaryChar = mappingTable.Binary(index, :);
    secretBinary = [secretBinary binaryChar];
end

% Step 2: Convert from Binary to DNA

dnaStrand = '';

for i = 1:2:length(secretBinary)
    segment = secretBinary(i:i+1);
    switch segment
        case '00'
            dnaBase = 'A';
        case '01'
            dnaBase = 'C';
        case '10'
            dnaBase = 'G';
        case '11'
            dnaBase = 'T';
    end
    dnaStrand = [dnaStrand dnaBase];
end

% Step 3: Convert from DNA to RNA (replace T by U, add start and stop codons)

rnaStrand = strrep(dnaStrand, 'T', 'U');

startCodon = 'AUG';
stopCodon = ['UAA'; 'UAG'; 'UGA'];

randomIndex = randi([1, 3]);

rnaStrand = [startCodon rnaStrand stopCodon(randomIndex, :)];

% Step 4: Huffmann compression 

numericArray = double(rnaStrand);

symbols = unique(numericArray);
probabilities = ones(size(symbols)) / numel(symbols);

huffDict = huffmandict(symbols, probabilities);
save('huffman_dictionary.mat', 'huffDict');

encodedData = huffmanenco(numericArray, huffDict);

encodedString = char(encodedData + '0');

% Part B: Embedding the encoded string into the arabic cover message

% Step 1: Get the size of the encoded secret message, and the size of the cover message

sizeSecret = length(encodedString);
sizeCover = length(cover);

% Step 2: Embed the encodedString into the cover message

stego = '';

if sizeCover < sizeSecret
    stego = 'Cover message is not long enough';
else
    for i = 1:sizeSecret
        bit = encodedString(i);
        coverChar = cover(i);

        if i == 1 && isletter(coverChar)
            if bit == '1'
                stego = [stego coverChar char(hex2dec('200D'))];
            else
                stego = [stego coverChar char(hex2dec('FEFF'))];
            end

        elseif i > 1 && ~isletter(cover(i-1)) && isletter(coverChar)
            if bit == '1'
                stego = [stego coverChar char(hex2dec('200D'))];
            else
                stego = [stego coverChar char(hex2dec('FEFF'))];
            end

        elseif isletter(coverChar) && i < sizeCover && isletter(cover(i+1))
            if bit == '1'
                stego = [stego coverChar char(hex2dec('200E'))];
            else
                stego = [stego coverChar char(hex2dec('200F'))];
            end

        elseif isletter(coverChar) && (i == sizeCover || ~isletter(cover(i+1)))
            if bit == '1'
                stego = [stego coverChar char(hex2dec('200C'))];
            else
                stego = [stego coverChar char(hex2dec('200B'))];
            end

        elseif ~isletter(coverChar)
            if bit == '1'
                stego = [stego coverChar char(hex2dec('2009'))];
            else
                stego = [stego coverChar char(hex2dec('200A'))];
            end
        end
    end
    stego = [stego cover(length(encodedString)+1:end)];
end

filePath = fullfile('C:', 'Users', 'Charbel', 'Desktop', 'stego.txt');

fileID = fopen(filePath, 'w', 'n', 'UTF-16LE');

fwrite(fileID, stego, 'char', 'n');

fclose(fileID);

end

MATLAB: save arabic string in .txt file

Answers (1)

Related Questions