Eli Rose
Eli Rose

Reputation: 3

Checking if the user's input is a palindrome

I came across some code in which an assembly program checks whether a string is a palindrome, but the string was hardcoded. To practice, I wanted to modify the code so that it does the same thing but takes the string from the user's input.

I got the main code from this post: Palindrome using NASM: answer by user1157391

Below is the code I came up with, but I keep getting these errors: "line 39: invalid operand type" and "line 41: division operator may only be applied to scalar values".

section .data                           ; initialized data: the prompt and the two result messages
        msg     db "Please provide a word: ",0
        len     equ $ - msg             ; byte count of msg, including the trailing 0

        msg1    db "The word is a palindrome",0
        len1    equ $ - msg1            ; byte count of msg1, including the trailing 0
        
        msg2    db "The word is not pallindrome",0
        len2    equ $ - msg2            ; byte count of msg2, including the trailing 0
        
        nline   db 0xA                  ; a single newline byte; not referenced by the code in this listing
        nlen    equ $ - nline           ; = 1; not referenced by the code in this listing
        
segment .bss                            ; uninitialized storage
        input   resb  10        ; reserve 10 bytes for input
        length  resb  10        ; 10 bytes reserved, but _start only stores one dword (4 bytes) here

        section .text
        global _start                   ; make the entry point visible to the linker

_start:                                 ; prompt for a word, read it, then print whether it is a palindrome
        mov     edx, len        ; number of bytes to write
        mov     ecx, msg        ; ECX = address of the prompt string msg
        mov     ebx, 1          ; fd 1 = STDOUT
        mov     eax, 4          ; invoke SYS_WRITE (kernel opcode 4)
        int     0x80
    
        mov     edx, 11         ; maximum bytes to read -- NOTE(review): one more than the 10 bytes reserved for `input`
        mov     ecx, input      ; ECX = address of the buffer that receives the input
        mov     ebx, 0          ; fd 0 = STDIN (this is a read, not a write)
        mov     eax, 3          ; invoke SYS_READ (kernel opcode 3)
        int     0x80

        mov     eax, input      ; overwrites whatever SYS_READ left in EAX
        call    strlen          ; EAX = bytes before the first 0 byte at/after `input` -- NOTE(review): assumes the data is 0-terminated; confirm
        mov     [length], eax   ; store the count as a dword at `length`

        mov     ebx, input                   ; start of word  
        mov     eax, (input + length - 1)    ; end of word -- NOTE(review): `length` here is the label's address, not the count stored at L49; the expression adds two addresses

        mov     ecx, (length / 2)             ; check will run for half of the word -- NOTE(review): divides the address of `length`, not the stored count
check:
        mov     dl, [ebx]                     ; DL = byte at the front pointer
        cmp     [eax], dl                     ; compare it with the byte at the back pointer
        jne     failure                       ; any mismatch means "not a palindrome"
        inc     ebx                           ; front pointer moves toward the end
        dec     eax                           ; back pointer moves toward the start
        loop    check                         ; decrement ECX and repeat while it is non-zero

        ;; success: every compared pair matched, so print msg1
        mov     edx, len1         
        mov     ecx, msg1      
        mov     ebx, 1          
        mov     eax, 4
        int     0x80    
        
        add     esp,4           ; NOTE(review): discards 4 bytes of stack, but no matching push appears in this listing
        jmp     done

failure:                        ; a pair of characters differed, so print msg2
        mov     edx, len2        
        mov     ecx, msg2      
        mov     ebx, 1          
        mov     eax, 4
        int     0x80  
        
        add esp,4               ; NOTE(review): same unmatched stack adjustment as on the success path

done:
        ret                     ; NOTE(review): no `call _start` appears in this listing -- confirm a return address exists, or exit via SYS_EXIT
        
;-----------------------------------------------------------------------
; strlen -- count the bytes that precede the first 0 byte of a string
; In:    eax = address of the first byte
; Out:   eax = number of bytes before the 0 byte
; Saves: ebx (pushed on entry, restored before returning)
;-----------------------------------------------------------------------
strlen:
    push    ebx                 ; keep the caller's ebx intact
    mov     ebx, eax            ; ebx remembers the start; eax becomes the cursor
    dec     eax                 ; pre-bias so the loop can advance before it tests

.scan:
    inc     eax                 ; step onto the next byte
    cmp     byte [eax], 0
    jne     .scan               ; keep walking until the 0 byte is under the cursor

    sub     eax, ebx            ; length = cursor - start
    pop     ebx
    ret

I've only started studying assembly language so it would be really nice if someone helps me out here. I've been losing my sanity for the whole day over this. Thank you!

UPDATE: Thank you for all of the answers and comments, and to @Peter Cordes for the helpful suggestions. Because of those, I came up with the following code, which takes a completely different approach. I also watched another video on how to check strings that have varying capitalization.

This doesn't work (for me) unless I input exactly 10 characters. However, when I hardcode the input, it works even with less than 10 characters.


; Data for the second version: a prompt, two result messages, and a 10-byte input buffer.
; NOTE(review): no section directive appears above `msg` in this listing -- presumably a
; `section .data` line was lost when the code was pasted; confirm against the real file.
msg     db "Please provide a word: ",0
len     equ $ - msg                     ; byte count of msg, including the trailing 0

palindrome  db  'The word is a palindrome'
lenP        equ $ - palindrome          ; byte count of the success message (no terminator is stored)

notPalindrome   db  'The word is not a palindrome'
lenNP           equ $ - notPalindrome   ; byte count of the failure message (no terminator is stored)

segment .bss
input   resb  10        ; reserve 10 bytes for input
length  equ $ - input   ; = 10, the buffer size; the read in _start hard-codes 10 rather than using this name

section .text
    global _start
    
; Entry point: print the prompt, read up to 10 bytes into `input`, then walk one
; pointer forward from the first byte (ECX) and one backward from the last byte
; read (EAX), comparing the two bytes after a case-folding step. Prints one of
; the two result messages and exits with status 0.
_start:
    mov     edx, len        ; number of bytes to write
    mov     ecx, msg        ; ECX = address of the prompt string msg
    mov     ebx, 1          ; fd 1 = STDOUT
    mov     eax, 4          ; invoke SYS_WRITE (kernel opcode 4)
    int     0x80
    
    mov     edx, 10         ; maximum number of bytes to read (the size of `input`)
    mov     ecx, input      ; ECX = address of the buffer that receives the input
    mov     ebx, 0          ; fd 0 = STDIN (this is a read, not a write)
    mov     eax, 3          ; invoke SYS_READ (kernel opcode 3)
    int     0x80            ; on success EAX = number of bytes read; ECX still = input

    add     eax, ecx        ; EAX = input + bytes read (one past the last byte read)
    dec     eax             ; EAX = address of the last byte read
                            ; NOTE(review): if the input ends with a newline (0xA), that newline is
                            ; the byte EAX starts on -- confirm how the input is terminated
    
capitalizer:
    cmp byte[ecx], 0            ; stop once the front pointer reaches a 0 byte
    je  finished                ; NOTE(review): the only exit besides a mismatch; the two pointers are never compared with each other
    
    mov bl, byte[ecx]           ;start
    mov dl, byte[eax]           ;end
    
    cmp bl, 90                  ; 90 = ASCII 'Z'
    jle uppercase         ;start is uppercase (signed compare: any byte <= 90 is left unchanged)
    
    sub bl, 32                  ; 32 = 'a' - 'A'; applied to every byte above 90, letter or not
    
uppercase:
    cmp dl, 90                  ; same test for the byte at the back pointer
    jle check               ;end is uppercase
    
    sub dl, 32
    
check:
    cmp dl, bl                  ; compare the two case-folded bytes
    jne fail                    ; any mismatch means "not a palindrome"
    
    inc ecx                     ; front pointer moves toward the end
    dec eax                     ; back pointer moves toward the start
    jmp capitalizer
    
finished:                       ; reached a 0 byte without a mismatch: print the success message
    mov     edx, lenP         
    mov     ecx, palindrome      
    mov     ebx, 1          
    mov     eax, 4
    int     0x80  
    jmp     exit
    
fail:                           ; a pair differed: print the failure message, then fall through to exit
    mov     edx, lenNP         
    mov     ecx, notPalindrome      
    mov     ebx, 1          
    mov     eax, 4
    int     0x80  
  
 exit:
    mov     eax, 1              ; SYS_EXIT (kernel opcode 1)
    mov     ebx, 0              ; exit status 0
    int     0x80```

Upvotes: 0

Views: 987

Answers (1)

Peter Cordes
Peter Cordes

Reputation: 365842

The read system call returns the number of bytes read in EAX. (Or a negative errno code like -EFAULT if you passed a bad pointer, -EBADF if the fd isn't open, etc.) In a real program you'd write error-handling code (and maybe a retry until you actually get to EOF in case read returns early from a long input), but in a toy program you can assume that one read system call succeeds and gets all the data you want to look at.

This data is not necessarily 0-terminated, because you passed the full buffer size to read (see footnote 1). It could have stored a non-zero input byte in the last character of the buffer. You can't strlen the buffer to find the length without maybe reading past the end.


But fortunately you don't need to: remember, read leaves the input length in EAX (see footnote 2).

So after the read syscall, add eax, ecx makes EAX a pointer to one-past-the-end of the string (like C read() + msg), while ECX is still pointing at the read arg. So you're all ready to loop them towards each other until they cross, the standard palindrome-checking algorithm.

Use cmp/jnb as the loop condition at the bottom of your palindrome loop, not the slow loop instruction. This is simpler than calculating how many iterations it will take for the pointers to cross; just loop until p < q is false, where p=start; q=end initially. Since this is homework, I'll let you choose the args to cmp.

(Using a pointer to one past the end of the input, like C++ std::vector::end(), is fairly common. You'd use it by dec eax / movzx edx, byte [eax] - decrement the pointer before reading it.)

(Or if you really want, work out the sub/shr details to make a counted loop with the read return value.)

Another complication: your input may include a newline. If you typed 10 bytes before newline, then the read buffer would only have those characters, no newline.

But on a shorter input, the buffer would hold a newline (0xa), which would compare unequal to the first byte. You might want to loop the end pointer (EAX) backwards until you find a non-newline, instead of special-casing cmp eax, length before adding. This will leave you with a pointer to the last byte, not one-past-last, so after doing this the main palindrome loop should load before decrementing the pointer.


Footnote 1: Actually you passed 11, so read itself can write past the end of your buffer. If you'd used length equ $-input to get NASM to calculate the length for you, or length equ 10 / input: resb length, you wouldn't have this problem and wouldn't have the length hard-coded in multiple places. You would mov edx, length before the read system call.

It makes no sense to reserve 10 bytes of space for length with length: resb 10. If anything you'd want 4 bytes (a dword integer), but it's a waste of instruction to keep it in memory at all. You're not close to running out of registers.

Footnote 2: It's really dumb that C functions like fgets don't tell you how many bytes they read, but fortunately the Unix system-call API doesn't suck. It's normal to know how large your data is, so take advantage of pointer+length instead of calling or implementing strlen whenever possible.

Some of those parts of the C library date back to very early C history, like maybe before it was called C. This partly explains the weird design of functions like fopen that take a string instead of an OR of bit constants (Why does C's "fopen" take a "const char *" as its second argument?), and the bad design of functions like strcpy which finds the length but chooses not to return it. (strcpy() return value). It's like the library designers hated efficiency, or valued code-size to the extreme (always pass around implicit-length strings, never keep track of their lengths), or didn't realize that rolling your own copy loops when you do want the end wouldn't be viable. (simple portable C compiles to slower asm than hand-written string functions.)

Upvotes: 2

Related Questions