Reputation: 851
I am looking to extract a substring from a string of variable length(input from the keyboard).
Here are my inputs: 1. A string. 2. An index/starting position for the substring. 3. The length of the substring. I am supposed to output the substring. Here is a snippet where I try to get the substring.
cld ;df=0(forward)
lea si,buff
xor bx,bx
mov bx, offset pos ;starting index for substring
add si,bx
;add si,1
lea di,subst
mov cx, offset len ;length of the substring
rep movsb
mov bx,offset subst
xor si,si
mov si,offset len
mov byte ptr[bx+si+1],0 ;create a null terminated substring
In my result, the substring starts from the given position (pos), but it does not terminate when it reaches the given length.
Upvotes: 2
Views: 9368
Reputation: 365
Here is a new real-mode solution for 80x86 and later microprocessors. It uses jump instructions and only a single string instruction (STOSB).
The GetSubStr3 procedure extracts a sub-string of a user-defined length, starting at a user-defined position, from the input string. All strings are null-terminated, so the routine stops at the end of the input string. The requested position may lie beyond the end of the input string, so I clamp it and return the adjusted value. The requested length may be larger than what remains of the input string, so I clamp it and return the adjusted value as well.
This is the code, that I have successfully tested:
Procedure GetSubStr3; Assembler;
{ Copies a null-terminated sub-string out of a null-terminated input string,
  clamping both the start position and the length to the input string.

  INPUT:  DS:SI -> input-string
          DX    =  requested start position of the sub-string
          BX    =  requested length of the sub-string (terminator excluded)
          ES:DI -> output buffer for the sub-string
  TEMP:   AL, CX
  OUTPUT: DS:SI -> input-string (unchanged)
          DX    =  start position actually used
          BX    =  number of characters actually copied (terminator excluded)
          ES:DI -> output sub-string (unchanged) }
Asm
  ClD                           { String instructions move forward }

  { Phase 1: advance SI by up to DX characters, stopping at the terminator }
  Mov   CX,DX                   { CX = characters still to skip }
  JCXZ  @SkipDone               { Nothing to skip when the position is 0 }
@Skip:
  Cmp   Byte Ptr [SI],0         { End of the input string reached? }
  JZ    @SkipDone               { Yes: the position lies beyond the string }
  Inc   SI                      { No: step over this character }
  Loop  @Skip                   { Repeat until CX characters were skipped }
@SkipDone:
  Sub   DX,CX                   { DX = characters really skipped }

  { Phase 2: copy up to BX characters, stopping at the terminator }
  Mov   CX,BX                   { CX = characters still to copy }
  JCXZ  @CopyDone               { Nothing to copy when the length is 0 }
@Copy:
  Mov   AL,[SI]                 { Fetch the next input character }
  Test  AL,AL                   { Is it the terminator? }
  JZ    @CopyDone               { Yes: stop, SI stays on the terminator }
  Inc   SI                      { No: consume it ... }
  StoSB                         { ... and store it, advancing DI }
  Loop  @Copy                   { Repeat until CX characters were copied }
@CopyDone:
  Sub   BX,CX                   { BX = characters really copied }
  Mov   Byte Ptr ES:[DI],0      { Terminate the output sub-string }

  { Undo the pointer movement so the caller gets the original addresses }
  Sub   SI,BX                   { Take back the copied characters ... }
  Sub   SI,DX                   { ... and the skipped ones }
  Sub   DI,BX                   { DI back to the start of the output }
End;
Here is an older real-mode solution for 80x86 and later microprocessors. It does not use jump instructions; it relies on string instructions instead. It extracts a sub-string of a user-defined length, starting at a user-defined position, from the input string. All strings are null-terminated, so I first compute the length of the input string. The requested position may lie beyond the end of the input string, so I clamp it and return the adjusted value. The requested length may be larger than what remains of the input string, so I clamp it and return the adjusted value as well.
Thanks to Peter Cordes, I implemented some of his suggestions and optimized the first version of the code; I have tested the result successfully:
Procedure GetSubStr2; Assembler;
{ Copies a null-terminated sub-string out of a null-terminated input string
  without using jump instructions. Start position and length are clamped to
  the input string.

  INPUT:  ES:DI -> input-string
          DX    =  requested start position of the sub-string
          BX    =  requested length of the sub-string (terminator excluded)
          DS:SI -> output buffer (receives the copied characters plus a 0)
  TEMP:   AX, CX
  OUTPUT: DS:SI -> input-string (segment and offset swapped with the output)
          ES:DI -> output sub-string
          DX    =  start position actually used
          BX    =  number of characters actually copied (terminator excluded)

  NOTE(review): DS differs from its entry value on return. Turbo Pascal
  expects DS to be preserved, so the caller presumably has to save and
  restore it around the call - confirm. }
Asm
  { Step 1: CX = length of the input-string (terminator excluded),
    leaving ES:DI at the start of the input-string. }
  ClD                     { String instructions move forward }
  XOr   AL,AL             { AL = 0, the terminator to search for }
  Mov   CX,0FFFFH         { Scan at most 65535 bytes }
  RepNE ScaSB             { Stop after the terminator or when CX reaches 0 }
  LAHF                    { Keep the flags of the scan: ZF is bit 6 of AH }
  Not   CX                { CX = number of bytes scanned }
  Sub   DI,CX             { DI back to the start of the input-string }
  ShL   AH,1              { First shift moves bit 7 out ... }
  ShL   AH,1              { ... second shift leaves the saved ZF in CF }
  SbB   CX,0              { Terminator found: do not count it }

  { Step 2: DX = MIN(DX, CX), unsigned, branch-free.
    Input: DX, CX.  Temp: AX. }
  Sub   DX,CX             { DX = DX - CX, CF = 1 when DX was below CX }
  SbB   AX,AX             { AX = 0FFFFH when CF = 1, otherwise 0 }
  And   DX,AX             { Keep the difference only when DX was below CX }
  Add   DX,CX             { DX = original DX or CX, whichever is smaller }

  Add   DI,DX             { ES:DI -> first character of the sub-string }
  Sub   CX,DX             { CX = characters left from there to the end }

  { Step 3: CX = MIN(CX, BX), unsigned, branch-free.
    Input: CX, BX.  Temp: AX. }
  Sub   CX,BX             { CX = CX - BX, CF = 1 when CX was below BX }
  SbB   AX,AX             { AX = 0FFFFH when CF = 1, otherwise 0 }
  And   CX,AX             { Keep the difference only when CX was below BX }
  Add   CX,BX             { CX = original CX or BX, whichever is smaller }

  { Step 4: swap source and destination so that MOVSB reads the input. }
  XChg  DI,SI             { Swap the offsets ... }
  Mov   AX,ES
  Mov   BX,DS
  Mov   ES,BX             { ... and the segments: ES:DI -> output ... }
  Mov   DS,AX             { ... DS:SI -> sub-string inside the input }

  { Step 5: copy, terminate and restore the pointers. }
  Mov   BX,CX             { BX = length that is going to be copied }
  Rep   MovSB             { Copy the sub-string to the output }
  Mov   [ES:DI],Byte(0)   { Terminate the output sub-string }
  Sub   DI,BX             { DI back to the start of the output }
  Sub   SI,BX             { SI back to the start of the sub-string ... }
  Sub   SI,DX             { ... and then to the start of the input-string;
                            without this step SI was left at input + DX }
End;
First code written by me:
Procedure GetSubStr; Assembler;
{ First version of the jump-free routine: copies a null-terminated
  sub-string out of a null-terminated input string, clamping the start
  position and the length to the input string.

  INPUT:  ES:DI -> input-string
          DX    =  requested start position of the sub-string
          BX    =  requested length of the sub-string (terminator excluded)
          DS:SI -> output buffer (receives the copied characters plus a 0)
  TEMP:   AX, CX
  OUTPUT: DS:SI -> input-string (segment and offset swapped with the output)
          ES:DI -> output sub-string
          DX    =  start position actually used
          BX    =  number of characters actually copied (terminator excluded)

  NOTE(review): DS differs from its entry value on return. Turbo Pascal
  expects DS to be preserved, so the caller presumably has to save and
  restore it around the call - confirm. }
Asm
  { Step 1: CX = length of the input-string (terminator excluded),
    leaving ES:DI at the start of the input-string. }
  ClD                     { String instructions move forward }
  XOr   AL,AL             { AL = 0, the terminator to search for }
  Mov   CX,0FFFFH         { Scan at most 65535 bytes }
  RepNE ScaSB             { Stop after the terminator or when CX reaches 0 }
  LAHF                    { Keep the flags of the scan: ZF is bit 6 of AH }
  Not   CX                { CX = number of bytes scanned }
  Sub   DI,CX             { DI back to the start of the input-string }
  ShL   AH,1              { First shift moves bit 7 out ... }
  ShL   AH,1              { ... second shift leaves the saved ZF in CF }
  SbB   CX,0              { Terminator found: do not count it }

  { Step 2: DX = MIN(DX, CX), unsigned, branch-free.
    Input: DX, CX.  Temp: AX. }
  Sub   DX,CX             { DX = DX - CX, CF = 1 when DX was below CX }
  SbB   AX,AX             { AX = 0FFFFH when CF = 1, otherwise 0 }
  And   DX,AX             { Keep the difference only when DX was below CX }
  Add   DX,CX             { DX = original DX or CX, whichever is smaller }

  Add   DI,DX             { ES:DI -> first character of the sub-string }
  Sub   CX,DX             { CX = characters left from there to the end }

  { Step 3: CX = MIN(CX, BX), unsigned, branch-free.
    Input: CX, BX.  Temp: AX. }
  Sub   CX,BX             { CX = CX - BX, CF = 1 when CX was below BX }
  SbB   AX,AX             { AX = 0FFFFH when CF = 1, otherwise 0 }
  And   CX,AX             { Keep the difference only when CX was below BX }
  Add   CX,BX             { CX = original CX or BX, whichever is smaller }

  { Step 4: swap source and destination so that MOVSB reads the input. }
  XChg  DI,SI             { Swap the offsets ... }
  Mov   AX,DS
  Mov   BX,ES
  Mov   ES,AX             { ... and the segments: ES:DI -> output ... }
  Mov   DS,BX             { ... DS:SI -> sub-string inside the input }

  { Step 5: copy, terminate and restore the pointers. }
  Mov   BX,CX             { BX = length that is going to be copied }
  Rep   MovSB             { Copy the sub-string to the output }
  Mov   [ES:DI],Byte(0)   { Terminate the output; a plain store keeps DI in
                            place, whereas STOSB advanced it and made the
                            restore below return output + 1 }
  Sub   DI,BX             { DI back to the start of the output }
  Sub   SI,BX             { SI back to the start of the sub-string ... }
  Sub   SI,DX             { ... and then to the start of the input-string;
                            without this step SI was left at input + DX }
End;
Hi!
Upvotes: 0
Reputation: 39166
xor bx,bx
mov bx, offset pos ;starting index for substring
When you `mov` a word value into a word register, you don't need to clear that register first. Just drop the `xor bx,bx`.
mov bx, offset pos
mov cx, offset len
When you use the offset tag you tell the assembler to use the address of your variable when in fact you need the value of the variable. So drop the offset tag and write mov bx, pos
and mov cx, len
At the conclusion of rep movsb
ES:DI points to where you want to place your null. Use this fact and save yourself the trouble of calculating this.
Here's what I propose you could write:
cld ;df=0(forward)
mov bx, pos ;starting index for substring
lea si, [buff + bx] ; (1)
lea di, subst
mov cx, len ;length of the substring
rep movsb
mov al, 0
stosb ;create a null terminated substring
(1) This lea si, [buff + bx]
replaces the 2 instructions lea si, buff
and add si, bx
If you are going to output this substring with DOS function 09h you should not null terminate it but rather $ terminate it.
Upvotes: 2
Reputation: 10371
To run your snippet I added only the code needed to make it a complete program. The changes to your code are marked in the listing with `<===` comments.
Here is the code made with EMU8086 :
.model small
.stack 100h

.data
buff    db  'he is coming'              ; source string (12 characters)
len     dw  12                          ; length of buff in bytes
pos     dw  3                           ; zero-based start index of the substring
msj     db  13,10,'The substring is : $'
; Destination buffer, pre-filled with '$' so DOS function 09h always finds
; a terminator. Sized for the worst case (pos = 0): 12 copied characters,
; the 0 byte get_substring appends, and one '$' left over. The previous
; 12-byte buffer let the 0 byte land one byte past its end in that case.
subst   db  14 dup('$')

.code
start:
        ; Point DS and ES at the data segment: rep movsb reads from DS:SI
        ; and writes to ES:DI.
        mov     ax, @data
        mov     ds, ax
        mov     es, ax

        ; Build the substring in subst.
        call    get_substring

        ; Show the caption, then the substring.
        mov     dx, offset msj
        call    printf
        mov     dx, offset subst
        call    printf

        ; Wait for a key (DOS function 07h).
        mov     ah, 7
        int     21h

        ; Terminate with return code 0 (DOS function 4Ch).
        mov     ax, 4c00h
        int     21h
;-----------------------------------------
; get_substring
; Copies the part of buff that starts at index pos and runs to the end of
; the string (len - pos bytes) into subst, then appends a 0 byte.
; In:    DS = ES = segment holding buff, len, pos and subst
; Out:   subst = substring followed by a 0 byte
; Clobb: CX, SI, DI, flags
; Note:  a 0 byte does not end output for DOS function 09h; displaying
;        subst that way relies on a '$' following the 0 byte in the buffer.
get_substring proc
        cld                             ; copy forward
        mov     si, offset buff
        add     si, pos                 ; DS:SI -> first byte of the substring
        mov     di, offset subst        ; ES:DI -> destination
        mov     cx, len
        sub     cx, pos                 ; CX = bytes from pos to end of string
        jae     gs_copy                 ; unsigned: pos <= len, count is valid
        xor     cx, cx                  ; pos > len: count wrapped, copy nothing
gs_copy:
        rep     movsb
        mov     byte ptr es:[di], 0     ; ES:DI is just past the copy: terminate
        ret
get_substring endp
;-----------------------------------------
; printf
; Displays a '$'-terminated string through DOS function 09h.
; In:    DS:DX -> string ending in '$'
; Clobb: AH is overwritten with the function number
printf proc
        mov     ah, 09h                 ; DOS: write string to standard output
        int     21h
        ret
printf endp
;-----------------------------------------
end start
Upvotes: 1