Numeric Conversion

Chapter 9

Numeric Values To Strings

Numeric Values to Hexadecimal Strings

  • Converting a numeric value to a hexadecimal string is relatively straightforward. Just take each nibble (4 bits) in the binary representation and convert that to one of the 16 characters "0" through "9" or "A" through "F".

; Numeric to hex string functions

          
; btoh-
;
; Translates the byte held in AL into its two-character
; hexadecimal representation.
;
; Entry: AL = binary value to convert
; Exit:  AH = ASCII hex digit for the HO nibble
;        AL = ASCII hex digit for the LO nibble
;
; Only AX and the flags are modified.

btoh        proc

; Split the byte: HO nibble goes to AH, LO nibble stays in AL.

            mov     ah, al
            shr     ah, 4
            and     al, 0Fh

; Both halves are now 0..0Fh, so ORing in '0' (30h) for each
; byte yields '0'..'9' for 0..9 and 3Ah..3Fh for 0Ah..0Fh.

            or      ax, 3030h

; Lift 3Ah..3Fh up to 'A'..'F' in the HO digit.

            cmp     ah, '9'
            jbe     btohHiDone
            add     ah, 'A' - ('9' + 1)
btohHiDone:

; Same adjustment for the LO digit.

            cmp     al, '9' + 1
            jb      btohLoDone
            add     al, 7
btohLoDone: ret

btoh        endp



; btoStr-
;
;  Writes the byte in AL as two hexadecimal characters,
; followed by a zero byte, to the buffer addressed by RDI.
;
; Entry: AL  = value to convert
;        RDI = destination buffer (3 bytes are written)
; Exit:  RAX and RDI are unchanged; flags are modified.

btoStr      proc
            push    rax             ;Caller's RAX survives the call
            call    btoh            ;AH = HO digit, AL = LO digit

; The HO digit must come first in memory. A little-endian
; word store puts AL at [RDI] and AH at [RDI+1], so swap the
; two characters and emit them with a single store.

            xchg    al, ah
            mov     word ptr [rdi], ax
            mov     byte ptr [rdi+2], 0     ;Zero terminator

            pop     rax
            ret
btoStr      endp



; wtoStr-
;
;  Writes the word in AX as four hexadecimal characters,
; followed by a zero byte, to the buffer addressed by RDI.
;
; Entry: AX  = value to convert
;        RDI = destination buffer (5 bytes are written)
; Exit:  RAX and RDI are unchanged; flags are modified.

wtoStr      proc
            push    rax             ;Saved RAX; its LO byte sits at [rsp]

; HO byte first: it produces the two leftmost characters.
; (btoStr also stores a terminator at [RDI+2]; the second
; call below overwrites it.)

            movzx   eax, ah
            call    btoStr

; LO byte next, written immediately after the HO characters.

            movzx   eax, byte ptr [rsp]
            lea     rdi, [rdi+2]
            call    btoStr

            lea     rdi, [rdi-2]    ;Put RDI back where the caller had it
            pop     rax
            ret
wtoStr      endp


; dtoStr-
;
;  Writes the dword in EAX as eight hexadecimal characters,
; followed by a zero byte, to the buffer addressed by RDI.
;
; Entry: EAX = value to convert
;        RDI = destination buffer (9 bytes are written)
; Exit:  RAX and RDI are unchanged; flags are modified.

dtoStr      proc
            push    rax             ;Saved RAX; its LO word sits at [rsp]

; HO word first: it produces the four leftmost characters.

            shr     eax, 16
            call    wtoStr

; LO word next, written immediately after the HO characters.

            movzx   eax, word ptr [rsp]
            lea     rdi, [rdi+4]
            call    wtoStr

            lea     rdi, [rdi-4]    ;Put RDI back where the caller had it
            pop     rax
            ret
dtoStr      endp


; qtoStr-
;
;  Writes the qword in RAX as sixteen hexadecimal characters,
; followed by a zero byte, to the buffer addressed by RDI.
;
; Entry: RAX = value to convert
;        RDI = destination buffer (17 bytes are written)
; Exit:  RAX, RCX, RDX, and RDI are unchanged; flags are modified.

hexChar     byte    "0123456789ABCDEF"      ;Nibble value -> ASCII digit

qtoStr      proc
            push    rdi
            push    rcx
            push    rdx
            push    rax             ;Original value, restored on exit

            lea     rcx, hexChar    ;RCX = base of the lookup table

; Emit the sixteen digits, most significant nibble first
; (bits 60-63 down to bits 0-3). Each pass shifts the top
; nibble of RAX into a zeroed RDX, discards it from RAX, and
; stores the matching table character at the next buffer
; offset. The block is expanded at assembly time, so the
; generated code is straight-line.

qtoDigit    =       0
            repeat  16
            xor     edx, edx                ;Also clears the HO half of RDX
            shld    rdx, rax, 4             ;RDX = current top nibble
            shl     rax, 4                  ;Advance to the next nibble
            mov     dl, [rcx+rdx]           ;Table lookup
            mov     [rdi+qtoDigit], dl
qtoDigit    =       qtoDigit + 1
            endm

; Zero-terminate string:

            mov     byte ptr [rdi+16], 0

            pop     rax
            pop     rdx
            pop     rcx
            pop     rdi
            ret
qtoStr      endp


; otoStr-
;
;  Writes the oword in RDX:RAX as 32 hexadecimal characters,
; followed by a zero byte, to the buffer addressed by RDI.
;
; Entry: RDX:RAX = value to convert (RDX holds the HO qword)
;        RDI     = destination buffer (33 bytes are written)
; Exit:  RAX and RDI are unchanged; flags are modified.

otoStr      proc
            push    rax             ;Saved LO qword; stays at [rsp]

; HO qword first: it produces the sixteen leftmost characters.

            mov     rax, rdx
            call    qtoStr

; LO qword next, written immediately after the HO characters.

            mov     rax, [rsp]
            lea     rdi, [rdi+16]
            call    qtoStr

            lea     rdi, [rdi-16]   ;Put RDI back where the caller had it
            pop     rax
            ret
otoStr      endp

Unsigned Decimal Values to Strings

  • Decimal output is a little more complicated than hexadecimal output because the HO bits of a binary number affect the LO digits of the decimal representation.

  • The simplest approach is a recursive algorithm. It begins by dividing the value by 10 and saving the remainder in a local variable. If the quotient is not 0, the routine recursively calls itself to output any leading digits first. On return from the recursive call, the recursive algorithm outputs the digit associated with the remainder to complete the operation. Here's how the operation works when printing the decimal value 789:

  1. Divide 789 by 10. The quotient is 78, and the remainder is 9.

  2. Save the remainder (9) in a local variable and recursively call the routine with the quotient.

  3. Recursive entry 1: Divide 78 by 10. The quotient is 7, and the remainder is 8.

  4. Save the remainder (8) in a local variable and recursively call the routine with the quotient.

  5. Recursive entry 2: Divide 7 by 10. The quotient is 0, and the remainder is 7.

  6. Save the remainder (7) in a local variable. Because the quotient is 0, don't call the routine recursively.

  7. Output the remainder value saved in the local variable (7). Return to the caller (recursive entry 1).

  8. Return to recursive entry 1: Output the remainder value saved in the local variable in recursive entry 1 (8). Return to the caller (original invocation of the procedure).

  9. Original invocation: Output the remainder value saved in the local variable in the original call (9). Return to the original caller of the output routine.

  • Because the recursive algorithm uses the div instruction, it can be fairly slow. Fortunately, we can speed it up by using the fild and fbstp instructions.

  • The fbstp instruction converts the 80-bit floating-point value currently sitting on the top of stack to an 18-digit packed BCD value. The fild instruction allows you to load a 64-bit integer onto the FPU stack.

  • So, by using these two instructions, you can (mostly) convert a 64-bit integer to a packed BCD value, which encodes a single decimal digit per 4 bits. Therefore, you can convert the packed BCD result that fbstp produces to a character string by using the same algorithm you use for converting hexadecimal numbers to a string.

Signed Integer Values to Strings

  • To convert a signed integer value to a string, you first check to see if the number is negative; if it is, you emit a hyphen (-) character and negate the value. Then the value can be converted.

Extended-Precision Unsigned Integers to Strings

Extended-Precision Signed Decimal Values to Strings

  • Once you have an extended-precision unsigned decimal output routine, writing an extended-precision signed decimal output routine is easy. The basic algorithm is similar to that for 64-bit integers given earlier:

    1. Check the sign of the number.

    2. If it is positive, call the unsigned output routine to print it. If the number is negative, print a minus sign. Then negate the number and call the unsigned output routine to print it.

Formatted Conversions

  • To create nicely formatted tables of values, you will need to write functions that provide appropriate padding in front of the string of digits before actually emitting the digits.

  • The first step is to write iSize and uSize routines that compute the minimum number of character positions needed to display the value. One algorithm to accomplish this is similar to the numeric string conversion routines. In fact, the only difference is that you initialize a counter to 0 upon entry into the routine, and you increment this counter rather than outputting a digit on each recursive call.

  • After the calculation is complete, these routines should return the size of the operand in the EAX register.

Floating-Point Values to Strings

  • Floating-point values can be converted to strings in one of two forms:

  1. Decimal notation conversion (for example, ± xxx.yyy format)

  2. Exponential (or scientific) notation conversion (for example, ± x.yyyyye ± zz format)

  • By using the x87 FPU for all floating-point arithmetic during the conversion, all we need to do is write code to convert real10 values into string form.

  • To output the mantissa in decimal form with approximately 18 digits of precision, the trick is to successively multiply or divide the floating-point value by 10 until the number is between 1e+18 and just less than 1e+19.

  • Once the exponent is in the appropriate range, the mantissa bits form an 18-digit integer value (no fractional part), which can be converted to a decimal string to obtain the 18 digits that make up the mantissa value.

Floating-Point Value to a Decimal String

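  • The r10ToStr function call will need the arguments referenced in the steps below: the real10 value to convert (r10), the field width (fWidth), the number of decimal digits (decDigits), and the padding character (fill), along with the string that receives the output.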

  • The algorithm for emitting the string differs for values with negative and non-negative exponents. Negative exponents are probably the easiest to process. Here's the algorithm for emitting values with a negative exponent:

    1. The function begins by adding 3 to decDigits.

    2. If decDigits is less than 4, the function sets it to 4 as a default value.

    3. If decDigits is greater than fWidth, the function emits fWidth "#" characters to the string and returns.

    4. If decDigits is less than fWidth, then output (fWidth - decDigits) padding characters (fill) to the output string.

    5. If r10 was negative, emit -0. to the string; otherwise, emit 0. to the string (with a leading space in front of the 0 if non-negative).

    6. Next, output the digits from the converted number. If the field width is less than 21 (18 digits plus the 3 leading 0. or -0. characters), then the function outputs the specified (fWidth) characters from the converted digit string. If the width is greater than 21, the function emits all 18 digits from the converted digits and follows it by however many 0 characters are necessary to fill out the field width.

    7. Finally, the function zero-terminates the string and returns.

Floating-Point Value to Exponential Form

  • Converting a floating-point value to exponential (scientific) form is a bit easier than converting it to decimal form. The mantissa always takes the form sx.y where s is a hyphen or a space, x is exactly one decimal digit, and y is one or more decimal digits.

  • The FPDigits function does almost all the work to create this string. The exponential conversion function needs to output the mantissa string with sign and decimal point characters and then output the decimal exponent for the number.

  • This code block from the previous code shows how to do it -

String-to-Numeric Conversion

Decimal Strings to Integers

  • The basic algorithm to convert a string containing decimal digits to a number is the following:

    1. Initialize an accumulator variable to 0.

    2. Skip any leading spaces or tabs in the string.

    3. Fetch the first character after the spaces or tabs.

    4. If the character is not a numeric digit, return an error. If the character is a numeric digit, fall through to step 5.

    5. Convert the numeric character to a numeric value (using AND 0Fh).

    6. Set the accumulator = (accumulator × 10) + current numeric value.

    7. If overflow occurs, return and report an error. If no overflow occurs, fall to step 8.

    8. Fetch the next character from the string.

    9. If the character is a numeric digit, go back to step 5, else fall through to step 10.

    10. Return success, with accumulator containing the converted value.

  • For signed integer input, you use this same algorithm with the following modifications:

    • If the first non-space or tab character is a hyphen (-), set a flag denoting that the number is negative and skip the "-" character (if the first character is not -, then clear the flag).

    • At the end of a successful conversion, if the flag is set, then negate the integer result before returning.

Hexadecimal Strings to Numeric Form

  • As was the case for numeric output, hexadecimal input is the easiest numeric input routine to write. The basic algorithm for hexadecimal-string-to-numeric conversion is the following:

    1. Initialize an extended-precision accumulator value to 0.

    2. For each input character that is a valid hexadecimal digit, repeat steps 3 through 6; drop down to step 7 when it is not a valid hexadecimal digit.

    3. Convert the hexadecimal character to a value in the range 0 to 15 (0h to 0Fh).

    4. If the HO 4 bits of the extended-precision accumulator value are nonzero, raise an exception.

    5. Multiply the current extended-precision value by 16 (that is, shift left 4 bits).

    6. Add the converted hexadecimal digit value to the accumulator.

    7. Check the current input character to ensure it is a valid delimiter. Raise an exception if it is not.

  • For hexadecimal string conversions that handle numbers greater than 64 bits, you have to use an extended-precision shift left by 4 bits.

Unsigned Decimal Strings to Integers

  • The algorithm for unsigned decimal input is nearly identical to that for hexadecimal input. In fact, the only difference is that you multiply the accumulating value by 10 rather than 16 for each input character.

Extended-Precision Decimal String to Unsigned Integer

  • The algorithm for (decimal) string-to-numeric conversion is the same regardless of integer size. You read a decimal character, convert it to an integer, multiply the accumulating result by 10, and add in the converted character. The only things that change for larger-than-64-bit values are the multiplication by 10 and addition operations.

  • Once you have an unsigned decimal input routine, writing a signed decimal input routine is easy, as described by the following algorithm:

    1. Consume any delimiter characters at the beginning of the input stream.

    2. If the next input character is a minus sign, consume this character and set a flag noting that the number is negative; else just drop down to step 3.

    3. Call the unsigned decimal input routine to convert the rest of the string to an integer.

    4. Check the return result to make sure its HO bit is clear. Raise a value out of range exception if the HO bit of the result is set.

    5. If the code encountered a minus sign in step 2, negate the result.

Real String to Floating-Point

  • Converting a string of characters representing a floating-point number to the 80-bit real10 format is slightly easier than the real10-to-string conversion. The algorithm to do the conversion is the following:

  1. Begin by stripping away any leading space or tab characters (and any other delimiters).

  2. Check for a leading plus (+) or minus (-) sign character. Skip it if one is present. Set a sign flag to true if the number is negative (false if non-negative).

  3. Initialize an exponent value to –18. The algorithm will create a left-justified packed BCD value from the mantissa digits in the string to provide to the fbld instruction, and left-justified packed BCD values are always greater than or equal to 10^18. Initializing the exponent to –18 accounts for this.

  4. Initialize a significant-digit-counter variable that counts the number of significant digits processed thus far to 18.

  5. If the number begins with any leading zeros, skip over them (do not change the exponent or significant digit counters for leading zeros to the left of the decimal point).

  6. If the scan encounters a decimal point after processing any leading zeros, go to step 11; else fall through to step 7.

  7. For each nonzero digit to the left of the decimal point, if the significant digit counter is not zero, insert the nonzero digit into a "digit string" array at the position specified by the significant digit counter (minus 1). Note that this will insert the characters into the string in a reversed position.

  8. For each digit to the left of the decimal point, increment the exponent value (originally initialized to –18) by 1.

  9. If the significant digit counter is not zero, decrement the significant digit counter (this will also provide the index into the digit string array).

  10. If the first nondigit encountered is not a decimal point, skip to step 14.

  11. Skip over the decimal point character.

  12. For each digit encountered to the right of the decimal point, continue adding the digits (in reverse order) to the digit string array as long as the significant digit counter is not zero. If the significant digit counter is greater than zero, decrement it. Also, decrement the exponent value.

  13. If the algorithm hasn't encountered at least one decimal digit by this point, report an illegal character exception and return.

  14. If the current character is not e or E, then go to step 20. Otherwise, skip over the e or E character and continue with step 15.

  15. If the next character is + or -, skip over it. Set a flag to true if the sign character is -, and set it to false otherwise (note that this exponent sign flag is different from the mantissa sign flag set earlier in this algorithm).

  16. If the next character is not a decimal digit, report an error.

  17. Convert the string of digits (starting with the current decimal digit character) to an integer.

  18. Add the converted integer to the exponent value (which was initialized to –18 at the start of this algorithm).

  19. If the exponent value is outside the range –4930 to +4930, report an out-of-range exception.

  20. Convert the digit string array of characters to an 18-digit (9-byte) packed BCD value by stripping the HO 4 bits of each character, merging pairs of characters into a single byte (by shifting the odd-indexed byte to the left 4 bits and logically ORing with the even-indexed byte of each pair), and then setting the HO (10th) byte to 0.

  21. Convert the packed BCD value to a real10 value (using the fbld instruction).

  22. Take the absolute value of the exponent (though preserve the sign of the exponent). This value will be 13 bits or less (4096 has bit 12 set, so 4930 or less will have some combination of bits 0 to 12 set to 1, with all other bits 0).

  23. If the exponent was positive, then for each set bit in the exponent, multiply the current real10 value by 10 raised to the power specified by that bit. For example, if bits 12, 10, and 1 are set, multiply the real10 value by 10^4096, 10^1024, and 10^2.

  24. If the exponent was negative, then for each set bit in the exponent, divide the current real10 value by 10 raised to the power specified by that bit. For example, if bits 12, 10, and 1 are set, divide the real10 value by 10^4096, 10^1024, and 10^2.

  25. If the mantissa is negative (the first sign flag set at the beginning of the algorithm), then negate the floating-point number.

Last updated