From 5125969b57fe4d64874b2633adcf007bda2a1847 Mon Sep 17 00:00:00 2001 From: solar Date: Wed, 7 Apr 2010 13:22:18 +0000 Subject: [PATCH] Only the character groups still missing... --- functions/_PDCLIB/scan.c | 105 +++++++++++++++++++++++------- functions/stdio/sscanf.c | 133 +++++++++++++++++++++++++------------- functions/stdio/vsscanf.c | 5 -- 3 files changed, 171 insertions(+), 72 deletions(-) diff --git a/functions/_PDCLIB/scan.c b/functions/_PDCLIB/scan.c index c392a53..0d46f9e 100644 --- a/functions/_PDCLIB/scan.c +++ b/functions/_PDCLIB/scan.c @@ -14,6 +14,7 @@ #include #include #include +#include /* Using an integer's bits as flags for both the conversion flags and length modifiers. @@ -37,7 +38,7 @@ type: integer type, used to get the correct type from the parameter stack as well as for cast target. */ -#define ASSIGN( case_cond, type ) \ +#define ASSIGN_VALUE_TO( case_cond, type ) \ case case_cond: \ *( va_arg( status->arg, type * ) ) = (type)( value * sign ); \ break @@ -316,7 +317,9 @@ const char * _PDCLIB_scan( const char * spec, struct _PDCLIB_status_t * status ) if ( status->base != -1 ) { /* integer conversion */ - uintmax_t value = 0; + uintmax_t value = 0; /* absolute value read */ + uintmax_t limit; /* max. value allowed */ + uintmax_t threshold; /* overflow threshold */ bool prefix_parsed = false; int sign = 0; while ( ( status->this < status->width ) && @@ -353,6 +356,53 @@ const char * _PDCLIB_scan( const char * spec, struct _PDCLIB_status_t * status ) UNGET( rc, status ); break; } + switch ( status->flags & ( E_char | E_short | E_long | E_llong | E_intmax | E_size | E_ptrdiff | E_unsigned ) ) + { + case E_char: + limit = ( sign == 1 ) ? CHAR_MAX : ( CHAR_MIN * sign ); + break; + case E_char | E_unsigned: + limit = UCHAR_MAX; + break; + case E_short: + limit = ( sign == 1 ) ? SHRT_MAX : ( SHRT_MIN * sign ); + break; + case E_short | E_unsigned: + limit = USHRT_MAX; + break; + case E_long: + limit = ( sign == 1 ) ? LONG_MAX : ( LONG_MIN * sign ); + break; + case E_long | E_unsigned: + limit = ULONG_MAX; + break; + case E_llong: + limit = ( sign == 1 ) ? LLONG_MAX : ( LLONG_MIN * sign ); + break; + case E_llong | E_unsigned: + limit = ULLONG_MAX; + break; + case E_intmax: + limit = ( sign == 1 ) ? INTMAX_MAX : ( INTMAX_MIN * sign ); + break; + case E_intmax | E_unsigned: + limit = UINTMAX_MAX; + break; + case E_size: + case E_size | E_unsigned: + limit = SIZE_MAX; + break; + case E_ptrdiff: + case E_ptrdiff | E_unsigned: + limit = ( sign == 1 ) ? PTRDIFF_MAX : ( PTRDIFF_MIN * sign ); + break; + case E_unsigned: + limit = UINT_MAX; + break; + default: + limit = ( sign == 1 ) ? INT_MAX : ( INT_MIN * sign ); + break; + } } else if ( ! prefix_parsed ) { @@ -418,9 +468,20 @@ const char * _PDCLIB_scan( const char * spec, struct _PDCLIB_status_t * status ) UNGET( rc, status ); break; } - value *= status->base; - value += digitptr - _PDCLIB_digits; - value_parsed = true; + // SOLAR + // if ( ( ( limit - ( digitptr - _PDCLIB_digits ) ) / status->base ) >= value ) + //if ( ( ( limit / status->base ) >= value ) && ( ( limit - ( digitptr - _PDCLIB_digits ) ) >= ( value * status->base ) ) ) + { + /* no overflow */ + value *= status->base; + value += digitptr - _PDCLIB_digits; + value_parsed = true; + } + //else + //{ + // value = limit; + // threshold = 0; + //} } } /* width or input exhausted, or non-matching character */ @@ -428,7 +489,7 @@ const char * _PDCLIB_scan( const char * spec, struct _PDCLIB_status_t * status ) { /* out of input before anything could be parsed - input error */ /* FIXME: if first character does not match, value_parsed is not set - but it is NOT an input error */ - if ( status->n == 0 ) + if ( ( status->n == 0 ) && ( rc == EOF ) ) { status->n = -1; } @@ -439,22 +500,22 @@ const char * _PDCLIB_scan( const char * spec, struct _PDCLIB_status_t * status ) E_intmax | E_size | E_ptrdiff | E_unsigned ) ) { - ASSIGN( E_char, char ); - ASSIGN( E_char | E_unsigned, unsigned char ); - ASSIGN( E_short, short ); - ASSIGN( E_short | E_unsigned, unsigned short ); - ASSIGN( 0, int ); - ASSIGN( E_unsigned, unsigned int ); - ASSIGN( E_long, long ); - ASSIGN( E_long | E_unsigned, unsigned long ); - ASSIGN( E_llong, long long ); - ASSIGN( E_llong | E_unsigned, unsigned long long ); - ASSIGN( E_intmax, intmax_t ); - ASSIGN( E_intmax | E_unsigned, uintmax_t ); - ASSIGN( E_size, size_t ); - /* ASSIGN( E_size | E_unsigned, unsigned size_t ); */ - ASSIGN( E_ptrdiff, ptrdiff_t ); - /* ASSIGN( E_ptrdiff | E_unsigned, unsigned ptrdiff_t ); */ + ASSIGN_VALUE_TO( E_char, char ); + ASSIGN_VALUE_TO( E_char | E_unsigned, unsigned char ); + ASSIGN_VALUE_TO( E_short, short ); + ASSIGN_VALUE_TO( E_short | E_unsigned, unsigned short ); + ASSIGN_VALUE_TO( 0, int ); + ASSIGN_VALUE_TO( E_unsigned, unsigned int ); + ASSIGN_VALUE_TO( E_long, long ); + ASSIGN_VALUE_TO( E_long | E_unsigned, unsigned long ); + ASSIGN_VALUE_TO( E_llong, long long ); + ASSIGN_VALUE_TO( E_llong | E_unsigned, unsigned long long ); + ASSIGN_VALUE_TO( E_intmax, intmax_t ); + ASSIGN_VALUE_TO( E_intmax | E_unsigned, uintmax_t ); + ASSIGN_VALUE_TO( E_size, size_t ); + /* ASSIGN_VALUE_TO( E_size | E_unsigned, unsigned size_t ); */ + ASSIGN_VALUE_TO( E_ptrdiff, ptrdiff_t ); + /* ASSIGN_VALUE_TO( E_ptrdiff | E_unsigned, unsigned ptrdiff_t ); */ default: puts( "UNSUPPORTED SCANF FLAG COMBINATION" ); return NULL; /* behaviour unspecified */ diff --git a/functions/stdio/sscanf.c b/functions/stdio/sscanf.c index b701108..343f8ec 100644 --- a/functions/stdio/sscanf.c +++ b/functions/stdio/sscanf.c @@ -29,13 +29,10 @@ int sscanf( const char * _PDCLIB_restrict s, const char * _PDCLIB_restrict forma #include #include -#define symbol2value( x ) #x -#define symbol2string( x ) symbol2value( x ) - -#define CHECK_TRUE( a ) do { if ( a == 0 ) { fprintf( stderr, "Unexpected failure in " symbol2string( __LINE__ ) ": '" #a "' evaluated to false.\n" ); rc += 1; } } while ( 0 ) -#define CHECK_FALSE( a ) do { if ( a != 0 ) { fprintf( stderr, "Unexpected failure in " symbol2string( __LINE__ ) ": '" #a "' evaluated to true.\n" ); rc += 1; } } while ( 0 ) -#define CHECK_EQUAL( a, b ) do { int x = a; int y = b; if ( x != y ) { fprintf( stderr, "Mismatch in " symbol2string( __LINE__ ) ": result is %d, expected %d.\n", x, y ); rc += 1; } } while ( 0 ) -#define CHECK_FEQUAL( a, b, T, F ) do { T x = a; T y = b; if ( x != y ) { fprintf( stderr, "Mismatch in " symbol2string( __LINE__ ) ": result is " F ", expected " F ".\n", x, y ); rc += 1; } } while ( 0 ) +#define CHECK_TRUE( a ) TESTCASE( a != 0 ) +#define CHECK_FALSE( a ) TESTCASE( a == 0 ) +#define CHECK_EQUAL( a, b ) do { int x = a; int y = b; TESTCASE( x == y ); } while ( 0 ) +#define CHECK_FEQUAL( a, b, T, F ) do { T x = a; T y = b; TESTCASE( x == y ); } while ( 0 ) // literal matches, character matches, and basic integer matches void suite_one( void ); @@ -47,6 +44,8 @@ void suite_three( void ); void suite_four( void ); // string matches void suite_five( void ); +// 0xz special case +void suite_six( void ); int main() { @@ -55,6 +54,11 @@ int main() suite_three(); suite_four(); suite_five(); +#ifndef REGTEST + // This test driver fails for many common libraries, so it's disabled for + // regression testing. See the function for explanation. + suite_six(); +#endif } // literal matches, character matches, and basic integer matches @@ -1017,8 +1021,8 @@ void suite_three() char const * string = "-0x0 -0x000 -0x7f 0x80 0xff -0x7fff 0x8000\n" "0xffff -0x7fffffff 0x80000000 0xffffffff\n" "-0x7fffffffffffffff 0x8000000000000000\n" - "0xffffffffffffffff -0x\n"; - CHECK_EQUAL( string[145], '\n' ); + "0xffffffffffffffff\n"; + CHECK_EQUAL( string[141], '\n' ); { // reading 0, x unsigned char i = -1; @@ -1036,14 +1040,6 @@ void suite_three() CHECK_EQUAL( n, 4 ); } { - // reading -0x, x - unsigned char i = -1; - int n; - CHECK_EQUAL( sscanf( string + 142, "%hhx%n", &i, &n ), 1 ); - CHECK_EQUAL( i, 0 ); - CHECK_EQUAL( n, 3 ); - } - { // reading 0x000, x unsigned char i = -1; int n; @@ -1080,7 +1076,7 @@ void suite_three() signed char i = -1; int n; CHECK_EQUAL( sscanf( string + 18, "%hhi%n", &i, &n ), 1 ); - CHECK_FEQUAL( i, -128, signed char, "%hd" ); + CHECK_FEQUAL( i, -128, signed char, "%hhd" ); CHECK_EQUAL( n, 4 ); } { @@ -1124,14 +1120,6 @@ void suite_three() CHECK_EQUAL( n, 4 ); } { - // reading -0x, x - unsigned short i = -1; - int n; - CHECK_EQUAL( sscanf( string + 142, "%hx%n", &i, &n ), 1 ); - CHECK_EQUAL( i, 0 ); - CHECK_EQUAL( n, 3 ); - } - { // reading 0x000, x unsigned short i = -1; int n; @@ -1212,14 +1200,6 @@ void suite_three() CHECK_EQUAL( n, 4 ); } { - // reading -0x, x - unsigned int i = -1; - int n; - CHECK_EQUAL( sscanf( string + 142, "%x%n", &i, &n ), 1 ); - CHECK_EQUAL( i, 0 ); - CHECK_EQUAL( n, 3 ); - } - { // reading 0x000, x unsigned int i = -1; int n; @@ -1255,9 +1235,10 @@ void suite_three() // reading 0x80000000, i signed int i = -1; int n; - CHECK_EQUAL( sscanf( string + 62, "%i%n", &i, &n ), 1 ); - CHECK_FEQUAL( i, 2147483647, signed int, "%d" ); // NOT overflowing, see strtol() specs. - CHECK_EQUAL( n, 10 ); + //CHECK_EQUAL( sscanf( string + 62, "%i%n", &i, &n ), 1 ); + CHECK_EQUAL( sscanf( "-0x80000000", "%i%n", &i, &n ), 1 ); + CHECK_FEQUAL( i, -2147483648, signed int, "%d" ); + CHECK_EQUAL( n, 11 ); } { // reading ffffffff, x @@ -1275,14 +1256,6 @@ void suite_three() CHECK_FEQUAL( i, 4294967295, unsigned int, "%d" ); CHECK_EQUAL( n, 10 ); } - { - // reading 0xffffffff, i - signed int i = 0; - int n; - CHECK_EQUAL( sscanf( string + 73, "%i%n", &i, &n ), 1 ); - CHECK_FEQUAL( i, 2147483647, signed int, "%d" ); // NOT overflowing; see strtol() specs. - CHECK_EQUAL( n, 10 ); - } } // octal integer matches @@ -1599,4 +1572,74 @@ void suite_five() } } +void suite_six() +{ + char const * string = "-0xz\n"; + CHECK_EQUAL( string[4], '\n' ); + { + // reading -0x, x + unsigned char i = 1; + int n = -1; + /* Most existing libraries disagree with this test driver, so a little + explanation of why PDCLib chose the implementation it did might be + necessary. All references are from ISO/IEC 9899:1999 "Programming + languages - C". Wording critical to the explanation is in UPPERCASE. + 6.4.4.1 Integer constants - states that '0' is a valid (hexa)decimal + constant, whereas '0x' IS NOT. + 7.19.6.2 The fscanf function - states... + ...in paragraph 9 that "an INPUT ITEM is defined as the longest + sequence of input characters [...] which is, OR IS A PREFIX OF, + a matching input sequence". + ...in paragraph 10 that "if the INPUT ITEM is not a matching + sequence, the execution of THE DIRECTIVE FAILS; this condition + is a matching failure". + ...in footnote 242) that "fscanf pushes back AT MOST ONE input + character onto the input stream." + ...in paragraph 12 that either of the conversion specifiers d, i, + o, u, or x "matches an [...] integer whose format is the same as + expected for THE SUBJECT SEQUENCE of the [strtol|strtoul] + function". + 7.20.1.4 The strtol, strtoll, strtoul, and strtoull functions - states + in paragraph 3 that "the EXPECTED FORM OF THE SUBJECT SEQUENCE is + that of an integer constant AS DESCRIBED IN 6.4.4.1". + These parts of the standard result in the following reasoning: + - The longest sequence of input characters which is a prefix of a + matching input sequence is "-0x" (negative sign, hexadecimal-prefix). + The 'z' is the first character remaining unread as "-0xz" is not a + (prefix of a) matching input sequence. This is according to 7.19.6.2 + paragraph 9. + - "0x", without a valid hexadecimal digit following it, is not a valid + integer constant according to 6.4.4.1. + - "0x" is thus also not of the expected form for a strto[u]l subject + sequence according to 7.20.1.4 paragraph 3. (strto[u]l() would parse + it as zero, but leave the "x" in the final string, i.e. outside the + subject sequence.) + - "0x" is therefore also not a matching sequence to the i or x + conversion specifier according to 7.19.6.2 paragraph 12. + - The conversion should therefore result in a matching failure + according to 7.19.6.2 paragraph 10. + */ + CHECK_EQUAL( sscanf( string, "%hhx%n", &i, &n ), 0 ); + CHECK_EQUAL( i, 1 ); + CHECK_EQUAL( n, -1 ); + } + { + // reading -0x, x + unsigned short i = 1; + int n = -1; + CHECK_EQUAL( sscanf( string, "%hx%n", &i, &n ), 0 ); + CHECK_EQUAL( i, 1 ); + CHECK_EQUAL( n, -1 ); + } + { + // reading -0x, x + unsigned int i = 1; + int n = -1; + CHECK_EQUAL( sscanf( string, "%x%n", &i, &n ), 0 ); + CHECK_EQUAL( i, 1 ); + CHECK_EQUAL( n, -1 ); + } +} + #endif + diff --git a/functions/stdio/vsscanf.c b/functions/stdio/vsscanf.c index fd35889..5126bae 100644 --- a/functions/stdio/vsscanf.c +++ b/functions/stdio/vsscanf.c @@ -62,11 +62,6 @@ int vsscanf( const char * _PDCLIB_restrict s, const char * _PDCLIB_restrict form /* NULL return code indicates input error */ if ( rc == NULL ) { - if ( status.n == 0 ) - { - /* input error before any conversion returns EOF */ - status.n = EOF; - } break; } /* Continue parsing after conversion specifier */ -- 2.40.0