X-Git-Url: https://pd.if.org/git/?a=blobdiff_plain;f=functions%2F_PDCLIB%2Fscan.c;h=0d46f9ecfd53efe0f8a13fdd901487f482198c55;hb=8203e6d8264b7b9918e4b105e3f523c1d1152a7e;hp=2f59fa59cc405d49979da8123b7196caaf4de38f;hpb=18051554bf8cae6f007b7c0f66411689ee8be88f;p=pdclib.old diff --git a/functions/_PDCLIB/scan.c b/functions/_PDCLIB/scan.c index 2f59fa5..0d46f9e 100644 --- a/functions/_PDCLIB/scan.c +++ b/functions/_PDCLIB/scan.c @@ -14,6 +14,7 @@ #include #include #include +#include /* Using an integer's bits as flags for both the conversion flags and length modifiers. @@ -31,27 +32,46 @@ #define E_unsigned 1<<16 -#define ASSIGN( case_cond, type ) \ +/* Helper macro for assigning a readily converted integer value to the correct + parameter type, used in a switch on status->flags (see E_* flags above). + case_cond: combination of the E_* flags above, used for the switch-case + type: integer type, used to get the correct type from the parameter + stack as well as for cast target. +*/ +#define ASSIGN_VALUE_TO( case_cond, type ) \ case case_cond: \ *( va_arg( status->arg, type * ) ) = (type)( value * sign ); \ break +/* Helper function to get a character from the string or stream, whatever is + used for input. When reading from a string, returns EOF on end-of-string + so that handling of the return value can be uniform for both streams and + strings. +*/ static int GET( struct _PDCLIB_status_t * status ) { - ++(status->i); - ++(status->this); + int rc; if ( status->stream != NULL ) { - return getc( status->stream ); + rc = getc( status->stream ); } else { - return *((status->s)++); + rc = ( *status->s == '\0' ) ? EOF : (unsigned char)*((status->s)++); + } + if ( rc != EOF ) + { + ++(status->i); + ++(status->this); } + return rc; } +/* Helper function to put a read character back into the string or stream, + whatever is used for input. +*/ static void UNGET( int c, struct _PDCLIB_status_t * status ) { if ( status->stream != NULL ) @@ -60,7 +80,7 @@ static void UNGET( int c, struct _PDCLIB_status_t * status ) } else { - *(--(status->s)) = c; + --(status->s); } --(status->i); --(status->this); @@ -79,7 +99,11 @@ const char * _PDCLIB_scan( const char * spec, struct _PDCLIB_status_t * status ) switch ( rc ) { case EOF: - /* matching failure */ + /* input error */ + if ( status->n == 0 ) + { + status->n = -1; + } return NULL; case '%': return ++spec; @@ -106,7 +130,12 @@ const char * _PDCLIB_scan( const char * spec, struct _PDCLIB_status_t * status ) strtol() will return zero. In both cases, endptr will point to the rest of the conversion specifier - just what we need. */ + char const * prev_spec = spec; status->width = (int)strtol( spec, (char**)&spec, 10 ); + if ( spec == prev_spec ) + { + status->width = SIZE_MAX; + } /* Optional length modifier We step one character ahead in any case, and step back only if we find @@ -199,17 +228,33 @@ const char * _PDCLIB_scan( const char * spec, struct _PDCLIB_status_t * status ) case 'c': { char * c = va_arg( status->arg, char * ); + /* for %c, default width is one */ if ( status->width == SIZE_MAX ) { status->width = 1; } + /* reading until width reached or input exhausted */ while ( ( status->this < status->width ) && ( ( rc = GET( status ) ) != EOF ) ) { *(c++) = rc; value_parsed = true; } - return value_parsed ? spec : NULL; + /* width or input exhausted */ + if ( value_parsed ) + { + ++status->n; + return ++spec; + } + else + { + /* input error, no character read */ + if ( status->n == 0 ) + { + status->n = -1; + } + return NULL; + } } case 's': { @@ -221,11 +266,13 @@ const char * _PDCLIB_scan( const char * spec, struct _PDCLIB_status_t * status ) { if ( value_parsed ) { + /* matching sequence terminated by whitespace */ *c = '\0'; return spec; } else { + /* leading whitespace not counted against width */ --(status->this); } } @@ -239,10 +286,16 @@ const char * _PDCLIB_scan( const char * spec, struct _PDCLIB_status_t * status ) if ( value_parsed ) { *c = '\0'; - return spec; + ++status->n; + return ++spec; } else { + /* input error, no character read */ + if ( status->n == 0 ) + { + status->n = -1; + } return NULL; } } @@ -264,14 +317,31 @@ const char * _PDCLIB_scan( const char * spec, struct _PDCLIB_status_t * status ) if ( status->base != -1 ) { /* integer conversion */ - uintmax_t value = 0; + uintmax_t value = 0; /* absolute value read */ + uintmax_t limit; /* max. value allowed */ + uintmax_t threshold; /* overflow threshold */ bool prefix_parsed = false; int sign = 0; while ( ( status->this < status->width ) && ( ( rc = GET( status ) ) != EOF ) ) { - if ( ! sign ) + if ( isspace( rc ) ) { + if ( sign ) + { + /* matching sequence terminated by whitespace */ + UNGET( rc, status ); + break; + } + else + { + /* leading whitespace not counted against width */ + status->this--; + } + } + else if ( ! sign ) + { + /* no sign parsed yet */ switch ( rc ) { case '-': @@ -281,16 +351,66 @@ const char * _PDCLIB_scan( const char * spec, struct _PDCLIB_status_t * status ) sign = 1; break; default: + /* not a sign; put back character */ sign = 1; UNGET( rc, status ); break; } + switch ( status->flags & ( E_char | E_short | E_long | E_llong | E_intmax | E_size | E_ptrdiff | E_unsigned ) ) + { + case E_char: + limit = ( sign == 1 ) ? CHAR_MAX : ( CHAR_MIN * sign ); + break; + case E_char | E_unsigned: + limit = UCHAR_MAX; + break; + case E_short: + limit = ( sign == 1 ) ? SHRT_MAX : ( SHRT_MIN * sign ); + break; + case E_short | E_unsigned: + limit = USHRT_MAX; + break; + case E_long: + limit = ( sign == 1 ) ? LONG_MAX : ( LONG_MIN * sign ); + break; + case E_long | E_unsigned: + limit = ULONG_MAX; + break; + case E_llong: + limit = ( sign == 1 ) ? LLONG_MAX : ( LLONG_MIN * sign ); + break; + case E_llong | E_unsigned: + limit = ULLONG_MAX; + break; + case E_intmax: + limit = ( sign == 1 ) ? INTMAX_MAX : ( INTMAX_MIN * sign ); + break; + case E_intmax | E_unsigned: + limit = UINTMAX_MAX; + break; + case E_size: + case E_size | E_unsigned: + limit = SIZE_MAX; + break; + case E_ptrdiff: + case E_ptrdiff | E_unsigned: + limit = ( sign == 1 ) ? PTRDIFF_MAX : ( PTRDIFF_MIN * sign ); + break; + case E_unsigned: + limit = UINT_MAX; + break; + default: + limit = ( sign == 1 ) ? INT_MAX : ( INT_MIN * sign ); + break; + } } else if ( ! prefix_parsed ) { + /* no prefix (0x... for hex, 0... for octal) parsed yet */ prefix_parsed = true; if ( rc != '0' ) { + /* not a prefix; if base not yet set, set to decimal */ if ( status->base == 0 ) { status->base = 10; @@ -299,11 +419,14 @@ const char * _PDCLIB_scan( const char * spec, struct _PDCLIB_status_t * status ) } else { + /* starts with zero, so it might be a prefix. */ + /* check what follows next (might be 0x...) */ if ( ( status->this < status->width ) && ( ( rc = GET( status ) ) != EOF ) ) { if ( tolower( rc ) == 'x' ) { + /* 0x... would be prefix for hex base... */ if ( ( status->base == 0 ) || ( status->base == 16 ) ) { @@ -311,20 +434,29 @@ const char * _PDCLIB_scan( const char * spec, struct _PDCLIB_status_t * status ) } else { + /* ...unless already set to other value */ UNGET( rc, status ); value_parsed = true; } } else { + /* 0... but not 0x.... would be octal prefix */ UNGET( rc, status ); if ( status->base == 0 ) { status->base = 8; } + /* in any case we have read a zero */ value_parsed = true; } } + else + { + /* failed to read beyond the initial zero */ + value_parsed = true; + break; + } } } else @@ -333,44 +465,63 @@ const char * _PDCLIB_scan( const char * spec, struct _PDCLIB_status_t * status ) if ( digitptr == NULL ) { /* end of input item */ + UNGET( rc, status ); break; } - value *= status->base; - value += digitptr - _PDCLIB_digits; - value_parsed = true; + // SOLAR + // if ( ( ( limit - ( digitptr - _PDCLIB_digits ) ) / status->base ) >= value ) + //if ( ( ( limit / status->base ) >= value ) && ( ( limit - ( digitptr - _PDCLIB_digits ) ) >= ( value * status->base ) ) ) + { + /* no overflow */ + value *= status->base; + value += digitptr - _PDCLIB_digits; + value_parsed = true; + } + //else + //{ + // value = limit; + // threshold = 0; + //} } } - /* width exceeded, EOF, read error, non-matching character */ + /* width or input exhausted, or non-matching character */ if ( ! value_parsed ) { - /* matching error */ + /* out of input before anything could be parsed - input error */ + /* FIXME: if first character does not match, value_parsed is not set - but it is NOT an input error */ + if ( ( status->n == 0 ) && ( rc == EOF ) ) + { + status->n = -1; + } return NULL; } + /* convert value to target type and assign to parameter */ switch ( status->flags & ( E_char | E_short | E_long | E_llong | E_intmax | E_size | E_ptrdiff | E_unsigned ) ) { - ASSIGN( E_char, char ); - ASSIGN( E_char | E_unsigned, unsigned char ); - ASSIGN( E_short, short ); - ASSIGN( E_short | E_unsigned, unsigned short ); - ASSIGN( 0, int ); - ASSIGN( E_unsigned, unsigned int ); - ASSIGN( E_long, long ); - ASSIGN( E_long | E_unsigned, unsigned long ); - ASSIGN( E_llong, long long ); - ASSIGN( E_llong | E_unsigned, unsigned long long ); - ASSIGN( E_intmax, intmax_t ); - ASSIGN( E_intmax | E_unsigned, uintmax_t ); - ASSIGN( E_size, size_t ); - /* ASSIGN( E_size | E_unsigned, unsigned size_t ); */ - ASSIGN( E_ptrdiff, ptrdiff_t ); - /* ASSIGN( E_ptrdiff | E_unsigned, unsigned ptrdiff_t ); */ + ASSIGN_VALUE_TO( E_char, char ); + ASSIGN_VALUE_TO( E_char | E_unsigned, unsigned char ); + ASSIGN_VALUE_TO( E_short, short ); + ASSIGN_VALUE_TO( E_short | E_unsigned, unsigned short ); + ASSIGN_VALUE_TO( 0, int ); + ASSIGN_VALUE_TO( E_unsigned, unsigned int ); + ASSIGN_VALUE_TO( E_long, long ); + ASSIGN_VALUE_TO( E_long | E_unsigned, unsigned long ); + ASSIGN_VALUE_TO( E_llong, long long ); + ASSIGN_VALUE_TO( E_llong | E_unsigned, unsigned long long ); + ASSIGN_VALUE_TO( E_intmax, intmax_t ); + ASSIGN_VALUE_TO( E_intmax | E_unsigned, uintmax_t ); + ASSIGN_VALUE_TO( E_size, size_t ); + /* ASSIGN_VALUE_TO( E_size | E_unsigned, unsigned size_t ); */ + ASSIGN_VALUE_TO( E_ptrdiff, ptrdiff_t ); + /* ASSIGN_VALUE_TO( E_ptrdiff | E_unsigned, unsigned ptrdiff_t ); */ default: puts( "UNSUPPORTED SCANF FLAG COMBINATION" ); - return NULL; + return NULL; /* behaviour unspecified */ } - return spec; + ++(status->n); + return ++spec; } /* TODO: Floats. */ return NULL; @@ -379,10 +530,13 @@ const char * _PDCLIB_scan( const char * spec, struct _PDCLIB_status_t * status ) #ifdef TEST #include <_PDCLIB_test.h> +#include + + int main( void ) { - TESTCASE( NO_TESTDRIVER ); + /* Testing covered by fscanf.c */ return TEST_RESULTS; }