X-Git-Url: https://pd.if.org/git/?a=blobdiff_plain;f=functions%2F_PDCLIB%2Fscan.c;h=63afff235e25281778b4bdc584ca42b1e8952596;hb=519fc78c528a1d4e07fbd5f8353d5c3f81b8e99b;hp=523b0c81449c5846f50868f09118fcb445c69dba;hpb=7f5e4748e1e44b136cbeddc70601bbf2a5a56e1b;p=pdclib diff --git a/functions/_PDCLIB/scan.c b/functions/_PDCLIB/scan.c index 523b0c8..63afff2 100644 --- a/functions/_PDCLIB/scan.c +++ b/functions/_PDCLIB/scan.c @@ -10,7 +10,11 @@ #include #include #include +#include #include +#include +#include +#include /* Using an integer's bits as flags for both the conversion flags and length modifiers. @@ -28,45 +32,46 @@ #define E_unsigned 1<<16 -#define MATCH_FAIL -1 -#define MATCH_ERROR -2 +/* Helper macro for assigning a readily converted integer value to the correct + parameter type, used in a switch on status->flags (see E_* flags above). + case_cond: combination of the E_* flags above, used for the switch-case + type: integer type, used to get the correct type from the parameter + stack as well as for cast target. +*/ +#define ASSIGN_VALUE_TO( case_cond, type ) \ + case case_cond: \ + *( va_arg( status->arg, type * ) ) = (type)( value * sign ); \ + break + -static int MATCH( int c, struct _PDCLIB_status_t * status ) +/* Helper function to get a character from the string or stream, whatever is + used for input. When reading from a string, returns EOF on end-of-string + so that handling of the return value can be uniform for both streams and + strings. +*/ +static int GET( struct _PDCLIB_status_t * status ) { + int rc; if ( status->stream != NULL ) { - if ( ! _PDCLIB_prepread( status->stream ) ) - { - return MATCH_ERROR; - } - if ( tolower( status->stream->buffer[ status->stream->bufidx ] ) == c ) - { - /* recycling parameter */ - c = getc( status->stream ); - } - else - { - return MATCH_FAIL; - } + rc = getc( status->stream ); } else { - if ( tolower( *(status->s) ) == c ) - { - /* recycling parameter */ - c = *((status->s)++); /* TODO: \0 */ - } - else - { - return MATCH_FAIL; - } + rc = ( *status->s == '\0' ) ? EOF : (unsigned char)*((status->s)++); + } + if ( rc != EOF ) + { + ++(status->i); + ++(status->this); } - ++(status->i); - ++(status->this); - return c; + return rc; } +/* Helper function to put a read character back into the string or stream, + whatever is used for input. +*/ static void UNGET( int c, struct _PDCLIB_status_t * status ) { if ( status->stream != NULL ) @@ -75,21 +80,45 @@ static void UNGET( int c, struct _PDCLIB_status_t * status ) } else { - *(--(status->s)) = c; + --(status->s); } --(status->i); --(status->this); } +/* Helper function to check if a character is part of a given scanset */ +static bool NOT_IN_SCANSET( const char * start_scanlist, const char * end_scanlist, bool negate_scanlist, int rc ) +{ + // SOLAR + return true; +} + + const char * _PDCLIB_scan( const char * spec, struct _PDCLIB_status_t * status ) { + /* generic input character */ + int rc; const char * orig_spec = spec; if ( *(++spec) == '%' ) { /* %% -> match single '%' */ - MATCH( *spec, status ); - return ++spec; + rc = GET( status ); + switch ( rc ) + { + case EOF: + /* input error */ + if ( status->n == 0 ) + { + status->n = -1; + } + return NULL; + case '%': + return ++spec; + default: + UNGET( rc, status ); + break; + } } /* Initializing status structure */ status->flags = 0; @@ -109,7 +138,12 @@ const char * _PDCLIB_scan( const char * spec, struct _PDCLIB_status_t * status ) strtol() will return zero. In both cases, endptr will point to the rest of the conversion specifier - just what we need. */ + char const * prev_spec = spec; status->width = (int)strtol( spec, (char**)&spec, 10 ); + if ( spec == prev_spec ) + { + status->width = SIZE_MAX; + } /* Optional length modifier We step one character ahead in any case, and step back only if we find @@ -166,6 +200,10 @@ const char * _PDCLIB_scan( const char * spec, struct _PDCLIB_status_t * status ) } /* Conversion specifier */ + + /* whether valid input had been parsed */ + bool value_parsed = false; + switch ( *spec ) { case 'd': @@ -196,13 +234,125 @@ const char * _PDCLIB_scan( const char * spec, struct _PDCLIB_status_t * status ) case 'A': break; case 'c': - /* TODO */ - break; + { + char * c = va_arg( status->arg, char * ); + /* for %c, default width is one */ + if ( status->width == SIZE_MAX ) + { + status->width = 1; + } + /* reading until width reached or input exhausted */ + while ( ( status->this < status->width ) && + ( ( rc = GET( status ) ) != EOF ) ) + { + *(c++) = rc; + value_parsed = true; + } + /* width or input exhausted */ + if ( value_parsed ) + { + ++status->n; + return ++spec; + } + else + { + /* input error, no character read */ + if ( status->n == 0 ) + { + status->n = -1; + } + return NULL; + } + } case 's': - /* TODO */ - break; + { + char * c = va_arg( status->arg, char * ); + while ( ( status->this < status->width ) && + ( ( rc = GET( status ) ) != EOF ) ) + { + if ( isspace( rc ) ) + { + UNGET( rc, status ); + if ( value_parsed ) + { + /* matching sequence terminated by whitespace */ + *c = '\0'; + ++status->n; + return ++spec; + } + else + { + /* matching error */ + return NULL; + } + } + else + { + /* match */ + value_parsed = true; + *(c++) = rc; + } + } + /* width or input exhausted */ + if ( value_parsed ) + { + *c = '\0'; + ++status->n; + return ++spec; + } + else + { + /* input error, no character read */ + if ( status->n == 0 ) + { + status->n = -1; + } + return NULL; + } + } + case '[': + { + const char * endspec = spec; + bool negative_scanlist = false; + if ( *(++endspec) == '^' ) + { + negative_scanlist = true; + ++endspec; + } + spec = endspec; + do + { + // TODO: This can run beyond a malformed format string + ++endspec; + } while ( *endspec != ']' ); + // read according to scanlist, equiv. to %s above + char * c = va_arg( status->arg, char * ); + while ( ( status->this < status->width ) && + ( ( rc = GET( status ) ) != EOF ) ) + { + if ( NOT_IN_SCANSET( spec, endspec, negative_scanlist, rc ) ) + { + break; + } + value_parsed = true; + *(c++) = rc; + } + if ( value_parsed ) + { + *c = '\0'; + ++status->n; + return ++endspec; + } + else + { + if ( status->n == 0 ) + { + status->n = -1; + } + return NULL; + } + } case 'p': - /* TODO */ status->base = 16; status->flags |= E_unsigned; break; @@ -216,67 +366,170 @@ const char * _PDCLIB_scan( const char * spec, struct _PDCLIB_status_t * status ) /* No conversion specifier. Bad conversion. */ return orig_spec; } - bool zero = false; + if ( status->base != -1 ) { - bool value = false; - int rc; - if ( ( rc = MATCH( '0', status ) ) >= 0 ) + /* integer conversion */ + uintmax_t value = 0; /* absolute value read */ + bool prefix_parsed = false; + int sign = 0; + while ( ( status->this < status->width ) && + ( ( rc = GET( status ) ) != EOF ) ) { - if ( ( rc = MATCH( 'x', status ) ) >= 0 ) + if ( isspace( rc ) ) { - if ( ( status->base == 0 ) || ( status->base == 16 ) ) + if ( sign ) { - status->base = 16; + /* matching sequence terminated by whitespace */ + UNGET( rc, status ); + break; } else { - UNGET( rc, status ); - value = true; + /* leading whitespace not counted against width */ + status->this--; } } - else if ( rc == MATCH_FAIL ) + else if ( ! sign ) { - if ( status->base == 0 ) + /* no sign parsed yet */ + switch ( rc ) { - status->base = 8; + case '-': + sign = -1; + break; + case '+': + sign = 1; + break; + default: + /* not a sign; put back character */ + sign = 1; + UNGET( rc, status ); + break; + } + } + else if ( ! prefix_parsed ) + { + /* no prefix (0x... for hex, 0... for octal) parsed yet */ + prefix_parsed = true; + if ( rc != '0' ) + { + /* not a prefix; if base not yet set, set to decimal */ + if ( status->base == 0 ) + { + status->base = 10; + } + UNGET( rc, status ); } else { - value = true; + /* starts with zero, so it might be a prefix. */ + /* check what follows next (might be 0x...) */ + if ( ( status->this < status->width ) && + ( ( rc = GET( status ) ) != EOF ) ) + { + if ( tolower( rc ) == 'x' ) + { + /* 0x... would be prefix for hex base... */ + if ( ( status->base == 0 ) || + ( status->base == 16 ) ) + { + status->base = 16; + } + else + { + /* ...unless already set to other value */ + UNGET( rc, status ); + value_parsed = true; + } + } + else + { + /* 0... but not 0x.... would be octal prefix */ + UNGET( rc, status ); + if ( status->base == 0 ) + { + status->base = 8; + } + /* in any case we have read a zero */ + value_parsed = true; + } + } + else + { + /* failed to read beyond the initial zero */ + value_parsed = true; + break; + } } } else { - /* TODO: MATCH_ERROR */ + char * digitptr = memchr( _PDCLIB_digits, tolower( rc ), status->base ); + if ( digitptr == NULL ) + { + /* end of input item */ + UNGET( rc, status ); + break; + } + value *= status->base; + value += digitptr - _PDCLIB_digits; + value_parsed = true; } } - else if ( rc == MATCH_FAIL ) + /* width or input exhausted, or non-matching character */ + if ( ! value_parsed ) { - if ( status->base == 0 ) + /* out of input before anything could be parsed - input error */ + /* FIXME: if first character does not match, value_parsed is not set - but it is NOT an input error */ + if ( ( status->n == 0 ) && ( rc == EOF ) ) { - status->base = 10; + status->n = -1; } + return NULL; } - else + /* convert value to target type and assign to parameter */ + switch ( status->flags & ( E_char | E_short | E_long | E_llong | + E_intmax | E_size | E_ptrdiff | + E_unsigned ) ) { - /* TODO: MATCH_ERROR */ + ASSIGN_VALUE_TO( E_char, char ); + ASSIGN_VALUE_TO( E_char | E_unsigned, unsigned char ); + ASSIGN_VALUE_TO( E_short, short ); + ASSIGN_VALUE_TO( E_short | E_unsigned, unsigned short ); + ASSIGN_VALUE_TO( 0, int ); + ASSIGN_VALUE_TO( E_unsigned, unsigned int ); + ASSIGN_VALUE_TO( E_long, long ); + ASSIGN_VALUE_TO( E_long | E_unsigned, unsigned long ); + ASSIGN_VALUE_TO( E_llong, long long ); + ASSIGN_VALUE_TO( E_llong | E_unsigned, unsigned long long ); + ASSIGN_VALUE_TO( E_intmax, intmax_t ); + ASSIGN_VALUE_TO( E_intmax | E_unsigned, uintmax_t ); + ASSIGN_VALUE_TO( E_size, size_t ); + /* ASSIGN_VALUE_TO( E_size | E_unsigned, unsigned size_t ); */ + ASSIGN_VALUE_TO( E_ptrdiff, ptrdiff_t ); + /* ASSIGN_VALUE_TO( E_ptrdiff | E_unsigned, unsigned ptrdiff_t ); */ + default: + puts( "UNSUPPORTED SCANF FLAG COMBINATION" ); + return NULL; /* behaviour unspecified */ } - /* TODO: Integer conversion */ - } - else - { - /* TODO: Float conversions? */ + ++(status->n); + return ++spec; } + /* TODO: Floats. */ return NULL; } + #ifdef TEST #include <_PDCLIB_test.h> +#include + + int main( void ) { - TESTCASE( NO_TESTDRIVER ); + /* Testing covered by fscanf.c */ return TEST_RESULTS; }