#include <stdbool.h>
#include <stdlib.h>
#include <stdarg.h>
+#include <stdint.h>
#include <ctype.h>
+#include <string.h>
+#include <stddef.h>
+#include <limits.h>
/* Using an integer's bits as flags for both the conversion flags and length
modifiers.
#define E_unsigned 1<<16
-#define MATCH_FAIL -1
-#define MATCH_ERROR -2
+/* Helper macro for assigning an already converted integer value to the
+ caller-supplied parameter of the correct type. Expands to one complete
+ "case" of a switch on status->flags (see E_* flags above); the expansion
+ ends in "break" without a semicolon, so each use must be followed by ';'.
+ Relies on the names "status", "value" and "sign" being in scope at the
+ point of use; the value stored is (type)( value * sign ).
+ case_cond: combination of the E_* flags above, used as the case label
+ type: integer type; the next argument is fetched from status->arg as
+ a pointer to "type", and the value is cast to "type" before
+ being stored through that pointer.
+*/
+#define ASSIGN_VALUE_TO( case_cond, type ) \
+ case case_cond: \
+ *( va_arg( status->arg, type * ) ) = (type)( value * sign ); \
+ break
+
-static int MATCH( int c, struct _PDCLIB_status_t * status )
+/* Helper function to get the next input character from the stream or the
+ string, whichever is used for input (a non-NULL status->stream selects the
+ stream, otherwise status->s is read). When reading from a string, returns
+ EOF on end-of-string (without stepping past the terminator) so that
+ handling of the return value can be uniform for both streams and strings.
+ Characters read from a string are returned as unsigned char values.
+ On success, both status->i and status->current are incremented; on EOF
+ neither counter is changed.
+ Returns: the character read, or EOF.
+*/
+static int GET( struct _PDCLIB_status_t * status )
{
+ int rc;
if ( status->stream != NULL )
{
- if ( ! _PDCLIB_prepread( status->stream ) )
- {
- return MATCH_ERROR;
- }
- if ( tolower( status->stream->buffer[ status->stream->bufidx ] ) == c )
- {
- /* recycling parameter */
- c = getc( status->stream );
- }
- else
- {
- return MATCH_FAIL;
- }
+ rc = getc( status->stream );
}
else
{
- if ( tolower( *(status->s) ) == c )
- {
- /* recycling parameter */
- c = *((status->s)++); /* TODO: \0 */
- }
- else
- {
- return MATCH_FAIL;
- }
+ rc = ( *status->s == '\0' ) ? EOF : (unsigned char)*((status->s)++);
}
- ++(status->i);
- ++(status->this);
- return c;
+ if ( rc != EOF ) /* only count characters that were actually consumed */
+ {
+ ++(status->i);
+ ++(status->current);
+ }
+ return rc;
}
+/* Helper function to put a read character back into the string or stream,
+ whichever is used for input.
+ For string input, c is not written back: only the read position status->s
+ is stepped back by one, so this function does not modify the input string.
+ After the stream / string distinction, status->i and status->current are
+ both decremented, mirroring the increments done when reading a character.
+ NOTE(review): the body of the stream branch is not visible in this excerpt;
+ presumably it hands c back to status->stream - confirm against the full
+ file.
+*/
static void UNGET( int c, struct _PDCLIB_status_t * status )
{
if ( status->stream != NULL )
}
else
{
- *(--(status->s)) = c;
+ --(status->s);
}
--(status->i);
- --(status->this);
+ --(status->current);
+}
+
+
+/* Helper function to check if a character is part of a given scanset.
+ scanlist: first character of the scanlist
+ end_scanlist: one past the last character of the scanlist
+ rc: the input character to look up
+ Returns true if rc equals one of the scanlist characters or falls inside
+ a range ("a-z") given in the scanlist, false otherwise.
+ A '-' that is the first scanlist character is compared verbatim; a '-'
+ with nothing after it only matches a literal '-'.
+ Note that the end character of a range is afterwards examined as a
+ verbatim character again, so it can in turn start a further range
+ ("a-c-e" is handled as a-c followed by c-e).
+*/
+static bool IN_SCANSET( const char * scanlist, const char * end_scanlist, int rc )
+{
+ // SOLAR
+ int previous = -1; /* last verbatim scanlist character, i.e. a possible
+ start of a range; -1 if there is none */
+ while ( scanlist != end_scanlist )
+ {
+ if ( ( *scanlist == '-' ) && ( previous != -1 ) )
+ {
+ /* possible scangroup ("a-z"); the start of the range has already
+ been compared verbatim, so the check below begins at previous + 1 */
+ if ( ++scanlist == end_scanlist )
+ {
+ /* '-' at end of scanlist does not describe a scangroup */
+ return rc == '-';
+ }
+ while ( ++previous <= (unsigned char)*scanlist )
+ {
+ if ( previous == rc )
+ {
+ return true;
+ }
+ }
+ previous = -1; /* scanlist still points at the end of the range, which
+ is handled as a verbatim character next */
+ }
+ else
+ {
+ /* not a scangroup, check verbatim */
+ if ( rc == (unsigned char)*scanlist )
+ {
+ return true;
+ }
+ previous = (unsigned char)(*scanlist++);
+ }
+ }
+ return false;
}
const char * _PDCLIB_scan( const char * spec, struct _PDCLIB_status_t * status )
{
+ /* generic input character */
+ int rc;
const char * orig_spec = spec;
if ( *(++spec) == '%' )
{
/* %% -> match single '%' */
- MATCH( *spec, status );
- return ++spec;
+ rc = GET( status );
+ switch ( rc )
+ {
+ case EOF:
+ /* input error */
+ if ( status->n == 0 )
+ {
+ status->n = -1;
+ }
+ return NULL;
+ case '%':
+ return ++spec;
+ default:
+ UNGET( rc, status );
+ break;
+ }
}
/* Initializing status structure */
status->flags = 0;
status->base = -1;
- status->this = 0;
+ status->current = 0;
status->width = 0;
status->prec = 0;
strtol() will return zero. In both cases, endptr will point to the
rest of the conversion specifier - just what we need.
*/
+ char const * prev_spec = spec;
status->width = (int)strtol( spec, (char**)&spec, 10 );
+ if ( spec == prev_spec )
+ {
+ status->width = SIZE_MAX;
+ }
/* Optional length modifier
We step one character ahead in any case, and step back only if we find
}
/* Conversion specifier */
+
+ /* whether any valid input has been parsed so far */
+ bool value_parsed = false;
+
switch ( *spec )
{
case 'd':
case 'A':
break;
case 'c':
- /* TODO */
- break;
+ {
+ char * c = va_arg( status->arg, char * );
+ /* for %c, default width is one */
+ if ( status->width == SIZE_MAX )
+ {
+ status->width = 1;
+ }
+ /* reading until width reached or input exhausted */
+ while ( ( status->current < status->width ) &&
+ ( ( rc = GET( status ) ) != EOF ) )
+ {
+ *(c++) = rc;
+ value_parsed = true;
+ }
+ /* width or input exhausted */
+ if ( value_parsed )
+ {
+ ++status->n;
+ return ++spec;
+ }
+ else
+ {
+ /* input error, no character read */
+ if ( status->n == 0 )
+ {
+ status->n = -1;
+ }
+ return NULL;
+ }
+ }
case 's':
- /* TODO */
- break;
+ {
+ char * c = va_arg( status->arg, char * );
+ while ( ( status->current < status->width ) &&
+ ( ( rc = GET( status ) ) != EOF ) )
+ {
+ if ( isspace( rc ) )
+ {
+ UNGET( rc, status );
+ if ( value_parsed )
+ {
+ /* matching sequence terminated by whitespace */
+ *c = '\0';
+ ++status->n;
+ return ++spec;
+ }
+ else
+ {
+ /* matching error */
+ return NULL;
+ }
+ }
+ else
+ {
+ /* match */
+ value_parsed = true;
+ *(c++) = rc;
+ }
+ }
+ /* width or input exhausted */
+ if ( value_parsed )
+ {
+ *c = '\0';
+ ++status->n;
+ return ++spec;
+ }
+ else
+ {
+ /* input error, no character read */
+ if ( status->n == 0 )
+ {
+ status->n = -1;
+ }
+ return NULL;
+ }
+ }
+ case '[':
+ {
+ const char * endspec = spec;
+ bool negative_scanlist = false;
+ if ( *(++endspec) == '^' )
+ {
+ negative_scanlist = true;
+ ++endspec;
+ }
+ spec = endspec;
+ do
+ {
+ // TODO: This can run beyond a malformed format string
+ ++endspec;
+ } while ( *endspec != ']' );
+ // read according to scanlist, equiv. to %s above
+ char * c = va_arg( status->arg, char * );
+ while ( ( status->current < status->width ) &&
+ ( ( rc = GET( status ) ) != EOF ) )
+ {
+ if ( negative_scanlist )
+ {
+ if ( IN_SCANSET( spec, endspec, rc ) )
+ {
+ UNGET( rc, status );
+ break;
+ }
+ }
+ else
+ {
+ if ( ! IN_SCANSET( spec, endspec, rc ) )
+ {
+ UNGET( rc, status );
+ break;
+ }
+ }
+ value_parsed = true;
+ *(c++) = rc;
+ }
+ if ( value_parsed )
+ {
+ *c = '\0';
+ ++status->n;
+ return ++endspec;
+ }
+ else
+ {
+ if ( rc == EOF )
+ {
+ status->n = -1;
+ }
+ return NULL;
+ }
+ }
case 'p':
- /* TODO */
status->base = 16;
status->flags |= E_unsigned;
break;
/* No conversion specifier. Bad conversion. */
return orig_spec;
}
- bool zero = false;
+
if ( status->base != -1 )
{
- bool value = false;
- int rc;
- if ( ( rc = MATCH( '0', status ) ) >= 0 )
+ /* integer conversion */
+ uintmax_t value = 0; /* absolute value read */
+ bool prefix_parsed = false;
+ int sign = 0;
+ while ( ( status->current < status->width ) &&
+ ( ( rc = GET( status ) ) != EOF ) )
{
- if ( ( rc = MATCH( 'x', status ) ) >= 0 )
+ if ( isspace( rc ) )
{
- if ( ( status->base == 0 ) || ( status->base == 16 ) )
+ if ( sign )
{
- status->base = 16;
+ /* matching sequence terminated by whitespace */
+ UNGET( rc, status );
+ break;
}
else
{
- UNGET( rc, status );
- value = true;
+ /* leading whitespace not counted against width */
+ status->current--;
+ }
+ }
+ else if ( ! sign )
+ {
+ /* no sign parsed yet */
+ switch ( rc )
+ {
+ case '-':
+ sign = -1;
+ break;
+ case '+':
+ sign = 1;
+ break;
+ default:
+ /* not a sign; put back character */
+ sign = 1;
+ UNGET( rc, status );
+ break;
}
}
- else if ( rc == MATCH_FAIL )
+ else if ( ! prefix_parsed )
{
- if ( status->base == 0 )
+ /* no prefix (0x... for hex, 0... for octal) parsed yet */
+ prefix_parsed = true;
+ if ( rc != '0' )
{
- status->base = 8;
+ /* not a prefix; if base not yet set, set to decimal */
+ if ( status->base == 0 )
+ {
+ status->base = 10;
+ }
+ UNGET( rc, status );
}
else
{
- value = true;
+ /* starts with zero, so it might be a prefix. */
+ /* check what follows next (might be 0x...) */
+ if ( ( status->current < status->width ) &&
+ ( ( rc = GET( status ) ) != EOF ) )
+ {
+ if ( tolower( rc ) == 'x' )
+ {
+ /* 0x... would be prefix for hex base... */
+ if ( ( status->base == 0 ) ||
+ ( status->base == 16 ) )
+ {
+ status->base = 16;
+ }
+ else
+ {
+ /* ...unless already set to other value */
+ UNGET( rc, status );
+ value_parsed = true;
+ }
+ }
+ else
+ {
+ /* 0... but not 0x.... would be octal prefix */
+ UNGET( rc, status );
+ if ( status->base == 0 )
+ {
+ status->base = 8;
+ }
+ /* in any case we have read a zero */
+ value_parsed = true;
+ }
+ }
+ else
+ {
+ /* failed to read beyond the initial zero */
+ value_parsed = true;
+ break;
+ }
}
}
else
{
- /* TODO: MATCH_ERROR */
+ char * digitptr = memchr( _PDCLIB_digits, tolower( rc ), status->base );
+ if ( digitptr == NULL )
+ {
+ /* end of input item */
+ UNGET( rc, status );
+ break;
+ }
+ value *= status->base;
+ value += digitptr - _PDCLIB_digits;
+ value_parsed = true;
}
}
- else if ( rc == MATCH_FAIL )
+ /* width or input exhausted, or non-matching character */
+ if ( ! value_parsed )
{
- if ( status->base == 0 )
+ /* out of input before anything could be parsed - input error */
+ /* FIXME: if first character does not match, value_parsed is not set - but it is NOT an input error */
+ if ( ( status->n == 0 ) && ( rc == EOF ) )
{
- status->base = 10;
+ status->n = -1;
}
+ return NULL;
}
- else
+ /* convert value to target type and assign to parameter */
+ switch ( status->flags & ( E_char | E_short | E_long | E_llong |
+ E_intmax | E_size | E_ptrdiff |
+ E_unsigned ) )
{
- /* TODO: MATCH_ERROR */
+ ASSIGN_VALUE_TO( E_char, char );
+ ASSIGN_VALUE_TO( E_char | E_unsigned, unsigned char );
+ ASSIGN_VALUE_TO( E_short, short );
+ ASSIGN_VALUE_TO( E_short | E_unsigned, unsigned short );
+ ASSIGN_VALUE_TO( 0, int );
+ ASSIGN_VALUE_TO( E_unsigned, unsigned int );
+ ASSIGN_VALUE_TO( E_long, long );
+ ASSIGN_VALUE_TO( E_long | E_unsigned, unsigned long );
+ ASSIGN_VALUE_TO( E_llong, long long );
+ ASSIGN_VALUE_TO( E_llong | E_unsigned, unsigned long long );
+ ASSIGN_VALUE_TO( E_intmax, intmax_t );
+ ASSIGN_VALUE_TO( E_intmax | E_unsigned, uintmax_t );
+ ASSIGN_VALUE_TO( E_size, size_t );
+ /* ASSIGN_VALUE_TO( E_size | E_unsigned, unsigned size_t ); */
+ ASSIGN_VALUE_TO( E_ptrdiff, ptrdiff_t );
+ /* ASSIGN_VALUE_TO( E_ptrdiff | E_unsigned, unsigned ptrdiff_t ); */
+ default:
+ puts( "UNSUPPORTED SCANF FLAG COMBINATION" );
+ return NULL; /* behaviour unspecified */
}
- /* TODO: Integer conversion */
- }
- else
- {
- /* TODO: Float conversions? */
+ ++(status->n);
+ return ++spec;
}
+ /* TODO: Floats. */
return NULL;
}
+
#ifdef TEST
#include <_PDCLIB_test.h>
+#include <limits.h>
+
+
int main( void )
{
- TESTCASE( NO_TESTDRIVER );
+ /* No test cases of its own here; testing is covered by fscanf.c */
return TEST_RESULTS;
}