From 7bcac73ac7bf61de3dd1bbeaf6071894c7805e7f Mon Sep 17 00:00:00 2001 From: solar <> Date: Fri, 7 Apr 2006 05:04:21 +0000 Subject: [PATCH] Temporary proof-of-concept for printf() output conversions. --- Makefile | 5 +- draft.c | 373 ++++++++++++++++++++++++++ functions/_PDCLIB/atomax.c | 3 +- functions/_PDCLIB/digits.c | 2 +- functions/_PDCLIB/strtox_main.c | 7 +- includes/stdio.h | 459 ++++++++++++++++++++++++++++++++ internals/_PDCLIB_int.h | 14 +- 7 files changed, 850 insertions(+), 13 deletions(-) create mode 100644 draft.c diff --git a/Makefile b/Makefile index 7cc7474..24e3013 100644 --- a/Makefile +++ b/Makefile @@ -6,7 +6,7 @@ AUXFILES := Makefile Readme.txt PROJDIRS := functions includes internals SRCFILES := $(shell find $(PROJDIRS) -mindepth 1 -maxdepth 3 -name "*.c") HDRFILES := $(shell find $(PROJDIRS) -mindepth 1 -maxdepth 3 -name "*.h") -INTFILES := atomax digits seed strtox_main strtox_prelim rename remove _Exit +INTFILES := atomax digits lengthmods seed strtox_main strtox_prelim rename remove _Exit OBJFILES := $(patsubst %.c,%.o,$(SRCFILES)) TSTFILES := $(patsubst %.c,%.t,$(SRCFILES)) REGFILES := $(filter-out $(patsubst %,functions/_PDCLIB/%.r,$(INTFILES)),$(patsubst %.c,%.r,$(SRCFILES))) @@ -43,6 +43,9 @@ dist: todolist: -@for file in $(ALLFILES); do grep -H TODO $$file; done; true +fixmelist: + -@for file in $(ALLFILES); do grep -H FIXME $$file; done; true + %.o: %.c Makefile @$(CC) $(CFLAGS) -Wall -DNDEBUG -MMD -MP -MT "$*.d $*.t" -g -std=c99 -I./includes -I./internals -c $< -o $@ diff --git a/draft.c b/draft.c new file mode 100644 index 0000000..fdda32d --- /dev/null +++ b/draft.c @@ -0,0 +1,373 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#undef TEST +#include + +/* Using an integer's bits as flags for both the conversion flags and length + modifiers. 
+*/ +#define E_minus 1<<0 +#define E_plus 1<<1 +#define E_alt 1<<2 +#define E_space 1<<3 +#define E_zero 1<<4 +#define E_done 1<<5 +#define E_char 1<<6 +#define E_short 1<<7 +#define E_long 1<<8 +#define E_llong 1<<9 +#define E_intmax 1<<10 +#define E_size 1<<11 +#define E_ptrdiff 1<<12 +#define E_double 1<<13 +#define E_lower 1<<14 +#define E_term 1<<15 + +void parse_out( const char * spec, va_list ap ); + +struct status_t +{ + int base; /* base to which the value shall be converted */ + int flags; /* flags and length modifiers */ + size_t n; /* maximum number of characters to be written */ + size_t i; /* number of characters already written */ + size_t this; /* number of output chars in the current conversion */ + char * s; /* target buffer */ + size_t width; /* width of current field */ + int prec; /* precision of current field */ +}; + +/* x - the character to be delivered + i - pointer to number of characters already delivered in this call + n - pointer to maximum number of characters to be delivered in this call + s - the buffer into which the character shall be delivered + TODO: Overruns. +*/ +#define DELIVER( x ) if ( status->i < status->n ) status->s[status->i] = x; ++(status->i) + +/* TODO: Left / right alignment - requires track-keeping of width and printed chars. + "Father function", die für right alignment "tail recursive" gerufen wird, und + "after" für left alignment? Parameter als struct? +*/ + +static void int2base( int value, struct status_t * status ) +{ + ++(status->this); + if ( ( value / status->base ) != 0 ) + { + int2base( value / status->base, status ); + } + else + { + char preface[3] = "\0\0"; + size_t preidx = 0; + if ( ( status->flags & E_alt ) && ( status->base == 16 || status->base == 8 ) ) + { + preface[ preidx++ ] = '0'; + if ( status->base == 16 ) + { + preface[ preidx++ ] = ( status->flags & E_lower ) ? 
'x' : 'X'; + } + } + if ( value < 0 ) + { + preface[ preidx++ ] = '-'; + } + else + { + if ( status->flags & E_plus ) + { + preface[ preidx++ ] = '+'; + } + else if ( status->flags & E_space ) + { + preface[ preidx++ ] = ' '; + } + } + if ( ! ( ( status->flags & E_minus ) || ( status->flags & E_zero ) ) ) + { + while ( ( status->this + preidx ) < status->width ) + { + DELIVER( ' ' ); + ++(status->this); + } + } + preidx = 0; + while ( preface[ preidx ] != '\0' ) + { + DELIVER( preface[ preidx++ ] ); + ++(status->this); + } + if ( ( ! ( status->flags & E_minus ) ) && ( status->flags & E_zero ) ) + { + while ( status->this < status->width ) + { + DELIVER( '0' ); + ++(status->this); + } + } + } + if ( value < 0 ) + { + value *= -1; + } + if ( status->flags & E_lower ) + { + DELIVER( _PDCLIB_digits[ value % status->base ] ); + } + else + { + DELIVER( toupper( _PDCLIB_digits[ value % status->base ] ) ); + } +} + +static void padwrap( int value, struct status_t * status ) +{ + int2base( value, status ); + if ( status->flags & E_minus ) + { + while ( status->this < status->width ) + { + DELIVER( ' ' ); + ++(status->this); + } + } + if ( status->i == status->n ) + { + status->s[status->i] = '\0'; + } +} + +void parse_out( const char * spec, va_list ap ) +{ + /* TODO: '%' handled correctly? */ + struct status_t status = { 0, 0, 0, 0, 0, NULL, 0, EOF }; + /* First come 0..n flags */ + while ( ! 
( status.flags & E_done ) ) + { + switch ( *(++spec) ) + { + case '-': + status.flags |= E_minus; + break; + case '+': + status.flags |= E_plus; + break; + case '#': + status.flags |= E_alt; + break; + case ' ': + status.flags |= E_space; + break; + case '0': + status.flags |= E_zero; + break; + default: + status.flags |= E_done; + break; + } + } + if ( *spec == '*' ) + { + /* Retrieve width value from argument stack */ + if ( ( status.width = va_arg( ap, int ) ) < 0 ) + { + /* Negative value is '-' flag plus absolute value */ + status.flags |= E_minus; + status.width *= -1; + } + ++spec; + } + else + { + /* If a width is given, strtol() will return its value. If not given, + strtol() will return zero. In both cases, endptr will point to the + rest of the conversion specifier. + */ + char * endptr; + status.width = (int)strtol( spec, &endptr, 10 ); + spec = endptr; + } + if ( *spec == '.' ) + { + if ( *(++spec) == '*' ) + { + /* Retrieve precision value from argument stack. A negative value + is as if no precision is given - as precision is initalized to + EOF (negative), there is no need for testing for negative here. + */ + status.prec = va_arg( ap, int ); + } + else + { + char * endptr; + status.prec = (int)strtol( spec, &endptr, 10 ); + spec = endptr; + } + } + /* We step one character ahead in any case, and step back only if we find + there has been no length modifier (or step ahead another character if it + has been "hh" or "ll"). 
+ */ + switch ( *(spec++) ) + { + case 'h': + if ( *spec == 'h' ) + { + status.flags |= E_char; + ++spec; + } + else + { + status.flags |= E_short; + } + break; + case 'l': + if ( *spec == 'l' ) + { + status.flags |= E_llong; + ++spec; + } + else + { + status.flags |= E_long; + } + break; + case 'j': + status.flags |= E_intmax; + break; + case 'z': + status.flags |= E_size; + break; + case 't': + status.flags |= E_ptrdiff; + break; + case 'L': + status.flags |= E_double; + break; + default: + ++spec; + break; + } + switch ( *spec ) + { + case 'd': + case 'i': + /* int2base( 10, value, true ) */ + break; + case 'o': + /* int2base( 8, value, true ) */ + break; + case 'u': + /* uint2base( 10, value, true ) */ + break; + case 'x': + /* uint2base( 16, value, true ) */ + break; + case 'X': + /* uint2base( 16, value, false ) */ + break; + case 'f': + case 'F': + case 'e': + case 'E': + case 'g': + case 'G': + break; + case 'a': + case 'A': + break; + case 'c': + break; + case 's': + break; + case 'p': + /* uint2base( 16, (intptr_t)value, true ) */ + case 'n': + case '%': + // conversion specifier + break; + default: + // undefined + return; + } +} + +/* +static void int2base( int value, int base, struct status_t * status ) + +#define E_minus 1<<0 +#define E_plus 1<<1 +#define E_alt 1<<2 +#define E_space 1<<3 +#define E_zero 1<<4 +#define E_done 1<<5 +#define E_char 1<<6 +#define E_short 1<<7 +#define E_long 1<<8 +#define E_llong 1<<9 +#define E_intmax 1<<10 +#define E_size 1<<11 +#define E_ptrdiff 1<<12 +#define E_double 1<<13 +#define E_lower 1<<14 + + struct status_t +{ + int flags; + size_t n; + size_t i; + char * s; + size_t width; + size_t prec; +}; +*/ + +#define TESTCASE( _flags, _n, _width, _prec, _value, _base, _expect ) \ + status.flags = _flags | E_term; \ + status.n = _n; \ + status.i = 0; \ + status.width = _width; \ + status.prec = _prec; \ + status.base = _base; \ + status.this = 0; \ + memset( status.s, '\0', 20 ); \ + padwrap( _value, &status ); \ + 
printf( "Output '%s', RC %d \t- ", status.s, status.i ); \ + rc = snprintf( buffer, _n, _expect, _value ); \ + printf( "Expect '%s', RC %d\n", buffer, rc ); + +int main() +{ + struct status_t status; + int rc; + char * buffer = malloc( 20 ); + status.s = malloc( 20 ); + TESTCASE( E_plus, 5, 0, 0, 1234, 10, "%+d" ); + TESTCASE( E_space, 3, 0, 0, 1234, 10, "% d" ); + TESTCASE( E_space, 3, 0, 0, -1234, 10, "% d" ); + TESTCASE( E_plus, 3, 0, 0, -1234, 10, "%+d" ); + TESTCASE( E_done, 4, 0, 0, 65535, 16, "%X" ); + TESTCASE( E_lower | E_alt, 4, 0, 0, 65534, 16, "%#x" ); + TESTCASE( E_done, 4, 0, 0, 62, 8, "%o" ); + TESTCASE( E_alt, 4, 0, 0, 62, 8, "%#o" ); + TESTCASE( E_done, 6, 6, 0, 1234, 10, "%6d" ); + TESTCASE( E_minus, 6, 6, 0, 1234, 10, "%-6d" ); + TESTCASE( E_minus, 6, 2, 0, 1234, 10, "%-2d" ); + TESTCASE( E_done, 6, 2, 0, 1234, 10, "%2d" ); + TESTCASE( E_zero, 6, 6, 0, -1234, 10, "%06d" ); + TESTCASE( E_zero, 7, 7, 0, -65535, 16, "%07X" ); + TESTCASE( E_zero | E_minus, 6, 6, 0, 1234, 10, "%-06d" ); + TESTCASE( E_plus, 6, 6, 0, 1234, 10, "%+6d" ); + TESTCASE( E_space, 6, 6, 0, 1234, 10, "% 6d" ); + TESTCASE( E_space, 6, 6, 0, -1234, 10, "% 6d" ); + TESTCASE( E_space | E_minus, 6, 6, 0, -1234, 10, "%- 6d" ); + return 0; +} diff --git a/functions/_PDCLIB/atomax.c b/functions/_PDCLIB/atomax.c index 84a0c08..9a70218 100644 --- a/functions/_PDCLIB/atomax.c +++ b/functions/_PDCLIB/atomax.c @@ -20,7 +20,8 @@ _PDCLIB_intmax_t _PDCLIB_atomax( const char * s ) while ( isspace( *s ) ) ++s; if ( *s == '+' ) ++s; else if ( *s == '-' ) sign = *(s++); - while ( ( x = memchr( _PDCLIB_digits, *(s++), 10 ) ) != NULL ) + /* TODO: Earlier version was missing tolower() but was not caught by tests */ + while ( ( x = memchr( _PDCLIB_digits, tolower(*(s++)), 10 ) ) != NULL ) { rc = rc * 10 + ( x - _PDCLIB_digits ); } diff --git a/functions/_PDCLIB/digits.c b/functions/_PDCLIB/digits.c index 3963f8b..6f36064 100644 --- a/functions/_PDCLIB/digits.c +++ b/functions/_PDCLIB/digits.c @@ -6,7 
+6,7 @@ Permission is granted to use, modify, and / or redistribute at will. */ -char _PDCLIB_digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"; +char _PDCLIB_digits[] = "0123456789abcdefghijklmnopqrstuvwxyz"; #ifdef TEST diff --git a/functions/_PDCLIB/strtox_main.c b/functions/_PDCLIB/strtox_main.c index 9f82cdb..bc14811 100644 --- a/functions/_PDCLIB/strtox_main.c +++ b/functions/_PDCLIB/strtox_main.c @@ -18,19 +18,20 @@ _PDCLIB_uintmax_t _PDCLIB_strtox_main( const char ** p, unsigned int base, uintm _PDCLIB_uintmax_t rc = 0; int digit = -1; const char * x; - while ( ( x = memchr( _PDCLIB_digits, toupper(**p), base ) ) != NULL ) + while ( ( x = memchr( _PDCLIB_digits, tolower(**p), base ) ) != NULL ) { digit = x - _PDCLIB_digits; if ( ( rc < limval ) || ( ( rc == limval ) && ( digit <= limdigit ) ) ) { - rc = rc * base + ( x - _PDCLIB_digits ); + rc = rc * base + digit; ++(*p); } else { errno = ERANGE; /* TODO: Only if endptr != NULL - but do we really want *another* parameter? */ - while ( memchr( _PDCLIB_digits, **p, base ) != NULL ) ++(*p); + /* TODO: Earlier version was missing tolower() here but was not caught by tests */ + while ( memchr( _PDCLIB_digits, tolower(**p), base ) != NULL ) ++(*p); /* TODO: This is ugly, but keeps caller from negating the error value */ *sign = '+'; return error; diff --git a/includes/stdio.h b/includes/stdio.h index 24fe0f4..05d5b6e 100644 --- a/includes/stdio.h +++ b/includes/stdio.h @@ -180,19 +180,478 @@ void setbuf( FILE * _PDCLIB_restrict stream, char * _PDCLIB_restrict buf ); int setvbuf( FILE * _PDCLIB_restrict stream, char * _PDCLIB_restrict buf, int mode, size_t size ); /* Formatted input/output functions */ + +/* + Write output to the given stream, as defined by the given format string and + 0..n subsequent arguments (the argument stack). + + The format string is written to the given stream verbatim, except for any + conversion specifiers included, which start with the letter '%' and are + documented below. 
If the given conversion specifiers require more arguments + from the argument stack than provided, behaviour is undefined. Additional + arguments not required by conversion specifiers are evaluated but otherwise + ignored. + + (The standard specifies the format string is allowed to contain multibyte + character sequences as long as it starts and ends in initial shift state, + but this is not yet supported by this implementation, which interprets the + format string as sequence of char.) + TODO: Add multibyte support to printf() functions. + + A conversion specifier consists of: + - Zero or more flags (one of the characters "-+ #0"). + - Optional minimum field width as decimal integer. Default is padding to the + left, using spaces. Note that 0 is taken as a flag, not the beginning of a + field width. Note also that a small field width will not result in the + truncation of a value. + - Optional precision (given as ".#" with # being a decimal integer), + specifying: + - the min. number of digits to appear (diouxX), + - the max. number of digits after the decimal point (aAeEfF), + - the max. number of significant digits (gG), + - the max. number of bytes to be written (s). + - behaviour with other conversion specifiers is undefined. + - Optional length modifier specifying the size of the argument (one of "hh", + "ll", or one of the characters "hljztL"). + - Conversion specifier character specifying the type of conversion to be + applied (and the type of the next argument from the argument stack). One + of the characters "diouxXfFeEgGaAcspn%". + + Minimum field width and/or precision may be given as asterisk ('*') instead + of a decimal integer. In this case, the next argument from the argument + stack is assumed to be an int value specifying the width / precision. A + negative field width is interpreted as flag '-' followed by a positive field + width. A negative precision is interpreted as if no precision was given. 
+ + FLAGS + - Left-justify the conversion result within its field width. + + Prefix a '+' on positive signed conversion results. Prefix a '-' on + floating conversions resulting in negative zero, or negative values + rounding to zero. + space Prefix a space on positive signed conversion results, or if a signed + conversion results in no characters. If both '+' and ' ' are given, + ' ' is ignored. + # Use an "alternative form" for + - 'o' conversion, increasing precision until the first digit of the + result is a zero; + - 'x' or 'X' conversion, prefixing "0x" or "0X" to nonzero results; + - "aAeEfF" conversions, always printing a decimal point even if no + digits are following; + - 'g' or 'G' conversions, always printing a decimal point even if no + digits are following, and not removing trailing zeroes. + - behaviour for other conversions is unspecified. + 0 Use leading zeroes instead of spaces for field width padding. If both + '-' and '0' are given, '0' is ignored. If a precision is specified for + any of the "diouxX" conversions, '0' is ignored. Behaviour is only + defined for "diouxXaAeEfFgG". + + LENGTH MODIFIERS + hh For "diouxX" conversions, the argument from the argument stack is + assumed to be of char width. (It will have been subject to integer + promotion but will be converted back.) For 'n' conversions, the argument + is assumed to be a pointer to signed char. + h For "diouxX" conversions, the argument from the argument stack is + assumed to be of short int width. (It will have been subject to integer + promotion but will be converted back.) For 'n' conversions, the argument + is assumed to be a pointer to short int. + l For "diouxX" conversions, the argument from the argument stack is + assumed to be of long int width. For 'n' conversions, the argument is + assumed to be a pointer to long int. For 'c' conversions, the argument + is assumed to be a wint_t. For 's' conversions, the argument is assumed + to be a pointer to wchar_t.
No effect on "aAeEfFgG" conversions. + ll For "diouxX" conversions, the argument from the argument stack is + assumed to be of long long int width. For 'n' conversions, the argument + is assumed to be a pointer to long long int. + j For "diouxX" conversions, the argument from the argument stack is + assumed to be of intmax_t width. For 'n' conversions, the argument is + assumed to be a pointer to intmax_t. + z For "diouxX" conversions, the argument from the argument stack is + assumed to be of size_t width. For 'n' conversions, the argument is + assumed to be a pointer to size_t. + t For "diouxX" conversions, the argument from the argument stack is + assumed to be of ptrdiff_t width. For 'n' conversions, the argument is + assumed to be a pointer to ptrdiff_t. + L For "aAeEfFgG" conversions, the argument from the argument stack is + assumed to be a long double. + Length modifiers appearing for any conversions not mentioned above will have + undefined behaviour. + If a length modifier appears with any conversion specifier other than as + specified above, the behavior is undefined. + + CONVERSION SPECIFIERS + d,i The argument from the argument stack is assumed to be of type int, and + is converted to a signed decimal value with a minimum number of digits + as specified by the precision (default 1), padded with leading zeroes. + A zero value converted with precision zero yields no output. + o The argument from the argument stack is assumed to be of type unsigned + int, and is converted to an unsigned octal value, other behaviour being + as above. + u The argument from the argument stack is assumed to be of type unsigned + int, and converted to an unsigned decimal value, other behaviour being + as above. + x,X The argument from the argument stack is assumed to be of type unsigned + int, and converted to an unsigned hexadecimal value, using lowercase + "abcdef" for 'x' and uppercase "ABCDEF" for 'X' conversion, other + behaviour being as above. 
+ f,F The argument from the argument stack is assumed to be of type double, + and converted to a decimal floating point in decimal-point notation, + with the number of digits after the decimal point as specified by the + precision (default 6) and the value being rounded appropriately. If + precision is zero (and the '#' flag is not given), no decimal point is + printed. At least one digit is always printed before the decimal point. + For 'f' conversions, an infinity value is printed as either [-]inf or + [-]infinity, depending on the configuration of this implementation. A + NaN value is printed as [-]nan. For 'F' conversions uppercase characters + are used for these special values. The flags '-', '+' and ' ' apply as + usual to these special values, '#' and '0' have no effect. + e,E The argument from the argument stack is assumed to be of type double, + and converted to a decimal floating point in normalized exponential + notation ([-]d.ddd e±dd). "Normalized" means one nonzero digit before + the decimal point, unless the value is zero. The number of digits after + the decimal point is specified by the precision (default 6), the value + being rounded appropriately. If precision is zero (and the '#' flag is + not given), no decimal point is printed. The exponent has at least two + digits, and not more than necessary to represent the exponent. If the + value is zero, the exponent is zero. The 'e' written to indicate the + exponent is uppercase for 'E' conversions. + Infinity or NaN values are represented as for 'f' and 'F' conversions, + respectively. + g,G The argument from the argument stack is assumed to be of type double, + and converted according to either 'f' or 'e' format for 'g' conversions, + or 'F' or 'E' format for 'G' conversions, respectively, with the actual + conversion chosen depending on the value. 'e' / 'E' conversion is chosen + if the resulting exponent is < -4 or >= the precision (default 6; a precision of zero is taken as 1).
+ Trailing zeroes are removed (unless the '#' flag is given). A decimal + point appears only if followed by a digit. + Infinity or NaN values are represented as for 'f' and 'F' conversions, + respectively. + a,A The argument from the argument stack is assumed to be of type double, + and converted to a floating point hexadecimal notation ([-]0xh.hhhh p±d) + with one hexadecimal digit (being nonzero if the value is normalized, + and otherwise unspecified) before the decimal point, and the number of + digits after the decimal point being specified by the precision. If no + precision is given, the default is to print as many digits as necessary + to give an exact representation of the value (if FLT_RADIX is a power of + 2). If no precision is given and FLT_RADIX is not a power of 2, the + default is to print as many digits as needed to distinguish values of type double + (possibly omitting trailing zeroes). (A precision p is sufficient to + distinguish values of the source type if 16^(p-1) > b^n where b is + FLT_RADIX and n is the number of digits in the significand (to base b) + of the source type. A smaller p might suffice depending on the + implementation's scheme for determining the digit to the left of the + decimal point.) The error has the correct sign for the current rounding + direction. + Unless the '#' flag is given, no decimal-point is given for zero + precision. + The 'a' conversion uses lowercase "abcdef", "0x" and 'p', the 'A' + conversion uppercase "ABCDEF", "0X" and 'P'. + The exponent always has at least one digit, and not more than necessary + to represent the decimal exponent of 2. If the value is zero, the + exponent is zero. + Infinity or NaN values are represented as for 'f' and 'F' conversions, + respectively. + Binary implementations are at liberty to choose the hexadecimal digit to + the left of the decimal point so that subsequent digits align to nibble + boundaries.
+ c The argument from the argument stack is assumed to be of type int, and + converted to a character after the value has been cast to unsigned char. + If the 'l' length modifier is given, the argument is assumed to be of + type wint_t, and converted as by a "%ls" conversion with no precision + and a pointer to a two-element wchar_t array, with the first element + being the wint_t argument and the second a '\0' wide character. + s The argument from the argument stack is assumed to be a char array (i.e. + pointer to char). Characters from that array are printed until a zero + byte is encountered or as many bytes as specified by a given precision + have been written. + If the l length modifier is given, the argument from the argument stack + is assumed to be a wchar_t array (i.e. pointer to wchar_t). Wide + characters from that array are converted to multibyte characters as by + calls to wcrtomb() (using a mbstate_t object initialized to zero prior + to the first conversion), up to and including the terminating null wide + character. The resulting multibyte character sequence is then printed up + to but not including the terminating null character. If a precision is + given, it specifies the maximum number of bytes to be written (including + shift sequences). If the given precision would require access to a wide + character one past the end of the array, the array shall contain a '\0' + wide character. In no case is a partial multibyte character written. + Redundant shift sequences may result if the multibyte characters have a + state-dependent encoding. + TODO: Clarify these statements regarding %ls. + p The argument from the argument stack is assumed to be a void pointer, + and converted to a sequence of printing characters in an implementation- + defined manner. + This implementation casts the pointer to type intptr_t, and prints the + value as if a %#x conversion specifier was given. 
+ n The argument from the argument stack is assumed to be a pointer to a + signed integer, into which the number of characters written so far by + this call to fprintf is stored. The behaviour, should any flags, field + widths, or precisions be given is undefined. + % A verbatim '%' character is written. No argument is taken from the + argument stack. + + Returns the number of characters written if successful, a negative value + otherwise. +*/ int fprintf( FILE * _PDCLIB_restrict stream, const char * _PDCLIB_restrict format, ... ); + +/* TODO: fscanf() documentation */ +/* + Write output to the given stream, as defined by the given format string and + 0..n subsequent arguments (the argument stack). + + The format string is written to the given stream verbatim, except for any + conversion specifiers included, which start with the letter '%' and are + documented below. If the given conversion specifiers require more arguments + from the argument stack than provided, behaviour is undefined. Additional + arguments not required by conversion specifiers are evaluated but otherwise + ignored. + + (The standard specifies the format string is allowed to contain multibyte + character sequences as long as it starts and ends in initial shift state, + but this is not yet supported by this implementation, which interprets the + format string as sequence of char.) + TODO: Add multibyte support to printf() functions. + + Read input from the given stream, as defined by the given format string and + 0..n subsequent arguments (the argument stack). + + The format string contains a sequence of directives that are expected to + match the input. If such a directive fails to match, the function returns + (matching error). It also returns if an input error occurs (input error). 
+ + Directives can be: + - one or more whitespaces, matching any number of whitespaces in the input; + - printing characters, matching the input verbatim; + - conversion specifications, which convert an input sequence into a value as + defined by the individual specifier, and store that value in a memory + location pointed to by the next pointer on the argument stack. Details are + documented below. If there is an insufficient number of pointers on the + argument stack, behaviour is undefined. Additional arguments not required + by any conversion specifications are evaluated, but otherwise ignored. + +The format shall be a multibyte character sequence, beginning and ending in its initial +shift state. The format is composed of zero or more directives: one or more white-space +characters, an ordinary multibyte character (neither % nor a white-space character), or a +conversion specification. Each conversion specification is introduced by the character %. +After the %, the following appear in sequence: +- An optional assignment-suppressing character *. +- An optional nonzero decimal integer that specifies the maximum field width (in + characters). +- An optional length modifier that specifies the size of the receiving object. +- A conversion specifier character that specifies the type of conversion to be applied. +The fscanf function executes each directive of the format in turn. If a directive fails, as +detailed below, the function returns. Failures are described as input failures (due to the +occurrence of an encoding error or the unavailability of input characters), or matching +failures (due to inappropriate input). +A directive composed of white-space character(s) is executed by reading input up to the +first non-white-space character (which remains unread), or until no more characters can +be read. +A directive that is an ordinary multibyte character is executed by reading the next +characters of the stream.
If any of those characters differ from the ones composing the +directive, the directive fails and the differing and subsequent characters remain unread. +Similarly, if end-of-file, an encoding error, or a read error prevents a character from being +read, the directive fails. +A directive that is a conversion specification defines a set of matching input sequences, as +described below for each specifier. A conversion specification is executed in the +following steps: +Input white-space characters (as specified by the isspace function) are skipped, unless +the specification includes a [, c, or n specifier.241) +These white-space characters are not counted against a specified field width. +An input item is read from the stream, unless the specification includes an n specifier. An +input item is defined as the longest sequence of input characters which does not exceed +any specified field width and which is, or is a prefix of, a matching input sequence.242) +fscanf pushes back at most one input character onto the input stream. Therefore, some sequences +that are acceptable to strtod, strtol, etc., are unacceptable to fscanf. +The first character, if any, after the input item remains unread. If the length of the input +item is zero, the execution of the directive fails; this condition is a matching failure unless +end-of-file, an encoding error, or a read error prevented input from the stream, in which +case it is an input failure. +Except in the case of a % specifier, the input item (or, in the case of a %n directive, the +count of input characters) is converted to a type appropriate to the conversion specifier. If +the input item is not a matching sequence, the execution of the directive fails: this +condition is a matching failure. Unless assignment suppression was indicated by a *, the +result of the conversion is placed in the object pointed to by the first argument following +the format argument that has not already received a conversion result.
If this object +does not have an appropriate type, or if the result of the conversion cannot be represented +in the object, the behavior is undefined. +The length modifiers and their meanings are: + + LENGTH MODIFIERS + hh For "diouxXn" conversions, the next pointer from the argument stack is + assumed to point to a variable of char width. + h For "diouxXn" conversions, the next pointer from the argument stack is + assumed to point to a variable of short int width. + l For "diouxXn" conversions, the next pointer from the argument stack is + assumed to point to a variable of long int width. + For "aAeEfFgG" conversions, it is assumed to point to a variable of type + double. + For "cs[" conversions, it is assumed to point to a variable of type + wchar_t. + ll For "diouxXn" conversions, the next pointer from the argument stack is + assumed to point to a variable of long long int width. + j For "diouxXn" conversions, the next pointer from the argument stack is + assumed to point to a variable of intmax_t width. + z For "diouxXn" conversions, the next pointer from the argument stack is + assumed to point to a variable of size_t width. + t For "diouxXn" conversions, the next pointer from the argument stack is + assumed to point to a variable of ptrdiff_t width. + L For "aAeEfFgG" conversions, the next pointer from the argument stack is + assumed to point to a variable of type long double. + Length modifiers appearing for any conversions not mentioned above will have + undefined behaviour. + If a length modifier appears with any conversion specifier other than as + specified above, the behavior is undefined. + + CONVERSION SPECIFIERS + d Matches an (optionally signed) decimal integer of the format expected + by strtol() with base 10. The next pointer from the argument stack is + assumed to point to a signed integer. + i Matches an (optionally signed) integer of the format expected by + strtol() with base 0.
The next pointer from the argument stack is + assumed to point to a signed integer. + o Matches an (optionally signed) octal integer of the format expected by + strtoul() with base 8. The next pointer from the argument stack is + assumed to point to an unsigned integer. + u Matches an (optionally signed) decimal integer of the format expected + by strtoul() with base 10. The next pointer from the argument stack is + assumed to point to an unsigned integer. + x Matches an (optionally signed) hexadecimal integer of the format + expected by strtoul() with base 16. The next pointer from the argument + stack is assumed to point to an unsigned integer. + aefg Matches an (optionally signed) floating point number, infinity, or not- + a-number-value of the format expected by strtod(). The next pointer + from the argument stack is assumed to point to a float. + c Matches a number of characters as specified by the field width (default + 1). The next pointer from the argument stack is assumed to point to a + character array large enough to hold that many characters. + If the 'l' length modifier is given, the input is assumed to match a + sequence of multibyte characters (starting in the initial shift state), + which will be converted to a wide character sequence as by successive + calls to mbrtowc() with a mbstate_t object initialized to zero prior to + the first conversion. The next pointer from the argument stack is + assumed to point to a wchar_t array large enough to hold that many + characters. + In either case, note that no '\0' character is added to terminate the + sequence. + s Matches a sequence of non-white-space characters. The next pointer from + the argument stack is assumed to point to a character array large + enough to hold the sequence including terminating '\0' character. 
+ If the 'l' length modifier is given, the input is assumed to match a + sequence of multibyte characters (starting in the initial shift state), + which will be converted to a wide character sequence as by a call to + mbrtowc() with a mbstate_t object initialized to zero prior to the + first conversion. The next pointer from the argument stack is assumed + to point to a wchar_t array large enough to hold the sequence including + terminating '\0' character. + [ Matches a nonempty sequence consisting of any of those characters + specified between itself and a corresponding closing bracket (']'). + If the first character in the list is a circumflex ('^'), this matches + a nonempty sequence consisting of any characters NOT specified. If the + closing bracket appears as the first character in the scanset ("[]" or + "[^]"), it is assumed to belong to the scanset, which then ends with the + NEXT closing bracket. + If there is a '-' character in the scanset which is not the first after + the opening bracket (or the circumflex, see above) or the last in the + scanset, behaviour is implementation-defined. This implementation + handles this character like any other. + + The extent of the input field is determined byte-by-byte for the above + conversions ('c', 's', '['), with no special provisions being made for + multibyte characters. The resulting field is nevertheless a multibyte + sequence beginning in initial shift state. + + p Matches a sequence of characters as produced by the printf() "%p" + conversion. The next pointer from the argument stack is assumed to + point to a void pointer, which will be filled with the same location + as the pointer used in the printf() statement. Note that behaviour is + undefined if the input value is not the result of an earlier printf() + call. + n Does not read input.
The next pointer from the argument stack is + assumed to point to a signed integer, into which the number of + characters read from input so far by this call to fscanf() is stored. + This does not affect the return value of fscanf(). The behaviour, + should an assignment-suppressing character or field width be given, + is undefined. + This can be used to test the success of literal matches and suppressed + assignments. + % Matches a single, verbatim '%' character. + + A, E, F, G and X are valid, and equivalent to their lowercase counterparts. + + Returns the number of input items successfully assigned. This can be zero if + an early mismatch occurs. Returns EOF if an input failure occurs before the + first conversion. +*/ int fscanf( FILE * _PDCLIB_restrict stream, const char * _PDCLIB_restrict format, ... ); + +/* Equivalent to fprintf( stdout, format, ... ). */ int printf( const char * _PDCLIB_restrict format, ... ); + +/* Equivalent to fscanf( stdin, format, ... ). */ int scanf( const char * _PDCLIB_restrict format, ... ); + +/* Equivalent to fprintf( stdout, format, ... ), except that the result is + written into the buffer pointed to by s, instead of stdout, and that any + characters beyond the (n-1)th are discarded. The (n)th character is + replaced by a '\0' character in this case. + Returns the number of characters that would have been written (not counting + the terminating '\0' character) if n had been sufficiently large, if + successful, and a negative number if an encoding error occurred. +*/ int snprintf( char * _PDCLIB_restrict s, size_t n, const char * _PDCLIB_restrict format, ... ); + +/* Equivalent to fprintf( stdout, format, ... ), except that the result is + written into the buffer pointed to by s, instead of stdout. +*/ int sprintf( char * _PDCLIB_restrict s, const char * _PDCLIB_restrict format, ... ); + +/* Equivalent to fscanf( stdin, format, ... ), except that the input is read + from the buffer pointed to by s, instead of stdin.
+*/ int sscanf( const char * _PDCLIB_restrict s, const char * _PDCLIB_restrict format, ... ); + +/* Equivalent to fprintf( stream, format, ... ), except that the argument stack + is passed as va_list parameter. Note that va_list is not declared by + <stdio.h>. +*/ int vfprintf( FILE * _PDCLIB_restrict stream, const char * _PDCLIB_restrict format, _PDCLIB_va_list arg ); + +/* Equivalent to fscanf( stream, format, ... ), except that the argument stack + is passed as va_list parameter. Note that va_list is not declared by + <stdio.h>. +*/ int vfscanf( FILE * _PDCLIB_restrict stream, const char * _PDCLIB_restrict format, _PDCLIB_va_list arg ); + +/* Equivalent to fprintf( stdout, format, ... ), except that the argument stack + is passed as va_list parameter. Note that va_list is not declared by + <stdio.h>. +*/ int vprintf( const char * _PDCLIB_restrict format, _PDCLIB_va_list arg ); + +/* Equivalent to fscanf( stdin, format, ... ), except that the argument stack + is passed as va_list parameter. Note that va_list is not declared by + <stdio.h>. +*/ int vscanf( const char * _PDCLIB_restrict format, _PDCLIB_va_list arg ); + +/* Equivalent to snprintf( s, n, format, ... ), except that the argument stack + is passed as va_list parameter. Note that va_list is not declared by + <stdio.h>. + */ int vsnprintf( char * _PDCLIB_restrict s, size_t n, const char * _PDCLIB_restrict format, _PDCLIB_va_list arg ); + +/* Equivalent to fprintf( stdout, format, ... ), except that the argument stack + is passed as va_list parameter, and the result is written to the buffer + pointed to by s, instead of stdout. Note that va_list is not declared by + <stdio.h>. +*/ int vsprintf( char * _PDCLIB_restrict s, const char * _PDCLIB_restrict format, _PDCLIB_va_list arg ); + +/* Equivalent to fscanf( stdin, format, ... ), except that the argument stack + is passed as va_list parameter, and the input is read from the buffer + pointed to by s, instead of stdin. Note that va_list is not declared by + <stdio.h>.
+*/ int vsscanf( const char * _PDCLIB_restrict s, const char * _PDCLIB_restrict format, _PDCLIB_va_list arg ); /* Character input/output functions */ diff --git a/internals/_PDCLIB_int.h b/internals/_PDCLIB_int.h index e8ce60b..5e7d840 100644 --- a/internals/_PDCLIB_int.h +++ b/internals/_PDCLIB_int.h @@ -263,13 +263,13 @@ typedef unsigned _PDCLIB_intmax _PDCLIB_uintmax_t; struct { - _PDCLIB_fd_t handle; /* OS-specific file descriptor */ - _PDCLIB_fpos_t position; /* file position indicator */ - void * buffer; /* file buffer */ - size_t bufsize; /* size of buffer */ - int status; /* misc. status bits */ - /*mbstate_t mbstate;*//* multibyte parse state */ /* TODO: Unmask. */ - FILE * next; /* provisions for linked list handling */ + _PDCLIB_fd_t handle; /* OS-specific file descriptor */ + _PDCLIB_fpos_t position; /* file position indicator */ + void * buffer; /* file buffer */ + _PDCLIB_size_t bufsize; /* size of buffer */ + int status; /* misc. status bits */ + /*mbstate_t mbstate; multibyte parse state - TODO: Unmask. */ + struct _PDCLIB_file_t * next; /* provisions for linked list handling */ } _PDCLIB_file_t; /* -------------------------------------------------------------------------- */ -- 2.40.0