X-Git-Url: https://pd.if.org/git/?p=pd_readline;a=blobdiff_plain;f=mg%2Fre_search.c;fp=mg%2Fre_search.c;h=db39fdc8e704b61a696a509396d17b740d26613c;hp=0000000000000000000000000000000000000000;hb=a9843085ec916c175bd245a8398f30e6cc03f984;hpb=26fe4e09c6c3c250334fdeed60ce3061febecde2 diff --git a/mg/re_search.c b/mg/re_search.c new file mode 100644 index 0000000..db39fdc --- /dev/null +++ b/mg/re_search.c @@ -0,0 +1,615 @@ +/* $OpenBSD: re_search.c,v 1.26 2011/01/21 19:10:13 kjell Exp $ */ + +/* This file is in the public domain. */ + +/* + * regular expression search commands for Mg + * + * This file contains functions to implement several of gnuemacs's regular + * expression functions for Mg. Several of the routines below are just minor + * re-arrangements of Mg's non-regular expression search functions. Some of + * them are similar in structure to the original MicroEMACS, others are + * modifications of Rich Ellison's code. Peter Newton re-wrote about half of + * them from scratch. + */ + +#ifdef REGEX +#include "def.h" + +#include +#include + +#include "macro.h" + +#define SRCH_BEGIN (0) /* search sub-codes */ +#define SRCH_FORW (-1) +#define SRCH_BACK (-2) +#define SRCH_NOPR (-3) +#define SRCH_ACCM (-4) +#define SRCH_MARK (-5) + +#define RE_NMATCH 10 /* max number of matches */ +#define REPLEN 256 /* max length of replacement string */ + +char re_pat[NPAT]; /* regex pattern */ +int re_srch_lastdir = SRCH_NOPR; /* last search flags */ +int casefoldsearch = TRUE; /* does search ignore case? */ + +static int re_doreplace(RSIZE, char *); +static int re_forwsrch(void); +static int re_backsrch(void); +static int re_readpattern(char *); +static int killmatches(int); +static int countmatches(int); + +/* + * Search forward. + * Get a search string from the user and search for it starting at ".". If + * found, move "." to just after the matched characters. display does all + * the hard stuff. If not found, it just prints a message. + */ +/* ARGSUSED */ +int +re_forwsearch(int f, int n) +{ + int s; + + if ((s = re_readpattern("RE Search")) != TRUE) + return (s); + if (re_forwsrch() == FALSE) { + ewprintf("Search failed: \"%s\"", re_pat); + return (FALSE); + } + re_srch_lastdir = SRCH_FORW; + return (TRUE); +} + +/* + * Reverse search. + * Get a search string from the user, and search, starting at "." + * and proceeding toward the front of the buffer. If found "." is left + * pointing at the first character of the pattern [the last character that + * was matched]. + */ +/* ARGSUSED */ +int +re_backsearch(int f, int n) +{ + int s; + + if ((s = re_readpattern("RE Search backward")) != TRUE) + return (s); + if (re_backsrch() == FALSE) { + ewprintf("Search failed: \"%s\"", re_pat); + return (FALSE); + } + re_srch_lastdir = SRCH_BACK; + return (TRUE); +} + +/* + * Search again, using the same search string and direction as the last search + * command. The direction has been saved in "srch_lastdir", so you know which + * way to go. + * + * XXX: This code has problems -- some incompatibility(?) with extend.c causes + * match to fail when it should not. + */ +/* ARGSUSED */ +int +re_searchagain(int f, int n) +{ + if (re_srch_lastdir == SRCH_NOPR) { + ewprintf("No last search"); + return (FALSE); + } + if (re_srch_lastdir == SRCH_FORW) { + if (re_forwsrch() == FALSE) { + ewprintf("Search failed: \"%s\"", re_pat); + return (FALSE); + } + return (TRUE); + } + if (re_srch_lastdir == SRCH_BACK) + if (re_backsrch() == FALSE) { + ewprintf("Search failed: \"%s\"", re_pat); + return (FALSE); + } + + return (TRUE); +} + +/* Compiled regex goes here-- changed only when new pattern read */ +static regex_t re_buff; +static regmatch_t re_match[RE_NMATCH]; + +/* + * Re-Query Replace. + * Replace strings selectively. Does a search and replace operation. + */ +/* ARGSUSED */ +int +re_queryrepl(int f, int n) +{ + int rcnt = 0; /* replacements made so far */ + int plen, s; /* length of found string */ + char news[NPAT]; /* replacement string */ + + if ((s = re_readpattern("RE Query replace")) != TRUE) + return (s); + if (eread("Query replace %s with: ", news, NPAT, + EFNUL | EFNEW | EFCR, re_pat) == NULL) + return (ABORT); + ewprintf("Query replacing %s with %s:", re_pat, news); + + /* + * Search forward repeatedly, checking each time whether to insert + * or not. The "!" case makes the check always true, so it gets put + * into a tighter loop for efficiency. + */ + while (re_forwsrch() == TRUE) { +retry: + update(); + switch (getkey(FALSE)) { + case ' ': + plen = re_match[0].rm_eo - re_match[0].rm_so; + if (re_doreplace((RSIZE)plen, news) == FALSE) + return (FALSE); + rcnt++; + break; + + case '.': + plen = re_match[0].rm_eo - re_match[0].rm_so; + if (re_doreplace((RSIZE)plen, news) == FALSE) + return (FALSE); + rcnt++; + goto stopsearch; + + case CCHR('G'): /* ^G */ + (void)ctrlg(FFRAND, 0); + goto stopsearch; + case CCHR('['): /* ESC */ + case '`': + goto stopsearch; + case '!': + do { + plen = re_match[0].rm_eo - re_match[0].rm_so; + if (re_doreplace((RSIZE)plen, news) == FALSE) + return (FALSE); + rcnt++; + } while (re_forwsrch() == TRUE); + goto stopsearch; + + case CCHR('?'): /* To not replace */ + break; + + default: + ewprintf(" replace, [.] rep-end, don't, [!] repl rest quit"); + goto retry; + } + } + +stopsearch: + curwp->w_rflag |= WFFULL; + update(); + if (!inmacro) { + if (rcnt == 0) + ewprintf("(No replacements done)"); + else if (rcnt == 1) + ewprintf("(1 replacement done)"); + else + ewprintf("(%d replacements done)", rcnt); + } + return (TRUE); +} + +/* + * Routine re_doreplace calls lreplace to make replacements needed by + * re_query replace. Its reason for existence is to deal with \1, \2. etc. + * plen: length to remove + * st: replacement string + */ +static int +re_doreplace(RSIZE plen, char *st) +{ + int j, k, s, more, num, state; + struct line *clp; + char repstr[REPLEN]; + + clp = curwp->w_dotp; + more = TRUE; + j = 0; + state = 0; + num = 0; + + /* The following FSA parses the replacement string */ + while (more) { + switch (state) { + case 0: + if (*st == '\\') { + st++; + state = 1; + } else if (*st == '\0') + more = FALSE; + else { + repstr[j] = *st; + j++; + if (j >= REPLEN) + return (FALSE); + st++; + } + break; + case 1: + if (*st >= '0' && *st <= '9') { + num = *st - '0'; + st++; + state = 2; + } else if (*st == '\0') + more = FALSE; + else { + repstr[j] = *st; + j++; + if (j >= REPLEN) + return (FALSE); + st++; + state = 0; + } + break; + case 2: + if (*st >= '0' && *st <= '9') { + num = 10 * num + *st - '0'; + st++; + } else { + if (num >= RE_NMATCH) + return (FALSE); + k = re_match[num].rm_eo - re_match[num].rm_so; + if (j + k >= REPLEN) + return (FALSE); + bcopy(&(clp->l_text[re_match[num].rm_so]), + &repstr[j], k); + j += k; + if (*st == '\0') + more = FALSE; + if (*st == '\\') { + st++; + state = 1; + } else { + repstr[j] = *st; + j++; + if (j >= REPLEN) + return (FALSE); + st++; + state = 0; + } + } + break; + } /* switch (state) */ + } /* while (more) */ + + repstr[j] = '\0'; + s = lreplace(plen, repstr); + return (s); +} + +/* + * This routine does the real work of a forward search. The pattern is + * sitting in the external variable "pat". If found, dot is updated, the + * window system is notified of the change, and TRUE is returned. If the + * string isn't found, FALSE is returned. + */ +static int +re_forwsrch(void) +{ + int tbo, error; + struct line *clp; + + clp = curwp->w_dotp; + tbo = curwp->w_doto; + + if (tbo == clp->l_used) + /* + * Don't start matching past end of line -- must move to + * beginning of next line, unless at end of file. + */ + if (clp != curbp->b_headp) { + clp = lforw(clp); + tbo = 0; + } + /* + * Note this loop does not process the last line, but this editor + * always makes the last line empty so this is good. + */ + while (clp != (curbp->b_headp)) { + re_match[0].rm_so = tbo; + re_match[0].rm_eo = llength(clp); + error = regexec(&re_buff, ltext(clp), RE_NMATCH, re_match, + REG_STARTEND); + if (error != 0) { + clp = lforw(clp); + tbo = 0; + } else { + curwp->w_doto = re_match[0].rm_eo; + curwp->w_dotp = clp; + curwp->w_rflag |= WFMOVE; + return (TRUE); + } + } + return (FALSE); +} + +/* + * This routine does the real work of a backward search. The pattern is sitting + * in the external variable "re_pat". If found, dot is updated, the window + * system is notified of the change, and TRUE is returned. If the string isn't + * found, FALSE is returned. + */ +static int +re_backsrch(void) +{ + struct line *clp; + int tbo; + regmatch_t lastmatch; + + clp = curwp->w_dotp; + tbo = curwp->w_doto; + + /* Start search one position to the left of dot */ + tbo = tbo - 1; + if (tbo < 0) { + /* must move up one line */ + clp = lback(clp); + tbo = llength(clp); + } + + /* + * Note this loop does not process the last line, but this editor + * always makes the last line empty so this is good. + */ + while (clp != (curbp->b_headp)) { + re_match[0].rm_so = 0; + re_match[0].rm_eo = llength(clp); + lastmatch.rm_so = -1; + /* + * Keep searching until we don't match any longer. Assumes a + * non-match does not modify the re_match array. We have to + * do this character-by-character after the first match since + * POSIX regexps don't give you a way to do reverse matches. + */ + while (!regexec(&re_buff, ltext(clp), RE_NMATCH, re_match, + REG_STARTEND) && re_match[0].rm_so < tbo) { + memcpy(&lastmatch, &re_match[0], sizeof(regmatch_t)); + re_match[0].rm_so++; + re_match[0].rm_eo = llength(clp); + } + if (lastmatch.rm_so == -1) { + clp = lback(clp); + tbo = llength(clp); + } else { + memcpy(&re_match[0], &lastmatch, sizeof(regmatch_t)); + curwp->w_doto = re_match[0].rm_so; + curwp->w_dotp = clp; + curwp->w_rflag |= WFMOVE; + return (TRUE); + } + } + return (FALSE); +} + +/* + * Read a pattern. + * Stash it in the external variable "re_pat". The "pat" is + * not updated if the user types in an empty line. If the user typed + * an empty line, and there is no old pattern, it is an error. + * Display the old pattern, in the style of Jeff Lomicka. There is + * some do-it-yourself control expansion. + */ +static int +re_readpattern(char *prompt) +{ + static int dofree = 0; + int flags, error, s; + char tpat[NPAT], *rep; + + if (re_pat[0] == '\0') + rep = eread("%s: ", tpat, NPAT, EFNEW | EFCR, prompt); + else + rep = eread("%s: (default %s) ", tpat, NPAT, + EFNUL | EFNEW | EFCR, prompt, re_pat); + if (rep == NULL) + return (ABORT); + if (rep[0] != '\0') { + /* New pattern given */ + (void)strlcpy(re_pat, tpat, sizeof(re_pat)); + if (casefoldsearch) + flags = REG_EXTENDED | REG_ICASE; + else + flags = REG_EXTENDED; + if (dofree) + regfree(&re_buff); + error = regcomp(&re_buff, re_pat, flags); + if (error != 0) { + char message[256]; + regerror(error, &re_buff, message, sizeof(message)); + ewprintf("Regex Error: %s", message); + re_pat[0] = '\0'; + return (FALSE); + } + dofree = 1; + s = TRUE; + } else if (rep[0] == '\0' && re_pat[0] != '\0') + /* Just using old pattern */ + s = TRUE; + else + s = FALSE; + return (s); +} + +/* + * Cause case to not matter in searches. This is the default. If called + * with argument cause case to matter. + */ +/* ARGSUSED*/ +int +setcasefold(int f, int n) +{ + if (f & FFARG) { + casefoldsearch = FALSE; + ewprintf("Case-fold-search unset"); + } else { + casefoldsearch = TRUE; + ewprintf("Case-fold-search set"); + } + + /* + * Invalidate the regular expression pattern since I'm too lazy to + * recompile it. + */ + re_pat[0] = '\0'; + return (TRUE); +} + +/* + * Delete all lines after dot that contain a string matching regex. + */ +/* ARGSUSED */ +int +delmatchlines(int f, int n) +{ + int s; + + if ((s = re_readpattern("Flush lines (containing match for regexp)")) + != TRUE) + return (s); + + s = killmatches(TRUE); + return (s); +} + +/* + * Delete all lines after dot that don't contain a string matching regex. + */ +/* ARGSUSED */ +int +delnonmatchlines(int f, int n) +{ + int s; + + if ((s = re_readpattern("Keep lines (containing match for regexp)")) + != TRUE) + return (s); + + s = killmatches(FALSE); + return (s); +} + +/* + * This function does the work of deleting matching lines. + */ +static int +killmatches(int cond) +{ + int s, error; + int count = 0; + struct line *clp; + + clp = curwp->w_dotp; + if (curwp->w_doto == llength(clp)) + /* Consider dot on next line */ + clp = lforw(clp); + + while (clp != (curbp->b_headp)) { + /* see if line matches */ + re_match[0].rm_so = 0; + re_match[0].rm_eo = llength(clp); + error = regexec(&re_buff, ltext(clp), RE_NMATCH, re_match, + REG_STARTEND); + + /* Delete line when appropriate */ + if ((cond == FALSE && error) || (cond == TRUE && !error)) { + curwp->w_doto = 0; + curwp->w_dotp = clp; + count++; + s = ldelete(llength(clp) + 1, KNONE); + clp = curwp->w_dotp; + curwp->w_rflag |= WFMOVE; + if (s == FALSE) + return (FALSE); + } else + clp = lforw(clp); + } + + ewprintf("%d line(s) deleted", count); + if (count > 0) + curwp->w_rflag |= WFMOVE; + + return (TRUE); +} + +/* + * Count lines matching regex. + */ +/* ARGSUSED */ +int +cntmatchlines(int f, int n) +{ + int s; + + if ((s = re_readpattern("Count lines (matching regexp)")) != TRUE) + return (s); + s = countmatches(TRUE); + + return (s); +} + +/* + * Count lines that fail to match regex. + */ +/* ARGSUSED */ +int +cntnonmatchlines(int f, int n) +{ + int s; + + if ((s = re_readpattern("Count lines (not matching regexp)")) != TRUE) + return (s); + s = countmatches(FALSE); + + return (s); +} + +/* + * This function does the work of counting matching lines. + */ +int +countmatches(int cond) +{ + int error; + int count = 0; + struct line *clp; + + clp = curwp->w_dotp; + if (curwp->w_doto == llength(clp)) + /* Consider dot on next line */ + clp = lforw(clp); + + while (clp != (curbp->b_headp)) { + /* see if line matches */ + re_match[0].rm_so = 0; + re_match[0].rm_eo = llength(clp); + error = regexec(&re_buff, ltext(clp), RE_NMATCH, re_match, + REG_STARTEND); + + /* Count line when appropriate */ + if ((cond == FALSE && error) || (cond == TRUE && !error)) + count++; + clp = lforw(clp); + } + + if (cond) + ewprintf("Number of lines matching: %d", count); + else + ewprintf("Number of lines not matching: %d", count); + + return (TRUE); +} +#endif /* REGEX */