--- /dev/null
+/* $OpenBSD: re_search.c,v 1.26 2011/01/21 19:10:13 kjell Exp $ */
+
+/* This file is in the public domain. */
+
+/*
+ * regular expression search commands for Mg
+ *
+ * This file contains functions to implement several of gnuemacs's regular
+ * expression functions for Mg. Several of the routines below are just minor
+ * re-arrangements of Mg's non-regular expression search functions. Some of
+ * them are similar in structure to the original MicroEMACS, others are
+ * modifications of Rich Ellison's code. Peter Newton re-wrote about half of
+ * them from scratch.
+ */
+
+#ifdef REGEX
+#include "def.h"
+
+#include <sys/types.h>
+#include <regex.h>
+
+#include "macro.h"
+
+#define SRCH_BEGIN (0) /* search sub-codes */
+#define SRCH_FORW (-1)
+#define SRCH_BACK (-2)
+#define SRCH_NOPR (-3)
+#define SRCH_ACCM (-4)
+#define SRCH_MARK (-5)
+
+#define RE_NMATCH 10 /* max number of matches */
+#define REPLEN 256 /* max length of replacement string */
+
+char re_pat[NPAT]; /* regex pattern */
+int re_srch_lastdir = SRCH_NOPR; /* last search flags */
+int casefoldsearch = TRUE; /* does search ignore case? */
+
+static int re_doreplace(RSIZE, char *);
+static int re_forwsrch(void);
+static int re_backsrch(void);
+static int re_readpattern(char *);
+static int killmatches(int);
+static int countmatches(int);
+
+/*
+ * Search forward.
+ * Get a search string from the user and search for it starting at ".". If
+ * found, move "." to just after the matched characters. display does all
+ * the hard stuff. If not found, it just prints a message.
+ */
+/* ARGSUSED */
+int
+re_forwsearch(int f, int n)
+{
+ int s;
+
+ if ((s = re_readpattern("RE Search")) != TRUE)
+ return (s);
+ if (re_forwsrch() == FALSE) {
+ ewprintf("Search failed: \"%s\"", re_pat);
+ return (FALSE);
+ }
+ re_srch_lastdir = SRCH_FORW;
+ return (TRUE);
+}
+
+/*
+ * Reverse search.
+ * Get a search string from the user, and search, starting at "."
+ * and proceeding toward the front of the buffer. If found "." is left
+ * pointing at the first character of the pattern [the last character that
+ * was matched].
+ */
+/* ARGSUSED */
+int
+re_backsearch(int f, int n)
+{
+ int s;
+
+ if ((s = re_readpattern("RE Search backward")) != TRUE)
+ return (s);
+ if (re_backsrch() == FALSE) {
+ ewprintf("Search failed: \"%s\"", re_pat);
+ return (FALSE);
+ }
+ re_srch_lastdir = SRCH_BACK;
+ return (TRUE);
+}
+
+/*
+ * Search again, using the same search string and direction as the last search
+ * command. The direction has been saved in "srch_lastdir", so you know which
+ * way to go.
+ *
+ * XXX: This code has problems -- some incompatibility(?) with extend.c causes
+ * match to fail when it should not.
+ */
+/* ARGSUSED */
+int
+re_searchagain(int f, int n)
+{
+ if (re_srch_lastdir == SRCH_NOPR) {
+ ewprintf("No last search");
+ return (FALSE);
+ }
+ if (re_srch_lastdir == SRCH_FORW) {
+ if (re_forwsrch() == FALSE) {
+ ewprintf("Search failed: \"%s\"", re_pat);
+ return (FALSE);
+ }
+ return (TRUE);
+ }
+ if (re_srch_lastdir == SRCH_BACK)
+ if (re_backsrch() == FALSE) {
+ ewprintf("Search failed: \"%s\"", re_pat);
+ return (FALSE);
+ }
+
+ return (TRUE);
+}
+
+/* Compiled regex goes here-- changed only when new pattern read */
+static regex_t re_buff;
+static regmatch_t re_match[RE_NMATCH];
+
+/*
+ * Re-Query Replace.
+ * Replace strings selectively. Does a search and replace operation.
+ */
+/* ARGSUSED */
+int
+re_queryrepl(int f, int n)
+{
+ int rcnt = 0; /* replacements made so far */
+ int plen, s; /* length of found string */
+ char news[NPAT]; /* replacement string */
+
+ if ((s = re_readpattern("RE Query replace")) != TRUE)
+ return (s);
+ if (eread("Query replace %s with: ", news, NPAT,
+ EFNUL | EFNEW | EFCR, re_pat) == NULL)
+ return (ABORT);
+ ewprintf("Query replacing %s with %s:", re_pat, news);
+
+ /*
+ * Search forward repeatedly, checking each time whether to insert
+ * or not. The "!" case makes the check always true, so it gets put
+ * into a tighter loop for efficiency.
+ */
+ while (re_forwsrch() == TRUE) {
+retry:
+ update();
+ switch (getkey(FALSE)) {
+ case ' ':
+ plen = re_match[0].rm_eo - re_match[0].rm_so;
+ if (re_doreplace((RSIZE)plen, news) == FALSE)
+ return (FALSE);
+ rcnt++;
+ break;
+
+ case '.':
+ plen = re_match[0].rm_eo - re_match[0].rm_so;
+ if (re_doreplace((RSIZE)plen, news) == FALSE)
+ return (FALSE);
+ rcnt++;
+ goto stopsearch;
+
+ case CCHR('G'): /* ^G */
+ (void)ctrlg(FFRAND, 0);
+ goto stopsearch;
+ case CCHR('['): /* ESC */
+ case '`':
+ goto stopsearch;
+ case '!':
+ do {
+ plen = re_match[0].rm_eo - re_match[0].rm_so;
+ if (re_doreplace((RSIZE)plen, news) == FALSE)
+ return (FALSE);
+ rcnt++;
+ } while (re_forwsrch() == TRUE);
+ goto stopsearch;
+
+ case CCHR('?'): /* To not replace */
+ break;
+
+ default:
+ ewprintf("<SP> replace, [.] rep-end, <DEL> don't, [!] repl rest <ESC> quit");
+ goto retry;
+ }
+ }
+
+stopsearch:
+ curwp->w_rflag |= WFFULL;
+ update();
+ if (!inmacro) {
+ if (rcnt == 0)
+ ewprintf("(No replacements done)");
+ else if (rcnt == 1)
+ ewprintf("(1 replacement done)");
+ else
+ ewprintf("(%d replacements done)", rcnt);
+ }
+ return (TRUE);
+}
+
+/*
+ * Routine re_doreplace calls lreplace to make replacements needed by
+ * re_query replace. Its reason for existence is to deal with \1, \2. etc.
+ * plen: length to remove
+ * st: replacement string
+ */
+static int
+re_doreplace(RSIZE plen, char *st)
+{
+ int j, k, s, more, num, state;
+ struct line *clp;
+ char repstr[REPLEN];
+
+ clp = curwp->w_dotp;
+ more = TRUE;
+ j = 0;
+ state = 0;
+ num = 0;
+
+ /* The following FSA parses the replacement string */
+ while (more) {
+ switch (state) {
+ case 0:
+ if (*st == '\\') {
+ st++;
+ state = 1;
+ } else if (*st == '\0')
+ more = FALSE;
+ else {
+ repstr[j] = *st;
+ j++;
+ if (j >= REPLEN)
+ return (FALSE);
+ st++;
+ }
+ break;
+ case 1:
+ if (*st >= '0' && *st <= '9') {
+ num = *st - '0';
+ st++;
+ state = 2;
+ } else if (*st == '\0')
+ more = FALSE;
+ else {
+ repstr[j] = *st;
+ j++;
+ if (j >= REPLEN)
+ return (FALSE);
+ st++;
+ state = 0;
+ }
+ break;
+ case 2:
+ if (*st >= '0' && *st <= '9') {
+ num = 10 * num + *st - '0';
+ st++;
+ } else {
+ if (num >= RE_NMATCH)
+ return (FALSE);
+ k = re_match[num].rm_eo - re_match[num].rm_so;
+ if (j + k >= REPLEN)
+ return (FALSE);
+ bcopy(&(clp->l_text[re_match[num].rm_so]),
+ &repstr[j], k);
+ j += k;
+ if (*st == '\0')
+ more = FALSE;
+ if (*st == '\\') {
+ st++;
+ state = 1;
+ } else {
+ repstr[j] = *st;
+ j++;
+ if (j >= REPLEN)
+ return (FALSE);
+ st++;
+ state = 0;
+ }
+ }
+ break;
+ } /* switch (state) */
+ } /* while (more) */
+
+ repstr[j] = '\0';
+ s = lreplace(plen, repstr);
+ return (s);
+}
+
+/*
+ * This routine does the real work of a forward search. The pattern is
+ * sitting in the external variable "pat". If found, dot is updated, the
+ * window system is notified of the change, and TRUE is returned. If the
+ * string isn't found, FALSE is returned.
+ */
+static int
+re_forwsrch(void)
+{
+ int tbo, error;
+ struct line *clp;
+
+ clp = curwp->w_dotp;
+ tbo = curwp->w_doto;
+
+ if (tbo == clp->l_used)
+ /*
+ * Don't start matching past end of line -- must move to
+ * beginning of next line, unless at end of file.
+ */
+ if (clp != curbp->b_headp) {
+ clp = lforw(clp);
+ tbo = 0;
+ }
+ /*
+ * Note this loop does not process the last line, but this editor
+ * always makes the last line empty so this is good.
+ */
+ while (clp != (curbp->b_headp)) {
+ re_match[0].rm_so = tbo;
+ re_match[0].rm_eo = llength(clp);
+ error = regexec(&re_buff, ltext(clp), RE_NMATCH, re_match,
+ REG_STARTEND);
+ if (error != 0) {
+ clp = lforw(clp);
+ tbo = 0;
+ } else {
+ curwp->w_doto = re_match[0].rm_eo;
+ curwp->w_dotp = clp;
+ curwp->w_rflag |= WFMOVE;
+ return (TRUE);
+ }
+ }
+ return (FALSE);
+}
+
+/*
+ * This routine does the real work of a backward search. The pattern is sitting
+ * in the external variable "re_pat". If found, dot is updated, the window
+ * system is notified of the change, and TRUE is returned. If the string isn't
+ * found, FALSE is returned.
+ */
+static int
+re_backsrch(void)
+{
+ struct line *clp;
+ int tbo;
+ regmatch_t lastmatch;
+
+ clp = curwp->w_dotp;
+ tbo = curwp->w_doto;
+
+ /* Start search one position to the left of dot */
+ tbo = tbo - 1;
+ if (tbo < 0) {
+ /* must move up one line */
+ clp = lback(clp);
+ tbo = llength(clp);
+ }
+
+ /*
+ * Note this loop does not process the last line, but this editor
+ * always makes the last line empty so this is good.
+ */
+ while (clp != (curbp->b_headp)) {
+ re_match[0].rm_so = 0;
+ re_match[0].rm_eo = llength(clp);
+ lastmatch.rm_so = -1;
+ /*
+ * Keep searching until we don't match any longer. Assumes a
+ * non-match does not modify the re_match array. We have to
+ * do this character-by-character after the first match since
+ * POSIX regexps don't give you a way to do reverse matches.
+ */
+ while (!regexec(&re_buff, ltext(clp), RE_NMATCH, re_match,
+ REG_STARTEND) && re_match[0].rm_so < tbo) {
+ memcpy(&lastmatch, &re_match[0], sizeof(regmatch_t));
+ re_match[0].rm_so++;
+ re_match[0].rm_eo = llength(clp);
+ }
+ if (lastmatch.rm_so == -1) {
+ clp = lback(clp);
+ tbo = llength(clp);
+ } else {
+ memcpy(&re_match[0], &lastmatch, sizeof(regmatch_t));
+ curwp->w_doto = re_match[0].rm_so;
+ curwp->w_dotp = clp;
+ curwp->w_rflag |= WFMOVE;
+ return (TRUE);
+ }
+ }
+ return (FALSE);
+}
+
+/*
+ * Read a pattern.
+ * Stash it in the external variable "re_pat". The "pat" is
+ * not updated if the user types in an empty line. If the user typed
+ * an empty line, and there is no old pattern, it is an error.
+ * Display the old pattern, in the style of Jeff Lomicka. There is
+ * some do-it-yourself control expansion.
+ */
+static int
+re_readpattern(char *prompt)
+{
+ static int dofree = 0;
+ int flags, error, s;
+ char tpat[NPAT], *rep;
+
+ if (re_pat[0] == '\0')
+ rep = eread("%s: ", tpat, NPAT, EFNEW | EFCR, prompt);
+ else
+ rep = eread("%s: (default %s) ", tpat, NPAT,
+ EFNUL | EFNEW | EFCR, prompt, re_pat);
+ if (rep == NULL)
+ return (ABORT);
+ if (rep[0] != '\0') {
+ /* New pattern given */
+ (void)strlcpy(re_pat, tpat, sizeof(re_pat));
+ if (casefoldsearch)
+ flags = REG_EXTENDED | REG_ICASE;
+ else
+ flags = REG_EXTENDED;
+ if (dofree)
+ regfree(&re_buff);
+ error = regcomp(&re_buff, re_pat, flags);
+ if (error != 0) {
+ char message[256];
+ regerror(error, &re_buff, message, sizeof(message));
+ ewprintf("Regex Error: %s", message);
+ re_pat[0] = '\0';
+ return (FALSE);
+ }
+ dofree = 1;
+ s = TRUE;
+ } else if (rep[0] == '\0' && re_pat[0] != '\0')
+ /* Just using old pattern */
+ s = TRUE;
+ else
+ s = FALSE;
+ return (s);
+}
+
+/*
+ * Cause case to not matter in searches. This is the default. If called
+ * with argument cause case to matter.
+ */
+/* ARGSUSED*/
+int
+setcasefold(int f, int n)
+{
+ if (f & FFARG) {
+ casefoldsearch = FALSE;
+ ewprintf("Case-fold-search unset");
+ } else {
+ casefoldsearch = TRUE;
+ ewprintf("Case-fold-search set");
+ }
+
+ /*
+ * Invalidate the regular expression pattern since I'm too lazy to
+ * recompile it.
+ */
+ re_pat[0] = '\0';
+ return (TRUE);
+}
+
+/*
+ * Delete all lines after dot that contain a string matching regex.
+ */
+/* ARGSUSED */
+int
+delmatchlines(int f, int n)
+{
+ int s;
+
+ if ((s = re_readpattern("Flush lines (containing match for regexp)"))
+ != TRUE)
+ return (s);
+
+ s = killmatches(TRUE);
+ return (s);
+}
+
+/*
+ * Delete all lines after dot that don't contain a string matching regex.
+ */
+/* ARGSUSED */
+int
+delnonmatchlines(int f, int n)
+{
+ int s;
+
+ if ((s = re_readpattern("Keep lines (containing match for regexp)"))
+ != TRUE)
+ return (s);
+
+ s = killmatches(FALSE);
+ return (s);
+}
+
+/*
+ * This function does the work of deleting matching lines.
+ */
+static int
+killmatches(int cond)
+{
+ int s, error;
+ int count = 0;
+ struct line *clp;
+
+ clp = curwp->w_dotp;
+ if (curwp->w_doto == llength(clp))
+ /* Consider dot on next line */
+ clp = lforw(clp);
+
+ while (clp != (curbp->b_headp)) {
+ /* see if line matches */
+ re_match[0].rm_so = 0;
+ re_match[0].rm_eo = llength(clp);
+ error = regexec(&re_buff, ltext(clp), RE_NMATCH, re_match,
+ REG_STARTEND);
+
+ /* Delete line when appropriate */
+ if ((cond == FALSE && error) || (cond == TRUE && !error)) {
+ curwp->w_doto = 0;
+ curwp->w_dotp = clp;
+ count++;
+ s = ldelete(llength(clp) + 1, KNONE);
+ clp = curwp->w_dotp;
+ curwp->w_rflag |= WFMOVE;
+ if (s == FALSE)
+ return (FALSE);
+ } else
+ clp = lforw(clp);
+ }
+
+ ewprintf("%d line(s) deleted", count);
+ if (count > 0)
+ curwp->w_rflag |= WFMOVE;
+
+ return (TRUE);
+}
+
+/*
+ * Count lines matching regex.
+ */
+/* ARGSUSED */
+int
+cntmatchlines(int f, int n)
+{
+ int s;
+
+ if ((s = re_readpattern("Count lines (matching regexp)")) != TRUE)
+ return (s);
+ s = countmatches(TRUE);
+
+ return (s);
+}
+
+/*
+ * Count lines that fail to match regex.
+ */
+/* ARGSUSED */
+int
+cntnonmatchlines(int f, int n)
+{
+ int s;
+
+ if ((s = re_readpattern("Count lines (not matching regexp)")) != TRUE)
+ return (s);
+ s = countmatches(FALSE);
+
+ return (s);
+}
+
+/*
+ * This function does the work of counting matching lines.
+ */
+int
+countmatches(int cond)
+{
+ int error;
+ int count = 0;
+ struct line *clp;
+
+ clp = curwp->w_dotp;
+ if (curwp->w_doto == llength(clp))
+ /* Consider dot on next line */
+ clp = lforw(clp);
+
+ while (clp != (curbp->b_headp)) {
+ /* see if line matches */
+ re_match[0].rm_so = 0;
+ re_match[0].rm_eo = llength(clp);
+ error = regexec(&re_buff, ltext(clp), RE_NMATCH, re_match,
+ REG_STARTEND);
+
+ /* Count line when appropriate */
+ if ((cond == FALSE && error) || (cond == TRUE && !error))
+ count++;
+ clp = lforw(clp);
+ }
+
+ if (cond)
+ ewprintf("Number of lines matching: %d", count);
+ else
+ ewprintf("Number of lines not matching: %d", count);
+
+ return (TRUE);
+}
+#endif /* REGEX */