]> pd.if.org Git - pd_readline/blobdiff - mg/re_search.c
Added mg from an OpenBSD mirror site. Many thanks to the OpenBSD team and the mg...
[pd_readline] / mg / re_search.c
diff --git a/mg/re_search.c b/mg/re_search.c
new file mode 100644 (file)
index 0000000..db39fdc
--- /dev/null
@@ -0,0 +1,615 @@
+/*     $OpenBSD: re_search.c,v 1.26 2011/01/21 19:10:13 kjell Exp $    */
+
+/* This file is in the public domain. */
+
+/*
+ *     regular expression search commands for Mg
+ *
+ * This file contains functions to implement several of gnuemacs's regular
+ * expression functions for Mg.  Several of the routines below are just minor
+ * re-arrangements of Mg's non-regular expression search functions.  Some of
+ * them are similar in structure to the original MicroEMACS, others are
+ * modifications of Rich Ellison's code.  Peter Newton re-wrote about half of
+ * them from scratch.
+ */
+
+#ifdef REGEX
+#include "def.h"
+
+#include <sys/types.h>
+#include <regex.h>
+
+#include "macro.h"
+
+#define SRCH_BEGIN     (0)             /* search sub-codes                 */
+#define SRCH_FORW      (-1)
+#define SRCH_BACK      (-2)
+#define SRCH_NOPR      (-3)
+#define SRCH_ACCM      (-4)
+#define SRCH_MARK      (-5)
+
+#define RE_NMATCH      10              /* max number of matches            */
+#define REPLEN         256             /* max length of replacement string */
+
+char   re_pat[NPAT];                   /* regex pattern                    */
+int    re_srch_lastdir = SRCH_NOPR;    /* last search flags                */
+int    casefoldsearch = TRUE;          /* does search ignore case?         */
+
+static int      re_doreplace(RSIZE, char *);
+static int      re_forwsrch(void);
+static int      re_backsrch(void);
+static int      re_readpattern(char *);
+static int      killmatches(int);
+static int      countmatches(int);
+
+/*
+ * Search forward.
+ * Get a search string from the user and search for it starting at ".".  If
+ * found, move "." to just after the matched characters.  display does all
+ * the hard stuff.  If not found, it just prints a message.
+ */
+/* ARGSUSED */
+int
+re_forwsearch(int f, int n)
+{
+       int     s;
+
+       if ((s = re_readpattern("RE Search")) != TRUE)
+               return (s);
+       if (re_forwsrch() == FALSE) {
+               ewprintf("Search failed: \"%s\"", re_pat);
+               return (FALSE);
+       }
+       re_srch_lastdir = SRCH_FORW;
+       return (TRUE);
+}
+
+/*
+ * Reverse search.
+ * Get a search string from the user, and search, starting at "."
+ * and proceeding toward the front of the buffer. If found "." is left
+ * pointing at the first character of the pattern [the last character that
+ * was matched].
+ */
+/* ARGSUSED */
+int
+re_backsearch(int f, int n)
+{
+       int     s;
+
+       if ((s = re_readpattern("RE Search backward")) != TRUE)
+               return (s);
+       if (re_backsrch() == FALSE) {
+               ewprintf("Search failed: \"%s\"", re_pat);
+               return (FALSE);
+       }
+       re_srch_lastdir = SRCH_BACK;
+       return (TRUE);
+}
+
+/*
+ * Search again, using the same search string and direction as the last search
+ * command.  The direction has been saved in "srch_lastdir", so you know which
+ * way to go.
+ *
+ * XXX: This code has problems -- some incompatibility(?) with extend.c causes
+ * match to fail when it should not.
+ */
+/* ARGSUSED */
+int
+re_searchagain(int f, int n)
+{
+       if (re_srch_lastdir == SRCH_NOPR) {
+               ewprintf("No last search");
+               return (FALSE);
+       }
+       if (re_srch_lastdir == SRCH_FORW) {
+               if (re_forwsrch() == FALSE) {
+                       ewprintf("Search failed: \"%s\"", re_pat);
+                       return (FALSE);
+               }
+               return (TRUE);
+       }
+       if (re_srch_lastdir == SRCH_BACK)
+               if (re_backsrch() == FALSE) {
+                       ewprintf("Search failed: \"%s\"", re_pat);
+                       return (FALSE);
+               }
+
+       return (TRUE);
+}
+
+/* Compiled regex goes here-- changed only when new pattern read */
+static regex_t         re_buff;
+static regmatch_t      re_match[RE_NMATCH];
+
+/*
+ * Re-Query Replace.
+ *     Replace strings selectively.  Does a search and replace operation.
+ */
+/* ARGSUSED */
+int
+re_queryrepl(int f, int n)
+{
+       int     rcnt = 0;               /* replacements made so far     */
+       int     plen, s;                /* length of found string       */
+       char    news[NPAT];             /* replacement string           */
+
+       if ((s = re_readpattern("RE Query replace")) != TRUE)
+               return (s);
+       if (eread("Query replace %s with: ", news, NPAT,
+           EFNUL | EFNEW | EFCR, re_pat) == NULL)
+               return (ABORT);
+       ewprintf("Query replacing %s with %s:", re_pat, news);
+
+       /*
+        * Search forward repeatedly, checking each time whether to insert
+        * or not.  The "!" case makes the check always true, so it gets put
+        * into a tighter loop for efficiency.
+        */
+       while (re_forwsrch() == TRUE) {
+retry:
+               update();
+               switch (getkey(FALSE)) {
+               case ' ':
+                       plen = re_match[0].rm_eo - re_match[0].rm_so;
+                       if (re_doreplace((RSIZE)plen, news) == FALSE)
+                               return (FALSE);
+                       rcnt++;
+                       break;
+
+               case '.':
+                       plen = re_match[0].rm_eo - re_match[0].rm_so;
+                       if (re_doreplace((RSIZE)plen, news) == FALSE)
+                               return (FALSE);
+                       rcnt++;
+                       goto stopsearch;
+
+               case CCHR('G'):                         /* ^G */
+                       (void)ctrlg(FFRAND, 0);
+                       goto stopsearch;
+               case CCHR('['):                         /* ESC */
+               case '`':
+                       goto stopsearch;
+               case '!':
+                       do {
+                               plen = re_match[0].rm_eo - re_match[0].rm_so;
+                               if (re_doreplace((RSIZE)plen, news) == FALSE)
+                                       return (FALSE);
+                               rcnt++;
+                       } while (re_forwsrch() == TRUE);
+                       goto stopsearch;
+
+               case CCHR('?'):                         /* To not replace */
+                       break;
+
+               default:
+                       ewprintf("<SP> replace, [.] rep-end, <DEL> don't, [!] repl rest <ESC> quit");
+                       goto retry;
+               }
+       }
+
+stopsearch:
+       curwp->w_rflag |= WFFULL;
+       update();
+       if (!inmacro) {
+               if (rcnt == 0)
+                       ewprintf("(No replacements done)");
+               else if (rcnt == 1)
+                       ewprintf("(1 replacement done)");
+               else
+                       ewprintf("(%d replacements done)", rcnt);
+       }
+       return (TRUE);
+}
+
+/*
+ * Routine re_doreplace calls lreplace to make replacements needed by
+ * re_query replace.  Its reason for existence is to deal with \1, \2. etc.
+ *  plen: length to remove
+ *  st:   replacement string
+ */
+static int
+re_doreplace(RSIZE plen, char *st)
+{
+       int      j, k, s, more, num, state;
+       struct line     *clp;
+       char     repstr[REPLEN];
+
+       clp = curwp->w_dotp;
+       more = TRUE;
+       j = 0;
+       state = 0;
+       num = 0;
+
+       /* The following FSA parses the replacement string */
+       while (more) {
+               switch (state) {
+               case 0:
+                       if (*st == '\\') {
+                               st++;
+                               state = 1;
+                       } else if (*st == '\0')
+                               more = FALSE;
+                       else {
+                               repstr[j] = *st;
+                               j++;
+                               if (j >= REPLEN)
+                                       return (FALSE);
+                               st++;
+                       }
+                       break;
+               case 1:
+                       if (*st >= '0' && *st <= '9') {
+                               num = *st - '0';
+                               st++;
+                               state = 2;
+                       } else if (*st == '\0')
+                               more = FALSE;
+                       else {
+                               repstr[j] = *st;
+                               j++;
+                               if (j >= REPLEN)
+                                       return (FALSE);
+                               st++;
+                               state = 0;
+                       }
+                       break;
+               case 2:
+                       if (*st >= '0' && *st <= '9') {
+                               num = 10 * num + *st - '0';
+                               st++;
+                       } else {
+                               if (num >= RE_NMATCH)
+                                       return (FALSE);
+                               k = re_match[num].rm_eo - re_match[num].rm_so;
+                               if (j + k >= REPLEN)
+                                       return (FALSE);
+                               bcopy(&(clp->l_text[re_match[num].rm_so]),
+                                   &repstr[j], k);
+                               j += k;
+                               if (*st == '\0')
+                                       more = FALSE;
+                               if (*st == '\\') {
+                                       st++;
+                                       state = 1;
+                               } else {
+                                       repstr[j] = *st;
+                                       j++;
+                                       if (j >= REPLEN)
+                                               return (FALSE);
+                                       st++;
+                                       state = 0;
+                               }
+                       }
+                       break;
+               }               /* switch (state) */
+       }                       /* while (more)   */
+
+       repstr[j] = '\0';
+       s = lreplace(plen, repstr);
+       return (s);
+}
+
+/*
+ * This routine does the real work of a forward search.  The pattern is
+ * sitting in the external variable "pat".  If found, dot is updated, the
+ * window system is notified of the change, and TRUE is returned.  If the
+ * string isn't found, FALSE is returned.
+ */
+static int
+re_forwsrch(void)
+{
+       int      tbo, error;
+       struct line     *clp;
+
+       clp = curwp->w_dotp;
+       tbo = curwp->w_doto;
+
+       if (tbo == clp->l_used)
+               /*
+                * Don't start matching past end of line -- must move to
+                * beginning of next line, unless at end of file.
+                */
+               if (clp != curbp->b_headp) {
+                       clp = lforw(clp);
+                       tbo = 0;
+               }
+       /*
+        * Note this loop does not process the last line, but this editor
+        * always makes the last line empty so this is good.
+        */
+       while (clp != (curbp->b_headp)) {
+               re_match[0].rm_so = tbo;
+               re_match[0].rm_eo = llength(clp);
+               error = regexec(&re_buff, ltext(clp), RE_NMATCH, re_match,
+                   REG_STARTEND);
+               if (error != 0) {
+                       clp = lforw(clp);
+                       tbo = 0;
+               } else {
+                       curwp->w_doto = re_match[0].rm_eo;
+                       curwp->w_dotp = clp;
+                       curwp->w_rflag |= WFMOVE;
+                       return (TRUE);
+               }
+       }
+       return (FALSE);
+}
+
+/*
+ * This routine does the real work of a backward search.  The pattern is sitting
+ * in the external variable "re_pat".  If found, dot is updated, the window
+ * system is notified of the change, and TRUE is returned.  If the string isn't
+ * found, FALSE is returned.
+ */
+static int
+re_backsrch(void)
+{
+       struct line             *clp;
+       int              tbo;
+       regmatch_t       lastmatch;
+
+       clp = curwp->w_dotp;
+       tbo = curwp->w_doto;
+
+       /* Start search one position to the left of dot */
+       tbo = tbo - 1;
+       if (tbo < 0) {
+               /* must move up one line */
+               clp = lback(clp);
+               tbo = llength(clp);
+       }
+
+       /*
+        * Note this loop does not process the last line, but this editor
+        * always makes the last line empty so this is good.
+        */
+       while (clp != (curbp->b_headp)) {
+               re_match[0].rm_so = 0;
+               re_match[0].rm_eo = llength(clp);
+               lastmatch.rm_so = -1;
+               /*
+                * Keep searching until we don't match any longer.  Assumes a
+                * non-match does not modify the re_match array.  We have to
+                * do this character-by-character after the first match since
+                * POSIX regexps don't give you a way to do reverse matches.
+                */
+               while (!regexec(&re_buff, ltext(clp), RE_NMATCH, re_match,
+                   REG_STARTEND) && re_match[0].rm_so < tbo) {
+                       memcpy(&lastmatch, &re_match[0], sizeof(regmatch_t));
+                       re_match[0].rm_so++;
+                       re_match[0].rm_eo = llength(clp);
+               }
+               if (lastmatch.rm_so == -1) {
+                       clp = lback(clp);
+                       tbo = llength(clp);
+               } else {
+                       memcpy(&re_match[0], &lastmatch, sizeof(regmatch_t));
+                       curwp->w_doto = re_match[0].rm_so;
+                       curwp->w_dotp = clp;
+                       curwp->w_rflag |= WFMOVE;
+                       return (TRUE);
+               }
+       }
+       return (FALSE);
+}
+
+/*
+ * Read a pattern.
+ * Stash it in the external variable "re_pat". The "pat" is
+ * not updated if the user types in an empty line. If the user typed
+ * an empty line, and there is no old pattern, it is an error.
+ * Display the old pattern, in the style of Jeff Lomicka. There is
+ * some do-it-yourself control expansion.
+ */
+static int
+re_readpattern(char *prompt)
+{
+       static int      dofree = 0;
+       int             flags, error, s;
+       char            tpat[NPAT], *rep;
+
+       if (re_pat[0] == '\0')
+               rep = eread("%s: ", tpat, NPAT, EFNEW | EFCR, prompt);
+       else
+               rep = eread("%s: (default %s) ", tpat, NPAT,
+                   EFNUL | EFNEW | EFCR, prompt, re_pat);
+       if (rep == NULL)
+               return (ABORT);
+       if (rep[0] != '\0') {
+               /* New pattern given */
+               (void)strlcpy(re_pat, tpat, sizeof(re_pat));
+               if (casefoldsearch)
+                       flags = REG_EXTENDED | REG_ICASE;
+               else
+                       flags = REG_EXTENDED;
+               if (dofree)
+                       regfree(&re_buff);
+               error = regcomp(&re_buff, re_pat, flags);
+               if (error != 0) {
+                       char    message[256];
+                       regerror(error, &re_buff, message, sizeof(message));
+                       ewprintf("Regex Error: %s", message);
+                       re_pat[0] = '\0';
+                       return (FALSE);
+               }
+               dofree = 1;
+               s = TRUE;
+       } else if (rep[0] == '\0' && re_pat[0] != '\0')
+               /* Just using old pattern */
+               s = TRUE;
+       else
+               s = FALSE;
+       return (s);
+}
+
+/*
+ * Cause case to not matter in searches.  This is the default. If called
+ * with argument cause case to matter.
+ */
+/* ARGSUSED*/
+int
+setcasefold(int f, int n)
+{
+       if (f & FFARG) {
+               casefoldsearch = FALSE;
+               ewprintf("Case-fold-search unset");
+       } else {
+               casefoldsearch = TRUE;
+               ewprintf("Case-fold-search set");
+       }
+
+       /*
+        * Invalidate the regular expression pattern since I'm too lazy to
+        * recompile it.
+        */
+       re_pat[0] = '\0';
+       return (TRUE);
+}
+
+/*
+ * Delete all lines after dot that contain a string matching regex.
+ */
+/* ARGSUSED */
+int
+delmatchlines(int f, int n)
+{
+       int     s;
+
+       if ((s = re_readpattern("Flush lines (containing match for regexp)"))
+           != TRUE)
+               return (s);
+
+       s = killmatches(TRUE);
+       return (s);
+}
+
+/*
+ * Delete all lines after dot that don't contain a string matching regex.
+ */
+/* ARGSUSED */
+int
+delnonmatchlines(int f, int n)
+{
+       int     s;
+
+       if ((s = re_readpattern("Keep lines (containing match for regexp)"))
+           != TRUE)
+               return (s);
+
+       s = killmatches(FALSE);
+       return (s);
+}
+
+/*
+ * This function does the work of deleting matching lines.
+ */
+static int
+killmatches(int cond)
+{
+       int      s, error;
+       int      count = 0;
+       struct line     *clp;
+
+       clp = curwp->w_dotp;
+       if (curwp->w_doto == llength(clp))
+               /* Consider dot on next line */
+               clp = lforw(clp);
+
+       while (clp != (curbp->b_headp)) {
+               /* see if line matches */
+               re_match[0].rm_so = 0;
+               re_match[0].rm_eo = llength(clp);
+               error = regexec(&re_buff, ltext(clp), RE_NMATCH, re_match,
+                   REG_STARTEND);
+
+               /* Delete line when appropriate */
+               if ((cond == FALSE && error) || (cond == TRUE && !error)) {
+                       curwp->w_doto = 0;
+                       curwp->w_dotp = clp;
+                       count++;
+                       s = ldelete(llength(clp) + 1, KNONE);
+                       clp = curwp->w_dotp;
+                       curwp->w_rflag |= WFMOVE;
+                       if (s == FALSE)
+                               return (FALSE);
+               } else
+                       clp = lforw(clp);
+       }
+
+       ewprintf("%d line(s) deleted", count);
+       if (count > 0)
+               curwp->w_rflag |= WFMOVE;
+
+       return (TRUE);
+}
+
+/*
+ * Count lines matching regex.
+ */
+/* ARGSUSED */
+int
+cntmatchlines(int f, int n)
+{
+       int     s;
+
+       if ((s = re_readpattern("Count lines (matching regexp)")) != TRUE)
+               return (s);
+       s = countmatches(TRUE);
+
+       return (s);
+}
+
+/*
+ * Count lines that fail to match regex.
+ */
+/* ARGSUSED */
+int
+cntnonmatchlines(int f, int n)
+{
+       int     s;
+
+       if ((s = re_readpattern("Count lines (not matching regexp)")) != TRUE)
+               return (s);
+       s = countmatches(FALSE);
+
+       return (s);
+}
+
+/*
+ * This function does the work of counting matching lines.
+ */
+int
+countmatches(int cond)
+{
+       int      error;
+       int      count = 0;
+       struct line     *clp;
+
+       clp = curwp->w_dotp;
+       if (curwp->w_doto == llength(clp))
+               /* Consider dot on next line */
+               clp = lforw(clp);
+
+       while (clp != (curbp->b_headp)) {
+               /* see if line matches */
+               re_match[0].rm_so = 0;
+               re_match[0].rm_eo = llength(clp);
+               error = regexec(&re_buff, ltext(clp), RE_NMATCH, re_match,
+                   REG_STARTEND);
+
+               /* Count line when appropriate */
+               if ((cond == FALSE && error) || (cond == TRUE && !error))
+                       count++;
+               clp = lforw(clp);
+       }
+
+       if (cond)
+               ewprintf("Number of lines matching: %d", count);
+       else
+               ewprintf("Number of lines not matching: %d", count);
+
+       return (TRUE);
+}
+#endif /* REGEX */