Fix --summary=tag highlighting bugs, make pyzet use INDEX_SUMMARISE_TAG

author Nick Downing <downing.nick@gmail.com>

Fri, 5 Jan 2018 15:55:10 +0000 (10:55 -0500)

committer Nick Downing <nick@ndcode.org>

Tue, 6 Nov 2018 00:08:37 +0000 (11:08 +1100)
author Nick Downing <downing.nick@gmail.com>
Fri, 5 Jan 2018 15:55:10 +0000 (10:55 -0500)
committer Nick Downing <nick@ndcode.org>
Tue, 6 Nov 2018 00:08:37 +0000 (11:08 +1100)
diff --git a/src/pyzet/zetmodule.c b/src/pyzet/zetmodule.c

index 122c364..b8fabc9 100644 (file)
--- a/src/pyzet/zetmodule.c
+++ b/src/pyzet/zetmodule.c
@@ -1076,7 +1076,7 @@ static PyObject * Index_search(PyObject * self, PyObject * args,
      }
  #if 1 /* Nick */
   opts |= INDEX_SEARCH_SUMMARY_TYPE;
- opt.summary_type = INDEX_SUMMARISE_CAPITALISE;
+ opt.summary_type = INDEX_SUMMARISE_TAG;
  #endif
      if (!index_search(Index->idx, query, startdoc, len,
            result, &results, &total_results, &est, opts, &opt)) {
@@ -1428,7 +1428,7 @@ zet_search(PyObject *self, PyObject *args) {
      }
  #if 1 /* Nick */
   opts |= INDEX_SEARCH_SUMMARY_TYPE;
- opt.summary_type = INDEX_SUMMARISE_CAPITALISE;
+ opt.summary_type = INDEX_SUMMARISE_TAG;
  #endif
      if (!index_search(idx, query, startdoc, len, result, 
            &results, &total_results, &est, opts, &opt)) {
diff --git a/src/summarise.c b/src/summarise.c

index 8d5bc2c..03f3484 100644 (file)
--- a/src/summarise.c
+++ b/src/summarise.c
@@ -147,6 +147,13 @@ static int ensure_space(struct sentence **sent, unsigned int space) {
  /* internal function to finish up extraction of a sentence */
  static struct sentence *extract_finish(struct sentence *sent, struct persum *ps,
    enum index_summary_type type, int highlight) {
+#if 1 // Nick, do this first
+    /* remove superfluous whitespace from the end of the sentence */
+    while (sent->buf[sent->buflen - 1] == ' ') {
+        sent->buflen--;
+    }
+#endif
+
      if (highlight && type == INDEX_SUMMARISE_TAG) {
          /* need to close the tag */
          if (sent->buflen + str_len("</b>") >= sent->bufsize 
@@ -162,13 +169,14 @@ static struct sentence *extract_finish(struct sentence *sent, struct persum *ps,
                && (space < str_len("</b>"))) {
                  sent->buflen--;
              }
-
-            /* end highlighting */
-            str_cpy(sent->buf + sent->buflen, "</b>");
-            sent->buflen += str_len("</b>");
          }
+
+        /* end highlighting */
+        str_cpy(sent->buf + sent->buflen, "</b>");
+        sent->buflen += str_len("</b>");
      }
  
+#if 0 // Nick, don't want to remove markup by mistake, revisit this later
      /* trim overly-long sentence term-by-term */
      while (sent->buflen > ps->summary_len) {
          sent->buflen--;
@@ -181,8 +189,10 @@ static struct sentence *extract_finish(struct sentence *sent, struct persum *ps,
      while (sent->buf[sent->buflen - 1] == ' ') {
          sent->buflen--;
      }
+#endif
  
      sent->buf[sent->buflen] = '\0';
+ //printf("sent->buf %s\n", sent->buf);
      return sent;
  }
author	Nick Downing <downing.nick@gmail.com>
	Fri, 5 Jan 2018 15:55:10 +0000 (10:55 -0500)
committer	Nick Downing <nick@ndcode.org>
	Tue, 6 Nov 2018 00:08:37 +0000 (11:08 +1100)
src/pyzet/zetmodule.c		patch | blob | history
src/summarise.c		patch | blob | history