blob: c30fc6c66b01dd925a5e4a8118fc321aef23bf42 [file] [log] [blame]
%include {
/* queryparser.lemony: build a Xapian::Query object from a user query string.
*
* Copyright (C) 2004,2005,2006,2007 Olly Betts
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
* USA
*/
#include <config.h>
#include "omassert.h"
#include "queryparser_internal.h"
#include <xapian/error.h>
#include <xapian/unicode.h>
#include "stringutils.h"
// Include the list of token values lemon generates.
#include "queryparser_token.h"
#include <algorithm>
#include <list>
#include <string>
#include <string.h>
using namespace std;
using namespace Xapian;
/// ASCII-only uppercase test (non-ASCII codepoints are never upper here).
inline bool
U_isupper(unsigned ch) {
    if (ch >= 128) return false;
    return C_isupper(static_cast<unsigned char>(ch));
}
/// ASCII-only digit test (non-ASCII codepoints are never digits here).
inline bool
U_isdigit(unsigned ch) {
    if (ch >= 128) return false;
    return C_isdigit(static_cast<unsigned char>(ch));
}
/// ASCII-only alphabetic test (non-ASCII codepoints are never alpha here).
inline bool
U_isalpha(unsigned ch) {
    if (ch >= 128) return false;
    return C_isalpha(static_cast<unsigned char>(ch));
}
using Xapian::Unicode::is_whitespace;
/// Negation of Xapian::Unicode::is_whitespace, usable as a find_if predicate.
inline bool
is_not_whitespace(unsigned ch) {
    return is_whitespace(ch) ? false : true;
}
using Xapian::Unicode::is_wordchar;
/// Negation of Xapian::Unicode::is_wordchar, usable as a find_if predicate.
inline bool
is_not_wordchar(unsigned ch) {
    return is_wordchar(ch) ? false : true;
}
/// True if @a ch is a Unicode decimal digit (any script, not just ASCII).
inline bool
is_digit(unsigned ch) {
    return Unicode::get_category(ch) == Unicode::DECIMAL_DIGIT_NUMBER;
}
// FIXME: we used to keep trailing "-" (e.g. Cl-) but it's of dubious utility
// and there's the risk of hyphens getting stuck onto the end of terms...
/// Characters we allow as a trailing suffix on a term (e.g. C++, C#, Na+).
inline bool
is_suffix(unsigned ch) {
    switch (ch) {
	case '+':
	case '#':
	    return true;
	default:
	    return false;
    }
}
/** Does a prefixed term need a ':' between prefix and term?
 *
 *  A colon separator is only needed when the term starts with an (ASCII)
 *  capital, which could otherwise be misread as part of a multi-character
 *  prefix; single-character prefixes never need one.
 */
inline bool
prefix_needs_colon(const string & prefix, unsigned ch)
{
    if (!U_isupper(ch)) return false;
    string::size_type len = prefix.length();
    if (len <= 1) return false;
    return prefix[len - 1] != ':';
}
using Unicode::is_currency;
/// A structure identifying a group of filter terms.
struct filter_group_id {
    /// The prefixes of the filter terms (used for boolean filter terms).
    list<string> prefixes;

    /// The value number of the filter terms (used for value range terms).
    Xapian::valueno valno;

    /// Construct a filter_group_id for boolean filter terms.
    explicit filter_group_id(const list<string> & prefixes_)
	: prefixes(prefixes_), valno(Xapian::BAD_VALUENO) {}

    /// Construct a filter_group_id for value range terms.
    explicit filter_group_id(Xapian::valueno valno_)
	: prefixes(), valno(valno_) {}

    /// Ordering, so a filter_group_id can be used as a map key.
    bool operator<(const filter_group_id & other) const {
	// Primary key: prefixes; secondary key: value number.
	if (prefixes < other.prefixes) return true;
	if (other.prefixes < prefixes) return false;
	return valno < other.valno;
    }
};
/** Class used to pass information about a token from lexer to parser.
*
* Generally an instance of this class carries term information, but it can be
* used for the start or end of a value range, with some operators (e.g. the
* distance in NEAR/3 or ADJ/3, etc).
*/
class Term {
State * state;
public:
string name;
list<string> prefixes;
string unstemmed;
QueryParser::stem_strategy stem;
termpos pos;
Term(const string &name_, termpos pos_) : name(name_), stem(QueryParser::STEM_NONE), pos(pos_) { }
Term(const string &name_) : name(name_), stem(QueryParser::STEM_NONE), pos(0) { }
Term(const string &name_, const list<string> &prefixes_)
: name(name_), prefixes(prefixes_), stem(QueryParser::STEM_NONE), pos(0) { }
Term(termpos pos_) : stem(QueryParser::STEM_NONE), pos(pos_) { }
Term(State * state_, const string &name_, const list<string> &prefixes_,
const string &unstemmed_,
QueryParser::stem_strategy stem_ = QueryParser::STEM_NONE,
termpos pos_ = 0)
: state(state_), name(name_), prefixes(prefixes_), unstemmed(unstemmed_),
stem(stem_), pos(pos_) { }
std::string make_term(const string & prefix) const;
void need_positions() {
if (stem == QueryParser::STEM_SOME) stem = QueryParser::STEM_NONE;
}
termpos get_termpos() const { return pos; }
filter_group_id get_filter_group_id() const { return filter_group_id(prefixes); }
Query * as_wildcarded_query(State * state) const;
Query * as_partial_query(State * state_) const;
Query get_query() const;
Query get_query_with_synonyms() const;
Query get_query_with_auto_synonyms() const;
};
/// Parser State shared between the lexer and the parser.
class State {
    QueryParser::Internal * qpi;

  public:
    Query query;
    const char * error;
    unsigned flags;

    State(QueryParser::Internal * qpi_, unsigned flags_)
	: qpi(qpi_), error(NULL), flags(flags_) { }

    /// Apply the configured stemmer to @a term.
    string stem_term(const string &term) {
	return qpi->stemmer(term);
    }

    /// Record that @a term was dropped as a stopword.
    void add_to_stoplist(const Term * term) {
	qpi->stoplist.push_back(term->name);
    }

    /// Remember the unstemmed form which produced (stemmed) @a term.
    void add_to_unstem(const string & term, const string & unstemmed) {
	qpi->unstem.insert(make_pair(term, unstemmed));
    }

    /** Try each registered ValueRangeProcessor in turn on the range a..b.
     *
     *  On success, @a q is set to an OP_VALUE_RANGE query, @a a and @a b are
     *  deleted, and the matching value number is returned.  On failure,
     *  error is set and BAD_VALUENO is returned (a and b are NOT deleted -
     *  the parser's token destructors own them in that case).
     */
    valueno value_range(Query & q, Term *a, Term *b) {
	string start = a->name;
	string end = b->name;
	list<ValueRangeProcessor *>::const_iterator i = qpi->valrangeprocs.begin();
	while (i != qpi->valrangeprocs.end()) {
	    Xapian::valueno valno = (**i)(start, end);
	    if (valno != Xapian::BAD_VALUENO) {
		delete a;
		delete b;
		q = Query(Query::OP_VALUE_RANGE, valno, start, end);
		return valno;
	    }
	    ++i;
	}
	// FIXME: Do we want to report an error for this?  If not we need
	// to perform the above check in the tokeniser and if none of the
	// ValueRangeProcessor classes like the range, we rollback to
	// parsing the query without treating this as a range.  Needs
	// more thought and probably a look at queries users actually
	// enter.
	error = "Unknown range operation";
	return Xapian::BAD_VALUENO;
    }

    Query::op default_op() const { return qpi->default_op; }

    /// Is @a term a stopword according to the configured Stopper (if any)?
    bool is_stopword(const Term *term) const {
	return qpi->stopper && (*qpi->stopper)(term->name);
    }

    Database get_database() const {
	return qpi->db;
    }
};
/** Build the database term: [Z][prefix[:]](stemmed|raw) name.
 *
 *  Also records the unstemmed form for QueryParser::unstem_begin() when one
 *  was captured.
 */
string
Term::make_term(const string & prefix) const
{
    string term;
    // 'Z' marks a stemmed form when STEM_SOME is in effect.
    if (stem == QueryParser::STEM_SOME) term += 'Z';
    if (!prefix.empty()) {
	term += prefix;
	if (prefix_needs_colon(prefix, name[0])) term += ':';
    }
    term += (stem == QueryParser::STEM_NONE) ? name : state->stem_term(name);
    if (!unstemmed.empty())
	state->add_to_unstem(term, unstemmed);
    return term;
}
/** Build the query for this term, OR-ed with any single-word synonyms.
 *
 *  For each prefix, the synonym list for the unstemmed prefixed form is
 *  looked up in the database; if that's empty and the term would be stemmed,
 *  the stemmed ('Z'-prefixed) form is tried instead.  Each synonym found is
 *  OR-ed in at this term's position.
 */
Query
Term::get_query_with_synonyms() const
{
    Query q = get_query();

    // Handle single-word synonyms with each prefix.
    list<string>::const_iterator piter;
    for (piter = prefixes.begin(); piter != prefixes.end(); ++piter) {
	// First try the unstemmed term:
	string term;
	if (!piter->empty()) {
	    term += *piter;
	    if (prefix_needs_colon(*piter, name[0])) term += ':';
	}
	term += name;

	Xapian::Database db = state->get_database();
	Xapian::TermIterator syn = db.synonyms_begin(term);
	Xapian::TermIterator end = db.synonyms_end(term);
	if (syn == end && stem != QueryParser::STEM_NONE) {
	    // If that has no synonyms, try the stemmed form.
	    // (Assigning 'Z' replaces term's contents with just the stem
	    // marker - it doesn't append.)
	    term = 'Z';
	    if (!piter->empty()) {
		term += *piter;
		if (prefix_needs_colon(*piter, name[0])) term += ':';
	    }
	    term += state->stem_term(name);
	    syn = db.synonyms_begin(term);
	    end = db.synonyms_end(term);
	}
	while (syn != end) {
	    q = Query(Query::OP_OR, q, Query(*syn, 1, pos));
	    ++syn;
	}
    }
    return q;
}
/// As get_query(), but expand synonyms when FLAG_AUTO_SYNONYMS is enabled.
Query
Term::get_query_with_auto_synonyms() const
{
    bool want_synonyms = (state->flags & QueryParser::FLAG_AUTO_SYNONYMS) != 0;
    return want_synonyms ? get_query_with_synonyms() : get_query();
}
/// Combine @a term into @a q with @a op; takes ownership of @a term.
static void
add_to_query(Query *& q, Query::op op, Query * term)
{
    Assert(term);
    if (!q) {
	// Nothing accumulated yet - just adopt the term's query.
	q = term;
	return;
    }
    *q = Query(op, *q, *term);
    delete term;
}
/// Combine @a term into @a q with @a op (copying overload).
static void
add_to_query(Query *& q, Query::op op, const Query & term)
{
    if (q == NULL) {
	q = new Query(term);
    } else {
	*q = Query(op, *q, term);
    }
}
/// Build the query for this term: one leaf per prefix, OR-ed together.
Query
Term::get_query() const
{
    Assert(prefixes.size() >= 1);
    list<string>::const_iterator piter = prefixes.begin();
    Query q(make_term(*piter), 1, pos);
    for (++piter; piter != prefixes.end(); ++piter) {
	q = Query(Query::OP_OR, q, Query(make_term(*piter), 1, pos));
    }
    return q;
}
/** Expand a trailing wildcard: OR together every term in the database with
 *  this term (under each prefix) as a prefix.  Consumes (deletes) this Term.
 */
Query *
Term::as_wildcarded_query(State * state_) const
{
    Database db = state_->get_database();
    Query * q = new Query;
    list<string>::const_iterator piter;
    for (piter = prefixes.begin(); piter != prefixes.end(); ++piter) {
	const string root = *piter + name;
	for (TermIterator t = db.allterms_begin(root);
	     t != db.allterms_end(root);
	     ++t) {
	    add_to_query(q, Query::OP_OR, Query(*t, 1, pos));
	}
    }
    delete this;
    return q;
}
/** Expand a partial (as-you-type) term: like as_wildcarded_query(), but the
 *  term as normally parsed is also OR-ed in as an alternative.  Consumes
 *  (deletes) this Term.
 */
Query *
Term::as_partial_query(State * state_) const
{
    Database db = state_->get_database();
    Query * q = new Query;
    list<string>::const_iterator piter;
    for (piter = prefixes.begin(); piter != prefixes.end(); ++piter) {
	const string root = *piter + name;
	for (TermIterator t = db.allterms_begin(root);
	     t != db.allterms_end(root);
	     ++t) {
	    add_to_query(q, Query::OP_OR, Query(*t, 1, pos));
	}
	// Add the term, as it would normally be handled, as an alternative.
	add_to_query(q, Query::OP_OR, Query(make_term(*piter), 1, pos));
    }
    delete this;
    return q;
}
/// Punctuation which joins adjacent terms into a phrase search.
// Ordered mostly by frequency of calls to this function done when
// running queryparsertest.
inline bool
is_phrase_generator(unsigned ch)
{
    // Guard against NUL (strchr would match the terminator) and non-ASCII.
    if (ch == 0 || ch >= 128) return false;
    return strchr(".-/:\\@", static_cast<int>(ch)) != NULL;
}
/// Punctuation which, when it follows a term, prevents that term being
/// stemmed under STEM_SOME.
inline bool
is_stem_preventer(unsigned ch)
{
    // Guard against NUL (strchr would match the terminator) and non-ASCII.
    if (ch == 0 || ch >= 128) return false;
    return strchr("(/\\@<>=*[{\"", static_cast<int>(ch)) != NULL;
}
/// Should this term be stemmed under STEM_SOME?  Decided by the Unicode
/// category of the term's first character.
inline bool
should_stem(const std::string & term)
{
    Utf8Iterator u(term);
    switch (Unicode::get_category(*u)) {
	case Unicode::LOWERCASE_LETTER:
	case Unicode::TITLECASE_LETTER:
	case Unicode::MODIFIER_LETTER:
	case Unicode::OTHER_LETTER:
	    return true;
	default:
	    return false;
    }
}
/** Map an embedded punctuation character to the character to keep in the
 *  term, or 0 if it shouldn't be treated as part of a word (e.g. AT&T,
 *  Fred's).  Unicode apostrophe variants are normalised to ASCII '.
 */
inline unsigned check_infix(unsigned ch) {
    switch (ch) {
	// Unicode includes all these except '&' in its word boundary rules,
	// as well as 0x2019 (handled below) and ':' (for Swedish apparently,
	// but we ignore this for now as it's problematic in real world
	// cases).
	case '\'':
	case '&':
	case 0xb7:
	case 0x5f4:
	case 0x2027:
	    return ch;
	// 0x2019 is Unicode apostrophe and single closing quote.
	// 0x201b is Unicode single opening quote with the tail rising.
	case 0x2019:
	case 0x201b:
	    return '\'';
	default:
	    return 0;
    }
}
/** Map an embedded punctuation character between two digits to the character
 *  to keep in the term, or 0 to break the word there.
 */
inline unsigned check_infix_digit(unsigned ch) {
    // This list of characters comes from Unicode's word identifying
    // algorithm.
    if (ch == ',' || ch == '.' || ch == ';') return ch;
    switch (ch) {
	case 0x037e: // GREEK QUESTION MARK
	case 0x0589: // ARMENIAN FULL STOP
	case 0x060D: // ARABIC DATE SEPARATOR
	case 0x07F8: // NKO COMMA
	case 0x2044: // FRACTION SLASH
	case 0xFE10: // PRESENTATION FORM FOR VERTICAL COMMA
	case 0xFE13: // PRESENTATION FORM FOR VERTICAL COLON
	case 0xFE14: // PRESENTATION FORM FOR VERTICAL SEMICOLON
	    return ch;
	default:
	    return 0;
    }
}
struct yyParser;
// Prototype the functions lemon generates.
static yyParser *ParseAlloc();
static void ParseFree(yyParser *);
static void Parse(yyParser *, int, Term *, State *);
/** Register @a prefix for @a field.
 *
 *  @param filter  true for a boolean filter prefix (add_boolean_prefix()),
 *		   false for a probabilistic prefix (add_prefix()).  Mixing
 *		   the two kinds on one field is an error.
 */
void
QueryParser::Internal::add_prefix(const string &field, const string &prefix,
				  bool filter)
{
    map<string, PrefixInfo>::iterator p = prefixmap.find(field);
    if (p != prefixmap.end()) {
	// Check that this is the same type of filter as the existing one(s).
	if (p->second.filter != filter) {
	    throw Xapian::InvalidOperationError("Can't use add_prefix() and add_bool_prefix() on the same field name");
	}
	p->second.prefixes.push_back(prefix);
	return;
    }
    prefixmap.insert(make_pair(field, PrefixInfo(filter, prefix)));
}
/** Lex one term from the query string.
 *
 *  @param it	       Iterator into the query string; advanced past the term.
 *  @param end	       End of the query string.
 *  @param was_acronym Set to true iff the term was lexed as an acronym
 *		       (e.g. "P.T.O.", returned with the dots removed).
 *
 *  @return The term lexed (possibly keeping a trailing '+'/'#' suffix).
 */
string
QueryParser::Internal::parse_term(Utf8Iterator &it, const Utf8Iterator &end,
				  bool &was_acronym)
{
    string term;
    // Look for initials separated by '.' (e.g. P.T.O., U.N.C.L.E).
    // Don't worry if there's a trailing '.' or not.
    if (U_isupper(*it)) {
	string t;
	Utf8Iterator p = it;
	do {
	    Unicode::append_utf8(t, *p++);
	} while (p != end && *p == '.' && ++p != end && U_isupper(*p));
	// One letter does not make an acronym!  If we handled a single
	// uppercase letter here, we wouldn't catch M&S below.
	if (t.length() > 1) {
	    // Check there's not a (lower case) letter or digit
	    // immediately after it.
	    // FIXME: should I.B.M..P.T.O be a range search?
	    if (p == end || !is_wordchar(*p)) {
		it = p;
		swap(term, t);
	    }
	}
    }
    was_acronym = !term.empty();

    if (term.empty()) {
	// Normal term: consume word characters, allowing certain embedded
	// punctuation between word characters (AT&T, Fred's, 1,000).
	unsigned prevch = *it;
	Unicode::append_utf8(term, prevch);
	while (++it != end) {
	    unsigned ch = *it;
	    if (!is_wordchar(ch)) {
		// Treat a single embedded '&' or "'" or similar as a word
		// character (e.g. AT&T, Fred's).  Also, normalise
		// apostrophes to ASCII apostrophe.
		Utf8Iterator p = it;
		++p;
		if (p == end || !is_wordchar(*p)) break;
		unsigned nextch = *p;
		// Digits get a different set of allowed infix characters
		// (e.g. "," and "." as in 1,000.99).
		if (is_digit(prevch) &&
		    is_digit(nextch)) {
		    ch = check_infix_digit(ch);
		} else {
		    ch = check_infix(ch);
		}
		if (!ch) break;
	    }
	    Unicode::append_utf8(term, ch);
	    prevch = ch;
	}
	if (it != end && is_suffix(*it)) {
	    string suff_term = term;
	    Utf8Iterator p = it;
	    // Keep trailing + (e.g. C++, Na+) or # (e.g. C#).
	    do {
		// At most 3 suffix characters are kept - a 4th (e.g.
		// "C++++") abandons the suffixed form entirely.
		if (suff_term.size() - term.size() == 3) {
		    suff_term.resize(0);
		    break;
		}
		suff_term += *p;
	    } while (is_suffix(*++p));
	    if (!suff_term.empty() && (p == end || !is_wordchar(*p))) {
		// If the suffixed term doesn't exist, check that the
		// non-suffixed term does.  This also takes care of
		// the case when QueryParser::set_database() hasn't
		// been called.
		bool use_suff_term = false;
		string lc = Unicode::tolower(suff_term);
		if (db.term_exists(lc)) {
		    use_suff_term = true;
		} else {
		    lc = Unicode::tolower(term);
		    if (!db.term_exists(lc)) use_suff_term = true;
		}
		if (use_suff_term) {
		    term = suff_term;
		    it = p;
		}
	    }
	}
    }
    return term;
}
/** Tokenise the query string and drive the lemon-generated parser.
 *
 *  @param qs		  The query string to parse.
 *  @param flags	  Bitwise-OR of QueryParser::feature_flag values.
 *  @param default_prefix Prefix to apply to unprefixed terms (may be empty,
 *			  in which case any prefixes registered for the empty
 *			  field name are used).
 *
 *  @return The parsed Query (also sets errmsg on parse errors).
 */
Query
QueryParser::Internal::parse_query(const string &qs, unsigned flags,
				   const string &default_prefix)
{
    yyParser * pParser = ParseAlloc();

    // Set value_ranges if we may have to handle value ranges in the query.
    bool value_ranges;
    value_ranges = !valrangeprocs.empty() && (qs.find("..") != string::npos);

    termpos term_pos = 1;
    Utf8Iterator it(qs), end;

    State state(this, flags);

    // To successfully apply more than one spelling correction to a query
    // string, we must keep track of the offset due to previous corrections.
    int correction_offset = 0;
    corrected_query.resize(0);

    // Stack of prefixes, used for phrases and subexpressions.
    list<const PrefixInfo *> prefix_stack;

    // If default_prefix is specified, use it.  Otherwise, use any list
    // that has been set for the empty prefix.
    const PrefixInfo def_pfx(false, default_prefix);
    {
	const PrefixInfo * default_prefixinfo = &def_pfx;
	if (default_prefix.empty()) {
	    map<string, PrefixInfo>::const_iterator f = prefixmap.find("");
	    if (f != prefixmap.end()) default_prefixinfo = &(f->second);
	}
	// We always have the current prefix on the top of the stack.
	prefix_stack.push_back(default_prefixinfo);
    }

    unsigned newprev = ' ';
main_lex_loop:
    enum {
	DEFAULT, IN_QUOTES, IN_PREFIXED_QUOTES, IN_PHRASED_TERM, IN_GROUP
    } mode = DEFAULT;
    while (it != end) {
	bool last_was_operator = false;
	// The "if (false)" wrapper means the just_had_operator label can
	// only be reached by an explicit goto from further down, after an
	// operator token has been handed to the parser.
	if (false) {
just_had_operator:
	    if (it == end) break;
	    last_was_operator = true;
	    mode = DEFAULT;
	}
	if (mode == IN_PHRASED_TERM) mode = DEFAULT;
	if (is_whitespace(*it)) {
	    newprev = ' ';
	    ++it;
	    it = find_if(it, end, is_not_whitespace);
	    if (it == end) break;
	}

	if ((mode == DEFAULT || mode == IN_GROUP) && value_ranges) {
	    // Scan forward to see if this could be the "start of range"
	    // token.  Sadly this has O(n^2) tendencies, though at least
	    // "n" is the number of words in a query which is likely to
	    // remain fairly small.  FIXME: can we tokenise more elegantly?
	    Utf8Iterator p = it;
	    unsigned ch = 0;
	    while (p != end) {
		if (ch == '.' && *p == '.') {
		    ++p;
		    if (p == end || *p <= ' ' || *p == ')') break;

		    string r;
		    do {
			Unicode::append_utf8(r, *it++);
		    } while (it != p);
		    // Trim off the trailing "..".
		    r.resize(r.size() - 2);
		    Parse(pParser, RANGE_START, new Term(r), &state);
		    r.resize(0);
		    // Allow any character except whitespace and ')' in a
		    // RANGE_END.  Or should we be consistent with
		    // RANGE_START?
		    do {
			Unicode::append_utf8(r, *p++);
		    } while (p != end && *p > ' ' && *p != ')');
		    Parse(pParser, RANGE_END, new Term(r), &state);
		    it = p;
		    goto main_lex_loop;
		}
		ch = *p;
		if (!(is_wordchar(ch) || is_currency(ch) ||
		      (ch < 128 && strchr("%,-./:@", ch)))) break;
		++p;
	    }
	}

	if (!is_wordchar(*it)) {
	    // Punctuation/operator character.
	    unsigned prev = newprev;
	    unsigned ch = *it++;
	    newprev = ch;
	    // Drop out of IN_GROUP mode.
	    if (mode == IN_GROUP) mode = DEFAULT;
	    switch (ch) {
	      case '"': // Quoted phrase.
		if (mode == DEFAULT) {
		    // Skip whitespace.
		    it = find_if(it, end, is_not_whitespace);
		    if (it == end) {
			// Ignore an unmatched " at the end of the query to
			// avoid generating an empty pair of QUOTEs which
			// will cause a parse error.
			goto done;
		    }
		    if (*it == '"') {
			// Ignore empty "" (but only if we're not already
			// IN_QUOTES as we don't merge two adjacent quoted
			// phrases!)
			newprev = *it++;
			break;
		    }
		}
		if (flags & QueryParser::FLAG_PHRASE) {
		    Parse(pParser, QUOTE, NULL, &state);
		    if (mode == DEFAULT) {
			mode = IN_QUOTES;
		    } else {
			// Remove the prefix we pushed for this phrase.
			if (mode == IN_PREFIXED_QUOTES)
			    prefix_stack.pop_back();
			mode = DEFAULT;
		    }
		}
		break;

	      case '+': case '-': // Loved or hated term/phrase/subexpression.
		// Ignore + or - at the end of the query string.
		if (it == end) goto done;
		if (prev > ' ' && prev != '(') {
		    // Or if not after whitespace or an open bracket.
		    break;
		}
		if (is_whitespace(*it) || *it == '+' || *it == '-') {
		    // Ignore + or - followed by a space, or further + or -.
		    // Postfix + (such as in C++ and H+) is handled as part
		    // of the term lexing code in parse_term().
		    newprev = *it++;
		    break;
		}
		if (mode == DEFAULT && (flags & FLAG_LOVEHATE)) {
		    Parse(pParser, (ch == '+' ? LOVE : HATE), NULL, &state);
		    goto just_had_operator;
		}
		// Need to prevent the term after a LOVE or HATE starting a
		// term group...
		break;

	      case '(': // Bracketed subexpression.
		// Skip whitespace.
		it = find_if(it, end, is_not_whitespace);
		// Ignore ( at the end of the query string.
		if (it == end) goto done;
		if (prev > ' ' && strchr("()+-", prev) == NULL) {
		    // Or if not after whitespace or a bracket or '+' or '-'.
		    break;
		}
		if (*it == ')') {
		    // Ignore empty ().
		    newprev = *it++;
		    break;
		}
		if (mode == DEFAULT && (flags & FLAG_BOOLEAN)) {
		    prefix_stack.push_back(prefix_stack.back());
		    Parse(pParser, BRA, NULL, &state);
		}
		break;

	      case ')': // End of bracketed subexpression.
		if (mode == DEFAULT && (flags & FLAG_BOOLEAN)) {
		    // Remove the prefix we pushed for the corresponding BRA.
		    // If brackets are unmatched, it's a syntax error, but
		    // that's no excuse to SEGV!
		    if (prefix_stack.size() > 1) prefix_stack.pop_back();
		    Parse(pParser, KET, NULL, &state);
		}
		break;

	      case '~': // Synonym expansion.
		// Ignore at the end of the query string.
		if (it == end) goto done;
		if (prev > ' ' && prev != '+' && prev != '-' && prev != '(') {
		    // Or if not after whitespace, +, -, or an open bracket.
		    break;
		}
		if (!is_wordchar(*it)) {
		    // Ignore if not followed by a word character.
		    break;
		}
		if (mode == DEFAULT && (flags & FLAG_SYNONYM)) {
		    Parse(pParser, SYNONYM, NULL, &state);
		    goto just_had_operator;
		}
		break;
	    }
	    // Skip any other characters.
	    continue;
	}

	Assert(is_wordchar(*it));

	size_t term_start_index = it.raw() - qs.data();

	newprev = 'A'; // Any letter will do...

	// A term, a prefix, or a boolean operator.
	const PrefixInfo * prefixinfo = NULL;
	if ((mode == DEFAULT || mode == IN_GROUP) && !prefixmap.empty()) {
	    // Check for a fieldname prefix (e.g. title:historical).
	    Utf8Iterator p = find_if(it, end, is_not_wordchar);
	    if (p != end && *p == ':' && ++p != end && *p > ' ' && *p != ')') {
		string field;
		p = it;
		while (*p != ':')
		    Unicode::append_utf8(field, *p++);
		map<string, PrefixInfo>::const_iterator f;
		f = prefixmap.find(field);
		if (f != prefixmap.end()) {
		    // Special handling for prefixed fields, depending on
		    // the type of the prefix.
		    unsigned ch = *++p;
		    prefixinfo = &(f->second);

		    if (prefixinfo->filter) {
			// Drop out of IN_GROUP if we're in it.
			mode = DEFAULT;
			// Can't boolean filter prefix a subexpression or
			// phrase; just use anything following the prefix
			// until the next space or ')' as part of the boolean
			// filter term.
			it = p;
			string name;
			while (it != end && *it > ' ' && *it != ')')
			    Unicode::append_utf8(name, *it++);
			// Build the unstemmed form in field.
			field += ':';
			field += name;
			const list<string> & prefixes = prefixinfo->prefixes;
			Term * token = new Term(&state, name, prefixes, field);
			Parse(pParser, BOOLEAN_FILTER, token, &state);
			continue;
		    }

		    if (ch == '"' && (flags & FLAG_PHRASE)) {
			// Prefixed phrase, e.g.: subject:"space flight"
			mode = IN_PREFIXED_QUOTES;
			Parse(pParser, QUOTE, NULL, &state);
			it = p;
			newprev = ch;
			++it;
			prefix_stack.push_back(prefixinfo);
			continue;
		    }

		    if (ch == '(' && (flags & FLAG_BOOLEAN)) {
			// Prefixed subexpression, e.g.:
			// title:(fast NEAR food)
			mode = DEFAULT;
			Parse(pParser, BRA, NULL, &state);
			it = p;
			newprev = ch;
			++it;
			prefix_stack.push_back(prefixinfo);
			continue;
		    }

		    if (is_wordchar(ch)) {
			// Prefixed term.
			it = p;
		    } else {
			// It looks like a prefix but isn't, so parse it as
			// text instead.
			prefixinfo = NULL;
		    }
		}
	    }
	}

phrased_term:
	bool was_acronym;
	string term = parse_term(it, end, was_acronym);

	// Boolean operators.
	if ((mode == DEFAULT || mode == IN_GROUP) &&
	    (flags & FLAG_BOOLEAN) &&
	    // Don't want to interpret A.N.D. as an AND operator.
	    !was_acronym &&
	    !prefixinfo &&
	    term.size() >= 2 && term.size() <= 4 && U_isalpha(term[0])) {

	    string op = term;
	    if (flags & FLAG_BOOLEAN_ANY_CASE) {
		for (string::iterator i = op.begin(); i != op.end(); ++i) {
		    *i = C_toupper(*i);
		}
	    }
	    // Dispatch on length first to minimise string comparisons.
	    if (op.size() == 3) {
		if (op == "AND") {
		    Parse(pParser, AND, NULL, &state);
		    goto just_had_operator;
		}
		if (op == "NOT") {
		    Parse(pParser, NOT, NULL, &state);
		    goto just_had_operator;
		}
		if (op == "XOR") {
		    Parse(pParser, XOR, NULL, &state);
		    goto just_had_operator;
		}
		if (op == "ADJ") {
		    // Look for an optional window size, e.g. ADJ/3.
		    if (it != end && *it == '/') {
			size_t width = 0;
			Utf8Iterator p = it;
			while (++p != end && U_isdigit(*p)) {
			    width = (width * 10) + (*p - '0');
			}
			if (width && (p == end || is_whitespace(*p))) {
			    it = p;
			    Parse(pParser, ADJ, new Term(width), &state);
			    goto just_had_operator;
			}
		    }

		    Parse(pParser, ADJ, NULL, &state);
		    goto just_had_operator;
		}
	    } else if (op.size() == 2) {
		if (op == "OR") {
		    Parse(pParser, OR, NULL, &state);
		    goto just_had_operator;
		}
	    } else if (op.size() == 4) {
		if (op == "NEAR") {
		    // Look for an optional window size, e.g. NEAR/6.
		    if (it != end && *it == '/') {
			size_t width = 0;
			Utf8Iterator p = it;
			while (++p != end && U_isdigit(*p)) {
			    width = (width * 10) + (*p - '0');
			}
			if (width && (p == end || is_whitespace(*p))) {
			    it = p;
			    Parse(pParser, NEAR, new Term(width), &state);
			    goto just_had_operator;
			}
		    }

		    Parse(pParser, NEAR, NULL, &state);
		    goto just_had_operator;
		}
	    }
	}

	// If no prefix is set, use the default one.
	if (!prefixinfo) prefixinfo = prefix_stack.back();

	Assert(!prefixinfo->filter);

	{
	    string unstemmed_term(term);
	    term = Unicode::tolower(term);

	    // Reuse stem_strategy - STEM_SOME here means "stem terms except
	    // when used with positional operators".
	    stem_strategy stem_term = stem_action;
	    if (stem_term != STEM_NONE) {
		if (!stemmer.internal.get()) {
		    // No stemmer is set.
		    stem_term = STEM_NONE;
		} else if (stem_term == STEM_SOME) {
		    if (!should_stem(unstemmed_term) ||
			(it != end && is_stem_preventer(*it))) {
			// Don't stem this particular term.
			stem_term = STEM_NONE;
		    }
		}
	    }

	    Term * term_obj = new Term(&state, term, prefixinfo->prefixes,
				       unstemmed_term, stem_term, term_pos++);

	    // Check spelling, if we're a normal term, and any of the
	    // prefixes are empty.
	    if ((flags & FLAG_SPELLING_CORRECTION) && !was_acronym) {
		list<string>::const_iterator prefixiter;
		for (prefixiter = prefixinfo->prefixes.begin();
		     prefixiter != prefixinfo->prefixes.end();
		     ++prefixiter) {
		    if (!prefixiter->empty())
			continue;
		    if (!db.term_exists(term)) {
			string suggestion = db.get_spelling_suggestion(term);
			if (!suggestion.empty()) {
			    if (corrected_query.empty()) corrected_query = qs;
			    size_t term_end_index = it.raw() - qs.data();
			    size_t n = term_end_index - term_start_index;
			    size_t pos = term_start_index + correction_offset;
			    corrected_query.replace(pos, n, suggestion);
			    correction_offset += suggestion.size();
			    correction_offset -= n;
			}
		    }
		    break;
		}
	    }

	    if (mode == IN_PHRASED_TERM) {
		Parse(pParser, PHR_TERM, term_obj, &state);
	    } else {
		if (mode == DEFAULT || mode == IN_GROUP) {
		    if (it != end) {
			if ((flags & FLAG_WILDCARD) && *it == '*') {
			    Utf8Iterator p(it);
			    ++p;
			    if (p == end || !is_wordchar(*p)) {
				it = p;
				// Wildcard at end of term (also known as
				// "right truncation").
				Parse(pParser, WILD_TERM, term_obj, &state);
				continue;
			    }
			}
		    } else {
			if (flags & FLAG_PARTIAL) {
			    // Final term of a partial match query, with no
			    // following characters - treat as a wildcard.
			    Parse(pParser, PARTIAL_TERM, term_obj, &state);
			    continue;
			}
		    }
		}

		// See if the next token will be PHR_TERM - if so, this one
		// needs to be TERM not GROUP_TERM.
		if (mode == IN_GROUP && is_phrase_generator(*it)) {
		    // FIXME: can we clean this up?
		    Utf8Iterator p = it;
		    do {
			++p;
		    } while (p != end && is_phrase_generator(*p));
		    // Don't generate a phrase unless the phrase generators
		    // are immediately followed by another term.
		    if (p != end && is_wordchar(*p)) {
			mode = DEFAULT;
		    }
		}

		Parse(pParser, (mode == IN_GROUP ? GROUP_TERM : TERM),
		      term_obj, &state);
		if (mode != DEFAULT && mode != IN_GROUP) continue;
	    }
	}

	if (it == end) break;

	if (is_phrase_generator(*it)) {
	    // Skip multiple phrase generators.
	    do {
		++it;
	    } while (it != end && is_phrase_generator(*it));
	    // Don't generate a phrase unless the phrase generators are
	    // immediately followed by another term.
	    if (it != end && is_wordchar(*it)) {
		mode = IN_PHRASED_TERM;
		term_start_index = it.raw() - qs.data();
		goto phrased_term;
	    }
	} else if (mode == DEFAULT || mode == IN_GROUP) {
	    mode = DEFAULT;
	    if (!last_was_operator && is_whitespace(*it)) {
		newprev = ' ';
		// Skip multiple whitespace.
		do {
		    ++it;
		} while (it != end && is_whitespace(*it));
		// Don't generate a group unless the terms are only
		// separated by whitespace.
		if (it != end && is_wordchar(*it)) {
		    mode = IN_GROUP;
		}
	    }
	}
    }
done:
    // Implicitly close any unclosed quotes...
    if (mode == IN_QUOTES || mode == IN_PREFIXED_QUOTES)
	Parse(pParser, QUOTE, NULL, &state);
    // Feed the end-of-input token, then release the parser.
    Parse(pParser, 0, NULL, &state);
    ParseFree(pParser);

    errmsg = state.error;
    return state.query;
}
/// Accumulator for a query expression plus its love/hate/filter modifiers.
struct ProbQuery {
    Query * query;
    Query * love;
    Query * hate;
    // filter is a map from prefix to a query for that prefix.  Queries with
    // the same prefix are combined with OR, and the results of this are
    // combined with AND to get the full filter.
    map<filter_group_id, Query> filter;

    ProbQuery() : query(0), love(0), hate(0) { }

    ~ProbQuery() {
	delete query;
	delete love;
	delete hate;
    }

    /// AND together the per-group filter queries.
    Query merge_filters() const {
	map<filter_group_id, Query>::const_iterator i = filter.begin();
	Assert(i != filter.end());
	Query q = i->second;
	for (++i; i != filter.end(); ++i) {
	    q = Query(Query::OP_AND, q, i->second);
	}
	return q;
    }
};
/// A group of whitespace-separated terms, combined with the default operator.
class TermGroup {
    list<Term *> terms;

  public:
    TermGroup() { }

    /// Add a Term object to this TermGroup object; takes ownership.
    void add_term(Term * term) {
	terms.push_back(term);
    }

    /// Convert to a Xapian::Query * using default_op.
    Query * as_group(State *state) const;

    /** Provide a way to explicitly delete an object of this class.  The
     *  destructor is protected to prevent auto-variables of this type.
     */
    void destroy() { delete this; }

  protected:
    /** Protected destructor, so an auto-variable of this type is a
     *  compile-time error - you must allocate this object with new.
     */
    ~TermGroup() {
	while (!terms.empty()) {
	    delete terms.front();
	    terms.pop_front();
	}
    }
};
/** Convert this group of terms to a Query using the default operator.
 *
 *  With FLAG_AUTO_MULTIWORD_SYNONYMS, runs of consecutive terms are greedily
 *  matched against multi-word synonym keys ("word1 word2 ...") in the
 *  database, and matching synonyms are OR-ed in at the first term's position.
 *
 *  Consumes (deletes) this TermGroup.
 */
Query *
TermGroup::as_group(State *state) const
{
    Query * query = NULL;

    Query::op default_op = state->default_op();

    if (state->flags & QueryParser::FLAG_AUTO_MULTIWORD_SYNONYMS) {
	// Check for multi-word synonyms.
	Database db = state->get_database();

	string key;
	list<Term*>::const_iterator begin = terms.begin();
	list<Term*>::const_iterator i = begin;
	while (i != terms.end()) {
	    // Build a space-joined key from all remaining terms.
	    key.resize(0);
	    while (i != terms.end()) {
		if (!key.empty()) key += ' ';
		key += (*i)->name;
		++i;
	    }
	    // Greedily try to match as many consecutive words as possible,
	    // trimming the last word off the key after each failed lookup.
	    TermIterator syn, end;
	    while (true) {
		syn = db.synonyms_begin(key);
		end = db.synonyms_end(key);
		if (syn != end) break;
		if (--i == begin) break;
		// Drop the last word and the space before it.
		key.resize(key.size() - (*i)->name.size() - 1);
	    }
	    if (i == begin) {
		// No multi-synonym matches.
		if (state->is_stopword(*i)) {
		    state->add_to_stoplist(*i);
		} else {
		    add_to_query(query, default_op,
				 (*i)->get_query_with_auto_synonyms());
		}
		begin = ++i;
		continue;
	    }
	    // A multi-word synonym key matched the terms in [begin, i):
	    // add those terms individually...
	    Query * q = NULL;
	    list<Term*>::const_iterator j;
	    for (j = begin; j != i; ++j) {
		if (state->is_stopword(*j)) {
		    state->add_to_stoplist(*j);
		} else {
		    add_to_query(q, default_op, (*j)->get_query());
		}
	    }
	    // ...then OR in each synonym found.
	    // Use the position of the first term for the synonyms.
	    Xapian::termpos pos = (*begin)->pos;
	    begin = i;
	    while (syn != end) {
		add_to_query(q, Query::OP_OR, Query(*syn, 1, pos));
		++syn;
	    }
	    add_to_query(query, default_op, q);
	}
    } else {
	// No synonym expansion - just combine the terms (skipping
	// stopwords) with the default operator.
	list<Term*>::const_iterator i;
	for (i = terms.begin(); i != terms.end(); ++i) {
	    if (state->is_stopword(*i)) {
		state->add_to_stoplist(*i);
	    } else {
		add_to_query(query, default_op,
			     (*i)->get_query_with_auto_synonyms());
	    }
	}
    }
    delete this;
    return query;
}
class TermList {
list<Term *> terms;
size_t window;
/** Keep track of whether the terms added all have the same list of
* prefixes. If so, we'll build a set of phrases, one using each prefix.
* This works around the limitation that a phrase cannot have multiple
* components which are "OR" combinations of terms, but is also probably
* what users expect: ie, if a user specifies a phrase in a field, and that
* field maps to multiple prefixes, the user probably wants a phrase
* returned with all terms having one of those prefixes, rather than a
* phrase comprised of terms with differing prefixes.
*/
bool uniform_prefixes;
/** The list of prefixes of the terms added.
* This will be empty if the terms have different prefixes.
*/
list<string> prefixes;
public:
TermList() : window(0), uniform_prefixes(true) { }
/// Add an unstemmed Term object to this TermList object.
void add_positional_term(Term * term) {
if (terms.empty()) {
prefixes = term->prefixes;
} else if (uniform_prefixes && prefixes != term->prefixes) {
prefixes.clear();
uniform_prefixes = false;
}
term->need_positions();
terms.push_back(term);
}
void adjust_window(size_t alternative_window) {
if (alternative_window > window) window = alternative_window;
}
/// Convert to a query using the given operator and window size.
Query * as_opwindow_query(Query::op op, Xapian::termcount w_delta) const {
Query * q = NULL;
// Call terms.size() just once since std::list::size() may be O(n).
size_t n_terms = terms.size();
Xapian::termcount w = w_delta + terms.size();
if (uniform_prefixes) {
list<string>::const_iterator piter;
for (piter = prefixes.begin(); piter != prefixes.end(); ++piter) {
vector<Query> subqs;
subqs.reserve(n_terms);
list<Term *>::const_iterator titer;
for (titer = terms.begin(); titer != terms.end(); ++titer) {
Term * t = *titer;
subqs.push_back(Query(t->make_term(*piter), 1, t->pos));
}
add_to_query(q, Query::OP_OR,
Query(op, subqs.begin(), subqs.end(), w));
}
} else {
vector<Query> subqs;
subqs.reserve(n_terms);
list<Term *>::const_iterator titer;
for (titer = terms.begin(); titer != terms.end(); ++titer) {
subqs.push_back((*titer)->get_query());
}
q = new Query(op, subqs.begin(), subqs.end(), w);
}
delete this;
return q;
}
/// Convert to a Xapian::Query * using adjacent OP_PHRASE.
Query * as_phrase_query() const {
return as_opwindow_query(Query::OP_PHRASE, 0);
}
/// Convert to a Xapian::Query * using OP_NEAR.
Query * as_near_query() const {
// The common meaning of 'a NEAR b' is "a within 10 terms of b", which
// means a window size of 11. For more than 2 terms, we just add one
// to the window size for each extra term.
size_t w = window;
if (w == 0) w = 10;
return as_opwindow_query(Query::OP_NEAR, w - 1);
}
/// Convert to a Xapian::Query * using OP_PHRASE to implement ADJ.
Query * as_adj_query() const {
    // The common meaning of 'a ADJ b' is "a at most 10 terms before b",
    // i.e. a window of 11 for two terms.  As with NEAR, pass the spread
    // (window size minus one) and let as_opwindow_query() add the term
    // count, so each extra term widens the window by one.
    size_t spread = (window == 0) ? 10 : window;
    return as_opwindow_query(Query::OP_PHRASE, spread - 1);
}
/** Provide a way to explicitly delete an object of this class. The
 * destructor is protected to prevent auto-variables of this type.
 * (This is what the %destructor actions for phrase/phrased_term/
 * near_expr/adj_expr below call to discard an abandoned TermList.)
 */
void destroy() { delete this; }
protected:
/** Protected destructor, so an auto-variable of this type is a
 * compile-time error - you must allocate this object with new.
 */
~TermList() {
// The TermList owns the Term objects added to it, so delete them all.
list<Term *>::const_iterator t;
for (t = terms.begin(); t != terms.end(); ++t) {
delete *t;
}
}
};
// Helper macro for converting a boolean operation into a Xapian::Query.
//
// If either operand is missing (NULL), set a "Syntax: ..." error message
// and fail the parse; otherwise combine the two operand queries with OP
// and delete the operands.
//
// Note the whitespace around OP_TXT below: without it, C++11 parses
// "..."OP_TXT as a string literal with a user-defined-literal suffix, so
// the spaces are needed for plain string literal concatenation (the
// resulting message text is unchanged).
#define BOOL_OP_TO_QUERY(E, A, OP, B, OP_TXT) \
    do {\
	if (!A || !B) {\
	    state->error = "Syntax: <expression> " OP_TXT " <expression>";\
	    yy_parse_failed(yypParser);\
	    return;\
	}\
	E = new Query(OP, *A, *B);\
	delete A;\
	delete B;\
    } while (0)
}
// Parser configuration: every terminal carries a Term * as its semantic
// value, which must be deleted if lemon discards the token.
%token_type {Term *}
%token_destructor {delete $$;}
// Each parse is passed a State object for flags, the result query, and
// error reporting.
%extra_argument {State * state}
%parse_failure {
// If we've not already set an error message, set a default one.
if (!state->error) state->error = "parse error";
}
// Operators, grouped in order of increasing precedence (lemon assigns
// higher precedence to operators declared later):
%nonassoc ERROR.
%left OR.
%left XOR.
%left AND NOT.
%left NEAR ADJ.
%left LOVE HATE SYNONYM.
// Destructors for terminal symbols:
// TERM is a query term, including prefix (if any).
%destructor TERM {delete $$;}
// GROUP_TERM is a query term which follows a TERM or another GROUP_TERM and
// is only separated by whitespace characters.
%destructor GROUP_TERM {delete $$;}
// PHR_TERM is a query term which follows a TERM or another PHR_TERM and is
// separated only by one or more phrase generator characters (hyphen and
// apostrophe are common examples - see is_phrase_generator() for the list
// of all punctuation which does this).
%destructor PHR_TERM {delete $$;}
// WILD_TERM is like a TERM, but has a trailing wildcard which needs to be
// expanded.
%destructor WILD_TERM {delete $$;}
// PARTIAL_TERM is like a TERM, but it's at the end of the query string and
// we're doing "search as you type". It expands to something like WILD_TERM
// OR stemmed_form.
%destructor PARTIAL_TERM {delete $$;}
// BOOLEAN_FILTER is a query term with a prefix registered using
// add_bool_prefix(). It's added to the query using an OP_FILTER operator,
// (or OP_AND_NOT if it's negated) e.g. site:xapian.org or -site:xapian.org
%destructor BOOLEAN_FILTER {delete $$;}
// Grammar rules:
// query - The whole query - just an expr or nothing.
// query non-terminal doesn't need a type, so just give a dummy one.
%type query {int}
query ::= expr(E). {
// Save the parsed query in the State structure so we can return it.
// E can be NULL (e.g. when every term was a stopword), in which case
// the result is an empty Query.
if (E) {
state->query = *E;
delete E;
} else {
state->query = Query();
}
}
query ::= . {
// Handle a query string with no terms in.
state->query = Query();
}
// expr - A query expression.
%type expr {Query *}
%destructor expr {delete $$;}
expr(E) ::= prob_expr(P).
{ E = P; }
expr(E) ::= bool_arg(A) AND bool_arg(B).
{ BOOL_OP_TO_QUERY(E, A, Query::OP_AND, B, "AND"); }
expr(E) ::= bool_arg(A) NOT bool_arg(B). {
// 'NOT foo' -> '<alldocuments> NOT foo'
// (only when FLAG_PURE_NOT is set; otherwise A stays NULL and
// BOOL_OP_TO_QUERY reports a syntax error).
if (!A && (state->flags & QueryParser::FLAG_PURE_NOT)) {
A = new Query("", 1, 0);
}
BOOL_OP_TO_QUERY(E, A, Query::OP_AND_NOT, B, "NOT");
}
// The [NOT] annotation gives this rule the precedence of NOT.
expr(E) ::= bool_arg(A) AND NOT bool_arg(B). [NOT]
{ BOOL_OP_TO_QUERY(E, A, Query::OP_AND_NOT, B, "AND NOT"); }
expr(E) ::= bool_arg(A) OR bool_arg(B).
{ BOOL_OP_TO_QUERY(E, A, Query::OP_OR, B, "OR"); }
expr(E) ::= bool_arg(A) XOR bool_arg(B).
{ BOOL_OP_TO_QUERY(E, A, Query::OP_XOR, B, "XOR"); }
// bool_arg - an argument to a boolean operator such as AND or OR.
%type bool_arg {Query *}
%destructor bool_arg {delete $$;}
bool_arg(A) ::= expr(E). { A = E; }
// Allow an empty argument (given ERROR precedence, the lowest declared,
// so it's only used as a last resort).
bool_arg(A) ::= . [ERROR] {
// Set the argument to NULL, which enables the bool_arg-using rules in
// expr above to report uses of AND, OR, etc which don't have two
// arguments.
A = NULL;
}
// prob_expr - a single compound term, or a prob.
%type prob_expr {Query *}
%destructor prob_expr {delete $$;}
// Combine the parts of a ProbQuery into one Query: loved terms are
// attached with OP_AND_MAYBE, boolean filters with OP_FILTER, and hated
// terms are removed with OP_AND_NOT.
prob_expr(E) ::= prob(P). {
E = P->query;
P->query = NULL;
// Handle any "+ terms".
if (P->love) {
if (P->love->empty()) {
// +<nothing>.
delete E;
E = P->love;
} else if (E) {
// Loved terms are required, so they go on the left (required)
// side of the AND_MAYBE - hence the swap.
swap(E, P->love);
add_to_query(E, Query::OP_AND_MAYBE, P->love);
} else {
E = P->love;
}
P->love = NULL;
}
// Handle any boolean filters.
if (!P->filter.empty()) {
if (E) {
add_to_query(E, Query::OP_FILTER, P->merge_filters());
} else {
// Make the query a boolean one.
E = new Query(Query::OP_SCALE_WEIGHT, P->merge_filters(), 0.0);
}
}
// Handle any "- terms".
if (P->hate && !P->hate->empty()) {
if (!E) {
// Can't just hate!
yy_parse_failed(yypParser);
return;
}
*E = Query(Query::OP_AND_NOT, *E, *P->hate);
}
// FIXME what if E && E->empty() (all terms are stopwords)?
delete P;
}
prob_expr(E) ::= term(T). {
E = T;
}
// prob - a probabilistic sub-expression consisting of stop_terms, "+" terms,
// "-" terms, boolean filters, and/or value ranges.
//
// Note: stop_term can also be several other things other than a simple term!
%type prob {ProbQuery *}
%destructor prob {delete $$;}
// A value range on its own starts a new ProbQuery.
prob(P) ::= RANGE_START(A) RANGE_END(B). {
Query range;
Xapian::valueno valno = state->value_range(range, A, B);
if (valno == BAD_VALUENO) {
yy_parse_failed(yypParser);
return;
}
P = new ProbQuery;
P->filter[filter_group_id(valno)] = range;
}
// A further value range: ranges on the same value slot are OR-ed together,
// like boolean filters with the same prefix.
prob(P) ::= stop_prob(Q) RANGE_START(A) RANGE_END(B). {
Query range;
Xapian::valueno valno = state->value_range(range, A, B);
if (valno == BAD_VALUENO) {
yy_parse_failed(yypParser);
return;
}
P = Q;
Query & q = P->filter[filter_group_id(valno)];
q = Query(Query::OP_OR, q, range);
}
// Two adjacent terms start a prob; either may be NULL if it was a stopword.
prob(P) ::= stop_term(T) stop_term(U). {
P = new ProbQuery;
P->query = T;
if (U) add_to_query(P->query, state->default_op(), U);
}
prob(P) ::= prob(Q) stop_term(T). {
P = Q;
// If T is a stopword, there's nothing to do here.
if (T) add_to_query(P->query, state->default_op(), T);
}
prob(P) ::= LOVE term(T). {
P = new ProbQuery;
if (state->default_op() == Query::OP_AND) {
// With a default op of AND, '+' only serves to bypass the stoplist.
P->query = T;
} else {
P->love = T;
}
}
prob(P) ::= stop_prob(Q) LOVE term(T). {
P = Q;
if (state->default_op() == Query::OP_AND) {
/* The default op is AND, so we just put loved terms into the query
 * (in this case the only effect of love is to ignore the stopword
 * list). */
add_to_query(P->query, Query::OP_AND, T);
} else {
add_to_query(P->love, Query::OP_AND, T);
}
}
// A hated ('-') term.
prob(P) ::= HATE term(T). {
P = new ProbQuery;
P->hate = T;
}
prob(P) ::= stop_prob(Q) HATE term(T). {
P = Q;
add_to_query(P->hate, Query::OP_OR, T);
}
// A hated boolean filter, e.g. -site:xapian.org.
prob(P) ::= HATE BOOLEAN_FILTER(T). {
P = new ProbQuery;
P->hate = new Query(T->get_query());
delete T;
}
prob(P) ::= stop_prob(Q) HATE BOOLEAN_FILTER(T). {
P = Q;
add_to_query(P->hate, Query::OP_OR, T->get_query());
delete T;
}
// A boolean filter on its own, e.g. site:xapian.org.
prob(P) ::= BOOLEAN_FILTER(T). {
P = new ProbQuery;
P->filter[T->get_filter_group_id()] = T->get_query();
delete T;
}
prob(P) ::= stop_prob(Q) BOOLEAN_FILTER(T). {
P = Q;
// We OR filters with the same prefix...
Query & q = P->filter[T->get_filter_group_id()];
q = Query(Query::OP_OR, q, T->get_query());
delete T;
}
prob(P) ::= LOVE BOOLEAN_FILTER(T). {
// LOVE BOOLEAN_FILTER(T) is just the same as BOOLEAN_FILTER
P = new ProbQuery;
P->filter[T->get_filter_group_id()] = T->get_query();
delete T;
}
prob(P) ::= stop_prob(Q) LOVE BOOLEAN_FILTER(T). {
// LOVE BOOLEAN_FILTER(T) is just the same as BOOLEAN_FILTER
P = Q;
// We OR filters with the same prefix...
Query & q = P->filter[T->get_filter_group_id()];
q = Query(Query::OP_OR, q, T->get_query());
delete T;
}
// stop_prob - A prob or a stop_term.
%type stop_prob {ProbQuery *}
%destructor stop_prob {delete $$;}
stop_prob(P) ::= prob(Q).
{ P = Q; }
// Promote a lone stop_term (which may be NULL if it was a stopword) to a
// ProbQuery so the prob rules above can extend it.
stop_prob(P) ::= stop_term(T). {
P = new ProbQuery;
P->query = T;
}
// stop_term - A term which should be checked against the stopword list,
// or a compound_term.
//
// If a term is loved, hated, or in a phrase, we don't want to consult the
// stopword list, so stop_term isn't used there (instead term is).
%type stop_term {Query *}
%destructor stop_term {delete $$;}
stop_term(T) ::= TERM(U). {
if (state->is_stopword(U)) {
// Drop the stopword from the query, but record it in the stoplist.
T = NULL;
state->add_to_stoplist(U);
} else {
T = new Query(U->get_query_with_auto_synonyms());
}
delete U;
}
stop_term(T) ::= compound_term(U). {
T = U;
}
// term - A term or a compound_term.
//
// Unlike stop_term, this never consults the stopword list - it's used in
// contexts (e.g. after LOVE or HATE) where stopwords must be kept.
%type term {Query *}
%destructor term {delete $$;}
term(T) ::= TERM(U). {
T = new Query(U->get_query_with_auto_synonyms());
delete U;
}
term(T) ::= compound_term(U). {
T = U;
}
// compound_term - A WILD_TERM, a quoted phrase (with or without prefix), a
// phrased_term, group, near_expr, adj_expr, or a bracketed subexpression (with
// or without prefix).
%type compound_term {Query *}
%destructor compound_term {delete $$;}
compound_term(T) ::= WILD_TERM(U).
{ T = U->as_wildcarded_query(state); }
compound_term(T) ::= PARTIAL_TERM(U).
{ T = U->as_partial_query(state); }
// A quoted phrase: "some words".
compound_term(T) ::= QUOTE phrase(P) QUOTE.
{ T = P->as_phrase_query(); }
// Terms joined by phrase-generating punctuation, e.g. one-two.
compound_term(T) ::= phrased_term(P).
{ T = P->as_phrase_query(); }
compound_term(T) ::= group(P). {
T = P->as_group(state);
}
compound_term(T) ::= near_expr(P).
{ T = P->as_near_query(); }
compound_term(T) ::= adj_expr(P).
{ T = P->as_adj_query(); }
// A bracketed subexpression.
compound_term(T) ::= BRA expr(E) KET.
{ T = E; }
// '~term' - search for the term including its synonyms.
compound_term(T) ::= SYNONYM TERM(U). {
T = new Query(U->get_query_with_synonyms());
delete U;
}
// phrase - The "inside the quotes" part of a double-quoted phrase.
%type phrase {TermList *}
%destructor phrase {$$->destroy();}
phrase(P) ::= TERM(T). {
P = new TermList;
P->add_positional_term(T);
}
phrase(P) ::= phrase(Q) TERM(T). {
// Append each further term; the TermList takes ownership of T.
P = Q;
P->add_positional_term(T);
}
// phrased_term - A phrased term works like a single term, but is actually
// 2 or more terms linked together into a phrase by punctuation. There must be
// at least 2 terms in order to be able to have punctuation between the terms!
%type phrased_term {TermList *}
%destructor phrased_term {$$->destroy();}
phrased_term(P) ::= TERM(T) PHR_TERM(U). {
// The TermList takes ownership of the Term objects added to it.
P = new TermList;
P->add_positional_term(T);
P->add_positional_term(U);
}
phrased_term(P) ::= phrased_term(Q) PHR_TERM(T). {
P = Q;
P->add_positional_term(T);
}
// group - A group of terms separated only by whitespace - candidates for
// multi-term synonyms.
%type group {TermGroup *}
%destructor group {$$->destroy();}
group(P) ::= TERM(T) GROUP_TERM(U). {
// The TermGroup takes ownership of the Term objects added to it.
P = new TermGroup;
P->add_term(T);
P->add_term(U);
}
group(P) ::= group(Q) GROUP_TERM(T). {
P = Q;
P->add_term(T);
}
// near_expr - 2 or more terms with NEAR in between. There must be at least 2
// terms in order for there to be any NEAR operators!
%type near_expr {TermList *}
%destructor near_expr {$$->destroy();}
near_expr(P) ::= TERM(T) NEAR(N) TERM(U). {
P = new TermList;
P->add_positional_term(T);
P->add_positional_term(U);
// A non-NULL NEAR token carries an explicit window size (presumably
// from "NEAR/n" syntax - confirm against the tokeniser) in its
// termpos; adjust_window() keeps the largest window requested.
if (N) {
P->adjust_window(N->get_termpos());
delete N;
}
}
near_expr(P) ::= near_expr(Q) NEAR(N) TERM(T). {
P = Q;
P->add_positional_term(T);
if (N) {
P->adjust_window(N->get_termpos());
delete N;
}
}
// adj_expr - 2 or more terms with ADJ in between. There must be at least 2
// terms in order for there to be any ADJ operators!
%type adj_expr {TermList *}
%destructor adj_expr {$$->destroy();}
adj_expr(P) ::= TERM(T) ADJ(N) TERM(U). {
P = new TermList;
P->add_positional_term(T);
P->add_positional_term(U);
// As for NEAR, a non-NULL ADJ token carries an explicit window size in
// its termpos; adjust_window() keeps the largest window requested.
if (N) {
P->adjust_window(N->get_termpos());
delete N;
}
}
adj_expr(P) ::= adj_expr(Q) ADJ(N) TERM(T). {
P = Q;
P->add_positional_term(T);
if (N) {
P->adjust_window(N->get_termpos());
delete N;
}
}
// Select yacc syntax highlighting in vim editor: vim: syntax=yacc
// (lemon syntax colouring isn't supplied by default; yacc does an OK job).