blob: c30fc6c66b01dd925a5e4a8118fc321aef23bf42 [file] [log] [blame]
%include {
/* queryparser.lemony: build a Xapian::Query object from a user query string.
*
* Copyright (C) 2004,2005,2006,2007 Olly Betts
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
* USA
*/
#include <config.h>
#include "omassert.h"
#include "queryparser_internal.h"
#include <xapian/error.h>
#include <xapian/unicode.h>
#include "stringutils.h"
// Include the list of token values lemon generates.
#include "queryparser_token.h"
#include <algorithm>
#include <list>
#include <string>
#include <string.h>
using namespace std;
using namespace Xapian;
/// ASCII-only uppercase test (non-ASCII codepoints are never upper here).
inline bool
U_isupper(unsigned ch) {
    if (ch >= 128) return false;
    return C_isupper(static_cast<unsigned char>(ch));
}
/// ASCII-only digit test (non-ASCII codepoints are never digits here).
inline bool
U_isdigit(unsigned ch) {
    if (ch >= 128) return false;
    return C_isdigit(static_cast<unsigned char>(ch));
}
/// ASCII-only alphabetic test (non-ASCII codepoints are never alpha here).
inline bool
U_isalpha(unsigned ch) {
    if (ch >= 128) return false;
    return C_isalpha(static_cast<unsigned char>(ch));
}
using Xapian::Unicode::is_whitespace;
/// Negation of Xapian::Unicode::is_whitespace, usable as a find_if predicate.
inline bool
is_not_whitespace(unsigned ch) {
    return is_whitespace(ch) ? false : true;
}
using Xapian::Unicode::is_wordchar;
/// Negation of Xapian::Unicode::is_wordchar, usable as a find_if predicate.
inline bool
is_not_wordchar(unsigned ch) {
    return is_wordchar(ch) ? false : true;
}
/// True if @a ch is a Unicode decimal digit (any script, not just ASCII).
inline bool
is_digit(unsigned ch) {
    return Unicode::get_category(ch) == Unicode::DECIMAL_DIGIT_NUMBER;
}
// FIXME: we used to keep trailing "-" (e.g. Cl-) but it's of dubious utility
// and there's the risk of hyphens getting stuck onto the end of terms...
/// Characters we allow as a trailing suffix on a term (e.g. C++, C#, Na+).
inline bool
is_suffix(unsigned ch) {
    switch (ch) {
	case '+':
	case '#':
	    return true;
	default:
	    return false;
    }
}
/** Does a prefixed term need a ':' between prefix and term?
 *
 *  A colon separator is only needed when the term starts with an (ASCII)
 *  capital, which could otherwise be misread as part of a multi-character
 *  prefix; single-character prefixes never need one.
 */
inline bool
prefix_needs_colon(const string & prefix, unsigned ch)
{
    if (!U_isupper(ch)) return false;
    string::size_type len = prefix.length();
    if (len <= 1) return false;
    return prefix[len - 1] != ':';
}
using Unicode::is_currency;
/// A structure identifying a group of filter terms.
struct filter_group_id {
    /// The prefixes of the filter terms (used for boolean filter terms).
    list<string> prefixes;

    /// The value number of the filter terms (used for value range terms).
    Xapian::valueno valno;

    /// Construct a filter_group_id for boolean filter terms.
    explicit filter_group_id(const list<string> & prefixes_)
	: prefixes(prefixes_), valno(Xapian::BAD_VALUENO) {}

    /// Construct a filter_group_id for value range terms.
    explicit filter_group_id(Xapian::valueno valno_)
	: prefixes(), valno(valno_) {}

    /// Ordering, so a filter_group_id can be used as a map key.
    bool operator<(const filter_group_id & other) const {
	// Primary key: prefixes; secondary key: value number.
	if (prefixes < other.prefixes) return true;
	if (other.prefixes < prefixes) return false;
	return valno < other.valno;
    }
};
/** Class used to pass information about a token from lexer to parser.
*
* Generally an instance of this class carries term information, but it can be
* used for the start or end of a value range, with some operators (e.g. the
* distance in NEAR/3 or ADJ/3, etc).
*/
class Term {
State * state;
public:
string name;
list<string> prefixes;
string unstemmed;
QueryParser::stem_strategy stem;
termpos pos;
Term(const string &name_, termpos pos_) : name(name_), stem(QueryParser::STEM_NONE), pos(pos_) { }
Term(const string &name_) : name(name_), stem(QueryParser::STEM_NONE), pos(0) { }
Term(const string &name_, const list<string> &prefixes_)
: name(name_), prefixes(prefixes_), stem(QueryParser::STEM_NONE), pos(0) { }
Term(termpos pos_) : stem(QueryParser::STEM_NONE), pos(pos_) { }
Term(State * state_, const string &name_, const list<string> &prefixes_,
const string &unstemmed_,
QueryParser::stem_strategy stem_ = QueryParser::STEM_NONE,
termpos pos_ = 0)
: state(state_), name(name_), prefixes(prefixes_), unstemmed(unstemmed_),
stem(stem_), pos(pos_) { }
std::string make_term(const string & prefix) const;
void need_positions() {
if (stem == QueryParser::STEM_SOME) stem = QueryParser::STEM_NONE;
}
termpos get_termpos() const { return pos; }
filter_group_id get_filter_group_id() const { return filter_group_id(prefixes); }
Query * as_wildcarded_query(State * state) const;
Query * as_partial_query(State * state_) const;
Query get_query() const;
Query get_query_with_synonyms() const;
Query get_query_with_auto_synonyms() const;
};
/// Parser State shared between the lexer and the parser.
class State {
    QueryParser::Internal * qpi;

  public:
    Query query;
    const char * error;
    unsigned flags;

    State(QueryParser::Internal * qpi_, unsigned flags_)
	: qpi(qpi_), error(NULL), flags(flags_) { }

    /// Apply the configured stemmer to @a term.
    string stem_term(const string &term) {
	return qpi->stemmer(term);
    }

    /// Record that @a term was dropped as a stopword.
    void add_to_stoplist(const Term * term) {
	qpi->stoplist.push_back(term->name);
    }

    /// Remember the unstemmed form which produced (stemmed) @a term.
    void add_to_unstem(const string & term, const string & unstemmed) {
	qpi->unstem.insert(make_pair(term, unstemmed));
    }

    /** Try each registered ValueRangeProcessor in turn on the range a..b.
     *
     *  On success, @a q is set to an OP_VALUE_RANGE query, @a a and @a b are
     *  deleted, and the matching value number is returned.  On failure,
     *  error is set and BAD_VALUENO is returned (a and b are NOT deleted -
     *  the parser's token destructors own them in that case).
     */
    valueno value_range(Query & q, Term *a, Term *b) {
	string start = a->name;
	string end = b->name;
	list<ValueRangeProcessor *>::const_iterator i = qpi->valrangeprocs.begin();
	while (i != qpi->valrangeprocs.end()) {
	    Xapian::valueno valno = (**i)(start, end);
	    if (valno != Xapian::BAD_VALUENO) {
		delete a;
		delete b;
		q = Query(Query::OP_VALUE_RANGE, valno, start, end);
		return valno;
	    }
	    ++i;
	}
	// FIXME: Do we want to report an error for this?  If not we need
	// to perform the above check in the tokeniser and if none of the
	// ValueRangeProcessor classes like the range, we rollback to
	// parsing the query without treating this as a range.  Needs
	// more thought and probably a look at queries users actually
	// enter.
	error = "Unknown range operation";
	return Xapian::BAD_VALUENO;
    }

    Query::op default_op() const { return qpi->default_op; }

    /// Is @a term a stopword according to the configured Stopper (if any)?
    bool is_stopword(const Term *term) const {
	return qpi->stopper && (*qpi->stopper)(term->name);
    }

    Database get_database() const {
	return qpi->db;
    }
};
/** Build the database term: [Z][prefix[:]](stemmed|raw) name.
 *
 *  Also records the unstemmed form for QueryParser::unstem_begin() when one
 *  was captured.
 */
string
Term::make_term(const string & prefix) const
{
    string term;
    // 'Z' marks a stemmed form when STEM_SOME is in effect.
    if (stem == QueryParser::STEM_SOME) term += 'Z';
    if (!prefix.empty()) {
	term += prefix;
	if (prefix_needs_colon(prefix, name[0])) term += ':';
    }
    term += (stem == QueryParser::STEM_NONE) ? name : state->stem_term(name);
    if (!unstemmed.empty())
	state->add_to_unstem(term, unstemmed);
    return term;
}
/** Build the query for this term, OR-ed with any single-word synonyms.
 *
 *  For each prefix, the synonym list for the unstemmed prefixed form is
 *  looked up in the database; if that's empty and the term would be stemmed,
 *  the stemmed ('Z'-prefixed) form is tried instead.  Each synonym found is
 *  OR-ed in at this term's position.
 */
Query
Term::get_query_with_synonyms() const
{
    Query q = get_query();

    // Handle single-word synonyms with each prefix.
    list<string>::const_iterator piter;
    for (piter = prefixes.begin(); piter != prefixes.end(); ++piter) {
	// First try the unstemmed term:
	string term;
	if (!piter->empty()) {
	    term += *piter;
	    if (prefix_needs_colon(*piter, name[0])) term += ':';
	}
	term += name;

	Xapian::Database db = state->get_database();
	Xapian::TermIterator syn = db.synonyms_begin(term);
	Xapian::TermIterator end = db.synonyms_end(term);
	if (syn == end && stem != QueryParser::STEM_NONE) {
	    // If that has no synonyms, try the stemmed form.
	    // (Assigning 'Z' replaces term's contents with just the stem
	    // marker - it doesn't append.)
	    term = 'Z';
	    if (!piter->empty()) {
		term += *piter;
		if (prefix_needs_colon(*piter, name[0])) term += ':';
	    }
	    term += state->stem_term(name);
	    syn = db.synonyms_begin(term);
	    end = db.synonyms_end(term);
	}
	while (syn != end) {
	    q = Query(Query::OP_OR, q, Query(*syn, 1, pos));
	    ++syn;
	}
    }
    return q;
}
/// As get_query(), but expand synonyms when FLAG_AUTO_SYNONYMS is enabled.
Query
Term::get_query_with_auto_synonyms() const
{
    bool want_synonyms = (state->flags & QueryParser::FLAG_AUTO_SYNONYMS) != 0;
    return want_synonyms ? get_query_with_synonyms() : get_query();
}
/// Combine @a term into @a q with @a op; takes ownership of @a term.
static void
add_to_query(Query *& q, Query::op op, Query * term)
{
    Assert(term);
    if (!q) {
	// Nothing accumulated yet - just adopt the term's query.
	q = term;
	return;
    }
    *q = Query(op, *q, *term);
    delete term;
}
/// Combine @a term into @a q with @a op (copying overload).
static void
add_to_query(Query *& q, Query::op op, const Query & term)
{
    if (q == NULL) {
	q = new Query(term);
    } else {
	*q = Query(op, *q, term);
    }
}
/// Build the query for this term: one leaf per prefix, OR-ed together.
Query
Term::get_query() const
{
    Assert(prefixes.size() >= 1);
    list<string>::const_iterator piter = prefixes.begin();
    Query q(make_term(*piter), 1, pos);
    for (++piter; piter != prefixes.end(); ++piter) {
	q = Query(Query::OP_OR, q, Query(make_term(*piter), 1, pos));
    }
    return q;
}
/** Expand a trailing wildcard: OR together every term in the database with
 *  this term (under each prefix) as a prefix.  Consumes (deletes) this Term.
 */
Query *
Term::as_wildcarded_query(State * state_) const
{
    Database db = state_->get_database();
    Query * q = new Query;
    list<string>::const_iterator piter;
    for (piter = prefixes.begin(); piter != prefixes.end(); ++piter) {
	const string root = *piter + name;
	for (TermIterator t = db.allterms_begin(root);
	     t != db.allterms_end(root);
	     ++t) {
	    add_to_query(q, Query::OP_OR, Query(*t, 1, pos));
	}
    }
    delete this;
    return q;
}
/** Expand a partial (as-you-type) term: like as_wildcarded_query(), but the
 *  term as normally parsed is also OR-ed in as an alternative.  Consumes
 *  (deletes) this Term.
 */
Query *
Term::as_partial_query(State * state_) const
{
    Database db = state_->get_database();
    Query * q = new Query;
    list<string>::const_iterator piter;
    for (piter = prefixes.begin(); piter != prefixes.end(); ++piter) {
	const string root = *piter + name;
	for (TermIterator t = db.allterms_begin(root);
	     t != db.allterms_end(root);
	     ++t) {
	    add_to_query(q, Query::OP_OR, Query(*t, 1, pos));
	}
	// Add the term, as it would normally be handled, as an alternative.
	add_to_query(q, Query::OP_OR, Query(make_term(*piter), 1, pos));
    }
    delete this;
    return q;
}
/// Punctuation which joins adjacent terms into a phrase search.
// Ordered mostly by frequency of calls to this function done when
// running queryparsertest.
inline bool
is_phrase_generator(unsigned ch)
{
    // Guard against NUL (strchr would match the terminator) and non-ASCII.
    if (ch == 0 || ch >= 128) return false;
    return strchr(".-/:\\@", static_cast<int>(ch)) != NULL;
}
/// Punctuation which, when it follows a term, prevents that term being
/// stemmed under STEM_SOME.
inline bool
is_stem_preventer(unsigned ch)
{
    // Guard against NUL (strchr would match the terminator) and non-ASCII.
    if (ch == 0 || ch >= 128) return false;
    return strchr("(/\\@<>=*[{\"", static_cast<int>(ch)) != NULL;
}
/// Should this term be stemmed under STEM_SOME?  Decided by the Unicode
/// category of the term's first character.
inline bool
should_stem(const std::string & term)
{
    Utf8Iterator u(term);
    switch (Unicode::get_category(*u)) {
	case Unicode::LOWERCASE_LETTER:
	case Unicode::TITLECASE_LETTER:
	case Unicode::MODIFIER_LETTER:
	case Unicode::OTHER_LETTER:
	    return true;
	default:
	    return false;
    }
}
/** Map an embedded punctuation character to the character to keep in the
 *  term, or 0 if it shouldn't be treated as part of a word (e.g. AT&T,
 *  Fred's).  Unicode apostrophe variants are normalised to ASCII '.
 */
inline unsigned check_infix(unsigned ch) {
    switch (ch) {
	// Unicode includes all these except '&' in its word boundary rules,
	// as well as 0x2019 (handled below) and ':' (for Swedish apparently,
	// but we ignore this for now as it's problematic in real world
	// cases).
	case '\'':
	case '&':
	case 0xb7:
	case 0x5f4:
	case 0x2027:
	    return ch;
	// 0x2019 is Unicode apostrophe and single closing quote.
	// 0x201b is Unicode single opening quote with the tail rising.
	case 0x2019:
	case 0x201b:
	    return '\'';
	default:
	    return 0;
    }
}
/** Map an embedded punctuation character between two digits to the character
 *  to keep in the term, or 0 to break the word there.
 */
inline unsigned check_infix_digit(unsigned ch) {
    // This list of characters comes from Unicode's word identifying
    // algorithm.
    if (ch == ',' || ch == '.' || ch == ';') return ch;
    switch (ch) {
	case 0x037e: // GREEK QUESTION MARK
	case 0x0589: // ARMENIAN FULL STOP
	case 0x060D: // ARABIC DATE SEPARATOR
	case 0x07F8: // NKO COMMA
	case 0x2044: // FRACTION SLASH
	case 0xFE10: // PRESENTATION FORM FOR VERTICAL COMMA
	case 0xFE13: // PRESENTATION FORM FOR VERTICAL COLON
	case 0xFE14: // PRESENTATION FORM FOR VERTICAL SEMICOLON
	    return ch;
	default:
	    return 0;
    }
}
struct yyParser;
// Prototype the functions lemon generates.
static yyParser *ParseAlloc();
static void ParseFree(yyParser *);
static void Parse(yyParser *, int, Term *, State *);
/** Register @a prefix for @a field.
 *
 *  @param filter  true for a boolean filter prefix (add_boolean_prefix()),
 *		   false for a probabilistic prefix (add_prefix()).  Mixing
 *		   the two kinds on one field is an error.
 */
void
QueryParser::Internal::add_prefix(const string &field, const string &prefix,
				  bool filter)
{
    map<string, PrefixInfo>::iterator p = prefixmap.find(field);
    if (p != prefixmap.end()) {
	// Check that this is the same type of filter as the existing one(s).
	if (p->second.filter != filter) {
	    throw Xapian::InvalidOperationError("Can't use add_prefix() and add_bool_prefix() on the same field name");
	}
	p->second.prefixes.push_back(prefix);
	return;
    }
    prefixmap.insert(make_pair(field, PrefixInfo(filter, prefix)));
}
/** Lex one term from the query string.
 *
 *  @param it	       Iterator into the query string; advanced past the term.
 *  @param end	       End of the query string.
 *  @param was_acronym Set to true iff the term was lexed as an acronym
 *		       (e.g. "P.T.O.", returned with the dots removed).
 *
 *  @return The term lexed (possibly keeping a trailing '+'/'#' suffix).
 */
string
QueryParser::Internal::parse_term(Utf8Iterator &it, const Utf8Iterator &end,
				  bool &was_acronym)
{
    string term;
    // Look for initials separated by '.' (e.g. P.T.O., U.N.C.L.E).
    // Don't worry if there's a trailing '.' or not.
    if (U_isupper(*it)) {
	string t;
	Utf8Iterator p = it;
	do {
	    Unicode::append_utf8(t, *p++);
	} while (p != end && *p == '.' && ++p != end && U_isupper(*p));
	// One letter does not make an acronym!  If we handled a single
	// uppercase letter here, we wouldn't catch M&S below.
	if (t.length() > 1) {
	    // Check there's not a (lower case) letter or digit
	    // immediately after it.
	    // FIXME: should I.B.M..P.T.O be a range search?
	    if (p == end || !is_wordchar(*p)) {
		it = p;
		swap(term, t);
	    }
	}
    }
    was_acronym = !term.empty();

    if (term.empty()) {
	// Normal term: consume word characters, allowing certain embedded
	// punctuation between word characters (AT&T, Fred's, 1,000).
	unsigned prevch = *it;
	Unicode::append_utf8(term, prevch);
	while (++it != end) {
	    unsigned ch = *it;
	    if (!is_wordchar(ch)) {
		// Treat a single embedded '&' or "'" or similar as a word
		// character (e.g. AT&T, Fred's).  Also, normalise
		// apostrophes to ASCII apostrophe.
		Utf8Iterator p = it;
		++p;
		if (p == end || !is_wordchar(*p)) break;
		unsigned nextch = *p;
		// Digits get a different set of allowed infix characters
		// (e.g. "," and "." as in 1,000.99).
		if (is_digit(prevch) &&
		    is_digit(nextch)) {
		    ch = check_infix_digit(ch);
		} else {
		    ch = check_infix(ch);
		}
		if (!ch) break;
	    }
	    Unicode::append_utf8(term, ch);
	    prevch = ch;
	}
	if (it != end && is_suffix(*it)) {
	    string suff_term = term;
	    Utf8Iterator p = it;
	    // Keep trailing + (e.g. C++, Na+) or # (e.g. C#).
	    do {
		// At most 3 suffix characters are kept - a 4th (e.g.
		// "C++++") abandons the suffixed form entirely.
		if (suff_term.size() - term.size() == 3) {
		    suff_term.resize(0);
		    break;
		}
		suff_term += *p;
	    } while (is_suffix(*++p));
	    if (!suff_term.empty() && (p == end || !is_wordchar(*p))) {
		// If the suffixed term doesn't exist, check that the
		// non-suffixed term does.  This also takes care of
		// the case when QueryParser::set_database() hasn't
		// been called.
		bool use_suff_term = false;
		string lc = Unicode::tolower(suff_term);
		if (db.term_exists(lc)) {
		    use_suff_term = true;
		} else {
		    lc = Unicode::tolower(term);
		    if (!db.term_exists(lc)) use_suff_term = true;
		}
		if (use_suff_term) {
		    term = suff_term;
		    it = p;
		}
	    }
	}
    }
    return term;
}
/** Tokenise the query string and drive the lemon-generated parser.
 *
 *  @param qs		  The query string to parse.
 *  @param flags	  Bitwise-OR of QueryParser::feature_flag values.
 *  @param default_prefix Prefix to apply to unprefixed terms (may be empty,
 *			  in which case any prefixes registered for the empty
 *			  field name are used).
 *
 *  @return The parsed Query (also sets errmsg on parse errors).
 */
Query
QueryParser::Internal::parse_query(const string &qs, unsigned flags,
				   const string &default_prefix)
{
    yyParser * pParser = ParseAlloc();

    // Set value_ranges if we may have to handle value ranges in the query.
    bool value_ranges;
    value_ranges = !valrangeprocs.empty() && (qs.find("..") != string::npos);

    termpos term_pos = 1;
    Utf8Iterator it(qs), end;

    State state(this, flags);

    // To successfully apply more than one spelling correction to a query
    // string, we must keep track of the offset due to previous corrections.
    int correction_offset = 0;
    corrected_query.resize(0);

    // Stack of prefixes, used for phrases and subexpressions.
    list<const PrefixInfo *> prefix_stack;

    // If default_prefix is specified, use it.  Otherwise, use any list
    // that has been set for the empty prefix.
    const PrefixInfo def_pfx(false, default_prefix);
    {
	const PrefixInfo * default_prefixinfo = &def_pfx;
	if (default_prefix.empty()) {
	    map<string, PrefixInfo>::const_iterator f = prefixmap.find("");
	    if (f != prefixmap.end()) default_prefixinfo = &(f->second);
	}
	// We always have the current prefix on the top of the stack.
	prefix_stack.push_back(default_prefixinfo);
    }

    unsigned newprev = ' ';
main_lex_loop:
    enum {
	DEFAULT, IN_QUOTES, IN_PREFIXED_QUOTES, IN_PHRASED_TERM, IN_GROUP
    } mode = DEFAULT;
    while (it != end) {
	bool last_was_operator = false;
	// The "if (false)" wrapper means the just_had_operator label can
	// only be reached by an explicit goto from further down, after an
	// operator token has been handed to the parser.
	if (false) {
just_had_operator:
	    if (it == end) break;
	    last_was_operator = true;
	    mode = DEFAULT;
	}
	if (mode == IN_PHRASED_TERM) mode = DEFAULT;
	if (is_whitespace(*it)) {
	    newprev = ' ';
	    ++it;
	    it = find_if(it, end, is_not_whitespace);
	    if (it == end) break;
	}

	if ((mode == DEFAULT || mode == IN_GROUP) && value_ranges) {
	    // Scan forward to see if this could be the "start of range"
	    // token.  Sadly this has O(n^2) tendencies, though at least
	    // "n" is the number of words in a query which is likely to
	    // remain fairly small.  FIXME: can we tokenise more elegantly?
	    Utf8Iterator p = it;
	    unsigned ch = 0;
	    while (p != end) {
		if (ch == '.' && *p == '.') {
		    ++p;
		    if (p == end || *p <= ' ' || *p == ')') break;

		    string r;
		    do {
			Unicode::append_utf8(r, *it++);
		    } while (it != p);
		    // Trim off the trailing "..".
		    r.resize(r.size() - 2);
		    Parse(pParser, RANGE_START, new Term(r), &state);
		    r.resize(0);
		    // Allow any character except whitespace and ')' in a
		    // RANGE_END.  Or should we be consistent with
		    // RANGE_START?
		    do {
			Unicode::append_utf8(r, *p++);
		    } while (p != end && *p > ' ' && *p != ')');
		    Parse(pParser, RANGE_END, new Term(r), &state);
		    it = p;
		    goto main_lex_loop;
		}
		ch = *p;
		if (!(is_wordchar(ch) || is_currency(ch) ||
		      (ch < 128 && strchr("%,-./:@", ch)))) break;
		++p;
	    }
	}

	if (!is_wordchar(*it)) {
	    // Punctuation/operator character.
	    unsigned prev = newprev;
	    unsigned ch = *it++;
	    newprev = ch;
	    // Drop out of IN_GROUP mode.
	    if (mode == IN_GROUP) mode = DEFAULT;
	    switch (ch) {
	      case '"': // Quoted phrase.
		if (mode == DEFAULT) {
		    // Skip whitespace.
		    it = find_if(it, end, is_not_whitespace);
		    if (it == end) {
			// Ignore an unmatched " at the end of the query to
			// avoid generating an empty pair of QUOTEs which
			// will cause a parse error.
			goto done;
		    }
		    if (*it == '"') {
			// Ignore empty "" (but only if we're not already
			// IN_QUOTES as we don't merge two adjacent quoted
			// phrases!)
			newprev = *it++;
			break;
		    }
		}
		if (flags & QueryParser::FLAG_PHRASE) {
		    Parse(pParser, QUOTE, NULL, &state);
		    if (mode == DEFAULT) {
			mode = IN_QUOTES;
		    } else {
			// Remove the prefix we pushed for this phrase.
			if (mode == IN_PREFIXED_QUOTES)
			    prefix_stack.pop_back();
			mode = DEFAULT;
		    }
		}
		break;

	      case '+': case '-': // Loved or hated term/phrase/subexpression.
		// Ignore + or - at the end of the query string.
		if (it == end) goto done;
		if (prev > ' ' && prev != '(') {
		    // Or if not after whitespace or an open bracket.
		    break;
		}
		if (is_whitespace(*it) || *it == '+' || *it == '-') {
		    // Ignore + or - followed by a space, or further + or -.
		    // Postfix + (such as in C++ and H+) is handled as part
		    // of the term lexing code in parse_term().
		    newprev = *it++;
		    break;
		}
		if (mode == DEFAULT && (flags & FLAG_LOVEHATE)) {
		    Parse(pParser, (ch == '+' ? LOVE : HATE), NULL, &state);
		    goto just_had_operator;
		}
		// Need to prevent the term after a LOVE or HATE starting a
		// term group...
		break;

	      case '(': // Bracketed subexpression.
		// Skip whitespace.
		it = find_if(it, end, is_not_whitespace);
		// Ignore ( at the end of the query string.
		if (it == end) goto done;
		if (prev > ' ' && strchr("()+-", prev) == NULL) {
		    // Or if not after whitespace or a bracket or '+' or '-'.
		    break;
		}
		if (*it == ')') {
		    // Ignore empty ().
		    newprev = *it++;
		    break;
		}
		if (mode == DEFAULT && (flags & FLAG_BOOLEAN)) {
		    prefix_stack.push_back(prefix_stack.back());
		    Parse(pParser, BRA, NULL, &state);
		}
		break;

	      case ')': // End of bracketed subexpression.
		if (mode == DEFAULT && (flags & FLAG_BOOLEAN)) {
		    // Remove the prefix we pushed for the corresponding BRA.
		    // If brackets are unmatched, it's a syntax error, but
		    // that's no excuse to SEGV!
		    if (prefix_stack.size() > 1) prefix_stack.pop_back();
		    Parse(pParser, KET, NULL, &state);
		}
		break;

	      case '~': // Synonym expansion.
		// Ignore at the end of the query string.
		if (it == end) goto done;
		if (prev > ' ' && prev != '+' && prev != '-' && prev != '(') {
		    // Or if not after whitespace, +, -, or an open bracket.
		    break;
		}
		if (!is_wordchar(*it)) {
		    // Ignore if not followed by a word character.
		    break;
		}
		if (mode == DEFAULT && (flags & FLAG_SYNONYM)) {
		    Parse(pParser, SYNONYM, NULL, &state);
		    goto just_had_operator;
		}
		break;
	    }
	    // Skip any other characters.
	    continue;
	}

	Assert(is_wordchar(*it));

	size_t term_start_index = it.raw() - qs.data();

	newprev = 'A'; // Any letter will do...

	// A term, a prefix, or a boolean operator.
	const PrefixInfo * prefixinfo = NULL;
	if ((mode == DEFAULT || mode == IN_GROUP) && !prefixmap.empty()) {
	    // Check for a fieldname prefix (e.g. title:historical).
	    Utf8Iterator p = find_if(it, end, is_not_wordchar);
	    if (p != end && *p == ':' && ++p != end && *p > ' ' && *p != ')') {
		string field;
		p = it;
		while (*p != ':')
		    Unicode::append_utf8(field, *p++);
		map<string, PrefixInfo>::const_iterator f;
		f = prefixmap.find(field);
		if (f != prefixmap.end()) {
		    // Special handling for prefixed fields, depending on
		    // the type of the prefix.
		    unsigned ch = *++p;
		    prefixinfo = &(f->second);

		    if (prefixinfo->filter) {
			// Drop out of IN_GROUP if we're in it.
			mode = DEFAULT;
			// Can't boolean filter prefix a subexpression or
			// phrase; just use anything following the prefix
			// until the next space or ')' as part of the boolean
			// filter term.
			it = p;
			string name;
			while (it != end && *it > ' ' && *it != ')')
			    Unicode::append_utf8(name, *it++);
			// Build the unstemmed form in field.
			field += ':';
			field += name;
			const list<string> & prefixes = prefixinfo->prefixes;
			Term * token = new Term(&state, name, prefixes, field);
			Parse(pParser, BOOLEAN_FILTER, token, &state);
			continue;
		    }

		    if (ch == '"' && (flags & FLAG_PHRASE)) {
			// Prefixed phrase, e.g.: subject:"space flight"
			mode = IN_PREFIXED_QUOTES;
			Parse(pParser, QUOTE, NULL, &state);
			it = p;
			newprev = ch;
			++it;
			prefix_stack.push_back(prefixinfo);
			continue;
		    }

		    if (ch == '(' && (flags & FLAG_BOOLEAN)) {
			// Prefixed subexpression, e.g.:
			// title:(fast NEAR food)
			mode = DEFAULT;
			Parse(pParser, BRA, NULL, &state);
			it = p;
			newprev = ch;
			++it;
			prefix_stack.push_back(prefixinfo);
			continue;
		    }

		    if (is_wordchar(ch)) {
			// Prefixed term.
			it = p;
		    } else {
			// It looks like a prefix but isn't, so parse it as
			// text instead.
			prefixinfo = NULL;
		    }
		}
	    }
	}

phrased_term:
	bool was_acronym;
	string term = parse_term(it, end, was_acronym);

	// Boolean operators.
	if ((mode == DEFAULT || mode == IN_GROUP) &&
	    (flags & FLAG_BOOLEAN) &&
	    // Don't want to interpret A.N.D. as an AND operator.
	    !was_acronym &&
	    !prefixinfo &&
	    term.size() >= 2 && term.size() <= 4 && U_isalpha(term[0])) {

	    string op = term;
	    if (flags & FLAG_BOOLEAN_ANY_CASE) {
		for (string::iterator i = op.begin(); i != op.end(); ++i) {
		    *i = C_toupper(*i);
		}
	    }
	    // Dispatch on length first to minimise string comparisons.
	    if (op.size() == 3) {
		if (op == "AND") {
		    Parse(pParser, AND, NULL, &state);
		    goto just_had_operator;
		}
		if (op == "NOT") {
		    Parse(pParser, NOT, NULL, &state);
		    goto just_had_operator;
		}
		if (op == "XOR") {
		    Parse(pParser, XOR, NULL, &state);
		    goto just_had_operator;
		}
		if (op == "ADJ") {
		    // Look for an optional window size, e.g. ADJ/3.
		    if (it != end && *it == '/') {
			size_t width = 0;
			Utf8Iterator p = it;
			while (++p != end && U_isdigit(*p)) {
			    width = (width * 10) + (*p - '0');
			}
			if (width && (p == end || is_whitespace(*p))) {
			    it = p;
			    Parse(pParser, ADJ, new Term(width), &state);
			    goto just_had_operator;
			}
		    }

		    Parse(pParser, ADJ, NULL, &state);
		    goto just_had_operator;
		}
	    } else if (op.size() == 2) {
		if (op == "OR") {
		    Parse(pParser, OR, NULL, &state);
		    goto just_had_operator;
		}
	    } else if (op.size() == 4) {
		if (op == "NEAR") {
		    // Look for an optional window size, e.g. NEAR/6.
		    if (it != end && *it == '/') {
			size_t width = 0;
			Utf8Iterator p = it;
			while (++p != end && U_isdigit(*p)) {
			    width = (width * 10) + (*p - '0');
			}
			if (width && (p == end || is_whitespace(*p))) {
			    it = p;
			    Parse(pParser, NEAR, new Term(width), &state);
			    goto just_had_operator;
			}
		    }

		    Parse(pParser, NEAR, NULL, &state);
		    goto just_had_operator;
		}
	    }
	}

	// If no prefix is set, use the default one.
	if (!prefixinfo) prefixinfo = prefix_stack.back();

	Assert(!prefixinfo->filter);

	{
	    string unstemmed_term(term);
	    term = Unicode::tolower(term);

	    // Reuse stem_strategy - STEM_SOME here means "stem terms except
	    // when used with positional operators".
	    stem_strategy stem_term = stem_action;
	    if (stem_term != STEM_NONE) {
		if (!stemmer.internal.get()) {
		    // No stemmer is set.
		    stem_term = STEM_NONE;
		} else if (stem_term == STEM_SOME) {
		    if (!should_stem(unstemmed_term) ||
			(it != end && is_stem_preventer(*it))) {
			// Don't stem this particular term.
			stem_term = STEM_NONE;
		    }
		}
	    }

	    Term * term_obj = new Term(&state, term, prefixinfo->prefixes,
				       unstemmed_term, stem_term, term_pos++);

	    // Check spelling, if we're a normal term, and any of the
	    // prefixes are empty.
	    if ((flags & FLAG_SPELLING_CORRECTION) && !was_acronym) {
		list<string>::const_iterator prefixiter;
		for (prefixiter = prefixinfo->prefixes.begin();
		     prefixiter != prefixinfo->prefixes.end();
		     ++prefixiter) {
		    if (!prefixiter->empty())
			continue;
		    if (!db.term_exists(term)) {
			string suggestion = db.get_spelling_suggestion(term);
			if (!suggestion.empty()) {
			    if (corrected_query.empty()) corrected_query = qs;
			    size_t term_end_index = it.raw() - qs.data();
			    size_t n = term_end_index - term_start_index;
			    size_t pos = term_start_index + correction_offset;
			    corrected_query.replace(pos, n, suggestion);
			    correction_offset += suggestion.size();
			    correction_offset -= n;
			}
		    }
		    break;
		}
	    }

	    if (mode == IN_PHRASED_TERM) {
		Parse(pParser, PHR_TERM, term_obj, &state);
	    } else {
		if (mode == DEFAULT || mode == IN_GROUP) {
		    if (it != end) {
			if ((flags & FLAG_WILDCARD) && *it == '*') {
			    Utf8Iterator p(it);
			    ++p;
			    if (p == end || !is_wordchar(*p)) {
				it = p;
				// Wildcard at end of term (also known as
				// "right truncation").
				Parse(pParser, WILD_TERM, term_obj, &state);
				continue;
			    }
			}
		    } else {
			if (flags & FLAG_PARTIAL) {
			    // Final term of a partial match query, with no
			    // following characters - treat as a wildcard.
			    Parse(pParser, PARTIAL_TERM, term_obj, &state);
			    continue;
			}
		    }
		}

		// See if the next token will be PHR_TERM - if so, this one
		// needs to be TERM not GROUP_TERM.
		if (mode == IN_GROUP && is_phrase_generator(*it)) {
		    // FIXME: can we clean this up?
		    Utf8Iterator p = it;
		    do {
			++p;
		    } while (p != end && is_phrase_generator(*p));
		    // Don't generate a phrase unless the phrase generators
		    // are immediately followed by another term.
		    if (p != end && is_wordchar(*p)) {
			mode = DEFAULT;
		    }
		}

		Parse(pParser, (mode == IN_GROUP ? GROUP_TERM : TERM),
		      term_obj, &state);
		if (mode != DEFAULT && mode != IN_GROUP) continue;
	    }
	}

	if (it == end) break;

	if (is_phrase_generator(*it)) {
	    // Skip multiple phrase generators.
	    do {
		++it;
	    } while (it != end && is_phrase_generator(*it));
	    // Don't generate a phrase unless the phrase generators are
	    // immediately followed by another term.
	    if (it != end && is_wordchar(*it)) {
		mode = IN_PHRASED_TERM;
		term_start_index = it.raw() - qs.data();
		goto phrased_term;
	    }
	} else if (mode == DEFAULT || mode == IN_GROUP) {
	    mode = DEFAULT;
	    if (!last_was_operator && is_whitespace(*it)) {
		newprev = ' ';
		// Skip multiple whitespace.
		do {
		    ++it;
		} while (it != end && is_whitespace(*it));
		// Don't generate a group unless the terms are only
		// separated by whitespace.
		if (it != end && is_wordchar(*it)) {
		    mode = IN_GROUP;
		}
	    }
	}
    }
done:
    // Implicitly close any unclosed quotes...
    if (mode == IN_QUOTES || mode == IN_PREFIXED_QUOTES)
	Parse(pParser, QUOTE, NULL, &state);
    // Feed the end-of-input token, then release the parser.
    Parse(pParser, 0, NULL, &state);
    ParseFree(pParser);

    errmsg = state.error;
    return state.query;
}
/// Accumulator for a query expression plus its love/hate/filter modifiers.
struct ProbQuery {
    Query * query;
    Query * love;
    Query * hate;
    // filter is a map from prefix to a query for that prefix.  Queries with
    // the same prefix are combined with OR, and the results of this are
    // combined with AND to get the full filter.
    map<filter_group_id, Query> filter;

    ProbQuery() : query(0), love(0), hate(0) { }

    ~ProbQuery() {
	delete query;
	delete love;
	delete hate;
    }

    /// AND together the per-group filter queries.
    Query merge_filters() const {
	map<filter_group_id, Query>::const_iterator i = filter.begin();
	Assert(i != filter.end());
	Query q = i->second;
	for (++i; i != filter.end(); ++i) {
	    q = Query(Query::OP_AND, q, i->second);
	}
	return q;
    }
};
/// A group of whitespace-separated terms, combined with the default operator.
class TermGroup {
    list<Term *> terms;

  public:
    TermGroup() { }

    /// Add a Term object to this TermGroup object; takes ownership.
    void add_term(Term * term) {
	terms.push_back(term);
    }

    /// Convert to a Xapian::Query * using default_op.
    Query * as_group(State *state) const;

    /** Provide a way to explicitly delete an object of this class.  The
     *  destructor is protected to prevent auto-variables of this type.
     */
    void destroy() { delete this; }

  protected:
    /** Protected destructor, so an auto-variable of this type is a
     *  compile-time error - you must allocate this object with new.
     */
    ~TermGroup() {
	while (!terms.empty()) {
	    delete terms.front();
	    terms.pop_front();
	}
    }
};
/** Convert this group of terms to a Query using the default operator.
 *
 *  With FLAG_AUTO_MULTIWORD_SYNONYMS, runs of consecutive terms are greedily
 *  matched against multi-word synonym keys ("word1 word2 ...") in the
 *  database, and matching synonyms are OR-ed in at the first term's position.
 *
 *  Consumes (deletes) this TermGroup.
 */
Query *
TermGroup::as_group(State *state) const
{
    Query * query = NULL;

    Query::op default_op = state->default_op();

    if (state->flags & QueryParser::FLAG_AUTO_MULTIWORD_SYNONYMS) {
	// Check for multi-word synonyms.
	Database db = state->get_database();

	string key;
	list<Term*>::const_iterator begin = terms.begin();
	list<Term*>::const_iterator i = begin;
	while (i != terms.end()) {
	    // Build a space-joined key from all remaining terms.
	    key.resize(0);
	    while (i != terms.end()) {
		if (!key.empty()) key += ' ';
		key += (*i)->name;
		++i;
	    }
	    // Greedily try to match as many consecutive words as possible,
	    // trimming the last word off the key after each failed lookup.
	    TermIterator syn, end;
	    while (true) {
		syn = db.synonyms_begin(key);
		end = db.synonyms_end(key);
		if (syn != end) break;
		if (--i == begin) break;
		// Drop the last word and the space before it.
		key.resize(key.size() - (*i)->name.size() - 1);
	    }
	    if (i == begin) {
		// No multi-synonym matches.
		if (state->is_stopword(*i)) {
		    state->add_to_stoplist(*i);
		} else {
		    add_to_query(query, default_op,
				 (*i)->get_query_with_auto_synonyms());
		}
		begin = ++i;
		continue;
	    }
	    // A multi-word synonym key matched the terms in [begin, i):
	    // add those terms individually...
	    Query * q = NULL;
	    list<Term*>::const_iterator j;
	    for (j = begin; j != i; ++j) {
		if (state->is_stopword(*j)) {
		    state->add_to_stoplist(*j);
		} else {
		    add_to_query(q, default_op, (*j)->get_query());
		}
	    }
	    // ...then OR in each synonym found.
	    // Use the position of the first term for the synonyms.
	    Xapian::termpos pos = (*begin)->pos;
	    begin = i;
	    while (syn != end) {
		add_to_query(q, Query::OP_OR, Query(*syn, 1, pos));
		++syn;
	    }
	    add_to_query(query, default_op, q);
	}
    } else {
	// No synonym expansion - just combine the terms (skipping
	// stopwords) with the default operator.
	list<Term*>::const_iterator i;
	for (i = terms.begin(); i != terms.end(); ++i) {
	    if (state->is_stopword(*i)) {
		state->add_to_stoplist(*i);
	    } else {
		add_to_query(query, default_op,
			     (*i)->get_query_with_auto_synonyms());
	    }
	}
    }
    delete this;
    return query;
}
class TermList {
list<Term *> terms;
size_t window;
/** Keep track of whether the terms added all have the same list of
* prefixes. If so, we'll build a set of phrases, one using each prefix.
* This works around the limitation that a phrase cannot have multiple
* components which are "OR" combinations of terms, but is also probably
* what users expect: ie, if a user specifies a phrase in a field, and that
* field maps to multiple prefixes, the user probably wants a phrase
* returned with all terms having one of those prefixes, rather than a
* phrase comprised of terms with differing prefixes.
*/
bool uniform_prefixes;
/** The list of prefixes of the terms added.
* This will be empty if the terms have different prefixes.
*/
list<string> prefixes;
public:
TermList() : window(0), uniform_prefixes(true) { }
/// Add an unstemmed Term object to this TermList object.
void add_positional_term(Term * term) {
if (terms.empty()) {
prefixes = term->prefixes;
} else if (uniform_prefixes && prefixes != term->prefixes) {
prefixes.clear();
uniform_prefixes = false;
}
term->need_positions();
terms.push_back(term);
}
void adjust_window(size_t alternative_window) {
if (alternative_window > window) window = alternative_window;
}
/// Convert to a query using the given operator and window size.
Query * as_opwindow_query(Query::op op, Xapian::termcount w_delta) const {
Query * q = NULL;
// Call terms.size() just once since std::list::size() may be O(n).
size_t n_terms = terms.size();
Xapian::termcount w = w_delta + terms.size();
if (uniform_prefixes) {
list<string>::const_iterator piter;
for (piter = prefixes.begin(); piter != prefixes.end(); ++piter) {
vector<Query> subqs;
subqs.reserve(n_terms);
list<Term *>::const_iterator titer;
for (titer = terms.begin(); titer != terms.end(); ++titer) {
Term * t = *titer;
subqs.push_back(Query(t->make_term(*piter), 1, t->pos));
}
add_to_query(q, Query::OP_OR,
Query(op, subqs.begin(), subqs.end(), w));
}
} else {
vector<Query> subqs;
subqs.reserve(n_terms);
list<Term *>::const_iterator titer;
for (titer = terms.begin(); titer != terms.end(); ++titer) {
subqs.push_back((*titer)->get_query());
}
q = new Query(op, subqs.begin(), subqs.end(), w);
}
delete this;
return q;
}
/// Convert to a Xapian::Query * using adjacent OP_PHRASE.
Query * as_phrase_query() const {
return as_opwindow_query(Query::OP_PHRASE, 0);
}
/// Convert to a Xapian::Query * using OP_NEAR.
Query * as_near_query() const {
// The common meaning of 'a NEAR b' is "a within 10 terms of b", which
// means a window size of 11. For more than 2 terms, we just add one
// to the window size for each extra term.
size_t w = window;
if (w == 0) w = 10;
return as_opwindow_query(Query::OP_NEAR, w - 1);
}
/// Convert to a Xapian::Query * using OP_PHRASE to implement ADJ.
Query * as_adj_query() const {
    // The common meaning of 'a ADJ b' is "a at most 10 terms before b",
    // i.e. a window of 11 for two terms.  As with NEAR, pass the spread
    // (window size minus one) and let as_opwindow_query() add the term
    // count, so each extra term widens the window by one.
    size_t spread = (window == 0) ? 10 : window;
    return as_opwindow_query(Query::OP_PHRASE, spread - 1);
}
/** Provide a way to explicitly delete an object of this class. The
 * destructor is protected to prevent auto-variables of this type.
 * (This is what the %destructor actions for phrase/phrased_term/
 * near_expr/adj_expr below call to discard an abandoned TermList.)
 */
void destroy() { delete this; }
protected:
/** Protected destructor, so an auto-variable of this type is a
 * compile-time error - you must allocate this object with new.
 */
~TermList() {
// The TermList owns the Term objects added to it, so delete them all.
list<Term *>::const_iterator t;
for (t = terms.begin(); t != terms.end(); ++t) {
delete *t;
}
}
};
// Helper macro for converting a boolean operation into a Xapian::Query.
//
// If either operand is missing (NULL), set a "Syntax: ..." error message
// and fail the parse; otherwise combine the two operand queries with OP
// and delete the operands.
//
// Note the whitespace around OP_TXT below: without it, C++11 parses
// "..."OP_TXT as a string literal with a user-defined-literal suffix, so
// the spaces are needed for plain string literal concatenation (the
// resulting message text is unchanged).
#define BOOL_OP_TO_QUERY(E, A, OP, B, OP_TXT) \
    do {\
	if (!A || !B) {\
	    state->error = "Syntax: <expression> " OP_TXT " <expression>";\
	    yy_parse_failed(yypParser);\
	    return;\
	}\
	E = new Query(OP, *A, *B);\
	delete A;\
	delete B;\
    } while (0)
}
// Parser configuration: every terminal carries a Term * as its semantic
// value, which must be deleted if lemon discards the token.
%token_type {Term *}
%token_destructor {delete $$;}
// Each parse is passed a State object for flags, the result query, and
// error reporting.
%extra_argument {State * state}
%parse_failure {
// If we've not already set an error message, set a default one.
if (!state->error) state->error = "parse error";
}
// Operators, grouped in order of increasing precedence (lemon assigns
// higher precedence to operators declared later):
%nonassoc ERROR.
%left OR.
%left XOR.
%left AND NOT.
%left NEAR ADJ.
%left LOVE HATE SYNONYM.
// Destructors for terminal symbols:
// TERM is a query term, including prefix (if any).
%destructor TERM {delete $$;}
// GROUP_TERM is a query term which follows a TERM or another GROUP_TERM and
// is only separated by whitespace characters.
%destructor GROUP_TERM {delete $$;}
// PHR_TERM is a query term which follows a TERM or another PHR_TERM and is
// separated only by one or more phrase generator characters (hyphen and
// apostrophe are common examples - see is_phrase_generator() for the list
// of all punctuation which does this).
%destructor PHR_TERM {delete $$;}
// WILD_TERM is like a TERM, but has a trailing wildcard which needs to be
// expanded.
%destructor WILD_TERM {delete $$;}
// PARTIAL_TERM is like a TERM, but it's at the end of the query string and
// we're doing "search as you type". It expands to something like WILD_TERM
// OR stemmed_form.
%destructor PARTIAL_TERM {delete $$;}
// BOOLEAN_FILTER is a query term with a prefix registered using
// add_bool_prefix(). It's added to the query using an OP_FILTER operator,
// (or OP_AND_NOT if it's negated) e.g. site:xapian.org or -site:xapian.org
%destructor BOOLEAN_FILTER {delete $$;}
// Grammar rules:
// query - The whole query - just an expr or nothing.
// query non-terminal doesn't need a type, so just give a dummy one.
%type query {int}
query ::= expr(E). {
// Save the parsed query in the State structure so we can return it.
// E can be NULL (e.g. when every term was a stopword), in which case
// the result is an empty Query.
if (E) {
state->query = *E;
delete E;
} else {
state->query = Query();
}
}
query ::= . {
// Handle a query string with no terms in.
state->query = Query();
}
// expr - A query expression.
%type expr {Query *}
%destructor expr {delete $$;}
expr(E) ::= prob_expr(P).
{ E = P; }
expr(E) ::= bool_arg(A) AND bool_arg(B).
{ BOOL_OP_TO_QUERY(E, A, Query::OP_AND, B, "AND"); }
expr(E) ::= bool_arg(A) NOT bool_arg(B). {
// 'NOT foo' -> '<alldocuments> NOT foo'
// (only when FLAG_PURE_NOT is set; otherwise A stays NULL and
// BOOL_OP_TO_QUERY reports a syntax error).
if (!A && (state->flags & QueryParser::FLAG_PURE_NOT)) {
A = new Query("", 1, 0);
}
BOOL_OP_TO_QUERY(E, A, Query::OP_AND_NOT, B, "NOT");
}
// The [NOT] annotation gives this rule the precedence of NOT.
expr(E) ::= bool_arg(A) AND NOT bool_arg(B). [NOT]
{ BOOL_OP_TO_QUERY(E, A, Query::OP_AND_NOT, B, "AND NOT"); }
expr(E) ::= bool_arg(A) OR bool_arg(B).
{ BOOL_OP_TO_QUERY(E, A, Query::OP_OR, B, "OR"); }
expr(E) ::= bool_arg(A) XOR bool_arg(B).
{ BOOL_OP_TO_QUERY(E, A, Query::OP_XOR, B, "XOR"); }
// bool_arg - an argument to a boolean operator such as AND or OR.
%type bool_arg {Query *}
%destructor bool_arg {delete $$;}
bool_arg(A) ::= expr(E). { A = E; }
// Allow an empty argument (given ERROR precedence, the lowest declared,
// so it's only used as a last resort).
bool_arg(A) ::= . [ERROR] {
// Set the argument to NULL, which enables the bool_arg-using rules in
// expr above to report uses of AND, OR, etc which don't have two
// arguments.
A = NULL;
}
// prob_expr - a single compound term, or a prob.
%type prob_expr {Query *}
%destructor prob_expr {delete $$;}
// Combine the parts of a ProbQuery into one Query: loved terms are
// attached with OP_AND_MAYBE, boolean filters with OP_FILTER, and hated
// terms are removed with OP_AND_NOT.
prob_expr(E) ::= prob(P). {
E = P->query;
P->query = NULL;
// Handle any "+ terms".
if (P->love) {
if (P->love->empty()) {
// +<nothing>.
delete E;
E = P->love;
} else if (E) {
// Loved terms are required, so they go on the left (required)
// side of the AND_MAYBE - hence the swap.
swap(E, P->love);
add_to_query(E, Query::OP_AND_MAYBE, P->love);
} else {
E = P->love;
}
P->love = NULL;
}
// Handle any boolean filters.
if (!P->filter.empty()) {
if (E) {
add_to_query(E, Query::OP_FILTER, P->merge_filters());
} else {
// Make the query a boolean one.
E = new Query(Query::OP_SCALE_WEIGHT, P->merge_filters(), 0.0);
}
}
// Handle any "- terms".
if (P->hate && !P->hate->empty()) {
if (!E) {
// Can't just hate!
yy_parse_failed(yypParser);
return;
}
*E = Query(Query::OP_AND_NOT, *E, *P->hate);
}
// FIXME what if E && E->empty() (all terms are stopwords)?
delete P;
}
prob_expr(E) ::= term(T). {
E = T;
}
// prob - a probabilistic sub-expression consisting of stop_terms, "+" terms,
// "-" terms, boolean filters, and/or value ranges.
//
// Note: stop_term can also be several other things other than a simple term!
%type prob {ProbQuery *}
%destructor prob {delete $$;}
// A value range on its own starts a new ProbQuery.
prob(P) ::= RANGE_START(A) RANGE_END(B). {
Query range;
Xapian::valueno valno = state->value_range(range, A, B);
if (valno == BAD_VALUENO) {
yy_parse_failed(yypParser);
return;
}
P = new ProbQuery;
P->filter[filter_group_id(valno)] = range;
}
// A further value range: ranges on the same value slot are OR-ed together,
// like boolean filters with the same prefix.
prob(P) ::= stop_prob(Q) RANGE_START(A) RANGE_END(B). {
Query range;
Xapian::valueno valno = state->value_range(range, A, B);
if (valno == BAD_VALUENO) {
yy_parse_failed(yypParser);
return;
}
P = Q;
Query & q = P->filter[filter_group_id(valno)];
q = Query(Query::OP_OR, q, range);
}
// Two adjacent terms start a prob; either may be NULL if it was a stopword.
prob(P) ::= stop_term(T) stop_term(U). {
P = new ProbQuery;
P->query = T;
if (U) add_to_query(P->query, state->default_op(), U);
}
prob(P) ::= prob(Q) stop_term(T). {
P = Q;
// If T is a stopword, there's nothing to do here.
if (T) add_to_query(P->query, state->default_op(), T);
}
prob(P) ::= LOVE term(T). {
P = new ProbQuery;
if (state->default_op() == Query::OP_AND) {
// With a default op of AND, '+' only serves to bypass the stoplist.
P->query = T;
} else {
P->love = T;
}
}
prob(P) ::= stop_prob(Q) LOVE term(T). {
P = Q;
if (state->default_op() == Query::OP_AND) {
/* The default op is AND, so we just put loved terms into the query
 * (in this case the only effect of love is to ignore the stopword
 * list). */
add_to_query(P->query, Query::OP_AND, T);
} else {
add_to_query(P->love, Query::OP_AND, T);
}
}
// A hated ('-') term.
prob(P) ::= HATE term(T). {
P = new ProbQuery;
P->hate = T;
}
prob(P) ::= stop_prob(Q) HATE term(T). {
P = Q;
add_to_query(P->hate, Query::OP_OR, T);
}
// A hated boolean filter, e.g. -site:xapian.org.
prob(P) ::= HATE BOOLEAN_FILTER(T). {
P = new ProbQuery;
P->hate = new Query(T->get_query());
delete T;
}
prob(P) ::= stop_prob(Q) HATE BOOLEAN_FILTER(T). {
P = Q;
add_to_query(P->hate, Query::OP_OR, T->get_query());
delete T;
}
// A boolean filter on its own, e.g. site:xapian.org.
prob(P) ::= BOOLEAN_FILTER(T). {
P = new ProbQuery;
P->filter[T->get_filter_group_id()] = T->get_query();
delete T;
}
prob(P) ::= stop_prob(Q) BOOLEAN_FILTER(T). {
P = Q;
// We OR filters with the same prefix...
Query & q = P->filter[T->get_filter_group_id()];
q = Query(Query::OP_OR, q, T->get_query());
delete T;
}
prob(P) ::= LOVE BOOLEAN_FILTER(T). {
// LOVE BOOLEAN_FILTER(T) is just the same as BOOLEAN_FILTER
P = new ProbQuery;
P->filter[T->get_filter_group_id()] = T->get_query();
delete T;
}
prob(P) ::= stop_prob(Q) LOVE BOOLEAN_FILTER(T). {
// LOVE BOOLEAN_FILTER(T) is just the same as BOOLEAN_FILTER
P = Q;
// We OR filters with the same prefix...
Query & q = P->filter[T->get_filter_group_id()];
q = Query(Query::OP_OR, q, T->get_query());
delete T;
}
// stop_prob - A prob or a stop_term.
%type stop_prob {ProbQuery *}
%destructor stop_prob {delete $$;}
stop_prob(P) ::= prob(Q).
{ P = Q; }
// Promote a lone stop_term (which may be NULL if it was a stopword) to a
// ProbQuery so the prob rules above can extend it.
stop_prob(P) ::= stop_term(T). {
P = new ProbQuery;
P->query = T;
}
// stop_term - A term which should be checked against the stopword list,
// or a compound_term.
//
// If a term is loved, hated, or in a phrase, we don't want to consult the
// stopword list, so stop_term isn't used there (instead term is).
%type stop_term {Query *}
%destructor stop_term {delete $$;}
stop_term(T) ::= TERM(U). {
if (state->is_stopword(U)) {
// Drop the stopword from the query, but record it in the stoplist.
T = NULL;
state->add_to_stoplist(U);
} else {
T = new Query(U->get_query_with_auto_synonyms());
}
delete U;
}
stop_term(T) ::= compound_term(U). {
T = U;
}
// term - A term or a compound_term.
//
// Unlike stop_term, this never consults the stopword list - it's used in
// contexts (e.g. after LOVE or HATE) where stopwords must be kept.
%type term {Query *}
%destructor term {delete $$;}
term(T) ::= TERM(U). {
T = new Query(U->get_query_with_auto_synonyms());
delete U;
}
term(T) ::= compound_term(U). {
T = U;
}
// compound_term - A WILD_TERM, a quoted phrase (with or without prefix), a
// phrased_term, group, near_expr, adj_expr, or a bracketed subexpression (with
// or without prefix).
%type compound_term {Query *}
%destructor compound_term {delete $$;}
compound_term(T) ::= WILD_TERM(U).
{ T = U->as_wildcarded_query(state); }
compound_term(T) ::= PARTIAL_TERM(U).
{ T = U->as_partial_query(state); }
// A quoted phrase: "some words".
compound_term(T) ::= QUOTE phrase(P) QUOTE.
{ T = P->as_phrase_query(); }
// Terms joined by phrase-generating punctuation, e.g. one-two.
compound_term(T) ::= phrased_term(P).
{ T = P->as_phrase_query(); }
compound_term(T) ::= group(P). {
T = P->as_group(state);
}
compound_term(T) ::= near_expr(P).
{ T = P->as_near_query(); }
compound_term(T) ::= adj_expr(P).
{ T = P->as_adj_query(); }
// A bracketed subexpression.
compound_term(T) ::= BRA expr(E) KET.
{ T = E; }
// '~term' - search for the term including its synonyms.
compound_term(T) ::= SYNONYM TERM(U). {
T = new Query(U->get_query_with_synonyms());
delete U;
}
// phrase - The "inside the quotes" part of a double-quoted phrase.
%type phrase {TermList *}
%destructor phrase {$$->destroy();}
phrase(P) ::= TERM(T). {
P = new TermList;
P->add_positional_term(T);
}
phrase(P) ::= phrase(Q) TERM(T). {
// Append each further term; the TermList takes ownership of T.
P = Q;
P->add_positional_term(T);
}
// phrased_term - A phrased term works like a single term, but is actually
// 2 or more terms linked together into a phrase by punctuation. There must be
// at least 2 terms in order to be able to have punctuation between the terms!
%type phrased_term {TermList *}
%destructor phrased_term {$$->destroy();}
phrased_term(P) ::= TERM(T) PHR_TERM(U). {
// The TermList takes ownership of the Term objects added to it.
P = new TermList;
P->add_positional_term(T);
P->add_positional_term(U);
}
phrased_term(P) ::= phrased_term(Q) PHR_TERM(T). {
P = Q;
P->add_positional_term(T);
}
// group - A group of terms separated only by whitespace - candidates for
// multi-term synonyms.
%type group {TermGroup *}
%destructor group {$$->destroy();}
group(P) ::= TERM(T) GROUP_TERM(U). {
// The TermGroup takes ownership of the Term objects added to it.
P = new TermGroup;
P->add_term(T);
P->add_term(U);
}
group(P) ::= group(Q) GROUP_TERM(T). {
P = Q;
P->add_term(T);
}
// near_expr - 2 or more terms with NEAR in between. There must be at least 2
// terms in order for there to be any NEAR operators!
%type near_expr {TermList *}
%destructor near_expr {$$->destroy();}
near_expr(P) ::= TERM(T) NEAR(N) TERM(U). {
P = new TermList;
P->add_positional_term(T);
P->add_positional_term(U);
// A non-NULL NEAR token carries an explicit window size (presumably
// from "NEAR/n" syntax - confirm against the tokeniser) in its
// termpos; adjust_window() keeps the largest window requested.
if (N) {
P->adjust_window(N->get_termpos());
delete N;
}
}
near_expr(P) ::= near_expr(Q) NEAR(N) TERM(T). {
P = Q;
P->add_positional_term(T);
if (N) {
P->adjust_window(N->get_termpos());
delete N;
}
}
// adj_expr - 2 or more terms with ADJ in between. There must be at least 2
// terms in order for there to be any ADJ operators!
%type adj_expr {TermList *}
%destructor adj_expr {$$->destroy();}
adj_expr(P) ::= TERM(T) ADJ(N) TERM(U). {
P = new TermList;
P->add_positional_term(T);
P->add_positional_term(U);
// As for NEAR, a non-NULL ADJ token carries an explicit window size in
// its termpos; adjust_window() keeps the largest window requested.
if (N) {
P->adjust_window(N->get_termpos());
delete N;
}
}
adj_expr(P) ::= adj_expr(Q) ADJ(N) TERM(T). {
P = Q;
P->add_positional_term(T);
if (N) {
P->adjust_window(N->get_termpos());
delete N;
}
}
// Select yacc syntax highlighting in vim editor: vim: syntax=yacc
// (lemon syntax colouring isn't supplied by default; yacc does an OK job).