Nutch 搜索前台的默认搜索方式是 AND,也就是所有关键词都必须出现。现在想实现 OR 查询,却发现 Nutch 本身并不支持。在官方论坛搜索无果、修改源代码也无果的情况下,想到用插件来实现 OR 查询,于是参照 query-base 插件改写了一个 query-or 插件,源码如下:

/**
 * Query filter that gives clauses written in the {@code or} field optional
 * (OR) semantics.
 *
 * <p>For every input clause whose field is {@code "or"}, the filter builds a
 * {@link BooleanQuery} that matches the clause's term (or phrase) in any of
 * the fields listed in {@link #FIELDS}, and adds that group to the output
 * query with {@code Occur.SHOULD}. Clauses in any other field are ignored.
 *
 * <p>Per-field boosts start from built-in defaults and are refreshed from the
 * configuration in {@link #setConf(Configuration)}.
 */
public class OrQueryFilter implements QueryFilter {
	/** Name of the query field that triggers this filter. */
	private static final String OR_FIELD = "or";

	private Configuration conf;

	/**
	 * Boost applied to each per-clause OR group. Defaults to the neutral
	 * value 1.0 so that the group's score is not multiplied away to zero.
	 */
	float myBoost = 1.0f;

	/** Index fields searched for every "or" clause; order matches the *_BOOST indices. */
	private static final String[] FIELDS = { "url", "anchor", "content", "title", "host" };
	private static final int URL_BOOST = 0;
	private static final int ANCHOR_BOOST = 1;
	private static final int CONTENT_BOOST = 2;
	private static final int TITLE_BOOST = 3;
	private static final int HOST_BOOST = 4;

	// Not referenced anywhere in this class; kept as declared.
	private static int SLOP = Integer.MAX_VALUE;

	/** Stored by {@link #setPhraseBoost(float)}; not read by {@link #filter}. */
	private float PHRASE_BOOST = 1.0f;

	/**
	 * Per-field boosts, indexed by the *_BOOST constants. Initialised with
	 * non-zero defaults so the filter produces usable scores even when no
	 * configuration value is available.
	 */
	private final float[] FIELD_BOOSTS = { 4.0f, 2.0f, 1.0f, 1.5f, 2.0f };

	/**
	 * Set the boost factor for url matches, relative to content and anchor
	 * matches.
	 *
	 * @param boost boost applied to queries on the "url" field
	 */
	public void setUrlBoost(float boost) {
		FIELD_BOOSTS[URL_BOOST] = boost;
	}

	/**
	 * Set the boost factor for anchor matches, relative to url and content
	 * matches.
	 *
	 * @param boost boost applied to queries on the "anchor" field
	 */
	public void setAnchorBoost(float boost) {
		FIELD_BOOSTS[ANCHOR_BOOST] = boost;
	}

	/**
	 * Set the boost factor for sloppy phrase matches relative to unordered term
	 * matches.
	 *
	 * @param boost phrase boost to store
	 */
	public void setPhraseBoost(float boost) {
		PHRASE_BOOST = boost;
	}

	/**
	 * Stores the configuration and refreshes the boosts from it.
	 *
	 * <p>Each boost keeps its current value when the corresponding property
	 * is absent. NOTE(review): the property names are the ones the stock
	 * basic query filter is believed to use - confirm against
	 * nutch-default.xml.
	 *
	 * @param conf configuration to use; when {@code null} only the reference
	 *             is stored and the boosts are left untouched
	 */
	public void setConf(Configuration conf) {
		this.conf = conf;
		if (conf == null) {
			return;
		}
		FIELD_BOOSTS[URL_BOOST] =
				conf.getFloat("query.url.boost", FIELD_BOOSTS[URL_BOOST]);
		FIELD_BOOSTS[ANCHOR_BOOST] =
				conf.getFloat("query.anchor.boost", FIELD_BOOSTS[ANCHOR_BOOST]);
		FIELD_BOOSTS[CONTENT_BOOST] =
				conf.getFloat("query.content.boost", FIELD_BOOSTS[CONTENT_BOOST]);
		FIELD_BOOSTS[TITLE_BOOST] =
				conf.getFloat("query.title.boost", FIELD_BOOSTS[TITLE_BOOST]);
		FIELD_BOOSTS[HOST_BOOST] =
				conf.getFloat("query.host.boost", FIELD_BOOSTS[HOST_BOOST]);
		PHRASE_BOOST = conf.getFloat("query.phrase.boost", PHRASE_BOOST);
	}

	/** @return the configuration last passed to {@link #setConf(Configuration)} */
	public Configuration getConf() {
		return this.conf;
	}

	/**
	 * Translates every clause in the {@code or} field into an optional
	 * multi-field Lucene query and appends it to {@code output}.
	 *
	 * @param input  parsed Nutch query whose clauses are inspected
	 * @param output Lucene query being assembled; it is modified in place
	 * @return the same {@code output} instance
	 * @throws QueryException declared by the {@link QueryFilter} contract
	 */
	@Override
	public BooleanQuery filter(Query input, BooleanQuery output)
			throws QueryException {

		// Created lazily on the first phrase clause and then reused, instead
		// of being rebuilt for every field of every phrase clause.
		CommonGrams commonGrams = null;

		for (Clause c : input.getClauses()) {

			// Constant-first comparison avoids an NPE on a null field.
			if (!OR_FIELD.equals(c.getField())) {
				continue;
			}

			BooleanQuery bq = new BooleanQuery();

			for (int f = 0; f < FIELDS.length; f++) {
				// Start from the untouched clause for each field, because the
				// phrase optimisation below is field-specific.
				Clause o = c;
				if (o.isPhrase()) {
					if (commonGrams == null) {
						commonGrams = new CommonGrams(getConf());
					}
					String[] opt = commonGrams.optimizePhrase(o.getPhrase(),
							FIELDS[f]);
					if (opt.length == 1) {
						// The phrase collapsed into a single term.
						o = new Clause(new Term(opt[0]), o.isRequired(),
								o.isProhibited(), getConf());
					} else {
						o = new Clause(new Phrase(opt), o.isRequired(),
								o.isProhibited(), getConf());
					}
				}

				org.apache.lucene.search.Query fieldQuery = o.isPhrase()
						? exactPhrase(o.getPhrase(), FIELDS[f], FIELD_BOOSTS[f])
						: termQuery(FIELDS[f], o.getTerm(), FIELD_BOOSTS[f]);
				// SHOULD: a hit in any one field is enough.
				bq.add(fieldQuery, BooleanClause.Occur.SHOULD);
			}

			bq.setBoost(myBoost);
			// SHOULD at the top level is what makes the clause optional (OR).
			output.add(bq, BooleanClause.Occur.SHOULD);
		}

		return output;
	}

	/**
	 * Builds a Lucene phrase query containing every term of the Nutch phrase,
	 * in order, against the given field.
	 *
	 * @param nutchPhrase phrase whose terms are added
	 * @param field       index field to search
	 * @param boost       boost set on the resulting query
	 * @return the boosted phrase query
	 */
	private org.apache.lucene.search.Query exactPhrase(Phrase nutchPhrase,
			String field, float boost) {
		Term[] terms = nutchPhrase.getTerms();
		PhraseQuery exactPhrase = new PhraseQuery();
		for (int i = 0; i < terms.length; i++) {
			exactPhrase.add(luceneTerm(field, terms[i]));
		}
		exactPhrase.setBoost(boost);
		return exactPhrase;
	}

	/**
	 * Builds a Lucene term query for a single Nutch term against the given
	 * field.
	 *
	 * @param field index field to search
	 * @param term  Nutch query term
	 * @param boost boost set on the resulting query
	 * @return the boosted term query
	 */
	private org.apache.lucene.search.Query termQuery(String field, Term term,
			float boost) {
		TermQuery result = new TermQuery(luceneTerm(field, term));
		result.setBoost(boost);
		return result;
	}

	/** Utility to construct a Lucene Term given a Nutch query term and field. */
	private static org.apache.lucene.index.Term luceneTerm(String field,
			Term term) {
		return new org.apache.lucene.index.Term(field, term.toString());
	}
}

使用方式:查询时输入 or:关键词1 or:关键词2