Java Lucene 4.5 如何实现不区分大小写的搜索

我们已经基于Java Lucene 4.5实现了搜索引擎,我希望搜索时不区分字段值的大小写,但目前做不到(例如,当我搜索名为“Banglore”的城市时得到一条结果,但当我搜索“banglore”时却得到0条结果)。

我已经使用StandardAnalyzer来分析数据,并使用WildcardQuery来实现类似SQL中Like条件的匹配(我尝试过这里提到的方法,但没有成功)。

我不知道我哪里出错了。 我很感激有关修复此区分大小写问题的任何指导。

 public SearchHelper { Analyzer analyzer; Directory index; public IndexSearcher searcher = null; public IndexWriter indexWriter = null; public QueryParser parser = null; private static int hitsPerPage = 100; /** * @param indexFileLocation * @throws IOException */ public SearchHelper(String indexFileLocation) throws IOException { // this.analyzer =new StandardAnalyzer(); this.analyzer = new CaseStandardAnalyzer(); // analyzer = new ThaiAnalyzer(); this.index = FSDirectory.open(java.nio.file.Paths.get(indexFileLocation)); } /** * @param create * @return * @throws IOException */ public IndexWriter getIndexWriter(boolean create) throws IOException { if (indexWriter == null) { IndexWriterConfig iwc = new IndexWriterConfig(this.analyzer); this.indexWriter = new IndexWriter(this.index, iwc); } return this.indexWriter; } //End of getIndexWriter /** * @throws IOException */ public void closeIndexWriter() throws IOException { if (this.indexWriter != null) { this.indexWriter.commit();//optimize(); LUCENE_36 this.indexWriter.close(); } } //End closeIndexWriter /** * @param indexFileLocation * @throws CorruptIndexException * @throws IOException */ public void startSearch(String indexFileLocation) throws CorruptIndexException, IOException { // searcher = new IndexSearcher(FSDirectory.open(new File(indexFileLocation))); IndexReader reader = DirectoryReader.open(FSDirectory.open(java.nio.file.Paths.get(indexFileLocation))); // IndexReader.open(this.index); // open(getIndexWriter(true), true); this.searcher = new IndexSearcher(reader); } /** * @param fieldNames * @param fieldValues * @return * @throws IOException * @throws ParseException * * 

* https://stackoverflow.com/questions/2005084/how-to-specify-two-fields-in-lucene-queryparser */ public ScoreDoc[] searchSEO(String[] fieldNames, String[] fieldValues, int limitSize) throws IOException, ParseException { this.analyzer = new StandardAnalyzer(); int searchFieldSize = (null == fieldNames) ? 0 : fieldNames.length; BooleanQuery booleanQuery = new BooleanQuery(); for (int i = 0; i 0) { collector = TopScoreDocCollector.create(limitSize); } else { collector = TopScoreDocCollector.create(hitsPerPage); } this.searcher.search(booleanQuery,collector); return collector.topDocs().scoreDocs; } /** * @param whichField * @param searchString * @return * @throws IOException * @throws ParseException */ public Query searchIndexWithWildcardQuery(String whichField, String searchString) throws IOException, ParseException { Term term = addTerm(whichField, "*" + searchString + "*"); Query query = new WildcardQuery(term); return query; } /** * @param whichField * @param searchString * @return */ public Term addTerm(String whichField, String searchString) { Term term = new Term(whichField, searchString); return term; } /** * @param searchString * @param operation * @return * @throws ParseException */ public Query addConditionOpertaion(String searchString, String operation) throws ParseException { Query query = null; if ("and".equals(operation)) { parser.setDefaultOperator(QueryParser.AND_OPERATOR); } else if("or".equals(operation)) { parser.setDefaultOperator(QueryParser.AND_OPERATOR); } query = parser.parse(searchString); return query; } /** * @param booleanQuery BooleanQuery * @param q Query * @param type int , 1--> Must, 2-->Should, 3 --> Must Not */ public void addQueries(BooleanQuery booleanQuery, Query q, int type) { switch(type) { case 1: booleanQuery.add(q, Occur.MUST); break; case 2: booleanQuery.add(q, Occur.SHOULD); break; default:booleanQuery.add(q, Occur.MUST_NOT); break; } //End of switch } public QueryParser getParser() { return parser; } public void 
setParser(String fieldName) { this.parser = new QueryParser(fieldName, this.analyzer); } public void getDefaultByStatus(int status) { this.analyzer = new StandardAnalyzer(); this.parser = new QueryParser("status", this.analyzer); } protected void doClear(File dir,boolean deleteSubDir) { for (File file: dir.listFiles()) { if (file.isDirectory() && deleteSubDir) { doClear(file,deleteSubDir); } file.delete(); } } //End of doClear(); protected void doClose() throws IOException { this.searcher.getIndexReader().close(); } public boolean add(Object Obj) throws Exception { User currentUser = (User)Obj; boolean isAdded = false; org.apache.lucene.document.Document luceneDoc = new org.apache.lucene.document.Document(); luceneDoc.add(new IntField("oid", currentUser.getOid(), Field.Store.YES)); luceneDoc.add(new IntField("status", currentUser.getStatus(), Field.Store.YES)); luceneDoc.add(new StringField("login", currentUser.getLogin(), Field.Store.YES)); luceneDoc.add(new StringField("fName", currentUser.getFirstName(), Field.Store.YES)); luceneDoc.add(new StringField("lName", currentUser.getLastName(), Field.Store.NO)); luceneDoc.add(new StringField("email", currentUser.getEmailId(), Field.Store.YES)); luceneDoc.add(new StringField("city", currentUser.getCity(), Field.Store.YES)); // addRelatedFields(luceneDoc,city.getStateCode()); IndexWriter writer = getIndexWriter(false); writer.addDocument(luceneDoc); closeIndexWriter(); isAdded = true; System.out.println(isAdded); return isAdded; } // End of add public boolean update(Object Obj) throws Exception { boolean isUpdated = false; User currentUser = (User) Obj; org.apache.lucene.document.Document luceneDoc = new org.apache.lucene.document.Document(); // luceneDoc.add(new IntField("oid", currentUser.getOid(), Field.Store.YES)); luceneDoc.add(new IntField("oid", currentUser.getOid(), Field.Store.YES)); luceneDoc.add(new StringField("login", currentUser.getLogin(), Field.Store.YES)); luceneDoc.add(new IntField("status", 
currentUser.getStatus(), Field.Store.YES)); luceneDoc.add(new StringField("fName", currentUser.getFirstName(), Field.Store.YES)); luceneDoc.add(new StringField("lName", currentUser.getLastName(), Field.Store.NO)); luceneDoc.add(new StringField("email", currentUser.getEmailId(), Field.Store.YES)); luceneDoc.add(new StringField("city", currentUser.getCity(), Field.Store.YES)); // addRelatedFields(luceneDoc,city.getStateCode()); IndexWriter writer = getIndexWriter(false); writer.updateDocument(new Term("login", currentUser.getLogin()),luceneDoc); closeIndexWriter(); isUpdated = true; return isUpdated; } // End of update public boolean delete(Object Obj) throws Exception { boolean isDeleted = false; User currentUser = (User) Obj; Term deleteTerm = new Term("login", currentUser.getLogin()); IndexWriter writer = getIndexWriter(false); writer.deleteDocuments(deleteTerm); // Or use Query writer.forceMergeDeletes(); closeIndexWriter(); isDeleted = true; return isDeleted; } // End of delete @Override public Object search(String[] fieldNames, String[] fieldValues, int returnType, int limit) throws Exception { Object obj = null; org.apache.lucene.search.ScoreDoc[] hits = searchSEO(fieldNames,fieldValues, limit); int hitSize = (null == hits) ? 
0 : hits.length; System.out.println("total:" + hitSize); doClose(); return obj; } // End of search public void addThreadUser() { User user = new User(); addUserPojo(user); add(user); } public void updateThreadUser() { User user = new User(); addUserPojo(user); update(user); } public void deleteThreadUser() { User user = new User(); addUserPojo(user); delete(user); } private void addUserPojo(User user) { user.setOid(3); user.setLogin("senthil"); user.setFirstName("Semthil"); user.setLastName("Semthil"); user.setStatus(1); user.setCity("Combiatore"); user.setEmailId("semthil@xyz.com"); } public void searchUser() { searchUser(new String[] {"login"}, new String[] {"Se"}, null); } public static void main(String[] args) { SearchHelper test = new SearchHelper(); test.searchUser(); } }

您正在使用StringField索引数据,但这种字段会绕过分析器链:无论使用哪种分析器,它始终把字段值原样索引为单个词元(token)。 如果希望数据经过分析(StandardAnalyzer本身已经会把词项转换为小写),则应改用TextField。 除此之外,WildcardQuery不会分析它的查询词项,所以如果你搜索Banglore,它将无法匹配索引中已被转换为小写的banglore。 您必须自己把搜索词转换为小写(或先用分析器处理)。

按照您引用的那篇帖子的建议,使用LowerCaseFilter:

  TokenStream stream = new StandardFilter(Version.LUCENE_CURRENT, tokenizer); stream = new LowerCaseFilter(Version.LUCENE_CURRENT, stream); 

这篇文章中有一个更完整的例子。

您也可以使用自定义的比较器类:

 class CaseIgonreCompare extends FieldComparator{ private String field; private String bottom; private String topValue; private BinaryDocValues cache; private String[] values; public CaseIgonreCompare(String field, int numHits) { this.field = field; this.values = new String[numHits]; } @Override public int compare(int arg0, int arg1) { return compareValues(values[arg0], values[arg1]); } @Override public int compareBottom(int arg0) throws IOException { return compareValues(bottom, cache.get(arg0).utf8ToString()); } @Override public int compareTop(int arg0) throws IOException { return compareValues(topValue, cache.get(arg0).utf8ToString()); } public int compareValues(String first, String second) { int val = first.length() - second.length(); return val == 0 ? first.compareToIgnoreCase(second) : val; }; @Override public void copy(int arg0, int arg1) throws IOException { values[arg0] = cache.get(arg1).utf8ToString(); } @Override public void setBottom(int arg0) { this.bottom = values[arg0]; } @Override public FieldComparator setNextReader(AtomicReaderContext arg0) throws IOException { this.cache = FieldCache.DEFAULT.getTerms(arg0.reader(), field , true); return this; } @Override public void setTopValue(String arg0) { this.topValue = arg0; } @Override public String value(int arg0) { return values[arg0]; } }