Apache Jackrabbit JCA 2.7.5 .docx和.xlsx索引

我正在使用 Apache Jackrabbit JCA 2.7.5,问题是 .docx 和 .xlsx 文件没有被编入索引。

我的步骤:

  • 将Jackrabbit JCA部署为glassfish上的resource adapter
  • 为该 resource adapter 创建一个 Connector Connection Pool,并设置 ConfigFile=path/to/the/repository.xml 和 HomeDir=path/to/the(HomeDir 中不包含 repository.xml 文件名)
  • 为连接器池(jndi)创建Connector Resources
  • 创建Web应用程序
  • 创建类以从连接器资源获取会话(下面的代码)

     import java.io.Serializable;
     import java.net.MalformedURLException;
     import javax.annotation.Resource;
     import javax.ejb.Stateless;
     import javax.jcr.LoginException;
     import javax.jcr.Repository;
     import javax.jcr.RepositoryException;
     import javax.jcr.Session;
     import javax.jcr.SimpleCredentials;
     import javax.naming.InitialContext;
     import javax.naming.NamingException;

     /**
      * Stateless EJB that opens JCR sessions on the repository bound in JNDI under
      * {@code "jndi/jca"}.
      *
      * <p>Both public fields are overwritten on every call to
      * {@link #getSession(String, String)}.
      */
     @Stateless
     public class OcmRepository implements Serializable {

         /** Repository returned by the most recent JNDI lookup. */
         public Repository repository;

         /** Session returned by the most recent login. */
         public Session session;

         public OcmRepository() {
         }

         /**
          * Looks up the repository in JNDI and logs in with the given credentials.
          *
          * @param log user id passed to {@link SimpleCredentials}
          * @param mdp password; must not be {@code null}
          * @return the newly opened session (also stored in {@link #session})
          * @throws LoginException if the repository rejects the credentials
          * @throws RepositoryException if the login fails for another reason
          * @throws NamingException if the JNDI lookup (or closing the context) fails
          * @throws MalformedURLException declared for backward compatibility with existing callers
          */
         public Session getSession(String log, String mdp)
                 throws LoginException, RepositoryException, NamingException, MalformedURLException {
             InitialContext initialContext = new InitialContext();
             try {
                 repository = (Repository) initialContext.lookup("jndi/jca");
             } finally {
                 // Release the naming context right away instead of leaving it to the GC.
                 initialContext.close();
             }
             // A null workspace name lets the repository pick its default workspace.
             session = repository.login(new SimpleCredentials(log, mdp.toCharArray()), null);
             return session;
         }
     }
  • 创建自定义文件类型

     import javax.jcr.PropertyType; import javax.jcr.Session; import javax.jcr.nodetype.NodeType; import javax.jcr.nodetype.NodeTypeManager; import javax.jcr.nodetype.NodeTypeTemplate; import javax.jcr.nodetype.PropertyDefinitionTemplate; /** * * @author nathan */ public class FileType { public static void RegisterFileType(Session session) throws Exception { NodeTypeManager nodeTypeManager = session.getWorkspace().getNodeTypeManager(); NodeTypeTemplate nodeType = nodeTypeManager.createNodeTypeTemplate(); nodeType.setName("FileType"); String[] str = {"nt:resource"}; nodeType.setDeclaredSuperTypeNames(str); nodeType.setMixin(false); nodeType.setQueryable(true); PropertyDefinitionTemplate path = nodeTypeManager.createPropertyDefinitionTemplate(); path.setName("jcr:path"); path.setRequiredType(PropertyType.PATH); path.setQueryOrderable(false); path.setFullTextSearchable(false); nodeType.getPropertyDefinitionTemplates().add(path); PropertyDefinitionTemplate nom = nodeTypeManager.createPropertyDefinitionTemplate(); nom.setName("jcr:nom"); nom.setRequiredType(PropertyType.STRING); nom.setQueryOrderable(true); nom.setFullTextSearchable(true); nodeType.getPropertyDefinitionTemplates().add(nom); PropertyDefinitionTemplate description = nodeTypeManager.createPropertyDefinitionTemplate(); description.setName("jcr:description"); description.setRequiredType(PropertyType.STRING); description.setQueryOrderable(true); description.setFullTextSearchable(true); nodeType.getPropertyDefinitionTemplates().add(description); PropertyDefinitionTemplate motsCles = nodeTypeManager.createPropertyDefinitionTemplate(); motsCles.setName("jcr:motsCles"); motsCles.setRequiredType(PropertyType.STRING); motsCles.setQueryOrderable(true); motsCles.setFullTextSearchable(true); nodeType.getPropertyDefinitionTemplates().add(motsCles); PropertyDefinitionTemplate size = nodeTypeManager.createPropertyDefinitionTemplate(); size.setName("jcr:size"); size.setRequiredType(PropertyType.STRING); 
size.setQueryOrderable(true); size.setFullTextSearchable(false); nodeType.getPropertyDefinitionTemplates().add(size); PropertyDefinitionTemplate users = nodeTypeManager.createPropertyDefinitionTemplate(); users.setName("jcr:users"); users.setRequiredType(PropertyType.STRING); users.setQueryOrderable(true); users.setFullTextSearchable(false); nodeType.getPropertyDefinitionTemplates().add(users); PropertyDefinitionTemplate groupe = nodeTypeManager.createPropertyDefinitionTemplate(); groupe.setName("jcr:groupe"); groupe.setRequiredType(PropertyType.STRING); groupe.setQueryOrderable(true); groupe.setFullTextSearchable(false); nodeType.getPropertyDefinitionTemplates().add(groupe); NodeType newnodetype = nodeTypeManager.registerNodeType(nodeType, true); session.save(); } } 
  • 为持久性创建抽象类

     import java.util.ArrayList; import java.util.List; import java.util.Map; import javax.jcr.Session; import org.apache.jackrabbit.ocm.query.Filter; import org.apache.jackrabbit.ocm.query.impl.FilterImpl; import org.apache.jackrabbit.ocm.query.impl.QueryImpl; import org.apache.jackrabbit.ocm.query.Query; import org.apache.jackrabbit.ocm.query.QueryManager; import org.apache.jackrabbit.ocm.manager.ObjectContentManager; import org.apache.jackrabbit.ocm.manager.impl.ObjectContentManagerImpl; import org.apache.jackrabbit.ocm.mapper.Mapper; import org.apache.jackrabbit.ocm.mapper.impl.annotation.AnnotationMapperImpl; import org.apache.jackrabbit.ocm.reflection.ReflectionUtils; /** * * @author nathan */ public abstract class AbstractBean { private Class entityClass; private ObjectContentManager ocm; private Mapper mapper; public AbstractBean(Class entityClass){ this.entityClass = entityClass; } /** * Construct the Bean according to the extended class * This will be also construct the ObjectContentManager nammed ocm with the default Mapper * @param session javax.jcr.Session attached to the Bean * @return The mapping class found for the desired java bean class */ public AbstractBean(Class entityClass,Session session){ this.entityClass = entityClass; ocm = new ObjectContentManagerImpl(session, this.getDefaultMapper()); } /** * @return ObjectContentManager of the Bean */ public ObjectContentManager getOcm() throws Exception{ return ocm; } /** * Construct the Bean according to the extended class * This will be also construct the ObjectContentManager nammed ocm with the param Mapper given * @param session from "javax.jcr.Session" attached to the Bean * @param map from "org.apache.jackrabbit.ocm.mapper.Mapper" which * is the use to map entity between apllication and The repository * @return ObjectContentManager of the Bean */ public ObjectContentManager getOcm(Session session, Mapper map) throws Exception{ return new ObjectContentManagerImpl(session, map); } public void 
setOcm(ObjectContentManager ocm) { this.ocm = ocm; } private Mapper getDefaultMapper(){ ReflectionUtils.setClassLoader(com.ged.ocm.entity.Groupe.class.getClassLoader()); List classes = new ArrayList(); classes.add(com.ged.ocm.entity.Fichier.class); classes.add(com.ged.ocm.entity.Dossier.class); classes.add(com.ged.ocm.entity.Groupe.class); classes.add(com.ged.ocm.entity.SimpleNode.class); return new AnnotationMapperImpl(classes); } public Mapper getMapper() { return mapper; } public void setMapper(Mapper mapper) { this.mapper = mapper; } public void setLoader(Class classe){ ReflectionUtils.setClassLoader(classe.getClassLoader()); } public void create(T entity) { ocm.insert(entity); ocm.save(); } public void edit(T entity) { ocm.update(entity); ocm.save(); } public void remove(T entity) { ocm.remove(entity); ocm.save(); } public void refresh(){ ocm.refresh(true); ocm.save(); } public void copy(String orgPath, String destPath){ ocm.copy(orgPath, destPath); ocm.save(); } public void move(String orgPath, String destPath){ ocm.move(orgPath, destPath); ocm.save(); } public void removeByPath(String path) { ocm.remove(path); ocm.save(); } public void removeAllByEqual(Map filters){ QueryManager queryManager = ocm.getQueryManager(); Filter filter; filter = queryManager.createFilter(entityClass); for (String key : filters.keySet())filter.addEqualTo(key, filters.get(key)); Query query = queryManager.createQuery(filter); ocm.remove(query); ocm.save(); } public void removeAllByEqual(String nodePath,Map filters){ QueryManager queryManager = ocm.getQueryManager(); Filter filter; filter = queryManager.createFilter(entityClass); filter.setScope(nodePath); for (String key : filters.keySet())filter.addEqualTo(key, filters.get(key)); Query query = queryManager.createQuery(filter); ocm.remove(query); ocm.save(); } public boolean isPathExist(String path){ return ocm.objectExists(path); } public T findByPath(String path) { try { return (T)ocm.getObject(path); } catch (Exception e) { 
return null; } } public T findOneByEqual(Map filters){ QueryManager queryManager = ocm.getQueryManager(); Filter filter; filter = queryManager.createFilter(entityClass); for (String key : filters.keySet())filter.addEqualTo(key, filters.get(key)); Query query = queryManager.createQuery(filter); List results = (List) ocm.getObjects(query); T result = null; try { result = results.get(0); } catch (Exception e) { } return result; } public List findAllByEqual(Map filters){ QueryManager queryManager = ocm.getQueryManager(); Filter filter; filter = queryManager.createFilter(entityClass); filter.setScope("//"); for (String key : filters.keySet())filter.addEqualTo(key, filters.get(key)); Query query = queryManager.createQuery(filter); List results = (List) ocm.getObjects(query); return results; } public List findAllByLike(Map filters){ QueryManager queryManager = ocm.getQueryManager(); Filter filter; filter = queryManager.createFilter(entityClass); filter.setScope("//"); for (String key : filters.keySet())filter.addLike(key, filters.get(key)); Query query = queryManager.createQuery(filter); List results = (List) ocm.getObjects(query); return results; } public List findAllByLikeScoped(String scope,Map filters){ QueryManager queryManager = ocm.getQueryManager(); Filter filter; filter = queryManager.createFilter(entityClass); filter.setScope(scope); for (String key : filters.keySet())filter.addLike(key, filters.get(key)); Query query = queryManager.createQuery(filter); List results = (List) ocm.getObjects(query); return results; } public List findAllByOrLike(String attr,String[] val){ QueryManager queryManager = ocm.getQueryManager(); Filter filter; filter = queryManager.createFilter(entityClass); filter.setScope("//"); filter.addOrFilter(attr, val); Query query = queryManager.createQuery(filter); List results = (List) ocm.getObjects(query); return results; } public T findOneByEqual(String nodePath, Map filters){ QueryManager queryManager = ocm.getQueryManager(); Filter filter; 
filter = queryManager.createFilter(entityClass); filter.setScope(nodePath); for (String key : filters.keySet())filter.addEqualTo(key, filters.get(key)); Query query = queryManager.createQuery(filter); List results = (List) ocm.getObjects(query); T result = results.get(0); return result; } public List findAllByEqual(String nodePath, Map filters){ QueryManager queryManager = ocm.getQueryManager(); Filter filter; filter = queryManager.createFilter(entityClass); filter.setScope(nodePath); for (String key : filters.keySet())filter.addEqualTo(key, filters.get(key)); Query query = queryManager.createQuery(filter); List results = (List) ocm.getObjects(query); return results; } public List findAllByString(String query){ List results = (List) ocm.getObjects(query,javax.jcr.query.Query.JCR_SQL2); return results; } public List findAllByParentPath(String nodePath){ QueryManager queryManager = ocm.getQueryManager(); Filter filter; filter = queryManager.createFilter(entityClass); filter.setScope(nodePath); Query query = queryManager.createQuery(filter); List results = (List) ocm.getObjects(query); return results; } public List findAllByParentPathOrder(String nodePath, String ordering){ QueryManager queryManager = ocm.getQueryManager(); Filter filter; filter = queryManager.createFilter(entityClass); filter.setScope(nodePath); Query query = queryManager.createQuery(filter); // query.addOrderByDescending(ordering); query.addOrderByAscending(ordering); List results = (List) ocm.getObjects(query); return results; } public int coutChild(String nodePath){ QueryManager queryManager = ocm.getQueryManager(); Filter filter; filter = queryManager.createFilter(entityClass); filter.setScope(nodePath); Query query = queryManager.createQuery(filter); List results = (List) ocm.getObjects(query); return results.size(); } public boolean ifExistByPath(String path){ return ocm.objectExists(path); } public String getParentPath(String path){ String parent=""; String[] tmp=path.split("/"); for (int i = 
1; i < (tmp.length-1); i++) { parent+="/"+tmp[i]; } return parent; } } 
  • 创建bean

     import javax.ejb.Stateless; import com.ged.ocm.entity.Fichier; import java.io.InputStream; import java.util.ArrayList; import java.util.List; import java.util.Map; import javax.jcr.Node; import javax.jcr.NodeIterator; import javax.jcr.Session; import javax.jcr.Workspace; import javax.jcr.query.QueryResult; import javax.jcr.query.qom.FullTextSearch; import javax.jcr.query.qom.StaticOperand; import org.apache.jackrabbit.ocm.query.Filter; import org.apache.jackrabbit.ocm.query.Query; import org.apache.jackrabbit.ocm.query.QueryManager; @Stateless public class FichierBean extends AbstractBean{ public FichierBean() { super(Fichier.class); } public FichierBean(Session session) { super(Fichier.class,session); } public List findAllByContains(String motCles) throws Exception { String requette = "SELECT * FROM FileType AS Res WHERE CONTAINS (Res.*, '*"+motCles+"*')"; List results = (List) this.getOcm().getObjects(requette, javax.jcr.query.Query.JCR_SQL2); return results; } public List findAllByContains(String path,String motCles) throws Exception { String requette = "SELECT * FROM FileType AS Res WHERE CONTAINS (Res.*, '*"+motCles+"*') ORDER BY Res.nom"; List tmp = (List) this.getOcm().getObjects(requette, javax.jcr.query.Query.JCR_SQL2); List results = new ArrayList(); for (Fichier fichier : tmp) { if(fichier.getPath().startsWith(path))results.add(fichier); } return results; } public List fulltextByOCM(String motCles) throws Exception { QueryManager queryManager = this.getOcm().getQueryManager(); Filter filter; filter = queryManager.createFilter(com.ged.ocm.entity.Fichier.class); filter.addContains(".", "*"+motCles+"*"); Query query = queryManager.createQuery(filter); List results = (List) this.getOcm().getObjects(query); return results; } } 

我的配置文件:

  • repository.xml

           -->                                                                                   <!-- Search index for content that is shared repository wide (/jcr:system tree, contains mainly versions)      -->    
  • tika-config.xml

          application/xml image/svg+xml   application/x-tika-msoffice application/msword application/vnd.ms-excel application/vnd.ms-excel.sheet.binary.macroenabled.12 application/vnd.ms-powerpoint application/vnd.visio application/vnd.ms-outlook   application/x-tika-ooxml application/vnd.openxmlformats-package.core-properties+xml application/vnd.openxmlformats-officedocument.spreadsheetml.sheet application/vnd.openxmlformats-officedocument.spreadsheetml.template application/vnd.ms-excel.sheet.macroenabled.12 application/vnd.ms-excel.template.macroenabled.12 application/vnd.ms-excel.addin.macroenabled.12 application/vnd.openxmlformats-officedocument.presentationml.presentation application/vnd.openxmlformats-officedocument.presentationml.template application/vnd.openxmlformats-officedocument.presentationml.slideshow application/vnd.ms-powerpoint.presentation.macroenabled.12 application/vnd.ms-powerpoint.slideshow.macroenabled.12 application/vnd.ms-powerpoint.addin.macroenabled.12 application/vnd.openxmlformats-officedocument.wordprocessingml.document application/vnd.openxmlformats-officedocument.wordprocessingml.template application/vnd.ms-word.document.macroenabled.12 application/vnd.ms-word.template.macroenabled.12   text/html application/xhtml+xml application/x-asp   application/rtf   application/pdf   text/plain   application/vnd.sun.xml.writer application/vnd.oasis.opendocument.text application/vnd.oasis.opendocument.graphics application/vnd.oasis.opendocument.presentation application/vnd.oasis.opendocument.spreadsheet application/vnd.oasis.opendocument.chart application/vnd.oasis.opendocument.image application/vnd.oasis.opendocument.formula application/vnd.oasis.opendocument.text-master application/vnd.oasis.opendocument.text-web application/vnd.oasis.opendocument.text-template application/vnd.oasis.opendocument.graphics-template application/vnd.oasis.opendocument.presentation-template application/vnd.oasis.opendocument.spreadsheet-template 
application/vnd.oasis.opendocument.chart-template application/vnd.oasis.opendocument.image-template application/vnd.oasis.opendocument.formula-template application/x-vnd.oasis.opendocument.text application/x-vnd.oasis.opendocument.graphics application/x-vnd.oasis.opendocument.presentation application/x-vnd.oasis.opendocument.spreadsheet application/x-vnd.oasis.opendocument.chart application/x-vnd.oasis.opendocument.image application/x-vnd.oasis.opendocument.formula application/x-vnd.oasis.opendocument.text-master application/x-vnd.oasis.opendocument.text-web application/x-vnd.oasis.opendocument.text-template application/x-vnd.oasis.opendocument.graphics-template application/x-vnd.oasis.opendocument.presentation-template application/x-vnd.oasis.opendocument.spreadsheet-template application/x-vnd.oasis.opendocument.chart-template application/x-vnd.oasis.opendocument.image-template application/x-vnd.oasis.opendocument.formula-template   image/bmp image/gif image/jpeg image/png image/tiff image/vnd.wap.wbmp image/x-icon image/x-psd image/x-xcf   application/x-tika-java-class   audio/mpeg   application/x-midi audio/midi   audio/basic audio/x-wav audio/x-aiff    

bean 中的所有查询都能正常工作,唯独调用函数 public List findAllByContains(String path,String motCles) 对 .docx 和 .xlsx 文件进行全文搜索时无效。对 .pdf、.txt、.xml、.xls、.doc 等文件的全文搜索则完全正常。

参考:http://jackrabbit.510166.n4.nabble.com/Office-2007-documents-not-being-indexed-in-Jackrabbit-2-4-3-td4657380.html

同样地,我观察到对于 OOXML 类型的文档(即 Office 2007 文档),Tika 解析器需要 commons-compress-1.5.jar。

现在,我可以使用以下两个步骤索引和搜索大多数类型的文档(office 2007 – docx,pptx,xlsx,office 2003 – doc,ppt,xls,PDF):

(1)更新了repository.xml并添加了更多详细信息, 请访问https://issues.apache.org/jira/browse/JCR-3287

(2)在运行 jackrabbit-standalone-2.6.2.jar 时,将 commons-compress-1.5.jar 加入类路径

该解决方案专注于jackrabbit-jca-2.7.5.rar的JAR !

依赖性存在错误,因此我进行了以下更改:

  • 添加apache-mime4j-0.6.jar
  • 添加apache-mime4j-core-0.7.jar
  • 添加commons-compress-1.5.jar

在部署之前,在jackrabbit-jca-2.7.5.rar中添加这些JAR!

这样 .docx、.xlsx 等文件的索引就成功生效了!

谢谢@Ashok Felix

Interesting Posts