在java或scala中使用dbpedia spotlight

有谁知道如何在 Java 或 Scala 中使用 DBpedia Spotlight？或者有人能解释一下具体该怎么做吗？我找不到任何相关的资料……

你需要先下载 DBpedia Spotlight（jar 文件），之后就可以使用下面两个类（作者 pablomendes），我只做了一些改动。

public class db extends AnnotationClient { //private final static String API_URL = "http://jodaiber.dyndns.org:2222/"; private static String API_URL = "http://spotlight.dbpedia.org:80/"; private static double CONFIDENCE = 0.0; private static int SUPPORT = 0; private static String powered_by ="non"; private static String spotter ="CoOccurrenceBasedSelector";//"LingPipeSpotter"=Annotate all spots //AtLeastOneNounSelector"=No verbs and adjs. //"CoOccurrenceBasedSelector" =No 'common words' //"NESpotter"=Only Per.,Org.,Loc. private static String disambiguator ="Default";//Default ;Occurrences=Occurrence-centric;Document=Document-centric private static String showScores ="yes"; @SuppressWarnings("static-access") public void configiration(double CONFIDENCE,int SUPPORT, String powered_by,String spotter,String disambiguator,String showScores){ this.CONFIDENCE=CONFIDENCE; this.SUPPORT=SUPPORT; this.powered_by=powered_by; this.spotter=spotter; this.disambiguator=disambiguator; this.showScores=showScores; } public List extract(Text text) throws AnnotationException { LOG.info("Querying API."); String spotlightResponse; try { String Query=API_URL + "rest/annotate/?" 
+ "confidence=" + CONFIDENCE + "&support=" + SUPPORT + "&spotter=" + spotter + "&disambiguator=" + disambiguator + "&showScores=" + showScores + "&powered_by=" + powered_by + "&text=" + URLEncoder.encode(text.text(), "utf-8"); LOG.info(Query); GetMethod getMethod = new GetMethod(Query); getMethod.addRequestHeader(new Header("Accept", "application/json")); spotlightResponse = request(getMethod); } catch (UnsupportedEncodingException e) { throw new AnnotationException("Could not encode text.", e); } assert spotlightResponse != null; JSONObject resultJSON = null; JSONArray entities = null; try { resultJSON = new JSONObject(spotlightResponse); entities = resultJSON.getJSONArray("Resources"); } catch (JSONException e) { //throw new AnnotationException("Received invalid response from DBpedia Spotlight API."); } LinkedList resources = new LinkedList(); if(entities!=null) for(int i = 0; i < entities.length(); i++) { try { JSONObject entity = entities.getJSONObject(i); resources.add( new DBpediaResource(entity.getString("@URI"), Integer.parseInt(entity.getString("@support")))); } catch (JSONException e) { LOG.error("JSON exception "+e); } } return resources; } }

第二个类

 /** * @author pablomendes */ public abstract class AnnotationClient { public Logger LOG = Logger.getLogger(this.getClass()); private List RES = new ArrayList(); // Create an instance of HttpClient. private static HttpClient client = new HttpClient(); public List getResu(){ return RES; } public String request(HttpMethod method) throws AnnotationException { String response = null; // Provide custom retry handler is necessary method.getParams().setParameter(HttpMethodParams.RETRY_HANDLER, new DefaultHttpMethodRetryHandler(3, false)); try { // Execute the method. int statusCode = client.executeMethod(method); if (statusCode != HttpStatus.SC_OK) { LOG.error("Method failed: " + method.getStatusLine()); } // Read the response body. byte[] responseBody = method.getResponseBody(); //TODO Going to buffer response body of large or unknown size. Using getResponseBodyAsStream instead is recommended. // Deal with the response. // Use caution: ensure correct character encoding and is not binary data response = new String(responseBody); } catch (HttpException e) { LOG.error("Fatal protocol violation: " + e.getMessage()); throw new AnnotationException("Protocol error executing HTTP request.",e); } catch (IOException e) { LOG.error("Fatal transport error: " + e.getMessage()); LOG.error(method.getQueryString()); throw new AnnotationException("Transport error executing HTTP request.",e); } finally { // Release the connection. 
method.releaseConnection(); } return response; } protected static String readFileAsString(String filePath) throws java.io.IOException{ return readFileAsString(new File(filePath)); } protected static String readFileAsString(File file) throws IOException { byte[] buffer = new byte[(int) file.length()]; @SuppressWarnings("resource") BufferedInputStream f = new BufferedInputStream(new FileInputStream(file)); f.read(buffer); return new String(buffer); } static abstract class LineParser { public abstract String parse(String s) throws ParseException; static class ManualDatasetLineParser extends LineParser { public String parse(String s) throws ParseException { return s.trim(); } } static class OccTSVLineParser extends LineParser { public String parse(String s) throws ParseException { String result = s; try { result = s.trim().split("\t")[3]; } catch (ArrayIndexOutOfBoundsException e) { throw new ParseException(e.getMessage(), 3); } return result; } } } public void saveExtractedEntitiesSet(String Question, LineParser parser, int restartFrom) throws Exception { String text = Question; int i=0; //int correct =0 ; int error = 0;int sum = 0; for (String snippet: text.split("\n")) { String s = parser.parse(snippet); if (s!= null && !s.equals("")) { i++; if (i entities = new ArrayList(); try { entities = extract(new Text(snippet.replaceAll("\\s+"," "))); System.out.println(entities.get(0).getFullUri()); } catch (AnnotationException e) { // error++; LOG.error(e); e.printStackTrace(); } for (DBpediaResource e: entities) { RES.add(e.uri()); } } } } public abstract List extract(Text text) throws AnnotationException; public void evaluate(String Question) throws Exception { evaluateManual(Question,0); } public void evaluateManual(String Question, int restartFrom) throws Exception { saveExtractedEntitiesSet(Question,new LineParser.ManualDatasetLineParser(), restartFrom); } }

main() 方法

 /**
  * Annotates a sample question and prints the URIs of the resources found.
  */
 public static void main(String[] args) throws Exception {
     String question = "Is the Amazon river longer than the Nile River?";

     db c = new db();
     c.configiration(0.0, 0, "non", "CoOccurrenceBasedSelector", "Default", "yes");

     // The result list is only filled while evaluate() runs; without this
     // call getResu() would still be empty when printed.
     c.evaluate(question);

     System.out.println("resource : " + c.getResu());
 }

我只对你的答案做一个小修正：加上对 evaluate 方法的调用之后，代码就能正常运行了：

 /**
  * Demo entry point: configures a client, runs it on one sample question and
  * prints the collected resource URIs.
  */
 public static void main(String[] args) throws Exception {
     final String sampleQuestion = "Is the Amazon river longer than the Nile River?";

     // Set up the client with the annotation parameters.
     db client = new db();
     client.configiration(0.0, 0, "non", "CoOccurrenceBasedSelector", "Default", "yes");

     // Annotate the question, then show what was collected.
     client.evaluate(sampleQuestion);
     System.out.println("resource : " + client.getResu());
 }

拉明

在 Adel 的答案中，第二个类（AnnotationClient）的 request 方法里，作者 Pablo Mendes 留下了一个尚未完成的 TODO：

 TODO Going to buffer response body of large or unknown size. Using getResponseBodyAsStream instead is recommended.

这会产生一个恼人的警告，可以通过替换下面这段代码来消除：

 // Fragment of request(): reads the whole response body into a byte array
 // and turns it into the returned string.
 byte[] responseBody = method.getResponseBody(); //TODO Going to buffer response body of large or unknown size. Using getResponseBodyAsStream instead is recommended.
 // Deal with the response.
 // Use caution: ensure correct character encoding and is not binary data
 response = new String(responseBody);

替换为：

 // Stream the response body and decode it as UTF-8.
 // getResponseBodyAsStream() returns null when no body is available, so
 // guard before wrapping it; in that case 'response' is left unchanged.
 java.io.InputStream body = method.getResponseBodyAsStream();
 if (body != null) {
     Reader in = new InputStreamReader(body, "UTF-8");
     StringWriter writer = new StringWriter();
     org.apache.commons.io.IOUtils.copy(in, writer);
     response = writer.toString();
 }

在java或scala中使用dbpedia spotlight

如何查询多个实体的特定DBpedia资源/页面？