|
a |
|
b/src/main/java/org/sba_research/timbus/kb/importer/FreebaseImporter.java |
|
|
1 |
/**
|
|
|
2 |
* Copyright (c) 2013/2014 Verein zur Foerderung der IT-Sicherheit in Oesterreich (SBA).
|
|
|
3 |
* The work has been developed in the TIMBUS Project and the above-mentioned are Members of the TIMBUS Consortium.
|
|
|
4 |
* TIMBUS is supported by the European Union under the 7th Framework Programme for research and technological
|
|
|
5 |
* development and demonstration activities (FP7/2007-2013) under grant agreement no. 269940.
|
|
|
6 |
*
|
|
|
7 |
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
|
|
|
8 |
* the License. You may obtain a copy of the License at: http://www.apache.org/licenses/LICENSE-2.0
|
|
|
9 |
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
|
|
|
10 |
* an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including without
|
|
|
11 |
* limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR
|
|
|
12 |
* PURPOSE. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise,
|
|
|
13 |
* unless required by applicable law or agreed to in writing, shall any Contributor be liable for damages, including
|
|
|
14 |
* any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this
|
|
|
15 |
* License or out of the use or inability to use the Work.
|
|
|
16 |
* See the License for the specific language governing permissions and limitations under the License.
|
|
|
17 |
*/
|
|
|
18 |
package org.sba_research.timbus.kb.importer;
|
|
|
19 |
|
|
|
20 |
import com.google.api.client.http.*;
import com.google.api.client.http.javanet.NetHttpTransport;
import com.jayway.jsonpath.JsonPath;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang.StringUtils;
import org.json.simple.JSONArray;
import org.json.simple.JSONObject;
import org.json.simple.parser.JSONParser;
import org.json.simple.parser.ParseException;
import org.sbaresearch.owl.OwlApiFacade;
import org.sbaresearch.owl.OwlElementNotFoundException;
import org.semanticweb.owlapi.model.OWLNamedIndividual;
import org.semanticweb.owlapi.vocab.OWL2Datatype;
import uk.ac.manchester.cs.owl.owlapi.OWL2DatatypeImpl;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Locale;
import java.util.logging.Level;
import java.util.logging.Logger;
|
|
|
39 |
|
|
|
40 |
public class FreebaseImporter implements DataImporter {
|
|
|
41 |
|
|
|
42 |
private static final Logger LOG = Logger.getLogger(FreebaseImporter.class.getName());
|
|
|
43 |
private OwlApiFacade owl;
|
|
|
44 |
private String kb;
|
|
|
45 |
|
|
|
46 |
/**
|
|
|
47 |
* If the usage limit is exceeded an API key has to be provided.
|
|
|
48 |
* @see FreebaseImporter::recreateCache
|
|
|
49 |
* @see <a href="https://developers.google.com/freebase/usage-limits">Freebase usage limits</a>
|
|
|
50 |
*/
|
|
|
51 |
//private static Properties properties = new Properties();
|
|
|
52 |
|
|
|
53 |
@Override
|
|
|
54 |
public void populate(OwlApiFacade owl, String kb) throws IOException, OwlElementNotFoundException, DataImporterException {
|
|
|
55 |
LOG.info("loading data...");
|
|
|
56 |
File file = new File("cache_freebase.json");
|
|
|
57 |
if (!file.exists()) {
|
|
|
58 |
recreateCache(file);
|
|
|
59 |
}
|
|
|
60 |
this.owl = owl;
|
|
|
61 |
this.kb = kb;
|
|
|
62 |
|
|
|
63 |
LOG.info("populating owl...");
|
|
|
64 |
JSONArray results = loadCache(file);
|
|
|
65 |
for (Object result : results) {
|
|
|
66 |
try {
|
|
|
67 |
addFormat(result);
|
|
|
68 |
} catch (InvalidInputException e) {
|
|
|
69 |
LOG.severe(e.getMessage());
|
|
|
70 |
}
|
|
|
71 |
}
|
|
|
72 |
}
|
|
|
73 |
|
|
|
74 |
private void addFormat(Object result) throws OwlElementNotFoundException, InvalidInputException {
|
|
|
75 |
String formatName = cleanName(jsonGet(result, "$.name"));
|
|
|
76 |
LOG.fine(String.format("%s, %s, %s, %s", formatName, jsonGet(result, "$.extension[*]"), jsonGet(result, "$.written_by"), jsonGet(result, "$.read_by")));
|
|
|
77 |
OWLNamedIndividual format = addFormat(owl, formatName);
|
|
|
78 |
OWLNamedIndividual registry = addRegistry(formatName, format);
|
|
|
79 |
safeAddEntryExtensionToRegistry(formatName, getExtensions((JSONObject) result), registry);
|
|
|
80 |
for (Object app : (JSONArray) ((JSONObject) result).get("read_by")) {
|
|
|
81 |
addToolAndAction(formatName, format, cleanName(app), "read", kb + "#isReading");
|
|
|
82 |
}
|
|
|
83 |
for (Object app : (JSONArray) ((JSONObject) result).get("written_by")) {
|
|
|
84 |
addToolAndAction(formatName, format, cleanName(app), "write", kb + "#isWriting");
|
|
|
85 |
}
|
|
|
86 |
}
|
|
|
87 |
|
|
|
88 |
private OWLNamedIndividual addRegistry(String formatName, OWLNamedIndividual format) {
|
|
|
89 |
OWLNamedIndividual registry = owl.addIndividual("registry_format_" + formatName + "_freebase", kb + "#FormatRegistry");
|
|
|
90 |
owl.addObjectProperty(format, kb + "#isIdentifiedBy", registry);
|
|
|
91 |
return registry;
|
|
|
92 |
}
|
|
|
93 |
|
|
|
94 |
private String getExtensions(JSONObject result) {
|
|
|
95 |
JSONArray extensionsList = (JSONArray) result.get("extension");
|
|
|
96 |
return StringUtils.join(extensionsList, ", ");
|
|
|
97 |
}
|
|
|
98 |
|
|
|
99 |
private JSONArray loadCache(File file) throws IOException, DataImporterException {
|
|
|
100 |
JSONParser parser = new JSONParser();
|
|
|
101 |
JSONObject response;
|
|
|
102 |
try {
|
|
|
103 |
response = (JSONObject) parser.parse(FileUtils.readFileToString(file));
|
|
|
104 |
} catch (ParseException e) {
|
|
|
105 |
throw new DataImporterException(e);
|
|
|
106 |
}
|
|
|
107 |
return (JSONArray) response.get("result");
|
|
|
108 |
}
|
|
|
109 |
|
|
|
110 |
private void safeAddEntryExtensionToRegistry(String formatName, String extensions, OWLNamedIndividual registry) {
|
|
|
111 |
if (extensions.trim().isEmpty()) return;
|
|
|
112 |
OWLNamedIndividual registryEntryExtension = owl.addIndividual("registry_format_" + formatName + "_freebase_extension", kb + "#RegistryEntry");
|
|
|
113 |
owl.addDataProperty(registryEntryExtension, kb + "#hasKey", owl.getOWLLiteral("extension", OWL2DatatypeImpl.getDatatype(OWL2Datatype.XSD_STRING)));
|
|
|
114 |
for (String ext : extensions.split(" ")) {
|
|
|
115 |
ext = StringUtils.strip(ext, " .,").toLowerCase();
|
|
|
116 |
owl.addDataProperty(registryEntryExtension, kb + "#hasValue", owl.getOWLLiteral(ext, OWL2DatatypeImpl.getDatatype(OWL2Datatype.XSD_STRING)));
|
|
|
117 |
}
|
|
|
118 |
owl.addObjectProperty(registry, kb + "#isConsistingOf", registryEntryExtension);
|
|
|
119 |
}
|
|
|
120 |
|
|
|
121 |
private String cleanName(Object app) {
|
|
|
122 |
return app.toString().replace(" ", "-").replace("|", "");
|
|
|
123 |
}
|
|
|
124 |
|
|
|
125 |
private OWLNamedIndividual addFormat(OwlApiFacade owl, String formatName) {
|
|
|
126 |
return owl.addIndividual(formatName, kb + "#FileFormat");
|
|
|
127 |
}
|
|
|
128 |
|
|
|
129 |
private void addToolAndAction(String formatName, OWLNamedIndividual formatIndiv, String appName, String action, String actionProperty) throws OwlElementNotFoundException {
|
|
|
130 |
OWLNamedIndividual toolAction = owl.addIndividual(String.format("action_" + action + "_%s_%s", formatName, appName), kb + "#ToolAction");
|
|
|
131 |
String abstractActionName = String.format("action_" + action + "_%s", formatName);
|
|
|
132 |
OWLNamedIndividual abstractAction = safeAddAbstractAction(formatIndiv, abstractActionName, actionProperty);
|
|
|
133 |
owl.addObjectProperty(toolAction, kb + "#isProviding", abstractAction);
|
|
|
134 |
|
|
|
135 |
OWLNamedIndividual tool = safeAddTool(appName);
|
|
|
136 |
owl.addObjectProperty(tool, kb + "#isProviding", toolAction);
|
|
|
137 |
}
|
|
|
138 |
|
|
|
139 |
private OWLNamedIndividual safeAddAbstractAction(OWLNamedIndividual formatIndiv, String abstractActionName, String actionProperty) throws OwlElementNotFoundException {
|
|
|
140 |
if (!owl.containsIndividual(abstractActionName)) {
|
|
|
141 |
OWLNamedIndividual tmp = owl.addIndividual(abstractActionName, kb + "#AbstractAction");
|
|
|
142 |
owl.addObjectProperty(tmp, actionProperty, formatIndiv);
|
|
|
143 |
}
|
|
|
144 |
return owl.getIndividual(abstractActionName);
|
|
|
145 |
}
|
|
|
146 |
|
|
|
147 |
private OWLNamedIndividual safeAddTool(String appName) throws OwlElementNotFoundException {
|
|
|
148 |
if (!owl.containsIndividual(appName)) {
|
|
|
149 |
OWLNamedIndividual tool = owl.addIndividual(appName, kb + "#Tool");
|
|
|
150 |
OWLNamedIndividual toolRegistry = owl.addIndividual(String.format("registry_tool_%s_%s", appName, "Freebase"), kb + "#ToolRegistry");
|
|
|
151 |
owl.addObjectProperty(tool, kb + "#isIdentifiedBy", toolRegistry);
|
|
|
152 |
}
|
|
|
153 |
return owl.getIndividual(appName);
|
|
|
154 |
}
|
|
|
155 |
|
|
|
156 |
private void recreateCache(File file) throws IOException {
|
|
|
157 |
// properties.load(new FileInputStream("freebase.properties"));
|
|
|
158 |
HttpTransport httpTransport = new NetHttpTransport();
|
|
|
159 |
HttpRequestFactory requestFactory = httpTransport.createRequestFactory();
|
|
|
160 |
String query = "[{\"id\":null,\"name\":null,\"extension\":[],\"written_by\":[],\"read_by\":[],\"type\":\"/computer/file_format\",\"limit\":4000}]";
|
|
|
161 |
GenericUrl url = new GenericUrl("https://www.googleapis.com/freebase/v1/mqlread");
|
|
|
162 |
url.put("query", query);
|
|
|
163 |
// url.put("key", properties.get("API_KEY"));
|
|
|
164 |
HttpRequest request = requestFactory.buildGetRequest(url);
|
|
|
165 |
HttpResponse httpResponse = request.execute();
|
|
|
166 |
FileUtils.writeLines(file, Arrays.asList(httpResponse.parseAsString()));
|
|
|
167 |
}
|
|
|
168 |
|
|
|
169 |
private String jsonGet(Object result, String jsonPath) throws InvalidInputException {
|
|
|
170 |
try {
|
|
|
171 |
return JsonPath.read(result, jsonPath).toString();
|
|
|
172 |
} catch (NullPointerException e) {
|
|
|
173 |
throw new InvalidInputException(e);
|
|
|
174 |
}
|
|
|
175 |
}
|
|
|
176 |
}
|