a b/src/main/java/org/sba_research/timbus/kb/importer/FreebaseImporter.java
1
/**
2
 * Copyright (c) 2013/2014 Verein zur Foerderung der IT-Sicherheit in Oesterreich (SBA).
3
 * The work has been developed in the TIMBUS Project and the above-mentioned are Members of the TIMBUS Consortium.
4
 * TIMBUS is supported by the European Union under the 7th Framework Programme for research and technological
5
 * development and demonstration activities (FP7/2007-2013) under grant agreement no. 269940.
6
 *
7
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
8
 * the License. You may obtain a copy of the License at: http://www.apache.org/licenses/LICENSE-2.0
9
 * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
10
 * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including without
11
 * limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR
12
 * PURPOSE. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise,
13
 * unless required by applicable law or agreed to in writing, shall any Contributor be liable for damages, including
14
 * any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this
15
 * License or out of the use or inability to use the Work.
16
 * See the License for the specific language governing permissions and limitation under the License.
17
 */
18
package org.sba_research.timbus.kb.importer;
19
20
import com.google.api.client.http.*;
import com.google.api.client.http.javanet.NetHttpTransport;
import com.jayway.jsonpath.JsonPath;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang.StringUtils;
import org.json.simple.JSONArray;
import org.json.simple.JSONObject;
import org.json.simple.parser.JSONParser;
import org.json.simple.parser.ParseException;
import org.sbaresearch.owl.OwlApiFacade;
import org.sbaresearch.owl.OwlElementNotFoundException;
import org.semanticweb.owlapi.model.OWLNamedIndividual;
import org.semanticweb.owlapi.vocab.OWL2Datatype;
import uk.ac.manchester.cs.owl.owlapi.OWL2DatatypeImpl;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Locale;
import java.util.logging.Level;
import java.util.logging.Logger;
39
40
public class FreebaseImporter implements DataImporter {
41
42
    private static final Logger LOG = Logger.getLogger(FreebaseImporter.class.getName());
43
    private OwlApiFacade owl;
44
    private String kb;
45
46
    /**
47
     * If the usage limit is exceeded an API key has to be provided.
48
     * @see FreebaseImporter::recreateCache
49
     * @see <a href="https://developers.google.com/freebase/usage-limits">Freebase usage limits</a>
50
     */
51
    //private static Properties properties = new Properties();
52
53
    @Override
54
    public void populate(OwlApiFacade owl, String kb) throws IOException, OwlElementNotFoundException, DataImporterException {
55
        LOG.info("loading data...");
56
        File file = new File("cache_freebase.json");
57
        if (!file.exists()) {
58
            recreateCache(file);
59
        }
60
        this.owl = owl;
61
        this.kb = kb;
62
63
        LOG.info("populating owl...");
64
        JSONArray results = loadCache(file);
65
        for (Object result : results) {
66
            try {
67
                addFormat(result);
68
            } catch (InvalidInputException e) {
69
                LOG.severe(e.getMessage());
70
            }
71
        }
72
    }
73
74
    private void addFormat(Object result) throws OwlElementNotFoundException, InvalidInputException {
75
        String formatName = cleanName(jsonGet(result, "$.name"));
76
        LOG.fine(String.format("%s, %s, %s, %s", formatName, jsonGet(result, "$.extension[*]"), jsonGet(result, "$.written_by"), jsonGet(result, "$.read_by")));
77
        OWLNamedIndividual format = addFormat(owl, formatName);
78
        OWLNamedIndividual registry = addRegistry(formatName, format);
79
        safeAddEntryExtensionToRegistry(formatName, getExtensions((JSONObject) result), registry);
80
        for (Object app : (JSONArray) ((JSONObject) result).get("read_by")) {
81
            addToolAndAction(formatName, format, cleanName(app), "read", kb + "#isReading");
82
        }
83
        for (Object app : (JSONArray) ((JSONObject) result).get("written_by")) {
84
            addToolAndAction(formatName, format, cleanName(app), "write", kb + "#isWriting");
85
        }
86
    }
87
88
    private OWLNamedIndividual addRegistry(String formatName, OWLNamedIndividual format) {
89
        OWLNamedIndividual registry = owl.addIndividual("registry_format_" + formatName + "_freebase", kb + "#FormatRegistry");
90
        owl.addObjectProperty(format, kb + "#isIdentifiedBy", registry);
91
        return registry;
92
    }
93
94
    private String getExtensions(JSONObject result) {
95
        JSONArray extensionsList = (JSONArray) result.get("extension");
96
        return StringUtils.join(extensionsList, ", ");
97
    }
98
99
    private JSONArray loadCache(File file) throws IOException, DataImporterException {
100
        JSONParser parser = new JSONParser();
101
        JSONObject response;
102
        try {
103
            response = (JSONObject) parser.parse(FileUtils.readFileToString(file));
104
        } catch (ParseException e) {
105
            throw new DataImporterException(e);
106
        }
107
        return (JSONArray) response.get("result");
108
    }
109
110
    private void safeAddEntryExtensionToRegistry(String formatName, String extensions, OWLNamedIndividual registry) {
111
        if (extensions.trim().isEmpty()) return;
112
        OWLNamedIndividual registryEntryExtension = owl.addIndividual("registry_format_" + formatName + "_freebase_extension", kb + "#RegistryEntry");
113
        owl.addDataProperty(registryEntryExtension, kb + "#hasKey", owl.getOWLLiteral("extension", OWL2DatatypeImpl.getDatatype(OWL2Datatype.XSD_STRING)));
114
        for (String ext : extensions.split(" ")) {
115
            ext = StringUtils.strip(ext, " .,").toLowerCase();
116
            owl.addDataProperty(registryEntryExtension, kb + "#hasValue", owl.getOWLLiteral(ext, OWL2DatatypeImpl.getDatatype(OWL2Datatype.XSD_STRING)));
117
        }
118
        owl.addObjectProperty(registry, kb + "#isConsistingOf", registryEntryExtension);
119
    }
120
121
    private String cleanName(Object app) {
122
        return app.toString().replace(" ", "-").replace("|", "");
123
    }
124
125
    private OWLNamedIndividual addFormat(OwlApiFacade owl, String formatName) {
126
        return owl.addIndividual(formatName, kb + "#FileFormat");
127
    }
128
129
    private void addToolAndAction(String formatName, OWLNamedIndividual formatIndiv, String appName, String action, String actionProperty) throws OwlElementNotFoundException {
130
        OWLNamedIndividual toolAction = owl.addIndividual(String.format("action_" + action + "_%s_%s", formatName, appName), kb + "#ToolAction");
131
        String abstractActionName = String.format("action_" + action + "_%s", formatName);
132
        OWLNamedIndividual abstractAction = safeAddAbstractAction(formatIndiv, abstractActionName, actionProperty);
133
        owl.addObjectProperty(toolAction, kb + "#isProviding", abstractAction);
134
135
        OWLNamedIndividual tool = safeAddTool(appName);
136
        owl.addObjectProperty(tool, kb + "#isProviding", toolAction);
137
    }
138
139
    private OWLNamedIndividual safeAddAbstractAction(OWLNamedIndividual formatIndiv, String abstractActionName, String actionProperty) throws OwlElementNotFoundException {
140
        if (!owl.containsIndividual(abstractActionName)) {
141
            OWLNamedIndividual tmp = owl.addIndividual(abstractActionName, kb + "#AbstractAction");
142
            owl.addObjectProperty(tmp, actionProperty, formatIndiv);
143
        }
144
        return owl.getIndividual(abstractActionName);
145
    }
146
147
    private OWLNamedIndividual safeAddTool(String appName) throws OwlElementNotFoundException {
148
        if (!owl.containsIndividual(appName)) {
149
            OWLNamedIndividual tool = owl.addIndividual(appName, kb + "#Tool");
150
            OWLNamedIndividual toolRegistry = owl.addIndividual(String.format("registry_tool_%s_%s", appName, "Freebase"), kb + "#ToolRegistry");
151
            owl.addObjectProperty(tool, kb + "#isIdentifiedBy", toolRegistry);
152
        }
153
        return owl.getIndividual(appName);
154
    }
155
156
    private void recreateCache(File file) throws IOException {
157
        // properties.load(new FileInputStream("freebase.properties"));
158
        HttpTransport httpTransport = new NetHttpTransport();
159
        HttpRequestFactory requestFactory = httpTransport.createRequestFactory();
160
        String query = "[{\"id\":null,\"name\":null,\"extension\":[],\"written_by\":[],\"read_by\":[],\"type\":\"/computer/file_format\",\"limit\":4000}]";
161
        GenericUrl url = new GenericUrl("https://www.googleapis.com/freebase/v1/mqlread");
162
        url.put("query", query);
163
        // url.put("key", properties.get("API_KEY"));
164
        HttpRequest request = requestFactory.buildGetRequest(url);
165
        HttpResponse httpResponse = request.execute();
166
        FileUtils.writeLines(file, Arrays.asList(httpResponse.parseAsString()));
167
    }
168
169
    private String jsonGet(Object result, String jsonPath) throws InvalidInputException {
170
        try {
171
            return JsonPath.read(result, jsonPath).toString();
172
        } catch (NullPointerException e) {
173
            throw new InvalidInputException(e);
174
        }
175
    }
176
}