// WikimediasFetcher.java
package org.wikidata.analyzer.Fetcher;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.json.simple.JSONArray;
import org.json.simple.JSONObject;
import org.json.simple.parser.JSONParser;
import org.json.simple.parser.ParseException;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.*;
/**
 * Provides the list of Wikimedia projects known to Wikidata, each described by its entity ID and
 * its database name.
 *
 * <p>The list is obtained with a SPARQL query and a JSON copy of the result is kept in a cache
 * file named {@link #CACHE_FILENAME} inside the data directory. While that file is younger than
 * {@code MAX_CACHE_AGE} it is used instead of running the query again.
 *
 * <p>{@code dataDirectory}, {@code MAX_CACHE_AGE}, {@code ENTITY_URL_PREFIX},
 * {@code queryDataFromWikidata} and {@code getResultsList} are not defined in this class; they are
 * expected to be provided by {@link WikiDataFetcher}.
 */
public class WikimediasFetcher extends WikiDataFetcher {
    /** Name of the cache file inside the data directory. */
    static final String CACHE_FILENAME = "mediawikis.json";
    /** Map key under which the Wikidata entity ID of a project is stored. */
    public static final String ENTITYID_KEY = "entityID";
    /** Map key under which the database name of a project is stored. */
    public static final String DBNAME_KEY = "dbname";

    /**
     * Creates a fetcher that keeps its cache file in the given directory.
     *
     * @param dataDirectory directory handed to the superclass; the cache file is placed inside it
     */
    public WikimediasFetcher(File dataDirectory) {
        super(dataDirectory);
    }

    /**
     * Returns the known Wikimedia projects, one map per project with the keys
     * {@link #ENTITYID_KEY} and {@link #DBNAME_KEY}.
     *
     * <p>The cache file is used when it exists and its last modification is less than
     * {@code MAX_CACHE_AGE} milliseconds ago; otherwise the data is queried from Wikidata and the
     * cache file is rewritten.
     *
     * @return the list of projects, read from the cache or freshly queried
     */
    public List<HashMap<String, String>> getMediawikis() {
        File cacheFile = getCacheFile();
        long now = System.currentTimeMillis();
        boolean cacheIsFresh = cacheFile.exists()
                && now - cacheFile.lastModified() < MAX_CACHE_AGE;
        if (cacheIsFresh) {
            return readWikimediasFromCacheFile(cacheFile);
        }
        return getWikimediasFromWikidata();
    }

    /**
     * Resolves the cache file inside the data directory.
     *
     * <p>The two-argument {@link File} constructor inserts the path separator. Concatenating
     * {@code getAbsolutePath()} with the file name does not, which would place the file next to
     * the data directory instead of inside it.
     */
    private File getCacheFile() {
        return new File(dataDirectory, CACHE_FILENAME);
    }

    /**
     * Parses the cache file into the list structure returned by {@link #getMediawikis()}.
     *
     * <p>If the file cannot be read or parsed, the error is printed and the JVM is terminated
     * with exit status 1.
     *
     * @param cacheFile the JSON cache file to read
     * @return the parsed list; {@code null} is only reached if {@code System.exit} returns
     */
    private List<HashMap<String, String>> readWikimediasFromCacheFile(File cacheFile) {
        JSONParser parser = new JSONParser();
        // try-with-resources closes the file handle on every path. The charset is given
        // explicitly because the cache is written by Jackson as UTF-8, whereas FileReader
        // would decode with the platform default charset.
        try (FileInputStream in = new FileInputStream(cacheFile);
             InputStreamReader reader = new InputStreamReader(in, StandardCharsets.UTF_8)) {
            JSONArray parsed = (JSONArray) parser.parse(reader);
            // json-simple's JSONArray/JSONObject are raw ArrayList/HashMap subclasses, so this
            // cast cannot be checked by the compiler; the cache file is written by this class
            // from String-to-String maps.
            @SuppressWarnings("unchecked")
            List<HashMap<String, String>> result = (ArrayList<HashMap<String, String>>) parsed;
            return result;
        } catch (IOException e) {
            System.out.println("Error accessing cache file:");
            System.out.println(e.getMessage());
            e.printStackTrace();
            System.exit(1);
        } catch (ParseException e) {
            System.out.println("Error parsing mediawikis cache file with message:");
            System.out.println(e.getMessage());
            e.printStackTrace();
            System.exit(1);
        }
        return null;
    }

    /**
     * Runs the SPARQL query for Wikimedia projects, converts the response and stores the result
     * in the cache file.
     *
     * @return the freshly queried list of projects
     */
    private List<HashMap<String, String>> getWikimediasFromWikidata() {
        String querySelect = "SELECT ?Wikimedia_project ?Wikimedia_database_name WHERE {\n" +
                " ?Wikimedia_project wdt:P31?/wdt:P279* wd:Q14827288.\n" +
                " ?Wikimedia_project wdt:P1800 ?Wikimedia_database_name.\n" +
                "}";
        JSONObject wikimediasJSON = queryDataFromWikidata(querySelect);
        List<HashMap<String, String>> mediawikis = this.parseJsonResponse(wikimediasJSON);
        writeCache(mediawikis);
        return mediawikis;
    }

    /**
     * Converts the query response into one map per result row.
     *
     * <p>Rows without a {@code Wikimedia_database_name} binding are skipped. The entity ID is the
     * {@code value} of the {@code Wikimedia_project} binding with the first
     * {@code ENTITY_URL_PREFIX.length()} characters removed.
     *
     * @param apiObject the response object as returned by {@code queryDataFromWikidata}
     * @return one map per row with the keys {@link #ENTITYID_KEY} and {@link #DBNAME_KEY}
     */
    private List<HashMap<String, String>> parseJsonResponse(JSONObject apiObject) {
        int urlPrefixLength = ENTITY_URL_PREFIX.length();
        JSONArray resultItems = getResultsList(apiObject);
        List<HashMap<String, String>> resultList = new ArrayList<>();
        for (JSONObject item : (Iterable<JSONObject>) resultItems) {
            if (!item.containsKey("Wikimedia_database_name")) {
                continue;
            }
            JSONObject langEditionItem = (JSONObject) item.get("Wikimedia_project");
            String entityId = ((String) langEditionItem.get("value")).substring(urlPrefixLength);
            JSONObject dbNameObject = (JSONObject) item.get("Wikimedia_database_name");
            String dbName = (String) dbNameObject.get("value");
            HashMap<String, String> itemMap = new HashMap<>();
            itemMap.put(ENTITYID_KEY, entityId);
            itemMap.put(DBNAME_KEY, dbName);
            resultList.add(itemMap);
        }
        return resultList;
    }

    /**
     * Serializes the given list as JSON into the cache file.
     *
     * <p>Writing the cache is best effort: a failure is reported on standard output and
     * otherwise ignored, so the caller still receives the queried data.
     *
     * @param cacheData the list of projects to store
     */
    private void writeCache(List<HashMap<String, String>> cacheData) {
        ObjectMapper mapper = new ObjectMapper();
        File cacheFile = getCacheFile();
        try {
            mapper.writeValue(cacheFile, cacheData);
        } catch (IOException e) {
            System.out.println("Error writing data retrieved from wikidata to filesystem cache: "
                    + cacheFile.getAbsolutePath());
            System.out.println(e.getMessage());
            e.printStackTrace();
        }
    }
}