From 0a2753e6bf944e0a7735fa7c8f59c32b3d4c69e5 Mon Sep 17 00:00:00 2001
From: serso
Date: Fri, 12 Feb 2016 14:24:37 +0100
Subject: [PATCH] Wiki translator

---
 settings.gradle                               |   2 +-
 wiki/.gitignore                               |   1 +
 wiki/build.gradle                             |   8 ++
 .../src/main/java/org/solovyev/wiki/Main.java | 211 ++++++++++++++++++
 4 files changed, 221 insertions(+), 1 deletion(-)
 create mode 100644 wiki/.gitignore
 create mode 100644 wiki/build.gradle
 create mode 100644 wiki/src/main/java/org/solovyev/wiki/Main.java

diff --git a/settings.gradle b/settings.gradle
index 1b96a949..0bbe5e7d 100644
--- a/settings.gradle
+++ b/settings.gradle
@@ -1 +1 @@
-include ':app', ':jscl'
+include ':app', ':jscl', ':wiki'
diff --git a/wiki/.gitignore b/wiki/.gitignore
new file mode 100644
index 00000000..796b96d1
--- /dev/null
+++ b/wiki/.gitignore
@@ -0,0 +1 @@
+/build
diff --git a/wiki/build.gradle b/wiki/build.gradle
new file mode 100644
index 00000000..8c547604
--- /dev/null
+++ b/wiki/build.gradle
@@ -0,0 +1,8 @@
+apply plugin: 'java'
+
+dependencies {
+    compile fileTree(dir: 'libs', include: ['*.jar'])
+    compile 'commons-cli:commons-cli:1.3'
+    compile 'org.apache.httpcomponents:httpclient:4.5.1'
+    compile 'org.json:json:20151123'
+}
\ No newline at end of file
diff --git a/wiki/src/main/java/org/solovyev/wiki/Main.java b/wiki/src/main/java/org/solovyev/wiki/Main.java
new file mode 100644
index 00000000..ee64dfbe
--- /dev/null
+++ b/wiki/src/main/java/org/solovyev/wiki/Main.java
@@ -0,0 +1,211 @@
+package org.solovyev.wiki;
+
+import org.apache.http.client.methods.CloseableHttpResponse;
+import org.apache.http.client.methods.HttpGet;
+import org.apache.http.impl.client.CloseableHttpClient;
+import org.apache.http.impl.client.HttpClients;
+import org.apache.http.util.EntityUtils;
+import org.apache.http.util.TextUtils;
+import org.json.JSONArray;
+import org.json.JSONObject;
+
+import java.io.Closeable;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.OutputStreamWriter;
+import java.io.UnsupportedEncodingException;
+import java.io.Writer;
+import java.net.URLEncoder;
+import java.nio.charset.StandardCharsets;
+import java.util.Arrays;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Looks up the Wikipedia titles of a fixed list of English terms in other languages and writes
+ * them to one XML file per language in the {@code out} directory.
+ */
+public class Main {
+
+    private static final String API_URL =
+            "https://en.wikipedia.org/w/api.php?action=query&format=json&prop=langlinks&titles=";
+
+    /** Titles of the English Wikipedia articles to translate. */
+    private static final List<String> WORDS = Arrays.asList(
+            "Time",
+            "Amount of substance",
+            "Electric current",
+            "Length",
+            "Mass",
+            "Temperature");
+
+    /** Codes of the Wikipedia language editions to translate into. */
+    private static final List<String> LANGUAGES = Arrays.asList(
+            "ar", "cs", "es", "de", "fi", "fr", "it", "nl",
+            "pl", "pt", "ru", "tr", "vi", "ja", "zh");
+
+    /**
+     * Translates every word into every language and writes one file per language.
+     *
+     * @param args ignored
+     * @throws UnsupportedEncodingException if UTF-8 is not supported for URL encoding
+     */
+    public static void main(String... args) throws UnsupportedEncodingException {
+        final CloseableHttpClient client = HttpClients.createDefault();
+        try {
+            for (String language : LANGUAGES) {
+                // Insertion order keeps the generated files stable between runs.
+                final Map<String, String> translations = new LinkedHashMap<>();
+                for (String word : WORDS) {
+                    final String translation = translate(client, word, language);
+                    if (!TextUtils.isEmpty(translation)) {
+                        translations.put(word, translation);
+                    }
+                }
+                writeTranslations(translations, language);
+            }
+        } finally {
+            close(client);
+        }
+    }
+
+    /**
+     * Asks the English Wikipedia for the title that the article {@code word} has in the
+     * {@code language} edition.
+     *
+     * @param client HTTP client used for the request
+     * @param word title of the English article
+     * @param language code of the target language edition
+     * @return the translated title, or {@code null} if the request failed or no translation
+     *         was found
+     * @throws UnsupportedEncodingException if UTF-8 is not supported for URL encoding
+     */
+    private static String translate(CloseableHttpClient client, String word, String language)
+            throws UnsupportedEncodingException {
+        final String uri = API_URL + URLEncoder.encode(word, StandardCharsets.UTF_8.name())
+                + "&lllang=" + language;
+        final HttpGet request = new HttpGet(uri);
+        CloseableHttpResponse response = null;
+        try {
+            response = client.execute(request);
+            // Decode as UTF-8 when the response does not declare a charset of its own.
+            final String result =
+                    EntityUtils.toString(response.getEntity(), StandardCharsets.UTF_8);
+            final String translation = TextUtils.isEmpty(result) ? null : parseTranslation(result);
+            if (translation == null) {
+                System.out.println("No translation for " + word);
+            }
+            return translation;
+        } catch (IOException | RuntimeException e) {
+            e.printStackTrace();
+            System.err.println("Uri=" + uri);
+        } finally {
+            close(response);
+        }
+        return null;
+    }
+
+    /**
+     * Extracts the first language link from the JSON body of a {@code prop=langlinks} query.
+     *
+     * @param body JSON text returned by the API
+     * @return the linked title, or {@code null} if no page in the response has a language link
+     */
+    private static String parseTranslation(String body) {
+        final JSONObject jsonQuery = new JSONObject(body).optJSONObject("query");
+        final JSONObject jsonPages = jsonQuery == null ? null : jsonQuery.optJSONObject("pages");
+        if (jsonPages == null) {
+            return null;
+        }
+        for (String key : jsonPages.keySet()) {
+            final JSONObject jsonPage = jsonPages.optJSONObject(key);
+            // A page without a counterpart in the requested language has no "langlinks" array,
+            // so every level is read with the null-returning opt* accessors.
+            final JSONArray jsonLangLinks =
+                    jsonPage == null ? null : jsonPage.optJSONArray("langlinks");
+            final JSONObject jsonLangLink =
+                    jsonLangLinks == null ? null : jsonLangLinks.optJSONObject(0);
+            final String title = jsonLangLink == null ? null : jsonLangLink.optString("*", null);
+            if (!TextUtils.isEmpty(title)) {
+                return title;
+            }
+        }
+        return null;
+    }
+
+    /**
+     * Writes the translations to {@code out/<language>.xml}, encoded as UTF-8.
+     *
+     * <p>NOTE(review): the {@code resources}/{@code string} element names are an assumed schema;
+     * confirm them against whatever consumes these files.
+     *
+     * @param translations translated titles keyed by the English word
+     * @param language language code, used as the file name
+     */
+    private static void writeTranslations(Map<String, String> translations, String language) {
+        final File dir = new File("out");
+        if (!dir.isDirectory() && !dir.mkdirs()) {
+            System.err.println("Cannot create directory " + dir.getAbsolutePath());
+            return;
+        }
+        Writer out = null;
+        try {
+            // Encode explicitly: the platform default charset may not cover every language.
+            out = new OutputStreamWriter(
+                    new FileOutputStream(new File(dir, language + ".xml")), StandardCharsets.UTF_8);
+            out.write("<?xml version=\"1.0\" encoding=\"utf-8\"?>\n");
+            out.write("<resources>\n");
+            for (Map.Entry<String, String> entry : translations.entrySet()) {
+                out.write("    <string name=\"" + escapeXml(entry.getKey()) + "\">"
+                        + escapeXml(entry.getValue()) + "</string>\n");
+            }
+            out.write("</resources>\n");
+        } catch (IOException e) {
+            e.printStackTrace();
+        } finally {
+            close(out);
+        }
+    }
+
+    /** Replaces the characters that have a special meaning in XML with entity references. */
+    private static String escapeXml(String text) {
+        final StringBuilder escaped = new StringBuilder(text.length());
+        for (int i = 0; i < text.length(); i++) {
+            final char c = text.charAt(i);
+            switch (c) {
+                case '&':
+                    escaped.append("&amp;");
+                    break;
+                case '<':
+                    escaped.append("&lt;");
+                    break;
+                case '>':
+                    escaped.append("&gt;");
+                    break;
+                case '"':
+                    escaped.append("&quot;");
+                    break;
+                case '\'':
+                    escaped.append("&apos;");
+                    break;
+                default:
+                    escaped.append(c);
+            }
+        }
+        return escaped.toString();
+    }
+
+    /** Closes {@code closeable} if it is non-null; a failure is printed, not thrown. */
+    private static void close(Closeable closeable) {
+        if (closeable == null) {
+            return;
+        }
+        try {
+            closeable.close();
+        } catch (IOException e) {
+            e.printStackTrace();
+        }
+    }
+}