001/**
002 * Copyright (c) 2012, The University of Southampton and the individual contributors.
003 * All rights reserved.
004 *
005 * Redistribution and use in source and binary forms, with or without modification,
006 * are permitted provided that the following conditions are met:
007 *
008 *   *  Redistributions of source code must retain the above copyright notice,
009 *      this list of conditions and the following disclaimer.
010 *
011 *   *  Redistributions in binary form must reproduce the above copyright notice,
012 *      this list of conditions and the following disclaimer in the documentation
013 *      and/or other materials provided with the distribution.
014 *
015 *   *  Neither the name of the University of Southampton nor the names of its
016 *      contributors may be used to endorse or promote products derived from this
017 *      software without specific prior written permission.
018 *
019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
029 */
030package org.openimaj.tools.twitter.modes.preprocessing;
031
032import java.io.IOException;
033import java.util.Map;
034
035import org.apache.jena.query.ParameterizedSparqlString;
036import org.apache.jena.rdf.model.Model;
037import org.apache.jena.rdf.model.Resource;
038import org.apache.jena.update.UpdateAction;
039import org.openimaj.io.FileUtils;
040import org.openimaj.text.nlp.language.LanguageDetector;
041import org.openimaj.twitter.GeneralJSON;
042import org.openimaj.twitter.GeneralJSONRDF;
043import org.openimaj.twitter.RDFAnalysisProvider;
044import org.openimaj.twitter.USMFStatus;
045
046/**
047 * A gateway class which loads and uses the #LanguageDetector
048 *
049 * @author Sina Samangooei (ss@ecs.soton.ac.uk)
050 *
051 */
052public class LanguageDetectionMode extends TwitterPreprocessingMode<Map<String, Object>> {
053
054        private LanguageDetector detector;
055        final static String LANGUAGES = "langid";
056
057        /**
058         * Loads the language detector
059         *
060         * @throws IOException
061         */
062        public LanguageDetectionMode() throws IOException {
063                detector = new LanguageDetector();
064        }
065
066        @Override
067        public Map<String, Object> process(USMFStatus twitterStatus) {
068                Map<String, Object> language = null;
069                try {
070                        language = detector.classify(twitterStatus.text).asMap();
071
072                } catch (final Exception e) {
073                }
074                twitterStatus.addAnalysis(LANGUAGES, language);
075                return language;
076
077        }
078
079        @Override
080        public RDFAnalysisProvider rdfAnalysisProvider() {
081                return new RDFAnalysisProvider() {
082                        private static final String DETECTED_LANGUAGE_INSERT_SPARQL = "/org/openimaj/tools/twiiter/rdf/detected_language_insert.sparql";
083                        private String query;
084
085                        @Override
086                        public void addAnalysis(Model m, Resource socialEvent, GeneralJSON analysisSource) {
087                                final Map<String, Object> analysis = analysisSource.getAnalysis(LANGUAGES);
088                                if (analysis == null)
089                                        return;
090
091                                final ParameterizedSparqlString pss = new ParameterizedSparqlString(query); // wasteful?
092                                                                                                                                                                                        // makes
093                                                                                                                                                                                        // it
094                                                                                                                                                                                        // threadsafe
095                                                                                                                                                                                        // but
096                                                                                                                                                                                        // is
097                                                                                                                                                                                        // it
098                                                                                                                                                                                        // bad?
099                                pss.setParam("socialEvent", socialEvent);
100                                final Resource langNode = m.createResource();
101                                pss.setParam("langid", langNode);
102                                pss.setLiteral("language", analysis.get("language").toString());
103                                pss.setLiteral("confidence", (Double) analysis.get("confidence"));
104                                UpdateAction.execute(pss.asUpdate(), m);
105                        }
106
107                        @Override
108                        public void init() {
109                                try {
110                                        query = FileUtils.readall(GeneralJSONRDF.class.getResourceAsStream(DETECTED_LANGUAGE_INSERT_SPARQL));
111                                } catch (final IOException e) {
112                                        throw new RuntimeException(e);
113                                }
114
115                        }
116                };
117        }
118
119        @Override
120        public String getAnalysisKey() {
121                return LANGUAGES;
122        }
123}