001/** 002 * 003 */ 004package org.openimaj.picslurper; 005 006import java.io.IOException; 007import java.sql.Connection; 008import java.sql.DriverManager; 009import java.sql.ResultSet; 010import java.sql.SQLException; 011import java.sql.Statement; 012import java.util.Date; 013import java.util.HashMap; 014import java.util.Map; 015 016import javax.xml.datatype.DatatypeConfigurationException; 017import javax.xml.datatype.DatatypeFactory; 018import javax.xml.datatype.XMLGregorianCalendar; 019 020import twitter4j.GeoLocation; 021import twitter4j.HashtagEntity; 022import twitter4j.MediaEntity; 023import twitter4j.Place; 024import twitter4j.RateLimitStatus; 025import twitter4j.Status; 026import twitter4j.URLEntity; 027import twitter4j.User; 028import twitter4j.UserMentionEntity; 029 030/** 031 * This class is a status feeder for picslurper that takes statuses from a 032 * database. The default database driver is the MySQL driver and expects a mysql 033 * URL in the constructor; e.g. jdbc:mysql://localhost/database 034 * <p> 035 * The class connects to the database lazily (that is, only when the feed is 036 * started), so the driver can be changed after construction using 037 * {@link #setDriver(String)}. 038 * <p> 039 * The schema of the table must include at least two columns: one that contains 040 * the tweet text and one that contains the created time. The default column 041 * names are expected to be <code>text</code> and <code>created_at</code> 042 * respectively. These can be changed by passing in a map, where the key is the 043 * expected column name and the value is the actual column name; e.g. 044 * <code>text -> tweet_text</code> 045 * <p> 046 * The results are paged through in steps of 25 by default, so the database must 047 * support the <code>LIMIT</code> command. 048 * 049 * @author David Dupplaw (dpd@ecs.soton.ac.uk) 050 * @created 18 Sep 2013 051 */ 052public class DatabaseStatusFeeder implements StatusFeeder { 053 protected static class DatabaseFeederStatus implements Status { 054 private final XMLGregorianCalendar cal; 055 private final String text; 056 057 public DatabaseFeederStatus(final String text, final XMLGregorianCalendar cal) { 058 this.text = text; 059 this.cal = cal; 060 } 061 062 /** */ 063 private static final long serialVersionUID = 1L; 064 065 @Override 066 public int compareTo(final Status o) { 067 return 0; 068 } 069 070 @Override 071 public RateLimitStatus getRateLimitStatus() { 072 return null; 073 } 074 075 @Override 076 public int getAccessLevel() { 077 return 0; 078 } 079 080 @Override 081 public UserMentionEntity[] getUserMentionEntities() { 082 return null; 083 } 084 085 @Override 086 public URLEntity[] getURLEntities() { 087 return null; 088 } 089 090 @Override 091 public HashtagEntity[] getHashtagEntities() { 092 return null; 093 } 094 095 @Override 096 public MediaEntity[] getMediaEntities() { 097 return null; 098 } 099 100 @Override 101 public Date getCreatedAt() { 102 return this.cal.toGregorianCalendar().getTime(); 103 } 104 105 @Override 106 public long getId() { 107 return 0; 108 } 109 110 @Override 111 public String getText() { 112 return this.text; 113 } 114 115 @Override 116 public String getSource() { 117 return null; 118 } 119 120 @Override 121 public boolean isTruncated() { 122 return false; 123 } 124 125 @Override 126 public long getInReplyToStatusId() { 127 return 0; 128 } 129 130 @Override 131 public long getInReplyToUserId() { 132 return 0; 133 } 134 135 @Override 136 public String getInReplyToScreenName() { 137 return null; 138 } 139 140 @Override 141 public GeoLocation getGeoLocation() { 142 return null; 143 } 144 145 @Override 146 public Place getPlace() { 147 return null; 148 } 149 150 @Override 151 public boolean isFavorited() { 152 return false; 153 } 154 155 @Override 156 public User getUser() { 157 return null; 158 } 159 160 @Override 161 public boolean isRetweet() { 162 return false; 163 } 164 165 @Override 166 public Status getRetweetedStatus() { 167 return null; 168 } 169 170 @Override 171 public long[] getContributors() { 172 return null; 173 } 174 175 @Override 176 public long getRetweetCount() { 177 return 0; 178 } 179 180 @Override 181 public boolean isRetweetedByMe() { 182 return false; 183 } 184 185 @Override 186 public long getCurrentUserRetweetId() { 187 return 0; 188 } 189 190 @Override 191 public boolean isPossiblySensitive() { 192 return false; 193 } 194 195 } 196 197 /** The map of column names */ 198 private Map<String, String> columnNames = new HashMap<String, String>(); 199 200 /** The username to connect to the database */ 201 private final String username; 202 203 /** The password to connect to the database */ 204 private final String password; 205 206 /** The database table where the tweets are stored */ 207 private final String table; 208 209 /** The database connection that is created */ 210 private Connection connection; 211 212 /** The URL to the database */ 213 private final String url; 214 215 /** The database driver to use */ 216 private String databaseDriver = "com.mysql.jdbc.Driver"; 217 218 /** The size of each page of results to retrieve from the database */ 219 private final int pageSize = 25; 220 221 /** 222 * Create a database status feeder using the database, table, username and 223 * password provided. 224 * 225 * @param databaseURL 226 * The URL to the database 227 * @param table 228 * The table to use 229 * @param username 230 * The user name 231 * @param password 232 * The password 233 */ 234 public DatabaseStatusFeeder(final String databaseURL, final String table, 235 final String username, final String password) 236 { 237 this.url = databaseURL; 238 this.table = table; 239 this.username = username; 240 this.password = password; 241 } 242 243 /** 244 * Create a database status feeder using the database, table, username and 245 * password provided with the given column mapping 246 * 247 * @param databaseURL 248 * The URL to the database 249 * @param table 250 * The table to use 251 * @param username 252 * The user name 253 * @param password 254 * The password 255 * @param columnNames 256 * The column mapping 257 */ 258 public DatabaseStatusFeeder(final String databaseURL, final String table, 259 final String username, final String password, final Map<String, String> columnNames) 260 { 261 this(databaseURL, table, username, password); 262 this.columnNames = columnNames; 263 } 264 265 /** 266 * Create a connection using the given driver for the given database. 267 * 268 * @param driver 269 * The driver 270 * @param url 271 * The URL to the database 272 * @param username 273 * The username 274 * @param password 275 * The password 276 * @return The database connection 277 * @throws ClassNotFoundException 278 * If the driver cannot be found 279 * @throws SQLException 280 * If the connection could not be created 281 */ 282 private static Connection createConnection(final String driver, final String url, 283 final String username, final String password) 284 throws ClassNotFoundException, SQLException 285 { 286 Class.forName(driver); 287 if (username == null || password == null || 288 username.trim().length() == 0 || password.trim().length() == 0) 289 return DriverManager.getConnection(url); 290 else 291 return DriverManager.getConnection(url, username, password); 292 } 293 294 /** 295 * {@inheritDoc} 296 * 297 * @see org.openimaj.picslurper.StatusFeeder#feedStatus(org.openimaj.picslurper.PicSlurper) 298 */ 299 @Override 300 public void feedStatus(final PicSlurper slurper) throws IOException { 301 try { 302 // Create the connection to the database 303 this.connection = DatabaseStatusFeeder.createConnection( 304 this.databaseDriver, this.url, this.username, this.password); 305 306 // Create a scrolling result set type 307 final Statement s = this.connection.createStatement( 308 ResultSet.TYPE_SCROLL_SENSITIVE, ResultSet.CONCUR_READ_ONLY); 309 s.setFetchSize(this.pageSize); 310 s.setMaxRows(this.pageSize); 311 312 // First count how many rows there are so we know how many 313 // pages there will be. 314 String sql = "SELECT COUNT(*) FROM " + this.table; 315 ResultSet r = s.executeQuery(sql); 316 r.next(); 317 final int rows = r.getInt(1); 318 final int pages = (int) Math.ceil(rows / (double) this.pageSize); 319 320 for (int pageNumber = 1; pageNumber <= pages; pageNumber++) { 321 sql = "SELECT " + 322 this.getColumn("created_at", true) + ", " + 323 this.getColumn("text", true) + 324 " from " + this.table + 325 " LIMIT " + (pageNumber * this.pageSize) + "," + this.pageSize; 326 327 r = s.executeQuery(sql); 328 329 // Read the results sets 330 while (r.next()) { 331 try { 332 final String createdAt = r.getString(1); 333 final XMLGregorianCalendar cal = DatatypeFactory.newInstance() 334 .newXMLGregorianCalendar(createdAt); 335 final String text = r.getString(2); 336 337 final DatabaseFeederStatus status = new DatabaseFeederStatus(text, cal); 338 slurper.handleStatus(status); 339 } catch (final DatatypeConfigurationException e) { 340 e.printStackTrace(); 341 } 342 } 343 } 344 } catch (final ClassNotFoundException e) { 345 e.printStackTrace(); 346 } catch (final SQLException e) { 347 e.printStackTrace(); 348 } 349 } 350 351 /** 352 * Get a column name through the column name mapping, if one exists. 353 * 354 * @param defaultName 355 * The name to look up 356 * @param tf 357 * Whether to enclose in back-ticks 358 * @return The column name to use 359 */ 360 private String getColumn(final String defaultName, final boolean tf) { 361 if (this.columnNames.get(defaultName) != null) 362 return tf ? "`" + this.columnNames.get(defaultName) + "`" : this.columnNames.get(defaultName); 363 return tf ? "`" + defaultName + "`" : defaultName; 364 } 365 366 /** 367 * Set the database driver to use. 368 * 369 * @param driverName 370 * The driver name. 371 */ 372 public void setDriver(final String driverName) { 373 this.databaseDriver = driverName; 374 } 375 376 /** 377 * @param args 378 */ 379 public static void main(final String[] args) { 380 try { 381 // Connect to local database 382 final DatabaseStatusFeeder dbsf = new DatabaseStatusFeeder( 383 "jdbc:mysql://localhost:3306/swr", "tweets", "swr", "swr"); 384 385 final PicSlurper ps = new PicSlurper(); 386 dbsf.feedStatus(ps); 387 } catch (final IOException e) { 388 e.printStackTrace(); 389 } 390 } 391}