/**
 * Java program for classifying short text messages into two classes.
 */

import weka.core.*;
import weka.classifiers.*;
import weka.filters.*;
import java.io.*;
import java.util.*;

/**
 * Classifies short text messages into the two classes "miss" and "hit".
 *
 * A message is reduced to a vector of keyword counts (one numeric attribute
 * per entry of the keyword list), the training data is passed through a
 * filter, and a classifier is rebuilt after every training message.
 *
 * Instances of this class are written to and read from the model file with
 * Java object serialization (see main). No serialVersionUID is declared, so
 * the default one is derived from the class shape; all helper methods are
 * kept private, because private methods do not take part in that
 * computation and previously saved model files therefore stay readable.
 */
public class MessageClassifier implements Serializable {

  /** Our (rather arbitrary) set of keywords; one numeric attribute each. */
  private final String[] m_Keywords = {"product", "only", "offer", "great", "amazing", "phantastic", "opportunity", "buy", "now"};

  /** The training data collected so far. */
  private Instances m_Data = null;

  /** The filter applied to the training data and to messages to classify. */
  private Filter m_Filter = new DiscretizeFilter();

  /** The classifier, rebuilt from the filtered data on every update. */
  private Classifier m_Classifier = new IBk();

  /**
   * Constructs an empty training dataset: one numeric attribute per keyword
   * followed by a nominal class attribute with the values "miss" and "hit".
   *
   * @throws Exception declared for callers; propagated from the Weka calls
   */
  public MessageClassifier() throws Exception {

    String nameOfDataset = "MessageClassificationProblem";

    // Create numeric attributes, one per keyword.
    FastVector attributes = new FastVector(m_Keywords.length + 1);
    for (int i = 0 ; i < m_Keywords.length; i++) {
      attributes.addElement(new Attribute(m_Keywords[i]));
    }

    // Add class attribute.
    FastVector classValues = new FastVector(2);
    classValues.addElement("miss");
    classValues.addElement("hit");
    attributes.addElement(new Attribute("Class", classValues));

    // Create dataset with initial capacity of 100, and make the last
    // attribute the class.
    m_Data = new Instances(nameOfDataset, attributes, 100);
    m_Data.setClassIndex(m_Data.numAttributes() - 1);
  }

  /**
   * Updates the model using the given training message.
   *
   * @param message the raw text of the training message
   * @param classValue the label of the message; the class attribute is
   *        declared with the values "miss" and "hit"
   * @throws Exception if the instance cannot be labelled, filtered, or the
   *         classifier cannot be rebuilt
   */
  public void updateModel(String message, String classValue) 
    throws Exception {

    // Convert message string into instance.
    Instance instance = makeInstance(cleanupString(message));

    // Add class value to instance.
    instance.setClassValue(classValue);

    // Add instance to training data.
    m_Data.add(instance);

    // Run the complete training set through the filter.
    m_Filter.inputFormat(m_Data);
    Instances filteredData = Filter.useFilter(m_Data, m_Filter);

    // Rebuild classifier from scratch on the filtered data.
    m_Classifier.buildClassifier(filteredData);
  }

  /**
   * Classifies a given message and prints the predicted class label to
   * standard error.
   *
   * @param message the raw text of the message to classify
   * @throws Exception if no training data has been collected yet, or if
   *         filtering or classification fails
   */
  public void classifyMessage(String message) throws Exception {

    // Without training data no classifier has been built.
    if (m_Data.numInstances() == 0) {
      throw new Exception("No classifier available.");
    }

    // Convert message string into instance.
    Instance instance = makeInstance(cleanupString(message));

    // Filter instance.
    m_Filter.input(instance);
    Instance filteredInstance = m_Filter.output();

    // Get index of predicted class value.
    double predicted = m_Classifier.classifyInstance(filteredInstance);

    // Translate the index back into the class label and report it.
    System.err.println("Message classified as : " +
                    m_Data.classAttribute().value((int)predicted));
  }

  /**
   * Converts a text message into an instance of keyword counts.
   *
   * Attribute i holds the number of whitespace-separated tokens equal to
   * keyword i. The last slot (the class) is not set here.
   *
   * @param messageText the message, expected to be already cleaned up
   * @return an instance attached to the training dataset's header
   */
  private Instance makeInstance(String messageText) {

    StringTokenizer tokenizer = new StringTokenizer(messageText);
    Instance instance = new Instance(m_Keywords.length + 1);
    String token;

    // Initialize counts to zero.
    for (int i = 0; i < m_Keywords.length; i++) {
      instance.setValue(i, 0);
    }

    // Compute attribute values.
    while (tokenizer.hasMoreTokens()) {
      token = tokenizer.nextToken();
      for (int i = 0; i < m_Keywords.length; i++) {
        if (token.equals(m_Keywords[i])) {
          instance.setValue(i, instance.value(i) + 1.0);
          // A token can match at most one keyword.
          break;
        }
      }
    }

    // Give instance access to attribute information from the dataset.
    instance.setDataset(m_Data);

    return instance;
  }

  /**
   * Deletes every character that is neither a letter nor whitespace from a
   * string, and lowercases the rest.
   *
   * @param messageText the raw message text
   * @return the cleaned text; its length equals the number of kept characters
   */
  private String cleanupString(String messageText) {

    char[] result = new char[messageText.length()];
    int position = 0;

    for (int i = 0; i < messageText.length(); i++) {
      char c = messageText.charAt(i);
      if (Character.isLetter(c) || Character.isWhitespace(c)) {
        result[position++] = Character.toLowerCase(c);
      }
    }

    // Only the first 'position' slots were filled. Returning the whole array
    // would append '\0' characters, which the tokenizer does not treat as
    // delimiters, so the final word would never match a keyword.
    return new String(result, 0, position);
  }

  /**
   * Reads the complete contents of a file and decodes it with the platform
   * default charset.
   *
   * @param fileName path of the message file
   * @return the file contents as a string
   * @throws IOException if the file cannot be opened or read
   */
  private static String readMessageFile(String fileName) throws IOException {

    FileInputStream messageFile = new FileInputStream(fileName);
    try {
      // Read until end of stream: a single read() sized by available() is
      // not guaranteed to return the whole file.
      ByteArrayOutputStream contents = new ByteArrayOutputStream();
      byte[] chunk = new byte[4096];
      int numRead;
      while ((numRead = messageFile.read(chunk)) != -1) {
        contents.write(chunk, 0, numRead);
      }
      return contents.toString();
    } finally {
      messageFile.close();
    }
  }

  /**
   * Loads a previously saved message classifier, or creates a fresh one if
   * the model file cannot be opened.
   *
   * NOTE: this uses Java object deserialization; only point it at model
   * files written by this program.
   *
   * @param modelFileName path of the model file
   * @return the stored classifier, or a new empty one
   * @throws Exception if the file exists but cannot be deserialized, or if a
   *         new classifier cannot be constructed
   */
  private static MessageClassifier loadModel(String modelFileName)
    throws Exception {

    FileInputStream modelInFile;
    try {
      modelInFile = new FileInputStream(modelFileName);
    } catch (FileNotFoundException e) {
      // No model yet: start with an empty one.
      return new MessageClassifier();
    }
    try {
      ObjectInputStream modelInObjectFile =
        new ObjectInputStream(modelInFile);
      return (MessageClassifier) modelInObjectFile.readObject();
    } finally {
      modelInFile.close();
    }
  }

  /**
   * Serializes the given message classifier into the model file.
   *
   * @param messageCl the classifier to store
   * @param modelFileName path of the model file (overwritten)
   * @throws IOException if the file cannot be written
   */
  private static void saveModel(MessageClassifier messageCl,
                                String modelFileName) throws IOException {

    FileOutputStream modelOutFile = new FileOutputStream(modelFileName);
    try {
      ObjectOutputStream modelOutObjectFile =
        new ObjectOutputStream(modelOutFile);
      modelOutObjectFile.writeObject(messageCl);
      modelOutObjectFile.flush();
    } finally {
      modelOutFile.close();
    }
  }

  /**
   * Main method.
   *
   * Options: -m names the message file (required), -t names the model file
   * (required; created on first update if it does not exist), -c gives the
   * class value. With -c the message is used for training and the updated
   * model is saved; without it the message is classified. Any failure is
   * reported as a stack trace.
   *
   * @param options the command-line options
   */
  public static void main(String[] options) {

    try {

      // Read message file into string.
      String messageFileString = Utils.getOption('m', options);
      if (messageFileString.length() == 0) {
        throw new Exception ("Name of message file not provided.");
      }
      String message = readMessageFile(messageFileString);

      // Check if class value is given.
      String classValue = Utils.getOption('c', options);

      // Check for model file. If existent, read it, otherwise create new
      // one.
      String modelFileString = Utils.getOption('t', options);
      if (modelFileString.length() == 0) {
        throw new Exception ("Name of data file not provided.");
      }
      MessageClassifier messageCl = loadModel(modelFileString);

      // Check if there are any options left
      Utils.checkForRemainingOptions(options);

      // Process message.
      if (classValue.length() != 0) {
        messageCl.updateModel(message, classValue);
        // A class has been given, so the updated classifier must be saved.
        saveModel(messageCl, modelFileString);
      } else {
        messageCl.classifyMessage(message);
      }
    } catch (Exception e) {
      e.printStackTrace();
    }
  }
}