1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
import weka.core.Instances;
import java.io.BufferedReader;
import java.io.FileReader;

import java.io.FileNotFoundException;
import java.io.IOException;

import weka.filters.Filter;
import weka.filters.unsupervised.attribute.Remove;

import weka.core.Instances;
import weka.classifiers.Evaluation;

import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.Writer;

/**
 * Classify instances (similarity pairs). Input: ARFF format.
 * Classes: same event / not same event.
 *
 * <p>Command line: {@code <classifier> <train.arff> <test.arff> <unlabeled.arff>}, where
 * {@code classifier} is {@code SMO} or {@code logistic}. Any other value falls back to an
 * SMO with its default settings.
 */
public class Classifier {

	/** Number of command-line arguments read by {@link #main(String[])} (args[0]..args[3]). */
	private static final int REQUIRED_ARGS = 4;

	/**
	 * Attribute range given to the Remove filter's -R option.
	 * -R expects ONE range string; passing "1" and "2" as separate array elements leaves
	 * "2" outside the range. Presumably attributes 1 and 2 are the ids of the two items
	 * of a pair -- TODO confirm against the ARFF layout.
	 */
	private static final String ID_ATTRIBUTE_RANGE = "1,2";

	/** Index of the unlabeled instance that is printed and classified as a sample. */
	private static final int SAMPLE_INDEX = 20;

	// Package-visible state kept for compatibility with code that may read these fields.
	// 'reader' is no longer assigned: every file is read through loadInstances().
	static BufferedReader reader;
	static Instances train;
	static Instances test;
	static Instances unlabeled;

	/**
	 * Trains the selected classifier on the training data, prints an evaluation on the
	 * test data, then loads the unlabeled data and prints the prediction for one sample
	 * instance.
	 *
	 * @param args args[0] classifier name ("SMO" or "logistic"), args[1] training ARFF file,
	 *             args[2] test ARFF file, args[3] unlabeled ARFF file
	 */
	public static void main(String [] args) {

		System.out.println("Classifier...");

		if (args.length < REQUIRED_ARGS) {
			System.err.println("Usage: Classifier <SMO|logistic> <train.arff> <test.arff> <unlabeled.arff>");
			return;
		}

		try {
			train = loadInstances(args[1]);
			test = loadInstances(args[2]);
		} catch (IOException e) {
			// Nothing below can run without both datasets, so stop instead of
			// continuing into a NullPointerException.
			e.printStackTrace();
			return;
		}

		try {

			// Filter attributes: drop the id attributes before training.
			Remove remove = new Remove();
			remove.setOptions(new String[] {"-R", ID_ATTRIBUTE_RANGE});
			remove.setInputFormat(train);              // inform filter about dataset **AFTER** setting options
			train = Filter.useFilter(train, remove);
			test = Filter.useFilter(test, remove);

			// The class attribute is the last attribute.
			train.setClassIndex(train.numAttributes() - 1);
			test.setClassIndex(test.numAttributes() - 1);

			weka.classifiers.Classifier classifier = createClassifier(args[0]);
			classifier.buildClassifier(train);

			// Evaluate the classifier on the test set.
			Evaluation eval = new Evaluation(train);
			eval.evaluateModel(classifier, test);
			System.out.println(eval.toSummaryString("\nResults\n======\n", false));

			// Load the unlabeled data only now, so a missing file does not prevent the
			// evaluation above from being printed.
			unlabeled = loadInstances(args[3]);

			// Filter.useFilter returns the filtered dataset and leaves its argument as it
			// was, so the result has to be assigned back.
			unlabeled = Filter.useFilter(unlabeled, remove);
			unlabeled.setClassIndex(unlabeled.numAttributes() - 1);

			System.out.println(unlabeled.numInstances());
			if (unlabeled.numInstances() > SAMPLE_INDEX) {
				System.out.println(unlabeled.instance(SAMPLE_INDEX));
				System.out.println(classifier.classifyInstance(unlabeled.instance(SAMPLE_INDEX)));
			} else {
				System.err.println("Unlabeled data has fewer than " + (SAMPLE_INDEX + 1)
						+ " instances; skipping sample classification.");
			}

			// TODO: labeling every unlabeled instance and saving the result
			// (e.g. to data/labeled.arff) is not implemented yet.

		} catch (Exception e) {
			e.printStackTrace();
		}
	}

	/**
	 * Reads an ARFF file into a dataset and closes the file, also when reading fails.
	 *
	 * @param path path of the ARFF file
	 * @return the dataset read from the file
	 * @throws IOException if the file cannot be opened or read
	 */
	private static Instances loadInstances(String path) throws IOException {
		BufferedReader in = new BufferedReader(new FileReader(path));
		try {
			return new Instances(in);
		} finally {
			in.close();
		}
	}

	/**
	 * Creates and configures the classifier selected on the command line.
	 *
	 * @param name "SMO" for a support vector machine, "logistic" for logistic regression;
	 *             any other value yields an SMO with no options set
	 * @return the configured, not yet trained classifier
	 * @throws Exception if the classifier rejects its option string
	 */
	private static weka.classifiers.Classifier createClassifier(String name) throws Exception {
		if (name.equals("logistic")) {
			// Logistic regression classifier
			System.out.println("Logistic Regression");
			weka.classifiers.functions.Logistic logistic = new weka.classifiers.functions.Logistic();
			logistic.setOptions(weka.core.Utils.splitOptions("-R 1.0E-8 -M -1"));
			return logistic;
		}

		weka.classifiers.functions.SMO smo = new weka.classifiers.functions.SMO();
		if (name.equals("SMO")) {
			// Support vector machine classifier
			System.out.println("SMO");
			smo.setOptions(weka.core.Utils.splitOptions("-C 1.0 -L 0.0010 -P 1.0E-12 -N 0 -V -1 -W 1 -K \"weka.classifiers.functions.supportVector.PolyKernel -C 250007 -E 1.0\""));
		}
		return smo;
	}
}