/*
 * Decompiled with CFR 0.152.
 */
package edu.cmu.minorthird.text.model;

import edu.cmu.minorthird.text.BasicTextBase;
import edu.cmu.minorthird.text.FancyLoader;
import edu.cmu.minorthird.text.Span;
import edu.cmu.minorthird.text.TextBase;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.LineNumberReader;
import java.io.PrintStream;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;

/**
 * A unigram (single-word) frequency model.
 *
 * <p>The model maps words to integer counts and keeps a running total of all
 * counts. It can be read from and written to a text file holding one
 * {@code <count> <word>} pair per line, built up incrementally with
 * {@link #incrementFrequency(String)}, and used to score a {@link Span} as a
 * sum of smoothed per-token log probabilities.
 *
 * <p>Lookups through {@link #getFrequency(String)} and updates through
 * {@link #incrementFrequency(String)} lower-case the word first. Words read by
 * {@link #load(File)} are stored exactly as they appear in the file.
 */
public class UnigramModel {
    /** Shared boxed values for the small counts 0..9, to avoid re-boxing them. */
    private static final Double[] CACHED_DOUBLES = new Double[10];

    static {
        for (int i = 0; i < CACHED_DOUBLES.length; ++i) {
            CACHED_DOUBLES[i] = Double.valueOf((double) i);
        }
    }

    /** Word to count; counts are stored boxed as Double but always hold whole numbers. */
    private Map<String, Double> freq = new HashMap<String, Double>();

    /** Sum of all counts added by load and incrementFrequency. */
    private double total = 0.0;

    /**
     * Reads {@code <count> <word>} lines from a file into this model.
     *
     * <p>Each line is trimmed and split on whitespace and must yield exactly
     * two fields, the first being a non-negative integer. Counts are added to
     * the running total; an entry for a word already in the model is replaced.
     *
     * @param file the model file to read
     * @throws FileNotFoundException if the file cannot be opened
     * @throws IOException if reading fails
     * @throws IllegalStateException if a line is not a valid count/word pair
     */
    public void load(File file) throws IOException, FileNotFoundException {
        LineNumberReader in = new LineNumberReader(new FileReader(file));
        try {
            String line;
            while ((line = in.readLine()) != null) {
                String[] words = line.trim().split("\\s+");
                if (words.length != 2) {
                    throw this.badLine(line, in, null);
                }
                int n;
                try {
                    n = Integer.parseInt(words[0]);
                } catch (NumberFormatException e) {
                    throw this.badLine(line, in, e);
                }
                if (n < 0) {
                    // A negative count is malformed input, not a programming error.
                    throw this.badLine(line, in, null);
                }
                this.total += (double) n;
                this.freq.put(words[1], this.getDouble(n));
            }
        } finally {
            // Close even when a bad line aborts the load.
            in.close();
        }
    }

    /**
     * Builds the exception reported for a malformed input line.
     *
     * @param line the offending line text
     * @param in the reader, used for its current line number
     * @param cause the underlying parse failure, or null if there is none
     * @return the exception for the caller to throw
     */
    private IllegalStateException badLine(String line, LineNumberReader in, Throwable cause) {
        String message = "bad input at line " + in.getLineNumber() + ": " + line;
        return cause == null
            ? new IllegalStateException(message)
            : new IllegalStateException(message, cause);
    }

    /**
     * Writes every entry of the model to a file as {@code <count> <word>} lines.
     *
     * @param file the file to write (created or overwritten)
     * @throws IOException if the file cannot be opened or a write fails
     */
    public void save(File file) throws IOException {
        PrintStream out = new PrintStream(new BufferedOutputStream(new FileOutputStream(file)));
        try {
            for (Map.Entry<String, Double> e : this.freq.entrySet()) {
                out.println(e.getValue().intValue() + " " + e.getKey());
            }
            // PrintStream never throws on write failure; it only records an
            // error flag, so flush and check it explicitly.
            if (out.checkError()) {
                throw new IOException("error writing unigram model to " + file);
            }
        } finally {
            out.close();
        }
    }

    /**
     * Boxes a count, reusing a cached instance for small non-negative values.
     *
     * @param n the count to box
     * @return a Double holding {@code n}
     */
    private Double getDouble(int n) {
        if (n >= 0 && n < CACHED_DOUBLES.length) {
            return CACHED_DOUBLES[n];
        }
        return Double.valueOf((double) n);
    }

    /**
     * Scores a span as the sum, over its tokens, of the smoothed log
     * probability of each token's count.
     *
     * <p>The prior is {@code 0.1 / total}, so the result is only meaningful
     * once the model holds at least one count.
     *
     * @param span the span whose tokens are scored
     * @return the summed log probability
     */
    public double score(Span span) {
        double sum = 0.0;
        double prior = 0.1 / this.total;
        for (int i = 0; i < span.size(); ++i) {
            // getFrequency lower-cases the token itself.
            int f = this.getFrequency(span.getToken(i).getValue());
            sum += this.estimatedLogProb(f, this.total, prior, 1.0);
        }
        return sum;
    }

    /**
     * Returns the running total of all counts in the model.
     *
     * @return the total word count
     */
    public double getTotalWordCount() {
        return this.total;
    }

    /**
     * Looks up the count of a word, ignoring case.
     *
     * @param s the word to look up; it is lower-cased before the lookup
     * @return the stored count, or 0 if the word is not in the model
     */
    public int getFrequency(String s) {
        Double f = this.freq.get(s.toLowerCase());
        return f == null ? 0 : f.intValue();
    }

    /**
     * Adds one occurrence of a word to the model, updating both the word's
     * count and the running total.
     *
     * @param s the word observed; it is lower-cased before being stored
     */
    public void incrementFrequency(String s) {
        String s1 = s.toLowerCase();
        this.freq.put(s1, this.getDouble(this.getFrequency(s1) + 1));
        // Keep the total in step with the counts so getTotalWordCount() and
        // score() are usable on a model built in memory.
        this.total += 1.0;
    }

    /**
     * Computes {@code log((k + prior * pseudoCounts) / (n + pseudoCounts))}.
     *
     * @param k the observed count of the event
     * @param n the total number of observations
     * @param prior the prior probability of the event
     * @param pseudoCounts the weight given to the prior
     * @return the natural log of the smoothed estimate
     */
    private double estimatedLogProb(double k, double n, double prior, double pseudoCounts) {
        return Math.log((k + prior * pseudoCounts) / (n + pseudoCounts));
    }

    /**
     * Command-line entry point.
     *
     * <p>With exactly two arguments ({@code textbase modelfile}) it counts
     * every token of every document in the text base and saves the model.
     * With any other non-zero number of arguments
     * ({@code modelfile span1 span2...}) it loads the model and prints the
     * score and per-token counts of each given string. Because exactly two
     * arguments always selects the first mode, scoring a single string is not
     * reachable from the command line. With no arguments it prints usage.
     *
     * @param args the command-line arguments
     * @throws IOException if the model or text base cannot be read or written
     */
    public static void main(String[] args) throws IOException {
        if (args.length == 0) {
            System.out.println("usage 1: modelfile span1 span2...");
            System.out.println("usage 2: textbase modelfile");
            // Nothing to do; falling through would index args[0].
            return;
        }
        if (args.length == 2) {
            UnigramModel model = new UnigramModel();
            TextBase base = FancyLoader.loadTextLabels(args[0]).getTextBase();
            Iterator<Span> i = base.documentSpanIterator();
            while (i.hasNext()) {
                Span s = i.next();
                for (int j = 0; j < s.size(); ++j) {
                    model.incrementFrequency(s.getToken(j).getValue());
                }
            }
            model.save(new File(args[1]));
        } else {
            UnigramModel model = new UnigramModel();
            model.load(new File(args[0]));
            BasicTextBase base = new BasicTextBase();
            for (int i = 1; i < args.length; ++i) {
                base.loadDocument("argv." + i, args[i]);
            }
            Iterator<Span> j = base.documentSpanIterator();
            while (j.hasNext()) {
                Span s = j.next();
                System.out.println(s.asString() + " => " + model.score(s));
                for (int k = 0; k < s.size(); ++k) {
                    String w = s.getToken(k).getValue();
                    System.out.print(" " + w + ":" + model.getFrequency(w));
                }
                System.out.println();
            }
        }
    }
}

