/*
 * Decompiled with CFR 0.152.
 */
package com.aliasi.crf;

import com.aliasi.chunk.Chunk;
import com.aliasi.chunk.Chunker;
import com.aliasi.chunk.Chunking;
import com.aliasi.chunk.ConfidenceChunker;
import com.aliasi.chunk.NBestChunker;
import com.aliasi.chunk.TagChunkCodec;
import com.aliasi.corpus.Corpus;
import com.aliasi.corpus.ObjectHandler;
import com.aliasi.crf.ChainCrf;
import com.aliasi.crf.ChainCrfFeatureExtractor;
import com.aliasi.io.Reporter;
import com.aliasi.io.Reporters;
import com.aliasi.stats.AnnealingSchedule;
import com.aliasi.stats.RegressionPrior;
import com.aliasi.tag.ScoredTagging;
import com.aliasi.tag.StringTagging;
import com.aliasi.tag.TagLattice;
import com.aliasi.tag.Tagging;
import com.aliasi.tokenizer.Tokenizer;
import com.aliasi.tokenizer.TokenizerFactory;
import com.aliasi.util.AbstractExternalizable;
import com.aliasi.util.ScoredObject;
import com.aliasi.util.Strings;
import java.io.IOException;
import java.io.ObjectInput;
import java.io.ObjectOutput;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

public class ChainCrfChunker
implements Chunker,
ConfidenceChunker,
NBestChunker,
Serializable {
    /** Serialization version identifier for this class. */
    static final long serialVersionUID = -2244399751558084581L;
    /** Chain CRF that assigns tags to token sequences. */
    private final ChainCrf<String> mCrf;
    /** Factory producing the tokenizers used to split input text before tagging. */
    private final TokenizerFactory mTokenizerFactory;
    /** Codec used to convert between taggings and chunkings. */
    private final TagChunkCodec mCodec;
    /**
     * Not referenced by name anywhere in this file.
     * NOTE(review): presumably the origin of the literal {@code false} passed to
     * {@code ChainCrf.estimate} from this class's {@code estimate} method; confirm
     * before changing or removing.
     */
    static final boolean ALLOW_UNSEEN_TAG_TRANSITIONS = false;

    /**
     * Constructs a chunker from the given CRF, tokenizer factory and codec.
     * The arguments are stored as given; no validation or copying is performed.
     *
     * @param crf chain CRF used to tag token sequences
     * @param tokenizerFactory factory for the tokenizers applied to input text
     * @param codec codec used to turn taggings into chunkings
     */
    public ChainCrfChunker(ChainCrf<String> crf, TokenizerFactory tokenizerFactory, TagChunkCodec codec) {
        mCodec = codec;
        mTokenizerFactory = tokenizerFactory;
        mCrf = crf;
    }

    /**
     * Returns the chain CRF this chunker was constructed with.
     *
     * @return the underlying CRF (the stored reference, not a copy)
     */
    public ChainCrf<String> crf() {
        return mCrf;
    }

    /**
     * Returns the tag/chunk codec this chunker was constructed with.
     *
     * @return the underlying codec (the stored reference, not a copy)
     */
    public TagChunkCodec codec() {
        return mCodec;
    }

    /**
     * Returns the tokenizer factory this chunker was constructed with.
     *
     * @return the underlying tokenizer factory (the stored reference, not a copy)
     */
    public TokenizerFactory tokenizerFactory() {
        return mTokenizerFactory;
    }

    /**
     * Returns a multi-line description of this chunker: the codec, the
     * tokenizer factory and the string form of the CRF, in that order.
     *
     * @return human-readable description of this chunker's components
     */
    @Override
    public String toString() {
        // Accessors are used (rather than the fields) so the output follows
        // whatever crf()/codec()/tokenizerFactory() return.
        return new StringBuilder()
            .append("TagChunkCodec=").append(codec())
            .append("\n")
            .append("Tokenizer Factory=").append(tokenizerFactory())
            .append("\n")
            .append("CRF=\n")
            .append(crf().toString())
            .toString();
    }

    /**
     * Chunks the given character sequence by converting it to a character
     * array and delegating to {@link #chunk(char[],int,int)} over the whole array.
     *
     * @param cSeq characters to chunk
     * @return chunking of the full sequence
     */
    @Override
    public Chunking chunk(CharSequence cSeq) {
        char[] chars = Strings.toCharArray(cSeq);
        int length = chars.length;
        return chunk(chars, 0, length);
    }

    /**
     * Chunks the slice {@code cs[start, end)}: the slice is tokenized, the
     * tokens are tagged by the CRF, and the resulting tagging is converted
     * to a chunking by the codec.
     *
     * @param cs underlying characters
     * @param start index of the first character of the slice
     * @param end index one past the last character of the slice
     * @return chunking derived from the CRF's tagging of the slice
     */
    @Override
    public Chunking chunk(char[] cs, int start, int end) {
        PreTagging tokenized = preTag(cs, start, end);
        Tagging<String> bestTagging = mCrf.tag(tokenized.mTokens);
        return toChunking(bestTagging, tokenized, cs, start, end, mCodec);
    }

    /**
     * Returns an iterator over scored chunkings of the slice {@code cs[start, end)}.
     * Each element is produced lazily from the corresponding scored tagging
     * returned by the CRF's {@code tagNBest} and carries that tagging's score.
     *
     * @param cs underlying characters
     * @param start index of the first character of the slice
     * @param end index one past the last character of the slice
     * @param maxResults limit passed through to the CRF's n-best tagger
     * @return iterator over scored chunkings
     */
    @Override
    public Iterator<ScoredObject<Chunking>> nBest(char[] cs, int start, int end, int maxResults) {
        PreTagging tokenized = preTag(cs, start, end);
        Iterator<ScoredTagging<String>> taggings = mCrf.tagNBest(tokenized.mTokens, maxResults);
        return new IteratorWrapper(taggings, tokenized, cs, start, end, mCodec);
    }

    /**
     * Same as {@link #nBest(char[],int,int,int)} except that the taggings come
     * from the CRF's {@code tagNBestConditional}.
     * NOTE(review): the scores are presumably conditional (normalized) values;
     * confirm against {@code ChainCrf.tagNBestConditional}.
     *
     * @param cs underlying characters
     * @param start index of the first character of the slice
     * @param end index one past the last character of the slice
     * @param maxResults limit passed through to the CRF's n-best tagger
     * @return iterator over scored chunkings
     */
    public Iterator<ScoredObject<Chunking>> nBestConditional(char[] cs, int start, int end, int maxResults) {
        PreTagging tokenized = preTag(cs, start, end);
        Iterator<ScoredTagging<String>> taggings = mCrf.tagNBestConditional(tokenized.mTokens, maxResults);
        return new IteratorWrapper(taggings, tokenized, cs, start, end, mCodec);
    }

    /**
     * Returns an iterator over individual chunks for the slice
     * {@code cs[start, end)}. The slice is tokenized, the CRF's marginal tag
     * lattice is computed for the tokens, and the codec extracts the chunks
     * from that lattice using the token start/end offsets.
     *
     * @param cs underlying characters
     * @param start index of the first character of the slice
     * @param end index one past the last character of the slice
     * @param maxNBest limit passed through to the codec's n-best chunk extraction
     * @return iterator over chunks produced by the codec
     */
    @Override
    public Iterator<Chunk> nBestChunks(char[] cs, int start, int end, int maxNBest) {
        PreTagging tokenized = preTag(cs, start, end);
        TagLattice<String> marginals = mCrf.tagMarginal(tokenized.mTokens);
        int[] starts = tokenized.mTokenStarts;
        int[] ends = tokenized.mTokenEnds;
        return mCodec.nBestChunks(marginals, starts, ends, maxNBest);
    }

    /**
     * Tokenizes the slice {@code cs[start, end)} and records, for every token,
     * its text plus the start and end positions reported by the tokenizer.
     *
     * @param cs underlying characters
     * @param start index of the first character of the slice
     * @param end index one past the last character of the slice
     * @return the tokens with their parallel start/end position arrays
     */
    PreTagging preTag(char[] cs, int start, int end) {
        List<String> tokenList = new ArrayList<String>();
        List<Integer> startList = new ArrayList<Integer>();
        List<Integer> endList = new ArrayList<Integer>();
        // The tokenizer factory takes (array, offset, length), hence end - start.
        Tokenizer tokenizer = mTokenizerFactory.tokenizer(cs, start, end - start);
        for (String tok = tokenizer.nextToken(); tok != null; tok = tokenizer.nextToken()) {
            tokenList.add(tok);
            startList.add(tokenizer.lastTokenStartPosition());
            endList.add(tokenizer.lastTokenEndPosition());
        }
        int[] starts = toArray(startList);
        int[] ends = toArray(endList);
        return new PreTagging(tokenList, starts, ends);
    }

    /**
     * Serialization hook: substitutes a {@link Serializer} wrapping this
     * chunker as the object that is actually written.
     *
     * @return serialization proxy for this chunker
     */
    Object writeReplace() {
        Serializer proxy = new Serializer(this);
        return proxy;
    }

    /**
     * Trains a chain CRF chunker from a corpus of chunkings. The chunking
     * corpus is viewed as a tagging corpus through the codec, a chain CRF is
     * estimated from it via {@code ChainCrf.estimate}, and the result is
     * wrapped together with the tokenizer factory and codec.
     *
     * <p>All training parameters are passed through to {@code ChainCrf.estimate}
     * unchanged; see that method for their meaning. The boolean argument
     * following {@code cacheFeatureVectors} is fixed to {@code false}.
     *
     * @param chunkingCorpus corpus supplying training chunkings
     * @param codec codec converting chunkings to taggings for training, and
     *     stored in the returned chunker
     * @param tokenizerFactory tokenizer factory stored in the returned chunker
     * @param featureExtractor passed through to {@code ChainCrf.estimate}
     * @param addInterceptFeature passed through to {@code ChainCrf.estimate}
     * @param minFeatureCount passed through to {@code ChainCrf.estimate}
     * @param cacheFeatureVectors passed through to {@code ChainCrf.estimate}
     * @param prior passed through to {@code ChainCrf.estimate}
     * @param priorBlockSize passed through to {@code ChainCrf.estimate}
     * @param annealingSchedule passed through to {@code ChainCrf.estimate}
     * @param minImprovement passed through to {@code ChainCrf.estimate}
     * @param minEpochs passed through to {@code ChainCrf.estimate}
     * @param maxEpochs passed through to {@code ChainCrf.estimate}
     * @param reporter destination for progress messages; when {@code null},
     *     the reporter returned by {@code Reporters.silent()} is used instead
     * @return chunker built around the estimated CRF
     * @throws IOException declared for the underlying estimation call
     */
    public static ChainCrfChunker estimate(Corpus<ObjectHandler<Chunking>> chunkingCorpus, TagChunkCodec codec, TokenizerFactory tokenizerFactory, ChainCrfFeatureExtractor<String> featureExtractor, boolean addInterceptFeature, int minFeatureCount, boolean cacheFeatureVectors, RegressionPrior prior, int priorBlockSize, AnnealingSchedule annealingSchedule, double minImprovement, int minEpochs, int maxEpochs, Reporter reporter) throws IOException {
        Reporter log = (reporter != null) ? reporter : Reporters.silent();
        log.info("Training chain CRF chunker");
        log.info("Converting chunk corpus to tag corpus using codec.");
        TagCorpus tagView = new TagCorpus(chunkingCorpus, codec);
        ChainCrf<String> trained = ChainCrf.estimate(
            tagView,
            featureExtractor,
            addInterceptFeature,
            minFeatureCount,
            cacheFeatureVectors,
            false,
            prior,
            priorBlockSize,
            annealingSchedule,
            minImprovement,
            minEpochs,
            maxEpochs,
            log);
        return new ChainCrfChunker(trained, tokenizerFactory, codec);
    }

    /**
     * Converts a tagging of pre-tokenized text into a chunking. The tags are
     * paired with the tokens and token offsets from {@code preTagging} over
     * the text {@code cs[start, end)}, and the codec turns the resulting
     * string tagging into a chunking.
     * NOTE(review): token offsets are passed on unchanged alongside a string
     * that begins at {@code start}; presumably the tokenizer reports offsets
     * relative to the slice start. Confirm against the tokenizer contract.
     *
     * @param tagging tags assigned to the tokens
     * @param preTagging tokens and their start/end offsets
     * @param cs underlying characters
     * @param start index of the first character of the slice
     * @param end index one past the last character of the slice
     * @param codec codec performing the tagging-to-chunking conversion
     * @return chunking produced by the codec
     */
    static Chunking toChunking(Tagging<String> tagging, PreTagging preTagging, char[] cs, int start, int end, TagChunkCodec codec) {
        CharSequence text = new String(cs, start, end - start);
        StringTagging aligned = new StringTagging(
            preTagging.mTokens,
            tagging.tags(),
            text,
            preTagging.mTokenStarts,
            preTagging.mTokenEnds);
        return codec.toChunking(aligned);
    }

    /**
     * Copies a list of boxed integers into a new primitive array of the same
     * length and order.
     *
     * @param xs values to copy; elements are unboxed, so none may be {@code null}
     * @return newly allocated array holding the values of {@code xs}
     */
    static int[] toArray(List<Integer> xs) {
        int[] result = new int[xs.size()];
        for (int idx = 0; idx < result.length; idx++) {
            result[idx] = xs.get(idx);
        }
        return result;
    }

    /**
     * Adapts a handler of taggings into a handler of chunkings: every
     * chunking received is converted to a tagging by the codec and forwarded
     * to the wrapped tagging handler.
     */
    static class ChunkingAdapter
    implements ObjectHandler<Chunking> {
        /** Codec converting each incoming chunking to a tagging. */
        private final TagChunkCodec mCodec;
        /** Receiver of the converted taggings. */
        private final ObjectHandler<Tagging<String>> mTagHandler;

        /**
         * @param tagHandler handler that receives the converted taggings
         * @param codec codec used for the chunking-to-tagging conversion
         */
        public ChunkingAdapter(ObjectHandler<Tagging<String>> tagHandler, TagChunkCodec codec) {
            mCodec = codec;
            mTagHandler = tagHandler;
        }

        /**
         * Converts the chunking to a tagging and passes it to the wrapped handler.
         *
         * @param chunking chunking to convert and forward
         */
        @Override
        public void handle(Chunking chunking) {
            Tagging<String> converted = mCodec.toTagging(chunking);
            mTagHandler.handle(converted);
        }
    }

    /**
     * Iterator that lazily converts scored taggings into scored chunkings.
     * Each call to {@link #next()} pulls one tagging from the wrapped
     * iterator, converts it with {@code ChainCrfChunker.toChunking} and pairs
     * the chunking with the tagging's score.
     */
    static class IteratorWrapper
    implements Iterator<ScoredObject<Chunking>> {
        /** Source of scored taggings. */
        private final Iterator<ScoredTagging<String>> mIt;
        /** Tokens and offsets shared by every tagging from the source. */
        private final PreTagging mPreTagging;
        /** Codec used for the tagging-to-chunking conversion. */
        private final TagChunkCodec mCodec;
        /** Underlying characters and the slice bounds that were tokenized. */
        private final char[] mCs;
        private final int mStart;
        private final int mEnd;

        /**
         * @param it iterator supplying scored taggings
         * @param preTagging tokens and offsets the taggings refer to
         * @param cs underlying characters
         * @param start index of the first character of the slice
         * @param end index one past the last character of the slice
         * @param codec codec used to convert taggings to chunkings
         */
        IteratorWrapper(Iterator<ScoredTagging<String>> it, PreTagging preTagging, char[] cs, int start, int end, TagChunkCodec codec) {
            mIt = it;
            mPreTagging = preTagging;
            mCodec = codec;
            mCs = cs;
            mStart = start;
            mEnd = end;
        }

        /** Returns whether the wrapped iterator has more taggings. */
        @Override
        public boolean hasNext() {
            return mIt.hasNext();
        }

        /** Delegates removal to the wrapped iterator. */
        @Override
        public void remove() {
            mIt.remove();
        }

        /**
         * Converts the next scored tagging into a scored chunking.
         *
         * @return chunking paired with the score of the tagging it came from
         */
        @Override
        public ScoredObject<Chunking> next() {
            ScoredTagging<String> scoredTagging = mIt.next();
            // Score is read before conversion, matching the tagging it belongs to.
            double taggingScore = scoredTagging.score();
            Chunking converted = ChainCrfChunker.toChunking(scoredTagging, mPreTagging, mCs, mStart, mEnd, mCodec);
            return new ScoredObject<Chunking>(converted, taggingScore);
        }
    }

    /**
     * Holder for the result of tokenization: the tokens and two arrays of
     * per-token start and end positions. The references are stored as given,
     * without copying.
     */
    static class PreTagging {
        /** Token texts. */
        final List<String> mTokens;
        /** Start position recorded for each token. */
        final int[] mTokenStarts;
        /** End position recorded for each token. */
        final int[] mTokenEnds;

        /**
         * @param tokens token texts
         * @param tokenStarts start position of each token
         * @param tokenEnds end position of each token
         */
        public PreTagging(List<String> tokens, int[] tokenStarts, int[] tokenEnds) {
            mTokenEnds = tokenEnds;
            mTokenStarts = tokenStarts;
            mTokens = tokens;
        }
    }

    /**
     * Serialization proxy for {@code ChainCrfChunker}. On write it emits the
     * chunker's CRF, tokenizer factory and codec, in that order; on read it
     * reads the three objects back in the same order and constructs a new
     * chunker from them.
     */
    static class Serializer
    extends AbstractExternalizable {
        static final long serialVersionUID = 2460314741682974199L;
        /** Chunker to write; {@code null} when created through the no-arg constructor. */
        private final ChainCrfChunker mChunker;

        /** No-argument constructor; creates a proxy with no chunker attached. */
        public Serializer() {
            this(null);
        }

        /**
         * @param chunker chunker whose components are written by {@link #writeExternal}
         */
        public Serializer(ChainCrfChunker chunker) {
            this.mChunker = chunker;
        }

        /**
         * Reads a CRF, tokenizer factory and codec (in that order) and builds
         * a chunker from them.
         *
         * @param in source of the serialized components
         * @return newly constructed {@code ChainCrfChunker}
         * @throws IOException if reading from {@code in} fails
         * @throws ClassNotFoundException if a serialized class cannot be resolved
         */
        @Override
        public Object read(ObjectInput in) throws IOException, ClassNotFoundException {
            // Parameterized instead of raw: the stream only carries what
            // writeExternal wrote, namely a chunker's ChainCrf<String> field.
            // Generic type arguments cannot be checked at runtime, so the
            // warning is suppressed on this single declaration.
            @SuppressWarnings("unchecked")
            ChainCrf<String> crf = (ChainCrf<String>)in.readObject();
            TokenizerFactory factory = (TokenizerFactory)in.readObject();
            TagChunkCodec codec = (TagChunkCodec)in.readObject();
            return new ChainCrfChunker(crf, factory, codec);
        }

        /**
         * Writes the chunker's CRF, tokenizer factory and codec, in that order.
         *
         * @param out destination for the serialized components
         * @throws IOException if writing to {@code out} fails
         */
        @Override
        public void writeExternal(ObjectOutput out) throws IOException {
            out.writeObject(this.mChunker.mCrf);
            out.writeObject(this.mChunker.mTokenizerFactory);
            out.writeObject(this.mChunker.mCodec);
        }
    }

    /**
     * Presents a corpus of chunkings as a corpus of taggings. Each visit is
     * forwarded to the wrapped chunking corpus with the tagging handler
     * wrapped in a {@link ChunkingAdapter}, so chunkings are converted to
     * taggings by the codec before reaching the handler.
     */
    static class TagCorpus
    extends Corpus<ObjectHandler<Tagging<String>>> {
        /** Codec used to convert chunkings to taggings. */
        private final TagChunkCodec mCodec;
        /** Corpus supplying the chunkings. */
        private final Corpus<ObjectHandler<Chunking>> mChunkingCorpus;

        /**
         * @param chunkingCorpus corpus of chunkings to adapt
         * @param codec codec used for the chunking-to-tagging conversion
         */
        public TagCorpus(Corpus<ObjectHandler<Chunking>> chunkingCorpus, TagChunkCodec codec) {
            mCodec = codec;
            mChunkingCorpus = chunkingCorpus;
        }

        /**
         * Visits the training section of the wrapped corpus with the handler adapted.
         *
         * @param handler receiver of the converted training taggings
         * @throws IOException propagated from the wrapped corpus
         */
        @Override
        public void visitTrain(ObjectHandler<Tagging<String>> handler) throws IOException {
            mChunkingCorpus.visitTrain(adapt(handler));
        }

        /**
         * Visits the test section of the wrapped corpus with the handler adapted.
         *
         * @param handler receiver of the converted test taggings
         * @throws IOException propagated from the wrapped corpus
         */
        @Override
        public void visitTest(ObjectHandler<Tagging<String>> handler) throws IOException {
            mChunkingCorpus.visitTest(adapt(handler));
        }

        /** Wraps a tagging handler so that it can consume chunkings. */
        private ChunkingAdapter adapt(ObjectHandler<Tagging<String>> handler) {
            return new ChunkingAdapter(handler, mCodec);
        }
    }
}

