/*
 * Decompiled with CFR 0.152.
 */
package com.aliasi.test.unit.sentences;

import com.aliasi.chunk.Chunk;
import com.aliasi.chunk.ChunkFactory;
import com.aliasi.sentences.IndoEuropeanSentenceModel;
import com.aliasi.sentences.SentenceChunker;
import com.aliasi.sentences.SentenceModel;
import com.aliasi.tokenizer.IndoEuropeanTokenizerFactory;
import com.aliasi.tokenizer.TokenizerFactory;
import com.aliasi.util.AbstractExternalizable;
import java.io.IOException;
import java.util.LinkedHashSet;
import java.util.Set;
import junit.framework.Assert;
import org.junit.Test;

/**
 * Tests that {@link SentenceChunker}, configured with
 * {@link IndoEuropeanTokenizerFactory#INSTANCE} and an
 * {@link IndoEuropeanSentenceModel}, reports exactly the expected sentence
 * chunks for several hand-built inputs.
 *
 * <p>Each input is assembled by interleaving whitespace strings with sentence
 * strings, so the expected character offsets of every sentence are known by
 * construction.
 */
public class SentenceChunkerTest {
    static final TokenizerFactory tf = IndoEuropeanTokenizerFactory.INSTANCE;
    static final SentenceModel sm = new IndoEuropeanSentenceModel();

    /**
     * Runs the chunker over four fixtures: two short sentences, one sentence
     * containing a decimal number, an empty input, and four long sentences
     * containing numbers, brackets and punctuation-heavy notation.
     *
     * @throws IOException            propagated from {@link #assertChunks}
     * @throws ClassNotFoundException propagated from {@link #assertChunks}
     */
    @Test
    public void testSentenceChunks() throws IOException, ClassNotFoundException {
        SentenceChunker sentenceChunker = new SentenceChunker(tf, sm);

        // Two simple sentences with leading and separating whitespace.
        assertChunks(sentenceChunker,
                     new String[]{"John ran.", "He saw Susan."},
                     new String[]{"  ", " ", ""});

        // A period inside a number ("99.5") within a single expected sentence.
        assertChunks(sentenceChunker,
                     new String[]{"His temperature was 99.5 and rising."},
                     new String[]{" ", ""});

        // Empty input: no sentences expected.
        assertChunks(sentenceChunker,
                     new String[]{},
                     new String[]{""});

        // Long sentences with embedded periods, brackets and other punctuation.
        String[] sents = new String[]{
            "Transcription of the nirIX gene cluster itself was controlled by NNR, a member of the family of FNR-like transcriptional activators.",
            "The NirI sequence corresponds to that of a membrane-bound protein with six transmembrane helices, a large periplasmic domain and cysteine-rich cytoplasmic domains that resemble the binding sites of [4Fe-4S] clusters in many ferredoxin-like proteins.",
            "An NNR binding sequence is located in the middle of the intergenic region between the nirI and nirS genes with its centre located at position -41.5 relative to the transcription start sites of both genes.",
            "In eight families we identified six novel MLH1 and two novel MSH2 mutations comprising one frame shift mutation (c.1420 del C), two missense mutations (L622H and R687W), two splice site mutations (c.1990-1 G>A and c.453+2 T>C and one nonsense mutation (K329X) in the MLH1 gene as well as two frame shift mutations (c.1979-1980 del AT and c.1704-1705 del AG) in the MSH2 gene."
        };
        String[] whitespaces = new String[]{" ", "  ", "  ", "  ", ""};
        assertChunks(sentenceChunker, sents, whitespaces);
    }

    /**
     * Checks the given chunker with {@link #assertChunks1}, then repeats the
     * same check on the object returned by
     * {@link AbstractExternalizable#serializeDeserialize(java.io.Serializable)}
     * for that chunker, so the round-tripped copy must produce the same chunks.
     *
     * @param sentenceChunker chunker under test
     * @param sents           expected sentences, in order
     * @param whitespaces     whitespace before each sentence plus one trailing
     *                        entry; must have {@code sents.length + 1} elements
     * @throws IOException            propagated from the serialization round trip
     * @throws ClassNotFoundException propagated from the serialization round trip
     */
    void assertChunks(SentenceChunker sentenceChunker, String[] sents, String[] whitespaces) throws IOException, ClassNotFoundException {
        assertChunks1(sentenceChunker, sents, whitespaces);
        SentenceChunker roundTripped = (SentenceChunker) AbstractExternalizable.serializeDeserialize(sentenceChunker);
        assertChunks1(roundTripped, sents, whitespaces);
    }

    /**
     * Builds the input text {@code ws[0] + s[0] + ws[1] + s[1] + ... + ws[n]},
     * computes the expected {@code "S"}-typed chunk for every sentence from
     * its offsets in that text, and asserts that the chunker's chunk set for
     * the whole text equals the expected set.
     *
     * @param sentenceChunker chunker under test
     * @param sents           expected sentences, in order
     * @param whitespaces     whitespace before each sentence plus one trailing
     *                        entry; must have {@code sents.length + 1} elements
     */
    void assertChunks1(SentenceChunker sentenceChunker, String[] sents, String[] whitespaces) {
        // Fail with a readable message instead of an ArrayIndexOutOfBoundsException
        // (or silently ignored entries) when a fixture is malformed.
        Assert.assertEquals("whitespaces must contain exactly one more entry than sents",
                            sents.length + 1, whitespaces.length);

        StringBuilder sb = new StringBuilder();
        Set<Chunk> expectedChunks = new LinkedHashSet<Chunk>();
        for (int i = 0; i < sents.length; ++i) {
            sb.append(whitespaces[i]);
            // The builder length before/after appending the sentence gives the
            // sentence's [start, end) offsets in the final input.
            int start = sb.length();
            sb.append(sents[i]);
            int end = sb.length();
            expectedChunks.add(ChunkFactory.createChunk(start, end, "S"));
        }
        sb.append(whitespaces[sents.length]);

        String input = sb.toString();
        char[] cs = input.toCharArray();
        Set<Chunk> foundChunks = sentenceChunker.chunk(cs, 0, input.length()).chunkSet();
        Assert.assertEquals(expectedChunks, foundChunks);
    }
}

