/*
 * Decompiled with CFR 0.152.
 */
package org.textmining.extraction.excel;

import java.io.IOException;
import java.io.InputStream;
import java.io.StringWriter;
import java.io.Writer;
import org.apache.poi.poifs.filesystem.DocumentEntry;
import org.apache.poi.poifs.filesystem.DocumentInputStream;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.util.LittleEndian;
import org.textmining.extraction.TextExtractor;

/**
 * Extracts text from a binary Excel workbook by reading the shared string
 * table (SST record) straight out of the raw "Workbook" record stream.
 *
 * <p>The whole stream is loaded into memory by the constructor. Every
 * {@code getText} call re-parses it from the beginning, so repeated calls
 * return the same text.
 */
public class ExcelTextExtractor
implements TextExtractor {
    /** Record id that terminates a BIFF substream. */
    private static final int RECORD_EOF = 10;
    /** Record id of a CONTINUE record, which carries the overflow of the previous record. */
    private static final int RECORD_CONTINUE = 60;
    /** Record id of the shared string table. */
    private static final int RECORD_SST = 252;

    /** Raw bytes of the "Workbook" stream. */
    byte[] _recordStream;
    /** Read cursor into {@link #_recordStream}. */
    int _offset;
    /** Exclusive end of the payload of the record currently being parsed. */
    private int _recordEnd;

    /**
     * Loads the "Workbook" stream of an OLE2 compound document into memory.
     *
     * @param in stream positioned at the start of the OLE2 document
     * @throws IOException if the document cannot be read or has no "Workbook" entry
     */
    public ExcelTextExtractor(InputStream in) throws IOException {
        POIFSFileSystem poifs = new POIFSFileSystem(in);
        DocumentEntry headerProps = (DocumentEntry)poifs.getRoot().getEntry("Workbook");
        byte[] data = new byte[headerProps.getSize()];
        DocumentInputStream din = poifs.createDocumentInputStream("Workbook");
        try {
            // A single read() is not guaranteed to fill the buffer, so keep
            // reading until it is full or the stream ends. If the stream ends
            // early the tail stays zero-filled and is parsed as empty records.
            int filled = 0;
            while (filled < data.length) {
                int n = din.read(data, filled, data.length - filled);
                if (n <= 0) {
                    break;
                }
                filled += n;
            }
        } finally {
            din.close();
        }
        this._recordStream = data;
    }

    /**
     * Returns the text of every shared string, each followed by a single space.
     *
     * @return the extracted text; empty if the stream contains no shared strings
     * @throws IOException if a required character encoding is unavailable
     */
    @Override
    public String getText() throws IOException {
        StringWriter writer = new StringWriter();
        this.getText(writer);
        return writer.toString();
    }

    /**
     * Writes the text of every shared string to {@code writer}, each followed
     * by a single space. Parsing stops at the first EOF record, or quietly at
     * the point where the data turns out to be truncated or malformed.
     *
     * @param writer destination for the extracted text
     * @throws IOException if writing fails or a required character encoding is unavailable
     */
    @Override
    public void getText(Writer writer) throws IOException {
        byte[] data = this._recordStream;
        // Always start from the top so the extractor can be used more than once.
        this._offset = 0;
        while (this._offset + 2 <= data.length) {
            int type = this.readUShort();
            if (type == RECORD_EOF) {
                break;
            }
            if (this._offset + 2 > data.length) {
                break;
            }
            int size = this.readUShort();
            this._recordEnd = Math.min(this._offset + size, data.length);
            if (type == RECORD_SST) {
                this.readSharedStrings(writer);
            }
            // Realign on the record boundary regardless of how much of the
            // payload was consumed. After an SST this is the end of the last
            // CONTINUE record that the string table spilled into.
            this._offset = this._recordEnd;
        }
    }

    /** Parses the SST payload at the cursor and writes each string followed by a space. */
    private void readSharedStrings(Writer writer) throws IOException {
        if (this._recordEnd - this._offset < 8) {
            return;
        }
        // The first int is the total number of string references; only the
        // count of unique strings that follows it is needed here.
        this._offset += 4;
        int uniqueStrings = LittleEndian.getInt(this._recordStream, this._offset);
        this._offset += 4;
        for (int i = 0; i < uniqueStrings; ++i) {
            if (!this.readString(writer)) {
                return;
            }
        }
    }

    /**
     * Reads one string (header, characters, formatting runs, extended data)
     * and writes its characters plus a trailing space.
     *
     * @return {@code false} if the data ended or was malformed before the string was complete
     */
    private boolean readString(Writer writer) throws IOException {
        byte[] data = this._recordStream;
        if (!this.ensureAvailable(2)) {
            return false;
        }
        // The length is an unsigned 16-bit character count.
        int charsLeft = this.readUShort();
        if (!this.ensureAvailable(1)) {
            return false;
        }
        byte flags = data[this._offset++];
        boolean compression = (flags & 1) == 0;
        boolean asian = (flags & 4) != 0;
        boolean richText = (flags & 8) != 0;
        int numRuns = 0;
        int sizeofAsian = 0;
        if (richText) {
            if (!this.ensureAvailable(2)) {
                return false;
            }
            numRuns = this.readUShort();
        }
        if (asian) {
            if (!this.ensureAvailable(4)) {
                return false;
            }
            sizeofAsian = LittleEndian.getInt(data, this._offset);
            // The size field is a 4-byte int, so step over all four bytes.
            this._offset += 4;
        }
        while (charsLeft > 0) {
            if (this._offset >= this._recordEnd) {
                // Character data continues in the next CONTINUE record, which
                // starts with a fresh flags byte that may switch the encoding.
                if (!this.nextContinue() || this._recordEnd - this._offset < 1) {
                    return false;
                }
                compression = (data[this._offset++] & 1) == 0;
            }
            int bytesPerChar = compression ? 1 : 2;
            int chunk = Math.min(charsLeft, (this._recordEnd - this._offset) / bytesPerChar);
            if (chunk == 0) {
                // A dangling half character at the end of a record: malformed.
                return false;
            }
            int byteLength = chunk * bytesPerChar;
            writer.write(new String(data, this._offset, byteLength, compression ? "Cp1252" : "UTF-16LE"));
            this._offset += byteLength;
            charsLeft -= chunk;
        }
        writer.write(' ');
        // Trailing data: 4 bytes per formatting run, then the extended block
        // whose size was announced in the header.
        return this.skip(numRuns * 4) && this.skip(sizeofAsian);
    }

    /** Reads an unsigned 16-bit little-endian value at the cursor and advances past it. */
    private int readUShort() {
        int value = LittleEndian.getShort(this._recordStream, this._offset) & 0xFFFF;
        this._offset += 2;
        return value;
    }

    /**
     * Makes sure {@code count} bytes can be read from the current record,
     * stepping into a following CONTINUE record if the current one is used up.
     */
    private boolean ensureAvailable(int count) {
        if (this._offset >= this._recordEnd && !this.nextContinue()) {
            return false;
        }
        return this._recordEnd - this._offset >= count;
    }

    /** Skips {@code count} payload bytes, crossing into CONTINUE records as needed. */
    private boolean skip(int count) {
        while (count > 0) {
            if (this._offset >= this._recordEnd && !this.nextContinue()) {
                return false;
            }
            int step = Math.min(count, this._recordEnd - this._offset);
            this._offset += step;
            count -= step;
        }
        return true;
    }

    /**
     * Moves the cursor into the record that follows the current one, provided
     * it is a CONTINUE record. Leaves the cursor untouched otherwise.
     */
    private boolean nextContinue() {
        byte[] data = this._recordStream;
        int header = this._recordEnd;
        if (header + 4 > data.length) {
            return false;
        }
        if ((LittleEndian.getShort(data, header) & 0xFFFF) != RECORD_CONTINUE) {
            return false;
        }
        int size = LittleEndian.getShort(data, header + 2) & 0xFFFF;
        this._offset = header + 4;
        this._recordEnd = Math.min(this._offset + size, data.length);
        return true;
    }
}

