package org.textmining.text.extraction;

import java.io.InputStream;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.apache.poi.hwpf.model.CHPBinTable;
import org.apache.poi.hwpf.model.CHPX;
import org.apache.poi.hwpf.model.ComplexFileTable;
import org.apache.poi.hwpf.model.TextPiece;
import org.apache.poi.poifs.filesystem.DocumentEntry;
import org.apache.poi.poifs.filesystem.DocumentInputStream;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.util.LittleEndian;
import org.textmining.text.extraction.sprm.SprmIterator;
import org.textmining.text.extraction.sprm.SprmOperation;

/* loaded from: classes2.dex */
/**
 * Extracts the text of a Word binary document supplied as an OLE2 (POIFS) stream.
 *
 * <p>The "WordDocument" entry is loaded into memory and its header is inspected.
 * Fast-saved and password-protected documents are rejected, version codes 101-104
 * are delegated to {@link Word6Extractor}, and every other version is handled here by
 * walking the character runs and text pieces in parallel while skipping runs flagged
 * as deleted.
 *
 * <p>NOTE(review): the POI calls used by {@link #extractText(InputStream)} throw
 * checked exceptions, but the method signature is kept exactly as found (no
 * {@code throws} clause). Confirm whether the clause was lost in decompilation.
 */
public class WordExtractor {
    // Byte offsets into the "WordDocument" stream header. The names reflect how this
    // class uses each value. NOTE(review): confirm them against the Word binary file
    // format (FIB) specification.
    private static final int VERSION_OFFSET = 2;
    private static final int FLAGS_OFFSET = 10;
    private static final int TEXT_BASE_OFFSET = 24;
    private static final int CHPX_TABLE_OFFSET = 250;
    private static final int CHPX_TABLE_SIZE_OFFSET = 254;
    private static final int COMPLEX_TABLE_OFFSET = 418;

    // Bits of the 16-bit flags word read at FLAGS_OFFSET.
    private static final int FLAG_FAST_SAVED = 0x0004;
    private static final int FLAG_PASSWORD_PROTECTED = 0x0100;
    private static final int FLAG_USE_1TABLE = 0x0200;

    /**
     * Reports whether a run's property modifiers mark it as deleted.
     *
     * @param bArr the raw property-modifier bytes of a character run
     * @return {@code true} if any operation in {@code bArr} has operation code 0 and a
     *         non-zero operand; {@code false} otherwise
     */
    private boolean isDeleted(byte[] bArr) {
        SprmIterator operations = new SprmIterator(bArr);
        while (operations.hasNext()) {
            SprmOperation operation = operations.next();
            // NOTE(review): operation code 0 is presumably the "deleted" revision mark;
            // confirm against the sprm definitions.
            if (operation.getOperation() == 0 && operation.getOperand() != 0) {
                return true;
            }
        }
        return false;
    }

    /**
     * Extracts the text of the Word document read from {@code inputStream}.
     *
     * <p>If the text pieces run out before the character runs do, the text collected
     * so far is returned. A document without any text piece yields the content of an
     * empty {@link WordTextBuffer}.
     *
     * @param inputStream the OLE2 container holding the document
     * @return the extracted text
     * @throws FastSavedException if the header flags mark the file as fast-saved
     * @throws PasswordProtectedException if the header flags mark the file as
     *         password protected
     */
    public String extractText(InputStream inputStream) {
        POIFSFileSystem fileSystem = new POIFSFileSystem(inputStream);
        byte[] header = readEntry(fileSystem, "WordDocument");

        short flags = LittleEndian.getShort(header, FLAGS_OFFSET);
        if ((flags & FLAG_FAST_SAVED) != 0) {
            throw new FastSavedException("Fast-saved files are unsupported at this time");
        }
        if ((flags & FLAG_PASSWORD_PROTECTED) != 0) {
            throw new PasswordProtectedException("This document is password protected");
        }

        switch (LittleEndian.getShort(header, VERSION_OFFSET)) {
            case 101:
            case 102:
            case 103:
            case 104:
                // These version codes are handled by the dedicated Word 6 extractor.
                return new Word6Extractor().extractText(header);
            default:
                break;
        }

        int complexOffset = LittleEndian.getInt(header, COMPLEX_TABLE_OFFSET);
        String tableName = (flags & FLAG_USE_1TABLE) != 0 ? "1Table" : "0Table";
        byte[] table = readEntry(fileSystem, tableName);

        int chpxOffset = LittleEndian.getInt(header, CHPX_TABLE_OFFSET);
        int chpxSize = LittleEndian.getInt(header, CHPX_TABLE_SIZE_OFFSET);
        int textBase = LittleEndian.getInt(header, TEXT_BASE_OFFSET);

        CHPBinTable runTable = new CHPBinTable(header, table, chpxOffset, chpxSize, textBase);
        List textPieces = new ComplexFileTable(header, table, complexOffset, textBase)
                .getTextPieceTable().getTextPieces();

        WordTextBuffer text = new WordTextBuffer();
        Iterator pieces = textPieces.iterator();
        if (!pieces.hasNext()) {
            // No text pieces at all: nothing can be extracted.
            return text.toString();
        }
        TextPiece piece = (TextPiece) pieces.next();
        int pieceStart = piece.getStart();
        int pieceEnd = piece.getEnd();

        // Cast explicitly so the loop does not depend on getTextRuns() being generified.
        for (Object run : runTable.getTextRuns()) {
            CHPX chpx = (CHPX) run;
            if (isDeleted(chpx.getGrpprl())) {
                continue;
            }
            int runStart = chpx.getStart();
            int runEnd = chpx.getEnd();

            // Skip pieces that end at or before the start of this run.
            while (runStart >= pieceEnd) {
                if (!pieces.hasNext()) {
                    // Same policy as the spanning case below: return what we have.
                    return text.toString();
                }
                piece = (TextPiece) pieces.next();
                pieceStart = piece.getStart();
                pieceEnd = piece.getEnd();
            }

            if (runEnd < pieceEnd) {
                // The run lies entirely inside the current piece.
                text.append(piece.substring(runStart - pieceStart, runEnd - pieceStart));
            } else if (runEnd > pieceEnd) {
                // The run spans several pieces: drain each piece in turn.
                while (runEnd > pieceEnd) {
                    text.append(piece.substring(runStart - pieceStart, pieceEnd - pieceStart));
                    if (!pieces.hasNext()) {
                        return text.toString();
                    }
                    piece = (TextPiece) pieces.next();
                    pieceStart = piece.getStart();
                    pieceEnd = piece.getEnd();
                    runStart = pieceStart;
                }
                text.append(piece.substring(0, runEnd - pieceStart));
            } else {
                // The run ends exactly at the piece boundary: emit it, then move on.
                text.append(piece.substring(runStart - pieceStart, runEnd - pieceStart));
                if (pieces.hasNext()) {
                    piece = (TextPiece) pieces.next();
                    pieceStart = piece.getStart();
                    pieceEnd = piece.getEnd();
                }
            }
        }
        return text.toString();
    }

    /**
     * Loads a whole named entry of the file system into a byte array.
     *
     * <p>Reads in a loop because a single {@code read} call may deliver fewer bytes
     * than requested. If the stream ends early, the remaining bytes stay zero. The
     * stream is closed even when reading fails.
     *
     * @param fileSystem the file system holding the entry
     * @param name the entry name, looked up in the root directory
     * @return a buffer sized to the entry's declared size
     * @throws java.io.IOException if the entry cannot be opened or read
     */
    private static byte[] readEntry(POIFSFileSystem fileSystem, String name)
            throws java.io.IOException {
        DocumentEntry entry = (DocumentEntry) fileSystem.getRoot().getEntry(name);
        DocumentInputStream stream = fileSystem.createDocumentInputStream(name);
        try {
            byte[] data = new byte[entry.getSize()];
            int filled = 0;
            while (filled < data.length) {
                int count = stream.read(data, filled, data.length - filled);
                if (count <= 0) {
                    break;
                }
                filled += count;
            }
            return data;
        } finally {
            stream.close();
        }
    }
}
