package org.gcn.plinguacore.simulator.fuzzy;

import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import jcuda.Pointer;
import jcuda.driver.CUdeviceptr;
import jcuda.driver.JCudaDriver;
import jcuda.runtime.dim3;
import jcuda.utils.KernelLauncher;
import org.antlr.runtime.debug.Profiler;

/* loaded from: input_file:org/gcn/plinguacore/simulator/fuzzy/TrapezoidalFuzzyMatrixCUDA.class */
/**
 * {@link TrapezoidalFuzzyMatrix} implementation that runs every matrix operation as a CUDA
 * kernel through JCuda.
 *
 * <p>Each GPU operation follows the same steps: make sure a PTX module exists for
 * {@code kernelTrap.cu} (compiling it with {@code nvcc} when it does not), load one named kernel
 * from that module, copy the operands to device memory, launch the kernel, copy the result back
 * and free the device buffers. Device buffers are freed even when one of those steps throws.
 *
 * <p>Matrices are flat, row-major {@code float} arrays. In a trapezoidal matrix every cell
 * occupies four consecutive floats, i.e. cell {@code (row, col)} of a matrix with {@code cols}
 * columns starts at index {@code ((row * cols) + col) * 4}; see
 * {@link #printMatrixTrap(String, float[], int, int)}. In a scalar matrix every cell is a single
 * float; see {@link #printMatrixDouble(String, float[], int, int)}.
 */
public class TrapezoidalFuzzyMatrixCUDA implements TrapezoidalFuzzyMatrix {
    /** CUDA source file the PTX module is built from; resolved against the working directory. */
    private static final String KERNEL_SOURCE = "kernelTrap.cu";

    /** Size of one {@code float} in bytes. */
    private static final int FLOAT_BYTES = 4;

    /** Number of floats stored per cell of a trapezoidal matrix. */
    private static final int TRAP_COMPONENTS = 4;

    /** Edge length of the thread blocks used for every launch. */
    private static final int BLOCK_SIZE = 16;

    /**
     * Launches the {@code MatrixTimesMaxMinKernelExtPrefetch16} kernel on two operands.
     *
     * <p>NOTE(review): the left operand is uploaded as {@code i * i2} floats (one float per cell)
     * while the right operand and the result use four floats per cell. This matches the original
     * byte counts; presumably the left operand is a scalar matrix - confirm against
     * {@code kernelTrap.cu}.
     *
     * @param fArr  left operand; at least {@code i * i2} floats
     * @param fArr2 right operand; at least {@code i3 * i4 * 4} floats
     * @param fArr3 receives the result; at least {@code i * i4 * 4} floats
     * @param i     rows of the left operand
     * @param i2    columns of the left operand; must equal {@code i3}
     * @param i3    rows of the right operand
     * @param i4    columns of the right operand
     * @param z     forwarded to the kernel as {@code 1} or {@code 0}; its meaning is defined by
     *              the kernel
     * @return {@code false} without touching the GPU when a dimension is below 1, when
     *         {@code i2 != i3}, or when a host array is {@code null} or too short for its
     *         dimensions; {@code true} once the result has been copied into {@code fArr3}
     * @throws IllegalStateException if the PTX module cannot be located or compiled
     */
    @Override // org.gcn.plinguacore.simulator.fuzzy.TrapezoidalFuzzyMatrix
    public boolean timesMaxMinWithMatrix(float[] fArr, float[] fArr2, float[] fArr3, int i, int i2, int i3, int i4, boolean z) {
        if (i < 1 || i2 < 1 || i3 < 1 || i4 < 1 || i2 != i3) {
            return false;
        }
        long leftBytes = scalarBytes(i, i2);
        long rightBytes = trapBytes(i3, i4);
        long resultBytes = trapBytes(i, i4);
        if (!holds(fArr, leftBytes) || !holds(fArr2, rightBytes) || !holds(fArr3, resultBytes)) {
            return false;
        }

        // The kernel is loaded before any device allocation, as in the original call order.
        KernelLauncher kernel = loadKernel("MatrixTimesMaxMinKernelExtPrefetch16");
        int blockSize = getBlockSize();
        CUdeviceptr left = null;
        CUdeviceptr right = null;
        CUdeviceptr result = null;
        boolean completed = false;
        try {
            left = allocate(leftBytes);
            right = allocate(rightBytes);
            result = allocate(resultBytes);
            JCudaDriver.cuMemcpyHtoD(left, Pointer.to(fArr), leftBytes);
            JCudaDriver.cuMemcpyHtoD(right, Pointer.to(fArr2), rightBytes);
            launch(kernel, grid2d(i4, i, blockSize), new dim3(blockSize, blockSize, 1),
                    left, right, result, i, i2, i3, i4, z ? 1 : 0);
            JCudaDriver.cuMemcpyDtoH(Pointer.to(fArr3), result, resultBytes);
            completed = true;
        } finally {
            release(completed, left, right, result);
        }
        return true;
    }

    /**
     * Same as {@link #timesMaxMinWithMatrix} with the right operand treated as a single column
     * ({@code i4 = 1}).
     */
    @Override // org.gcn.plinguacore.simulator.fuzzy.TrapezoidalFuzzyMatrix
    public boolean timesMaxMinWithVector(float[] fArr, float[] fArr2, float[] fArr3, int i, int i2, int i3, boolean z) {
        return timesMaxMinWithMatrix(fArr, fArr2, fArr3, i, i2, i3, 1, z);
    }

    /**
     * Launches the {@code DiagonalMulKernel} kernel on a square trapezoidal matrix and a
     * trapezoidal vector.
     *
     * @param fArr  matrix operand; at least {@code i * i2 * 4} floats
     * @param fArr2 vector operand; at least {@code i3 * 4} floats
     * @param fArr3 receives the result; at least {@code i * 4} floats
     * @param i     matrix rows
     * @param i2    matrix columns; must equal {@code i}
     * @param i3    vector length; must equal {@code i2}
     * @return {@code false} without touching the GPU when a dimension is below 1, when the three
     *         dimensions are not all equal, or when a host array is {@code null} or too short;
     *         {@code true} once the result has been copied into {@code fArr3}
     * @throws IllegalStateException if the PTX module cannot be located or compiled
     */
    @Override // org.gcn.plinguacore.simulator.fuzzy.TrapezoidalFuzzyMatrix
    public boolean diagonalMultiplication(float[] fArr, float[] fArr2, float[] fArr3, int i, int i2, int i3) {
        if (i < 1 || i2 < 1 || i3 < 1 || i != i2 || i2 != i3) {
            return false;
        }
        long matrixBytes = trapBytes(i, i2);
        long vectorBytes = trapBytes(i3, 1);
        long resultBytes = trapBytes(i, 1);
        if (!holds(fArr, matrixBytes) || !holds(fArr2, vectorBytes) || !holds(fArr3, resultBytes)) {
            return false;
        }

        KernelLauncher kernel = loadKernel("DiagonalMulKernel");
        int blockSize = getBlockSize();
        CUdeviceptr matrix = null;
        CUdeviceptr vector = null;
        CUdeviceptr result = null;
        boolean completed = false;
        try {
            matrix = allocate(matrixBytes);
            vector = allocate(vectorBytes);
            result = allocate(resultBytes);
            JCudaDriver.cuMemcpyHtoD(matrix, Pointer.to(fArr), matrixBytes);
            JCudaDriver.cuMemcpyHtoD(vector, Pointer.to(fArr2), vectorBytes);
            launch(kernel, grid1d(i, blockSize), new dim3(blockSize, 1, 1),
                    matrix, vector, result, i, i2, i3, blockSize);
            JCudaDriver.cuMemcpyDtoH(Pointer.to(fArr3), result, resultBytes);
            completed = true;
        } finally {
            release(completed, matrix, vector, result);
        }
        return true;
    }

    /**
     * Launches the {@code MatrixMulKernelExtPrefetch16} kernel on two operands.
     *
     * <p>NOTE(review): as in {@link #timesMaxMinWithMatrix}, the left operand is uploaded as one
     * float per cell while the right operand and the result use four floats per cell - confirm
     * against {@code kernelTrap.cu}.
     *
     * @param fArr  left operand; at least {@code i * i2} floats
     * @param fArr2 right operand; at least {@code i3 * i4 * 4} floats
     * @param fArr3 receives the result; at least {@code i * i4 * 4} floats
     * @param i     rows of the left operand
     * @param i2    columns of the left operand; must equal {@code i3}
     * @param i3    rows of the right operand
     * @param i4    columns of the right operand
     * @return {@code false} without touching the GPU when a dimension is below 1, when
     *         {@code i2 != i3}, or when a host array is {@code null} or too short;
     *         {@code true} once the result has been copied into {@code fArr3}
     * @throws IllegalStateException if the PTX module cannot be located or compiled
     */
    @Override // org.gcn.plinguacore.simulator.fuzzy.TrapezoidalFuzzyMatrix
    public boolean multiplicationWithMatrix(float[] fArr, float[] fArr2, float[] fArr3, int i, int i2, int i3, int i4) {
        if (i < 1 || i2 < 1 || i3 < 1 || i4 < 1 || i2 != i3) {
            return false;
        }
        long leftBytes = scalarBytes(i, i2);
        long rightBytes = trapBytes(i3, i4);
        long resultBytes = trapBytes(i, i4);
        if (!holds(fArr, leftBytes) || !holds(fArr2, rightBytes) || !holds(fArr3, resultBytes)) {
            return false;
        }

        KernelLauncher kernel = loadKernel("MatrixMulKernelExtPrefetch16");
        int blockSize = getBlockSize();
        CUdeviceptr left = null;
        CUdeviceptr right = null;
        CUdeviceptr result = null;
        boolean completed = false;
        try {
            left = allocate(leftBytes);
            right = allocate(rightBytes);
            result = allocate(resultBytes);
            JCudaDriver.cuMemcpyHtoD(left, Pointer.to(fArr), leftBytes);
            JCudaDriver.cuMemcpyHtoD(right, Pointer.to(fArr2), rightBytes);
            launch(kernel, grid2d(i4, i, blockSize), new dim3(blockSize, blockSize, 1),
                    left, right, result, i, i2, i3, i4);
            JCudaDriver.cuMemcpyDtoH(Pointer.to(fArr3), result, resultBytes);
            completed = true;
        } finally {
            release(completed, left, right, result);
        }
        return true;
    }

    /**
     * Same as {@link #multiplicationWithMatrix} with the right operand treated as a single column
     * ({@code i4 = 1}).
     */
    @Override // org.gcn.plinguacore.simulator.fuzzy.TrapezoidalFuzzyMatrix
    public boolean multiplicationWithVector(float[] fArr, float[] fArr2, float[] fArr3, int i, int i2, int i3) {
        return multiplicationWithMatrix(fArr, fArr2, fArr3, i, i2, i3, 1);
    }

    /**
     * Launches the {@code VectorAddKernel} kernel on two trapezoidal vectors of equal length.
     *
     * @param fArr  first operand; at least {@code i * 4} floats
     * @param fArr2 second operand; at least {@code i2 * 4} floats
     * @param fArr3 receives the result; at least {@code i * 4} floats
     * @param i     length of the first operand
     * @param i2    length of the second operand; must equal {@code i}
     * @return {@code false} without touching the GPU when a length is below 1, when the lengths
     *         differ, or when a host array is {@code null} or too short; {@code true} once the
     *         result has been copied into {@code fArr3}
     * @throws IllegalStateException if the PTX module cannot be located or compiled
     */
    @Override // org.gcn.plinguacore.simulator.fuzzy.TrapezoidalFuzzyMatrix
    public boolean additionWithVector(float[] fArr, float[] fArr2, float[] fArr3, int i, int i2) {
        if (i < 1 || i2 < 1 || i != i2) {
            return false;
        }
        long firstBytes = trapBytes(i, 1);
        long secondBytes = trapBytes(i2, 1);
        long resultBytes = trapBytes(i, 1);
        if (!holds(fArr, firstBytes) || !holds(fArr2, secondBytes) || !holds(fArr3, resultBytes)) {
            return false;
        }

        KernelLauncher kernel = loadKernel("VectorAddKernel");
        int blockSize = getBlockSize();
        CUdeviceptr first = null;
        CUdeviceptr second = null;
        CUdeviceptr result = null;
        boolean completed = false;
        try {
            first = allocate(firstBytes);
            second = allocate(secondBytes);
            result = allocate(resultBytes);
            JCudaDriver.cuMemcpyHtoD(first, Pointer.to(fArr), firstBytes);
            JCudaDriver.cuMemcpyHtoD(second, Pointer.to(fArr2), secondBytes);
            launch(kernel, grid1d(i, blockSize), new dim3(blockSize, 1, 1),
                    first, second, result, i, blockSize);
            JCudaDriver.cuMemcpyDtoH(Pointer.to(fArr3), result, resultBytes);
            completed = true;
        } finally {
            release(completed, first, second, result);
        }
        return true;
    }

    /**
     * Launches the {@code MatrixTransposeKernel} kernel.
     *
     * <p>NOTE(review): both buffers are sized at one float per cell ({@code i * i2} floats), as
     * in the original byte counts - presumably this transposes a scalar matrix; confirm against
     * {@code kernelTrap.cu}.
     *
     * @param fArr  input matrix; at least {@code i * i2} floats
     * @param fArr2 receives the result; at least {@code i * i2} floats
     * @param i     rows of the input
     * @param i2    columns of the input
     * @param i3    rows of the output; must equal {@code i2}
     * @param i4    columns of the output; must equal {@code i}
     * @return {@code false} without touching the GPU when a dimension is below 1, when the output
     *         dimensions are not the swapped input dimensions, or when a host array is
     *         {@code null} or too short; {@code true} once the result has been copied into
     *         {@code fArr2}
     * @throws IllegalStateException if the PTX module cannot be located or compiled
     */
    @Override // org.gcn.plinguacore.simulator.fuzzy.TrapezoidalFuzzyMatrix
    public boolean transpose(float[] fArr, float[] fArr2, int i, int i2, int i3, int i4) {
        if (i < 1 || i2 < 1 || i3 < 1 || i4 < 1 || i != i4 || i2 != i3) {
            return false;
        }
        long matrixBytes = scalarBytes(i, i2);
        if (!holds(fArr, matrixBytes) || !holds(fArr2, matrixBytes)) {
            return false;
        }

        KernelLauncher kernel = loadKernel("MatrixTransposeKernel");
        int blockSize = getBlockSize();
        CUdeviceptr input = null;
        CUdeviceptr output = null;
        boolean completed = false;
        try {
            input = allocate(matrixBytes);
            output = allocate(matrixBytes);
            JCudaDriver.cuMemcpyHtoD(input, Pointer.to(fArr), matrixBytes);
            launch(kernel, grid2d(i2, i, blockSize), new dim3(blockSize, blockSize, 1),
                    input, output, i, i2, blockSize);
            JCudaDriver.cuMemcpyDtoH(Pointer.to(fArr2), output, matrixBytes);
            completed = true;
        } finally {
            release(completed, input, output);
        }
        return true;
    }

    /**
     * Prints a trapezoidal matrix to standard output, one row per line, each cell rendered as
     * {@code (a,b,c,d)} followed by a tab.
     *
     * @param str  label printed before the matrix
     * @param fArr matrix data, four floats per cell, row-major
     * @param i    number of rows
     * @param i2   number of columns
     */
    @Override // org.gcn.plinguacore.simulator.fuzzy.TrapezoidalFuzzyMatrix
    public void printMatrixTrap(String str, float[] fArr, int i, int i2) {
        System.out.print(str + " =\n");
        for (int row = 0; row < i; row++) {
            StringBuilder line = new StringBuilder("|\t");
            for (int col = 0; col < i2; col++) {
                int base = ((row * i2) + col) * TRAP_COMPONENTS;
                line.append('(');
                for (int k = 0; k < TRAP_COMPONENTS - 1; k++) {
                    line.append(fArr[base + k]).append(',');
                }
                // The last component is written without a trailing comma.
                line.append(fArr[base + TRAP_COMPONENTS - 1]).append(")\t");
            }
            System.out.print(line.append("|\n"));
        }
    }

    /**
     * Prints a scalar matrix (one float per cell) to standard output, one row per line, each
     * value followed by a tab.
     *
     * @param str  label printed before the matrix
     * @param fArr matrix data, one float per cell, row-major
     * @param i    number of rows
     * @param i2   number of columns
     */
    @Override // org.gcn.plinguacore.simulator.fuzzy.TrapezoidalFuzzyMatrix
    public void printMatrixDouble(String str, float[] fArr, int i, int i2) {
        System.out.print(str + " =\n");
        for (int row = 0; row < i; row++) {
            StringBuilder line = new StringBuilder("|\t");
            for (int col = 0; col < i2; col++) {
                // The decompiled source referenced ANTLR's Profiler.DATA_SEP here, which is an
                // inlined tab constant; the literal avoids the unrelated dependency.
                line.append(fArr[(row * i2) + col]).append('\t');
            }
            System.out.print(line.append("|\n"));
        }
    }

    /**
     * Sets the first {@code i * i2 * 4} floats of {@code fArr} to zero. Does nothing when either
     * dimension is below 1.
     *
     * @param fArr matrix data to clear
     * @param i    number of rows
     * @param i2   number of columns
     */
    @Override // org.gcn.plinguacore.simulator.fuzzy.TrapezoidalFuzzyMatrix
    public void initializeMatrix(float[] fArr, int i, int i2) {
        if (i < 1 || i2 < 1) {
            return;
        }
        int floatCount = i * i2 * TRAP_COMPONENTS;
        for (int k = 0; k < floatCount; k++) {
            fArr[k] = 0.0f;
        }
    }

    /**
     * Reports whether the four floats starting at index {@code i * 4} are all zero.
     *
     * @param fArr data to inspect
     * @param i    cell index (not a float index)
     * @param i2   not used by this implementation
     * @return {@code true} when all four floats compare equal to zero
     */
    @Override // org.gcn.plinguacore.simulator.fuzzy.TrapezoidalFuzzyMatrix
    public boolean isMin(float[] fArr, int i, int i2) {
        int base = i * TRAP_COMPONENTS;
        for (int k = 0; k < TRAP_COMPONENTS; k++) {
            if (fArr[base + k] != 0.0f) {
                return false;
            }
        }
        return true;
    }

    /**
     * Enables JCuda driver exceptions, makes sure the PTX module exists and loads one kernel
     * from it.
     *
     * @param kernelName name of the kernel function inside the module
     * @return a launcher bound to the named kernel
     * @throws IllegalStateException if the PTX module cannot be located or compiled; previously
     *         this failure was swallowed and a {@code null} file name was passed on to JCuda
     */
    private KernelLauncher loadKernel(String kernelName) {
        JCudaDriver.setExceptionsEnabled(true);
        String ptxFileName;
        try {
            ptxFileName = preparePtxFile(KERNEL_SOURCE);
        } catch (IOException e) {
            throw new IllegalStateException("Cannot prepare PTX module for " + KERNEL_SOURCE, e);
        }
        return KernelLauncher.load(ptxFileName, kernelName);
    }

    /** Runs the kernel with the given launch configuration and waits for it to finish. */
    private static void launch(KernelLauncher kernel, dim3 grid, dim3 block, Object... args) {
        kernel.setup(grid, block).call(args);
        JCudaDriver.cuCtxSynchronize();
    }

    /** Allocates {@code byteCount} bytes of device memory. */
    private static CUdeviceptr allocate(long byteCount) {
        CUdeviceptr buffer = new CUdeviceptr();
        JCudaDriver.cuMemAlloc(buffer, byteCount);
        return buffer;
    }

    /**
     * Frees every non-null device buffer, attempting all of them even if one free fails.
     *
     * @param propagateFailure {@code true} when the guarded work succeeded, so that a failing
     *                         free is reported; {@code false} when an exception is already in
     *                         flight and must not be replaced by a secondary free failure
     * @param buffers          buffers to free; {@code null} entries were never allocated
     */
    private static void release(boolean propagateFailure, CUdeviceptr... buffers) {
        RuntimeException firstFailure = null;
        for (CUdeviceptr buffer : buffers) {
            if (buffer == null) {
                continue;
            }
            try {
                JCudaDriver.cuMemFree(buffer);
            } catch (RuntimeException e) {
                if (firstFailure == null) {
                    firstFailure = e;
                }
            }
        }
        if (propagateFailure && firstFailure != null) {
            throw firstFailure;
        }
    }

    /** Byte size of a {@code rows x cols} matrix with one float per cell, computed in long. */
    private static long scalarBytes(int rows, int cols) {
        return (long) FLOAT_BYTES * rows * cols;
    }

    /** Byte size of a {@code rows x cols} matrix with four floats per cell, computed in long. */
    private static long trapBytes(int rows, int cols) {
        return (long) FLOAT_BYTES * TRAP_COMPONENTS * rows * cols;
    }

    /** Whether {@code hostArray} is non-null and large enough to back a copy of {@code byteCount} bytes. */
    private static boolean holds(float[] hostArray, long byteCount) {
        return hostArray != null && (long) hostArray.length * FLOAT_BYTES >= byteCount;
    }

    /** Number of blocks of {@code blockSize} threads needed to cover {@code extent} elements. */
    private static int blocksFor(int extent, int blockSize) {
        return (extent / blockSize) + (extent % blockSize == 0 ? 0 : 1);
    }

    /** One-dimensional grid covering {@code extent} elements. */
    private static dim3 grid1d(int extent, int blockSize) {
        return new dim3(blocksFor(extent, blockSize), 1, 1);
    }

    /** Two-dimensional grid covering {@code xExtent} by {@code yExtent} elements. */
    private static dim3 grid2d(int xExtent, int yExtent, int blockSize) {
        return new dim3(blocksFor(xExtent, blockSize), blocksFor(yExtent, blockSize), 1);
    }

    /**
     * Returns the name of the PTX file for a CUDA source file, compiling it with {@code nvcc}
     * when the PTX file does not exist yet. An existing PTX file is reused as is, without
     * checking whether the source is newer.
     *
     * @param str path of the CUDA source file
     * @return path of the PTX file (the source path with its extension replaced by {@code .ptx})
     * @throws IOException if the source file is missing, {@code nvcc} cannot be started or exits
     *         with a non-zero status, or the thread is interrupted while waiting for it
     */
    private String preparePtxFile(String str) throws IOException {
        int dot = str.lastIndexOf('.');
        // A name without an extension gets ".ptx" appended (the dot used to be dropped).
        String ptxFileName = (dot == -1 ? str : str.substring(0, dot)) + ".ptx";
        if (new File(ptxFileName).exists()) {
            return ptxFileName;
        }
        File cuFile = new File(str);
        if (!cuFile.exists()) {
            throw new IOException("Input file not found: " + str);
        }

        String modelFlag = "-m" + System.getProperty("sun.arch.data.model");
        System.out.println("Executing\n" + "nvcc " + modelFlag + " -ptx " + cuFile.getPath() + " -o " + ptxFileName);
        // Arguments are passed as a list so that paths containing spaces stay intact.
        ProcessBuilder builder = new ProcessBuilder("nvcc", modelFlag, "-ptx", cuFile.getPath(), "-o", ptxFileName);
        // stderr is merged into stdout: draining the two pipes one after the other can block
        // forever once the pipe that is not being read fills up.
        builder.redirectErrorStream(true);
        Process process = builder.start();

        String output;
        InputStream processOutput = process.getInputStream();
        try {
            output = new String(toByteArray(processOutput));
        } finally {
            processOutput.close();
        }

        try {
            int exitValue = process.waitFor();
            if (exitValue == 0) {
                System.out.println("Finished creating PTX file");
                return ptxFileName;
            }
            System.out.println("nvcc process exitValue " + exitValue);
            System.out.println("outputMessage:\n" + output);
            throw new IOException("Could not create .ptx file: " + output);
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            throw new IOException("Interrupted while waiting for nvcc output", e);
        }
    }

    /**
     * Reads a stream to its end and returns everything that was read.
     *
     * @param inputStream stream to drain; not closed by this method
     * @return the bytes read
     * @throws IOException if reading fails
     */
    private static byte[] toByteArray(InputStream inputStream) throws IOException {
        ByteArrayOutputStream collected = new ByteArrayOutputStream();
        byte[] chunk = new byte[8192];
        int read;
        while ((read = inputStream.read(chunk)) != -1) {
            collected.write(chunk, 0, read);
        }
        return collected.toByteArray();
    }

    /**
     * Thread-block edge length used for every launch; fixed at 16. The two matrix-product
     * kernels are loaded under names ending in {@code 16}.
     */
    private int getBlockSize() {
        return BLOCK_SIZE;
    }
}
