昨天我问一个问题关于并行矩阵乘法Java 7中使用fork /join框架这里。在axtavt的帮助下,我的示例程序开始工作。现在,我仅使用Java
6功能来实现等效程序。我遇到了与昨天相同的问题,尽管应用了axtavt给我的反馈(我认为)。我在俯视什么吗?码:
package algorithms;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
public class Java6MatrixMultiply implements Algorithm {
private static final int SIZE = 1024;
private static final int THRESHOLD = 64;
private static final int MAX_THREADS = Runtime.getRuntime().availableProcessors();
private final ExecutorService executor = Executors.newFixedThreadPool(MAX_THREADS);
private float[][] a = new float[SIZE][SIZE];
private float[][] b = new float[SIZE][SIZE];
private float[][] c = new float[SIZE][SIZE];
@Override
public void initialize() {
init(a, b, SIZE);
}
@Override
public void execute() {
MatrixMultiplyTask task = new MatrixMultiplyTask(a, 0, 0, b, 0, 0, c, 0, 0, SIZE);
task.split();
executor.shutdown();
try {
executor.awaitTermination(Integer.MAX_VALUE, TimeUnit.DAYS);
} catch (InterruptedException e) {
System.out.println("Error: " + e.getMessage());
}
}
@Override
public void printResult() {
check(c, SIZE);
for (int i = 0; i < SIZE && i <= 10; i++) {
for (int j = 0; j < SIZE && j <= 10; j++) {
if(j == 10) {
System.out.print("...");
}
else {
System.out.print(c[i][j] + " ");
}
}
if(i == 10) {
System.out.println();
for(int k = 0; k < 10; k++) System.out.print(" ... ");
}
System.out.println();
}
System.out.println();
}
// To simplify checking, fill with all 1's. Answer should be all n's.
static void init(float[][] a, float[][] b, int n) {
for (int i = 0; i < n; ++i) {
for (int j = 0; j < n; ++j) {
a[i][j] = 1.0F;
b[i][j] = 1.0F;
}
}
}
static void check(float[][] c, int n) {
for (int i = 0; i < n; i++) {
for (int j = 0; j < n; j++) {
if (c[i][j] != n) {
throw new Error("Check Failed at [" + i + "][" + j + "]: " + c[i][j]);
//System.out.println("Check Failed at [" + i + "][" + j + "]: " + c[i][j]);
}
}
}
}
public class Seq implements Runnable {
private final MatrixMultiplyTask a;
private final MatrixMultiplyTask b;
public Seq(MatrixMultiplyTask a, MatrixMultiplyTask b, int size) {
this.a = a;
this.b = b;
if (size <= THRESHOLD) {
executor.submit(this);
} else {
a.split();
b.split();
}
}
public void run() {
a.multiplyStride2();
b.multiplyStride2();
}
}
private class MatrixMultiplyTask {
private final float[][] A; // Matrix A
private final int aRow; // first row of current quadrant of A
private final int aCol; // first column of current quadrant of A
private final float[][] B; // Similarly for B
private final int bRow;
private final int bCol;
private final float[][] C; // Similarly for result matrix C
private final int cRow;
private final int cCol;
private final int size;
MatrixMultiplyTask(float[][] A, int aRow, int aCol, float[][] B,
int bRow, int bCol, float[][] C, int cRow, int cCol, int size) {
this.A = A;
this.aRow = aRow;
this.aCol = aCol;
this.B = B;
this.bRow = bRow;
this.bCol = bCol;
this.C = C;
this.cRow = cRow;
this.cCol = cCol;
this.size = size;
}
public void split() {
int h = size / 2;
new Seq(new MatrixMultiplyTask(A,
aRow, aCol, // A11
B, bRow, bCol, // B11
C, cRow, cCol, // C11
h),
new MatrixMultiplyTask(A, aRow, aCol + h, // A12
B, bRow + h, bCol, // B21
C, cRow, cCol, // C11
h), h);
new Seq(new MatrixMultiplyTask(A,
aRow, aCol, // A11
B, bRow, bCol + h, // B12
C, cRow, cCol + h, // C12
h),
new MatrixMultiplyTask(A, aRow, aCol + h, // A12
B, bRow + h, bCol + h, // B22
C, cRow, cCol + h, // C12
h), h);
new Seq(new MatrixMultiplyTask(A, aRow
+ h, aCol, // A21
B, bRow, bCol, // B11
C, cRow + h, cCol, // C21
h),
new MatrixMultiplyTask(A, aRow + h, aCol + h, // A22
B, bRow + h, bCol, // B21
C, cRow + h, cCol, // C21
h), h);
new Seq(new MatrixMultiplyTask(A, aRow
+ h, aCol, // A21
B, bRow, bCol + h, // B12
C, cRow + h, cCol + h, // C22
h),
new MatrixMultiplyTask(A, aRow + h, aCol + h, // A22
B, bRow + h, bCol + h, // B22
C, cRow + h, cCol + h, // C22
h), h);
}
public void multiplyStride2() {
for (int j = 0; j < size; j += 2) {
for (int i = 0; i < size; i += 2) {
float[] a0 = A[aRow + i];
float[] a1 = A[aRow + i + 1];
float s00 = 0.0F;
float s01 = 0.0F;
float s10 = 0.0F;
float s11 = 0.0F;
for (int k = 0; k < size; k += 2) {
float[] b0 = B[bRow + k];
s00 += a0[aCol + k] * b0[bCol + j];
s10 += a1[aCol + k] * b0[bCol + j];
s01 += a0[aCol + k] * b0[bCol + j + 1];
s11 += a1[aCol + k] * b0[bCol + j + 1];
float[] b1 = B[bRow + k + 1];
s00 += a0[aCol + k + 1] * b1[bCol + j];
s10 += a1[aCol + k + 1] * b1[bCol + j];
s01 += a0[aCol + k + 1] * b1[bCol + j + 1];
s11 += a1[aCol + k + 1] * b1[bCol + j + 1];
}
C[cRow + i][cCol + j] += s00;
C[cRow + i][cCol + j + 1] += s01;
C[cRow + i + 1][cCol + j] += s10;
C[cRow + i + 1][cCol + j + 1] += s11;
}
}
}
}
}
阅读了这个问题后,我决定改编我的程序。我的新程序无需同步即可运行良好。谢谢您的想法,彼得。
新代码:
package algorithms;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.FutureTask;
public class Java6MatrixMultiply implements Algorithm {
private static final int SIZE = 2048;
private static final int THRESHOLD = 64;
private static final int MAX_THREADS = Runtime.getRuntime().availableProcessors();
private final ExecutorService executor = Executors.newFixedThreadPool(MAX_THREADS);
private float[][] a = new float[SIZE][SIZE];
private float[][] b = new float[SIZE][SIZE];
private float[][] c = new float[SIZE][SIZE];
@Override
public void initialize() {
init(a, b, SIZE);
}
@Override
public void execute() {
MatrixMultiplyTask mainTask = new MatrixMultiplyTask(a, 0, 0, b, 0, 0, c, 0, 0, SIZE);
Future future = executor.submit(mainTask);
try {
future.get();
} catch (Exception e) {
System.out.println("Error: " + e.getMessage());
}
}
@Override
public void printResult() {
check(c, SIZE);
for (int i = 0; i < SIZE && i <= 10; i++) {
for (int j = 0; j < SIZE && j <= 10; j++) {
if(j == 10) {
System.out.print("...");
}
else {
System.out.print(c[i][j] + " ");
}
}
if(i == 10) {
System.out.println();
for(int k = 0; k < 10; k++) System.out.print(" ... ");
}
System.out.println();
}
System.out.println();
}
// To simplify checking, fill with all 1's. Answer should be all n's.
static void init(float[][] a, float[][] b, int n) {
for (int i = 0; i < n; ++i) {
for (int j = 0; j < n; ++j) {
a[i][j] = 1.0F;
b[i][j] = 1.0F;
}
}
}
static void check(float[][] c, int n) {
for (int i = 0; i < n; i++) {
for (int j = 0; j < n; j++) {
if (c[i][j] != n) {
throw new Error("Check Failed at [" + i + "][" + j + "]: " + c[i][j]);
//System.out.println("Check Failed at [" + i + "][" + j + "]: " + c[i][j]);
}
}
}
}
public class Seq implements Runnable {
private final MatrixMultiplyTask a;
private final MatrixMultiplyTask b;
public Seq(MatrixMultiplyTask a, MatrixMultiplyTask b) {
this.a = a;
this.b = b;
}
public void run() {
a.run();
b.run();
}
}
private class MatrixMultiplyTask implements Runnable {
private final float[][] A; // Matrix A
private final int aRow; // first row of current quadrant of A
private final int aCol; // first column of current quadrant of A
private final float[][] B; // Similarly for B
private final int bRow;
private final int bCol;
private final float[][] C; // Similarly for result matrix C
private final int cRow;
private final int cCol;
private final int size;
public MatrixMultiplyTask(float[][] A, int aRow, int aCol, float[][] B,
int bRow, int bCol, float[][] C, int cRow, int cCol, int size) {
this.A = A;
this.aRow = aRow;
this.aCol = aCol;
this.B = B;
this.bRow = bRow;
this.bCol = bCol;
this.C = C;
this.cRow = cRow;
this.cCol = cCol;
this.size = size;
}
public void run() {
//System.out.println("Thread: " + Thread.currentThread().getName());
if (size <= THRESHOLD) {
multiplyStride2();
} else {
int h = size / 2;
Seq seq1 = new Seq(new MatrixMultiplyTask(A,
aRow, aCol, // A11
B, bRow, bCol, // B11
C, cRow, cCol, // C11
h),
new MatrixMultiplyTask(A, aRow, aCol + h, // A12
B, bRow + h, bCol, // B21
C, cRow, cCol, // C11
h));
Seq seq2 = new Seq(new MatrixMultiplyTask(A,
aRow, aCol, // A11
B, bRow, bCol + h, // B12
C, cRow, cCol + h, // C12
h),
new MatrixMultiplyTask(A, aRow, aCol + h, // A12
B, bRow + h, bCol + h, // B22
C, cRow, cCol + h, // C12
h));
Seq seq3 = new Seq(new MatrixMultiplyTask(A, aRow
+ h, aCol, // A21
B, bRow, bCol, // B11
C, cRow + h, cCol, // C21
h),
new MatrixMultiplyTask(A, aRow + h, aCol + h, // A22
B, bRow + h, bCol, // B21
C, cRow + h, cCol, // C21
h));
Seq seq4 = new Seq(new MatrixMultiplyTask(A, aRow
+ h, aCol, // A21
B, bRow, bCol + h, // B12
C, cRow + h, cCol + h, // C22
h),
new MatrixMultiplyTask(A, aRow + h, aCol + h, // A22
B, bRow + h, bCol + h, // B22
C, cRow + h, cCol + h, // C22
h));
final FutureTask s1Task = new FutureTask(seq2, null);
final FutureTask s2Task = new FutureTask(seq3, null);
final FutureTask s3Task = new FutureTask(seq4, null);
executor.execute(s1Task);
executor.execute(s2Task);
executor.execute(s3Task);
seq1.run();
s1Task.run();
s2Task.run();
s3Task.run();
try {
s1Task.get();
s2Task.get();
s3Task.get();
} catch (Exception e) {
System.out.println("Error: " + e.getMessage());
executor.shutdownNow();
}
}
}
public void multiplyStride2() {
for (int j = 0; j < size; j += 2) {
for (int i = 0; i < size; i += 2) {
float[] a0 = A[aRow + i];
float[] a1 = A[aRow + i + 1];
float s00 = 0.0F;
float s01 = 0.0F;
float s10 = 0.0F;
float s11 = 0.0F;
for (int k = 0; k < size; k += 2) {
float[] b0 = B[bRow + k];
s00 += a0[aCol + k] * b0[bCol + j];
s10 += a1[aCol + k] * b0[bCol + j];
s01 += a0[aCol + k] * b0[bCol + j + 1];
s11 += a1[aCol + k] * b0[bCol + j + 1];
float[] b1 = B[bRow + k + 1];
s00 += a0[aCol + k + 1] * b1[bCol + j];
s10 += a1[aCol + k + 1] * b1[bCol + j];
s01 += a0[aCol + k + 1] * b1[bCol + j + 1];
s11 += a1[aCol + k + 1] * b1[bCol + j + 1];
}
C[cRow + i][cCol + j] += s00;
C[cRow + i][cCol + j + 1] += s01;
C[cRow + i + 1][cCol + j] += s10;
C[cRow + i + 1][cCol + j + 1] += s11;
}
}
}
}
}
我正在计算两大组向量(具有相同特征)之间的余弦相似度。每组向量表示为一个scipy CSR稀疏矩阵a和B。我想计算一个x B^T,它不会稀疏。但是,我只需要跟踪超过某个阈值的值,例如0.8。我正试图用vanilla RDD在Pyspark中实现这一点,目的是使用为scipy CSR矩阵实现的快速向量操作。 A和B的行是标准化的,所以为了计算余弦相似度,我只需要找到A中每一行与B中每一行的点积。A的
主要内容:逐元素矩阵乘法,矩阵乘积运算,矩阵点积矩阵乘法是将两个矩阵作为输入值,并将 A 矩阵的行与 B 矩阵的列对应位置相乘再相加,从而生成一个新矩阵,如下图所示: 注意:必须确保第一个矩阵中的行数等于第二个矩阵中的列数,否则不能进行矩阵乘法运算。 图1:矩阵乘法 矩阵乘法运算被称为向量化操作,向量化的主要目的是减少使用的 for 循环次数或者根本不使用。这样做的目的是为了加速程序的计算。 下面介绍 NumPy 提供的三种矩阵乘法,从而进一步
问题内容: 在numpy中,我有N个3x3矩阵的数组。这将是我如何存储它们的示例(我正在提取内容): 我也有一个由3个向量组成的数组,这将是一个示例: 我似乎无法弄清楚如何通过numpy将它们相乘,从而实现如下效果: 与的形状(在投射到阵列)是。但是,由于速度的原因,列表实现是不可能的。 我尝试了各种换位的np.dot,但最终结果没有得到正确的形状。 问题答案: 使用 脚步 : 1)保持第一根轴对
我想使用寄存器(逐行信息)通过向量算法创建矩阵乘法。打开外循环4次我有空洞matvec_XMM(双* a,双* x,双* y,整数n,整数磅)函数的问题,它返回了不好的结果,这是算法wchich我必须使用: 它是ma代码:
问题内容: 我正在尝试使用Apache Spark和Java执行矩阵乘法。 我有两个主要问题: 如何创建可以表示Apache Spark中矩阵的RDD? 如何将两个这样的RDD相乘? 问题答案: 所有这些都取决于输入数据和维度,但总的来说,您需要的不是的分布式数据结构之一。目前,它提供了四种不同的实现 -可以直接从被创建,其中由行索引和 import org.apache.spark.mllib.
考虑两个矩阵A和B.如果A是mxn矩阵而B是nxp矩阵,它们可以相乘以产生mxn矩阵C.只有当A中的列数n等于数量时才可以进行矩阵乘法在B.中的行n 在矩阵乘法中,第一矩阵中的行的元素与第二矩阵中的对应列相乘。 在得到的矩阵C中的第 (i,j)位置中的每个元素是第i行的第i行中的元素与第二矩阵的第 j列中的对应元素的乘积的总和。 MATLAB中的矩阵乘法是使用*运算符执行的。 例子 (Exampl