update 20221219:
我发现之前那个版本慢得亿批,于是整出这个新版本:
from paddle.jit import to_static


@to_static
def paddle_cdist_v3(x, y, p=2):
    """Pairwise p-norm distances between rows of x and rows of y.

    For each row of y, computes the p-norm of (x - y[j]) along axis 1,
    then stacks the resulting columns into an [x_rows, y_rows] tensor.
    Decorated with to_static so Paddle compiles it to a static graph.
    """
    columns = []
    for row_idx in range(y.shape[0]):
        columns.append(
            paddle.linalg.norm(x - y[row_idx], p=p, axis=1, keepdim=True)
        )
    return paddle.concat(columns, axis=1)
to_static 这个修饰器还是很好用的,我在下边做了个实验:
def paddle_cdist_v1(x, y, p=2):
    """Pairwise p-norm distances between rows of x and rows of y.

    Computes one scalar paddle.dist per (x-row, y-row) pair, then
    reshapes the flat result to [x_rows, y_rows]. One kernel launch
    per pair, so this is the slow baseline.
    """
    n_rows, n_cols = x.shape[0], y.shape[0]
    flat = [paddle.dist(row, other, p) for row in x for other in y]
    return paddle.concat(flat).reshape([n_rows, n_cols])
def paddle_cdist_v2(x, y, p=2):
    """Pairwise p-norm distances between rows of x and rows of y.

    Builds the result one column at a time: column j is the p-norm of
    (x - y[j]) along axis 1. Returns an [x_rows, y_rows] tensor.
    """
    cols = []
    for j in range(y.shape[0]):
        cols.append(paddle.linalg.norm(x - y[j], p=p, axis=1, keepdim=True))
    return paddle.concat(cols, axis=1)
from paddle.jit import to_static


@to_static
def paddle_cdist_v3(x, y, p=2):
    """Pairwise p-norm distances between rows of x and rows of y.

    Same computation as paddle_cdist_v2 — column j is the p-norm of
    (x - y[j]) along axis 1 — but compiled to a static graph via
    to_static for extra speed.
    """
    n_targets = y.shape[0]
    col_list = [
        paddle.linalg.norm(x - y[j], p=p, axis=1, keepdim=True)
        for j in range(n_targets)
    ]
    return paddle.concat(col_list, axis=1)
v1、v2、v3 分别是旧版本、新版本和 to_static 修饰的新版本
import time

# Benchmark inputs: 4000 query points vs 40 reference points, 4-D each.
x = paddle.rand([4000, 4])
y = paddle.rand([40, 4])

# v1 is run only once — it is far too slow to repeat 1000 times.
# perf_counter() is the right clock for timing (monotonic, high resolution);
# time.time() can jump if the wall clock is adjusted.
s_v1 = time.perf_counter()
z_v1 = paddle_cdist_v1(x, y)
print(z_v1.shape, time.perf_counter() - s_v1)

# v2 and v3 are averaged over 1000 runs.
s_v2 = time.perf_counter()
for _ in range(1000):
    z_v2 = paddle_cdist_v2(x, y)
print(z_v2.shape, (time.perf_counter() - s_v2) / 1000)

# NOTE(review): the first v3 call presumably includes to_static tracing
# overhead, which the 1000-run average amortizes — confirm if measuring
# steady-state only.
s_v3 = time.perf_counter()
for _ in range(1000):
    z_v3 = paddle_cdist_v3(x, y)
print(z_v3.shape, (time.perf_counter() - s_v3) / 1000)
输出:
[4000, 40] 23.982308864593506
[4000, 40] 0.005060146570205689
[4000, 40] 0.0034564528465270998
新版直接是毫秒级的hhhh
再看一下误差:
>>> (z_v1 - z_v2).abs().sum()
Tensor(shape=[1], dtype=float32, place=Place(gpu:0), stop_gradient=True,
[0.00138187])
>>> (z_v1 - z_v3).abs().sum()
Tensor(shape=[1], dtype=float32, place=Place(gpu:0), stop_gradient=True,
[0.00138187])
近乎可忽略
以下是旧版本:
def paddle_cdist(x, y, p=2):
    """Pairwise p-norm distances between rows of x and rows of y.

    Entry (i, j) of the returned [x_rows, y_rows] tensor is
    paddle.dist(x[i], y[j], p). One kernel launch per pair, so this
    is slow for large inputs.
    """
    x_len = x.shape[0]
    y_len = y.shape[0]
    # The original enumerate() indices (x_ix, y_ix) were never used;
    # iterate the rows directly.
    out = [paddle.dist(x_item, y_item, p) for x_item in x for y_item in y]
    return paddle.concat(out).reshape([x_len, y_len])
import numpy as np

# Fixed seeds so the torch comparison below gets byte-identical inputs.
np.random.seed(1107)
x = np.random.rand(4, 4)
np.random.seed(1107 * 2)
y = np.random.rand(2, 4)
x = paddle.to_tensor(x)
y = paddle.to_tensor(y)
out = paddle_cdist(x, y, 2)
# The original comment claimed `out = [1.]`; the actual result is a
# 4x2 tensor of pairwise Euclidean distances.
print(out)
Tensor(shape=[4, 2], dtype=float64, place=Place(gpu:0), stop_gradient=True,
[[0.86650367, 0.97836384],
[0.37499482, 0.76791689],
[0.74998959, 0.82276324],
[0.80922280, 0.44565161]])
import numpy as np
import torch  # was missing here: torch was used before its import further below

# Same seeds as the paddle snippet above, so the inputs match exactly.
np.random.seed(1107)
x = np.random.rand(4, 4)
np.random.seed(1107 * 2)
y = np.random.rand(2, 4)
x = torch.as_tensor(x)
y = torch.as_tensor(y)
out = torch.cdist(x, y, 2)
# The original comment claimed `out = [1.]`; the actual result is a
# 4x2 tensor of pairwise Euclidean distances.
print(out)
tensor([[0.8665, 0.9784],
[0.3750, 0.7679],
[0.7500, 0.8228],
[0.8092, 0.4457]], dtype=torch.float64)
顺便来看看 torch.cdist 到底还做了个啥:
import torch

# Two small point sets: three 2-D points (a) vs two 2-D points (b).
a = torch.tensor(
    [[0.9041, 0.0196], [-0.3108, -2.4423], [-0.4821, 1.059]]
)
b = torch.tensor([[-2.1763, -0.4713], [-0.6986, 1.3702]])

# torch.cdist returns the full 3x2 matrix of pairwise Euclidean distances.
c = torch.cdist(a, b, p=2)
print(c)
tensor([[3.1193, 2.0959],
[2.7138, 3.8322],
[2.2830, 0.3791]])
# Reproduce torch.cdist by hand: entry (i, j) is the Euclidean norm of
# the difference between the i-th row of a and the j-th row of b.
res = torch.zeros([a.shape[0], b.shape[0]])
for i, row in enumerate(a):
    for j, other in enumerate(b):
        res[i][j] = torch.norm(row - other, p=2)
print(res)
tensor([[3.1193, 2.0959],
[2.7138, 3.8322],
[2.2830, 0.3791]])