update 20221219:
我发现之前那个版本慢得亿批,于是整出这个新版本:
from paddle.jit import to_static


@to_static
def paddle_cdist_v3(x, y, p=2):
    """Pairwise p-norm distances between rows of x and rows of y.

    For each row of y, computes the p-norm of (x - y[j]) along axis 1,
    then stacks the resulting columns into an [x_rows, y_rows] tensor.
    Decorated with to_static so Paddle compiles it to a static graph.
    """
    columns = []
    for row_idx in range(y.shape[0]):
        columns.append(
            paddle.linalg.norm(x - y[row_idx], p=p, axis=1, keepdim=True)
        )
    return paddle.concat(columns, axis=1)
to_static 这个修饰器还是很好用的,我在下边做了个实验:
def paddle_cdist_v1(x, y, p=2):
    """Pairwise p-norm distances between rows of x and rows of y.

    Computes one scalar paddle.dist per (x-row, y-row) pair, then
    reshapes the flat result to [x_rows, y_rows]. One kernel launch
    per pair, so this is the slow baseline.
    """
    n_rows, n_cols = x.shape[0], y.shape[0]
    flat = [paddle.dist(row, other, p) for row in x for other in y]
    return paddle.concat(flat).reshape([n_rows, n_cols])
def paddle_cdist_v2(x, y, p=2):
    """Pairwise p-norm distances between rows of x and rows of y.

    Builds the result one column at a time: column j is the p-norm of
    (x - y[j]) along axis 1. Returns an [x_rows, y_rows] tensor.
    """
    cols = []
    for j in range(y.shape[0]):
        cols.append(paddle.linalg.norm(x - y[j], p=p, axis=1, keepdim=True))
    return paddle.concat(cols, axis=1)
from paddle.jit import to_static


@to_static
def paddle_cdist_v3(x, y, p=2):
    """Pairwise p-norm distances between rows of x and rows of y.

    Same computation as paddle_cdist_v2 — column j is the p-norm of
    (x - y[j]) along axis 1 — but compiled to a static graph via
    to_static for extra speed.
    """
    n_targets = y.shape[0]
    col_list = [
        paddle.linalg.norm(x - y[j], p=p, axis=1, keepdim=True)
        for j in range(n_targets)
    ]
    return paddle.concat(col_list, axis=1)
v1、v2、v3 分别是旧版本、新版本和 to_static 修饰的新版本
import time

# Benchmark inputs: 4000 query points vs 40 reference points, 4-D each.
x = paddle.rand([4000, 4])
y = paddle.rand([40, 4])

# v1 is run only once — it is far too slow to repeat 1000 times.
# perf_counter() is the right clock for timing (monotonic, high resolution);
# time.time() can jump if the wall clock is adjusted.
s_v1 = time.perf_counter()
z_v1 = paddle_cdist_v1(x, y)
print(z_v1.shape, time.perf_counter() - s_v1)

# v2 and v3 are averaged over 1000 runs.
s_v2 = time.perf_counter()
for _ in range(1000):
    z_v2 = paddle_cdist_v2(x, y)
print(z_v2.shape, (time.perf_counter() - s_v2) / 1000)

# NOTE(review): the first v3 call presumably includes to_static tracing
# overhead, which the 1000-run average amortizes — confirm if measuring
# steady-state only.
s_v3 = time.perf_counter()
for _ in range(1000):
    z_v3 = paddle_cdist_v3(x, y)
print(z_v3.shape, (time.perf_counter() - s_v3) / 1000)
输出:
[4000, 40] 23.982308864593506
[4000, 40] 0.005060146570205689
[4000, 40] 0.0034564528465270998
新版直接是毫秒级的hhhh
再看一下误差:
>>> (z_v1 - z_v2).abs().sum()
Tensor(shape=[1], dtype=float32, place=Place(gpu:0), stop_gradient=True,
[0.00138187])
>>> (z_v1 - z_v3).abs().sum()
Tensor(shape=[1], dtype=float32, place=Place(gpu:0), stop_gradient=True,
[0.00138187])
近乎可忽略
以下是旧版本:
def paddle_cdist(x, y, p=2):
    """Pairwise p-norm distances between rows of x and rows of y.

    Entry (i, j) of the returned [x_rows, y_rows] tensor is
    paddle.dist(x[i], y[j], p). One kernel launch per pair, so this
    is slow for large inputs.
    """
    x_len = x.shape[0]
    y_len = y.shape[0]
    # The original enumerate() indices (x_ix, y_ix) were never used;
    # iterate the rows directly.
    out = [paddle.dist(x_item, y_item, p) for x_item in x for y_item in y]
    return paddle.concat(out).reshape([x_len, y_len])
import numpy as np

# Fixed seeds so the torch comparison below gets byte-identical inputs.
np.random.seed(1107)
x = np.random.rand(4, 4)
np.random.seed(1107 * 2)
y = np.random.rand(2, 4)
x = paddle.to_tensor(x)
y = paddle.to_tensor(y)
out = paddle_cdist(x, y, 2)
# The original comment claimed `out = [1.]`; the actual result is a
# 4x2 tensor of pairwise Euclidean distances.
print(out)
Tensor(shape=[4, 2], dtype=float64, place=Place(gpu:0), stop_gradient=True,
[[0.86650367, 0.97836384],
[0.37499482, 0.76791689],
[0.74998959, 0.82276324],
[0.80922280, 0.44565161]])
import numpy as np
import torch  # was missing here: torch was used before its import further below

# Same seeds as the paddle snippet above, so the inputs match exactly.
np.random.seed(1107)
x = np.random.rand(4, 4)
np.random.seed(1107 * 2)
y = np.random.rand(2, 4)
x = torch.as_tensor(x)
y = torch.as_tensor(y)
out = torch.cdist(x, y, 2)
# The original comment claimed `out = [1.]`; the actual result is a
# 4x2 tensor of pairwise Euclidean distances.
print(out)
tensor([[0.8665, 0.9784],
[0.3750, 0.7679],
[0.7500, 0.8228],
[0.8092, 0.4457]], dtype=torch.float64)
顺便来看看 torch.cdist 到底还做了个啥:
import torch

# Two small point sets: three 2-D points (a) vs two 2-D points (b).
a = torch.tensor(
    [[0.9041, 0.0196], [-0.3108, -2.4423], [-0.4821, 1.059]]
)
b = torch.tensor([[-2.1763, -0.4713], [-0.6986, 1.3702]])

# torch.cdist returns the full 3x2 matrix of pairwise Euclidean distances.
c = torch.cdist(a, b, p=2)
print(c)
tensor([[3.1193, 2.0959],
[2.7138, 3.8322],
[2.2830, 0.3791]])
# Reproduce torch.cdist by hand: entry (i, j) is the Euclidean norm of
# the difference between the i-th row of a and the j-th row of b.
res = torch.zeros([a.shape[0], b.shape[0]])
for i, row in enumerate(a):
    for j, other in enumerate(b):
        res[i][j] = torch.norm(row - other, p=2)
print(res)
tensor([[3.1193, 2.0959],
[2.7138, 3.8322],
[2.2830, 0.3791]])