25 changes: 25 additions & 0 deletions aten/src/ATen/native/hammerblade/And.cpp
@@ -0,0 +1,25 @@
#include <cmath>
#include <ATen/Dispatch.h>
#include <ATen/hammerblade/HammerBladeContext.h>
#include <ATen/native/TensorIterator.h>
#include <ATen/native/ReduceOps.h>
#include <ATen/native/hammerblade/Offload.h>

namespace at { namespace native {

Tensor and_kernel_hb(const Tensor& self, const Tensor& other) {
  TORCH_CHECK(self.numel() == other.numel(), "The sizes of the two tensors must match.");
  TORCH_CHECK(self.scalar_type() == other.scalar_type(), "Both inputs must have the same dtype.");
  TORCH_CHECK(other.scalar_type() == kInt || other.scalar_type() == kBool, "HammerBlade __and__ is implemented for Int and Bool only");
Tensor result = at::empty_like(self, self.options());
  if (self.scalar_type() == kInt) {
hb_offload_kernel(result, self, other, "tensorlib_and_int");
}
else {
hb_offload_kernel(result, self, other, "tensorlib_and_bool");
}

return result;
}

}} // namespace at::native
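
Host-side flow in brief: and_kernel_hb validates that element counts and dtypes match, allocates the output with empty_like, and offloads to the device kernel selected by dtype. A minimal usage sketch, assuming a HammerBlade-enabled build (mirroring the tests added below):

import torch

# Assumes a HammerBlade-enabled build; .hammerblade() moves a tensor
# to the device, and `&` then dispatches to and_kernel_hb.
x = torch.ones(4, 5, dtype=torch.int).hammerblade()
y = torch.ones(4, 5, dtype=torch.int).hammerblade()
z = x & y
assert z.device == torch.device("hammerblade")
assert torch.equal(z.cpu(), torch.ones(4, 5, dtype=torch.int))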
1 change: 1 addition & 0 deletions aten/src/ATen/native/native_functions.yaml
@@ -3983,6 +3983,7 @@
dispatch:
CPU: legacy::cpu::_th_and
CUDA: legacy::cuda::_th_and
HammerBlade: and_kernel_hb

- func: __iand__.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
variants: method
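The one-line YAML change registers and_kernel_hb as the HammerBlade implementation of __and__.Tensor, alongside the existing CPU and CUDA entries, so the dispatcher routes by the device of the inputs. A quick sketch of the effect (device behavior assumed from this PR):

import torch

a = torch.tensor([0b1100, 0b1010], dtype=torch.int32)
b = torch.tensor([0b1010, 0b0110], dtype=torch.int32)
print(a & b)                                       # CPU path: legacy::cpu::_th_and
print((a.hammerblade() & b.hammerblade()).cpu())   # HammerBlade path: and_kernel_hb
# Both print tensor([8, 2], dtype=torch.int32).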
60 changes: 60 additions & 0 deletions hammerblade/torch/kernel/kernel_and.cpp
@@ -0,0 +1,60 @@
//========================================================================
// Element-wise AND kernel
//========================================================================
//
// Authors : Janice Wei
// Date : 09/25/2020

#include <kernel_common.hpp>
#include <cstdint>

extern "C" {

__attribute__ ((noinline)) int tensorlib_and_int(
hb_tensor_t* t0_p,
hb_tensor_t* t1_p,
hb_tensor_t* t2_p) {
auto res = HBTensor<int>(t0_p);
auto input1 = HBTensor<int>(t1_p);
auto input2 = HBTensor<int>(t2_p);

bsg_cuda_print_stat_kernel_start();

hb_tiled_foreach(
[](int a, int b) {
return a & b;
},
res, input1, input2);

bsg_cuda_print_stat_kernel_end();

g_barrier.sync();
return 0;
}

HB_EMUL_REG_KERNEL(tensorlib_and_int, hb_tensor_t*, hb_tensor_t*, hb_tensor_t*)

// Kernel entry points return an int status code, matching tensorlib_and_int.
__attribute__ ((noinline)) int tensorlib_and_bool(
hb_tensor_t* t0_p,
hb_tensor_t* t1_p,
hb_tensor_t* t2_p) {
auto res = HBTensor<bool>(t0_p);
auto input1 = HBTensor<bool>(t1_p);
auto input2 = HBTensor<bool>(t2_p);

bsg_cuda_print_stat_kernel_start();

hb_tiled_foreach(
[](bool a, bool b) {
return a & b;
},
res, input1, input2);

bsg_cuda_print_stat_kernel_end();

g_barrier.sync();
return 0;
}

HB_EMUL_REG_KERNEL(tensorlib_and_bool, hb_tensor_t*, hb_tensor_t*, hb_tensor_t*)
}
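
Both kernels follow the same shape: wrap the raw hb_tensor_t pointers in typed HBTensor views, then let hb_tiled_foreach apply the lambda elementwise, with the iteration space split across tiles. A pure-Python reference model of the computation (an illustration only, not part of the PR):

def and_reference(a, b):
    # Elementwise bitwise AND over equally-sized inputs; on hardware
    # the loop body is the lambda and the iteration is tiled across cores.
    assert len(a) == len(b), "element counts must match"
    return [x & y for x, y in zip(a, b)]

assert and_reference([0b1100, 0b1010], [0b1010, 0b0110]) == [0b1000, 0b0010]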
67 changes: 67 additions & 0 deletions hammerblade/torch/tests/test_and.py
@@ -0,0 +1,67 @@
"""
Tests of the HammerBlade and kernel
Authors : Janice Wei
Date : 09/25/2020
"""

import torch
import random
from hypothesis import given, settings
from .hypothesis_test_util import HypothesisUtil as hu

torch.manual_seed(42)
random.seed(42)

# ------------------------------------------------------------------------
# test of x1 & x2
# ------------------------------------------------------------------------

def _test_and(x1, x2):
h1 = x1.hammerblade()
h2 = x2.hammerblade()
assert h1 is not x1
assert h2 is not x2
y_c = x1 & x2
y_h = h1 & h2
assert y_h.device == torch.device("hammerblade")
assert torch.equal(y_c, y_h.cpu())

# ------------------------------------------------------------------------
# tests of and kernel with integer elements
# ------------------------------------------------------------------------

def test_and_1():
x = torch.ones(1, 10, dtype=torch.int)
_test_and(x, x)

def test_and_2():
x1 = torch.ones(4, 5, dtype=torch.int)
x2 = torch.ones(4, 5, dtype=torch.int)
_test_and(x1, x2)

def test_and_3():
x = torch.randint(-2 ** 30, 2 ** 30 - 1, (1, 128)).to(torch.int32)
y = torch.randint(-2 ** 30, 2 ** 30 - 1, (1, 128)).to(torch.int32)
_test_and(x, y)

def test_and_4():
x = torch.randint(-2 ** 30, 2 ** 30 - 1, (16, 32)).to(torch.int32)
y = torch.randint(-2 ** 30, 2 ** 30 - 1, (16, 32)).to(torch.int32)
_test_and(x, y)

def test_and_bool1():
x = torch.randint(0, 2, (16, 32)).to(torch.bool)
y = torch.randint(0, 2, (16, 32)).to(torch.bool)
_test_and(x, y)

def test_and_bool2():
x = torch.randint(0, 2, (1, 128)).to(torch.bool)
y = torch.randint(0, 2, (1, 128)).to(torch.bool)
_test_and(x, y)

@settings(deadline=None)
@given(inputs=hu.tensors(n=2))
def test_and_hypothesis(inputs):
x1 = torch.tensor(inputs[0]).to(torch.int32)
x2 = torch.tensor(inputs[1]).to(torch.int32)
_test_and(x1, x2)
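
One coverage gap worth noting: the hypothesis test only exercises int32. A bool variant could reuse the same harness (a sketch, not part of this PR):

@settings(deadline=None)
@given(inputs=hu.tensors(n=2))
def test_and_bool_hypothesis(inputs):
    x1 = torch.tensor(inputs[0]).to(torch.bool)
    x2 = torch.tensor(inputs[1]).to(torch.bool)
    _test_and(x1, x2)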