25 changes: 25 additions & 0 deletions aten/src/ATen/native/hammerblade/And.cpp
@@ -0,0 +1,25 @@
#include <cmath>
#include <ATen/Dispatch.h>
#include <ATen/hammerblade/HammerBladeContext.h>
#include <ATen/native/TensorIterator.h>
#include <ATen/native/ReduceOps.h>
#include <ATen/native/hammerblade/Offload.h>

namespace at { namespace native {

Tensor and_kernel_hb(const Tensor& self, const Tensor& other) {
  TORCH_CHECK(self.numel() == other.numel(), "The sizes of the two tensors must match.");
  TORCH_CHECK(self.scalar_type() == other.scalar_type(), "Both inputs must have the same dtype.");
  TORCH_CHECK(other.scalar_type() == kInt || other.scalar_type() == kBool, "HammerBlade __and__ is implemented for Int and Bool only");
Tensor result = at::empty_like(self, self.options());
  if (self.scalar_type() == kInt) {
hb_offload_kernel(result, self, other, "tensorlib_and_int");
}
else {
hb_offload_kernel(result, self, other, "tensorlib_and_bool");
}

return result;
}

}} // namespace at::native
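
Host-side flow in brief: and_kernel_hb validates that element counts and dtypes match, allocates the output with empty_like, and offloads to the device kernel selected by dtype. A minimal usage sketch, assuming a HammerBlade-enabled build (mirroring the tests added below):

import torch

# Assumes a HammerBlade-enabled build; .hammerblade() moves a tensor
# to the device, and `&` then dispatches to and_kernel_hb.
x = torch.ones(4, 5, dtype=torch.int).hammerblade()
y = torch.ones(4, 5, dtype=torch.int).hammerblade()
z = x & y
assert z.device == torch.device("hammerblade")
assert torch.equal(z.cpu(), torch.ones(4, 5, dtype=torch.int))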
1 change: 1 addition & 0 deletions aten/src/ATen/native/native_functions.yaml
@@ -3983,6 +3983,7 @@
dispatch:
CPU: legacy::cpu::_th_and
CUDA: legacy::cuda::_th_and
HammerBlade: and_kernel_hb

- func: __iand__.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
variants: method
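The one-line YAML change registers and_kernel_hb as the HammerBlade implementation of __and__.Tensor, alongside the existing CPU and CUDA entries, so the dispatcher routes by the device of the inputs. A quick sketch of the effect (device behavior assumed from this PR):

import torch

a = torch.tensor([0b1100, 0b1010], dtype=torch.int32)
b = torch.tensor([0b1010, 0b0110], dtype=torch.int32)
print(a & b)                                       # CPU path: legacy::cpu::_th_and
print((a.hammerblade() & b.hammerblade()).cpu())   # HammerBlade path: and_kernel_hb
# Both print tensor([8, 2], dtype=torch.int32).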
60 changes: 60 additions & 0 deletions hammerblade/torch/kernel/kernel_and.cpp
@@ -0,0 +1,60 @@
//========================================================================
// Element-wise AND kernel
//========================================================================
//
// Authors : Janice Wei
// Date : 09/25/2020

#include <kernel_common.hpp>
#include <cstdint>

extern "C" {

__attribute__ ((noinline)) int tensorlib_and_int(
hb_tensor_t* t0_p,
hb_tensor_t* t1_p,
hb_tensor_t* t2_p) {
auto res = HBTensor<int>(t0_p);
auto input1 = HBTensor<int>(t1_p);
auto input2 = HBTensor<int>(t2_p);

bsg_cuda_print_stat_kernel_start();

hb_tiled_foreach(
[](int a, int b) {
return a & b;
},
res, input1, input2);

bsg_cuda_print_stat_kernel_end();

g_barrier.sync();
return 0;
}

HB_EMUL_REG_KERNEL(tensorlib_and_int, hb_tensor_t*, hb_tensor_t*, hb_tensor_t*)

// Kernel entry points return an int status code, matching tensorlib_and_int.
__attribute__ ((noinline)) int tensorlib_and_bool(
hb_tensor_t* t0_p,
hb_tensor_t* t1_p,
hb_tensor_t* t2_p) {
auto res = HBTensor<bool>(t0_p);
auto input1 = HBTensor<bool>(t1_p);
auto input2 = HBTensor<bool>(t2_p);

bsg_cuda_print_stat_kernel_start();

hb_tiled_foreach(
[](bool a, bool b) {
return a & b;
},
res, input1, input2);

bsg_cuda_print_stat_kernel_end();

g_barrier.sync();
return 0;
}

HB_EMUL_REG_KERNEL(tensorlib_and_bool, hb_tensor_t*, hb_tensor_t*, hb_tensor_t*)
}
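
Both kernels follow the same shape: wrap the raw hb_tensor_t pointers in typed HBTensor views, then let hb_tiled_foreach apply the lambda elementwise, with the iteration space split across tiles. A pure-Python reference model of the computation (an illustration only, not part of the PR):

def and_reference(a, b):
    # Elementwise bitwise AND over equally-sized inputs; on hardware
    # the loop body is the lambda and the iteration is tiled across cores.
    assert len(a) == len(b), "element counts must match"
    return [x & y for x, y in zip(a, b)]

assert and_reference([0b1100, 0b1010], [0b1010, 0b0110]) == [0b1000, 0b0010]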
67 changes: 67 additions & 0 deletions hammerblade/torch/tests/test_and.py
@@ -0,0 +1,67 @@
"""
Tests of the HammerBlade and kernel
Authors : Janice Wei
Date : 09/25/2020
"""

import torch
import random
from hypothesis import given, settings
from .hypothesis_test_util import HypothesisUtil as hu

torch.manual_seed(42)
random.seed(42)

# ------------------------------------------------------------------------
# test of x1 & x2
# ------------------------------------------------------------------------

def _test_and(x1, x2):
h1 = x1.hammerblade()
h2 = x2.hammerblade()
assert h1 is not x1
assert h2 is not x2
y_c = x1 & x2
y_h = h1 & h2
assert y_h.device == torch.device("hammerblade")
assert torch.equal(y_c, y_h.cpu())

# ------------------------------------------------------------------------
# tests of and kernel with integer elements
# ------------------------------------------------------------------------

def test_and_1():
x = torch.ones(1, 10, dtype=torch.int)
_test_and(x, x)

def test_and_2():
x1 = torch.ones(4, 5, dtype=torch.int)
x2 = torch.ones(4, 5, dtype=torch.int)
_test_and(x1, x2)

def test_and_3():
x = torch.randint(-2 ** 30, 2 ** 30 - 1, (1, 128)).to(torch.int32)
y = torch.randint(-2 ** 30, 2 ** 30 - 1, (1, 128)).to(torch.int32)
_test_and(x, y)

def test_and_4():
x = torch.randint(-2 ** 30, 2 ** 30 - 1, (16, 32)).to(torch.int32)
y = torch.randint(-2 ** 30, 2 ** 30 - 1, (16, 32)).to(torch.int32)
_test_and(x, y)

def test_and_bool1():
x = torch.randint(0, 2, (16, 32)).to(torch.bool)
y = torch.randint(0, 2, (16, 32)).to(torch.bool)
_test_and(x, y)

def test_and_bool2():
x = torch.randint(0, 2, (1, 128)).to(torch.bool)
y = torch.randint(0, 2, (1, 128)).to(torch.bool)
_test_and(x, y)

@settings(deadline=None)
@given(inputs=hu.tensors(n=2))
def test_and_hypothesis(inputs):
x1 = torch.tensor(inputs[0]).to(torch.int32)
x2 = torch.tensor(inputs[1]).to(torch.int32)
_test_and(x1, x2)
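
One coverage gap worth noting: the hypothesis test only exercises int32. A bool variant could reuse the same harness (a sketch, not part of this PR):

@settings(deadline=None)
@given(inputs=hu.tensors(n=2))
def test_and_bool_hypothesis(inputs):
    x1 = torch.tensor(inputs[0]).to(torch.bool)
    x2 = torch.tensor(inputs[1]).to(torch.bool)
    _test_and(x1, x2)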