Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 2 additions & 22 deletions src/cuda/add/kernel.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -5,24 +5,6 @@

namespace infini::ops {

namespace detail {

template <typename T, typename = void>
struct HasHAdd : std::false_type {};

template <typename T>
struct HasHAdd<
T, std::void_t<
decltype(__hadd(std::declval<T>(), std::declval<T>())),
std::enable_if_t<std::is_convertible_v<
decltype(__hadd(std::declval<T>(), std::declval<T>())), T>>>>
: std::true_type {};

template <typename T>
inline constexpr bool HasHAddValue = HasHAdd<T>::value;

} // namespace detail

struct AddOp {
static constexpr std::size_t num_inputs = 2;

Expand All @@ -31,10 +13,8 @@ struct AddOp {
const T& other) const {
if constexpr (std::is_same_v<T, half2>) {
return __hadd2(input, other);
} else if constexpr ((std::is_same_v<T, half> ||
std::is_same_v<T,
TypeMapType<DataType::kBFloat16>>) &&
detail::HasHAddValue<T>) {
} else if constexpr (std::is_same_v<T, half> ||
std::is_same_v<T, TypeMapType<DataType::kBFloat16>>) {
return __hadd(input, other);
} else if constexpr (std::is_same_v<T, float>) {
return __fadd_rn(input, other);
Expand Down
4 changes: 0 additions & 4 deletions src/cuda/swiglu/kernel.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,6 @@

#include <cstdint>

// clang-format off
#include <cuda_runtime.h> // TODO: Remove this
// clang-format on

#include "base/swiglu.h"
#include "common/generic_utils.h"
#include "cuda/swiglu/kernel.cuh"
Expand Down
4 changes: 4 additions & 0 deletions src/moore/add/kernel.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@
#include <musa_runtime.h>
// clang-format on

// clang-format off
#include "moore/polyfills.cuh"
// clang-format on

#include "cuda/add/kernel.h"

namespace infini::ops {
Expand Down
41 changes: 41 additions & 0 deletions src/moore/polyfills.cuh
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
#ifndef INFINI_OPS_MOORE_POLYFILLS_CUH_
#define INFINI_OPS_MOORE_POLYFILLS_CUH_

#include <type_traits>

// clang-format off
#include <musa_bf16.h>
// clang-format on

// Polyfills for CUDA half/bfloat16 intrinsics used by the shared CUDA
// kernels. They are declared in infini::ops so that unqualified calls made
// from kernels in this namespace can resolve to these templates.
// NOTE(review): names beginning with a double underscore are reserved to the
// implementation in C++; these deliberately mirror CUDA intrinsic names —
// confirm they do not collide with MUSA-provided declarations.
namespace infini::ops {

// Generic half-precision add: defers to the element type's operator+.
// Assumes T (e.g. __mt_bfloat16) defines device-side operator+ — TODO confirm.
template <typename T>
__device__ __forceinline__ T __hadd(const T& a, const T& b) {
return a + b;
}

// High element of a packed pair, converted to bfloat16 (round-to-nearest)
// via float. Assumes ::__high2float is provided for T by <musa_bf16.h> —
// TODO confirm.
template <typename T>
__device__ __forceinline__ auto __high2bfloat16(const T& a) {
return __float2bfloat16_rn(::__high2float(a));
}

// Generic negation polyfill: defers to the element type's unary operator-.
template <typename T>
__device__ __forceinline__ T __hneg(const T& a) {
return -a;
}

// Low element of a packed pair, converted to bfloat16 (round-to-nearest)
// via float. Assumes ::__low2float is provided for T — TODO confirm.
template <typename T>
__device__ __forceinline__ auto __low2bfloat16(const T& a) {
return __float2bfloat16_rn(::__low2float(a));
}

// Reciprocal polyfill: widens to float, uses the round-to-nearest
// reciprocal intrinsic, then narrows back to T. Precision therefore matches
// float rcp, not a native half rcp — presumably acceptable for polyfill use.
template <typename T>
__device__ __forceinline__ T hrcp(const T& a) {
return T(__frcp_rn(static_cast<float>(a)));
}

} // namespace infini::ops

// Redirect unqualified hrcp calls in subsequently-included headers to the
// polyfill above. NOTE(review): this is a textual macro — it also rewrites
// ANY later identifier spelled hrcp (members, locals) in headers included
// after this one; verify no such identifiers exist in the include chain.
#define hrcp infini::ops::hrcp

#endif  // INFINI_OPS_MOORE_POLYFILLS_CUH_
49 changes: 49 additions & 0 deletions src/moore/swiglu/kernel.h
Comment thread
voltjia marked this conversation as resolved.
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
#ifndef INFINI_OPS_MOORE_SWIGLU_KERNEL_H_
#define INFINI_OPS_MOORE_SWIGLU_KERNEL_H_

#include <utility>

// clang-format off
#include <musa_runtime.h>
// clang-format on

// clang-format off
#include "moore/polyfills.cuh"
// clang-format on

#include "cuda/swiglu/kernel.h"

namespace infini::ops {

namespace swiglu {

// Runtime shims binding the generic CUDA SwiGLU driver to the MUSA runtime:
// the stream type, the host-to-device copy kind, and allocation / copy /
// release entry points forwarded verbatim to the musa* API.
struct MooreBackend {
    using stream_t = musaStream_t;

    // Copy-direction constant consumed by the generic driver's memcpy calls.
    static constexpr auto memcpyH2D = musaMemcpyHostToDevice;

    // Device allocation; perfect-forwards all arguments to musaMalloc.
    static constexpr auto malloc = [](auto&&... xs) {
        return musaMalloc(std::forward<decltype(xs)>(xs)...);
    };

    // Device deallocation; perfect-forwards all arguments to musaFree.
    static constexpr auto free = [](auto&&... xs) {
        return musaFree(std::forward<decltype(xs)>(xs)...);
    };

    // Synchronous copy; perfect-forwards all arguments to musaMemcpy.
    static constexpr auto memcpy = [](auto&&... xs) {
        return musaMemcpy(std::forward<decltype(xs)>(xs)...);
    };
};

} // namespace swiglu

// Moore (MUSA) specialization of the SwiGLU operator: reuses the generic
// CudaSwiglu implementation, instantiated with the MooreBackend shims, and
// inherits its constructors unchanged.
template <>
class Operator<Swiglu, Device::Type::kMoore>
    : public CudaSwiglu<swiglu::MooreBackend> {
  public:
    using Base = CudaSwiglu<swiglu::MooreBackend>;
    using Base::Base;
};

} // namespace infini::ops

#endif