kernels-community
/

quantization-eetq

Model card Files Files and versions Community

quantization-eetq / torch-ext /torch_binding.cpp

danieldk's picture

danieldk HF staff

quant_weights is a CPU function

8294a79 10 days ago

921 Bytes

	#include <torch/library.h>

	#include "registration.h"
	#include "torch_binding.h"

	// Operator registrations for this extension's `ops` library.
	//
	// Every operator is declared with def() (its schema string) and then bound
	// with impl() to a kernel for the dispatch key named in that call. The schema
	// strings are built from adjacent string literals; their concatenated text is
	// what gets registered.
	TORCH_LIBRARY_EXPAND(TORCH_EXTENSION_NAME, ops) {
	  // w8_a16_gemm: three Tensor arguments, one Tensor result; CUDA kernel.
	  ops.def(
	      "w8_a16_gemm("
	      "Tensor input, Tensor weight, Tensor scale"
	      ") -> Tensor");
	  ops.impl("w8_a16_gemm", torch::kCUDA, &w8_a16_gemm_forward_cuda);

	  // w8_a16_gemm_: same inputs plus an `output` Tensor carrying the `!`
	  // annotation and explicit integer m/n/k arguments; CUDA kernel.
	  // Note: there is no space after the comma following `output` in the
	  // registered schema text; this is kept exactly as in the original.
	  ops.def(
	      "w8_a16_gemm_("
	      "Tensor input, Tensor weight, Tensor scale, Tensor! output,"
	      "int m, int n, int k"
	      ") -> Tensor");
	  ops.impl("w8_a16_gemm_", torch::kCUDA, &w8_a16_gemm_forward_cuda_);

	  // preprocess_weights: a Tensor and a bool flag in, one Tensor out; CUDA kernel.
	  ops.def(
	      "preprocess_weights("
	      "Tensor origin_weight, bool is_int4"
	      ") -> Tensor");
	  ops.impl("preprocess_weights", torch::kCUDA, &preprocess_weights_cuda);

	  // quant_weights: returns a list of Tensors. Unlike the operators above,
	  // its kernel is registered for the CPU dispatch key.
	  // Note: as above, the registered schema text has no space after the comma
	  // that follows `quant_type`.
	  ops.def(
	      "quant_weights("
	      "Tensor origin_weight, ScalarType quant_type,"
	      "bool return_unprocessed_quantized_tensor"
	      ") -> Tensor[]");
	  ops.impl("quant_weights", torch::kCPU, &symmetric_quantize_last_axis_of_tensor);
	}

	// Invokes REGISTER_EXTENSION for this extension's name.
	// NOTE(review): the macro is not defined in this file — presumably it comes
	// from "registration.h" and emits the Python module entry point for the
	// extension; confirm against that header.
	REGISTER_EXTENSION(TORCH_EXTENSION_NAME)