Define the record_stream method in native_functions.yaml (pytorch#44301)
Summary:
The record_stream method was hard-coded for the CUDA device. Define record_stream in native_functions.yaml so that it can be dispatched dynamically to the appropriate backend device.
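For context, a minimal usage sketch of what the method does on the CUDA backend (the tensor and stream names below are illustrative): record_stream tells the caching allocator that a tensor is in use on another stream, so its memory is not reused until the work queued on that stream has finished. The user-facing behavior is unchanged by this PR; only the binding moves from a hand-written CUDA-only Python method to a dispatched native function.

import torch

t = torch.empty(1024, device='cuda')   # allocated on the current (default) stream
side = torch.cuda.Stream()
with torch.cuda.stream(side):
    out = t * 2                         # t is read on the side stream
t.record_stream(side)                   # don't reuse t's memory until work on `side` completes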

Fixes pytorch#36556

Pull Request resolved: pytorch#44301

Reviewed By: glaringlee

Differential Revision: D23763954

Pulled By: ezyang

fbshipit-source-id: e6d24f5e7892b56101fa858a6cad2abc5cdc4293
chengjunlu authored and facebook-github-bot committed Oct 13, 2020
1 parent d705083 commit 5741de8
Showing 35 changed files with 286 additions and 34 deletions.
6 changes: 6 additions & 0 deletions aten/src/ATen/core/ivalue.cpp
@@ -97,6 +97,8 @@ TypePtr IValue::type() const {
return RRefType::create(toRRef()->type());
case Tag::Device:
return DeviceObjType::get();
case Tag::Stream:
return StreamObjType::get();
case Tag::Object:
return toObjectRef().type();
case Tag::PyObject:
@@ -269,6 +271,8 @@ IValue IValue::equals(const IValue& rhs) const {
return rhs.isGenericDict() && lhs.toGenericDict() == rhs.toGenericDict();
case Tag::Tuple:
return rhs.isTuple() && *lhs.toTuple() == *rhs.toTuple();
case Tag::Stream:
return rhs.isStream() && lhs.toStream() == rhs.toStream();
case Tag::Device:
return rhs.isDevice() && lhs.toDevice() == rhs.toDevice();
case Tag::GenericList:
@@ -634,6 +638,8 @@ std::ostream& operator<<(std::ostream & out, const IValue & v) {
return out << "Uninitialized";
case IValue::Tag::Device:
return out << v.toDevice();
case IValue::Tag::Stream:
return out << v.toStream();
case IValue::Tag::GenericDict:
return printDict(out, v.toGenericDict(), formatter);
case IValue::Tag::PyObject: {
10 changes: 10 additions & 0 deletions aten/src/ATen/core/ivalue.h
@@ -98,6 +98,7 @@ struct OptionalArray {
_(GenericDict) \
_(Future) \
_(Device) \
_(Stream) \
_(Object) \
_(PyObject) \
_(Uninitialized) \
@@ -551,6 +552,15 @@ struct CAFFE2_API IValue final {
return c10::Device(payload.as_device.type, payload.as_device.index);
}

// Stream
IValue(c10::Stream stream)
: tag(Tag::Stream), is_intrusive_ptr(false) {
payload.as_int = stream.pack();
}
c10::Stream toStream() &&;
c10::Stream toStream() const &;
bool isStream() const { return Tag::Stream == tag; }

// ScalarType
IValue(ScalarType t)
: IValue(static_cast<std::underlying_type<ScalarType>::type>(t)) {}
7 changes: 7 additions & 0 deletions aten/src/ATen/core/ivalue_inl.h
@@ -130,6 +130,12 @@ inline at::Tensor IValue::toTensor() const & {
AT_ASSERT(isTensor(), "Expected Tensor but got ", tagKind());
return at::Tensor(toIntrusivePtr<at::TensorImpl, at::UndefinedTensorImpl>());
}
inline c10::Stream IValue::toStream() && {
return c10::Stream::unpack(payload.as_int);
}
inline c10::Stream IValue::toStream() const & {
return c10::Stream::unpack(payload.as_int);
}
inline c10::intrusive_ptr<caffe2::Blob> IValue::toBlob() && {
AT_ASSERT(isBlob(), "Expected Blob but got ", tagKind());
return moveToIntrusivePtr<caffe2::Blob>();
@@ -645,6 +651,7 @@ inline type IValue::to<type>() const & { \
return this->method_name(); \
}
DEFINE_TO(at::Tensor, toTensor)
DEFINE_TO(c10::Stream, toStream)
DEFINE_TO(float, toDouble)
DEFINE_TO(double, toDouble)
DEFINE_TO(unsigned char, toInt)
29 changes: 29 additions & 0 deletions aten/src/ATen/core/jit_type.h
@@ -47,6 +47,7 @@ using OptNameList = c10::optional<std::vector<std::string>>;
_(OptionalType) \
_(VarType) \
_(DeviceObjType) \
_(StreamObjType) \
_(FunctionType) \
_(ClassType) \
_(PyObjectType) \
@@ -1550,6 +1551,28 @@ struct CAFFE2_API DeviceObjType : public Type {
DeviceObjType() : Type(TypeKind::DeviceObjType) {}
};

struct StreamObjType;
using StreamObjTypePtr = std::shared_ptr<StreamObjType>;
// This type represents a Stream
struct CAFFE2_API StreamObjType : public Type {
static StreamObjTypePtr create() {
return StreamObjTypePtr(
new StreamObjType()); // NOLINT(modernize-make-shared)
}
bool operator==(const Type& rhs) const override {
return rhs.kind() == kind();
}
std::string str() const override {
return "Stream";
}
static const TypeKind Kind = TypeKind::StreamObjType;
// global singleton
static StreamObjTypePtr get();

private:
StreamObjType() : Type(TypeKind::StreamObjType) {}
};

struct VarType;
using VarTypePtr = std::shared_ptr<VarType>;
// This type represents a type variable, used in FunctionSchema
@@ -1727,6 +1750,12 @@ struct getTypePtr_<at::Tensor> final {
}
};
template <>
struct getTypePtr_<c10::Stream> final {
static TypePtr call() {
return StreamObjType::get();
}
};
template <>
struct getTypePtr_<double> final {
static TypePtr call() {
return FloatType::get();
4 changes: 4 additions & 0 deletions aten/src/ATen/core/type.cpp
@@ -158,6 +158,10 @@ DeviceObjTypePtr DeviceObjType::get() {
static auto value = DeviceObjType::create();
return value;
}
StreamObjTypePtr StreamObjType::get() {
static auto value = StreamObjType::create();
return value;
}
ScalarTypeTypePtr ScalarTypeType::get() {
static auto value = ScalarTypeType::create();
return value;
Empty file.
7 changes: 7 additions & 0 deletions aten/src/ATen/native/cuda/RecordStream.cu
@@ -0,0 +1,7 @@
#include <ATen/ATen.h>
#include <c10/cuda/CUDACachingAllocator.h>
namespace at { namespace native {
void record_stream_cuda(Tensor& self, c10::Stream stream) {
c10::cuda::CUDACachingAllocator::recordStream(self.storage().data_ptr(), at::cuda::CUDAStream::unpack(stream.pack()));
}
}} // namespace at::native
6 changes: 6 additions & 0 deletions aten/src/ATen/native/native_functions.yaml
@@ -8161,6 +8161,12 @@
variants: function, method
device_guard: False

- func: record_stream(Tensor(a!) self, Stream s) -> ()
use_c10_dispatcher: full
variants: method
dispatch:
CUDA: record_stream_cuda

- func: isposinf(Tensor self) -> Tensor
use_c10_dispatcher: full
variants: function, method
3 changes: 3 additions & 0 deletions aten/src/ATen/templates/TensorBody.h
@@ -4,6 +4,7 @@
#include <c10/core/Layout.h>
#include <c10/core/MemoryFormat.h>
#include <c10/core/QScheme.h>
#include <c10/core/Stream.h>
#include <c10/core/Scalar.h>
#include <c10/core/ScalarType.h>
#include <c10/core/Storage.h>
@@ -49,6 +50,8 @@ namespace at {
class Tensor;
using TensorList = ArrayRef<Tensor>;

using Stream = c10::Stream;

namespace impl {
inline bool variable_excluded_from_dispatch() {
#ifdef C10_MOBILE
3 changes: 3 additions & 0 deletions aten/src/ATen/templates/TensorMethods.cpp
@@ -1,6 +1,7 @@
#include <c10/core/Scalar.h>
#include <c10/core/MemoryFormat.h>
#include <c10/core/QScheme.h>
#include <c10/core/Stream.h>
#include <c10/macros/Macros.h>
#include <c10/core/TensorOptions.h>
#include <c10/util/intrusive_ptr.h>
@@ -14,6 +15,8 @@

namespace at {

using Stream = c10::Stream;

Tensor Tensor::cpu() const {
return to(options().device(DeviceType::CPU), /*non_blocking*/ false, /*copy*/ false);
}
2 changes: 2 additions & 0 deletions test/test_overrides.py
@@ -575,6 +575,8 @@ def instance_gen():
func_args.append(False)
elif t.startswith('int') or t in {'Dimname', 'DimnameList'}:
func_args.append(0)
elif t in {'Stream'}:
func_args.append(torch.Stream())
elif t.startswith('float') or t == 'double':
func_args.append(1.0)
elif t in {'Generator', 'MemoryFormat', 'TensorOptions'}:
1 change: 1 addition & 0 deletions tools/autograd/gen_python_functions.py
@@ -265,6 +265,7 @@ def create_python_bindings(python_functions, is_python_method, module):
UNPACK_METHODS = {
'const Tensor &': 'tensor',
'Tensor &': 'tensor',
'Stream': 'stream',
'c10::optional<Tensor>': 'optionalTensor',
'const c10::optional<Tensor>&': 'optionalTensor',
'c10::optional<Generator>': 'generator',
2 changes: 2 additions & 0 deletions tools/autograd/gen_variable_type.py
@@ -148,6 +148,8 @@
'bucketize',
# Functions that return booleans are not differentiable
'isnan', 'isposinf', 'isneginf', 'isinf',
# Functions that return None are not differentiable
'record_stream',
}

# The C -> R functions at the time of adding this are still being audited and tested
25 changes: 2 additions & 23 deletions tools/autograd/templates/python_variable_methods.cpp
@@ -12,7 +12,6 @@
#include "torch/csrc/autograd/utils/wrap_outputs.h"
#include "torch/csrc/jit/frontend/tracer.h"
#ifdef USE_CUDA
#include "torch/csrc/cuda/Stream.h"
#include "torch/csrc/cuda/Event.h"
#endif
#include "torch/csrc/utils/cuda_lazy_init.h"
@@ -30,6 +29,7 @@

#include <ATen/ATen.h>
#include "c10/util/Optional.h"
#include "c10/core/Stream.h"

#include <stdexcept>

@@ -40,6 +40,7 @@ using at::Backend;
using at::Scalar;
using at::ScalarType;
using at::Tensor;
using c10::Stream;
using namespace torch::autograd::utils;

namespace torch { namespace autograd {
@@ -704,27 +705,6 @@ static PyObject * THPVariable_numpy(PyObject* self, PyObject* arg)
END_HANDLE_TH_ERRORS
}

// TODO: move this to ATen. We would need to expose Stream objects in ATen.
static PyObject * THPVariable_record_stream(PyObject* self, PyObject* arg)
{
HANDLE_TH_ERRORS
if (check_has_torch_function(self)) {
auto args = py::make_tuple(py::handle(arg));
return handle_torch_function(self, "record_stream", args.ptr());
}
#ifdef USE_CUDA
auto& self_ = reinterpret_cast<THPVariable*>(self)->cdata;
if (!THCPStream_Check(arg)) {
return PyErr_Format(PyExc_TypeError, "expected Stream object");
}
c10::cuda::CUDACachingAllocator::recordStream(self_.storage().data_ptr(), at::cuda::CUDAStream::unpack(((THCPStream*)arg)->cdata));
Py_RETURN_NONE;
#else
throw std::runtime_error("PyTorch compiled without CUDA support");
#endif
END_HANDLE_TH_ERRORS
}

static PyObject * THPVariable_requires_grad_(PyObject* self, PyObject* args, PyObject* kwargs)
{
HANDLE_TH_ERRORS
@@ -1181,7 +1161,6 @@ PyMethodDef variable_methods[] = {
{"nonzero", (PyCFunction)(void(*)(void))THPVariable_nonzero, METH_VARARGS | METH_KEYWORDS, NULL},
{"numel", (PyCFunction)THPVariable_numel, METH_NOARGS, NULL},
{"numpy", (PyCFunction)THPVariable_numpy, METH_NOARGS, NULL},
{"record_stream", (PyCFunction)THPVariable_record_stream, METH_O, NULL},
{"requires_grad_", (PyCFunction)(void(*)(void))THPVariable_requires_grad_, METH_VARARGS | METH_KEYWORDS, NULL},
{"set_", (PyCFunction)(void (*)(void))THPVariable_set_, METH_VARARGS | METH_KEYWORDS, NULL},
{"short", (PyCFunction)(void(*)(void))THPVariable_short, METH_VARARGS | METH_KEYWORDS, NULL},
1 change: 1 addition & 0 deletions tools/build_variables.bzl
@@ -474,6 +474,7 @@ libtorch_python_core_sources = [
"torch/csrc/python_dimname.cpp",
"torch/csrc/Size.cpp",
"torch/csrc/Storage.cpp",
"torch/csrc/Stream.cpp",
"torch/csrc/TypeInfo.cpp",
"torch/csrc/api/src/python/init.cpp",
"torch/csrc/autograd/functions/init.cpp",
2 changes: 1 addition & 1 deletion tools/codegen/api/cpp.py
@@ -45,7 +45,7 @@ def valuetype_type(t: Type) -> Optional[str]:
elif t.name in [BaseTy.bool, BaseTy.QScheme, BaseTy.Scalar,
BaseTy.ScalarType, BaseTy.Generator, BaseTy.Storage,
BaseTy.Layout, BaseTy.Device, BaseTy.MemoryFormat,
BaseTy.Dimname, BaseTy.ConstQuantizerPtr]:
BaseTy.Dimname, BaseTy.Stream, BaseTy.ConstQuantizerPtr]:
# These C++ names line up with their schema names
return t.name.name
else:
1 change: 1 addition & 0 deletions tools/codegen/model.py
@@ -582,6 +582,7 @@ def is_list_like(self) -> Optional['ListType']:
'MemoryFormat',
'QScheme',
'Storage',
'Stream',
'ConstQuantizerPtr', # TODO: rename
))

3 changes: 2 additions & 1 deletion tools/pyi/gen_pyi.py
@@ -146,7 +146,8 @@ def type_to_python(typename, size=None):
'Dimname': 'Union[str, ellipsis, None]',
'DimnameList': 'Sequence[Union[str, ellipsis, None]]',
'QScheme': '_qscheme',
'ArrayRef<double>' : 'Sequence[float]'
'ArrayRef<double>' : 'Sequence[float]',
'Stream': 'Stream',
}[typename]

return typename
11 changes: 11 additions & 0 deletions torch/_C/__init__.pyi.in
@@ -35,6 +35,13 @@ class device:

def __reduce__(self) -> Tuple[Any, ...]: ... # THPDevice_reduce

# Defined in torch/csrc/Stream.cpp
class Stream:
_cdata: _int # Stream handle
device: device # The device of the stream

...

# Defined in torch/csrc/Size.cpp
class Size(Tuple[_int, ...]):
# TODO: __reduce__
@@ -676,6 +683,10 @@ class DeviceObjType(JitType):
@staticmethod
def get() -> DeviceObjType: ...

class StreamObjType(JitType):
@staticmethod
def get() -> StreamObjType: ...

class ListType(JitType):
def __init__(self, a: JitType) -> None: ...
def getElementType(self) -> JitType: ...
2 changes: 2 additions & 0 deletions torch/csrc/Module.cpp
@@ -23,6 +23,7 @@
#include <torch/csrc/THP.h>
#include <torch/csrc/DynamicTypes.h>
#include <torch/csrc/Device.h>
#include <torch/csrc/Stream.h>
#include <torch/csrc/Dtype.h>
#include <torch/csrc/DataLoader.h>
#include <torch/csrc/Generator.h>
@@ -723,6 +724,7 @@ PyObject* initModule() {
THPMemoryFormat_init(module);
THPQScheme_init(module);
THPDevice_init(module);
THPStream_init(module);
ASSERT_TRUE(THPVariable_initModule(module));
ASSERT_TRUE(THPFunction_initModule(module));
ASSERT_TRUE(THPEngine_initModule(module));