diff --git a/python/paddle/distribution/kl.py b/python/paddle/distribution/kl.py index 6a0838e588c41..ce3b828eaebad 100644 --- a/python/paddle/distribution/kl.py +++ b/python/paddle/distribution/kl.py @@ -38,11 +38,11 @@ def kl_divergence(p, q): KL(p||q) = \int p(x)log\frac{p(x)}{q(x)} \mathrm{d}x Args: - p (Distribution): ``Distribution`` object. - q (Distribution): ``Distribution`` object. + p (Distribution): ``Distribution`` object. Inherits from the Distribution Base class. + q (Distribution): ``Distribution`` object. Inherits from the Distribution Base class. Returns: - Tensor: Batchwise KL-divergence between distribution p and q. + Tensor, Batchwise KL-divergence between distribution p and q. Examples: @@ -71,8 +71,8 @@ def register_kl(cls_p, cls_q): implemention funciton by the decorator. Args: - cls_p(Distribution): Subclass derived from ``Distribution``. - cls_q(Distribution): Subclass derived from ``Distribution``. + cls_p (Distribution): The Distribution type of Instance p. Subclass derived from ``Distribution``. + cls_q (Distribution): The Distribution type of Instance q. Subclass derived from ``Distribution``. Examples: .. code-block:: python diff --git a/python/paddle/distribution/normal.py b/python/paddle/distribution/normal.py index 8a9e5cd7372a7..c9235dc940665 100644 --- a/python/paddle/distribution/normal.py +++ b/python/paddle/distribution/normal.py @@ -36,7 +36,7 @@ class Normal(distribution.Distribution): .. math:: - pdf(x; \mu, \sigma) = \\frac{1}{Z}e^{\\frac {-0.5 (x - \mu)^2} {\sigma^2} } + pdf(x; \mu, \sigma) = \frac{1}{Z}e^{\frac {-0.5 (x - \mu)^2} {\sigma^2} } .. math:: @@ -49,43 +49,43 @@ class Normal(distribution.Distribution): * :math:`Z`: is the normalization constant. Args: - loc(int|float|list|tuple|numpy.ndarray|Tensor): The mean of normal distribution.The data type is int, float, list, numpy.ndarray or Tensor. - scale(int|float|list|tuple|numpy.ndarray|Tensor): The std of normal distribution.The data type is int, float, list, numpy.ndarray or Tensor. + loc(int|float|list|tuple|numpy.ndarray|Tensor): The mean of normal distribution.The data type is float32 and float64. + scale(int|float|list|tuple|numpy.ndarray|Tensor): The std of normal distribution.The data type is float32 and float64. name(str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`. Examples: .. code-block:: python - import paddle - from paddle.distribution import Normal - - # Define a single scalar Normal distribution. - dist = Normal(loc=0., scale=3.) - # Define a batch of two scalar valued Normals. - # The first has mean 1 and standard deviation 11, the second 2 and 22. - dist = Normal(loc=[1., 2.], scale=[11., 22.]) - # Get 3 samples, returning a 3 x 2 tensor. - dist.sample([3]) - - # Define a batch of two scalar valued Normals. - # Both have mean 1, but different standard deviations. 
- dist = Normal(loc=1., scale=[11., 22.]) - - # Complete example - value_tensor = paddle.to_tensor([0.8], dtype="float32") - - normal_a = Normal([0.], [1.]) - normal_b = Normal([0.5], [2.]) - sample = normal_a.sample([2]) - # a random tensor created by normal distribution with shape: [2, 1] - entropy = normal_a.entropy() - # [1.4189385] with shape: [1] - lp = normal_a.log_prob(value_tensor) - # [-1.2389386] with shape: [1] - p = normal_a.probs(value_tensor) - # [0.28969154] with shape: [1] - kl = normal_a.kl_divergence(normal_b) - # [0.34939718] with shape: [1] + import paddle + from paddle.distribution import Normal + + # Define a single scalar Normal distribution. + dist = Normal(loc=0., scale=3.) + # Define a batch of two scalar valued Normals. + # The first has mean 1 and standard deviation 11, the second 2 and 22. + dist = Normal(loc=[1., 2.], scale=[11., 22.]) + # Get 3 samples, returning a 3 x 2 tensor. + dist.sample([3]) + + # Define a batch of two scalar valued Normals. + # Both have mean 1, but different standard deviations. + dist = Normal(loc=1., scale=[11., 22.]) + + # Complete example + value_tensor = paddle.to_tensor([0.8], dtype="float32") + + normal_a = Normal([0.], [1.]) + normal_b = Normal([0.5], [2.]) + sample = normal_a.sample([2]) + # a random tensor created by normal distribution with shape: [2, 1] + entropy = normal_a.entropy() + # [1.4189385] with shape: [1] + lp = normal_a.log_prob(value_tensor) + # [-1.2389386] with shape: [1] + p = normal_a.probs(value_tensor) + # [0.28969154] with shape: [1] + kl = normal_a.kl_divergence(normal_b) + # [0.34939718] with shape: [1] """ def __init__(self, loc, scale, name=None): @@ -132,11 +132,11 @@ def sample(self, shape, seed=0): """Generate samples of the specified shape. Args: - shape (list): 1D `int32`. Shape of the generated samples. - seed (int): Python integer number. + shape (list): 1D `int32`. Shape of the generated samples. + seed (int): Python integer number. Returns: - Tensor: A tensor with prepended dimensions shape.The data type is float32. + Tensor, A tensor with prepended dimensions shape.The data type is float32. """ if not _non_static_mode(): @@ -177,14 +177,14 @@ def entropy(self): .. math:: - entropy(\sigma) = 0.5 \\log (2 \pi e \sigma^2) + entropy(\sigma) = 0.5 \log (2 \pi e \sigma^2) In the above equation: * :math:`scale = \sigma`: is the std. Returns: - Tensor: Shannon entropy of normal distribution.The data type is float32. + Tensor, Shannon entropy of normal distribution.The data type is float32. """ name = self.name + '_entropy' @@ -221,10 +221,10 @@ def probs(self, value): """Probability density/mass function. Args: - value (Tensor): The input tensor. + value (Tensor): The input tensor. Returns: - Tensor: probability.The data type is same with value. + Tensor, probability. The data type is same with value. """ name = self.name + '_probs' @@ -243,11 +243,11 @@ def kl_divergence(self, other): .. math:: - KL\_divergence(\mu_0, \sigma_0; \mu_1, \sigma_1) = 0.5 (ratio^2 + (\\frac{diff}{\sigma_1})^2 - 1 - 2 \\ln {ratio}) + KL\_divergence(\mu_0, \sigma_0; \mu_1, \sigma_1) = 0.5 (ratio^2 + (\frac{diff}{\sigma_1})^2 - 1 - 2 \ln {ratio}) .. math:: - ratio = \\frac{\sigma_0}{\sigma_1} + ratio = \frac{\sigma_0}{\sigma_1} .. math:: @@ -266,7 +266,7 @@ def kl_divergence(self, other): other (Normal): instance of Normal. Returns: - Tensor: kl-divergence between two normal distributions.The data type is float32. + Tensor, kl-divergence between two normal distributions.The data type is float32. 
""" if not _non_static_mode(): diff --git a/python/paddle/distribution/transform.py b/python/paddle/distribution/transform.py index efa3248965157..890b7c737aa71 100644 --- a/python/paddle/distribution/transform.py +++ b/python/paddle/distribution/transform.py @@ -58,7 +58,7 @@ class Transform(object): Suppose :math:`X` is a K-dimensional random variable with probability density function :math:`p_X(x)`. A new random variable :math:`Y = f(X)` may be defined by transforming :math:`X` with a suitably well-behaved funciton - :math:`f`. It suffices for what follows to note that if f is one-to-one and + :math:`f`. It suffices for what follows to note that if `f` is one-to-one and its inverse :math:`f^{-1}` have a well-defined Jacobian, then the density of :math:`Y` is @@ -1001,8 +1001,9 @@ class StackTransform(Transform): specific axis. Args: - transforms(Sequence[Transform]): The sequence of transformations. - axis(int): The axis along which will be transformed. + transforms (Sequence[Transform]): The sequence of transformations. + axis (int, optional): The axis along which will be transformed. default + value is 0. Examples: @@ -1010,7 +1011,6 @@ class StackTransform(Transform): import paddle - x = paddle.stack( (paddle.to_tensor([1., 2., 3.]), paddle.to_tensor([1, 2., 3.])), 1) t = paddle.distribution.StackTransform( @@ -1023,11 +1023,13 @@ class StackTransform(Transform): # [[2.71828175 , 1. ], # [7.38905621 , 4. ], # [20.08553696, 9. ]]) + print(t.inverse(t.forward(x))) # Tensor(shape=[3, 2], dtype=float32, place=Place(gpu:0), stop_gradient=True, # [[1., 1.], # [2., 2.], # [3., 3.]]) + print(t.forward_log_det_jacobian(x)) # Tensor(shape=[3, 2], dtype=float32, place=Place(gpu:0), stop_gradient=True, # [[1. , 0.69314718], diff --git a/python/paddle/distribution/uniform.py b/python/paddle/distribution/uniform.py index 7c085da315686..961d846a527c6 100644 --- a/python/paddle/distribution/uniform.py +++ b/python/paddle/distribution/uniform.py @@ -37,7 +37,7 @@ class Uniform(distribution.Distribution): .. math:: - pdf(x; a, b) = \\frac{1}{Z}, \ a <=x 1 : scale_factor = (in_size-1.0)/(out_size-1.0) else: scale_factor = float(in_size/out_size) - Linear interpolation: + # Linear interpolation: if: align_corners = False , align_mode = 0 input : (N,C,W_in) @@ -243,7 +245,7 @@ def interpolate(x, output: (N,C,W_out) where: W_out = W_{in} * scale_{factor} - Nearest neighbor interpolation: + # Nearest neighbor interpolation: align_corners = False input : (N,C,H_in,W_in) @@ -251,7 +253,7 @@ def interpolate(x, H_out = floor (H_{in} * scale_{factor}) W_out = floor (W_{in} * scale_{factor}) - Bilinear interpolation: + # Bilinear interpolation: if: align_corners = False , align_mode = 0 input : (N,C,H_in,W_in) @@ -264,7 +266,7 @@ def interpolate(x, H_out = H_{in} * scale_{factor} W_out = W_{in} * scale_{factor} - Bicubic interpolation: + # Bicubic interpolation: if: align_corners = False input : (N,C,H_in,W_in) @@ -277,7 +279,7 @@ def interpolate(x, H_out = H_{in} * scale_{factor} W_out = W_{in} * scale_{factor} - Trilinear interpolation: + # Trilinear interpolation: if: align_corners = False , align_mode = 0 input : (N,C,D_in,H_in,W_in) @@ -907,15 +909,16 @@ def dropout(x, training (bool, optional): A flag indicating whether it is in train phrase or not. Default True. mode(str, optional): ['upscale_in_train'(default) | 'downscale_in_infer']. - 1. upscale_in_train(default), upscale the output at training time + 1. 
upscale_in_train(default), upscale the output at training time + + - train: out = input * mask / ( 1.0 - dropout_prob ) + - inference: out = input - - train: out = input * mask / ( 1.0 - dropout_prob ) - - inference: out = input + 2. downscale_in_infer, downscale the output at inference - 2. downscale_in_infer, downscale the output at inference + - train: out = input * mask + - inference: out = input * (1.0 - dropout_prob) - - train: out = input * mask - - inference: out = input * (1.0 - dropout_prob) name (str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`. Returns: @@ -1776,12 +1779,12 @@ def linear(x, weight, bias=None, name=None): def label_smooth(label, prior_dist=None, epsilon=0.1, name=None): r""" Label smoothing is a mechanism to regularize the classifier layer and is called - label-smoothing regularization (LSR). + label-smoothing regularization (LSR).Label smoothing is proposed to encourage + the model to be less confident, since optimizing the log-likelihood of the + correct label directly may cause overfitting and reduce the ability of the + model to adapt. - Label smoothing is proposed to encourage the model to be less confident, - since optimizing the log-likelihood of the correct label directly may - cause overfitting and reduce the ability of the model to adapt. Label - smoothing replaces the ground-truth label :math:`y` with the weighted sum + Label smoothing replaces the ground-truth label :math:`y` with the weighted sum of itself and some fixed distribution :math:`\mu`. For class :math:`k`, i.e. diff --git a/python/paddle/nn/functional/loss.py b/python/paddle/nn/functional/loss.py index 3f5637fa3922b..3e89ef519e9d4 100755 --- a/python/paddle/nn/functional/loss.py +++ b/python/paddle/nn/functional/loss.py @@ -854,15 +854,18 @@ def hsigmoid_loss(input, """ The hierarchical sigmoid organizes the classes into a complete binary tree to reduce the computational complexity and speed up the model training, especially the training of language model. + Each leaf node of the complete binary tree represents a class(word) and each non-leaf node acts as a binary classifier. For each class(word), there's a unique path from root to itself, hsigmoid calculate the cost for each non-leaf node on the path, and sum them to get a total cost. - Comparing to softmax, the OP can reduce the computational complexity from :math:`O(N)` to :math:`O(logN)`, where :math:`N` + + Comparing to softmax, hsigmoid can reduce the computational complexity from :math:`O(N)` to :math:`O(logN)`, where :math:`N` represents the number of classes or the size of word dict. - The OP supports default tree and custom tree. For the default tree, you can refer to `Hierarchical Probabilistic Neural - Network Language Model `_. For the custom - tree, you need to set :attr:`is_custom` to True, and do the following steps (take the language model as an example): + The API supports default tree and custom tree. For the default tree, you can refer to `Hierarchical Probabilistic Neural + Network Language Model `_. + + For the custom tree, you need to set :attr:`is_custom` to True, and do the following steps (take the language model as an example): 1. Using a custom word dict to build a binary tree, each leaf node should be an word in the word dict. 2. Creating a dict map word_id -> path that from the word to the root node, we call it path_table. @@ -1731,9 +1734,7 @@ def margin_cross_entropy(logits, .. 
hint:: The API supports single GPU and multi GPU, and don't supports CPU. - For data parallel mode, set ``group=False``. - For model parallel mode, set ``group=None`` or the group instance return by paddle.distributed.new_group. And logits.shape[-1] can be different at each rank. @@ -1756,12 +1757,12 @@ def margin_cross_entropy(logits, Default value is `'mean'`. Returns: - ``Tensor`` or Tuple of two ``Tensor`` : Return the cross entropy loss if \ - `return_softmax` is False, otherwise the tuple \ - (loss, softmax), softmax is shard_softmax when \ - using model parallel, otherwise softmax is in \ - the same shape with input logits. If ``reduction == None``, \ - the shape of loss is ``[N, 1]``, otherwise the shape is ``[1]``. + Tensor|tuple[Tensor, Tensor], return the cross entropy loss if + `return_softmax` is False, otherwise the tuple (loss, softmax), + softmax is shard_softmax when using model parallel, otherwise + softmax is in the same shape with input logits. If + ``reduction == None``, the shape of loss is ``[N, 1]``, otherwise + the shape is ``[1]``. Examples: diff --git a/python/paddle/nn/functional/norm.py b/python/paddle/nn/functional/norm.py index 03ba72fdda344..f278ad22244ff 100644 --- a/python/paddle/nn/functional/norm.py +++ b/python/paddle/nn/functional/norm.py @@ -31,7 +31,7 @@ def normalize(x, p=2, axis=1, epsilon=1e-12, name=None): r""" - This op normalizes ``x`` along dimension ``axis`` using :math:`L_p` norm. This layer computes + Normalize ``x`` along dimension ``axis`` using :math:`L_p` norm. This layer computes .. math:: @@ -45,7 +45,7 @@ def normalize(x, p=2, axis=1, epsilon=1e-12, name=None): Parameters: x (Tensor): The input tensor could be N-D tensor, and the input data type could be float32 or float64. - p (float|int, optional): The exponent value in the norm formulation. Default: 2 + p (float|int, optional): The exponent value in the norm formulation. Default: 2. axis (int, optional): The axis on which to apply normalization. If `axis < 0`, the dimension to normalization is `x.ndim + axis`. -1 is the last dimension. epsilon (float, optional): Small float added to denominator to avoid dividing by zero. Default is 1e-12. name (str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`. diff --git a/python/paddle/nn/functional/vision.py b/python/paddle/nn/functional/vision.py index 847ba013a0f38..7f7a18d0a2ed2 100644 --- a/python/paddle/nn/functional/vision.py +++ b/python/paddle/nn/functional/vision.py @@ -124,7 +124,7 @@ def grid_sample(x, align_corners=True, name=None): """ - This operation samples input X by using bilinear interpolation or + Sample input X by using bilinear interpolation or nearest interpolation based on flow field grid, which is usually generated by :code:`affine_grid` . When the input X is 4-D Tensor, the grid of shape [N, H, W, 2] is the concatenation of (x, y) @@ -209,6 +209,7 @@ def grid_sample(x, None by default. Returns: + Tensor, The shape of output is [N, C, grid_H, grid_W] or [N, C, grid_D, grid_H, grid_W] in which `grid_D` is the depth of grid, `grid_H` is the height of grid and `grid_W` is the width of grid. The data type is same as input tensor. 
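A minimal usage sketch of ``paddle.nn.functional.grid_sample`` (not part of the patch hunks above) illustrating the return shape that the edited docstring describes; the tensor sizes below are illustrative assumptions only.

.. code-block:: python

    import paddle
    import paddle.nn.functional as F

    # Input feature map with shape [N, C, H, W].
    x = paddle.rand([1, 3, 8, 8])
    # Sampling grid with shape [N, grid_H, grid_W, 2]; coordinates normalized to [-1, 1].
    grid = paddle.rand([1, 5, 6, 2]) * 2 - 1

    out = F.grid_sample(x, grid, mode='bilinear',
                        padding_mode='zeros', align_corners=True)
    print(out.shape)  # [1, 3, 5, 6], i.e. [N, C, grid_H, grid_W] as documented
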
diff --git a/python/paddle/profiler/profiler.py b/python/paddle/profiler/profiler.py index 785e3bc1c2dd7..dcfeebcdc32f9 100644 --- a/python/paddle/profiler/profiler.py +++ b/python/paddle/profiler/profiler.py @@ -132,12 +132,12 @@ def make_scheduler(*, skip_first(int, optional): The number of first steps to drop, not participate in the state transform, and at ProfilerState.CLOSED state. Default value is 0. Returns: - A scheduler function, conforms to above state transform setting. The function will takes one parameter step_num, and returns corresponding ProfilerState. + A scheduler function, conforms to above state transform setting. The function will takes one parameter `step_num`, and returns corresponding ProfilerState. Examples: - 1. profiling range [2, 5] + 1. profiling range [2, 5]. - Assume batch 0: closed, batch 1: ready, batch [2, 5] record + Assume batch 0: closed, batch 1: ready, batch [2, 5] record. .. code-block:: python :name: code-example1 @@ -146,9 +146,9 @@ def make_scheduler(*, profiler.make_scheduler(closed=1, ready=1, record=4, repeat=1) - 2. profiling range [3,6], [9,12], [15,18]... + 2. profiling range [3,6], [9,12], [15,18]. - Assume batch 0: skiped, batch 1: closed, batch 2: ready, batch [3,6]: record, repeat + Assume batch 0: skiped, batch 1: closed, batch 2: ready, batch [3,6]: record, repeat. .. code-block:: python :name: code-example2 @@ -196,12 +196,12 @@ def export_chrome_tracing(dir_name: str, worker_name: Optional[str] = None) -> Callable: r""" Return a callable, used for outputing tracing data to chrome tracing format file. - The output file will be saved in directory ``dir_name``, and file name will be set as worker_name. - if worker_name is not set, the default name is [hostname]_[pid]. + The output file will be saved in directory ``dir_name``, and file name will be set as `worker_name`. + if `worker_name` is not set, the default name is `[hostname]_[pid]`. Args: dir_name(str): Directory to save profiling data. - worker_name(str, optional): Prefix of the file name saved, default is [hostname]_[pid]. + worker_name(str, optional): Prefix of the file name saved, default is `[hostname]_[pid]`. Returns: A callable, which takes a Profiler object as parameter and calls its export method to save data to chrome tracing format file. @@ -246,12 +246,12 @@ def export_protobuf(dir_name: str, worker_name: Optional[str] = None) -> Callable: r""" Return a callable, used for outputing tracing data to protobuf file. - The output file will be saved in directory ``dir_name``, and file name will be set as worker_name. - if worker_name is not set, the default name is [hostname]_[pid]. + The output file will be saved in directory ``dir_name``, and file name will be set as ``worker_name``. + if ``worker_name`` is not set, the default name is `[hostname]_[pid]`. Args: dir_name(str): Directory to save profiling data. - worker_name(str, optional): Prefix of the file name saved, default is [hostname]_[pid]. + worker_name(str, optional): Prefix of the file name saved, default is `[hostname]_[pid]`. Returns: A callable, which takes a Profiler object as parameter and calls its export method to save data to protobuf file. @@ -317,7 +317,7 @@ class Profiler: If not provided (None), the default scheduler will keep tracing until the profiler exits. If it is a tuple, it has two values start_batch and end_batch, which means profiling range [start_batch, end_batch). 
on_trace_ready (Callable, optional): Callable object, serves as callback function, and takes the Profiler object as parameter, which provides a way for users to do post-processing. - This callable object will be called when ``scheduler`` returns ``ProfilerState.RECORD_AND_RETURN``. The default value is :ref:`export_chrome_tracing ` (./profiler_log/). + This callable object will be called when ``scheduler`` returns ``ProfilerState.RECORD_AND_RETURN``. The default value is :ref:`export_chrome_tracing `. timer_only (bool, optional): If it is True, the cost of Dataloader and every step of the model will be count without profiling. Otherwise, the model will be timed and profiled. Default: False. record_shapes (bool, optional): If it is True, collect op's input shape information. Default: False. @@ -339,7 +339,7 @@ class Profiler: #train() p.step() - 2. profiling range [2,4], [7, 9], [11,13] + 2. profiling range [2,4], [7, 9], [11,13]. .. code-block:: python :name: code-example2 @@ -354,7 +354,7 @@ class Profiler: #train() p.step() - 3. Use profiler without context manager, and use default parameters + 3. Use profiler without context manager, and use default parameters. .. code-block:: python :name: code-example3 @@ -369,7 +369,7 @@ class Profiler: p.stop() p.summary() - 4. Use profiler to get throughput and cost of the model + 4. Use profiler to get throughput and cost of the model. .. code-block:: python :name: code-example-timer1 @@ -399,8 +399,7 @@ def forward(self, image, label=None): dataset = RandomDataset(20 * 4) simple_net = SimpleNet() - opt = paddle.optimizer.SGD(learning_rate=1e-3, - parameters=simple_net.parameters()) + opt = paddle.optimizer.SGD(learning_rate=1e-3, parameters=simple_net.parameters()) BATCH_SIZE = 4 loader = paddle.io.DataLoader( dataset, @@ -531,7 +530,7 @@ def start(self): prof.stop() ''' - # Timing only without profiling + # Timing only without profiling. benchmark().begin() if not self.timer_only or self.emit_nvtx: utils._is_profiler_used = True @@ -584,7 +583,7 @@ def stop(self): if self.profile_memory: disable_memory_recorder() # self.current_state -> CLOSED - # In this situation, RECORD state is regarded as RECORD_AND_RETURN + # In this situation, RECORD state is regarded as RECORD_AND_RETURN. if self.record_event: self.record_event.end() self.record_event = None @@ -607,7 +606,7 @@ def step(self, num_samples: Optional[int] = None): Args: num_samples (int|None, optional): Specifies the batch size of every step of the model - that is used to compute throughput when timer_only is True. Default: None. + that is used to compute throughput when `timer_only` is True. Default: None. Examples: .. code-block:: python @@ -645,7 +644,7 @@ def step_info(self, unit=None): r""" Get statistics for current step. If the function is called at certain iteration intervals, the result is the average of all steps between the previous call and - this call. Statistics are as follows: + this call. Statistics are as follows: 1. reader_cost: the cost of loading data measured in seconds. @@ -751,7 +750,7 @@ def export(self, path="", format="json"): Args: path(str): file path of the output. - format(str, optional): output format, can be chosen from ['json', 'pb], 'json' for chrome tracing and 'pb' for protobuf, default value is "json". + format(str, optional): output format, can be chosen from ['json', 'pb'], 'json' for chrome tracing and 'pb' for protobuf, default value is 'json'. 
Examples: diff --git a/python/paddle/profiler/utils.py b/python/paddle/profiler/utils.py index 6eeea876a9c90..fe05aaeb81f9e 100644 --- a/python/paddle/profiler/utils.py +++ b/python/paddle/profiler/utils.py @@ -36,8 +36,10 @@ class RecordEvent(ContextDecorator): Interface for recording a time range by user defined. Args: - name(str): Name of the record event - event_type(TracerEventType, optional): Optional, default value is TracerEventType.PythonUserDefined. It is reserved for internal purpose, and it is better not to specify this parameter. + name (str): Name of the record event. + event_type (TracerEventType, optional): Optional, default value is + `TracerEventType.PythonUserDefined`. It is reserved for internal + purpose, and it is better not to specify this parameter. Examples: .. code-block:: python @@ -59,7 +61,7 @@ class RecordEvent(ContextDecorator): record_event.end() **Note**: - RecordEvent will take effect only when :ref:`Profiler ` is on and at the state of RECORD. + RecordEvent will take effect only when :ref:`Profiler ` is on and at the state of `RECORD`. """ def __init__( @@ -134,7 +136,7 @@ def load_profiler_result(filename: str): filename(str): Name of the exported protobuf file of profiler data. Returns: - ProfilerResult object, which stores profiling data. + ``ProfilerResult`` object, which stores profiling data. Examples: .. code-block:: python diff --git a/python/paddle/tensor/math.py b/python/paddle/tensor/math.py index e9369b5da380b..15dde6a6e3b8e 100644 --- a/python/paddle/tensor/math.py +++ b/python/paddle/tensor/math.py @@ -4122,9 +4122,8 @@ def lerp_(x, y, weight, name=None): def erfinv(x, name=None): r""" - The inverse error function of x. + The inverse error function of x. Please refer to :ref:`api_paddle_erf` - Equation: .. math:: erfinv(erf(x)) = x. @@ -4237,7 +4236,6 @@ def deg2rad(x, name=None): r""" Convert each of the elements of input x from degrees to angles in radians. - Equation: .. math:: deg2rad(x)=\pi * x / 180 @@ -4253,7 +4251,6 @@ def deg2rad(x, name=None): .. code-block:: python import paddle - import numpy as np x1 = paddle.to_tensor([180.0, -180.0, 360.0, -360.0, 90.0, -90.0]) result1 = paddle.deg2rad(x1) @@ -4679,18 +4676,18 @@ def angle(x, name=None): return out def heaviside(x, y, name=None): - """ + r""" Computes the Heaviside step function determined by corresponding element in y for each element in x. The equation is .. math:: heaviside(x, y)= \left\{ - \\begin{array}{lcl} - 0,& &\\text{if} \ x < 0, \\\\ - y,& &\\text{if} \ x = 0, \\\\ - 1,& &\\text{if} \ x > 0. + \begin{array}{lcl} + 0,& &\text{if} \ x < 0, \\ + y,& &\text{if} \ x = 0, \\ + 1,& &\text{if} \ x > 0. \end{array} - \\right. + \right. Note: ``paddle.heaviside`` supports broadcasting. If you want know more about broadcasting, please refer to :ref:`user_guide_broadcasting`. @@ -4716,7 +4713,7 @@ def heaviside(x, y, name=None): paddle.heaviside(x, y) # [[0. , 0.20000000, 1. ], # [0. , 1. , 0.30000001]] - """ + """ op_type = 'elementwise_heaviside' axis = -1 act = None diff --git a/python/paddle/utils/cpp_extension/cpp_extension.py b/python/paddle/utils/cpp_extension/cpp_extension.py index 1d87e4857cd0d..a14266412c4b3 100644 --- a/python/paddle/utils/cpp_extension/cpp_extension.py +++ b/python/paddle/utils/cpp_extension/cpp_extension.py @@ -293,7 +293,7 @@ def CUDAExtension(sources, *args, **kwargs): **kwargs(dict[option], optional): Specify other arguments same as ``setuptools.Extension`` . 
Returns: - setuptools.Extension: An instance of setuptools.Extension + setuptools.Extension: An instance of setuptools.Extension. """ kwargs = normalize_extension_kwargs(kwargs, use_cuda=True) # Note(Aurelius84): While using `setup` and `jit`, the Extension `name` will
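Since the final hunk touches the ``CUDAExtension`` docstring, a short sketch of how its return value is typically passed to ``setup`` may help; the package name and source file names are illustrative placeholders, not part of the patch.

.. code-block:: python

    # Sketch of wiring CUDAExtension into a setup.py; 'custom_relu_example',
    # 'relu_op.cc' and 'relu_op.cu' are hypothetical placeholder names.
    from paddle.utils.cpp_extension import CUDAExtension, setup

    setup(
        name='custom_relu_example',
        ext_modules=CUDAExtension(
            sources=['relu_op.cc', 'relu_op.cu']
        )
    )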