
[docs] fix some format issue #45752

Merged
merged 35 commits on Sep 22, 2022
Changes from 13 commits
Commits
35 commits
0f965b4
fix some error
enkilee Sep 5, 2022
830747e
fix
enkilee Sep 5, 2022
e0a8c82
Merge branch 'PaddlePaddle:develop' into develop
enkilee Sep 5, 2022
a557956
fix some error
enkilee Sep 5, 2022
16c6acb
Merge branch 'PaddlePaddle:develop' into develop
enkilee Sep 5, 2022
19125af
fix bugs
enkilee Sep 5, 2022
31e7181
Merge branch 'PaddlePaddle:develop' into develop
enkilee Sep 5, 2022
c9ddf04
Merge branch 'develop' of https://github.com/enkilee/Paddle into develop
enkilee Sep 13, 2022
e967dfd
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
enkilee Sep 13, 2022
65c84cc
Merge branch 'PaddlePaddle:develop' into develop
enkilee Sep 13, 2022
9cda38c
Merge branches 'develop' and 'develop' of https://github.com/enkilee/…
enkilee Sep 13, 2022
76a8d67
fix some errors
enkilee Sep 13, 2022
1742a4b
fix
enkilee Sep 13, 2022
f9aedbf
Update transform.py
enkilee Sep 16, 2022
f944edb
Update normal.py
enkilee Sep 16, 2022
e9de501
Update uniform.py
enkilee Sep 16, 2022
4081d99
Update kl.py
enkilee Sep 16, 2022
4332987
Update math.py
enkilee Sep 16, 2022
3b821f7
Update math.py
enkilee Sep 16, 2022
e0b1bdd
Update loss.py
enkilee Sep 16, 2022
5d87ad1
Update transform.py
enkilee Sep 16, 2022
fd359b7
Update math.py
enkilee Sep 16, 2022
d11a735
Merge branch 'develop' into pr/enkilee/45752
SigureMo Sep 17, 2022
3891a3b
fix some format issue
SigureMo Sep 17, 2022
da7f003
Update normal.py
SigureMo Sep 17, 2022
8ffe585
fix missing np
SigureMo Sep 17, 2022
0b67ff0
order imports
SigureMo Sep 17, 2022
35b0174
fix some flake8 warning
SigureMo Sep 17, 2022
6633af5
Update python/paddle/tensor/math.py
Ligoml Sep 21, 2022
669cfa9
fix OP-->API
Ligoml Sep 21, 2022
272284e
fix op
Ligoml Sep 21, 2022
f6819bf
fix grid_sample format
Ligoml Sep 21, 2022
ba0b018
trim trailing whitespace
SigureMo Sep 21, 2022
551473b
empty commit, test=document_fix
SigureMo Sep 21, 2022
1f4ed90
empty commit
SigureMo Sep 21, 2022
2 changes: 1 addition & 1 deletion python/paddle/nn/functional/activation.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ def gelu(x, approximate=False, name=None):
gelu activation.

if approximate is True

The activation function of Gelu is calculated element by element. More information refers to :ref: `Gaussian Error Linear Units`.
.. math::

gelu(x) = 0.5 * x * (1 + tanh(\sqrt{\frac{2}{\pi}} * (x + 0.044715x^{3})))
Expand Down
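The gelu hunk above cites the tanh-approximation formula. As a quick sanity check — a standalone sketch using only the standard library, not Paddle code — the approximate form can be compared against the exact erf-based definition of GELU:

```python
import math

def gelu_exact(x):
    # Exact GELU: 0.5 * x * (1 + erf(x / sqrt(2)))
    return 0.5 * x * (1.0 + math.erf(x / math.sqrt(2.0)))

def gelu_approx(x):
    # tanh approximation from the docstring:
    # 0.5 * x * (1 + tanh(sqrt(2/pi) * (x + 0.044715 * x^3)))
    return 0.5 * x * (1.0 + math.tanh(
        math.sqrt(2.0 / math.pi) * (x + 0.044715 * x ** 3)))

for v in (-2.0, -0.5, 0.0, 0.5, 2.0):
    print(f"{v:+.1f}: exact={gelu_exact(v):+.6f} approx={gelu_approx(v):+.6f}")
```

For values in this range the two forms agree to within about 1e-3, which is why `approximate=True` is offered as a faster variant.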
29 changes: 18 additions & 11 deletions python/paddle/nn/functional/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,7 @@ def interpolate(x,
"""

This API resizes a batch of images.

The input must be a 3-D Tensor of the shape (num_batches, channels, in_w)
or 4-D (num_batches, channels, in_h, in_w), or a 5-D Tensor of the shape
(num_batches, channels, in_d, in_h, in_w) or (num_batches, in_d, in_h, in_w, channels),
Expand All @@ -185,10 +186,15 @@ def interpolate(x,

Supporting resample methods:
'linear' : Linear interpolation

'bilinear' : Bilinear interpolation

'trilinear' : Trilinear interpolation

'nearest' : Nearest neighbor interpolation

'bicubic' : Bicubic interpolation

'area': Area interpolation

Linear interpolation is the method of using a line connecting two known quantities
Expand Down Expand Up @@ -226,13 +232,13 @@ def interpolate(x,

.. code-block:: text

For scale_factor:
# For scale_factor:
if align_corners = True && out_size > 1 :
scale_factor = (in_size-1.0)/(out_size-1.0)
else:
scale_factor = float(in_size/out_size)

Linear interpolation:
# Linear interpolation:
if:
align_corners = False , align_mode = 0
input : (N,C,W_in)
Expand All @@ -243,15 +249,15 @@ def interpolate(x,
output: (N,C,W_out) where:
W_out = W_{in} * scale_{factor}

Nearest neighbor interpolation:
# Nearest neighbor interpolation:

align_corners = False
input : (N,C,H_in,W_in)
output: (N,C,H_out,W_out) where:
H_out = floor (H_{in} * scale_{factor})
W_out = floor (W_{in} * scale_{factor})

Bilinear interpolation:
# Bilinear interpolation:
if:
align_corners = False , align_mode = 0
input : (N,C,H_in,W_in)
Expand All @@ -264,7 +270,7 @@ def interpolate(x,
H_out = H_{in} * scale_{factor}
W_out = W_{in} * scale_{factor}

Bicubic interpolation:
# Bicubic interpolation:
if:
align_corners = False
input : (N,C,H_in,W_in)
Expand All @@ -277,7 +283,7 @@ def interpolate(x,
H_out = H_{in} * scale_{factor}
W_out = W_{in} * scale_{factor}

Trilinear interpolation:
# Trilinear interpolation:
if:
align_corners = False , align_mode = 0
input : (N,C,D_in,H_in,W_in)
Expand Down Expand Up @@ -1776,11 +1782,12 @@ def linear(x, weight, bias=None, name=None):
def label_smooth(label, prior_dist=None, epsilon=0.1, name=None):
r"""
Label smoothing is a mechanism to regularize the classifier layer and is called
label-smoothing regularization (LSR).

Label smoothing is proposed to encourage the model to be less confident,
since optimizing the log-likelihood of the correct label directly may
cause overfitting and reduce the ability of the model to adapt. Label
label-smoothing regularization (LSR).Label smoothing is proposed to encourage
the model to be less confident, since optimizing the log-likelihood of the
correct label directly may cause overfitting and reduce the ability of the
model to adapt.

Label
smoothing replaces the ground-truth label :math:`y` with the weighted sum
of itself and some fixed distribution :math:`\mu`. For class :math:`k`,
i.e.
Expand Down
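The label_smooth hunk above describes replacing the ground-truth label with a weighted sum of itself and a fixed distribution. A minimal sketch of that formula — plain Python for illustration, assuming the default uniform prior 1/K, not Paddle's implementation:

```python
def label_smooth(one_hot, epsilon=0.1, prior_dist=None):
    # Smoothed label: (1 - epsilon) * y + epsilon * mu,
    # where mu defaults to the uniform distribution 1/K.
    k = len(one_hot)
    if prior_dist is None:
        prior_dist = [1.0 / k] * k
    return [(1.0 - epsilon) * y + epsilon * mu
            for y, mu in zip(one_hot, prior_dist)]

smoothed = label_smooth([0.0, 1.0, 0.0], epsilon=0.1)
print(smoothed)  # ≈ [0.0333, 0.9333, 0.0333]
```

The smoothed vector still sums to 1, but no class keeps probability exactly 1, which is the regularizing effect LSR is after.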
12 changes: 7 additions & 5 deletions python/paddle/nn/functional/loss.py
Original file line number Diff line number Diff line change
Expand Up @@ -854,15 +854,19 @@ def hsigmoid_loss(input,
"""
The hierarchical sigmoid organizes the classes into a complete binary tree to reduce the computational complexity
and speed up the model training, especially the training of language model.

Each leaf node of the complete binary tree represents a class(word) and each non-leaf node acts as a binary classifier.
For each class(word), there's a unique path from root to itself, hsigmoid calculate the cost for each non-leaf node on
the path, and sum them to get a total cost.

Comparing to softmax, the OP can reduce the computational complexity from :math:`O(N)` to :math:`O(logN)`, where :math:`N`
represents the number of classes or the size of word dict.

The OP supports default tree and custom tree. For the default tree, you can refer to `Hierarchical Probabilistic Neural
Network Language Model <http://www.iro.umontreal.ca/~lisa/pointeurs/hierarchical-nnlm-aistats05.pdf>`_. For the custom
tree, you need to set :attr:`is_custom` to True, and do the following steps (take the language model as an example):
Network Language Model <http://www.iro.umontreal.ca/~lisa/pointeurs/hierarchical-nnlm-aistats05.pdf>`_.

For the custom tree, you need to set :attr:`is_custom` to True, and do the following steps (take the language model as an
example):

1. Using a custom word dict to build a binary tree, each leaf node should be an word in the word dict.
2. Creating a dict map word_id -> path that from the word to the root node, we call it path_table.
Expand Down Expand Up @@ -1731,9 +1735,7 @@ def margin_cross_entropy(logits,

.. hint::
The API supports single GPU and multi GPU, and don't supports CPU.

For data parallel mode, set ``group=False``.

For model parallel mode, set ``group=None`` or the group instance return by paddle.distributed.new_group.
And logits.shape[-1] can be different at each rank.

Expand All @@ -1756,7 +1758,7 @@ def margin_cross_entropy(logits,
Default value is `'mean'`.

Returns:
``Tensor`` or Tuple of two ``Tensor`` : Return the cross entropy loss if \
Return the cross entropy loss if \
`return_softmax` is False, otherwise the tuple \
(loss, softmax), softmax is shard_softmax when \
using model parallel, otherwise softmax is in \
Expand Down
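The hsigmoid_loss hunk above states the complexity drop from :math:`O(N)` to :math:`O(logN)`. A back-of-the-envelope sketch of why — counting one binary classifier per internal node on a root-to-leaf path of a complete binary tree (an illustration of the stated complexity, not the actual kernel):

```python
import math

def softmax_cost(num_classes):
    # Plain softmax scores every class: O(N) work per example.
    return num_classes

def hsigmoid_cost(num_classes):
    # Hierarchical sigmoid evaluates one binary classifier per internal
    # node on the root-to-leaf path of a complete binary tree: O(log N).
    return math.ceil(math.log2(num_classes))

for n in (1024, 100000):
    print(f"N={n}: softmax={softmax_cost(n)}, hsigmoid={hsigmoid_cost(n)}")
```

For a 100k-word vocabulary this is 17 binary decisions per example instead of 100,000 class scores, which is the speed-up the docstring refers to for language models.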
2 changes: 1 addition & 1 deletion python/paddle/nn/functional/norm.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ def normalize(x, p=2, axis=1, epsilon=1e-12, name=None):

Parameters:
x (Tensor): The input tensor could be N-D tensor, and the input data type could be float32 or float64.
p (float|int, optional): The exponent value in the norm formulation. Default: 2
p (float|int, optional): The exponent value in the norm formulation. Default: 2.
axis (int, optional): The axis on which to apply normalization. If `axis < 0`, the dimension to normalization is `x.ndim + axis`. -1 is the last dimension.
epsilon (float, optional): Small float added to denominator to avoid dividing by zero. Default is 1e-12.
name (str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`.
Expand Down
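The normalize hunk above fixes the description of `p` and `epsilon`. The operation itself is simple enough to sketch for a 1-D vector — a plain-Python illustration of the formula, not Paddle's tensor implementation:

```python
def normalize(x, p=2, epsilon=1e-12):
    # L_p-normalize a 1-D vector: x / max(||x||_p, epsilon).
    # epsilon guards against division by zero for all-zero input.
    norm = sum(abs(v) ** p for v in x) ** (1.0 / p)
    return [v / max(norm, epsilon) for v in x]

v = normalize([3.0, 4.0])  # L2 norm is 5, so the result is [0.6, 0.8]
print(v)
```

Note how the `epsilon` default documented in the hunk (1e-12) keeps a zero vector mapped to a zero vector rather than raising an error.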
43 changes: 21 additions & 22 deletions python/paddle/profiler/profiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,12 +132,12 @@ def make_scheduler(*,
skip_first(int, optional): The number of first steps to drop, not participate in the state transform, and at ProfilerState.CLOSED state. Default value is 0.

Returns:
A scheduler function, conforms to above state transform setting. The function will takes one parameter step_num, and returns corresponding ProfilerState.
A scheduler function, conforms to above state transform setting. The function will takes one parameter `step_num`, and returns corresponding ProfilerState.

Examples:
1. profiling range [2, 5]
1. profiling range [2, 5].

Assume batch 0: closed, batch 1: ready, batch [2, 5] record
Assume batch 0: closed, batch 1: ready, batch [2, 5] record.

.. code-block:: python
:name: code-example1
Expand All @@ -146,9 +146,9 @@ def make_scheduler(*,
profiler.make_scheduler(closed=1, ready=1, record=4, repeat=1)


2. profiling range [3,6], [9,12], [15,18]...
2. profiling range [3,6], [9,12], [15,18].

Assume batch 0: skiped, batch 1: closed, batch 2: ready, batch [3,6]: record, repeat
Assume batch 0: skipped, batch 1: closed, batch 2: ready, batch [3,6]: record, repeat.

.. code-block:: python
:name: code-example2
Expand Down Expand Up @@ -196,12 +196,12 @@ def export_chrome_tracing(dir_name: str,
worker_name: Optional[str] = None) -> Callable:
r"""
Return a callable, used for outputing tracing data to chrome tracing format file.
The output file will be saved in directory ``dir_name``, and file name will be set as worker_name.
if worker_name is not set, the default name is [hostname]_[pid].
The output file will be saved in directory ``dir_name``, and file name will be set as `worker_name`.
if `worker_name` is not set, the default name is `[hostname]_[pid]`.

Args:
dir_name(str): Directory to save profiling data.
worker_name(str, optional): Prefix of the file name saved, default is [hostname]_[pid].
worker_name(str, optional): Prefix of the file name saved, default is `[hostname]_[pid]`.

Returns:
A callable, which takes a Profiler object as parameter and calls its export method to save data to chrome tracing format file.
Expand Down Expand Up @@ -246,12 +246,12 @@ def export_protobuf(dir_name: str,
worker_name: Optional[str] = None) -> Callable:
r"""
Return a callable, used for outputing tracing data to protobuf file.
The output file will be saved in directory ``dir_name``, and file name will be set as worker_name.
if worker_name is not set, the default name is [hostname]_[pid].
The output file will be saved in directory ``dir_name``, and file name will be set as ``worker_name``.
if ``worker_name`` is not set, the default name is `[hostname]_[pid]`.

Args:
dir_name(str): Directory to save profiling data.
worker_name(str, optional): Prefix of the file name saved, default is [hostname]_[pid].
worker_name(str, optional): Prefix of the file name saved, default is `[hostname]_[pid]`.

Returns:
A callable, which takes a Profiler object as parameter and calls its export method to save data to protobuf file.
Expand Down Expand Up @@ -317,7 +317,7 @@ class Profiler:
If not provided (None), the default scheduler will keep tracing until the profiler exits. If it is a tuple, it has two values start_batch and end_batch,
which means profiling range [start_batch, end_batch).
on_trace_ready (Callable, optional): Callable object, serves as callback function, and takes the Profiler object as parameter, which provides a way for users to do post-processing.
This callable object will be called when ``scheduler`` returns ``ProfilerState.RECORD_AND_RETURN``. The default value is :ref:`export_chrome_tracing <api_paddle_profiler_export_chrome_tracing>` (./profiler_log/).
This callable object will be called when ``scheduler`` returns ``ProfilerState.RECORD_AND_RETURN``. The default value is :ref:`export_chrome_tracing <api_paddle_profiler_export_chrome_tracing>`.
timer_only (bool, optional): If it is True, the cost of Dataloader and every step of the model will be count without profiling. Otherwise, the model will
be timed and profiled. Default: False.
record_shapes (bool, optional): If it is True, collect op's input shape information. Default: False.
Expand All @@ -339,7 +339,7 @@ class Profiler:
#train()
p.step()

2. profiling range [2,4], [7, 9], [11,13]
2. profiling range [2,4], [7, 9], [11,13].

.. code-block:: python
:name: code-example2
Expand All @@ -354,7 +354,7 @@ class Profiler:
#train()
p.step()

3. Use profiler without context manager, and use default parameters
3. Use profiler without context manager, and use default parameters.

.. code-block:: python
:name: code-example3
Expand All @@ -369,7 +369,7 @@ class Profiler:
p.stop()
p.summary()

4. Use profiler to get throughput and cost of the model
4. Use profiler to get throughput and cost of the model.

.. code-block:: python
:name: code-example-timer1
Expand Down Expand Up @@ -399,8 +399,7 @@ def forward(self, image, label=None):

dataset = RandomDataset(20 * 4)
simple_net = SimpleNet()
opt = paddle.optimizer.SGD(learning_rate=1e-3,
parameters=simple_net.parameters())
opt = paddle.optimizer.SGD(learning_rate=1e-3, parameters=simple_net.parameters())
BATCH_SIZE = 4
loader = paddle.io.DataLoader(
dataset,
Expand Down Expand Up @@ -531,7 +530,7 @@ def start(self):
prof.stop()

'''
# Timing only without profiling
# Timing only without profiling.
benchmark().begin()
if not self.timer_only or self.emit_nvtx:
utils._is_profiler_used = True
Expand Down Expand Up @@ -584,7 +583,7 @@ def stop(self):
if self.profile_memory:
disable_memory_recorder()
# self.current_state -> CLOSED
# In this situation, RECORD state is regarded as RECORD_AND_RETURN
# In this situation, RECORD state is regarded as RECORD_AND_RETURN.
if self.record_event:
self.record_event.end()
self.record_event = None
Expand All @@ -607,7 +606,7 @@ def step(self, num_samples: Optional[int] = None):

Args:
num_samples (int|None, optional): Specifies the batch size of every step of the model
that is used to compute throughput when timer_only is True. Default: None.
that is used to compute throughput when `timer_only` is True. Default: None.

Examples:
.. code-block:: python
Expand Down Expand Up @@ -645,7 +644,7 @@ def step_info(self, unit=None):
r"""
Get statistics for current step. If the function is called at certain iteration
intervals, the result is the average of all steps between the previous call and
this call. Statistics are as follows
this call. Statistics are as follows:

1. reader_cost: the cost of loading data measured in seconds.

Expand Down Expand Up @@ -751,7 +750,7 @@ def export(self, path="", format="json"):

Args:
path(str): file path of the output.
format(str, optional): output format, can be chosen from ['json', 'pb], 'json' for chrome tracing and 'pb' for protobuf, default value is "json".
format(str, optional): output format, can be chosen from ['json', 'pb'], 'json' for chrome tracing and 'pb' for protobuf, default value is 'json'.


Examples:
Expand Down
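The make_scheduler hunks above describe a closed → ready → record cycle with `skip_first` and `repeat`. The state machine can be sketched in pure Python — a reading of the documented behavior (state names and cycle order taken from the docstring), not Paddle's internal scheduler:

```python
def make_scheduler(*, closed, ready, record, repeat=0, skip_first=0):
    # Returns a function step_num -> state name, cycling
    # CLOSED/READY/RECORD after dropping the first `skip_first` steps;
    # repeat=0 means cycle indefinitely.
    period = closed + ready + record

    def scheduler(step_num):
        if step_num < skip_first:
            return "CLOSED"
        step = step_num - skip_first
        if repeat > 0 and step >= repeat * period:
            return "CLOSED"
        phase = step % period
        if phase < closed:
            return "CLOSED"
        if phase < closed + ready:
            return "READY"
        # The last record step returns the collected data as well.
        if phase == period - 1:
            return "RECORD_AND_RETURN"
        return "RECORD"

    return scheduler

# Example 2 from the docstring: record ranges [3,6], [9,12], [15,18], ...
sched = make_scheduler(closed=1, ready=1, record=4, skip_first=1)
print([sched(i) for i in range(8)])
```

Walking the steps reproduces the docstring's example: batch 0 skipped, batch 1 closed, batch 2 ready, batches 3–6 recorded, then the cycle repeats from batch 7.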
8 changes: 4 additions & 4 deletions python/paddle/profiler/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,8 @@ class RecordEvent(ContextDecorator):
Interface for recording a time range by user defined.

Args:
name(str): Name of the record event
event_type(TracerEventType, optional): Optional, default value is TracerEventType.PythonUserDefined. It is reserved for internal purpose, and it is better not to specify this parameter.
name(str): Name of the record event.
event_type(TracerEventType, optional): Optional, default value is `TracerEventType.PythonUserDefined`. It is reserved for internal purpose, and it is better not to specify this parameter.

Examples:
.. code-block:: python
Expand All @@ -59,7 +59,7 @@ class RecordEvent(ContextDecorator):
record_event.end()

**Note**:
RecordEvent will take effect only when :ref:`Profiler <api_paddle_profiler_Profiler>` is on and at the state of RECORD.
RecordEvent will take effect only when :ref:`Profiler <api_paddle_profiler_Profiler>` is on and at the state of `RECORD`.
"""

def __init__(
Expand Down Expand Up @@ -134,7 +134,7 @@ def load_profiler_result(filename: str):
filename(str): Name of the exported protobuf file of profiler data.

Returns:
ProfilerResult object, which stores profiling data.
``ProfilerResult`` object, which stores profiling data.

Examples:
.. code-block:: python
Expand Down
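The RecordEvent hunk above documents a `ContextDecorator` for marking a user-defined time range, usable either as a context manager or via explicit `begin`/`end`. A simplified standalone analogue with `time.perf_counter` — purely illustrative, with no profiler state behind it:

```python
import time
from contextlib import ContextDecorator

class RecordEventSketch(ContextDecorator):
    """Minimal analogue of profiler.RecordEvent: records a named time range."""

    def __init__(self, name):
        self.name = name
        self.begin_ts = None
        self.elapsed = None

    def __enter__(self):
        self.begin_ts = time.perf_counter()
        return self

    def __exit__(self, *exc):
        self.end()
        return False

    def end(self):
        # Idempotent, mirroring the explicit record_event.end() usage
        # shown in the docstring example.
        if self.begin_ts is not None and self.elapsed is None:
            self.elapsed = time.perf_counter() - self.begin_ts

with RecordEventSketch("matmul") as ev:
    sum(i * i for i in range(10000))
print(ev.name, ev.elapsed)
```

The real RecordEvent additionally does nothing unless the Profiler is active and in the RECORD state, as the note in the hunk says.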
2 changes: 1 addition & 1 deletion python/paddle/utils/cpp_extension/cpp_extension.py
Original file line number Diff line number Diff line change
Expand Up @@ -293,7 +293,7 @@ def CUDAExtension(sources, *args, **kwargs):
**kwargs(dict[option], optional): Specify other arguments same as ``setuptools.Extension`` .

Returns:
setuptools.Extension: An instance of setuptools.Extension
setuptools.Extension: An instance of setuptools.Extension.
"""
kwargs = normalize_extension_kwargs(kwargs, use_cuda=True)
# Note(Aurelius84): While using `setup` and `jit`, the Extension `name` will
Expand Down