diff --git a/paconvert/api_mapping.json b/paconvert/api_mapping.json
index 236aa3614..691691d25 100644
--- a/paconvert/api_mapping.json
+++ b/paconvert/api_mapping.json
@@ -126,7 +126,6 @@
       "return_attn_probs"
     ],
     "unsupport_args": [
-      "softmax_scale",
       "window_size",
       "softcap",
       "alibi_slopes",
@@ -451,10 +450,18 @@
     ]
   },
   "torch.Tensor.addmm": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "ChangePrefixMatcher",
+    "kwargs_change": {
+      "mat1": "x",
+      "mat2": "y"
+    }
   },
   "torch.Tensor.addmm_": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "ChangePrefixMatcher",
+    "kwargs_change": {
+      "mat1": "x",
+      "mat2": "y"
+    }
   },
   "torch.Tensor.addmv": {
     "Matcher": "AddMRMatcher",
@@ -568,7 +575,11 @@
     "min_input_args": 0
   },
   "torch.Tensor.as_strided": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "ChangePrefixMatcher",
+    "kwargs_change": {
+      "size": "shape",
+      "storage_offset": "offset"
+    }
   },
   "torch.Tensor.as_subclass": {},
   "torch.Tensor.asin": {
@@ -587,7 +598,10 @@
     "Matcher": "ChangePrefixMatcher"
   },
   "torch.Tensor.atan2": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "ChangePrefixMatcher",
+    "kwargs_change": {
+      "other": "y"
+    }
   },
   "torch.Tensor.atan2_": {},
   "torch.Tensor.atan_": {
@@ -618,10 +632,18 @@
     ]
   },
   "torch.Tensor.baddbmm": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "ChangePrefixMatcher",
+    "kwargs_change": {
+      "batch1": "x",
+      "batch2": "y"
+    }
   },
   "torch.Tensor.baddbmm_": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "ChangePrefixMatcher",
+    "kwargs_change": {
+      "batch1": "x",
+      "batch2": "y"
+    }
   },
   "torch.Tensor.bernoulli": {
     "Matcher": "TensorFunc2PaddleFunc",
@@ -651,16 +673,28 @@
     "Matcher": "ChangePrefixMatcher"
   },
   "torch.Tensor.bitwise_and": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "ChangePrefixMatcher",
+    "kwargs_change": {
+      "other": "y"
+    }
   },
   "torch.Tensor.bitwise_and_": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "ChangePrefixMatcher",
+    "kwargs_change": {
+      "other": "y"
+    }
   },
   "torch.Tensor.bitwise_left_shift": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "ChangePrefixMatcher",
+    "kwargs_change": {
+      "other": "y"
+    }
   },
   "torch.Tensor.bitwise_left_shift_": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "ChangePrefixMatcher",
+    "kwargs_change": {
+      "other": "y"
+    }
   },
   "torch.Tensor.bitwise_not": {
     "Matcher": "ChangePrefixMatcher"
@@ -669,22 +703,40 @@
     "Matcher": "ChangePrefixMatcher"
   },
   "torch.Tensor.bitwise_or": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "ChangePrefixMatcher",
+    "kwargs_change": {
+      "other": "y"
+    }
   },
   "torch.Tensor.bitwise_or_": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "ChangePrefixMatcher",
+    "kwargs_change": {
+      "other": "y"
+    }
   },
   "torch.Tensor.bitwise_right_shift": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "ChangePrefixMatcher",
+    "kwargs_change": {
+      "other": "y"
+    }
   },
   "torch.Tensor.bitwise_right_shift_": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "ChangePrefixMatcher",
+    "kwargs_change": {
+      "other": "y"
+    }
   },
   "torch.Tensor.bitwise_xor": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "ChangePrefixMatcher",
+    "kwargs_change": {
+      "other": "y"
+    }
   },
   "torch.Tensor.bitwise_xor_": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "ChangePrefixMatcher",
+    "kwargs_change": {
+      "other": "y"
+    }
   },
   "torch.Tensor.bmm": {
     "Matcher": "ChangePrefixMatcher"
@@ -699,7 +751,11 @@
     "Matcher": "ChangePrefixMatcher"
   },
   "torch.Tensor.cauchy_": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "ChangePrefixMatcher",
+    "kwargs_change": {
+      "median": "loc",
+      "sigma": "scale"
+    }
   },
   "torch.Tensor.cdouble": {
     "Matcher": "ChangePrefixMatcher"
@@ -776,10 +832,16 @@
     "Matcher": "ChangePrefixMatcher"
   },
   "torch.Tensor.copysign": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "ChangePrefixMatcher",
+    "kwargs_change": {
+      "other": "y"
+    }
   },
   "torch.Tensor.copysign_": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "ChangePrefixMatcher",
+    "kwargs_change": {
+      "other": "y"
+    }
   },
   "torch.Tensor.corrcoef": {
     "Matcher": "ChangePrefixMatcher"
@@ -825,7 +887,11 @@
     "Matcher": "ChangePrefixMatcher"
   },
   "torch.Tensor.cross": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "ChangePrefixMatcher",
+    "kwargs_change": {
+      "other": "y",
+      "dim": "axis"
+    }
   },
   "torch.Tensor.crow_indices": {
     "Matcher": "ChangeAPIMatcher",
@@ -874,10 +940,16 @@
     "Matcher": "ChangePrefixMatcher"
   },
   "torch.Tensor.cumsum": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "ChangePrefixMatcher",
+    "kwargs_change": {
+      "dim": "axis"
+    }
   },
   "torch.Tensor.cumsum_": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "ChangePrefixMatcher",
+    "kwargs_change": {
+      "dim": "axis"
+    }
   },
   "torch.Tensor.data_ptr": {
     "Matcher": "ChangePrefixMatcher"
@@ -901,7 +973,10 @@
     "Matcher": "ChangePrefixMatcher"
   },
   "torch.Tensor.diag": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "ChangePrefixMatcher",
+    "kwargs_change": {
+      "diagonal": "offset"
+    }
   },
   "torch.Tensor.diag_embed": {
     "Matcher": "ChangePrefixMatcher"
@@ -944,7 +1019,10 @@
     "Matcher": "DimOrderMatcher"
   },
   "torch.Tensor.dist": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "ChangePrefixMatcher",
+    "kwargs_change": {
+      "other": "y"
+    }
   },
   "torch.Tensor.div": {
     "Matcher": "ChangePrefixMatcher"
@@ -1097,7 +1175,10 @@
     "Matcher": "ChangePrefixMatcher"
   },
   "torch.Tensor.flip": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "ChangePrefixMatcher",
+    "kwargs_change": {
+      "dims": "axis"
+    }
   },
   "torch.Tensor.fliplr": {
     "Matcher": "GenericMatcher",
@@ -1153,10 +1234,16 @@
     }
   },
   "torch.Tensor.fmax": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "ChangePrefixMatcher",
+    "kwargs_change": {
+      "other": "y"
+    }
   },
   "torch.Tensor.fmin": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "ChangePrefixMatcher",
+    "kwargs_change": {
+      "other": "y"
+    }
   },
   "torch.Tensor.fmod": {
     "Matcher": "Num2TensorBinaryMatcher",
@@ -1216,7 +1303,10 @@
     }
   },
   "torch.Tensor.geometric_": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "ChangePrefixMatcher",
+    "kwargs_change": {
+      "p": "probs"
+    }
   },
   "torch.Tensor.geqrf": {},
   "torch.Tensor.ger": {
@@ -1336,7 +1426,10 @@
     }
   },
   "torch.Tensor.hypot_": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "ChangePrefixMatcher",
+    "kwargs_change": {
+      "other": "y"
+    }
   },
   "torch.Tensor.i0": {
     "Matcher": "ChangePrefixMatcher"
@@ -1405,10 +1498,28 @@
     ]
   },
   "torch.Tensor.index_fill": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "GenericMatcher",
+    "paddle_api": "paddle.Tensor.index_fill",
+    "args_list": [
+      "dim",
+      "index",
+      "value"
+    ],
+    "kwargs_change": {
+      "dim": "axis"
+    }
   },
   "torch.Tensor.index_fill_": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "GenericMatcher",
+    "paddle_api": "paddle.Tensor.index_fill_",
+    "args_list": [
+      "dim",
+      "index",
+      "value"
+    ],
+    "kwargs_change": {
+      "dim": "axis"
+    }
   },
   "torch.Tensor.index_put": {
     "Matcher": "ChangePrefixMatcher"
@@ -1530,7 +1641,10 @@
     }
   },
   "torch.Tensor.ldexp_": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "ChangePrefixMatcher",
+    "kwargs_change": {
+      "other": "y"
+    }
   },
   "torch.Tensor.le": {
     "Matcher": "ChangePrefixMatcher"
@@ -1550,10 +1664,26 @@
     }
   },
   "torch.Tensor.lerp": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "GenericMatcher",
+    "paddle_api": "paddle.Tensor.lerp",
+    "args_list": [
+      "end",
+      "weight"
+    ],
+    "kwargs_change": {
+      "end": "y"
+    }
   },
   "torch.Tensor.lerp_": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "GenericMatcher",
+    "paddle_api": "paddle.Tensor.lerp_",
+    "args_list": [
+      "end",
+      "weight"
+    ],
+    "kwargs_change": {
+      "end": "y"
+    }
   },
   "torch.Tensor.less": {
     "Matcher": "ChangePrefixMatcher"
@@ -2002,7 +2132,14 @@
     "Matcher": "ChangePrefixMatcher"
   },
   "torch.Tensor.nextafter": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "GenericMatcher",
+    "paddle_api": "paddle.Tensor.nextafter",
+    "args_list": [
+      "other"
+    ],
+    "kwargs_change": {
+      "other": "y"
+    }
   },
   "torch.Tensor.nextafter_": {},
   "torch.Tensor.nonzero": {
@@ -2167,10 +2304,18 @@
     "min_input_args": 0
   },
   "torch.Tensor.renorm": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "ChangePrefixMatcher",
+    "kwargs_change": {
+      "dim": "axis",
+      "maxnorm": "max_norm"
+    }
   },
   "torch.Tensor.renorm_": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "ChangePrefixMatcher",
+    "kwargs_change": {
+      "dim": "axis",
+      "maxnorm": "max_norm"
+    }
   },
   "torch.Tensor.repeat": {
     "Matcher": "ChangePrefixMatcher"
@@ -2438,7 +2583,18 @@
   },
   "torch.Tensor.sspaddmm": {},
   "torch.Tensor.std": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "GenericMatcher",
+    "paddle_api": "paddle.Tensor.std",
+    "args_list": [
+      "dim",
+      "unbiased",
+      "keepdim",
+      "*",
+      "correction"
+    ],
+    "kwargs_change": {
+      "dim": "axis"
+    }
   },
   "torch.Tensor.stft": {
     "Matcher": "TensorStftMatcher",
@@ -2701,7 +2857,10 @@
     "Matcher": "ChangePrefixMatcher"
   },
   "torch.Tensor.var": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "ChangePrefixMatcher",
+    "kwargs_change": {
+      "dim": "axis"
+    }
   },
   "torch.Tensor.vdot": {
     "Matcher": "GenericMatcher",
@@ -3217,7 +3376,15 @@
     "Matcher": "ChangePrefixMatcher"
   },
   "torch.abs_": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "GenericMatcher",
+    "paddle_api": "paddle.abs_",
+    "min_input_args": 1,
+    "args_list": [
+      "input"
+    ],
+    "kwargs_change": {
+      "input": "x"
+    }
   },
   "torch.acos": {
     "Matcher": "ChangePrefixMatcher"
@@ -3270,7 +3437,22 @@
     ]
   },
   "torch.addmm": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "GenericMatcher",
+    "paddle_api": "paddle.addmm",
+    "min_input_args": 3,
+    "args_list": [
+      "input",
+      "mat1",
+      "mat2",
+      "*",
+      "beta",
+      "alpha",
+      "out"
+    ],
+    "kwargs_change": {
+      "mat1": "x",
+      "mat2": "y"
+    }
   },
   "torch.addmv": {
     "Matcher": "AddMRMatcher",
@@ -3311,8 +3493,20 @@
     "Matcher": "ChangePrefixMatcher"
   },
   "torch.allclose": {
-    "Matcher": "ChangeAPIMatcher",
-    "paddle_api": "paddle.compat.allclose"
+    "Matcher": "AllcloseMatcher",
+    "paddle_api": "paddle.allclose",
+    "min_input_args": 2,
+    "args_list": [
+      "input",
+      "other",
+      "rtol",
+      "atol",
+      "equal_nan"
+    ],
+    "kwargs_change": {
+      "input": "x",
+      "other": "y"
+    }
   },
   "torch.alpha_dropout": {
     "Matcher": "GenericMatcher",
@@ -3349,7 +3543,17 @@
     "Matcher": "ChangePrefixMatcher"
   },
   "torch.angle": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "GenericMatcher",
+    "paddle_api": "paddle.angle",
+    "min_input_args": 1,
+    "args_list": [
+      "input",
+      "*",
+      "out"
+    ],
+    "kwargs_change": {
+      "input": "x"
+    }
   },
   "torch.any": {
     "Matcher": "ChangePrefixMatcher"
@@ -3357,6 +3561,47 @@
   "torch.arange": {
     "Matcher": "ChangePrefixMatcher"
   },
+  "torch.arcsinh": {
+    "Matcher": "GenericMatcher",
+    "paddle_api": "paddle.asinh",
+    "min_input_args": 1,
+    "args_list": [
+      "input",
+      "*",
+      "out"
+    ],
+    "kwargs_change": {
+      "input": "x"
+    }
+  },
+  "torch.arctan": {
+    "Matcher": "GenericMatcher",
+    "paddle_api": "paddle.atan",
+    "min_input_args": 1,
+    "args_list": [
+      "input",
+      "*",
+      "out"
+    ],
+    "kwargs_change": {
+      "input": "x"
+    }
+  },
+  "torch.arctan2": {
+    "Matcher": "StarredGenericMatcher",
+    "paddle_api": "paddle.atan2",
+    "min_input_args": 2,
+    "args_list": [
+      "input",
+      "other",
+      "*",
+      "out"
+    ],
+    "kwargs_change": {
+      "input": "x",
+      "other": "y"
+    }
+  },
   "torch.argmax": {
     "Matcher": "ChangePrefixMatcher"
   },
@@ -3370,7 +3615,12 @@
     "Matcher": "ChangePrefixMatcher"
   },
   "torch.as_strided": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "ChangePrefixMatcher",
+    "kwargs_change": {
+      "input": "x",
+      "size": "shape",
+      "storage_offset": "offset"
+    }
   },
   "torch.as_tensor": {
     "Matcher": "ChangePrefixMatcher"
@@ -3382,13 +3632,45 @@
     "Matcher": "ChangePrefixMatcher"
   },
   "torch.asinh": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "GenericMatcher",
+    "paddle_api": "paddle.asinh",
+    "min_input_args": 1,
+    "args_list": [
+      "input",
+      "*",
+      "out"
+    ],
+    "kwargs_change": {
+      "input": "x"
+    }
   },
   "torch.atan": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "GenericMatcher",
+    "paddle_api": "paddle.atan",
+    "min_input_args": 1,
+    "args_list": [
+      "input",
+      "*",
+      "out"
+    ],
+    "kwargs_change": {
+      "input": "x"
+    }
   },
   "torch.atan2": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "StarredGenericMatcher",
+    "paddle_api": "paddle.atan2",
+    "min_input_args": 2,
+    "args_list": [
+      "input",
+      "other",
+      "*",
+      "out"
+    ],
+    "kwargs_change": {
+      "input": "x",
+      "other": "y"
+    }
   },
   "torch.atanh": {
     "Matcher": "ChangePrefixMatcher"
@@ -3635,7 +3917,22 @@
     "min_input_args": 0
   },
   "torch.baddbmm": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "GenericMatcher",
+    "paddle_api": "paddle.baddbmm",
+    "min_input_args": 3,
+    "args_list": [
+      "input",
+      "batch1",
+      "batch2",
+      "*",
+      "beta",
+      "alpha",
+      "out"
+    ],
+    "kwargs_change": {
+      "batch1": "x",
+      "batch2": "y"
+    }
   },
   "torch.batch_norm": {},
   "torch.bernoulli": {
@@ -3654,31 +3951,112 @@
   },
   "torch.binary_cross_entropy_with_logits": {},
   "torch.bincount": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "GenericMatcher",
+    "paddle_api": "paddle.bincount",
+    "min_input_args": 1,
+    "args_list": [
+      "input",
+      "weights",
+      "minlength"
+    ],
+    "kwargs_change": {
+      "input": "x"
+    }
   },
   "torch.bitwise_and": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "StarredGenericMatcher",
+    "paddle_api": "paddle.bitwise_and",
+    "min_input_args": 2,
+    "args_list": [
+      "input",
+      "other",
+      "*",
+      "out"
+    ],
+    "kwargs_change": {
+      "input": "x",
+      "other": "y"
+    }
   },
   "torch.bitwise_left_shift": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "GenericMatcher",
+    "paddle_api": "paddle.bitwise_left_shift",
+    "min_input_args": 2,
+    "args_list": [
+      "input",
+      "other",
+      "*",
+      "out"
+    ],
+    "kwargs_change": {
+      "input": "x",
+      "other": "y"
+    }
   },
   "torch.bitwise_not": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "StarredGenericMatcher",
+    "paddle_api": "paddle.bitwise_not",
+    "min_input_args": 1,
+    "args_list": [
+      "input",
+      "*",
+      "out"
+    ],
+    "kwargs_change": {
+      "input": "x"
+    }
   },
   "torch.bitwise_or": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "StarredGenericMatcher",
+    "paddle_api": "paddle.bitwise_or",
+    "min_input_args": 2,
+    "args_list": [
+      "input",
+      "other",
+      "*",
+      "out"
+    ],
+    "kwargs_change": {
+      "input": "x",
+      "other": "y"
+    }
   },
   "torch.bitwise_right_shift": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "GenericMatcher",
+    "paddle_api": "paddle.bitwise_right_shift",
+    "min_input_args": 2,
+    "args_list": [
+      "input",
+      "other",
+      "*",
+      "out"
+    ],
+    "kwargs_change": {
+      "input": "x",
+      "other": "y"
+    }
   },
   "torch.bitwise_xor": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "StarredGenericMatcher",
+    "paddle_api": "paddle.bitwise_xor",
+    "min_input_args": 2,
+    "args_list": [
+      "input",
+      "other",
+      "*",
+      "out"
+    ],
+    "kwargs_change": {
+      "input": "x",
+      "other": "y"
+    }
   },
   "torch.blackman_window": {
     "Matcher": "ChangePrefixMatcher"
   },
   "torch.block_diag": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "SequenceArgsAsListMatcher",
+    "paddle_api": "paddle.block_diag"
   },
   "torch.bmm": {
     "Matcher": "ChangePrefixMatcher"
@@ -3687,7 +4065,8 @@
     "Matcher": "ChangePrefixMatcher"
   },
   "torch.broadcast_tensors": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "SequenceArgsAsListMatcher",
+    "paddle_api": "paddle.broadcast_tensors"
   },
   "torch.broadcast_to": {
     "Matcher": "ChangePrefixMatcher"
@@ -3703,13 +4082,21 @@
     ]
   },
   "torch.cartesian_prod": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "SequenceArgsAsListMatcher",
+    "paddle_api": "paddle.cartesian_prod"
   },
   "torch.cat": {
     "Matcher": "ChangePrefixMatcher"
   },
   "torch.cdist": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "CDistMatcher",
+    "min_input_args": 2,
+    "args_list": [
+      "x1",
+      "x2",
+      "p",
+      "compute_mode"
+    ]
   },
   "torch.ceil": {
     "Matcher": "ChangePrefixMatcher"
@@ -3864,7 +4251,17 @@
     "Matcher": "ChangePrefixMatcher"
   },
   "torch.conj": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "GenericMatcher",
+    "paddle_api": "paddle.conj",
+    "min_input_args": 1,
+    "args_list": [
+      "input",
+      "*",
+      "out"
+    ],
+    "kwargs_change": {
+      "input": "x"
+    }
   },
   "torch.conj_physical": {
     "Matcher": "GenericMatcher",
@@ -3889,7 +4286,19 @@
     "Matcher": "ChangePrefixMatcher"
   },
   "torch.copysign": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "GenericMatcher",
+    "paddle_api": "paddle.copysign",
+    "min_input_args": 2,
+    "args_list": [
+      "input",
+      "other",
+      "*",
+      "out"
+    ],
+    "kwargs_change": {
+      "input": "x",
+      "other": "y"
+    }
   },
   "torch.corrcoef": {
     "Matcher": "GenericMatcher",
@@ -3973,7 +4382,21 @@
     }
   },
   "torch.cross": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "GenericMatcher",
+    "paddle_api": "paddle.cross",
+    "min_input_args": 2,
+    "args_list": [
+      "input",
+      "other",
+      "dim",
+      "*",
+      "out"
+    ],
+    "kwargs_change": {
+      "input": "x",
+      "other": "y",
+      "dim": "axis"
+    }
   },
   "torch.ctc_loss": {},
   "torch.cuda.BFloat16Tensor": {
@@ -4226,7 +4649,11 @@
     "Matcher": "ChangePrefixMatcher"
   },
   "torch.cumsum": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "ChangePrefixMatcher",
+    "kwargs_change": {
+      "input": "x",
+      "dim": "axis"
+    }
   },
   "torch.cumulative_trapezoid": {
     "Matcher": "GenericMatcher",
@@ -4244,7 +4671,17 @@
     }
   },
   "torch.deg2rad": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "GenericMatcher",
+    "paddle_api": "paddle.deg2rad",
+    "min_input_args": 1,
+    "args_list": [
+      "input",
+      "*",
+      "out"
+    ],
+    "kwargs_change": {
+      "input": "x"
+    }
   },
   "torch.det": {
     "Matcher": "GenericMatcher",
@@ -4268,7 +4705,19 @@
     "Matcher": "ChangePrefixMatcher"
   },
   "torch.diag": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "GenericMatcher",
+    "paddle_api": "paddle.diag",
+    "min_input_args": 1,
+    "args_list": [
+      "input",
+      "diagonal",
+      "*",
+      "out"
+    ],
+    "kwargs_change": {
+      "input": "x",
+      "diagonal": "offset"
+    }
   },
   "torch.diag_embed": {
     "Matcher": "ChangePrefixMatcher"
@@ -4310,7 +4759,11 @@
     "Matcher": "ChangePrefixMatcher"
   },
   "torch.dist": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "ChangePrefixMatcher",
+    "kwargs_change": {
+      "input": "x",
+      "other": "y"
+    }
   },
   "torch.distributed.Backend": {
     "Matcher": "DistributedBackendMatcher",
@@ -4525,7 +4978,7 @@
     "min_input_args": 1
   },
   "torch.distributed.is_available": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "DistributedIsAvailableMatcher"
   },
   "torch.distributed.is_initialized": {
     "Matcher": "ChangePrefixMatcher"
@@ -5490,7 +5943,12 @@
     "Matcher": "ChangePrefixMatcher"
   },
   "torch.fft.fftshift": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "FFTShiftMatcher",
+    "min_input_args": 1,
+    "args_list": [
+      "input",
+      "dim"
+    ]
   },
   "torch.fft.hfft": {
     "Matcher": "ChangePrefixMatcher"
@@ -5511,7 +5969,12 @@
     "Matcher": "ChangePrefixMatcher"
   },
   "torch.fft.ifftshift": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "FFTShiftMatcher",
+    "min_input_args": 1,
+    "args_list": [
+      "input",
+      "dim"
+    ]
   },
   "torch.fft.ihfft": {
     "Matcher": "ChangePrefixMatcher"
@@ -5560,7 +6023,11 @@
     "Matcher": "ChangePrefixMatcher"
   },
   "torch.flip": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "ChangePrefixMatcher",
+    "kwargs_change": {
+      "input": "x",
+      "dims": "axis"
+    }
   },
   "torch.fliplr": {
     "Matcher": "GenericMatcher",
@@ -5610,10 +6077,34 @@
     "Matcher": "ChangePrefixMatcher"
   },
   "torch.fmax": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "GenericMatcher",
+    "paddle_api": "paddle.fmax",
+    "min_input_args": 2,
+    "args_list": [
+      "input",
+      "other",
+      "*",
+      "out"
+    ],
+    "kwargs_change": {
+      "input": "x",
+      "other": "y"
+    }
   },
   "torch.fmin": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "GenericMatcher",
+    "paddle_api": "paddle.fmin",
+    "min_input_args": 2,
+    "args_list": [
+      "input",
+      "other",
+      "*",
+      "out"
+    ],
+    "kwargs_change": {
+      "input": "x",
+      "other": "y"
+    }
   },
   "torch.fmod": {
     "Matcher": "Num2TensorBinaryConvertTypeMatcher",
@@ -5685,9 +6176,31 @@
   "torch.functional.atleast_3d": {
     "Matcher": "ChangePrefixMatcher"
   },
+  "torch.functional.block_diag": {
+    "Matcher": "SequenceArgsAsListMatcher",
+    "paddle_api": "paddle.block_diag"
+  },
   "torch.functional.broadcast_shapes": {
     "Matcher": "ChangePrefixMatcher"
   },
+  "torch.functional.broadcast_tensors": {
+    "Matcher": "SequenceArgsAsListMatcher",
+    "paddle_api": "paddle.broadcast_tensors"
+  },
+  "torch.functional.cartesian_prod": {
+    "Matcher": "SequenceArgsAsListMatcher",
+    "paddle_api": "paddle.cartesian_prod"
+  },
+  "torch.functional.cdist": {
+    "Matcher": "CDistMatcher",
+    "min_input_args": 2,
+    "args_list": [
+      "x1",
+      "x2",
+      "p",
+      "compute_mode"
+    ]
+  },
   "torch.functional.einsum": {
     "Matcher": "ChangePrefixMatcher"
   },
@@ -5790,7 +6303,19 @@
     }
   },
   "torch.heaviside": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "GenericMatcher",
+    "paddle_api": "paddle.heaviside",
+    "min_input_args": 2,
+    "args_list": [
+      "input",
+      "values",
+      "*",
+      "out"
+    ],
+    "kwargs_change": {
+      "input": "x",
+      "values": "y"
+    }
   },
   "torch.hinge_embedding_loss": {},
   "torch.histc": {
@@ -5961,7 +6486,17 @@
     }
   },
   "torch.i0": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "GenericMatcher",
+    "paddle_api": "paddle.i0",
+    "min_input_args": 1,
+    "args_list": [
+      "input",
+      "*",
+      "out"
+    ],
+    "kwargs_change": {
+      "input": "x"
+    }
   },
   "torch.iinfo": {
     "Matcher": "GenericMatcher",
@@ -6001,7 +6536,18 @@
     ]
   },
   "torch.index_fill": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "GenericMatcher",
+    "paddle_api": "paddle.index_fill",
+    "args_list": [
+      "input",
+      "dim",
+      "index",
+      "value"
+    ],
+    "kwargs_change": {
+      "input": "x",
+      "dim": "axis"
+    }
   },
   "torch.index_put": {
     "Matcher": "ChangePrefixMatcher"
@@ -6037,7 +6583,17 @@
   },
   "torch.instance_norm": {},
   "torch.inverse": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "StarredGenericMatcher",
+    "paddle_api": "paddle.inverse",
+    "min_input_args": 1,
+    "args_list": [
+      "input",
+      "*",
+      "out"
+    ],
+    "kwargs_change": {
+      "input": "x"
+    }
   },
   "torch.is_autocast_enabled": {
     "Matcher": "ChangePrefixMatcher"
@@ -6276,7 +6832,19 @@
     "Matcher": "ChangePrefixMatcher"
   },
   "torch.lerp": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "GenericMatcher",
+    "paddle_api": "paddle.lerp",
+    "args_list": [
+      "input",
+      "end",
+      "weight",
+      "*",
+      "out"
+    ],
+    "kwargs_change": {
+      "input": "x",
+      "end": "y"
+    }
   },
   "torch.less": {
     "Matcher": "ChangePrefixMatcher"
@@ -6947,14 +7515,7 @@
   },
   "torch.margin_ranking_loss": {},
   "torch.masked_fill": {
-    "Matcher": "GenericMatcher",
-    "paddle_api": "paddle.masked_fill",
-    "min_input_args": 3,
-    "args_list": [
-      "input",
-      "mask",
-      "value"
-    ],
+    "Matcher": "ChangePrefixMatcher",
     "kwargs_change": {
       "input": "x"
     }
@@ -6973,16 +7534,43 @@
     "paddle_api": "paddle.compat.max"
   },
   "torch.max_pool1d": {
-    "Matcher": "ChangeAPIMatcher",
-    "paddle_api": "paddle.nn.functional.max_pool1d"
+    "Matcher": "MaxPoolMatcher",
+    "paddle_api": "paddle.nn.functional.max_pool1d",
+    "min_input_args": 2,
+    "args_list": [
+      "input",
+      "kernel_size",
+      "stride",
+      "padding",
+      "dilation",
+      "ceil_mode"
+    ]
   },
   "torch.max_pool2d": {
-    "Matcher": "ChangeAPIMatcher",
-    "paddle_api": "paddle.nn.functional.max_pool2d"
+    "Matcher": "MaxPoolMatcher",
+    "paddle_api": "paddle.nn.functional.max_pool2d",
+    "min_input_args": 2,
+    "args_list": [
+      "input",
+      "kernel_size",
+      "stride",
+      "padding",
+      "dilation",
+      "ceil_mode"
+    ]
   },
   "torch.max_pool3d": {
-    "Matcher": "ChangeAPIMatcher",
-    "paddle_api": "paddle.nn.functional.max_pool3d"
+    "Matcher": "MaxPoolMatcher",
+    "paddle_api": "paddle.nn.functional.max_pool3d",
+    "min_input_args": 2,
+    "args_list": [
+      "input",
+      "kernel_size",
+      "stride",
+      "padding",
+      "dilation",
+      "ceil_mode"
+    ]
   },
   "torch.maximum": {
     "Matcher": "ChangePrefixMatcher"
@@ -7005,7 +7593,15 @@
     "Matcher": "ChangePrefixMatcher"
   },
   "torch.mm": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "StarredGenericMatcher",
+    "paddle_api": "paddle.mm",
+    "min_input_args": 2,
+    "args_list": [
+      "input",
+      "mat2",
+      "*",
+      "out"
+    ]
   },
   "torch.mode": {
     "Matcher": "DoubleAssignMatcher",
@@ -7231,7 +7827,18 @@
     }
   },
   "torch.nextafter": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "GenericMatcher",
+    "paddle_api": "paddle.nextafter",
+    "args_list": [
+      "input",
+      "other",
+      "*",
+      "out"
+    ],
+    "kwargs_change": {
+      "input": "x",
+      "other": "y"
+    }
   },
   "torch.nn.AdaptiveAvgPool1d": {
     "Matcher": "ChangePrefixMatcher"
@@ -7635,7 +8242,11 @@
     "unsupport_args": [
       "output_ratio",
       "_random_samples"
-    ]
+    ],
+    "kwargs_change": {
+      "return_indices": "return_mask",
+      "_random_samples": "random_u"
+    }
   },
   "torch.nn.FractionalMaxPool3d": {
     "Matcher": "GenericMatcher",
@@ -7651,7 +8262,11 @@
     "unsupport_args": [
       "output_ratio",
       "_random_samples"
-    ]
+    ],
+    "kwargs_change": {
+      "return_indices": "return_mask",
+      "_random_samples": "random_u"
+    }
   },
   "torch.nn.GELU": {
     "Matcher": "ChangePrefixMatcher"
@@ -7918,7 +8533,13 @@
     "Matcher": "ChangePrefixMatcher"
   },
   "torch.nn.LeakyReLU": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "DropInplaceModuleMatcher",
+    "paddle_api": "paddle.nn.LeakyReLU",
+    "min_input_args": 0,
+    "args_list": [
+      "negative_slope",
+      "inplace"
+    ]
   },
   "torch.nn.Linear": {
     "Matcher": "ChangeAPIMatcher",
@@ -7966,22 +8587,55 @@
     "min_input_args": 0
   },
   "torch.nn.MaxPool1d": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "MaxPoolMatcher",
+    "paddle_api": "paddle.nn.MaxPool1D",
+    "min_input_args": 1,
+    "args_list": [
+      "kernel_size",
+      "stride",
+      "padding",
+      "dilation",
+      "return_indices",
+      "ceil_mode"
+    ]
   },
   "torch.nn.MaxPool2d": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "MaxPoolMatcher",
+    "paddle_api": "paddle.nn.MaxPool2D",
+    "min_input_args": 1,
+    "args_list": [
+      "kernel_size",
+      "stride",
+      "padding",
+      "dilation",
+      "return_indices",
+      "ceil_mode"
+    ]
   },
   "torch.nn.MaxPool3d": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "MaxPoolMatcher",
+    "paddle_api": "paddle.nn.MaxPool3D",
+    "min_input_args": 1,
+    "args_list": [
+      "kernel_size",
+      "stride",
+      "padding",
+      "dilation",
+      "return_indices",
+      "ceil_mode"
+    ]
   },
   "torch.nn.MaxUnpool1d": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "ChangeAPIMatcher",
+    "paddle_api": "paddle.nn.MaxUnPool1D"
   },
   "torch.nn.MaxUnpool2d": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "ChangeAPIMatcher",
+    "paddle_api": "paddle.nn.MaxUnPool2D"
   },
   "torch.nn.MaxUnpool3d": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "ChangeAPIMatcher",
+    "paddle_api": "paddle.nn.MaxUnPool3D"
   },
   "torch.nn.Mish": {
     "Matcher": "GenericMatcher",
@@ -8350,7 +9004,12 @@
     ]
   },
   "torch.nn.ReLU": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "DropInplaceModuleMatcher",
+    "paddle_api": "paddle.nn.ReLU",
+    "min_input_args": 0,
+    "args_list": [
+      "inplace"
+    ]
   },
   "torch.nn.ReLU6": {
     "Matcher": "GenericMatcher",
@@ -9297,10 +9956,24 @@
     "Matcher": "ChangePrefixMatcher"
   },
   "torch.nn.functional.leaky_relu": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "ActivationFunctionMatcher",
+    "paddle_api": "paddle.nn.functional.leaky_relu",
+    "min_input_args": 1,
+    "args_list": [
+      "input",
+      "negative_slope",
+      "inplace"
+    ]
   },
   "torch.nn.functional.leaky_relu_": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "ActivationFunctionMatcher",
+    "paddle_api": "paddle.nn.functional.leaky_relu_",
+    "min_input_args": 1,
+    "args_list": [
+      "input",
+      "negative_slope",
+      "inplace"
+    ]
   },
   "torch.nn.functional.linear": {
     "Matcher": "ChangeAPIMatcher",
@@ -9367,13 +10040,46 @@
     "min_input_args": 3
   },
   "torch.nn.functional.max_pool1d": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "MaxPoolMatcher",
+    "paddle_api": "paddle.nn.functional.max_pool1d",
+    "min_input_args": 2,
+    "args_list": [
+      "input",
+      "kernel_size",
+      "stride",
+      "padding",
+      "dilation",
+      "ceil_mode",
+      "return_indices"
+    ]
   },
   "torch.nn.functional.max_pool2d": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "MaxPoolMatcher",
+    "paddle_api": "paddle.nn.functional.max_pool2d",
+    "min_input_args": 2,
+    "args_list": [
+      "input",
+      "kernel_size",
+      "stride",
+      "padding",
+      "dilation",
+      "ceil_mode",
+      "return_indices"
+    ]
   },
   "torch.nn.functional.max_pool3d": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "MaxPoolMatcher",
+    "paddle_api": "paddle.nn.functional.max_pool3d",
+    "min_input_args": 2,
+    "args_list": [
+      "input",
+      "kernel_size",
+      "stride",
+      "padding",
+      "dilation",
+      "ceil_mode",
+      "return_indices"
+    ]
   },
   "torch.nn.functional.max_unpool1d": {
     "Matcher": "GenericMatcher",
@@ -9597,7 +10303,13 @@
     }
   },
   "torch.nn.functional.relu": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "ActivationFunctionMatcher",
+    "paddle_api": "paddle.nn.functional.relu",
+    "min_input_args": 1,
+    "args_list": [
+      "input",
+      "inplace"
+    ]
   },
   "torch.nn.functional.relu6": {
     "Matcher": "GenericMatcher",
@@ -9612,7 +10324,13 @@
     "min_input_args": 1
   },
   "torch.nn.functional.relu_": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "ActivationFunctionMatcher",
+    "paddle_api": "paddle.nn.functional.relu_",
+    "min_input_args": 1,
+    "args_list": [
+      "input",
+      "inplace"
+    ]
   },
   "torch.nn.functional.rrelu": {
     "Matcher": "GenericMatcher",
@@ -9915,6 +10633,23 @@
   "torch.nn.init.zeros_": {
     "Matcher": "ChangePrefixMatcher"
   },
+  "torch.nn.modules.activation.LeakyReLU": {
+    "Matcher": "DropInplaceModuleMatcher",
+    "paddle_api": "paddle.nn.LeakyReLU",
+    "min_input_args": 0,
+    "args_list": [
+      "negative_slope",
+      "inplace"
+    ]
+  },
+  "torch.nn.modules.activation.ReLU": {
+    "Matcher": "DropInplaceModuleMatcher",
+    "paddle_api": "paddle.nn.ReLU",
+    "min_input_args": 0,
+    "args_list": [
+      "inplace"
+    ]
+  },
   "torch.nn.modules.batchnorm._BatchNorm": {
     "Matcher": "ReverseMomentumMatcher",
     "paddle_api": "paddle.nn.layer.norm._BatchNormBase",
@@ -10064,10 +10799,22 @@
     "min_input_args": 1
   },
   "torch.nn.utils.rnn.pad_sequence": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "RNNSequenceMatcher",
+    "min_input_args": 1,
+    "args_list": [
+      "sequences",
+      "batch_first",
+      "padding_value"
+    ]
   },
   "torch.nn.utils.rnn.unpad_sequence": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "RNNSequenceMatcher",
+    "min_input_args": 2,
+    "args_list": [
+      "padded_sequences",
+      "lengths",
+      "batch_first"
+    ]
   },
   "torch.nn.utils.spectral_norm": {
     "Matcher": "GenericMatcher",
@@ -10110,7 +10857,16 @@
     "Matcher": "ChangePrefixMatcher"
   },
   "torch.normal": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "NormalMatcher",
+    "min_input_args": 1,
+    "args_list": [
+      "mean",
+      "std",
+      "size",
+      "*",
+      "generator",
+      "out"
+    ]
   },
   "torch.not_equal": {
     "Matcher": "ChangePrefixMatcher"
@@ -10995,7 +11751,17 @@
     }
   },
   "torch.reciprocal": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "GenericMatcher",
+    "paddle_api": "paddle.reciprocal",
+    "min_input_args": 1,
+    "args_list": [
+      "input",
+      "*",
+      "out"
+    ],
+    "kwargs_change": {
+      "input": "x"
+    }
   },
   "torch.relu": {
     "Matcher": "GenericMatcher",
@@ -11012,7 +11778,22 @@
     "Matcher": "ChangePrefixMatcher"
   },
   "torch.renorm": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "GenericMatcher",
+    "paddle_api": "paddle.renorm",
+    "min_input_args": 3,
+    "args_list": [
+      "input",
+      "p",
+      "dim",
+      "maxnorm",
+      "*",
+      "out"
+    ],
+    "kwargs_change": {
+      "input": "x",
+      "dim": "axis",
+      "maxnorm": "max_norm"
+    }
   },
   "torch.repeat_interleave": {
     "Matcher": "ChangePrefixMatcher"
@@ -11758,16 +12539,56 @@
     }
   },
   "torch.special.i0": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "GenericMatcher",
+    "paddle_api": "paddle.i0",
+    "min_input_args": 1,
+    "args_list": [
+      "input",
+      "*",
+      "out"
+    ],
+    "kwargs_change": {
+      "input": "x"
+    }
   },
   "torch.special.i0e": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "GenericMatcher",
+    "paddle_api": "paddle.i0e",
+    "min_input_args": 1,
+    "args_list": [
+      "input",
+      "*",
+      "out"
+    ],
+    "kwargs_change": {
+      "input": "x"
+    }
   },
   "torch.special.i1": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "GenericMatcher",
+    "paddle_api": "paddle.i1",
+    "min_input_args": 1,
+    "args_list": [
+      "input",
+      "*",
+      "out"
+    ],
+    "kwargs_change": {
+      "input": "x"
+    }
   },
   "torch.special.i1e": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "GenericMatcher",
+    "paddle_api": "paddle.i1e",
+    "min_input_args": 1,
+    "args_list": [
+      "input",
+      "*",
+      "out"
+    ],
+    "kwargs_change": {
+      "input": "x"
+    }
   },
   "torch.special.log1p": {
     "Matcher": "GenericMatcher",
@@ -11934,7 +12755,17 @@
     "Matcher": "ChangePrefixMatcher"
   },
   "torch.square": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "GenericMatcher",
+    "paddle_api": "paddle.square",
+    "min_input_args": 1,
+    "args_list": [
+      "input",
+      "*",
+      "out"
+    ],
+    "kwargs_change": {
+      "input": "x"
+    }
   },
   "torch.squeeze": {
     "Matcher": "ChangePrefixMatcher"
@@ -11943,7 +12774,21 @@
     "Matcher": "ChangePrefixMatcher"
   },
   "torch.std": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "StdMatcher",
+    "paddle_api": "paddle.std",
+    "args_list": [
+      "input",
+      "dim",
+      "unbiased",
+      "keepdim",
+      "*",
+      "correction",
+      "out"
+    ],
+    "kwargs_change": {
+      "input": "x",
+      "dim": "axis"
+    }
   },
   "torch.std_mean": {
     "Matcher": "StdMeanMatcher",
@@ -12055,7 +12900,17 @@
     "Matcher": "ChangePrefixMatcher"
   },
   "torch.tan": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "GenericMatcher",
+    "paddle_api": "paddle.tan",
+    "min_input_args": 1,
+    "args_list": [
+      "input",
+      "*",
+      "out"
+    ],
+    "kwargs_change": {
+      "input": "x"
+    }
   },
   "torch.tanh": {
     "Matcher": "ChangePrefixMatcher"
@@ -12100,7 +12955,23 @@
     }
   },
   "torch.testing.assert_close": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "TestingAssertCloseMatcher",
+    "paddle_api": "paddle.allclose",
+    "min_input_args": 2,
+    "args_list": [
+      "actual",
+      "expected",
+      "*",
+      "allow_subclasses",
+      "rtol",
+      "atol",
+      "equal_nan",
+      "check_device",
+      "check_dtype",
+      "check_layout",
+      "check_stride",
+      "msg"
+    ]
   },
   "torch.testing.make_tensor": {
     "Matcher": "MakeTMatcher",
@@ -12325,10 +13196,12 @@
     ]
   },
   "torch.utils.data.ChainDataset": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "ChangeAPIMatcher",
+    "paddle_api": "paddle.io.ChainDataset"
   },
   "torch.utils.data.ConcatDataset": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "ChangeAPIMatcher",
+    "paddle_api": "paddle.io.ConcatDataset"
   },
   "torch.utils.data.DataLoader": {
     "Matcher": "GenericMatcher",
@@ -12366,10 +13239,12 @@
     ]
   },
   "torch.utils.data.Dataset": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "ChangeAPIMatcher",
+    "paddle_api": "paddle.io.Dataset"
   },
   "torch.utils.data.IterableDataset": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "ChangeAPIMatcher",
+    "paddle_api": "paddle.io.IterableDataset"
   },
   "torch.utils.data.RandomSampler": {
     "Matcher": "GenericMatcher",
@@ -12383,13 +13258,16 @@
     ]
   },
   "torch.utils.data.Sampler": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "ChangeAPIMatcher",
+    "paddle_api": "paddle.io.Sampler"
   },
   "torch.utils.data.SequentialSampler": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "ChangeAPIMatcher",
+    "paddle_api": "paddle.io.SequenceSampler"
   },
   "torch.utils.data.Subset": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "ChangeAPIMatcher",
+    "paddle_api": "paddle.io.Subset"
   },
   "torch.utils.data.SubsetRandomSampler": {
     "Matcher": "GenericMatcher",
@@ -12448,10 +13326,12 @@
     }
   },
   "torch.utils.data.get_worker_info": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "ChangeAPIMatcher",
+    "paddle_api": "paddle.io.get_worker_info"
   },
   "torch.utils.data.random_split": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "ChangeAPIMatcher",
+    "paddle_api": "paddle.io.random_split"
   },
   "torch.utils.dlpack.from_dlpack": {
     "Matcher": "GenericMatcher",
@@ -12497,7 +13377,11 @@
     }
   },
   "torch.var": {
-    "Matcher": "ChangePrefixMatcher"
+    "Matcher": "ChangePrefixMatcher",
+    "kwargs_change": {
+      "input": "x",
+      "dim": "axis"
+    }
   },
   "torch.var_mean": {
     "Matcher": "VarMeanMatcher",
diff --git a/paconvert/api_matcher.py b/paconvert/api_matcher.py
index fcade0982..698f6d422 100644
--- a/paconvert/api_matcher.py
+++ b/paconvert/api_matcher.py
@@ -166,6 +166,16 @@ def generate_code(self, kwargs):
         return code
 
 
+class StdMatcher(GenericMatcher):  # GenericMatcher with a lone-`*args` shortcut
+    def get_paddle_nodes(self, args, kwargs):
+        if len(args) == 1 and isinstance(args[0], ast.Starred) and len(kwargs) == 0:
+            args = self.parse_args(args)  # call is exactly `api(*seq)`
+            code = "{}({})".format(self.get_paddle_api(), self.args_to_str(args))
+            return ast.parse(code).body  # statements of the rebuilt call
+
+        return super().get_paddle_nodes(args, kwargs)  # otherwise: GenericMatcher
+
+
 class SliceScatterMatcher(BaseMatcher):
     def generate_code(self, kwargs):
         if "input" in kwargs:
@@ -381,10 +391,9 @@ def get_paddle_nodes(self, args, kwargs):
         args = self.parse_args(args)
         kwargs = self.parse_kwargs(kwargs, allow_none=True)
 
-        # temporary delete these unsupport args, which paddle does not support now
-        for k in ["layout", "generator", "memory_format", "sparse_grad"]:
-            if k in kwargs:
-                kwargs.pop(k)
+        kwargs = self.change_kwargs(
+            kwargs, ["layout", "generator", "memory_format", "sparse_grad"]
+        )
         code = f"{self.get_paddle_api()}({self.args_and_kwargs_to_str(args, kwargs)})"
         return ast.parse(code).body
 
@@ -1447,14 +1456,67 @@ def generate_code(self, kwargs):
         return API_TEMPLATE.format(self.get_paddle_api(), self.kwargs_to_str(kwargs))
 
 
-class FAFlashAttnFuncMatcher(BaseMatcher):
+class FlashAttnMatcher(BaseMatcher):  # shared arg parsing for flash-attn APIs
+    OPTIONAL_UNSUPPORT_ARG_DEFAULTS = {  # per-arg source texts that may be dropped
+        "window_size": {"None", "(-1,-1)"},
+        "softcap": {"None", "(0)", "(0.0)"},
+        "alibi_slopes": {"None"},
+        "deterministic": {"(False)"},
+        "block_table": {"None"},
+    }
+
+    def is_ignorable_unsupport_arg(self, key, value):  # value: source text of arg
+        normalized_value = value.replace(" ", "")  # "(-1, -1)" -> "(-1,-1)"
+        return normalized_value in self.OPTIONAL_UNSUPPORT_ARG_DEFAULTS.get(key, set())
+
+    def parse_args_and_kwargs(
+        self, args, kwargs, allow_starred=False, allow_none=False
+    ):  # -> dict of name -> source text, or None, or "misidentify"
+        args_list = self.api_mapping_dict.get("args_list") or []
+        unsupport_args = self.api_mapping_dict.get("unsupport_args") or []
+        if len(args) > len(args_list):
+            return "misidentify"
+
+        new_kwargs = {}
+        for i, node in enumerate(args):
+            if isinstance(node, ast.Starred) and not allow_starred:
+                return None  # `*seq` cannot be bound to one named parameter
+            key = args_list[i]
+            value = astor.to_source(node).replace("\n", "")
+            if key in unsupport_args:
+                if self.is_ignorable_unsupport_arg(key, value):
+                    continue  # listed as droppable for this arg: leave it out
+                return None
+            new_kwargs[key] = value
+
+        for node in kwargs:
+            key = node.arg
+            if key is None:  # `**mapping` expansion carries no keyword name
+                if not allow_none:
+                    return None
+                continue
+            value = astor.to_source(node.value).replace("\n", "")
+            if key in unsupport_args:
+                if self.is_ignorable_unsupport_arg(key, value):
+                    continue
+                return None
+            new_kwargs[key] = value
+
+        return new_kwargs
+
+
+class FAFlashAttnFuncMatcher(FlashAttnMatcher):  # appends "[0]" to the call
     def generate_code(self, kwargs):
+        if kwargs.get("softmax_scale") == "None":  # literal `None` was passed
+            kwargs.pop("softmax_scale")  # omit it from the generated call
         return GenericMatcher.generate_code(self, kwargs) + "[0]"
 
 
-class FAFlashAttnUnpaddedFuncMatcher(BaseMatcher):
+class FAFlashAttnUnpaddedFuncMatcher(FlashAttnMatcher):
     def generate_code(self, kwargs):
         kwargs = self.change_kwargs(kwargs)
+        if kwargs.get("scale") == "None":
+            kwargs.pop("scale")
         if "scale" not in kwargs:
             API_TEMPLATE = textwrap.dedent(
                 """
@@ -1463,7 +1525,7 @@ def generate_code(self, kwargs):
                 {}({})[0]
                 """
             )
-            kwargs["scale"] = "math.sqrt({}.shape[-1])".format(kwargs["query"])
+            kwargs["scale"] = "1.0 / math.sqrt({}.shape[-1])".format(kwargs["query"])
         else:
             API_TEMPLATE = textwrap.dedent(
                 """
@@ -2763,9 +2825,18 @@ def generate_code(self, kwargs):
 class StdMeanMatcher(BaseMatcher):
     def generate_code(self, kwargs):
         std_kwargs = kwargs.copy()
-        kwargs.pop("unbiased", None)
-        kwargs.pop("correction", None)
-        mean_kwargs = kwargs
+        if "input" in std_kwargs:
+            std_kwargs["x"] = std_kwargs.pop("input")
+        if "dim" in std_kwargs:
+            std_kwargs["axis"] = std_kwargs.pop("dim")
+
+        mean_kwargs = kwargs.copy()
+        mean_kwargs.pop("unbiased", None)
+        mean_kwargs.pop("correction", None)
+        if "input" in mean_kwargs:
+            mean_kwargs["x"] = mean_kwargs.pop("input")
+        if "dim" in mean_kwargs:
+            mean_kwargs["axis"] = mean_kwargs.pop("dim")
         API_TEMPLATE = textwrap.dedent(
             """
             (paddle.std({}), paddle.mean({}))
@@ -2781,9 +2852,18 @@ def generate_code(self, kwargs):
 class VarMeanMatcher(BaseMatcher):
     def generate_code(self, kwargs):
         var_kwargs = kwargs.copy()
-        kwargs.pop("unbiased", None)
-        kwargs.pop("correction", None)
-        mean_kwargs = kwargs
+        if "input" in var_kwargs:
+            var_kwargs["x"] = var_kwargs.pop("input")
+        if "dim" in var_kwargs:
+            var_kwargs["axis"] = var_kwargs.pop("dim")
+
+        mean_kwargs = kwargs.copy()
+        mean_kwargs.pop("unbiased", None)
+        mean_kwargs.pop("correction", None)
+        if "input" in mean_kwargs:
+            mean_kwargs["x"] = mean_kwargs.pop("input")
+        if "dim" in mean_kwargs:
+            mean_kwargs["axis"] = mean_kwargs.pop("dim")
         API_TEMPLATE = textwrap.dedent(
             """
             (paddle.var({}), paddle.mean({}))
@@ -3211,6 +3291,13 @@ def generate_code(self, kwargs):
 
 
 class AllcloseMatcher(BaseMatcher):
+    def get_paddle_nodes(self, args, kwargs):  # adds a lone-`*args` shortcut
+        if len(args) == 1 and isinstance(args[0], ast.Starred) and len(kwargs) == 0:
+            args = self.parse_args(args)  # call is exactly `api(*seq)`
+            code = "{}({}).item()".format(self.get_paddle_api(), self.args_to_str(args))
+            return ast.parse(code).body  # `.item()` is already in `code`
+        return BaseMatcher.get_paddle_nodes(self, args, kwargs)  # regular path
+
     def generate_code(self, kwargs):
         code = GenericMatcher.generate_code(self, kwargs)
         code = "{}.item()".format(code)
@@ -3227,6 +3314,668 @@ def generate_code(self, kwargs):
         return code
 
 
+class TestingAssertCloseMatcher(BaseMatcher):  # reuses Assert_AllcloseMatcher
+    def generate_code(self, kwargs):  # note: mutates `kwargs` in place
+        for key in [
+            "allow_subclasses",
+            "check_device",
+            "check_dtype",
+            "check_layout",
+            "check_stride",
+        ]:
+            kwargs.pop(key, None)  # these options are discarded, not translated
+
+        if "actual" in kwargs:
+            kwargs["x"] = kwargs.pop("actual")  # rename actual -> x
+        if "expected" in kwargs:
+            kwargs["y"] = kwargs.pop("expected")  # rename expected -> y
+
+        return Assert_AllcloseMatcher.generate_code(self, kwargs)  # shared codegen
+
+
+class SequenceArgsAsListMatcher(BaseMatcher):  # f(a, *b) -> api([a] + list(b))
+    def get_paddle_api(self):
+        assert "paddle_api" in self.api_mapping_dict  # mapping must name a target
+        return super().get_paddle_api()
+
+    def get_paddle_nodes(self, args, kwargs):
+        new_kwargs = self.parse_kwargs(kwargs, allow_none=True)
+        if new_kwargs is None:
+            return None
+        if new_kwargs:  # any keyword argument present -> report "misidentify"
+            return "misidentify"
+
+        parts = []  # list-expression fragments, in call order
+        literals = []  # pending run of non-starred positional args
+        for node in args:
+            if isinstance(node, ast.Starred):
+                if literals:
+                    parts.append("[{}]".format(", ".join(literals)))
+                    literals = []  # run flushed; start a new one
+                parts.append(
+                    "list({})".format(astor.to_source(node.value).replace("\n", ""))
+                )
+            else:
+                literals.append(astor.to_source(node).replace("\n", ""))
+
+        if literals:  # flush the trailing run
+            parts.append("[{}]".format(", ".join(literals)))
+
+        if not parts:  # the call had no positional arguments
+            return "misidentify"
+
+        sequence_expr = parts[0] if len(parts) == 1 else " + ".join(parts)
+        code = "{}({})".format(self.get_paddle_api(), sequence_expr)
+        return ast.parse(code).body  # single list-valued argument
+
+
+class StarredGenericMatcher(GenericMatcher):  # adds a lone-`*args` shortcut
+    def get_paddle_nodes(self, args, kwargs):
+        if len(args) == 1 and isinstance(args[0], ast.Starred) and len(kwargs) == 0:
+            args = self.parse_args(args)  # call is exactly `api(*seq)`
+            code = "{}({})".format(self.get_paddle_api(), self.args_to_str(args))
+            return ast.parse(code).body  # statements of the rebuilt call
+        return GenericMatcher.get_paddle_nodes(self, args, kwargs)  # regular path
+
+
+class DistributedIsAvailableMatcher(BaseMatcher):  # emits a fixed expression
+    def generate_code(self, kwargs):  # `kwargs` is not used
+        return "hasattr(paddle, 'distributed')"
+
+
+class FFTShiftMatcher(BaseMatcher):  # emits calls to paddle_(i)fftshift helpers
+    def generate_utils_code(self):  # returns the source of both helpers
+        CODE_TEMPLATE = textwrap.dedent(
+            """
+            def paddle_fftshift(x, axes=None):
+                if axes is None:
+                    axes = tuple(range(len(x.shape)))
+                elif isinstance(axes, int):
+                    axes = (axes,)
+                shifts = [x.shape[axis] // 2 for axis in axes]
+                return paddle.roll(x, shifts=shifts, axis=axes)
+
+
+            def paddle_ifftshift(x, axes=None):
+                if axes is None:
+                    axes = tuple(range(len(x.shape)))
+                elif isinstance(axes, int):
+                    axes = (axes,)
+                shifts = [(x.shape[axis] + 1) // 2 for axis in axes]
+                return paddle.roll(x, shifts=shifts, axis=axes)
+            """
+        )
+        return CODE_TEMPLATE
+
+    def generate_code(self, kwargs):  # note: mutates `kwargs` in place
+        self.enable_utils_code()  # presumably schedules the helper source; verify
+        if "input" in kwargs:
+            kwargs["x"] = kwargs.pop("input")  # helper parameter is named x
+        if "dim" in kwargs:
+            kwargs["axes"] = kwargs.pop("dim")  # helper parameter is named axes
+
+        helper_name = (  # picked from the suffix of the torch API name
+            "paddle_ifftshift"
+            if self.torch_api.endswith("ifftshift")
+            else "paddle_fftshift"
+        )
+        return "{}({})".format(helper_name, self.kwargs_to_str(kwargs))
+
+
+class CDistMatcher(BaseMatcher):  # emits calls to the paddle_torch_cdist helper
+    def generate_utils_code(self):  # returns the helper's source text
+        CODE_TEMPLATE = textwrap.dedent(
+            """
+            def paddle_torch_cdist(x, y, p=2.0, compute_mode='use_mm_for_euclid_dist_if_necessary'):
+                output_shape = tuple(x.shape[:-1]) + (y.shape[-2],)
+                requires_grad = not (x.stop_gradient and y.stop_gradient)
+                if x.shape[-2] == 0 or y.shape[-2] == 0:
+                    return paddle.empty(
+                        output_shape,
+                        dtype=x.dtype,
+                        device=x.place,
+                        requires_grad=requires_grad,
+                    )
+                if x.shape[-1] == 0 and y.shape[-1] == 0:
+                    return paddle.zeros(
+                        output_shape,
+                        dtype=x.dtype,
+                        device=x.place,
+                        requires_grad=requires_grad,
+                    )
+                return paddle.cdist(x=x, y=y, p=p, compute_mode=compute_mode)
+            """
+        )
+        return CODE_TEMPLATE
+
+    def generate_code(self, kwargs):  # note: mutates `kwargs` in place
+        self.enable_utils_code()  # presumably schedules the helper source; verify
+        if "x1" in kwargs:
+            kwargs["x"] = kwargs.pop("x1")  # helper parameter is named x
+        if "x2" in kwargs:
+            kwargs["y"] = kwargs.pop("x2")  # helper parameter is named y
+        return "paddle_torch_cdist({})".format(self.kwargs_to_str(kwargs))
+
+
+class NormalMatcher(BaseMatcher):  # emits calls to the paddle_torch_normal helper
+    def generate_utils_code(self):  # returns the helper's source text
+        CODE_TEMPLATE = textwrap.dedent(
+            """
+            def paddle_torch_normal(mean=0.0, std=1.0, out=None, size=None):
+                if isinstance(mean, paddle.Tensor) and isinstance(std, paddle.Tensor):
+                    result = mean + paddle.randn(
+                        mean.shape, dtype=mean.dtype, device=mean.place
+                    ) * paddle.cast(std, mean.dtype)
+                elif isinstance(std, paddle.Tensor):
+                    dtype = std.dtype if not isinstance(mean, paddle.Tensor) else mean.dtype
+                    mean_tensor = paddle.full(
+                        std.shape, mean, dtype=dtype, device=std.place
+                    )
+                    result = mean_tensor + paddle.randn(
+                        std.shape, dtype=dtype, device=std.place
+                    ) * paddle.cast(std, dtype)
+                elif isinstance(mean, paddle.Tensor):
+                    result = paddle.normal(mean=mean, std=std)
+                elif size is not None:
+                    result = paddle.normal(mean=mean, std=std, shape=size)
+                else:
+                    result = paddle.normal(mean=mean, std=std)
+
+                if out is not None:
+                    paddle.assign(result, output=out)
+                    return out
+                return result
+            """
+        )
+        return CODE_TEMPLATE
+
+    def generate_code(self, kwargs):  # note: mutates `kwargs` in place
+        self.enable_utils_code()  # presumably schedules the helper source; verify
+        kwargs.pop("generator", None)  # the helper has no generator parameter
+        kwargs.setdefault("mean", "0.0")  # same value as the helper's default
+        kwargs.setdefault("std", "1.0")  # same value as the helper's default
+        return "paddle_torch_normal({})".format(self.kwargs_to_str(kwargs))
+
+
+class DropInplaceModuleMatcher(BaseMatcher):  # GenericMatcher codegen, no inplace
+    def generate_code(self, kwargs):  # note: mutates `kwargs` in place
+        kwargs.pop("inplace", None)  # never forwarded to the generated call
+        return GenericMatcher.generate_code(self, kwargs)
+
+
+class ActivationFunctionMatcher(BaseMatcher):  # drops inplace, renames input
+    def generate_code(self, kwargs):  # note: mutates `kwargs` in place
+        kwargs.pop("inplace", None)  # never forwarded to the generated call
+        if "input" in kwargs:
+            kwargs["x"] = kwargs.pop("input")  # rename input -> x
+        return GenericMatcher.generate_code(self, kwargs)
+
+
+class MaxPoolMatcher(BaseMatcher):
+    @staticmethod
+    def is_default_dilation(value):  # True if `value` spells a dilation of 1
+        if value is None:  # an absent dilation counts as the default
+            return True
+        value = str(value).replace(" ", "")  # compare with spaces removed
+        return value in {  # scalar, 1-, 2- and 3-element spellings of 1
+            "1",
+            "(1)",
+            "[1]",
+            "(1,)",
+            "[1,]",
+            "(1,1)",
+            "[1,1]",
+            "(1,1,1)",
+            "[1,1,1]",
+        }
+
+    def generate_utils_code(self):
+        CODE_TEMPLATE = textwrap.dedent(
+            """
+            import numpy as np
+
+
+            def _pytorch_max_pool_to_tuple(value, dims):
+                if isinstance(value, (list, tuple)):
+                    if len(value) == 1:
+                        return tuple([int(value[0])] * dims)
+                    return tuple(int(v) for v in value)
+                return tuple([int(value)] * dims)
+
+
+            def _pytorch_max_pool_output_shape(
+                input_shape, kernel_size, stride, padding, dilation, ceil_mode
+            ):
+                output_shape = []
+                for in_size, kernel, step, pad, dilate in zip(
+                    input_shape, kernel_size, stride, padding, dilation
+                ):
+                    effective_kernel = dilate * (kernel - 1) + 1
+                    if ceil_mode:
+                        out_size = (
+                            (
+                                in_size
+                                + 2 * pad
+                                - effective_kernel
+                                + step
+                                - 1
+                            )
+                            // step
+                        ) + 1
+                        while out_size > 0 and (out_size - 1) * step >= in_size + pad:
+                            out_size -= 1
+                    else:
+                        out_size = (
+                            (in_size + 2 * pad - effective_kernel) // step
+                        ) + 1
+                    output_shape.append(max(out_size, 0))
+                return tuple(output_shape)
+
+
+            def _pytorch_max_pool_pad_value(dtype):
+                if np.issubdtype(dtype, np.floating):
+                    return -np.inf
+                if np.issubdtype(dtype, np.integer):
+                    return np.iinfo(dtype).min
+                if np.issubdtype(dtype, np.bool_):
+                    return False
+                return 0
+
+
+            def _pytorch_max_pool_to_tensor(value, x, dtype=None, stop_gradient=True):
+                tensor = paddle.to_tensor(
+                    value,
+                    dtype=dtype,
+                    place=x.place,
+                    stop_gradient=stop_gradient,
+                )
+                return tensor
+
+
+            def _pytorch_max_pool_flat_index(input_idx, input_shape):
+                flat_index = 0
+                for i, value in enumerate(input_idx):
+                    stride = 1
+                    for size in input_shape[i + 1 :]:
+                        stride *= size
+                    flat_index += value * stride
+                return flat_index
+
+
+            def _pytorch_max_pool_nd(
+                x,
+                kernel_size,
+                stride=None,
+                padding=0,
+                dilation=1,
+                ceil_mode=False,
+                return_mask=False,
+            ):
+                spatial_dims = len(x.shape) - 2
+                input_shape = tuple(int(v) for v in x.shape[-spatial_dims:])
+                kernel_size = _pytorch_max_pool_to_tuple(kernel_size, spatial_dims)
+                if stride is None:
+                    stride = kernel_size
+                else:
+                    stride = _pytorch_max_pool_to_tuple(stride, spatial_dims)
+                padding = _pytorch_max_pool_to_tuple(padding, spatial_dims)
+                dilation = _pytorch_max_pool_to_tuple(dilation, spatial_dims)
+                output_shape = _pytorch_max_pool_output_shape(
+                    input_shape,
+                    kernel_size,
+                    stride,
+                    padding,
+                    dilation,
+                    ceil_mode,
+                )
+
+                x_numpy = x.numpy()
+                values = np.empty(
+                    x_numpy.shape[:2] + output_shape, dtype=x_numpy.dtype
+                )
+                indices = None
+                if return_mask:
+                    indices = np.zeros(
+                        x_numpy.shape[:2] + output_shape, dtype=np.int64
+                    )
+
+                pad_value = _pytorch_max_pool_pad_value(x_numpy.dtype)
+                for batch_idx in range(x_numpy.shape[0]):
+                    for channel_idx in range(x_numpy.shape[1]):
+                        input_slice = x_numpy[batch_idx, channel_idx]
+                        for out_idx in np.ndindex(*output_shape):
+                            best_value = None
+                            best_index = 0
+                            for kernel_idx in np.ndindex(*kernel_size):
+                                input_idx = tuple(
+                                    out_idx[i] * stride[i]
+                                    - padding[i]
+                                    + kernel_idx[i] * dilation[i]
+                                    for i in range(spatial_dims)
+                                )
+                                valid = all(
+                                    0 <= input_idx[i] < input_shape[i]
+                                    for i in range(spatial_dims)
+                                )
+                                if valid:
+                                    value = input_slice[input_idx]
+                                    flat_index = _pytorch_max_pool_flat_index(
+                                        input_idx, input_shape
+                                    )
+                                else:
+                                    value = pad_value
+                                    flat_index = 0
+                                if best_value is None or value > best_value:
+                                    best_value = value
+                                    best_index = flat_index
+                            values[(batch_idx, channel_idx) + out_idx] = best_value
+                            if return_mask:
+                                indices[(batch_idx, channel_idx) + out_idx] = best_index
+
+                values = _pytorch_max_pool_to_tensor(
+                    values,
+                    x,
+                    dtype=x.dtype,
+                    stop_gradient=x.stop_gradient,
+                )
+                if not return_mask:
+                    return values
+                indices = _pytorch_max_pool_to_tensor(indices, x, dtype="int64")
+                return values, indices
+
+
+            def pytorch_max_pool1d(
+                x,
+                kernel_size,
+                stride=None,
+                padding=0,
+                dilation=1,
+                ceil_mode=False,
+                return_mask=False,
+            ):
+                # 1-D max pooling that also honours PyTorch's ``dilation`` argument.
+                if isinstance(kernel_size, (list, tuple)):
+                    kernel_size = kernel_size[0]
+                if stride is None:
+                    stride = kernel_size
+                elif isinstance(stride, (list, tuple)):
+                    stride = stride[0]
+                if isinstance(padding, (list, tuple)):
+                    padding = padding[0]
+                if isinstance(dilation, (list, tuple)):
+                    dilation = dilation[0]
+
+                if dilation == 1:
+                    return paddle.nn.functional.max_pool1d(
+                        x,
+                        kernel_size=kernel_size,
+                        stride=stride,
+                        padding=padding,
+                        return_mask=return_mask,
+                        ceil_mode=ceil_mode,
+                    )
+
+                x_2d = paddle.unsqueeze(x, axis=[2])
+                input_length = x.shape[-1]
+                effective_kernel = dilation * (kernel_size - 1) + 1
+                # PyTorch rule: ceil mode rounds the window count up, but the last
+                # window must still start inside the input or its left padding.
+                rounding = stride - 1 if ceil_mode else 0
+                padded_span = input_length + 2 * padding - effective_kernel
+                output_length = max((padded_span + rounding) // stride + 1, 0)
+                last_start = (output_length - 1) * stride
+                if ceil_mode and last_start >= input_length + padding:
+                    output_length -= 1
+                total_padding = max(
+                    (output_length - 1) * stride + effective_kernel - input_length,
+                    0,
+                )
+                right_padding = max(total_padding - padding, 0)
+                if padding or right_padding:
+                    x_2d = paddle.nn.functional.pad(
+                        x_2d,
+                        [padding, right_padding, 0, 0],
+                        value=float("-inf"),
+                    )
+
+                patches = paddle.nn.functional.unfold(
+                    x_2d,
+                    kernel_sizes=[1, kernel_size],
+                    strides=[1, stride],
+                    paddings=0,
+                    dilations=[1, dilation],
+                )
+                out_length = patches.shape[-1]
+                patches = paddle.reshape(
+                    patches,
+                    [patches.shape[0], x.shape[1], kernel_size, out_length],
+                )
+                values = paddle.max(patches, axis=2)
+                if not return_mask:
+                    return values
+
+                local_indices = paddle.argmax(patches, axis=2)
+                starts = (
+                    paddle.arange(out_length, dtype=local_indices.dtype).reshape([1, 1, out_length])
+                    * stride
+                    - padding
+                )
+                indices = starts + local_indices * dilation
+                return values, indices
+
+
+            class PytorchMaxPool1D(paddle.nn.Layer):
+                def __init__(
+                    self,
+                    kernel_size,
+                    stride=None,
+                    padding=0,
+                    return_mask=False,
+                    ceil_mode=False,
+                    dilation=1,
+                ):
+                    super().__init__()
+                    self.kernel_size = kernel_size
+                    self.stride = stride
+                    self.padding = padding
+                    self.return_mask = return_mask
+                    self.ceil_mode = ceil_mode
+                    self.dilation = dilation
+
+                def forward(self, x):
+                    return pytorch_max_pool1d(
+                        x,
+                        kernel_size=self.kernel_size,
+                        stride=self.stride,
+                        padding=self.padding,
+                        dilation=self.dilation,
+                        ceil_mode=self.ceil_mode,
+                        return_mask=self.return_mask,
+                    )
+
+
+            def pytorch_max_pool2d(
+                x,
+                kernel_size,
+                stride=None,
+                padding=0,
+                dilation=1,
+                ceil_mode=False,
+                return_mask=False,
+            ):
+                return _pytorch_max_pool_nd(
+                    x,
+                    kernel_size=kernel_size,
+                    stride=stride,
+                    padding=padding,
+                    dilation=dilation,
+                    ceil_mode=ceil_mode,
+                    return_mask=return_mask,
+                )
+
+
+            class PytorchMaxPool2D(paddle.nn.Layer):
+                def __init__(
+                    self,
+                    kernel_size,
+                    stride=None,
+                    padding=0,
+                    return_mask=False,
+                    ceil_mode=False,
+                    dilation=1,
+                ):
+                    super().__init__()
+                    self.kernel_size = kernel_size
+                    self.stride = stride
+                    self.padding = padding
+                    self.return_mask = return_mask
+                    self.ceil_mode = ceil_mode
+                    self.dilation = dilation
+
+                def forward(self, x):
+                    return pytorch_max_pool2d(
+                        x,
+                        kernel_size=self.kernel_size,
+                        stride=self.stride,
+                        padding=self.padding,
+                        dilation=self.dilation,
+                        ceil_mode=self.ceil_mode,
+                        return_mask=self.return_mask,
+                    )
+
+
+            def pytorch_max_pool3d(
+                x,
+                kernel_size,
+                stride=None,
+                padding=0,
+                dilation=1,
+                ceil_mode=False,
+                return_mask=False,
+            ):
+                return _pytorch_max_pool_nd(
+                    x,
+                    kernel_size=kernel_size,
+                    stride=stride,
+                    padding=padding,
+                    dilation=dilation,
+                    ceil_mode=ceil_mode,
+                    return_mask=return_mask,
+                )
+
+
+            class PytorchMaxPool3D(paddle.nn.Layer):
+                def __init__(
+                    self,
+                    kernel_size,
+                    stride=None,
+                    padding=0,
+                    return_mask=False,
+                    ceil_mode=False,
+                    dilation=1,
+                ):
+                    super().__init__()
+                    self.kernel_size = kernel_size
+                    self.stride = stride
+                    self.padding = padding
+                    self.return_mask = return_mask
+                    self.ceil_mode = ceil_mode
+                    self.dilation = dilation
+
+                def forward(self, x):
+                    return pytorch_max_pool3d(
+                        x,
+                        kernel_size=self.kernel_size,
+                        stride=self.stride,
+                        padding=self.padding,
+                        dilation=self.dilation,
+                        ceil_mode=self.ceil_mode,
+                        return_mask=self.return_mask,
+                    )
+            """
+        )
+        return CODE_TEMPLATE
+
+    def generate_code(self, kwargs):
+        """Build the max-pool call, using a helper API for non-default dilation."""
+        renames = (("input", "x"), ("return_indices", "return_mask"))
+        for torch_name, paddle_name in renames:
+            if torch_name in kwargs:
+                kwargs[paddle_name] = kwargs.pop(torch_name)
+        dilation = kwargs.pop("dilation", None)
+        target_api = self.get_paddle_api()
+        if dilation is None or self.is_default_dilation(dilation):
+            return GenericMatcher.generate_code(self, kwargs)
+        # Non-default dilation: swap the mapped paddle API for its helper twin.
+        helper_names = {
+            "paddle.nn.MaxPool1D": "PytorchMaxPool1D",
+            "paddle.nn.functional.max_pool1d": "pytorch_max_pool1d",
+            "paddle.nn.MaxPool2D": "PytorchMaxPool2D",
+            "paddle.nn.functional.max_pool2d": "pytorch_max_pool2d",
+            "paddle.nn.MaxPool3D": "PytorchMaxPool3D",
+            "paddle.nn.functional.max_pool3d": "pytorch_max_pool3d",
+        }
+        if target_api not in helper_names:
+            return "misidentify"
+        self.enable_utils_code()
+        self.set_paddle_api(helper_names[target_api])
+        kwargs["dilation"] = dilation
+        return GenericMatcher.generate_code(self, kwargs)
+
+
+class RNNSequenceMatcher(BaseMatcher):
+    def generate_utils_code(self):
+        """Return the source of the pad/unpad sequence helpers."""
+        return textwrap.dedent(
+            """
+            def paddle_pad_sequence(
+                sequences, batch_first=False, padding_value=0.0
+            ):
+                max_len = max(seq.shape[0] for seq in sequences)
+                padded_sequences = []
+                for seq in sequences:
+                    pad_len = max_len - seq.shape[0]
+                    if pad_len > 0:
+                        pad_shape = [pad_len] + list(seq.shape[1:])
+                        pad_tensor = paddle.full(
+                            pad_shape,
+                            padding_value,
+                            dtype=seq.dtype,
+                            device=seq.place,
+                            requires_grad=not seq.stop_gradient,
+                        )
+                        seq = paddle.concat([seq, pad_tensor], axis=0)
+                    padded_sequences.append(seq)
+                axis = 0 if batch_first else 1
+                return paddle.stack(padded_sequences, axis=axis)
+
+
+            def paddle_unpad_sequence(
+                padded_sequences, lengths, batch_first=False
+            ):
+                if isinstance(lengths, paddle.Tensor):
+                    lengths = lengths.numpy().tolist()
+                sequences = []
+                for idx, length in enumerate(lengths):
+                    length = int(length)
+                    if batch_first:
+                        sequences.append(padded_sequences[idx, :length])
+                    else:
+                        sequences.append(padded_sequences[:length, idx])
+                return sequences
+            """
+        )
+
+    def generate_code(self, kwargs):
+        """Emit a call to the helper that matches the converted torch API."""
+        self.enable_utils_code()
+        if self.torch_api.endswith("unpad_sequence"):
+            helper_name = "paddle_unpad_sequence"
+        else:
+            helper_name = "paddle_pad_sequence"
+        return f"{helper_name}({self.kwargs_to_str(kwargs)})"
+
+
 class Num2TensorBinaryMatcher(BaseMatcher):
     def generate_code(self, kwargs):
         if "input" in kwargs:
@@ -3820,30 +4569,62 @@ class FAApplyRotaryEmbFuncMatcher(BaseMatcher):
     def generate_utils_code(self):
         CODE_TEMPLATE = textwrap.dedent(
             """
-            def apply_rotary_position_embeddings(x, cos, sin):
+            def apply_rotary_position_embeddings(
+                x,
+                cos,
+                sin,
+                interleaved=False,
+                inplace=False,
+                seqlen_offsets=0,
+                cu_seqlens=None,
+                max_seqlen=None,
+            ):
+                if seqlen_offsets not in (0, None):
+                    raise NotImplementedError(
+                        "PaConvert only supports apply_rotary_emb_func with default seqlen_offsets"
+                    )
+                if cu_seqlens is not None or max_seqlen is not None:
+                    raise NotImplementedError(
+                        "PaConvert only supports apply_rotary_emb_func without cu_seqlens or max_seqlen"
+                    )
                 if not isinstance(cos, paddle.Tensor):
-                    cos = paddle.to_tensor(cos)
+                    cos = paddle.to_tensor(
+                        cos, dtype=x.dtype, place=x.place, stop_gradient=True
+                    )
                 if not isinstance(sin, paddle.Tensor):
-                    sin = paddle.to_tensor(sin)
+                    sin = paddle.to_tensor(
+                        sin, dtype=x.dtype, place=x.place, stop_gradient=True
+                    )
 
                 def _rotate_half(x):
-                    from einops import rearrange
-
-                    x = rearrange(x, "... (j d) -> ... j d", j=2)
-                    x1, x2 = x.unbind(axis=-2)
+                    if interleaved:
+                        x1 = x[..., ::2]
+                        x2 = x[..., 1::2]
+                        return paddle.reshape(
+                            paddle.stack((-x2, x1), axis=-1), shape=x.shape
+                        )
+                    x1, x2 = paddle.split(x, num_or_sections=2, axis=-1)
                     return paddle.concat((-x2, x1), axis=-1)
-                # [seq_len,rotary_dim/2] ==>[seq_len, rotary_dim]
-                cos = paddle.concat([cos,cos],axis=-1)
-                # [seq_len, rotary_dim] ==>[1,seq_len, 1,rotary_dim]
-                cos=cos.unsqueeze(axis=1).unsqueeze(axis=0)
-                # [seq_len,rotary_dim/2] ==>[seq_len, rotary_dim]
-                sin = paddle.concat([sin,sin],axis=-1)
-                # [seq_len, rotary_dim] ==>[1,seq_len, 1,rotary_dim]
-                sin=sin.unsqueeze(axis=1).unsqueeze(axis=0)
-                t_rot, t_pass = x[..., :cos.shape[-1]], x[..., cos.shape[-1]:]
-                t_rot = (t_rot * cos) + (_rotate_half(t_rot) * sin)
 
-                return paddle.concat(x=(t_rot, t_pass), axis=-1)
+                if interleaved:
+                    cos = paddle.repeat_interleave(cos, repeats=2, axis=-1)
+                    sin = paddle.repeat_interleave(sin, repeats=2, axis=-1)
+                else:
+                    cos = paddle.concat([cos, cos], axis=-1)
+                    sin = paddle.concat([sin, sin], axis=-1)
+
+                cos = cos.unsqueeze(axis=-2)
+                sin = sin.unsqueeze(axis=-2)
+                rotary_dim = cos.shape[-1]
+                assert rotary_dim <= x.shape[-1]
+                t_rot, t_pass = x[..., :rotary_dim], x[..., rotary_dim:]
+                out = paddle.concat(
+                    x=((t_rot * cos) + (_rotate_half(t_rot) * sin), t_pass), axis=-1
+                )
+                if inplace:
+                    paddle.assign(out, output=x)
+                    return x
+                return out
             """
         )
         return CODE_TEMPLATE
@@ -3863,18 +4644,53 @@ def get_paddle_api(self):
 
 
 class FARmsNorm(BaseMatcher):
+    def generate_utils_code(self):
+        CODE_TEMPLATE = textwrap.dedent(
+            """
+            def paddle_flash_attn_rms_norm(x, weight, epsilon):
+                if weight is not None and x.place.is_gpu_place():
+                    try:
+                        out = paddle.incubate.nn.functional.fused_rms_norm(
+                            x, weight, paddle.zeros_like(weight), epsilon, len(x.shape) - 1
+                        )
+                        if isinstance(out, (tuple, list)):
+                            return out[0]
+                        return out
+                    except Exception:
+                        pass
+
+                original_dtype = x.dtype
+                if x.dtype in [paddle.float16, paddle.bfloat16]:
+                    compute_x = paddle.cast(x, "float32")
+                else:
+                    compute_x = x
+
+                out = compute_x * paddle.rsqrt(
+                    paddle.mean(paddle.square(compute_x), axis=-1, keepdim=True) + epsilon
+                )
+                if weight is not None:
+                    if weight.dtype != out.dtype:
+                        weight = paddle.cast(weight, out.dtype)
+                    out = out * weight
+
+                if out.dtype != original_dtype:
+                    out = paddle.cast(out, original_dtype)
+                return out
+            """
+        )
+        return CODE_TEMPLATE
+
     def generate_code(self, kwargs):
+        self.enable_utils_code()
         API_TEMPLATE = textwrap.dedent(
             """
-            paddle.incubate.nn.functional.fused_rms_norm({}, {}, paddle.zeros_like({}), {},len({}.shape)-1)[0]
+            paddle_flash_attn_rms_norm({}, {}, {})
             """
         )
         return API_TEMPLATE.format(
             kwargs["x"],
             kwargs["weight"],
-            kwargs["weight"],
             kwargs["epsilon"],
-            kwargs["x"],
         )
 
 
diff --git a/paconvert/transformer/basic_transformer.py b/paconvert/transformer/basic_transformer.py
index 5e1a9d6a1..af7fd8f0c 100644
--- a/paconvert/transformer/basic_transformer.py
+++ b/paconvert/transformer/basic_transformer.py
@@ -361,6 +361,33 @@ def trans_class_attribute(self, node, torch_api):
         )
         return node
 
+    def insert_paddle_tensor_int_helper(self):
+        """Insert the ``paddle_tensor_int`` helper definition via insert_multi_node."""
+        # The helper source starts at column 0 so it parses as module-level code.
+        helper_source = """
+def paddle_tensor_int(x):
+    module_name = type(x).__module__
+    if module_name.startswith("paddle") and hasattr(x, "numel") and hasattr(x, "reshape"):
+        assert x.numel() == 1, "only one element variable can be converted to int."
+        return int(x.reshape([-1])[0].item())
+    return int(x)
+            """
+        self.insert_multi_node(ast.parse(helper_source).body)
+
+    def trans_builtin_int(self, node):
+        """Rewrite ``int(x)`` to ``paddle_tensor_int(x)``; None if not applicable."""
+        if not isinstance(node.func, ast.Name) or node.func.id != "int":
+            return None
+        # ``int(*args)`` may unpack into (value, base); only forward one plain arg.
+        args = node.args
+        if len(args) != 1 or node.keywords or isinstance(args[0], ast.Starred):
+            return None
+
+        self.insert_paddle_tensor_int_helper()
+        func = ast.Name(id="paddle_tensor_int", ctx=ast.Load())
+        new_node = ast.Call(func=func, args=args, keywords=[])
+        return ast.copy_location(new_node, node)
+
     def visit_Call(self, node):
         """
         if one line has N torch function, it has 2^N method of
@@ -414,6 +441,10 @@ def visit_Call(self, node):
         # Use Postorder traversal
         super(BasicTransformer, self).generic_visit(node)
 
+        builtin_int_node = self.trans_builtin_int(node)
+        if builtin_int_node:
+            return builtin_int_node
+
         full_attr = self.get_full_attr_for_apiname(node.func)
         # 1) Torch Package Call, include torch third_party
         #   such as : torch.add(x, y) / torch.add(torch.abs(x), y)
diff --git a/tests/code_library/code_case/paddle_code/paddlenlp_Qwen.py b/tests/code_library/code_case/paddle_code/paddlenlp_Qwen.py
index 8abecc2c3..c4532798e 100644
--- a/tests/code_library/code_case/paddle_code/paddlenlp_Qwen.py
+++ b/tests/code_library/code_case/paddle_code/paddlenlp_Qwen.py
@@ -45,30 +45,92 @@ def _post_init(self):
         self._init_weights()
 setattr(paddleformers.transformers.model_utils.PretrainedModel, "post_init", _post_init)
 
-def apply_rotary_position_embeddings(x, cos, sin):
+def apply_rotary_position_embeddings(
+    x,
+    cos,
+    sin,
+    interleaved=False,
+    inplace=False,
+    seqlen_offsets=0,
+    cu_seqlens=None,
+    max_seqlen=None,
+):
+    if seqlen_offsets not in (0, None):
+        raise NotImplementedError(
+            "PaConvert only supports apply_rotary_emb_func with default seqlen_offsets"
+        )
+    if cu_seqlens is not None or max_seqlen is not None:
+        raise NotImplementedError(
+            "PaConvert only supports apply_rotary_emb_func without cu_seqlens or max_seqlen"
+        )
     if not isinstance(cos, paddle.Tensor):
-        cos = paddle.to_tensor(cos)
+        cos = paddle.to_tensor(
+            cos, dtype=x.dtype, place=x.place, stop_gradient=True
+        )
     if not isinstance(sin, paddle.Tensor):
-        sin = paddle.to_tensor(sin)
+        sin = paddle.to_tensor(
+            sin, dtype=x.dtype, place=x.place, stop_gradient=True
+        )
 
     def _rotate_half(x):
-        from einops import rearrange
-
-        x = rearrange(x, "... (j d) -> ... j d", j=2)
-        x1, x2 = x.unbind(axis=-2)
+        if interleaved:
+            x1 = x[..., ::2]
+            x2 = x[..., 1::2]
+            return paddle.reshape(
+                paddle.stack((-x2, x1), axis=-1), shape=x.shape
+            )
+        x1, x2 = paddle.split(x, num_or_sections=2, axis=-1)
         return paddle.concat((-x2, x1), axis=-1)
-    # [seq_len,rotary_dim/2] ==>[seq_len, rotary_dim]
-    cos = paddle.concat([cos,cos],axis=-1)
-    # [seq_len, rotary_dim] ==>[1,seq_len, 1,rotary_dim]
-    cos=cos.unsqueeze(axis=1).unsqueeze(axis=0)
-    # [seq_len,rotary_dim/2] ==>[seq_len, rotary_dim]
-    sin = paddle.concat([sin,sin],axis=-1)
-    # [seq_len, rotary_dim] ==>[1,seq_len, 1,rotary_dim]
-    sin=sin.unsqueeze(axis=1).unsqueeze(axis=0)
-    t_rot, t_pass = x[..., :cos.shape[-1]], x[..., cos.shape[-1]:]
-    t_rot = (t_rot * cos) + (_rotate_half(t_rot) * sin)
-
-    return paddle.concat(x=(t_rot, t_pass), axis=-1)
+
+    if interleaved:
+        cos = paddle.repeat_interleave(cos, repeats=2, axis=-1)
+        sin = paddle.repeat_interleave(sin, repeats=2, axis=-1)
+    else:
+        cos = paddle.concat([cos, cos], axis=-1)
+        sin = paddle.concat([sin, sin], axis=-1)
+
+    cos = cos.unsqueeze(axis=-2)
+    sin = sin.unsqueeze(axis=-2)
+    rotary_dim = cos.shape[-1]
+    assert rotary_dim <= x.shape[-1]
+    t_rot, t_pass = x[..., :rotary_dim], x[..., rotary_dim:]
+    out = paddle.concat(
+        x=((t_rot * cos) + (_rotate_half(t_rot) * sin), t_pass), axis=-1
+    )
+    if inplace:
+        paddle.assign(out, output=x)
+        return x
+    return out
+
+def paddle_flash_attn_rms_norm(x, weight, epsilon):
+    if weight is not None and x.place.is_gpu_place():
+        try:
+            out = paddle.incubate.nn.functional.fused_rms_norm(
+                x, weight, paddle.zeros_like(weight), epsilon, len(x.shape) - 1
+            )
+            if isinstance(out, (tuple, list)):
+                return out[0]
+            return out
+        except Exception:
+            pass
+
+    original_dtype = x.dtype
+    if x.dtype in [paddle.float16, paddle.bfloat16]:
+        compute_x = paddle.cast(x, "float32")
+    else:
+        compute_x = x
+
+    out = compute_x * paddle.rsqrt(
+        paddle.mean(paddle.square(compute_x), axis=-1, keepdim=True) + epsilon
+    )
+    if weight is not None:
+        if weight.dtype != out.dtype:
+            weight = paddle.cast(weight, out.dtype)
+        out = out * weight
+
+    if out.dtype != original_dtype:
+        out = paddle.cast(out, original_dtype)
+    return out
 ############################## 相关utils函数，如上 ##############################
 
 
@@ -177,6 +239,4 @@ class QWenTokenizer(paddleformers.PreTrainedTokenizer):
 print("#########################case16#########################")
 apply_rotary_position_embeddings(x=x, cos=cos, sin=sin)
 print("#########################case17#########################")
-paddle.incubate.nn.functional.fused_rms_norm(
-    x, weight, paddle.zeros_like(weight), eps, len(x.shape) - 1
-)[0]
+paddle_flash_attn_rms_norm(x, weight, eps)
diff --git a/tests/flash_attn_tests/test_flash_attn_apply_rotary_emb_func.py b/tests/flash_attn_tests/test_flash_attn_apply_rotary_emb_func.py
index 44f919858..10586b926 100644
--- a/tests/flash_attn_tests/test_flash_attn_apply_rotary_emb_func.py
+++ b/tests/flash_attn_tests/test_flash_attn_apply_rotary_emb_func.py
@@ -52,3 +52,22 @@ def test_case_1():
         """
     )
     obj.run(pytorch_code, ["result"])
+
+
+def test_case_2():
+    pytorch_code = textwrap.dedent(
+        """
+        import torch
+        from flash_attn.layers.rotary import apply_rotary_emb_func
+        x = torch.ones([1, 2, 2, 4]).cuda()
+        cos = torch.ones([2, 2]).cuda()
+        sin = torch.ones([2, 2]).cuda()
+        result = apply_rotary_emb_func(
+            x, cos, sin, interleaved=False, inplace=False, seqlen_offsets=0
+        )
+        """
+    )
+    paddle_code = obj.convert(pytorch_code)
+    assert "from einops import rearrange" not in paddle_code
+    assert "paddle.assign(out, output=x)" in paddle_code
+    assert "interleaved=False" in paddle_code
diff --git a/tests/flash_attn_tests/test_flash_attn_func.py b/tests/flash_attn_tests/test_flash_attn_func.py
index afd8df4e8..e502374ea 100644
--- a/tests/flash_attn_tests/test_flash_attn_func.py
+++ b/tests/flash_attn_tests/test_flash_attn_func.py
@@ -40,3 +40,47 @@ def test_case_1():
         """
     )
     obj.run(pytorch_code, ["result"])
+
+
+def test_case_2():
+    pytorch_code = textwrap.dedent(
+        """
+        import torch
+        import flash_attn
+        q = torch.ones([1,8,8,8],dtype=torch.float16).cuda()
+        result = flash_attn.flash_attn_interface.flash_attn_func(q,q,q,0,None,False)
+        """
+    )
+    expect_paddle_code = textwrap.dedent(
+        """
+        import paddle
+
+        q = paddle.ones([1, 8, 8, 8], dtype=paddle.float16).cuda()
+        result = paddle.nn.functional.flash_attention.flash_attention(
+            query=q, key=q, value=q, dropout=0, causal=False
+        )[0]
+        """
+    )
+    obj.run(pytorch_code, expect_paddle_code=expect_paddle_code)
+
+
+def test_case_3():
+    pytorch_code = textwrap.dedent(
+        """
+        import torch
+        import flash_attn
+        q = torch.ones([1,8,8,8],dtype=torch.float16).cuda()
+        result = flash_attn.flash_attn_interface.flash_attn_func(q, q, q, 0, None, False, None, 0.0, None, False)
+        """
+    )
+    expect_paddle_code = textwrap.dedent(
+        """
+        import paddle
+
+        q = paddle.ones([1, 8, 8, 8], dtype=paddle.float16).cuda()
+        result = paddle.nn.functional.flash_attention.flash_attention(
+            query=q, key=q, value=q, dropout=0, causal=False
+        )[0]
+        """
+    )
+    obj.run(pytorch_code, expect_paddle_code=expect_paddle_code)
diff --git a/tests/flash_attn_tests/test_flash_attn_rms_norm.py b/tests/flash_attn_tests/test_flash_attn_rms_norm.py
index 43c840443..9f4711671 100644
--- a/tests/flash_attn_tests/test_flash_attn_rms_norm.py
+++ b/tests/flash_attn_tests/test_flash_attn_rms_norm.py
@@ -92,3 +92,21 @@ def test_case_3():
         """
     )
     obj.run(pytorch_code, ["result"])
+
+
+def test_case_4():
+    pytorch_code = textwrap.dedent(
+        """
+        import torch
+        from flash_attn.ops.rms_norm import rms_norm
+        x = torch.tensor([
+            [[0.4742,  3.5466, -4.8008, -8.9079, 0.4742,  9.5466, -8.8008, -6.9079]],
+            [[3.4742,  0.5466, -0.8008, -0.9079, 3.4742,  0.5466, -0.8008, -0.9079]]
+            ]).cuda()
+        weight = torch.ones(8).cuda()
+        result = rms_norm(x, weight,1e-6)
+        """
+    )
+    paddle_code = obj.convert(pytorch_code)
+    assert "paddle_flash_attn_rms_norm" in paddle_code
+    assert "result = paddle_flash_attn_rms_norm(x, weight, 1e-06)" in paddle_code
diff --git a/tests/flash_attn_tests/test_flash_attn_unpadded_func.py b/tests/flash_attn_tests/test_flash_attn_unpadded_func.py
index 68619af86..10cf64389 100644
--- a/tests/flash_attn_tests/test_flash_attn_unpadded_func.py
+++ b/tests/flash_attn_tests/test_flash_attn_unpadded_func.py
@@ -40,3 +40,40 @@ def test_case_1():
         """
     )
     obj.run(pytorch_code, ["result"])
+
+
+def test_case_2():
+    pytorch_code = textwrap.dedent(
+        """
+        import torch
+        from flash_attn.flash_attn_interface import flash_attn_varlen_func as flash_attn_unpadded_func
+        q = torch.ones([8,8,8],dtype=torch.float16).cuda()
+        cu_seqlens_q = torch.ones([8],dtype=torch.int32).cuda()
+        result = flash_attn_unpadded_func(q,q,q,cu_seqlens_q,cu_seqlens_q,4,4,0.25)
+        """
+    )
+    expect_paddle_code = textwrap.dedent(
+        """
+        import math
+
+        import paddle
+
+        q = paddle.ones([8, 8, 8], dtype=paddle.float16).cuda()
+        cu_seqlens_q = paddle.ones([8], dtype=paddle.int32).cuda()
+        assert (
+            paddle.device.cuda.get_device_capability()[0] >= 8
+        ), "Device capabilities should be at least 8"
+        result = paddle.nn.functional.flash_attention.flash_attn_unpadded(
+            query=q,
+            key=q,
+            value=q,
+            cu_seqlens_q=cu_seqlens_q,
+            cu_seqlens_k=cu_seqlens_q,
+            max_seqlen_q=4,
+            max_seqlen_k=4,
+            dropout=0.25,
+            scale=1.0 / math.sqrt(q.shape[-1]),
+        )[0]
+        """
+    )
+    obj.run(pytorch_code, expect_paddle_code=expect_paddle_code)
diff --git a/tests/test_Tensor_std.py b/tests/test_Tensor_std.py
index 46451e000..2e5378189 100644
--- a/tests/test_Tensor_std.py
+++ b/tests/test_Tensor_std.py
@@ -95,3 +95,22 @@ def test_case_7():
         """
     )
     obj.run(pytorch_code, ["result"])
+
+
+def test_case_8():
+    pytorch_code = textwrap.dedent(
+        """
+        import torch
+        input = torch.tensor([[1.4907, 1.0593, 1.5696], [1.4907, 1.0593, 1.5696]])
+        result = input.std(keepdim=True, correction=0, dim=1)
+        """
+    )
+    paddle_code = textwrap.dedent(
+        """
+        import paddle
+
+        input = paddle.tensor([[1.4907, 1.0593, 1.5696], [1.4907, 1.0593, 1.5696]])
+        result = input.std(keepdim=True, correction=0, axis=1)
+        """
+    )
+    obj.run(pytorch_code, expect_paddle_code=paddle_code)
diff --git a/tests/test_std.py b/tests/test_std.py
index 736a6e5c9..586587ae4 100644
--- a/tests/test_std.py
+++ b/tests/test_std.py
@@ -143,3 +143,22 @@ def test_case_11():
         """
     )
     obj.run(pytorch_code, ["result"])
+
+
+def test_case_12():
+    pytorch_code = textwrap.dedent(
+        """
+        import torch
+        input = torch.tensor([[1.4907, 1.0593, 1.5696], [1.4907, 1.0593, 1.5696]])
+        result = torch.std(input=input, dim=1, correction=1, keepdim=True)
+        """
+    )
+    paddle_code = textwrap.dedent(
+        """
+        import paddle
+
+        input = paddle.tensor([[1.4907, 1.0593, 1.5696], [1.4907, 1.0593, 1.5696]])
+        result = paddle.std(x=input, axis=1, correction=1, keepdim=True)
+        """
+    )
+    obj.run(pytorch_code, expect_paddle_code=paddle_code)
diff --git a/tests/test_std_mean.py b/tests/test_std_mean.py
index 2db3b4f04..5c37746a2 100644
--- a/tests/test_std_mean.py
+++ b/tests/test_std_mean.py
@@ -152,3 +152,35 @@ def test_case_9():
         """
     )
     obj.run(pytorch_code, ["std", "mean"])
+
+
+def test_case_10():
+    pytorch_code = textwrap.dedent(
+        """
+        import torch
+        a = torch.tensor(
+            [[ 0.2035,  1.2959,  1.8101, -0.4644],
+            [ 1.5027, -0.3270,  0.5905,  0.6538],
+            [-1.5745,  1.3330, -0.5596, -0.6548],
+            [ 0.1264, -0.5080,  1.6420,  0.1992]])
+        std, mean = torch.std_mean(input=a, correction=0, dim=1, keepdim=True)
+        """
+    )
+    paddle_code = textwrap.dedent(
+        """
+        import paddle
+
+        a = paddle.tensor(
+            [
+                [0.2035, 1.2959, 1.8101, -0.4644],
+                [1.5027, -0.327, 0.5905, 0.6538],
+                [-1.5745, 1.333, -0.5596, -0.6548],
+                [0.1264, -0.508, 1.642, 0.1992],
+            ]
+        )
+        std, mean = paddle.std(correction=0, keepdim=True, x=a, axis=1), paddle.mean(
+            keepdim=True, x=a, axis=1
+        )
+        """
+    )
+    obj.run(pytorch_code, expect_paddle_code=paddle_code)
diff --git a/tools/consistency/consistency_check.py b/tools/consistency/consistency_check.py
index a280cd2c7..2ba9ae436 100644
--- a/tools/consistency/consistency_check.py
+++ b/tools/consistency/consistency_check.py
@@ -44,7 +44,9 @@ def _compare_content(actual_dir, expect_dir):
     result = True
     if os.path.isfile(actual_dir):
         assert os.path.isfile(expect_dir), f"{expect_dir} shoule be a file!"
-        with open(actual_dir, "r") as f1, open(expect_dir, "r") as f2:
+        with open(actual_dir, "r", encoding="utf-8") as f1, open(
+            expect_dir, "r", encoding="utf-8"
+        ) as f2:
             content1 = f1.read().strip()
             content2 = f2.read().strip()
             # 对随机的辅助代码路径进行处理，使用正则表达式匹配并替换