
    :irZ                     2   d dl Z d dlZd dlmZmZ d dlmZmZmZm	Z	 d dl
mZmZmZmZ d dlmZ g dZdej        deej        geej                 f         fdZdej        d	eej        eeef         f         d
edeej        geej                 f         fdZdej        deeej                          deej                 deeef         d	eej        eeef         f         f
dZdefdZ G d de          Zdej        dededeeej                          def
dZ G d de          Z G d de          Z dej        dedededef
dZ!dej        dededeeej                          def
dZ"dej        de	e         defd Z#dej        dededefd!Z$ ed"          ddfdej        deded#ed$eeeej                                   d%eeeej                                   defd&Z%ej&        ej'        he%_(        ej)        he%_*        e j+        d'ed(eded)         fd*            Z,dej        d+edej        fd,Z-dej        d'edej        fd-Z.	 d6dej        d/ed'edeej                 d0eej/                 d1ed2ede0ej        ef         fd3Z1 G d4 d5          Z2dS )7    N)ABCabstractmethod)Callable	GeneratorIterableSequence)AnycastOptionalUnion)always_wrap_policylambda_auto_wrap_policytransformer_auto_wrap_policysize_based_auto_wrap_policyenable_wrapwrapCustomPolicyModuleWrapPolicyroot_modulefnc                     | hdt           j        dt          dt          t           j                 ffd | dd           dS )aQ  
    This applies ``fn`` to every module in the module tree of ``root_module``
    following a post-order traversal. If ``fn`` returns an :class:`nn.Module`,
    then this replaces the original module with the newly returned one in the
    tree. Otherwise, ``fn`` should return ``None``, in which case the module is
    not changed.
    modulemodule_nameparent_modulec                    |                                  D ]+\  }}|vr"                    |            |||            , |           }|t          |t          j                  st          d| d|            |st          d|            t          |t          j                  st          d|           t          |||           d S d S )Nz=Non-root modules should have their parent module set but got z for zTNon-root modules should have their module name set but got an empty module name for z.fn should return None or an nn.Module but got )named_childrenadd
isinstancennModuleAssertionErrorsetattr)	r   r   r   child_module_namechild_moduleoptional_module_post_order_apply_innerr   visited_moduless	         N/var/www/icac/venv/lib/python3.11/site-packages/torch/distributed/fsdp/wrap.pyr&   z2_post_order_apply.<locals>._post_order_apply_inner-   s9   
 06/D/D/F/F 	Q 	Q+|?22##L111''6GPPP"V**&mRY77 $4$4 4+14 4    $9069 9   ory99 $V_VV   M;@@@@@ '&     N)r   r    strr   )r   r   r&   r'   s    `@@r(   _post_order_applyr,      s     (3mOA	AA  	*A A A A A A A A6 KT22222r)   target_module_to_kwargsfsdp_fnreturnc                 b     dt           j        dt          t           j                 f fd}|S )z
    This constructs the "wrap" function to pass to :func:`_post_order_apply`
    based on ``target_module_to_kwargs``, which should be constructed from the
    wrapping policy.
    r   r/   c                 :    | v r| ur|          } | fi |S d S N )r   kwargsr.   r   r-   s     r(   r   z_construct_wrap_fn.<locals>.fnV   sB     ,,,{1J1J,V4F76,,V,,,tr)   )r   r    r   )r   r-   r.   r   s   ``` r(   _construct_wrap_fnr5   K   sK    29 ")!4         Ir)   module_classesignored_modulesroot_kwargsc                     t          t          |                    }|                                 D ]+}||v rt          ||          r||vr|||<   d ||         d<   ,|S )Nmixed_precision)tuplesetmodulesr   )r   r6   r7   r8   r-   module_classes_tupler   s          r(   $_run_mixed_precision_override_policyr?   a   s     !^!4!455%%'' F F_$$ 455 	F4442='/AE#F+,=>""r)   c                      dS )z
    A simple recursive wrap policy that always returns ``True``. This means
    that every submodule is wrapped by the wrapper class in
    :func:`_recursive_wrap`.
    Tr3   )argsr4   s     r(   r   r   u   s	     4r)   c                       e Zd ZdZedej        deej                 dee	e
f         deej        ee	e
f         f         fd            ZdS )_Policyzk
    This defines an abstract base class that represents a policy for applying
    a module-level API.
    r   r7   r8   r/   c                     dS )z
        This should return a dict ``target_module_to_kwargs`` that maps from
        each target module to wrap to its kwargs.
        Nr3   )selfr   r7   r8   s       r(   _run_policyz_Policy._run_policy   s	     	r)   N)__name__
__module____qualname____doc__r   r   r    r<   dictr+   r	   rF   r3   r)   r(   rC   rC   ~   s         
 
Y
 RY
 #s(^	

 
bic3h'	(
 
 
 ^
 
 
r)   rC   r   recursenonwrapped_numelc                 D    |rdS t          | t          |                    S )a   
    This auto wrap policy wraps every module that is an instance of any type in
    ``module_classes`` as its own FSDP instance. The root module given by
    ``module`` is always wrapped as an FSDP instance regardless. Since the
    wrapping proceeds bottom up, each FSDP instance manages the parameters in
    its subtree excluding any already managed by a child FSDP instance.

    Args:
        module (nn.Module): Current module being considered.
        recurse (bool): If ``False``, then this function must decide whether
            ``module`` should be wrapped as an FSDP instance or not. If
            ``True``, then the function is still recursing down the module
            tree as a part of the DFS.
        nonwrapped_numel (int): Parameter numel not yet wrapped.
        module_classes (Set[Type[nn.Module]]): Set of module classes that are
            wrapped as FSDP instances.

    Returns:
        ``True`` if ``recurse=True``, and whether ``module`` should be wrapped
        if ``recurse=False``.
    Tr   r;   )r   rL   rM   r6   s       r(   _module_wrap_policyrP      s)    6  tfeN33444r)   c                        e Zd ZdZdeeej                          fdZdej        de	ej                 de
eef         de
ej        e
eef         f         fdZd	 Zdef fd
Z xZS )r   z{
    This policy applies to every module of the specified module classes,
    passing in the kwargs given to the root.
    r6   c                 Z    t          |          }|| _        t          |          | _        d S r2   )r<   _module_classesr+   _module_classes_str)rE   r6   module_classes_sets      r(   __init__zModuleWrapPolicy.__init__   s/     001#&'9#:#:   r)   r   r7   r8   r/   c                     t          | j                  }i }|                                D ].}||v rt          ||          rt	          j        |          ||<   /|S r2   )r;   rS   r=   r   copy)rE   r   r7   r8   r6   r-   r   s          r(   rF   zModuleWrapPolicy._run_policy   st     t344CE!))++ 	I 	IF((FN33 I26)K2H2H'/&&r)   c                 2    t          ||d| j                  S )N)rM   r6   )rP   rS   )rE   r   rL   rA   r4   s        r(   __call__zModuleWrapPolicy.__call__   s%    "GbAU
 
 
 	
r)   c                 \    t                                                      d| j         dz   S )N())super__repr__rT   )rE   	__class__s    r(   r`   zModuleWrapPolicy.__repr__   s-    ww!!$C(@$C$C$CCCr)   )rG   rH   rI   rJ   r   typer   r    rV   r<   rK   r+   r	   rF   r[   r`   __classcell__)ra   s   @r(   r   r      s         
;xRY'@ ; ; ; ;
'Y' RY' #s(^	'
 
bic3h'	(' ' ' ' 
 
 
D# D D D D D D D D D Dr)   r   c                       e Zd ZdZdeej        geee	e
ef         f         f         fdZdej        deej                 de	e
ef         de	ej        e	e
ef         f         fdZd	S )
r   a  
    This policy takes in a lambda function that maps a given ``nn.Module`` to
    either ``False``, ``True``, or a kwarg dictionary.
    - If the function returns ``False`` or an empty dictionary, then the module
      does not have the API applied.
    - If the function returns ``True``, then the module has the API applied
      with the root's kwargs.
    - If the function returns a non-empty dictionary, then the module has the
      API applied, and the dictionary overrides the root's kwargs.

    Example::

        >>> # xdoctest: +SKIP("undefined variables")
        >>> model = init_transformer_model(...)
        >>> def lambda_fn(module: nn.Module):
        >>>     if module is model.lm_head:
        >>>         return {"sharding_strategy": ShardingStrategy.SHARD_GRAD_OP}
        >>>     elif isinstance(module, TransformerBlock):
        >>>         return True
        >>>     return False
        >>> policy = CustomPolicy(lambda_fn)
        >>> fsdp_model = FSDP(model, auto_wrap_policy=policy)
    	lambda_fnc                     || _         d S r2   )
_lambda_fn)rE   re   s     r(   rV   zCustomPolicy.__init__   s    #r)   r   r7   r8   r/   c                 T   i }|                                 D ]}||v r|                     |          }t          |t          t          f          st          d|           |sMt          j        |          }t          |t                    r|                    |           |||<   |S )Nz_The lambda_fn passed to CustomPolicy should return False/True or a kwarg dict, but it returned )r=   rg   r   rK   bool
ValueErrorrX   update)rE   r   r7   r8   r-   r   resr4   s           r(   rF   zCustomPolicy._run_policy   s     DF!))++ 	5 	5F((//&))CcD$<00  ICFI I    Y{++F#t$$ # c""".4#F++&&r)   N)rG   rH   rI   rJ   r   r   r    r   ri   rK   r+   r	   rV   r<   rF   r3   r)   r(   r   r      s         0$(BI;dDcN>R8S+S"T $ $ $ $'Y' RY' #s(^	'
 
bic3h'	(' ' ' ' ' 'r)   r   re   c                      |rdS  ||           S )aU  
    A convenient auto wrap policy to wrap submodules based on an arbitrary user
    function. If `lambda_fn(submodule) == True``, the submodule will be wrapped as
    a `wrapper_cls` unit.

    Return if a module should be wrapped during auto wrapping.

    The first three parameters are required by :func:`_recursive_wrap`.

    Args:
        module (nn.Module): Current module being considered.
        recurse (bool): If ``False``, then this function must decide whether
            ``module`` should be wrapped as an FSDP instance or not. If
            ``True``, then the function is still recursing down the module
            tree as a part of the DFS.
        nonwrapped_numel (int): Parameter numel not yet wrapped.

        lambda_fn (Callable[[nn.Module], bool]): If this returns ``True``, then
            this module will be wrapped.
    Tr3   )r   rL   rM   re   s       r(   r   r     s!    .  t9Vr)   transformer_layer_clsc                 &    t          | |||          S )a-  
    See :func:`_module_wrap_policy`, where ``transformer_layer_cls`` is the
    same as ``module_classes``. Note that shared parameters must be wrapped in
    the same FSDP instance, so this auto wrap policy can help wrap shared
    embeddings into the same FSDP instance for transformer models.
    )rP   )r   rL   rM   rn   s       r(   r   r   *  s     vw0@BWXXXr)   c                 D    |rdS t          | t          |                    S )NTrO   )r   r6   rL   rA   r4   s        r(   _wrap_module_cls_individuallyrq   9  s+      9t &%"7"7888r)   c                 B     t           fd|D                       S )zv
    A policy that wraps ``module`` if any policy in the passed in iterable of
    ``policies`` returns ``True``.
    c              3   4   K   | ]} |           V  dS )r   rL   rM   Nr3   ).0policyr   rM   rL   s     r(   	<genexpr>z_or_policy.<locals>.<genexpr>O  sI         	fg@PQQQ     r)   )any)r   rL   rM   policiess   ``` r(   
_or_policyrz   E  sI                r)   g    חAmin_num_paramsforce_leaf_modulesexclude_wrap_modulesc                     |t           j        n|}|t           j        n|}|}||k    }|r |ot          | t	          |                     S |ot          | t	          |                     S )a  
    A size-based auto wrap policy.

    Args:
        module (nn.Module): Current module being considered.
        recurse (bool): If ``False``, then this function must decide whether
            ``module`` should be wrapped as an FSDP instance or not. If
            ``True``, then the function is still recursing down the module
            tree as a part of the DFS.
        nonwrapped_numel (int): Parameter numel not yet wrapped.

        min_num_params (int): Customizable policy input that controls the size
            threshold over which a module is ready to be wrapped. This is in
            units of numel.
        force_leaf_modules (Optional[set[type[nn.Module]]]): Set of module types to keep
            as leaves, i.e. their children will never be wrapped.
        exclude_wrap_modules (Optional[set[type[nn.Module]]]): Set of module types to be
            excluded in wrapping.

    Returns:
        Whether ``module`` should be wrapped.
    )r   FORCE_LEAF_MODULESEXCLUDE_WRAP_MODULESr   r;   )r   rL   rM   r{   r|   r}   min_nonwrapped_numelis_larges           r(   r   r   U  s    B % 	$66   ' 	$88!  *#77H PM
659K3L3L M MMM O
659M3N3N O OOOr)   wrapper_clswrapper_kwargs)NNNc              +   h   K   d| i|}t          di |5  dV  ddd           dS # 1 swxY w Y   dS )a  
    Context manager to wrap modules using a wrapper.

    Useful for when you'd like to apply the same configuration arguments to all
    child modules that you wrap. A particularly important use case is wrapping
    large layers so that they get sharded (in-place) during initialization, to
    avoid running out of system memory. Large layers can indicate that they
    should be sharded via the ``wrap`` annotation and this context manager can
    provide the exact configuration for these nested instances.

    Usage::

        with enable_wrap(wrapper_cls, **params):
            # Wraps layer in FSDP by default if within context
            self.l1 = wrap(torch.nn.Linear(5, 5))

    Args:
        wrapper_cls:
            Class that `wrap` annotation will `wrap` modules with, such as
            `FullyShardedDataParallel`.
        **wrapper_kwargs:
            Configuration settings that will be passed to all ``wrap``
            instances inside the context
    r   Nr3   )_ConfigAutoWrap)r   r   r4   s      r(   r   r     s      : 	{
F 
	"	"6	"	"                   s   '++wrap_overridesc                     t           j        rCt           j        t          d          i t           j        |}t          | t           j        fi |S | S )a  
    Annotate that a module should be wrapped. Annotated modules will only be
    wrapped if inside of an :func:`enable_wrap` context manager. This allows
    a module to be initialized both with and without a wrapper without code
    change.

    The class that this function wraps the passed in ``nn.Module`` with is the
    passed in ``wrapper_cls`` argument into ``enable_wrap``. Both
    ``enable_wrap`` and ``wrap`` can take in kwargs specifying how to construct
    the ``wrapper_cls`` instance. In the case of duplicate kwargs in
    ``enable_wrap`` and ``wrap``, the argument passed into ``wrap`` will be
    respected.

    Usage::

        with enable_wrap(wrapper_cls=FSDP, **fsdp_config):
            # Wraps layer in FSDP by default if within context
            self.l1 = wrap(torch.nn.Linear(5, 5))

    Args:
        module (nn.Module): module to wrap (if in :func:`enable_wrap` context)
        **wrap_overrides: configuration overrides that will take priority over
            the values provided by the :func:`enable_wrap` context
    Nz.Expected _ConfigAutoWrap.wrapper_cls to be set)r   in_autowrap_contextr   r!   r4   _wrap)r   r   s     r(   r   r     sj    2 * 	
&. !QRRREO2EnE'
 
 
 
 	

 Mr)   c                 ~    |t          d          t          | d          ri || j        } || fi |S  || fi |S )NzExpected wrapper_cls to be set_wrap_overrides)r!   hasattrr   )r   r   r4   	overridess       r(   r   r     sn    =>>>v()) 0
 9v8!78	{6//Y///;v(((((r)   Fauto_wrap_policyignored_paramsonly_wrap_childrenr4   c           
         |t          d          |t          d          |                                 D ]S\  }}||v r
	 t          |t          t          |                    rt          d| d|           D# t
          $ r Y Pw xY wt          fd|                                 D                       }	|t          d           || d|		          rud
}
|                                 D ]5\  }}||v r
t          d||||d|\  }}t          | ||           |
|z  }
6|	|
z
  }|s || d|	          rt          | |fi ||	fS | |
fS | d
fS )a  
    Wraps submodules of ``module`` for which ``auto_wrap_policy`` returns
    ``True`` with ``wrapper_cls``.

    Args:
        module (nn.Module): Module to recursively wrap.
        auto_wrap_policy (Callable): A callable representing a policy that
            determines which modules to recursively wrap with ``wrapper_cls``.
        ignored_modules (set[torch.nn.Module]): Modules to ignore when
            wrapping.
        ignored_params (set[torch.nn.Parameter]): Parameters to ignore when
            wrapping; these should be the parameters contained in the modules
            in ``ignored_modules``.
    Returns:
        (nn.Module, int):
            ``module`` after wrapping and the numel recursively wrapped.
    NzMust specify auto_wrap_policy.zMust specify wrapper_clszChild module z is already wrapped by c              3   H   K   | ]}|v|                                 V  d S r2   )numel)ru   pr   s     r(   rw   z"_recursive_wrap.<locals>.<genexpr>  s>        !>2I2I		2I2I2I2I r)   z#Expected auto_wrap_policy to be setTrt   r   )r   r   r   r7   r   Fr3   )r!   named_modulesr   r
   rb   	TypeErrorsum
parametersr   _recursive_wrapr"   r   )r   r   r   r7   r   r   r4   _childrM   total_wrapped_numelnamewrapped_childnum_wrapped_params	remainders       `          r(   r   r     s9   4 =>>>7888((** 
 
5O##	%dK!8!899 $OEOO+OO    	 	 	D	
     !,,..     BCCCvtFVWWW /!0022 	6 	6KD%''0? 1!1' /-1 1 1 1-M- FD-000#55 %'::	! 	/&6&659'
 '
 '
 	/ 77779III...19s   8A<<
B	B	c                       e Zd ZU dZdZeed<   dZee	         ed<   i Z
eeef         ed<   deeef         fdZededdfd	            Zedd
            ZddZdedededdfdZdS )r   z
    Helper class to wrap modules based on default config args via a context manager.
    See :func:`enable_wrap` for more information.
    Fr   Nr   r4   c                     || _         d S r2   r4   )rE   r4   s     r(   rV   z_ConfigAutoWrap.__init__B  s    r)   r/   c                     t           j        rt          d          dt           _        d| vrt          d          t	          t
          | d                   t           _        | d= | t           _        d S )Nz]You are already within an autowrap context and we currently do not supported nested autowrap.Tr   z9Expected to pass in wrapper_cls arg into _ConfigAutoWrap.)r   r   NotImplementedErrorr!   r
   r   r   r4   r   s    r(   enable_autowrap_contextz'_ConfigAutoWrap.enable_autowrap_contextE  s{    . 	%o   /3+&& K   '+8VM5J&K&K#=!!'r)   c                  N    dt           _        d t           _        i t           _        d S )NF)r   r   r   r4   r3   r)   r(   disable_autowrap_contextz(_ConfigAutoWrap.disable_autowrap_contextV  s     .3+&*#!#r)   c                 :    |                      | j                   d S r2   )r   r4   )rE   s    r(   	__enter__z_ConfigAutoWrap.__enter__\  s    $$T[11111r)   exc_typeexc_valexc_tbc                 .    |                                   d S r2   )r   )rE   r   r   r   s       r(   __exit__z_ConfigAutoWrap.__exit___  s    %%'''''r)   )r/   N)rG   rH   rI   rJ   r   ri   __annotations__r   r   r   r4   rK   r+   r	   rV   staticmethodr   r   r   r   r3   r)   r(   r   r   8  s         
 !&%%%&*K(#***FDcNc3h     ( ( ( ( ( \(  $ $ $ \$
2 2 2 2( (s (C (D ( ( ( ( ( (r)   r   )F)3
contextlibrX   abcr   r   collections.abcr   r   r   r   typingr	   r
   r   r   torch.nnr   __all__r    r,   rK   r+   r5   rb   r<   r?   ri   r   rC   intrP   r   r   r   r   rq   rz   r   
ModuleList
ModuleDictr   MultiheadAttentionr   contextmanagerr   r   r   	Parameterr;   r   r   r3   r)   r(   <module>r      s        # # # # # # # # C C C C C C C C C C C C - - - - - - - - - - - -      	 	 	)3)3")hry112)3 )3 )3 )3X!")T#s(^";<  ryk8BI../	   ,##T")_-# ^# c3h	#
 "")T#s(^";<# # # #(4        c   (5I55 5 RY(	5
 
5 5 5 5@"D "D "D "D "Dw "D "D "DJ4' 4' 4' 4' 4'7 4' 4' 4'nI $8;HP	   8YIYY Y tBI/	Y
 
Y Y Y Y	9I	9'/~	9@D	9 	9 	9 	9I 
 
   * #c((9=;?3P 3PI3P3P 3P
 3P !T")_!563P #3tBI#783P 
3P 3P 3P 3Pn 57M2=3Q  0242G1H  .   +.         F# #c #bi # # # #L)") )( ) ) ) ) )(  %M MIMM M ^	M
 %M M M 29c>M M M M`(( (( (( (( (( (( (( (( (( ((r)   