
    [iR                         U d dl Z d dlmZ d dlZd dlmZ d dlmZ d dlm	Z	  G d de          Z
 G d d	e          Z G d
 de          Z G d de          Zdaeej        dz           dz  ed<   dej        fdZdS )    N)chain_get_device_index)Function)commc                   :    e Zd Zed             Zed             ZdS )	Broadcastc                    t          d |D                       s
J d            d |D             }|| _        t          |          dk    rdS t          |          | _        |d                                         | _        t          j        || j                  }g }t          | j	        dd                    D ](\  }|s!|
                    fd|D                        ) | j        |  t          t          j        |                    S )Nc              3   6   K   | ]}|j         j        d k    V  dS cpuNdevicetype.0is     O/var/www/icac/venv/lib/python3.11/site-packages/torch/nn/parallel/_functions.py	<genexpr>z$Broadcast.forward.<locals>.<genexpr>   +      ::a18=E)::::::    z2Broadcast function not implemented for CPU tensorsc                 .    g | ]}t          |d           S Tr   r   xs     r   
<listcomp>z%Broadcast.forward.<locals>.<listcomp>   #    GGGa(D11GGGr   r       c              3   (   K   | ]}|         V  d S Nr   )r   outputidxs     r   r   z$Broadcast.forward.<locals>.<genexpr>   s'      *M*M66#;*M*M*M*M*M*Mr   )alltarget_gpuslen
num_inputs
get_deviceinput_devicer   broadcast_coalesced	enumerateneeds_input_gradextendmark_non_differentiabletupler   from_iterable)ctxr%   inputsoutputsnon_differentiablesinput_requires_gradr#   s         @r   forwardzBroadcast.forward   s3   ::6::::: 	
 	
@	
 	
 	
 HG;GGG%v;;!2V!!9//11*63?CC (1#2Fqrr2J(K(K 	N 	N$C$& N#***M*M*M*MW*M*M*MMMM##%899U(11222r   c                 B    dt          j        | j        | j        g|R  z   S )Nr!   )ReduceAddCoalescedapplyr)   r'   r1   grad_outputss     r   backwardzBroadcast.backward   s6    +1cn
/;
 
 
 
 	
r   N__name__
__module____qualname__staticmethodr6   r<   r   r   r   r	   r	   
   sH        3 3 \3$ 
 
 \
 
 
r   r	   c                   :    e Zd Zed             Zed             ZdS )r8   c                     fdt          dt                              D             | _        fdt          dt                              D             }t          j        ||          S )Nc                 D    g | ]}|                                          S r   r(   )r   r   gradss     r   r   z.ReduceAddCoalesced.forward.<locals>.<listcomp>(   s6     
 
 
&'E!H!!
 
 
r   r   c                 *    g | ]}||z            S r   r   )r   r   rF   r'   s     r   r   z.ReduceAddCoalesced.forward.<locals>.<listcomp>,   s'    VVV%A
N*+VVVr   )ranger&   r%   r   reduce_add_coalesced)r1   destinationr'   rF   grads_s     `` r   r6   zReduceAddCoalesced.forward&   s    
 
 
 
+0CJJ
+K+K
 
 
 WVVVVU1c%jj*5U5UVVV(===r   c                 6    dt          j        | j        g|R  z   S )NNN)r	   r9   r%   r:   s     r   r<   zReduceAddCoalesced.backward/   s*    
 OCO;l;;;< 	<r   Nr=   r   r   r   r8   r8   %   sH        > > \> < < \< < <r   r8   c                   :    e Zd Zed             Zed             ZdS )Gatherc                     t          d |D                       s
J d            |dk    rd _        nt          |d          }| _        | _        t	          d |D                        _        t          d |D                       r=|dk    r7t	          d |D                       }t          j        d	d
           d _        nd _        t	           fd|D                        _	        t          j        | j         j                  S )Nc              3   6   K   | ]}|j         j        d k    V  dS r   r   r   s     r   r   z!Gather.forward.<locals>.<genexpr>:   r   r   z/Gather function not implemented for CPU tensorsr   Tc              3   >   K   | ]}|                                 V  d S r!   rE   r   s     r   r   z!Gather.forward.<locals>.<genexpr>C   s*      >>!q||~~>>>>>>r   c              3   F   K   | ]}|                                 d k    V  dS r   N)dimr   ts     r   r   z!Gather.forward.<locals>.<genexpr>D   s.      ,,quuww!|,,,,,,r   r   c              3   @   K   | ]}|                     d           V  dS )r   N)viewrV   s     r   r   z!Gather.forward.<locals>.<genexpr>E   s,      55166!99555555r   zvWas asked to gather along dimension 0, but all input tensors were scalars; will instead unsqueeze and return a vector.   )
stacklevelFc              3   L   K   | ]}|                     j                  V  d S r!   )sizerU   )r   r   r1   s     r   r   z!Gather.forward.<locals>.<genexpr>O   s/      @@Asw@@@@@@r   )r$   target_devicer   rU   r/   
input_gpuswarningswarnunsqueezed_scalarinput_sizesr   gather)r1   r^   rU   r2   s   `   r   r6   zGather.forward8   sE   ::6::::: 	
 	
=	
 	
 	
 E!! %C-mTBBM -C>>v>>>>>,,V,,,,, 
	*55f55555FM' 	    %)C!!$)C!@@@@@@@@@{637C,=>>>r   c                     t                               | j        | j        | j        |          }| j        rt          d |D                       }d|z   S )Nc              3   &   K   | ]}|d          V  dS rT   r   )r   gs     r   r   z"Gather.backward.<locals>.<genexpr>X   s&      #B#BQAaD#B#B#B#B#B#Br   rM   )Scatterr9   r_   rc   rU   rb   r/   )r1   grad_outputscattered_gradss      r   r<   zGather.backwardR   sY    !--NCOSWk
 
   	C##B#B/#B#B#BBBOo--r   Nr=   r   r   r   rO   rO   7   sH        ? ? \?2 . . \. . .r   rO   c                   :    e Zd Zed             Zed             ZdS )rh   c                 n   d |D             }|| _         |j        j        dk    r|                                nd| _        d }t
          j                                        r| j        dk    rd |D             }t          j	        |||| j         |          }|t          |          D ]\  }}t
          j                            ||                   5  t
          j                                        }	|	                    ||                    |                    |	           d d d            n# 1 swxY w Y   |S )Nc                 .    g | ]}t          |d           S r   r   r   s     r   r   z#Scatter.forward.<locals>.<listcomp>_   r   r   r   c                 P    g | ]#}t          t          j        |                    $S r   )_get_streamtorchr   )r   r   s     r   r   z#Scatter.forward.<locals>.<listcomp>e   s*    SSSV{5<#7#788SSSr   )rU   r   r   r(   r)   rq   acceleratoris_availabler   scatterr+   device_indexcurrent_streamwait_streamrecord_stream)
r1   r%   chunk_sizesrU   inputstreamsr3   r   r"   main_streams
             r   r6   zScatter.forward]   st   GG;GGG161Be1K1K5++---QS))++ 	T0@B0F0FSS{SSSG,uk;QQ&w// 6 6	6&33KNCC 6 6"'"3"B"B"D"DK++GAJ777((5556 6 6 6 6 6 6 6 6 6 6 6 6 6 6 s   AD))D-	0D-	c                 D    d d d t          j        | j        | j        g|R  fS r!   )rO   r9   r)   rU   )r1   ri   s     r   r<   zScatter.backwardp   s*    T4c.>!V+!V!V!VVVr   Nr=   r   r   r   rh   rh   \   sM          \$ W W \W W Wr   rh   _streamsr   c                    | j         dk    st          j                                        sdS t          j                                        j         | j         k    sJ t
          "dgt          j                                        z  at
          | j                 &t          j        | j                  t
          | j        <   t
          | j                 S )zBGet a background stream for copying between CPU and target device.r   N)	r   rq   rr   rs   current_acceleratorr~   device_countindexStream)r   s    r   rp   rp   y   s     {e5#4#A#A#C#Ct002276;FFFF6E-::<<<%!&fl!;!;FL!!r   )r`   	itertoolsr   rq   torch._utilsr   torch.autogradr   torch.nn.parallelr   r	   r8   rO   rh   r~   listr   __annotations__r   rp   r   r   r   <module>r      s\           * * * * * * # # # # # # " " " " " "
 
 
 
 
 
 
 
6< < < < < < < <$". ". ". ". ".X ". ". ".JW W W W Wh W W W4 .2$u|d"
#d
* 1 1 1
" 
" 
" 
" 
" 
" 
"r   