
    [iM              #       j   U d Z ddlZddlmZ ddlmZmZ ddlmZ ddl	Z	ddl
mZ  ed          Z ed          Zi Zee	j        j        ef         ed	<    eh d
          Zdededeeef         deeeef         geeef         f         fdZ	 d9ddddde	j        de	j        de	j        dee	j                 dededede	j        fdZ edde          	 d9ddddde	j        de	j        de	j        dee	j                 dededede	j        fd            Zdee         dedefdZd e	j        d!edede	j        fd"Zd#e	j        d$e	j        d%ed&edee         d'ede	j        fd(Zd%ed&eddfd)Z d#e	j        d$e	j        d%ed&edee         de	j        fd*Z!	 	 	 d:dddddd+dd,d#e	j        d$e	j        d-e	j        d.ee	j                 d/ee	j                 d0ee	j                 d1ed2ed3ed'edee         d4ed5ee         de"e	j        e	j        e	j        e	j        f         fd6Z# ed7de#          	 	 	 d:dddddd+dd,d#e	j        d$e	j        d-e	j        d.ee	j                 d/ee	j                 d0ee	j                 d1ed2ed3ed'edee         d4ed5ee         de"e	j        e	j        e	j        e	j        f         fd8            Z$dS );zImplementations of ONNX operators as native Torch ops.

NOTE: Fake implementations:
    Refer to https://docs.pytorch.org/docs/stable/library.html#torch.library.register_fake
    for more details on how to create fake kernels.
    N)Callable)OptionalTypeVar)	ParamSpec)_dtype_mappings_P_RONNX_ATEN_DECOMP_TABLE>      
         op_typeopset_version	fake_implreturnc                      dt           t          t          f         dt           t          t          f         f fd}|S )zDDecorator to register an ONNX operator with a custom implementation.funcr   c                    d }t          j                            d d| d          |           }| t          t	          t	          t           j        j                  |          <   |                               |S )Nopsetzonnx::. )mutates_args)torchlibrary	custom_opr
   getattropsonnxregister_fake)r   overloadtorch_opr   r   r   s      G/var/www/icac/venv/lib/python3.11/site-packages/torch/onnx/ops/_impl.py	decoratorz_onnx_op.<locals>.decorator'   s    *=**=**)W))x)) + 
 

   	wwuy~w'G'GRRS 	y)))    )r   r   r	   )r   r   r   r$   s   ``` r#   _onnx_opr&   "   sU    
	R( 	Xb"f-= 	 	 	 	 	 	 	 	 r%   F)interleaved	num_headsrotary_embedding_dimx	cos_cache	sin_cacheposition_idsr'   r(   r)   c                *    |                                  S )zFFake implementation for RotaryEmbedding-23 for torch.compile purposes.)clone)r*   r+   r,   r-   r'   r(   r)   s          r#   _rotary_embedding_23_fake_implr0   5   s     7799r%   RotaryEmbedding   c                h   | j         t                    }d         d         t          j                                        dk    fd           t          j        j         d         k    fd           t          j        j         d         k    fd           t          j                                        dk    o                                dk    fd	           nGt          j                                        d
k    o                                d
k    fd           |dk    rt          j        | d          } nJ|d
k    rDt          j        |dk    fd           d         }||z  }	||	g}
t          j        | |
          } t          j        t          | j                   dk    d            | j         d
         }	|dk    r|	}| ddddddd|f         }| dddddd|df         }|dz                    nt          j        j         d         k    oj         d         k    fd           t          j        j         d         k    oj         d         k    fd           t          j        j         d         k    fd           t          j        j         d         k    fd           t          j        d          t          j        d          |r+|dddddddddf         }|dddddddddf         }nt          j        |dd          \  }}|z  |z  z
  }|z  |z  z   }|r]t          j        |d          }t          j        |d          }t          j	        ||fd          }t          j        ||j                   }nt          j	        ||fd          }t          j	        ||fd          }|d
k    rt          j        |          S t          j        |d          S )z_RotaryEmbedding-23 https://onnx.ai/onnx/operators/onnx__RotaryEmbedding.html#rotaryembedding-23r   N   c                      d j          S )Nz6position_ids must be 2D when provided. Received shape shape)r-   s   r#   <lambda>z%rotary_embedding_23.<locals>.<lambda>Z   s    a\M_aa r%   c                  *    d  dj         d          S )Nz6position_ids first dim (batch) must match x.shape[0] (). Received r   r7   )
batch_sizer-   s   r#   r9   z%rotary_embedding_23.<locals>.<lambda>^   s"    |Z||eqewxyez|| r%   r   c                  *    d d j         d          S )Nz;position_ids second dim (sequence) must match x.shape[-2] (r;   r   r7   )r-   sequence_lengths   r#   r9   z%rotary_embedding_23.<locals>.<lambda>b   s>      GRa  G  Go{  pB  CD  pE  G  G r%   c                  (    d j          dj          S )NzWcos_cache/sin_cache must be 2D when position_ids is provided. Received cos_cache shape , sin_cache shape r7   r+   r,   s   r#   r9   z%rotary_embedding_23.<locals>.<lambda>f   -     ](1] ]KT?] ] r%      c                  (    d j          dj          S )Nz[cos_cache/sin_cache must be 3D when position_ids is not provided. Received cos_cache shape r@   r7   rA   s   r#   r9   z%rotary_embedding_23.<locals>.<lambda>l   rB   r%      )r   r5   r   rC   c                      d  S )NzKnum_heads must be provided for 3D inputs. Received input tensor with shape r   )input_shapes   r#   r9   z%rotary_embedding_23.<locals>.<lambda>y   s    obmoo r%   c                      dS )Nzx should be a 4D tensor by nowr   r   r%   r#   r9   z%rotary_embedding_23.<locals>.<lambda>   s    ,L r%   c                  &    dj          d  d dS )Nzcos has shape  but expected (batch=, seq=, ...)r7   )r<   cosr>   s   r#   r9   z%rotary_embedding_23.<locals>.<lambda>   &    jjjjjSbjjj r%   c                  &    dj          d  d dS )Nzsin has shape rJ   rK   rL   r7   )r<   r>   sins   r#   r9   z%rotary_embedding_23.<locals>.<lambda>   rN   r%   c                  ,    d j         d          d dS )NzLast dimension of cos cache (rQ   ') should match rotary_embedding_dim/2 ().r7   )rM   rotary_embedding_dim_halfs   r#   r9   z%rotary_embedding_23.<locals>.<lambda>   4      D	"  D  Df  D  D  D r%   c                  ,    dj         d          d  dS )NzLast dimension of sin cache (rQ   rS   rT   r7   )rU   rP   s   r#   r9   z%rotary_embedding_23.<locals>.<lambda>   rV   r%   dim)
r8   lenr   _checkrY   permutereshape	unsqueezechunkcat)r*   r+   r,   r-   r'   r(   r)   
input_rankhidden_size	head_size	new_shapex_rotatex_not_rotatex1x2realimagx_rotate_concatoutputr<   rM   rG   rU   r>   rP   s    ```               @@@@@@r#   rotary_embedding_23rm   C   sv    'K[!!JQJ!"oO !#aaaa	
 	
 	
 	q!Z/|||||	
 	
 	
 	q!_4 G  G  G  G  G	
 	
 	
 	MMOOq 9Y]]__%9] ] ] ] ]	
 	
 	
 	
 	MMOOq 9Y]]__%9] ] ] ] ]	
 	
 	
 Q M!\**	qNoooo	
 	
 	
 "!n9,	)YG	M!Y''	LQW"$L$LMMM
I q  (AAAqqq////0HQQQ11123334L 4 9 
 
 	L	!
"Fsy|'Fjjjjjj   
L	!
"Fsy|'Fjjjjjj   
L	"22 	D  	D  	D  	D  	D   
L	"22 	D  	D  	D  	D  	D   /Q C /Q C
  2aaaAAAqt!tm$aaaAAAqt!tm$Xqb111B 8cBhD8cBhD  3 tR((tR(()T4Lb999=(.AA9dD\r222Y,/R888FQ}V[111 =...r%   scalerc   c                 8    | | ndt          j        |          z  S )z/Get the scale factor for attention computation.Ng      ?)mathsqrt)rn   rc   s     r#   _get_scale_factorrr      s     %55C$)I2F2F,FGr%   tensorr<   c                     | j         d         | j         d         }}||z  }|                     ||||                              dd                                          S )z1Reshape 3D tensor to 4D for multi-head attention.r   r5   )r8   view	transpose
contiguous)rs   r<   r(   r>   rb   rc   s         r#   _reshape_3d_to_4drx      sS     $*<?FLO[Oy(IJIFF	1a	r%   QKcurrent_q_num_headscurrent_kv_num_headsqk_matmul_output_modec           	          |dk    rt          | ||||          S t          j        t          j        | |                    dd                              S )z1Get QK output tensor based on the specified mode.r   r4   rQ   )_compute_qk_output_for_mode_0r   
zeros_likematmulrv   )ry   rz   r{   r|   rn   r}   s         r#   _get_qk_output_for_aten_spdar      s[     !!,q%';U
 
 	

 QB0C0C D DEEEr%   c                 J     t          j         z  dk     fd           dS )z-Validate Group Query Attention configuration.r   c                      d d  dS )Nzq_num_heads (z%) must be divisible by kv_num_heads (z	) for GQAr   )r|   r{   s   r#   r9   z-_validate_gqa_configuration.<locals>.<lambda>   s    y 3yyZnyyy r%   N)r   r[   )r{   r|   s   ``r#   _validate_gqa_configurationr      sA     
L22a7yyyyy    r%   c                    |}||k    r||z  }|                     |d          }t          || j        d                   }t          j        |          }| |z  }	||z  }
t          j        |	|
                    dd                    S )zDHelper function to compute QK output for qk_matmul_output_mode == 0.r   rX   rC   r4   rQ   )repeat_interleaverr   r8   rp   rq   r   r   rv   )ry   rz   r{   r|   rn   K_for_qkrepeat_factorscale_factor
sqrt_scaleQ_scaledK_scaleds              r#   r   r      s     H222+/CC&&}!&<<$UAGAJ77L<((J:~H*$H<("4"4R"<"<===r%           )	is_causalkv_num_headsq_num_headsr}   rn   softcapsoftmax_precisionV	attn_maskpast_key
past_valuer   r   r   r   r   c                ^   | j         d         }t          | j                   dk    rr| j         d         }| j         }|.|||j         d         |j         d         z   |j         d         |z  f}n|||j         d         |j         d         |z  f}|}||||d         f}n| j         d         }| j         }|A|j         d         |j         d         |j         d         |j         d         z   |j         d         f}n|j         }|}| j         d         | j         d         | j         d         |d         f}t          j        || j        | j                  }t          j        ||j        |j                  }t          j        ||j        |j                  }t          j        || j        | j                  }||||fS )z@Fake implementation for Attention-23 for torch.compile purposes.r   rC   r   Nr5   dtypedevice)r8   rZ   r   emptyr   r   )ry   rz   r   r   r   r   r   r   r   r}   rn   r   r   r<   q_sequence_lengthoutput_shapepresent_key_shapepresent_value_shapeqk_output_shaperl   present_keypresent_value	qk_outputs                          r#   _attention_23_fake_implr     s   " J 17||qGAJw q!AGAJ.
l*	! 

l*	! 0 a 	
 GAJw 

q!AGAJ.
	! !"/ GAJGAJGAJa 	
 [QWQXFFFF+/qwqxPPPKK 31718TTTMO1718LLLI;y88r%   	Attentionc                	   d\  }}}t          | j                  }| j        d         }t          | j                  dk    r`t          j        |dk    o|dk    d            | j        d         }t	          | ||          } t	          |||          }t	          |||          }t          j        t          | j                  dk    o/t          |j                  dk    ot          |j                  dk    d            | j        |         }t          |
|          }
|t          j        ||g|	          n|                                }|t          j        ||g|	          n|                                }||}}| j        |         }|j        |         }| j        |         }|j        |         }|d
k    o"|	dk    o|du o|du p|j        t          j	        k    }t          ||           |rSt          j        j                            | |||d
||
t          ||k                        }t          | ||||
|	          }nY||k    r3||z  }|                    ||	          }|                    ||	          }t          j        ||| j        | j                  }|rut          j        |du d            t          j        t          j        ||t          j	        | j                            }|                    | t+          d                    }|?|j        t          j	        k    r%|                    | t+          d                    }n||z   }t          |
| j        d                   }t-          j        |          } | | z  }!|| z  }"t          j        |!|"                    dd                    }#|#}|#|z   }$|	dk    r|$}|d
k    r|t          j        |$|z            z  }$|	dk    r|$}|x|t6          v rX|$j        }%|$                    t:          j        |                   }$t          j        |$d	          }&|&                    |%          }&n-t          j        |$d	          }&nt          j        |$d	          }&|	dk    r|&}t          j        |&|          }|dk    r=|                    dd                                           !                    ||d          }||||fS )zMAttention-23 https://onnx.ai/onnx/operators/onnx__Attention.html#attention-23)r   r5   rC   r   rC   c                      dS )Nz;q_num_heads and kv_num_heads must be provided for 3D inputsr   r   r%   r#   r9   zattention_23.<locals>.<lambda>  s    Q r%   r   rE   c                      dS )Nz'Q, K, and V should be 4D tensors by nowr   r   r%   r#   r9   zattention_23.<locals>.<lambda>  s    9 r%   NrX   r   )r   	dropout_pr   rn   
enable_gqar   c                      dS )Nz'Cannot use both is_causal and attn_maskr   r   r%   r#   r9   zattention_23.<locals>.<lambda>  s    +T r%   z-infr4   rQ   r5   )"rZ   r8   r   r[   rx   rr   r`   r/   r   boolr   nn
functionalscaled_dot_product_attentionr   r   zerosr   trilonesmasked_fillfloatrp   rq   r   rv   tanh-_ATTENTION_23_ALLOWED_INTERMEDIATE_PRECISIONStor   ONNX_DTYPE_TO_TORCH_DTYPEsoftmaxrw   ru   )'ry   rz   r   r   r   r   r   r   r   r}   rn   r   r   num_head_dimsequence_dimhead_diminput_shape_lenr<   r   q_head_sizer   r   r{   r|   kv_sequence_lengthcan_use_sdparl   r   r   	attn_biascausal_maskr   r   r   r   qk_matmul_outputqk_with_biasoriginal_dtype
qk_softmaxs'                                          r#   attention_23r   c  s   & ,3(L, !'llOJ 17||q12!2QQ	
 	
 	
 GAJa[99a\::a\::	LAGEc!'lla/ECLLA4E99   '(#Ke[11E
  		8Q-\2222WWYY  ! 		:q/|4444WWYY  qA ',/7<0-. 	3 	A!Q&	A%	A $?)/UZ"?	    35IJJJ k-$AA#';;  B 
 
 1 !
 
		 "666/3GGM##M|#DDA##M|#DDA K1
 
 
	
  	KLT!#T#T    *
%&*8	   K "--{lE&MMJJI  %*,,%119*eFmmLL		 &	1	 )
;; Y|,,
z>z> !<(2D2DR2L2LMM %	 ()3 A%%$I S=="UZw0F%G%GGL A%%$I ( $QQQ!-!3+#=>OP    #]<R@@@
']]>::

"]<R@@@

|<<<J A%%"I j!,, ! Q""--//44ZARTVWW 	 ;y88r%   )N)NNN)%__doc__rp   collections.abcr   typingr   r   typing_extensionsr   r   torch.onnx.opsr   r   r	   r
   dict_ops
OpOverload__annotations__	frozensetr   strintr&   Tensorr   r0   rm   r   rr   rx   r   r   r   tupler   r   r   r%   r#   <module>r      s5      $ $ $ $ $ $ $ $ $ $ $ $ $ $ ' ' ' ' ' '  * * * * * * Yt__WT]] AC UZ2H<= B B B09	  1 1 -!$19"b&1AxB (2r6"223   . ,0	  !  || | 5<(	    \    

R!?@@
 ,0	D/  !D/ D/ D/|D/|D/ |D/ 5<(	D/ D/ D/ D/ \D/ D/ D/ A@D/NHXe_ H H H H H H

L
&)
69

\
 
 
 
F|F|F F 	F
 E?F F \F F F F$47	   >|>|> > 	>
 E?> \> > > >4 )-'+)-Q9 !"!'+Q9 Q9 Q9|Q9|Q9 |Q9 %	Q9
 u|$Q9 &Q9 Q9 Q9 Q9 Q9 E?Q9 Q9  }Q9 5<u|U\ABQ9 Q9 Q9 Q9h 
+r233
 )-'+)-9 !"!'+9 9 9|9|9 |9 %	9
 u|$9 &9 9 9 9 9 E?9 9  }9 5<u|U\AB9 9 9 439 9 9r%   