
    [ibL                     j   d dl Z d dlZd dlZd dlZd dlmZ d dlmZmZm	Z	m
Z
 d dlmZ d dlmZmZmZmZ g dZ e
d          Z e
dd	
          Zeeef         Zeedf         Z e
dee          Z G d dee                   Z G d dee         e	e                   Z G d deeedf                            Z G d dee                   Z G d dee                   Z G d de          Z  G d dee                   Z!efdee         dee"e#z           dedz  de$e!e                  fdZ%dS )     N)Sequence)castGenericIterableTypeVar)
deprecated)default_generator	GeneratorrandpermTensor)DatasetIterableDatasetTensorDatasetStackDatasetConcatDatasetChainDatasetSubsetrandom_split_T_T_coT)	covariant._T_stackc                   &    e Zd ZdZdefdZd	dZdS )
r   a  An abstract class representing a :class:`Dataset`.

    All datasets that represent a map from keys to data samples should subclass
    it. All subclasses should overwrite :meth:`__getitem__`, supporting fetching a
    data sample for a given key. Subclasses could also optionally overwrite
    :meth:`__len__`, which is expected to return the size of the dataset by many
    :class:`~torch.utils.data.Sampler` implementations and the default options
    of :class:`~torch.utils.data.DataLoader`. Subclasses could also
    optionally implement :meth:`__getitems__`, for speedup batched samples
    loading. This method accepts list of indices of samples of batch and returns
    list of samples.

    .. note::
      :class:`~torch.utils.data.DataLoader` by default constructs an index
      sampler that yields integral indices.  To make it work with a map-style
      dataset with non-integral indices/keys, a custom sampler must be provided.
    returnc                      t          d          )Nz3Subclasses of Dataset should implement __getitem__.)NotImplementedErrorselfindexs     K/var/www/icac/venv/lib/python3.11/site-packages/torch/utils/data/dataset.py__getitem__zDataset.__getitem__:   s    !"WXXX    otherDataset[_T_co]ConcatDataset[_T_co]c                 $    t          | |g          S N)r   r   r#   s     r    __add__zDataset.__add__A   s    dE]+++r"   N)r#   r$   r   r%   )__name__
__module____qualname____doc__r   r!   r)    r"   r    r   r   '   sR         $YE Y Y Y Y, , , , , ,r"   r   c                   *    e Zd ZdZdee         fdZdS )r   a?  An iterable Dataset.

    All datasets that represent an iterable of data samples should subclass it.
    Such form of datasets is particularly useful when data come from a stream.

    All subclasses should overwrite :meth:`__iter__`, which would return an
    iterator of samples in this dataset.

    When a subclass is used with :class:`~torch.utils.data.DataLoader`, each
    item in the dataset will be yielded from the :class:`~torch.utils.data.DataLoader`
    iterator. When :attr:`num_workers > 0`, each worker process will have a
    different copy of the dataset object, so it is often desired to configure
    each copy independently to avoid having duplicate data returned from the
    workers. :func:`~torch.utils.data.get_worker_info`, when called in a worker
    process, returns information about the worker. It can be used in either the
    dataset's :meth:`__iter__` method or the :class:`~torch.utils.data.DataLoader` 's
    :attr:`worker_init_fn` option to modify each copy's behavior.

    Example 1: splitting workload across all workers in :meth:`__iter__`::

        >>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_DATALOADER)
        >>> # xdoctest: +SKIP("Fails on MacOS12")
        >>> class MyIterableDataset(torch.utils.data.IterableDataset):
        ...     def __init__(self, start, end):
        ...         super(MyIterableDataset).__init__()
        ...         assert end > start, "this example only works with end >= start"
        ...         self.start = start
        ...         self.end = end
        ...
        ...     def __iter__(self):
        ...         worker_info = torch.utils.data.get_worker_info()
        ...         if worker_info is None:  # single-process data loading, return the full iterator
        ...             iter_start = self.start
        ...             iter_end = self.end
        ...         else:  # in a worker process
        ...             # split workload
        ...             per_worker = int(math.ceil((self.end - self.start) / float(worker_info.num_workers)))
        ...             worker_id = worker_info.id
        ...             iter_start = self.start + worker_id * per_worker
        ...             iter_end = min(iter_start + per_worker, self.end)
        ...         return iter(range(iter_start, iter_end))
        ...
        >>> # should give same set of data as range(3, 7), i.e., [3, 4, 5, 6].
        >>> ds = MyIterableDataset(start=3, end=7)

        >>> # Single-process loading
        >>> print(list(torch.utils.data.DataLoader(ds, num_workers=0)))
        [tensor([3]), tensor([4]), tensor([5]), tensor([6])]

        >>> # xdoctest: +REQUIRES(POSIX)
        >>> # Multi-process loading with two worker processes
        >>> # Worker 0 fetched [3, 4].  Worker 1 fetched [5, 6].
        >>> # xdoctest: +IGNORE_WANT("non deterministic")
        >>> print(list(torch.utils.data.DataLoader(ds, num_workers=2)))
        [tensor([3]), tensor([5]), tensor([4]), tensor([6])]

        >>> # With even more workers
        >>> # xdoctest: +IGNORE_WANT("non deterministic")
        >>> print(list(torch.utils.data.DataLoader(ds, num_workers=12)))
        [tensor([3]), tensor([5]), tensor([4]), tensor([6])]

    Example 2: splitting workload across all workers using :attr:`worker_init_fn`::

        >>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_DATALOADER)
        >>> class MyIterableDataset(torch.utils.data.IterableDataset):
        ...     def __init__(self, start, end):
        ...         super(MyIterableDataset).__init__()
        ...         assert end > start, "this example only works with end >= start"
        ...         self.start = start
        ...         self.end = end
        ...
        ...     def __iter__(self):
        ...         return iter(range(self.start, self.end))
        ...
        >>> # should give same set of data as range(3, 7), i.e., [3, 4, 5, 6].
        >>> ds = MyIterableDataset(start=3, end=7)

        >>> # Single-process loading
        >>> print(list(torch.utils.data.DataLoader(ds, num_workers=0)))
        [3, 4, 5, 6]
        >>>
        >>> # Directly doing multi-process loading yields duplicate data
        >>> print(list(torch.utils.data.DataLoader(ds, num_workers=2)))
        [3, 3, 4, 4, 5, 5, 6, 6]

        >>> # Define a `worker_init_fn` that configures each dataset copy differently
        >>> def worker_init_fn(worker_id):
        ...     worker_info = torch.utils.data.get_worker_info()
        ...     dataset = worker_info.dataset  # the dataset copy in this worker process
        ...     overall_start = dataset.start
        ...     overall_end = dataset.end
        ...     # configure the dataset to only process the split workload
        ...     per_worker = int(math.ceil((overall_end - overall_start) / float(worker_info.num_workers)))
        ...     worker_id = worker_info.id
        ...     dataset.start = overall_start + worker_id * per_worker
        ...     dataset.end = min(dataset.start + per_worker, overall_end)
        ...

        >>> # Mult-process loading with the custom `worker_init_fn`
        >>> # Worker 0 fetched [3, 4].  Worker 1 fetched [5, 6].
        >>> print(list(torch.utils.data.DataLoader(ds, num_workers=2, worker_init_fn=worker_init_fn)))
        [3, 5, 4, 6]

        >>> # With even more workers
        >>> print(list(torch.utils.data.DataLoader(ds, num_workers=12, worker_init_fn=worker_init_fn)))
        [3, 4, 5, 6]
    r#   c                 $    t          | |g          S r'   )r   r(   s     r    r)   zIterableDataset.__add__   s    T5M***r"   N)r*   r+   r,   r-   r   r   r)   r.   r"   r    r   r   I   s@        j jX+WU^ + + + + + +r"   r   c                   P    e Zd ZU dZeedf         ed<   deddfdZd Zde	fdZ
dS )	r   zDataset wrapping tensors.

    Each sample will be retrieved by indexing tensors along the first dimension.

    Args:
        *tensors (Tensor): tensors that have the same size of the first dimension.
    .tensorsr   Nc                 j    t          fdD                       rt          d          | _        d S )Nc              3   |   K   | ]6}d                               d           |                     d           k    V  7dS )r   N)size).0tensorr2   s     r    	<genexpr>z)TensorDataset.__init__.<locals>.<genexpr>   sB      JJwqzq!!V[[^^3JJJJJJr"   zSize mismatch between tensors)allAssertionErrorr2   )r   r2   s    `r    __init__zTensorDataset.__init__   sC    JJJJ'JJJJJ 	B !@AAAr"   c                 D    t          fd| j        D                       S )Nc              3   (   K   | ]}|         V  d S r'   r.   )r6   r7   r   s     r    r8   z,TensorDataset.__getitem__.<locals>.<genexpr>   s'      >>vVE]>>>>>>r"   )tupler2   r   s    `r    r!   zTensorDataset.__getitem__   s(    >>>>>>>>>>r"   c                 B    | j         d                             d          S Nr   )r2   r5   r   s    r    __len__zTensorDataset.__len__   s    |A##A&&&r"   )r*   r+   r,   r-   r>   r   __annotations__r;   r!   intrB   r.   r"   r    r   r      s           63; D    
? ? ?' ' ' ' ' ' 'r"   r   c                   n    e Zd ZU dZeez  ed<   dee         dee         ddfdZ	d Z
d	efd
ZdefdZdS )r   a  Dataset as a stacking of multiple datasets.

    This class is useful to assemble different parts of complex input data, given as datasets.

    Example:
        >>> # xdoctest: +SKIP
        >>> images = ImageDataset()
        >>> texts = TextDataset()
        >>> tuple_stack = StackDataset(images, texts)
        >>> tuple_stack[0] == (images[0], texts[0])
        >>> dict_stack = StackDataset(image=images, text=texts)
        >>> dict_stack[0] == {"image": images[0], "text": texts[0]}

    Args:
        *args (Dataset): Datasets for stacking returned as tuple.
        **kwargs (Dataset): Datasets for stacking returned as dict.
    datasetsargskwargsr   Nc                     |r^|rt          d          t          |d                    _        t           fd|D                       rt          d          | _        d S |rnt          |                                          }t          |d                    _        t           fd|D                       rt          d          | _        d S t          d          )NztSupported either ``tuple``- (via ``args``) or``dict``- (via ``kwargs``) like input/output, but both types are given.r   c              3   H   K   | ]}j         t          |          k    V  d S r'   _lengthlenr6   datasetr   s     r    r8   z(StackDataset.__init__.<locals>.<genexpr>   s0      DDG4<3w<</DDDDDDr"   zSize mismatch between datasetsc              3   H   K   | ]}j         t          |          k    V  d S r'   rK   rN   s     r    r8   z(StackDataset.__init__.<locals>.<genexpr>   s0      CCG4<3w<</CCCCCCr"   z%At least one dataset should be passed)
ValueErrorrM   rL   anyrF   listvalues)r   rG   rH   tmps   `   r    r;   zStackDataset.__init__   s     	F  ^   tAw<<DLDDDDtDDDDD C !ABBB DMMM 	Fv}}''Cs1v;;DLCCCCsCCCCC C !ABBB"DMMMDEEEr"   c                     t          | j        t                    r%fd| j                                        D             S t	          fd| j        D                       S )Nc                 (    i | ]\  }}||         S r.   r.   )r6   krO   r   s      r    
<dictcomp>z,StackDataset.__getitem__.<locals>.<dictcomp>   s#    NNN*!WAwu~NNNr"   c              3   (   K   | ]}|         V  d S r'   r.   )r6   rO   r   s     r    r8   z+StackDataset.__getitem__.<locals>.<genexpr>   s'      AAWU^AAAAAAr"   )
isinstancerF   dictitemsr>   r   s    `r    r!   zStackDataset.__getitem__   se    dmT** 	ONNNN8K8K8M8MNNNNAAAA4=AAAAAAr"   indicesc           	      $   t          | j        t                    rd |D             }| j                                        D ]\  }}t	          t          |dd                     r|                    |          }t          |          t          |          k    r/t          dt          |           dt          |                     t          ||d          D ]
\  }}|||<   t          ||d          D ]\  }}||         ||<   |S d |D             }	| j        D ]}t	          t          |dd                     r|                    |          }t          |          t          |          k    r/t          dt          |           dt          |                     t          ||	d          D ]\  }}
|

                    |           t          ||	d          D ] \  }}
|

                    ||                    !d |	D             }|S )	Nc                     g | ]}i S r.   r.   r6   _s     r    
<listcomp>z-StackDataset.__getitems__.<locals>.<listcomp>  s    (=(=(=(=(=(=r"   __getitems__z0Nested dataset's output size mismatch. Expected z, got Tstrictc                     g | ]}g S r.   r.   ra   s     r    rc   z-StackDataset.__getitems__.<locals>.<listcomp>  s    !6!6!6"!6!6!6r"   c                 ,    g | ]}t          |          S r.   )r>   )r6   samples     r    rc   z-StackDataset.__getitems__.<locals>.<listcomp>$  s    &N&N&NuV}}&N&N&Nr"   )r[   rF   r\   r]   callablegetattrrd   rM   rQ   zipappend)r   r^   
dict_batchrX   rO   r]   datad_sampleidx
list_batcht_sampletuple_batchs               r    rd   zStackDataset.__getitems__  s   dmT** 	(=(=W(=(=(=J"m1133 3 3
7GG^TBBCC 3#0099E5zzS\\11(J),WJ J=@ZZJ J   +.eZ*M*M*M + +h&*+ *-Wj)N)N)N 3 3X&-cl3 "7!6g!6!6!6
} 	2 	2G>>?? 2,,W55u::W--$F%(\\F F9<UF F   '*%D&I&I&I * *ND(OOD))))* &)*T%J%J%J 2 2MCOOGCL11112&N&N:&N&N&Nr"   c                     | j         S r'   )rL   rA   s    r    rB   zStackDataset.__len__'  s
    |r"   )r*   r+   r,   r-   r>   r\   rC   r   r   r;   r!   rS   rd   rD   rB   r.   r"   r    r   r      s          $ dlFgen F F4 F F F F(B B B
#D # # # #J      r"   r   c                        e Zd ZU dZeee                  ed<   ee         ed<   e	d             Z
dee         ddf fdZdefdZd	 Ze ed
e          d                         Z xZS )r   zDataset as a concatenation of multiple datasets.

    This class is useful to assemble different existing datasets.

    Args:
        datasets (sequence): List of datasets to be concatenated
    rF   cumulative_sizesc                 p    g d}}| D ].}t          |          }|                    ||z              ||z  }/|S r@   )rM   rm   )sequencersels        r    cumsumzConcatDataset.cumsum7  sH    11 	 	AAAHHQUOOOFAAr"   r   Nc                 X   t                                                       t          |          | _        t	          | j                  dk    rt          d          | j        D ]&}t          |t                    rt          d          '|                     | j                  | _	        d S )Nr   z(datasets should not be an empty iterablez.ConcatDataset does not support IterableDataset)
superr;   rS   rF   rM   r:   r[   r   r~   rw   )r   rF   d	__class__s      r    r;   zConcatDataset.__init__@  s    Xt}"" !KLLL 	W 	WA!_-- W$%UVVVW $DM : :r"   c                     | j         d         S )Nrw   rA   s    r    rB   zConcatDataset.__len__J  s    $R((r"   c                 
   |dk     r5| t          |           k    rt          d          t          |           |z   }t          j        | j        |          }|dk    r|}n|| j        |dz
           z
  }| j        |         |         S )Nr   z8absolute value of index should not exceed dataset length   )rM   rQ   bisectbisect_rightrw   rF   )r   rq   dataset_idx
sample_idxs       r    r!   zConcatDataset.__getitem__M  s    77tc$ii N   d))c/C)$*?EE!JJt4[1_EEJ}[)*55r"   z>`cummulative_sizes` attribute is renamed to `cumulative_sizes`)categoryc                     | j         S r'   r   rA   s    r    cummulative_sizeszConcatDataset.cummulative_sizes[  s     $$r"   )r*   r+   r,   r-   rS   r   r   rC   rD   staticmethodr~   r   r;   rB   r!   propertyr   FutureWarningr   __classcell__r   s   @r    r   r   +  s          75>""""3i  \;'!2 ;t ; ; ; ; ; ;) ) ) ) )6 6 6 ZH  % %	  X
% % % % %r"   r   c                   J     e Zd ZdZdee         ddf fdZd ZdefdZ	 xZ
S )r   a_  Dataset for chaining multiple :class:`IterableDataset` s.

    This class is useful to assemble different existing dataset streams. The
    chaining operation is done on-the-fly, so concatenating large-scale
    datasets with this class will be efficient.

    Args:
        datasets (iterable of IterableDataset): datasets to be chained together
    rF   r   Nc                 V    t                                                       || _        d S r'   )r   r;   rF   )r   rF   r   s     r    r;   zChainDataset.__init__o  s$     r"   c              #   v   K   | j         D ].}t          |t                    st          d          |E d {V  /d S )N*ChainDataset only supports IterableDataset)rF   r[   r   r:   )r   r   s     r    __iter__zChainDataset.__iter__s  sV       	 	Aa11 S$%QRRRLLLLLLLL	 	r"   c                     d}| j         D ]8}t          |t                    st          d          |t	          |          z  }9|S )Nr   r   )rF   r[   r   r:   rM   )r   totalr   s      r    rB   zChainDataset.__len__y  sP     	 	Aa11 S$%QRRRSVVOEEr"   )r*   r+   r,   r-   r   r   r;   r   rD   rB   r   r   s   @r    r   r   d  s         !'!2 !t ! ! ! ! ! !          r"   r   c                       e Zd ZU dZee         ed<   ee         ed<   dee         dee         ddfdZ	d Z
dee         dee         fdZdefd	ZdS )
r   z
    Subset of a dataset at specified indices.

    Args:
        dataset (Dataset): The whole Dataset
        indices (sequence): Indices in the whole set selected for subset
    rO   r^   r   Nc                 "    || _         || _        d S r'   rO   r^   )r   rO   r^   s      r    r;   zSubset.__init__  s    r"   c                      t          |t                    r j         fd|D                      S  j         j        |                  S )Nc                 *    g | ]}j         |         S r.   r^   )r6   ir   s     r    rc   z&Subset.__getitem__.<locals>.<listcomp>  s     > > >Qa > > >r"   )r[   rS   rO   r^   )r   rq   s   ` r    r!   zSubset.__getitem__  sL    c4   	@< > > > ># > > >??|DL-..r"   c                      t          t           j        dd                     r& j                             fd|D                       S  fd|D             S )Nrd   c                 *    g | ]}j         |         S r.   r   r6   rq   r   s     r    rc   z'Subset.__getitems__.<locals>.<listcomp>  s     -S-S-SCdl3.?-S-S-Sr"   c                 @    g | ]}j         j        |                  S r.   r   r   s     r    rc   z'Subset.__getitems__.<locals>.<listcomp>  s'    GGGDLc!23GGGr"   )rj   rk   rO   rd   )r   r^   s   ` r    rd   zSubset.__getitems__  si     GDL.$??@@ 	H<,,-S-S-S-S7-S-S-STTTGGGGwGGGGr"   c                 *    t          | j                  S r'   )rM   r^   rA   s    r    rB   zSubset.__len__  s    4<   r"   )r*   r+   r,   r-   r   r   rC   r   rD   r;   r!   rS   rd   rB   r.   r"   r    r   r     s           U^c] # 4    / / /
HDI H$u+ H H H H! ! ! ! ! ! !r"   r   rO   lengths	generatorr   c                    
 t          j        t          |          d          rt          |          dk    rg }t          |          D ]]\  }}|dk     s|dk    rt	          d| d          t          j        t                     |z            }|                    |           ^t                     t          |          z
  }t          |          D ]$}|t          |          z  }||xx         dz  cc<   %|}t          |          D ]%\  }}	|	dk    rt          j
        d| dd           &t          |          t                     k    rt	          d	          t          t          |          |
                                          
t          t          t                   |          } 
fdt!          t#          j        |          |d          D             S )a  
    Randomly split a dataset into non-overlapping new datasets of given lengths.

    If a list of fractions that sum up to 1 is given,
    the lengths will be computed automatically as
    floor(frac * len(dataset)) for each fraction provided.

    After computing the lengths, if there are any remainders, 1 count will be
    distributed in round-robin fashion to the lengths
    until there are no remainders left.

    Optionally fix the generator for reproducible results, e.g.:

    Example:
        >>> # xdoctest: +SKIP
        >>> generator1 = torch.Generator().manual_seed(42)
        >>> generator2 = torch.Generator().manual_seed(42)
        >>> random_split(range(10), [3, 7], generator=generator1)
        >>> random_split(range(30), [0.3, 0.3, 0.4], generator=generator2)

    Args:
        dataset (Dataset): Dataset to be split
        lengths (sequence): lengths or fractions of splits to be produced
        generator (Generator): Generator used for the random permutation.
    r   r   zFraction at index z is not between 0 and 1zLength of split at index z- is 0. This might result in an empty dataset.   )
stacklevelzDSum of input lengths does not equal the length of the input dataset!)r   c                 L    g | ] \  }}t          ||z
  |                   !S r.   )r   )r6   offsetlengthrO   r^   s      r    rc   z random_split.<locals>.<listcomp>  sE       FF 	w& 89::  r"   Tre   )mathisclosesum	enumeraterQ   floorrM   rm   rangewarningswarnr   tolistr   r   rD   rl   	itertools
accumulate)rO   r   r   subset_lengthsr   fracn_items_in_split	remainderidx_to_add_atr   r^   s   `         @r    r   r     s   < |CLL!$$ W):):$& )) 	4 	4GAtaxx4!88 !Pa!P!P!PQQQ#z#g,,*=>>!!"23333LL3~#6#66	y!! 	/ 	/AN 3 33M=)))Q.)))) "7++ 	 	IAv{{> > > >     7||s7||##R
 
 	
 s7||y999@@BBG8C='**G    !)"6w"?"?QUVVV   r"   )&r   r   r   r   collections.abcr   typingr   r   r   r   typing_extensionsr   torchr	   r
   r   r   __all__r   r   r\   str_T_dictr>   _T_tupler   r   r   r   r   r   r   r   rD   floatrS   r   r.   r"   r    <module>r      s          $ $ $ $ $ $ 4 3 3 3 3 3 3 3 3 3 3 3 ( ( ( ( ( ( A @ @ @ @ @ @ @ @ @ @ @	 	 	 WT]]4(((
sEz
7:x11, , , , ,gen , , ,Dn+ n+ n+ n+ n+genhuo n+ n+ n+h' ' ' ' 'GE&#+./ ' ' '.T T T T T78$ T T Tn6% 6% 6% 6% 6%GEN 6% 6% 6%r    ?   <! ! ! ! !WU^ ! ! !H #4> >R[>cEk"> 4> 
&*	> > > > > >r"   