Source code for moderatelyai_sdk.resources_async.datasets

"""Async datasets resource for the Moderately AI API."""

from typing import TYPE_CHECKING, List, Optional

from ..models._shared.dataset_operations import DatasetOperations
from ..types import PaginatedResponse
from ._base import AsyncBaseResource

if TYPE_CHECKING:
    from ..models.dataset_async import DatasetAsyncModel



[docs]
class AsyncDatasets(AsyncBaseResource):
    """Manage datasets in your teams (async version).

    Examples:
        ```python
        import asyncio
        import moderatelyai_sdk

        async def main():
            async with moderatelyai_sdk.AsyncModeratelyAI() as client:
                # List datasets (paginated; each item is a DatasetAsyncModel)
                datasets = await client.datasets.list()

                # Get a dataset with rich functionality
                dataset = await client.datasets.retrieve("dataset_123")

                # Create a new dataset
                dataset = await client.datasets.create(
                    name="Customer Data",
                    description="Customer interaction dataset"
                )

                # Now use rich methods on the dataset object:

                # Upload data to the dataset
                version = await dataset.upload_data("/path/to/sales_data.csv")
                print(f"Uploaded version {version.version_no} with {version.row_count} rows")

                # Download current data
                data_bytes = await dataset.download_data()
                await dataset.download_data(path="/save/local_copy.csv")

                # Work with specific versions
                versions = await dataset.list_data_versions()
                old_data = await dataset.download_data(version_id="version_123")

                # Schema management
                # Simple schema creation
                schema = await dataset.create_schema([
                    {"name": "user_id", "type": "int", "required": True},
                    {"name": "email", "type": "string"},
                    {"name": "signup_date", "type": "datetime"},
                ])

                # Auto-infer schema from sample data
                schema = await dataset.create_schema_from_sample("sample.csv")

        asyncio.run(main())
        ```
    """


[docs]
    async def list(
        self,
        *,
        dataset_ids: Optional[List[str]] = None,
        name_like: Optional[str] = None,
        name: Optional[str] = None,
        page: int = 1,
        page_size: int = 10,
        order_by: str = "createdAt",
        order_direction: str = "desc",
    ) -> PaginatedResponse:
        """Fetch one page of datasets (async).

        The filter and paging arguments are turned into a query by
        ``DatasetOperations.build_list_query`` and sent with a GET request to
        ``/datasets``. When the response contains an ``"items"`` entry, every
        element of it is wrapped in a ``DatasetAsyncModel`` bound to this
        resource's client; otherwise the response is returned unchanged.

        Note: Results are automatically filtered to the team specified in the
        client.

        Args:
            dataset_ids: Restrict the listing to these dataset IDs.
            name_like: Restrict the listing to names containing this text.
            name: Restrict the listing to this exact dataset name.
            page: 1-based page number. Defaults to 1.
            page_size: Number of items per page. Defaults to 10.
            order_by: Sort field ("createdAt", "updatedAt", "name").
                Defaults to "createdAt".
            order_direction: Sort direction ("asc" or "desc").
                Defaults to "desc".

        Returns:
            The paginated response, with its ``"items"`` (if any) replaced by
            ``DatasetAsyncModel`` instances.
        """
        filters = {
            "dataset_ids": dataset_ids,
            "name_like": name_like,
            "name": name,
            "page": page,
            "page_size": page_size,
            "order_by": order_by,
            "order_direction": order_direction,
        }
        params = DatasetOperations.build_list_query(**filters)

        page_data = await self._get("/datasets", options={"query": params})

        # Nothing to wrap: hand the response back exactly as received.
        if "items" not in page_data:
            return page_data

        # Imported lazily, only once there are items to convert.
        from ..models.dataset_async import DatasetAsyncModel

        page_data["items"] = [
            DatasetAsyncModel(raw, self._client) for raw in page_data["items"]
        ]
        return page_data



[docs]
    async def retrieve(self, dataset_id: str) -> "DatasetAsyncModel":
        """Fetch a single dataset by its ID (async).

        Issues a GET request to ``/datasets/{dataset_id}`` and wraps the
        response in a ``DatasetAsyncModel`` bound to this resource's client.

        Args:
            dataset_id: The ID of the dataset to fetch.

        Returns:
            The dataset model with rich functionality.

        Raises:
            NotFoundError: If the dataset doesn't exist.
        """
        # Imported at call time rather than at module import time.
        from ..models.dataset_async import DatasetAsyncModel

        endpoint = f"/datasets/{dataset_id}"
        payload = await self._get(endpoint)
        return DatasetAsyncModel(payload, self._client)



[docs]
    async def create(
        self,
        *,
        name: str,
        description: Optional[str] = None,
        **kwargs,
    ) -> "DatasetAsyncModel":
        """Create a dataset in the client's team (async).

        Builds a request body from ``name``, the client's ``team_id`` (sent as
        ``"teamId"``) and any extra keyword arguments, adds ``"description"``
        when one is given, and POSTs it to ``/datasets``.

        Args:
            name: The dataset's name.
            description: The dataset's description. Omitted from the request
                when ``None``.
            **kwargs: Additional dataset properties, merged into the request
                body after ``name`` and ``teamId``.

        Returns:
            The created dataset model with rich functionality.

        Raises:
            ValidationError: If the request data is invalid.
        """
        # Imported at call time rather than at module import time.
        from ..models.dataset_async import DatasetAsyncModel

        # Start from the required fields; the team always comes from the client.
        payload = {"name": name, "teamId": self._client.team_id}
        # Extra properties are merged afterwards, so they take precedence on
        # a key clash, exactly as when unpacked last in a dict literal.
        payload.update(kwargs)
        if description is not None:
            payload["description"] = description

        created = await self._post("/datasets", body=payload)
        return DatasetAsyncModel(created, self._client)



[docs]
    async def update(
        self,
        dataset_id: str,
        *,
        name: Optional[str] = None,
        description: Optional[str] = None,
        should_process: Optional[bool] = None,
        **kwargs,
    ) -> "DatasetAsyncModel":
        """Modify an existing dataset (async).

        Sends a PATCH request to ``/datasets/{dataset_id}``. The body starts
        as a copy of ``**kwargs``; each explicit argument that is not ``None``
        is then written on top of it (``should_process`` is sent as
        ``"shouldProcess"``).

        Args:
            dataset_id: The ID of the dataset to update.
            name: New dataset name.
            description: New dataset description.
            should_process: Whether to trigger dataset processing workflow.
            **kwargs: Additional properties to update.

        Returns:
            The updated dataset model.

        Raises:
            NotFoundError: If the dataset doesn't exist.
            ValidationError: If the request data is invalid.
        """
        # Imported at call time rather than at module import time.
        from ..models.dataset_async import DatasetAsyncModel

        payload = dict(kwargs)
        # (wire key, value) pairs in the order they are applied to the body.
        explicit_fields = (
            ("name", name),
            ("description", description),
            ("shouldProcess", should_process),
        )
        for wire_key, value in explicit_fields:
            # None means "not provided", so the field is left out.
            if value is not None:
                payload[wire_key] = value

        endpoint = f"/datasets/{dataset_id}"
        updated = await self._patch(endpoint, body=payload)
        return DatasetAsyncModel(updated, self._client)



[docs]
    async def delete(self, dataset_id: str) -> None:
        """Remove a dataset (async).

        Sends a DELETE request to ``/datasets/{dataset_id}`` and discards
        whatever the request helper returns.

        Args:
            dataset_id: The ID of the dataset to delete.

        Raises:
            NotFoundError: If the dataset doesn't exist.
        """
        endpoint = f"/datasets/{dataset_id}"
        await self._delete(endpoint)