Source code for moderatelyai_sdk.resources.datasets

"""Datasets resource for the Moderately AI API."""

from typing import List, Optional

from ..models.dataset import DatasetModel
from ..types import PaginatedResponse
from ._base import BaseResource


class Datasets(BaseResource):
    """Resource for managing the datasets that belong to your team.

    Methods that return a dataset wrap the API payload in a
    ``DatasetModel`` bound to this resource's client, so follow-up
    operations can be invoked directly on the returned object.

    Examples:
        ```python
        # List datasets (paginated; items are DatasetModel objects)
        datasets = client.datasets.list()

        # Fetch one dataset as a rich model
        dataset = client.datasets.retrieve("dataset_123")

        # Create a new dataset
        dataset = client.datasets.create(
            name="Customer Data",
            description="Customer interaction dataset"
        )

        # The returned model exposes further operations:

        # Upload data to the dataset
        version = dataset.upload_data("/path/to/sales_data.csv")
        print(f"Uploaded version {version.version_no} with {version.row_count} rows")

        # Download the current data (in memory or to a path)
        data_bytes = dataset.download_data()
        dataset.download_data(path="/save/local_copy.csv")

        # Work with specific versions
        versions = dataset.list_data_versions()
        old_data = dataset.download_data(version_id="version_123")

        # Schema management
        # Simple schema creation
        schema = dataset.create_schema([
            {"name": "user_id", "type": "int", "required": True},
            {"name": "email", "type": "string"},
            {"name": "signup_date", "type": "datetime"},
        ])

        # Infer a schema from sample data
        schema = dataset.create_schema_from_sample("sample.csv")

        # Advanced schema via the fluent builder
        schema = (dataset.schema_builder()
            .add_column("id", "int", required=True)
            .add_column("name", "string", description="User name")
            .with_parsing(delimiter=",", header_row=1)
            .as_current()
            .create())

        # Inspect dataset metadata
        print(f"Dataset has {dataset.record_count} records")
        print(f"Processing status: {dataset.processing_status}")
        current_schema = dataset.get_current_schema()

        # Update the dataset
        dataset.update(name="Updated Dataset Name", should_process=True)

        # Delete the dataset
        dataset.delete()
        ```
    """

    def list(
        self,
        *,
        dataset_ids: Optional[List[str]] = None,
        name_like: Optional[str] = None,
        name: Optional[str] = None,
        page: int = 1,
        page_size: int = 10,
        order_by: str = "createdAt",
        order_direction: str = "desc",
    ) -> PaginatedResponse:
        """Return one page of datasets.

        Note:
            Results are automatically filtered to the team specified in
            the client.

        Args:
            dataset_ids: Restrict the result to these dataset IDs.
            name_like: Keep only datasets whose name contains this text.
            name: Keep only datasets with exactly this name.
            page: Page number (1-based). Defaults to 1.
            page_size: Number of items per page. Defaults to 10.
            order_by: Field to sort by ("createdAt", "updatedAt", "name").
                Defaults to "createdAt".
            order_direction: Sort direction ("asc" or "desc").
                Defaults to "desc".

        Returns:
            The paginated response. When it carries an "items" entry,
            each item has been replaced by a ``DatasetModel``.
        """
        params = {
            "page": page,
            "pageSize": page_size,
            "orderBy": order_by,
            "orderDirection": order_direction,
        }

        # Optional filters are sent only when the caller supplied them.
        optional_filters = (
            ("datasetIds", dataset_ids),
            ("nameLike", name_like),
            ("name", name),
        )
        for key, value in optional_filters:
            if value is not None:
                params[key] = value

        page_data = self._get("/datasets", options={"query": params})

        # A response without "items" is handed back untouched.
        if "items" not in page_data:
            return page_data

        # Swap each raw item for a model bound to this client.
        models = []
        for raw in page_data["items"]:
            models.append(DatasetModel(raw, self._client))
        page_data["items"] = models
        return page_data

    def retrieve(self, dataset_id: str) -> DatasetModel:
        """Fetch a single dataset by its ID.

        Args:
            dataset_id: The ID of the dataset to retrieve.

        Returns:
            The dataset wrapped in a ``DatasetModel``.

        Raises:
            NotFoundError: If the dataset doesn't exist.
        """
        path = f"/datasets/{dataset_id}"
        payload = self._get(path)
        return DatasetModel(payload, self._client)

    def create(
        self,
        *,
        name: str,
        description: Optional[str] = None,
        **kwargs,
    ) -> DatasetModel:
        """Create a dataset in the client's team.

        Note:
            The request carries the client's ``team_id`` as "teamId".
            Entries in ``**kwargs`` are merged afterwards, so a "teamId"
            supplied there replaces the client's value.

        Args:
            name: The dataset's name.
            description: The dataset's description; omitted from the
                request when ``None``.
            **kwargs: Additional dataset properties, sent as given.

        Returns:
            The newly created dataset wrapped in a ``DatasetModel``.

        Raises:
            ValidationError: If the request data is invalid.
        """
        # The API expects camelCase keys, hence "teamId".
        payload = {"name": name, "teamId": self._client.team_id}
        payload.update(kwargs)
        if description is not None:
            payload["description"] = description

        created = self._post("/datasets", body=payload)
        return DatasetModel(created, self._client)

    def update(
        self,
        dataset_id: str,
        *,
        name: Optional[str] = None,
        description: Optional[str] = None,
        should_process: Optional[bool] = None,
        **kwargs,
    ) -> DatasetModel:
        """Apply a partial update to an existing dataset.

        Only arguments that are not ``None`` are sent. Explicit
        arguments take precedence over same-named keys in ``**kwargs``.

        Args:
            dataset_id: The ID of the dataset to update.
            name: New dataset name.
            description: New dataset description.
            should_process: Whether to trigger dataset processing workflow
                (sent as "shouldProcess").
            **kwargs: Additional properties to update, sent as given.

        Returns:
            The updated dataset wrapped in a ``DatasetModel``.

        Raises:
            NotFoundError: If the dataset doesn't exist.
            ValidationError: If the request data is invalid.
        """
        explicit = {
            "name": name,
            "description": description,
            "shouldProcess": should_process,
        }
        changes = dict(kwargs)
        changes.update(
            {field: value for field, value in explicit.items() if value is not None}
        )

        updated = self._patch(f"/datasets/{dataset_id}", body=changes)
        return DatasetModel(updated, self._client)

    def delete(self, dataset_id: str) -> None:
        """Delete a dataset.

        Args:
            dataset_id: The ID of the dataset to delete.

        Raises:
            NotFoundError: If the dataset doesn't exist.
        """
        path = f"/datasets/{dataset_id}"
        self._delete(path)