Source code for moderatelyai_sdk.models.file_async

"""Async file model with rich functionality for file operations.

This module provides the FileAsyncModel class, which represents a file with rich
functionality for async file operations like downloading, deleting, and checking
file properties.

Example:
    ```python
    import asyncio
    from moderatelyai_sdk import AsyncModeratelyAI

    async def main():
        async with AsyncModeratelyAI(api_key="your_key", team_id="your_team") as client:
            # Upload a file and get a FileAsyncModel instance
            file = await client.files.upload("document.pdf", name="Important Document")

            # Use rich file operations
            if file.is_ready() and file.is_document():
                content = await file.download()  # Download to memory
                await file.download(path="./local_copy.pdf")  # Download to disk

            # Check file properties
            print(f"File: {file.name} ({file.file_size} bytes)")
            print(f"Type: {file.mime_type}, Extension: {file.get_extension()}")

            # Delete when done
            await file.delete()

    asyncio.run(main())
    ```
"""

from pathlib import Path
from typing import Any, Dict, Optional, Union

import aiofiles
import httpx

from ..exceptions import APIError
from ._base_async import BaseAsyncModel



[docs]
class FileAsyncModel(BaseAsyncModel):
    """Async model representing a file with rich file operations.

    FileAsyncModel provides a high-level async interface for working with files
    in the Moderately AI platform. Instead of working with raw dictionaries, you get
    a rich object with async methods for common file operations.

    This class is returned by async file operations like:
    - `await client.files.upload()`
    - `await client.files.retrieve()`
    - `await client.files.list()` (returns list of FileAsyncModel instances)

    Every attribute below is a read-only property backed by the raw API payload
    stored in ``self._data``. ``file_id``, ``name``, ``mime_type``, ``team_id``,
    ``created_at`` and ``updated_at`` index the payload directly and raise
    ``KeyError`` if their key is absent. The remaining attributes return ``None``
    when their key is missing, except ``status``, which falls back to
    ``"unknown"``.

    Attributes:
        file_id: Unique identifier for the file (payload key ``fileId``)
        name: Display name of the file (payload key ``fileName``)
        original_name: Original filename when uploaded, or None
        mime_type: MIME type (e.g., "text/csv", "application/pdf")
        file_size: Size in bytes, or None
        file_hash: SHA256 hash of file content, or None
        team_id: Team that owns this file
        dataset_id: Associated dataset ID (if any)
        status: Upload/processing status (payload key ``uploadStatus``)
        metadata: Additional file metadata, or None
        created_at: Creation timestamp, returned as stored in the payload
        updated_at: Last update timestamp, returned as stored in the payload

    Example:
        ```python
        # Get a file and check its properties
        file = await client.files.retrieve("file_123")

        if file.is_csv() and file.is_ready():
            print(f"Ready CSV file: {file.name} ({file.file_size} bytes)")

            # Download to memory
            content = await file.download()

            # Or download to disk
            await file.download(path="./data.csv")
        ```
    """

    @property
    def file_id(self) -> str:
        """The unique identifier for this file."""
        return self._data["fileId"]

    @property
    def name(self) -> str:
        """The file name."""
        return self._data["fileName"]

    @property
    def original_name(self) -> Optional[str]:
        """The original filename when uploaded."""
        return self._data.get("originalName")

    @property
    def mime_type(self) -> str:
        """The MIME type of the file."""
        return self._data["mimeType"]

    @property
    def file_size(self) -> Optional[int]:
        """The size of the file in bytes."""
        return self._data.get("fileSize")

    @property
    def file_hash(self) -> Optional[str]:
        """The SHA256 hash of the file."""
        return self._data.get("fileHash")

    @property
    def team_id(self) -> str:
        """The team this file belongs to."""
        return self._data["teamId"]

    @property
    def dataset_id(self) -> Optional[str]:
        """The dataset this file is associated with, if any."""
        return self._data.get("datasetId")

    @property
    def status(self) -> str:
        """The file status (uploaded, processing, ready, error)."""
        return self._data.get("uploadStatus", "unknown")

    @property
    def metadata(self) -> Optional[Dict[str, Any]]:
        """Additional metadata for the file."""
        return self._data.get("metadata")

    @property
    def created_at(self) -> str:
        """When this file was created."""
        return self._data["createdAt"]

    @property
    def updated_at(self) -> str:
        """When this file was last updated."""
        return self._data["updatedAt"]


[docs]
    async def download(self, *, path: Optional[Union[str, Path]] = None) -> Optional[bytes]:
        """Download the file content (async).

        Downloads the file content either to memory or to a local file. This method
        handles the presigned URL workflow automatically and creates parent directories
        as needed when saving to disk.

        Args:
            path: Optional path to save the file. If provided, saves to this location
                 and creates parent directories if they don't exist. If not provided,
                 returns the file content as bytes.

        Returns:
            If path is provided: None (file is saved to disk)
            If path is not provided: The file content as bytes

        Raises:
            APIError: If the download fails or the file is not ready
            IOError: If unable to write to the specified path

        Example:
            ```python
            # Download to memory
            content = await file.download()
            print(f"Downloaded {len(content)} bytes")

            # Download to disk
            await file.download(path="./downloads/myfile.pdf")

            # Download with automatic directory creation
            await file.download(path="./new_folder/subfolder/file.csv")
            ```
        """
        # Get download URL
        response = await self._client._request(
            method="GET",
            path=f"/files/{self.file_id}/download",
            cast_type=dict,
        )

        # Handle different response formats
        file_data: bytes
        if "downloadUrl" in response:
            # Download from presigned URL
            download_url = response["downloadUrl"]
            try:
                async with httpx.AsyncClient() as client:
                    download_response = await client.get(download_url)
                    download_response.raise_for_status()
                    file_data = download_response.content
            except httpx.HTTPError as e:
                raise APIError(f"Failed to download file from URL: {e}") from e
        elif "content" in response:
            # Base64 encoded content
            import base64
            file_data = base64.b64decode(response["content"])
        else:
            # Assume response is already binary data
            file_data = (
                response if isinstance(response, bytes) else str(response).encode()
            )

        # Save to file or return bytes
        if path is not None:
            file_path = Path(path)
            # Create parent directories if they don't exist
            file_path.parent.mkdir(parents=True, exist_ok=True)

            async with aiofiles.open(file_path, "wb") as f:
                await f.write(file_data)
            return None
        else:
            return file_data



[docs]
    async def delete(self) -> None:
        """Delete this file permanently (async).

        This operation cannot be undone. The file will be removed from both
        the database and cloud storage.

        Raises:
            APIError: If the deletion fails
            NotFoundError: If the file doesn't exist

        Example:
            ```python
            # Delete a file
            await file.delete()
            # File is now permanently deleted
            ```
        """
        await self._client._request(
            method="DELETE",
            path=f"/files/{self.file_id}",
            cast_type=type(None),
        )



[docs]
    def is_ready(self) -> bool:
        """Check if the file is ready for use (processing complete).

        Files may need processing after upload. Use this method to check if
        a file is ready for operations like downloading or analysis.

        Returns:
            True if the file status is 'ready' or 'completed', False otherwise.

        Example:
            ```python
            if file.is_ready():
                content = await file.download()
                print("File is ready and downloaded!")
            else:
                print("File is still processing...")
            ```
        """
        return self.status in ("ready", "completed")



[docs]
    def is_processing(self) -> bool:
        """Check if the file is currently being processed.

        Returns:
            True if the file status is 'processing', False otherwise.

        Example:
            ```python
            if file.is_processing():
                print("Please wait, file is being processed...")
            ```
        """
        return self.status == "processing"



[docs]
    def has_error(self) -> bool:
        """Check if the file has an error status.

        Returns:
            True if the file status is 'error', False otherwise.

        Example:
            ```python
            if file.has_error():
                print(f"File {file.name} failed to process")
                # Handle error case
            ```
        """
        return self.status == "error"



[docs]
    def get_extension(self) -> str:
        """Get the file extension from the filename.

        Returns:
            The file extension (including the dot), or empty string if none.

        Example:
            ```python
            ext = file.get_extension()
            if ext == '.pdf':
                print("This is a PDF file")
            ```
        """
        return Path(self.name).suffix



[docs]
    def is_image(self) -> bool:
        """Check if this file is an image based on MIME type.

        Detects common image formats like JPEG, PNG, GIF, SVG, etc.

        Returns:
            True if the MIME type indicates an image.

        Example:
            ```python
            if file.is_image():
                print(f"Image file: {file.name} ({file.mime_type})")
                # Handle image-specific logic
            ```
        """
        return self.mime_type.startswith("image/")



[docs]
    def is_document(self) -> bool:
        """Check if this file is a document (PDF, Word, etc.).

        Detects common document formats including:
        - PDF files
        - Microsoft Word documents (.doc, .docx)
        - Microsoft Excel spreadsheets (.xls, .xlsx)
        - Microsoft PowerPoint presentations (.ppt, .pptx)

        Returns:
            True if the MIME type indicates a document.

        Example:
            ```python
            if file.is_document():
                print(f"Document: {file.name}")
                # Process document
            ```
        """
        document_types = {
            "application/pdf",
            "application/msword",
            "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
            "application/vnd.ms-excel",
            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
            "application/vnd.ms-powerpoint",
            "application/vnd.openxmlformats-officedocument.presentationml.presentation",
        }
        return self.mime_type in document_types



[docs]
    def is_text(self) -> bool:
        """Check if this file is a text file.

        Detects all text-based files including plain text, CSV, JSON, XML, etc.

        Returns:
            True if the MIME type indicates a text file.

        Example:
            ```python
            if file.is_text():
                content = await file.download()
                text_content = content.decode('utf-8')
                print(f"Text content: {text_content[:100]}...")
            ```
        """
        return self.mime_type.startswith("text/")



[docs]
    def is_csv(self) -> bool:
        """Check if this file is a CSV file.

        Specifically detects CSV (Comma-Separated Values) files, which are
        commonly used for tabular data.

        Returns:
            True if the MIME type indicates a CSV file.

        Example:
            ```python
            if file.is_csv():
                print(f"CSV file with {file.file_size} bytes of data")
                # Process as structured data
            ```
        """
        return self.mime_type == "text/csv"


    async def _refresh(self) -> None:
        """Refresh this file from the API (async)."""
        response = await self._client._request(
            method="GET",
            path=f"/files/{self.file_id}",
            cast_type=dict,
        )
        self._data = response