# Source code for moderatelyai_sdk.models.file_async

"""Async file model with rich functionality for file operations.

This module provides the FileAsyncModel class, which represents a file with rich
functionality for async file operations like downloading, deleting, and checking
file properties.

Example:
    ```python
    import asyncio
    from moderatelyai_sdk import AsyncModeratelyAI

    async def main():
        async with AsyncModeratelyAI(api_key="your_key", team_id="your_team") as client:
            # Upload a file and get a FileAsyncModel instance
            file = await client.files.upload("document.pdf", name="Important Document")

            # Use rich file operations
            if file.is_ready() and file.is_document():
                content = await file.download()  # Download to memory
                await file.download(path="./local_copy.pdf")  # Download to disk

            # Check file properties
            print(f"File: {file.name} ({file.file_size} bytes)")
            print(f"Type: {file.mime_type}, Extension: {file.get_extension()}")

            # Delete when done
            await file.delete()

    asyncio.run(main())
    ```
"""

from pathlib import Path
from typing import Any, Dict, Optional, Union

import aiofiles
import httpx

from ..exceptions import APIError
from ._base_async import BaseAsyncModel


class FileAsyncModel(BaseAsyncModel):
    """Async model representing a file with rich file operations.

    FileAsyncModel provides a high-level async interface for working with
    files in the Moderately AI platform. Instead of working with raw
    dictionaries, you get a rich object with async methods for common file
    operations.

    This class is returned by async file operations like:

    - `await client.files.upload()`
    - `await client.files.retrieve()`
    - `await client.files.list()` (returns list of FileAsyncModel instances)

    Attributes:
        file_id: Unique identifier for the file
        name: Display name of the file
        original_name: Original filename when uploaded
        mime_type: MIME type (e.g., "text/csv", "application/pdf")
        file_size: Size in bytes
        file_hash: SHA256 hash of file content
        team_id: Team that owns this file
        dataset_id: Associated dataset ID (if any)
        status: Upload/processing status
        metadata: Additional file metadata
        created_at: Creation timestamp
        updated_at: Last update timestamp

    Example:
        ```python
        # Get a file and check its properties
        file = await client.files.retrieve("file_123")

        if file.is_csv() and file.is_ready():
            print(f"Ready CSV file: {file.name} ({file.file_size} bytes)")

            # Download to memory
            content = await file.download()

            # Or download to disk
            await file.download(path="./data.csv")
        ```
    """

    @property
    def file_id(self) -> str:
        """The unique identifier for this file (``fileId`` in the payload)."""
        return self._data["fileId"]

    @property
    def name(self) -> str:
        """The file name (``fileName`` in the payload)."""
        return self._data["fileName"]

    @property
    def original_name(self) -> Optional[str]:
        """The original filename when uploaded, or None if not present."""
        return self._data.get("originalName")

    @property
    def mime_type(self) -> str:
        """The MIME type of the file (``mimeType`` in the payload)."""
        return self._data["mimeType"]

    @property
    def file_size(self) -> Optional[int]:
        """The size of the file in bytes, or None if not present."""
        return self._data.get("fileSize")

    @property
    def file_hash(self) -> Optional[str]:
        """The SHA256 hash of the file, or None if not present."""
        return self._data.get("fileHash")

    @property
    def team_id(self) -> str:
        """The team this file belongs to."""
        return self._data["teamId"]

    @property
    def dataset_id(self) -> Optional[str]:
        """The dataset this file is associated with, if any."""
        return self._data.get("datasetId")

    @property
    def status(self) -> str:
        """The file status, read from ``uploadStatus``.

        Known values used by this class are "processing", "ready",
        "completed" and "error" (the API may also report "uploaded").
        Returns "unknown" when the payload carries no ``uploadStatus``.
        """
        return self._data.get("uploadStatus", "unknown")

    @property
    def metadata(self) -> Optional[Dict[str, Any]]:
        """Additional metadata for the file, or None if not present."""
        return self._data.get("metadata")

    @property
    def created_at(self) -> str:
        """When this file was created."""
        return self._data["createdAt"]

    @property
    def updated_at(self) -> str:
        """When this file was last updated."""
        return self._data["updatedAt"]

    async def download(
        self, *, path: Optional[Union[str, Path]] = None
    ) -> Optional[bytes]:
        """Download the file content (async).

        Downloads the file content either to memory or to a local file.
        The download endpoint is queried first; depending on what it
        returns, the content is fetched from a presigned URL
        (``downloadUrl``), decoded from base64 (``content``), or used
        directly when the response is already raw bytes.

        Args:
            path: Optional path to save the file. If provided, saves to this
                location and creates parent directories if they don't exist.
                If not provided, returns the file content as bytes.

        Returns:
            If path is provided: None (file is saved to disk)
            If path is not provided: The file content as bytes

        Raises:
            APIError: If fetching the content from the presigned URL fails.
                Errors raised by the initial API request propagate as-is.
            IOError: If unable to write to the specified path

        Example:
            ```python
            # Download to memory
            content = await file.download()
            print(f"Downloaded {len(content)} bytes")

            # Download to disk
            await file.download(path="./downloads/myfile.pdf")

            # Download with automatic directory creation
            await file.download(path="./new_folder/subfolder/file.csv")
            ```
        """
        # Ask the API how to obtain the content.
        response: Any = await self._client._request(
            method="GET",
            path=f"/files/{self.file_id}/download",
            cast_type=dict,
        )

        file_data: bytes
        if isinstance(response, (bytes, bytearray)):
            # Raw binary payload. This must be checked before any key lookup:
            # `"downloadUrl" in <bytes>` raises TypeError.
            file_data = bytes(response)
        elif isinstance(response, dict) and "downloadUrl" in response:
            # Download from presigned URL
            download_url = response["downloadUrl"]
            try:
                async with httpx.AsyncClient() as client:
                    download_response = await client.get(download_url)
                    download_response.raise_for_status()
                    file_data = download_response.content
            except httpx.HTTPError as e:
                raise APIError(f"Failed to download file from URL: {e}") from e
        elif isinstance(response, dict) and "content" in response:
            # Base64 encoded content
            import base64

            file_data = base64.b64decode(response["content"])
        else:
            # Last-resort fallback kept from the original behavior: encode
            # the textual form of whatever the API returned.
            file_data = str(response).encode()

        if path is None:
            return file_data

        file_path = Path(path)
        # Create parent directories if they don't exist
        file_path.parent.mkdir(parents=True, exist_ok=True)

        async with aiofiles.open(file_path, "wb") as f:
            await f.write(file_data)
        return None

    async def delete(self) -> None:
        """Delete this file permanently (async).

        Issues ``DELETE /files/{file_id}``. This operation cannot be undone.
        The file will be removed from both the database and cloud storage.

        Raises:
            APIError: If the deletion fails
            NotFoundError: If the file doesn't exist

        Example:
            ```python
            # Delete a file
            await file.delete()
            # File is now permanently deleted
            ```
        """
        await self._client._request(
            method="DELETE",
            path=f"/files/{self.file_id}",
            cast_type=type(None),
        )

    def is_ready(self) -> bool:
        """Check if the file is ready for use (processing complete).

        Files may need processing after upload. Use this method to check if
        a file is ready for operations like downloading or analysis.

        Returns:
            True if the file status is 'ready' or 'completed', False
            otherwise.

        Example:
            ```python
            if file.is_ready():
                content = await file.download()
                print("File is ready and downloaded!")
            else:
                print("File is still processing...")
            ```
        """
        return self.status in ("ready", "completed")

    def is_processing(self) -> bool:
        """Check if the file is currently being processed.

        Returns:
            True if the file status is 'processing', False otherwise.

        Example:
            ```python
            if file.is_processing():
                print("Please wait, file is being processed...")
            ```
        """
        return self.status == "processing"

    def has_error(self) -> bool:
        """Check if the file has an error status.

        Returns:
            True if the file status is 'error', False otherwise.

        Example:
            ```python
            if file.has_error():
                print(f"File {file.name} failed to process")
                # Handle error case
            ```
        """
        return self.status == "error"

    def get_extension(self) -> str:
        """Get the file extension from the filename.

        Only the final suffix is returned (``"archive.tar.gz"`` gives
        ``".gz"``).

        Returns:
            The file extension (including the dot), or empty string if none.

        Example:
            ```python
            ext = file.get_extension()
            if ext == '.pdf':
                print("This is a PDF file")
            ```
        """
        return Path(self.name).suffix

    def is_image(self) -> bool:
        """Check if this file is an image based on MIME type.

        Matches any MIME type beginning with ``image/`` (JPEG, PNG, GIF,
        SVG, etc.).

        Returns:
            True if the MIME type indicates an image.

        Example:
            ```python
            if file.is_image():
                print(f"Image file: {file.name} ({file.mime_type})")
                # Handle image-specific logic
            ```
        """
        return self.mime_type.startswith("image/")

    def is_document(self) -> bool:
        """Check if this file is a document (PDF, Word, etc.).

        Detects common document formats including:

        - PDF files
        - Microsoft Word documents (.doc, .docx)
        - Microsoft Excel spreadsheets (.xls, .xlsx)
        - Microsoft PowerPoint presentations (.ppt, .pptx)

        Returns:
            True if the MIME type exactly matches one of the known document
            types.

        Example:
            ```python
            if file.is_document():
                print(f"Document: {file.name}")
                # Process document
            ```
        """
        document_types = {
            "application/pdf",
            "application/msword",
            "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
            "application/vnd.ms-excel",
            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
            "application/vnd.ms-powerpoint",
            "application/vnd.openxmlformats-officedocument.presentationml.presentation",
        }
        return self.mime_type in document_types

    def is_text(self) -> bool:
        """Check if this file is a text file.

        Matches any MIME type beginning with ``text/`` (for example
        ``text/plain``, ``text/csv``, ``text/html``). Types served under
        ``application/`` such as ``application/json`` or ``application/xml``
        are NOT matched.

        Returns:
            True if the MIME type starts with ``text/``.

        Example:
            ```python
            if file.is_text():
                content = await file.download()
                text_content = content.decode('utf-8')
                print(f"Text content: {text_content[:100]}...")
            ```
        """
        return self.mime_type.startswith("text/")

    def is_csv(self) -> bool:
        """Check if this file is a CSV file.

        Specifically detects CSV (Comma-Separated Values) files, which are
        commonly used for tabular data. The comparison is an exact match
        against ``text/csv``.

        Returns:
            True if the MIME type is exactly ``text/csv``.

        Example:
            ```python
            if file.is_csv():
                print(f"CSV file with {file.file_size} bytes of data")
                # Process as structured data
            ```
        """
        return self.mime_type == "text/csv"

    async def _refresh(self) -> None:
        """Refresh this file from the API (async).

        Re-fetches ``GET /files/{file_id}`` and replaces the cached payload
        so every property reflects the server's current state.
        """
        response = await self._client._request(
            method="GET",
            path=f"/files/{self.file_id}",
            cast_type=dict,
        )
        self._data = response