Skip to content

rg.Settings

rg.Settings is used to define the settings of an Argilla Dataset. The settings can be used to configure the behavior of the dataset, such as the fields, questions, guidelines, metadata, and vectors. The Settings class is passed to the Dataset class and used to create the dataset on the server. Once created, the settings of a dataset cannot be changed.

Usage Examples

Creating a new dataset with settings

To create a new dataset with settings, instantiate the Settings class and pass it to the Dataset class.

import argilla_sdk as rg

# Build the individual pieces first, then assemble them into the settings.
guidelines_text = "Select the sentiment of the prompt."
prompt_field = rg.TextField(name="prompt", use_markdown=True)
sentiment_question = rg.LabelQuestion(name="sentiment", labels=["positive", "negative"])

settings = rg.Settings(
    guidelines=guidelines_text,
    fields=[prompt_field],
    questions=[sentiment_question],
)

# Attach the settings to a new dataset and create the dataset on the server
dataset = rg.Dataset(name="sentiment_analysis", settings=settings)
dataset.create()

To define the settings for fields, questions, metadata, or vectors, refer to the rg.TextField, rg.LabelQuestion, rg.TermsMetadataProperty, and rg.VectorField class documentation.


Class Reference

rg.Settings

Bases: Resource

Settings class for Argilla Datasets.

This class is used to define the representation of a Dataset within the UI.

Source code in src/argilla_sdk/settings/_resource.py
class Settings(Resource):
    """
    Settings class for Argilla Datasets.

    This class is used to define the representation of a Dataset within the UI.
    It groups the fields, questions, vectors and metadata properties of a dataset
    together with its guidelines and the ``allow_extra_metadata`` flag.
    """

    def __init__(
        self,
        fields: Optional[List[TextField]] = None,
        questions: Optional[List[QuestionType]] = None,
        vectors: Optional[List[VectorField]] = None,
        metadata: Optional[List[MetadataType]] = None,
        guidelines: Optional[str] = None,
        allow_extra_metadata: bool = False,
        _dataset: Optional["Dataset"] = None,
    ) -> None:
        """
        Args:
            fields (List[TextField]): A list of TextField objects that represent the fields in the Dataset.
            questions (List[Union[LabelQuestion, MultiLabelQuestion, RankingQuestion, TextQuestion, RatingQuestion]]): A list of Question objects that represent the questions in the Dataset.
            vectors (List[VectorField]): A list of VectorField objects that represent the vectors in the Dataset.
            metadata (List[MetadataType]): A list of metadata property objects that represent the metadata in the Dataset.
            guidelines (str): A string containing the guidelines for the Dataset, or the path to an existing
                file whose content is used as the guidelines.
            allow_extra_metadata (bool): A boolean that determines whether or not extra metadata is allowed in the Dataset. Defaults to False.
            _dataset (Dataset): The dataset these settings belong to, if any. Its client is reused for API calls.

        Raises:
            SettingsError: If ``guidelines`` is neither ``None`` nor a string.
        """
        super().__init__(client=_dataset._client if _dataset else None)

        self.__questions = questions or []
        self.__fields = SettingsProperties(self, fields)
        self.__vectors = SettingsProperties(self, vectors)
        self.__metadata = SettingsProperties(self, metadata)

        self.__guidelines = self.__process_guidelines(guidelines)
        self.__allow_extra_metadata = allow_extra_metadata

        self._dataset = _dataset

    #####################
    # Properties        #
    #####################

    @property
    def fields(self) -> "SettingsProperties":
        """The fields of the dataset, wrapped in a SettingsProperties container."""
        return self.__fields

    @fields.setter
    def fields(self, fields: List[TextField]):
        self.__fields = SettingsProperties(self, fields)
        # The cached schema still points at the previous fields.
        self._invalidate_schema_cache()

    @property
    def questions(self) -> List[QuestionType]:
        """The questions of the dataset, as a plain list."""
        return self.__questions

    @questions.setter
    def questions(self, questions: List[QuestionType]):
        self.__questions = questions
        self._invalidate_schema_cache()

    @property
    def vectors(self) -> "SettingsProperties":
        """The vector fields of the dataset, wrapped in a SettingsProperties container."""
        return self.__vectors

    @vectors.setter
    def vectors(self, vectors: List[VectorField]):
        self.__vectors = SettingsProperties(self, vectors)
        self._invalidate_schema_cache()

    @property
    def metadata(self) -> "SettingsProperties":
        """The metadata properties of the dataset, wrapped in a SettingsProperties container."""
        return self.__metadata

    @metadata.setter
    def metadata(self, metadata: List[MetadataType]):
        self.__metadata = SettingsProperties(self, metadata)
        self._invalidate_schema_cache()

    @property
    def guidelines(self) -> Optional[str]:
        """The guidelines text, or ``None`` when no guidelines were given."""
        return self.__guidelines

    @guidelines.setter
    def guidelines(self, guidelines: str):
        self.__guidelines = self.__process_guidelines(guidelines)

    @property
    def allow_extra_metadata(self) -> bool:
        """Whether extra metadata is allowed in the dataset."""
        return self.__allow_extra_metadata

    @allow_extra_metadata.setter
    def allow_extra_metadata(self, value: bool):
        self.__allow_extra_metadata = value

    @property
    def dataset(self) -> "Dataset":
        """The dataset these settings are attached to."""
        return self._dataset

    @dataset.setter
    def dataset(self, dataset: "Dataset"):
        # Attaching a dataset also switches to that dataset's client.
        self._dataset = dataset
        self._client = dataset._client

    @cached_property
    def schema(self) -> dict:
        """Map every field, question, vector and metadata property by its name.

        Later groups overwrite earlier ones on a name clash (fields, then
        questions, then vectors, then metadata). The value is cached and is
        dropped again whenever the properties are replaced or synchronized.
        """
        schema_dict = {}

        for field in self.fields:
            schema_dict[field.name] = field

        for question in self.questions:
            schema_dict[question.name] = question

        for vector in self.vectors:
            schema_dict[vector.name] = vector

        for metadata in self.metadata:
            schema_dict[metadata.name] = metadata

        return schema_dict

    @cached_property
    def schema_by_id(self) -> Dict[UUID, Union[TextField, QuestionType, MetadataType, VectorField]]:
        """Map every entry of ``schema`` by its ``id`` instead of its name (cached)."""
        return {v.id: v for v in self.schema.values()}

    def validate(self) -> None:
        """Check that fields and questions are present and that all names are unique.

        Raises:
            SettingsError: If fields or questions are missing, or a name is used twice.
        """
        self._validate_empty_settings()
        self._validate_duplicate_names()

    #####################
    #  Public methods   #
    #####################

    def get(self) -> "Settings":
        """Fetch fields, questions, vectors, metadata and the dataset-level attributes
        through the API client and store them on this instance.

        Returns:
            Settings: This instance.
        """
        self.fields = self._fetch_fields()
        self.questions = self._fetch_questions()
        self.vectors = self._fetch_vectors()
        self.metadata = self._fetch_metadata()
        self.__get_dataset_related_attributes()

        self._update_last_api_call()
        return self

    def create(self) -> "Settings":
        """Validate the settings and create them through the API client.

        Returns:
            Settings: This instance.

        Raises:
            SettingsError: If validation fails or a question cannot be created.
        """
        self.validate()

        self._update_dataset_related_attributes()
        self.__fields.create()
        self._create_questions()
        self.__vectors.create()
        self.__metadata.create()

        # Question models are replaced during creation, so id-based lookups
        # computed earlier may be stale.
        self._invalidate_schema_cache()
        self._update_last_api_call()
        return self

    def update(self) -> "Settings":
        """Validate the settings and push fields, vectors, metadata and the
        dataset-level attributes through the API client.

        Returns:
            Settings: This instance.

        Raises:
            SettingsError: If validation fails.
        """
        self.validate()

        self._update_dataset_related_attributes()
        self.__fields.update()
        self.__vectors.update()
        self.__metadata.update()
        # NOTE: questions are not updated here.

        self._invalidate_schema_cache()
        self._update_last_api_call()
        return self

    def question_by_name(self, question_name: str) -> QuestionType:
        """Return the question called ``question_name``.

        Raises:
            ValueError: If no question has that name.
        """
        for question in self.questions:
            if question.name == question_name:
                return question
        raise ValueError(f"Question with name {question_name} not found")

    def question_by_id(self, question_id: UUID) -> QuestionType:
        """Return the question whose id is ``question_id``.

        Raises:
            ValueError: If the id is unknown or belongs to something that is not a question.
        """
        # The id lookup covers fields, vectors and metadata too, hence the type check.
        candidate = self.schema_by_id.get(question_id)
        if isinstance(candidate, QuestionPropertyBase):
            return candidate
        raise ValueError(f"Question with id {question_id} not found")

    def serialize(self):
        """Return the settings as a dictionary of serialized parts.

        Raises:
            ArgillaSerializeError: If any part fails to serialize.
        """
        try:
            return {
                "guidelines": self.guidelines,
                "questions": self.__serialize_questions(self.questions),
                "fields": self.__fields.serialize(),
                "vectors": self.vectors.serialize(),
                "metadata": self.metadata.serialize(),
                "allow_extra_metadata": self.allow_extra_metadata,
            }
        except Exception as e:
            raise ArgillaSerializeError(f"Failed to serialize the settings. {e.__class__.__name__}") from e

    def to_json(self, path: Union[Path, str]) -> None:
        """Save the settings to a file on disk

        Parameters:
            path (str): The path to save the settings to

        Raises:
            FileExistsError: If ``path`` already exists.
            ArgillaSerializeError: If the settings cannot be serialized.
        """
        if not isinstance(path, Path):
            path = Path(path)
        if path.exists():
            raise FileExistsError(f"File {path} already exists")
        # Serialize before creating the file: a failure here must not leave an
        # empty file behind that would make every retry hit FileExistsError.
        payload = self.serialize()
        with open(path, "w") as file:
            json.dump(payload, file)

    @classmethod
    def from_json(cls, path: Union[Path, str]) -> "Settings":
        """Load the settings from a file on disk

        Parameters:
            path (str): The path of a JSON file with the keys produced by ``serialize``.
                Missing keys fall back to empty lists, no guidelines and
                ``allow_extra_metadata=False``.
        """

        # JSON interchange is UTF-8; do not depend on the platform default encoding.
        with open(path, "r", encoding="utf-8") as file:
            settings_dict = json.load(file)

        guidelines = settings_dict.get("guidelines")
        # Fall back to the constructor default instead of passing None for a bool.
        allow_extra_metadata = settings_dict.get("allow_extra_metadata", False)

        questions = [question_from_dict(question) for question in settings_dict.get("questions", [])]
        fields = [TextField.from_dict(field) for field in settings_dict.get("fields", [])]
        vectors = [VectorField.from_dict(vector) for vector in settings_dict.get("vectors", [])]
        metadata = [MetadataField.from_dict(item) for item in settings_dict.get("metadata", [])]

        return cls(
            questions=questions,
            fields=fields,
            vectors=vectors,
            metadata=metadata,
            guidelines=guidelines,
            allow_extra_metadata=allow_extra_metadata,
        )

    def __eq__(self, other: object) -> bool:
        """Two settings are equal when their serialized forms are equal."""
        if not isinstance(other, Settings):
            # Let Python try the reflected comparison instead of raising AttributeError.
            return NotImplemented
        return self.serialize() == other.serialize()  # TODO: Create proper __eq__ methods for fields and questions

    #####################
    #  Repr Methods     #
    #####################

    def __repr__(self) -> str:
        return (
            f"Settings(guidelines={self.guidelines}, allow_extra_metadata={self.allow_extra_metadata}, "
            f"fields={self.fields}, questions={self.questions}, vectors={self.vectors}, metadata={self.metadata})"
        )

    #####################
    #  Private methods  #
    #####################

    def _invalidate_schema_cache(self) -> None:
        """Drop the cached ``schema`` / ``schema_by_id`` values so the next access rebuilds them."""
        # Assumes `cached_property` is functools.cached_property, which stores its
        # value in the instance __dict__ under the property name. With any other
        # implementation these pops are harmless no-ops.
        self.__dict__.pop("schema", None)
        self.__dict__.pop("schema_by_id", None)

    def _fetch_fields(self) -> List[TextField]:
        """List the dataset's fields through the API client and wrap them as TextField objects."""
        models = self._client.api.fields.list(dataset_id=self._dataset.id)
        return [TextField.from_model(model) for model in models]

    def _fetch_questions(self) -> List[QuestionType]:
        """List the dataset's questions through the API client and wrap them as question objects."""
        models = self._client.api.questions.list(dataset_id=self._dataset.id)
        return [question_from_model(model) for model in models]

    def _fetch_vectors(self) -> List[VectorField]:
        """List the dataset's vectors through the API client and wrap them as VectorField objects."""
        # NOTE(review): this goes through the dataset's client rather than
        # self._client like the sibling fetchers - confirm whether intentional.
        models = self.dataset._client.api.vectors.list(self.dataset.id)
        return [VectorField.from_model(model) for model in models]

    def _fetch_metadata(self) -> List[MetadataType]:
        """List the dataset's metadata properties through the API client and wrap them."""
        models = self._client.api.metadata.list(dataset_id=self._dataset.id)
        return [MetadataField.from_model(model) for model in models]

    def __get_dataset_related_attributes(self):
        """Read ``guidelines`` and ``allow_extra_metadata`` from the dataset model."""
        # This flow may be a bit weird, but it's the only way to read the dataset related attributes.
        # Everything points to the fact that we should have several settings-related endpoints in the API
        # to handle this, e.g.:
        # POST /api/v1/datasets/{dataset_id}/settings
        # {
        #   "guidelines": ....,
        #   "allow_extra_metadata": ....,
        # }
        # But this is not implemented yet, so we need to read the dataset model directly
        dataset_model = self._client.api.datasets.get(self._dataset.id)

        self.guidelines = dataset_model.guidelines
        self.allow_extra_metadata = dataset_model.allow_extra_metadata

    def _update_dataset_related_attributes(self):
        """Write ``guidelines`` and ``allow_extra_metadata`` through the dataset model."""
        # This flow may be a bit weird, but it's the only way to update the dataset related attributes.
        # Everything points to the fact that we should have several settings-related endpoints in the API
        # to handle this, e.g.:
        # POST /api/v1/datasets/{dataset_id}/settings
        # {
        #   "guidelines": ....,
        #   "allow_extra_metadata": ....,
        # }
        # But this is not implemented yet, so we need to update the dataset model directly
        dataset_model = DatasetModel(
            id=self._dataset.id,
            name=self._dataset.name,
            guidelines=self.guidelines,
            allow_extra_metadata=self.allow_extra_metadata,
        )
        self._client.api.datasets.update(dataset_model)

    def _create_questions(self) -> None:
        """Create every question through the API client and keep the returned models.

        Raises:
            SettingsError: If the API rejects a question.
        """
        for question in self.__questions:
            try:
                question_model = self._client.api.questions.create(
                    dataset_id=self._dataset.id, question=question._model
                )
                question._model = question_model
            except ArgillaAPIError as e:
                raise SettingsError(f"Failed to create question {question.name}") from e

    def _validate_empty_settings(self):
        """Raise SettingsError unless both fields and questions are non-empty."""
        if not all([self.fields, self.questions]):
            message = "Fields and questions are required"
            raise SettingsError(message=message)

    def _validate_duplicate_names(self) -> None:
        """Raise SettingsError if two fields, questions, vectors or metadata properties share a name."""
        dataset_properties_by_name = {}

        for properties in [self.fields, self.questions, self.vectors, self.metadata]:
            for prop in properties:
                if prop.name in dataset_properties_by_name:
                    raise SettingsError(
                        f"names of dataset settings must be unique, "
                        f"but the name {prop.name!r} is used by {type(prop).__name__!r} and {type(dataset_properties_by_name[prop.name]).__name__!r} "
                    )
                dataset_properties_by_name[prop.name] = prop

    def __process_guidelines(self, guidelines):
        """Normalize the guidelines value.

        ``None`` is returned unchanged. A string naming an existing file is
        replaced by that file's content. Any other string is returned as is.

        Raises:
            SettingsError: If ``guidelines`` is neither ``None`` nor a string.
        """
        if guidelines is None:
            return guidelines

        if not isinstance(guidelines, str):
            raise SettingsError("Guidelines must be a string or a path to a file")

        # Only regular files are read. A guidelines text that happens to match a
        # directory name (e.g. "docs") is plain text, not something to open().
        if os.path.isfile(guidelines):
            with open(guidelines, "r") as file:
                return file.read()

        return guidelines

    def __serialize_questions(self, questions: List[QuestionType]):
        """Serialize each question in ``questions``."""
        return [question.serialize() for question in questions]

__init__(fields=None, questions=None, vectors=None, metadata=None, guidelines=None, allow_extra_metadata=False, _dataset=None)

Parameters:

Name Type Description Default
fields List[TextField]

A list of TextField objects that represent the fields in the Dataset.

None
questions List[Union[LabelQuestion, MultiLabelQuestion, RankingQuestion, TextQuestion, RatingQuestion]]

A list of Question objects that represent the questions in the Dataset.

None
vectors List[VectorField]

A list of VectorField objects that represent the vectors in the Dataset.

None
metadata List[MetadataField]

A list of MetadataField objects that represent the metadata in the Dataset.

None
guidelines str

A string containing the guidelines for the Dataset.

None
allow_extra_metadata bool

A boolean that determines whether or not extra metadata is allowed in the Dataset. Defaults to False.

False
Source code in src/argilla_sdk/settings/_resource.py
def __init__(
    self,
    fields: Optional[List[TextField]] = None,
    questions: Optional[List[QuestionType]] = None,
    vectors: Optional[List[VectorField]] = None,
    metadata: Optional[List[MetadataType]] = None,
    guidelines: Optional[str] = None,
    allow_extra_metadata: bool = False,
    _dataset: Optional["Dataset"] = None,
) -> None:
    """Assemble the settings from their individual parts.

    Args:
        fields (List[TextField]): The TextField objects that make up the fields of the Dataset.
        questions (List[Union[LabelQuestion, MultiLabelQuestion, RankingQuestion, TextQuestion, RatingQuestion]]): The Question objects of the Dataset.
        vectors (List[VectorField]): The VectorField objects of the Dataset.
        metadata (List[MetadataType]): The metadata property objects of the Dataset.
        guidelines (str): The guidelines for the Dataset.
        allow_extra_metadata (bool): Whether extra metadata is allowed in the Dataset. Defaults to False.
        _dataset (Dataset): The dataset the settings are attached to, if any.
    """
    # A client is only available when the settings come attached to a dataset.
    client = _dataset._client if _dataset else None
    super().__init__(client=client)

    # Questions are kept as a plain list; the other groups get a container.
    self.__questions = questions or []
    self.__fields = SettingsProperties(self, fields)
    self.__vectors = SettingsProperties(self, vectors)
    self.__metadata = SettingsProperties(self, metadata)

    # Dataset-level attributes.
    self.__guidelines = self.__process_guidelines(guidelines)
    self.__allow_extra_metadata = allow_extra_metadata

    self._dataset = _dataset

from_json(path) classmethod

Load the settings from a file on disk

Source code in src/argilla_sdk/settings/_resource.py
@classmethod
def from_json(cls, path: Union[Path, str]) -> "Settings":
    """Load the settings from a file on disk

    Parameters:
        path (str): The path of a JSON file holding the keys "fields",
            "questions", "vectors", "metadata", "guidelines" and
            "allow_extra_metadata". Missing keys fall back to empty lists,
            no guidelines and ``allow_extra_metadata=False``.

    Returns:
        Settings: A new instance built from the file content.
    """

    # JSON interchange is UTF-8; do not depend on the platform default encoding.
    with open(path, "r", encoding="utf-8") as file:
        settings_dict = json.load(file)

    guidelines = settings_dict.get("guidelines")
    # Fall back to the constructor default instead of passing None for a bool.
    allow_extra_metadata = settings_dict.get("allow_extra_metadata", False)

    questions = [question_from_dict(question) for question in settings_dict.get("questions", [])]
    fields = [TextField.from_dict(field) for field in settings_dict.get("fields", [])]
    vectors = [VectorField.from_dict(vector) for vector in settings_dict.get("vectors", [])]
    # Distinct loop name so the comprehension does not shadow its own iterable.
    metadata = [MetadataField.from_dict(item) for item in settings_dict.get("metadata", [])]

    return cls(
        questions=questions,
        fields=fields,
        vectors=vectors,
        metadata=metadata,
        guidelines=guidelines,
        allow_extra_metadata=allow_extra_metadata,
    )

to_json(path)

Save the settings to a file on disk

Parameters:

Name Type Description Default
path str

The path to save the settings to

required
Source code in src/argilla_sdk/settings/_resource.py
def to_json(self, path: Union[Path, str]) -> None:
    """Save the settings to a file on disk

    Parameters:
        path (str): The path to save the settings to

    Raises:
        FileExistsError: If ``path`` already exists.
    """
    if not isinstance(path, Path):
        path = Path(path)
    if path.exists():
        raise FileExistsError(f"File {path} already exists")
    # Serialize before creating the file: a failure here must not leave an
    # empty file behind that would make every retry hit FileExistsError.
    payload = self.serialize()
    with open(path, "w") as file:
        json.dump(payload, file)