From adec90aa5187c2be0291e0fe80a16bee548e8192 Mon Sep 17 00:00:00 2001 From: Tobias Wochinger Date: Wed, 20 May 2026 10:33:22 +0200 Subject: [PATCH 1/5] feat(scores): support text scores in v3 Backport TEXT score typing and generated API models to v3-stable. --- langfuse/_client/client.py | 24 ++--- langfuse/_client/span.py | 16 ++-- langfuse/api/__init__.py | 12 +++ langfuse/api/resources/__init__.py | 12 +++ langfuse/api/resources/commons/__init__.py | 8 ++ .../api/resources/commons/types/__init__.py | 15 ++- langfuse/api/resources/commons/types/score.py | 68 +++++++++++++- .../commons/types/score_config_data_type.py | 4 + .../commons/types/score_data_type.py | 4 + .../api/resources/commons/types/score_v_1.py | 62 ++++++++++++- .../api/resources/commons/types/text_score.py | 48 ++++++++++ .../resources/commons/types/text_score_v_1.py | 48 ++++++++++ .../resources/ingestion/types/score_body.py | 2 +- .../score/types/create_score_request.py | 2 +- langfuse/api/resources/score_v_2/__init__.py | 4 + .../api/resources/score_v_2/types/__init__.py | 4 + .../types/get_scores_response_data.py | 66 +++++++++++++ .../types/get_scores_response_data_text.py | 46 +++++++++ langfuse/experiment.py | 4 +- langfuse/types.py | 5 +- tests/test_core_sdk.py | 93 +++++++++++++++++++ 21 files changed, 519 insertions(+), 28 deletions(-) create mode 100644 langfuse/api/resources/commons/types/text_score.py create mode 100644 langfuse/api/resources/commons/types/text_score_v_1.py create mode 100644 langfuse/api/resources/score_v_2/types/get_scores_response_data_text.py diff --git a/langfuse/_client/client.py b/langfuse/_client/client.py index 3787280c5..94e5a084d 100644 --- a/langfuse/_client/client.py +++ b/langfuse/_client/client.py @@ -201,7 +201,7 @@ class Langfuse: cost_details={"total_cost": 0.0023} ) - # Score the generation (supports NUMERIC, BOOLEAN, CATEGORICAL) + # Score the generation (supports NUMERIC, BOOLEAN, CATEGORICAL, TEXT) generation.score(name="relevance", value=0.95, data_type="NUMERIC") ``` """ @@ -1992,7 +1992,7 @@ def create_score( trace_id: Optional[str] = None, score_id: Optional[str] = None, observation_id: Optional[str] = None, - data_type: Optional[Literal["CATEGORICAL"]] = "CATEGORICAL", + data_type: Optional[Literal["CATEGORICAL", "TEXT"]] = "CATEGORICAL", comment: Optional[str] = None, config_id: Optional[str] = None, metadata: Optional[Any] = None, @@ -2022,13 +2022,13 @@ def create_score( Args: name: Name of the score (e.g., "relevance", "accuracy") - value: Score value (can be numeric for NUMERIC/BOOLEAN types or string for CATEGORICAL) + value: Score value (can be numeric for NUMERIC/BOOLEAN types or string for CATEGORICAL/TEXT) session_id: ID of the Langfuse session to associate the score with dataset_run_id: ID of the Langfuse dataset run to associate the score with trace_id: ID of the Langfuse trace to associate the score with observation_id: Optional ID of the specific observation to score. Trace ID must be provided too. score_id: Optional custom ID for the score (auto-generated if not provided) - data_type: Type of score (NUMERIC, BOOLEAN, or CATEGORICAL) + data_type: Type of score (NUMERIC, BOOLEAN, CATEGORICAL, or TEXT) comment: Optional comment or explanation for the score config_id: Optional ID of a score config defined in Langfuse metadata: Optional metadata to be attached to the score @@ -2152,7 +2152,7 @@ def score_current_span( name: str, value: str, score_id: Optional[str] = None, - data_type: Optional[Literal["CATEGORICAL"]] = "CATEGORICAL", + data_type: Optional[Literal["CATEGORICAL", "TEXT"]] = "CATEGORICAL", comment: Optional[str] = None, config_id: Optional[str] = None, metadata: Optional[Any] = None, @@ -2176,9 +2176,9 @@ def score_current_span( Args: name: Name of the score (e.g., "relevance", "accuracy") - value: Score value (can be numeric for NUMERIC/BOOLEAN types or string for CATEGORICAL) + value: Score value (can be numeric for NUMERIC/BOOLEAN types or string for CATEGORICAL/TEXT) score_id: Optional custom ID for the score (auto-generated if not provided) - data_type: Type of score (NUMERIC, BOOLEAN, or CATEGORICAL) + data_type: Type of score (NUMERIC, BOOLEAN, CATEGORICAL, or TEXT) comment: Optional comment or explanation for the score config_id: Optional ID of a score config defined in Langfuse metadata: Optional metadata to be attached to the score @@ -2216,7 +2216,7 @@ def score_current_span( name=name, value=cast(str, value), score_id=score_id, - data_type=cast(Literal["CATEGORICAL"], data_type), + data_type=cast(Literal["CATEGORICAL", "TEXT"], data_type), comment=comment, config_id=config_id, metadata=metadata, @@ -2242,7 +2242,7 @@ def score_current_trace( name: str, value: str, score_id: Optional[str] = None, - data_type: Optional[Literal["CATEGORICAL"]] = "CATEGORICAL", + data_type: Optional[Literal["CATEGORICAL", "TEXT"]] = "CATEGORICAL", comment: Optional[str] = None, config_id: Optional[str] = None, metadata: Optional[Any] = None, @@ -2267,9 +2267,9 @@ def score_current_trace( Args: name: Name of the score (e.g., "user_satisfaction", "overall_quality") - value: Score value (can be numeric for NUMERIC/BOOLEAN types or string for CATEGORICAL) + value: Score value (can be numeric for NUMERIC/BOOLEAN types or string for CATEGORICAL/TEXT) score_id: Optional custom ID for the score (auto-generated if not provided) - data_type: Type of score (NUMERIC, BOOLEAN, or CATEGORICAL) + data_type: Type of score (NUMERIC, BOOLEAN, CATEGORICAL, or TEXT) comment: Optional comment or explanation for the score config_id: Optional ID of a score config defined in Langfuse metadata: Optional metadata to be attached to the score @@ -2305,7 +2305,7 @@ def score_current_trace( name=name, value=cast(str, value), score_id=score_id, - data_type=cast(Literal["CATEGORICAL"], data_type), + data_type=cast(Literal["CATEGORICAL", "TEXT"], data_type), comment=comment, config_id=config_id, metadata=metadata, diff --git a/langfuse/_client/span.py b/langfuse/_client/span.py index 92c1556ca..69ec83fe5 100644 --- a/langfuse/_client/span.py +++ b/langfuse/_client/span.py @@ -287,7 +287,7 @@ def score( name: str, value: str, score_id: Optional[str] = None, - data_type: Optional[Literal["CATEGORICAL"]] = "CATEGORICAL", + data_type: Optional[Literal["CATEGORICAL", "TEXT"]] = "CATEGORICAL", comment: Optional[str] = None, config_id: Optional[str] = None, timestamp: Optional[datetime] = None, @@ -313,9 +313,9 @@ def score( Args: name: Name of the score (e.g., "relevance", "accuracy") - value: Score value (numeric for NUMERIC/BOOLEAN, string for CATEGORICAL) + value: Score value (numeric for NUMERIC/BOOLEAN, string for CATEGORICAL/TEXT) score_id: Optional custom ID for the score (auto-generated if not provided) - data_type: Type of score (NUMERIC, BOOLEAN, or CATEGORICAL) + data_type: Type of score (NUMERIC, BOOLEAN, CATEGORICAL, or TEXT) comment: Optional comment or explanation for the score config_id: Optional ID of a score config defined in Langfuse timestamp: Optional timestamp for the score (defaults to current UTC time) @@ -342,7 +342,7 @@ def score( trace_id=self.trace_id, observation_id=self.id, score_id=score_id, - data_type=cast(Literal["CATEGORICAL"], data_type), + data_type=cast(Literal["CATEGORICAL", "TEXT"], data_type), comment=comment, config_id=config_id, timestamp=timestamp, @@ -370,7 +370,7 @@ def score_trace( name: str, value: str, score_id: Optional[str] = None, - data_type: Optional[Literal["CATEGORICAL"]] = "CATEGORICAL", + data_type: Optional[Literal["CATEGORICAL", "TEXT"]] = "CATEGORICAL", comment: Optional[str] = None, config_id: Optional[str] = None, timestamp: Optional[datetime] = None, @@ -397,9 +397,9 @@ def score_trace( Args: name: Name of the score (e.g., "user_satisfaction", "overall_quality") - value: Score value (numeric for NUMERIC/BOOLEAN, string for CATEGORICAL) + value: Score value (numeric for NUMERIC/BOOLEAN, string for CATEGORICAL/TEXT) score_id: Optional custom ID for the score (auto-generated if not provided) - data_type: Type of score (NUMERIC, BOOLEAN, or CATEGORICAL) + data_type: Type of score (NUMERIC, BOOLEAN, CATEGORICAL, or TEXT) comment: Optional comment or explanation for the score config_id: Optional ID of a score config defined in Langfuse timestamp: Optional timestamp for the score (defaults to current UTC time) @@ -425,7 +425,7 @@ def score_trace( value=cast(str, value), trace_id=self.trace_id, score_id=score_id, - data_type=cast(Literal["CATEGORICAL"], data_type), + data_type=cast(Literal["CATEGORICAL", "TEXT"], data_type), comment=comment, config_id=config_id, timestamp=timestamp, diff --git a/langfuse/api/__init__.py b/langfuse/api/__init__.py index d1a6414ed..d366774d5 100644 --- a/langfuse/api/__init__.py +++ b/langfuse/api/__init__.py @@ -88,10 +88,12 @@ GetScoresResponseDataCategorical, GetScoresResponseDataCorrection, GetScoresResponseDataNumeric, + GetScoresResponseDataText, GetScoresResponseData_Boolean, GetScoresResponseData_Categorical, GetScoresResponseData_Correction, GetScoresResponseData_Numeric, + GetScoresResponseData_Text, GetScoresResponseTraceData, HealthResponse, IngestionError, @@ -200,10 +202,12 @@ ScoreV1_Boolean, ScoreV1_Categorical, ScoreV1_Numeric, + ScoreV1_Text, Score_Boolean, Score_Categorical, Score_Correction, Score_Numeric, + Score_Text, SdkLogBody, SdkLogEvent, ServiceProviderConfig, @@ -211,6 +215,8 @@ Session, SessionWithTraces, Sort, + TextScore, + TextScoreV1, TextPrompt, Trace, TraceBody, @@ -349,10 +355,12 @@ "GetScoresResponseDataCategorical", "GetScoresResponseDataCorrection", "GetScoresResponseDataNumeric", + "GetScoresResponseDataText", "GetScoresResponseData_Boolean", "GetScoresResponseData_Categorical", "GetScoresResponseData_Correction", "GetScoresResponseData_Numeric", + "GetScoresResponseData_Text", "GetScoresResponseTraceData", "HealthResponse", "IngestionError", @@ -461,10 +469,12 @@ "ScoreV1_Boolean", "ScoreV1_Categorical", "ScoreV1_Numeric", + "ScoreV1_Text", "Score_Boolean", "Score_Categorical", "Score_Correction", "Score_Numeric", + "Score_Text", "SdkLogBody", "SdkLogEvent", "ServiceProviderConfig", @@ -472,6 +482,8 @@ "Session", "SessionWithTraces", "Sort", + "TextScore", + "TextScoreV1", "TextPrompt", "Trace", "TraceBody", diff --git a/langfuse/api/resources/__init__.py b/langfuse/api/resources/__init__.py index 0de0a56a5..b54d98098 100644 --- a/langfuse/api/resources/__init__.py +++ b/langfuse/api/resources/__init__.py @@ -100,12 +100,16 @@ ScoreV1_Boolean, ScoreV1_Categorical, ScoreV1_Numeric, + ScoreV1_Text, Score_Boolean, Score_Categorical, Score_Correction, Score_Numeric, + Score_Text, Session, SessionWithTraces, + TextScore, + TextScoreV1, Trace, TraceWithDetails, TraceWithFullDetails, @@ -272,10 +276,12 @@ GetScoresResponseDataCategorical, GetScoresResponseDataCorrection, GetScoresResponseDataNumeric, + GetScoresResponseDataText, GetScoresResponseData_Boolean, GetScoresResponseData_Categorical, GetScoresResponseData_Correction, GetScoresResponseData_Numeric, + GetScoresResponseData_Text, GetScoresResponseTraceData, ) from .sessions import PaginatedSessions @@ -369,10 +375,12 @@ "GetScoresResponseDataCategorical", "GetScoresResponseDataCorrection", "GetScoresResponseDataNumeric", + "GetScoresResponseDataText", "GetScoresResponseData_Boolean", "GetScoresResponseData_Categorical", "GetScoresResponseData_Correction", "GetScoresResponseData_Numeric", + "GetScoresResponseData_Text", "GetScoresResponseTraceData", "HealthResponse", "IngestionError", @@ -481,10 +489,12 @@ "ScoreV1_Boolean", "ScoreV1_Categorical", "ScoreV1_Numeric", + "ScoreV1_Text", "Score_Boolean", "Score_Categorical", "Score_Correction", "Score_Numeric", + "Score_Text", "SdkLogBody", "SdkLogEvent", "ServiceProviderConfig", @@ -492,6 +502,8 @@ "Session", "SessionWithTraces", "Sort", + "TextScore", + "TextScoreV1", "TextPrompt", "Trace", "TraceBody", diff --git a/langfuse/api/resources/commons/__init__.py b/langfuse/api/resources/commons/__init__.py index 7105b22c5..e1a050ca5 100644 --- a/langfuse/api/resources/commons/__init__.py +++ b/langfuse/api/resources/commons/__init__.py @@ -40,12 +40,16 @@ ScoreV1_Boolean, ScoreV1_Categorical, ScoreV1_Numeric, + ScoreV1_Text, Score_Boolean, Score_Categorical, Score_Correction, Score_Numeric, + Score_Text, Session, SessionWithTraces, + TextScore, + TextScoreV1, Trace, TraceWithDetails, TraceWithFullDetails, @@ -103,12 +107,16 @@ "ScoreV1_Boolean", "ScoreV1_Categorical", "ScoreV1_Numeric", + "ScoreV1_Text", "Score_Boolean", "Score_Categorical", "Score_Correction", "Score_Numeric", + "Score_Text", "Session", "SessionWithTraces", + "TextScore", + "TextScoreV1", "Trace", "TraceWithDetails", "TraceWithFullDetails", diff --git a/langfuse/api/resources/commons/types/__init__.py b/langfuse/api/resources/commons/types/__init__.py index df87680b7..b5a17e491 100644 --- a/langfuse/api/resources/commons/types/__init__.py +++ b/langfuse/api/resources/commons/types/__init__.py @@ -36,14 +36,23 @@ Score_Categorical, Score_Correction, Score_Numeric, + Score_Text, ) from .score_config import ScoreConfig from .score_config_data_type import ScoreConfigDataType from .score_data_type import ScoreDataType from .score_source import ScoreSource -from .score_v_1 import ScoreV1, ScoreV1_Boolean, ScoreV1_Categorical, ScoreV1_Numeric +from .score_v_1 import ( + ScoreV1, + ScoreV1_Boolean, + ScoreV1_Categorical, + ScoreV1_Numeric, + ScoreV1_Text, +) from .session import Session from .session_with_traces import SessionWithTraces +from .text_score import TextScore +from .text_score_v_1 import TextScoreV1 from .trace import Trace from .trace_with_details import TraceWithDetails from .trace_with_full_details import TraceWithFullDetails @@ -89,12 +98,16 @@ "ScoreV1_Boolean", "ScoreV1_Categorical", "ScoreV1_Numeric", + "ScoreV1_Text", "Score_Boolean", "Score_Categorical", "Score_Correction", "Score_Numeric", + "Score_Text", "Session", "SessionWithTraces", + "TextScore", + "TextScoreV1", "Trace", "TraceWithDetails", "TraceWithFullDetails", diff --git a/langfuse/api/resources/commons/types/score.py b/langfuse/api/resources/commons/types/score.py index dab6eee43..cc008d6d2 100644 --- a/langfuse/api/resources/commons/types/score.py +++ b/langfuse/api/resources/commons/types/score.py @@ -204,6 +204,70 @@ class Config: json_encoders = {dt.datetime: serialize_datetime} +class Score_Text(pydantic_v1.BaseModel): + string_value: str = pydantic_v1.Field(alias="stringValue") + id: str + trace_id: typing.Optional[str] = pydantic_v1.Field(alias="traceId", default=None) + session_id: typing.Optional[str] = pydantic_v1.Field( + alias="sessionId", default=None + ) + observation_id: typing.Optional[str] = pydantic_v1.Field( + alias="observationId", default=None + ) + dataset_run_id: typing.Optional[str] = pydantic_v1.Field( + alias="datasetRunId", default=None + ) + name: str + source: ScoreSource + timestamp: dt.datetime + created_at: dt.datetime = pydantic_v1.Field(alias="createdAt") + updated_at: dt.datetime = pydantic_v1.Field(alias="updatedAt") + author_user_id: typing.Optional[str] = pydantic_v1.Field( + alias="authorUserId", default=None + ) + comment: typing.Optional[str] = None + metadata: typing.Any + config_id: typing.Optional[str] = pydantic_v1.Field(alias="configId", default=None) + queue_id: typing.Optional[str] = pydantic_v1.Field(alias="queueId", default=None) + environment: str + data_type: typing.Literal["TEXT"] = pydantic_v1.Field( + alias="dataType", default="TEXT" + ) + + def json(self, **kwargs: typing.Any) -> str: + kwargs_with_defaults: typing.Any = { + "by_alias": True, + "exclude_unset": True, + **kwargs, + } + return super().json(**kwargs_with_defaults) + + def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]: + kwargs_with_defaults_exclude_unset: typing.Any = { + "by_alias": True, + "exclude_unset": True, + **kwargs, + } + kwargs_with_defaults_exclude_none: typing.Any = { + "by_alias": True, + "exclude_none": True, + **kwargs, + } + + return deep_union_pydantic_dicts( + super().dict(**kwargs_with_defaults_exclude_unset), + super().dict(**kwargs_with_defaults_exclude_none), + ) + + class Config: + frozen = True + smart_union = True + allow_population_by_field_name = True + populate_by_name = True + extra = pydantic_v1.Extra.allow + json_encoders = {dt.datetime: serialize_datetime} + + class Score_Correction(pydantic_v1.BaseModel): value: float string_value: str = pydantic_v1.Field(alias="stringValue") @@ -269,4 +333,6 @@ class Config: json_encoders = {dt.datetime: serialize_datetime} -Score = typing.Union[Score_Numeric, Score_Categorical, Score_Boolean, Score_Correction] +Score = typing.Union[ + Score_Numeric, Score_Categorical, Score_Boolean, Score_Text, Score_Correction +] diff --git a/langfuse/api/resources/commons/types/score_config_data_type.py b/langfuse/api/resources/commons/types/score_config_data_type.py index a7c9e7251..c945db494 100644 --- a/langfuse/api/resources/commons/types/score_config_data_type.py +++ b/langfuse/api/resources/commons/types/score_config_data_type.py @@ -10,12 +10,14 @@ class ScoreConfigDataType(str, enum.Enum): NUMERIC = "NUMERIC" BOOLEAN = "BOOLEAN" CATEGORICAL = "CATEGORICAL" + TEXT = "TEXT" def visit( self, numeric: typing.Callable[[], T_Result], boolean: typing.Callable[[], T_Result], categorical: typing.Callable[[], T_Result], + text: typing.Callable[[], T_Result], ) -> T_Result: if self is ScoreConfigDataType.NUMERIC: return numeric() @@ -23,3 +25,5 @@ def visit( return boolean() if self is ScoreConfigDataType.CATEGORICAL: return categorical() + if self is ScoreConfigDataType.TEXT: + return text() diff --git a/langfuse/api/resources/commons/types/score_data_type.py b/langfuse/api/resources/commons/types/score_data_type.py index 67bd9958b..aa57e3fee 100644 --- a/langfuse/api/resources/commons/types/score_data_type.py +++ b/langfuse/api/resources/commons/types/score_data_type.py @@ -10,6 +10,7 @@ class ScoreDataType(str, enum.Enum): NUMERIC = "NUMERIC" BOOLEAN = "BOOLEAN" CATEGORICAL = "CATEGORICAL" + TEXT = "TEXT" CORRECTION = "CORRECTION" def visit( @@ -17,6 +18,7 @@ def visit( numeric: typing.Callable[[], T_Result], boolean: typing.Callable[[], T_Result], categorical: typing.Callable[[], T_Result], + text: typing.Callable[[], T_Result], correction: typing.Callable[[], T_Result], ) -> T_Result: if self is ScoreDataType.NUMERIC: @@ -25,5 +27,7 @@ def visit( return boolean() if self is ScoreDataType.CATEGORICAL: return categorical() + if self is ScoreDataType.TEXT: + return text() if self is ScoreDataType.CORRECTION: return correction() diff --git a/langfuse/api/resources/commons/types/score_v_1.py b/langfuse/api/resources/commons/types/score_v_1.py index 74c3f53f9..f70e85b92 100644 --- a/langfuse/api/resources/commons/types/score_v_1.py +++ b/langfuse/api/resources/commons/types/score_v_1.py @@ -186,4 +186,64 @@ class Config: json_encoders = {dt.datetime: serialize_datetime} -ScoreV1 = typing.Union[ScoreV1_Numeric, ScoreV1_Categorical, ScoreV1_Boolean] +class ScoreV1_Text(pydantic_v1.BaseModel): + string_value: str = pydantic_v1.Field(alias="stringValue") + id: str + trace_id: str = pydantic_v1.Field(alias="traceId") + name: str + source: ScoreSource + observation_id: typing.Optional[str] = pydantic_v1.Field( + alias="observationId", default=None + ) + timestamp: dt.datetime + created_at: dt.datetime = pydantic_v1.Field(alias="createdAt") + updated_at: dt.datetime = pydantic_v1.Field(alias="updatedAt") + author_user_id: typing.Optional[str] = pydantic_v1.Field( + alias="authorUserId", default=None + ) + comment: typing.Optional[str] = None + metadata: typing.Any + config_id: typing.Optional[str] = pydantic_v1.Field(alias="configId", default=None) + queue_id: typing.Optional[str] = pydantic_v1.Field(alias="queueId", default=None) + environment: str + data_type: typing.Literal["TEXT"] = pydantic_v1.Field( + alias="dataType", default="TEXT" + ) + + def json(self, **kwargs: typing.Any) -> str: + kwargs_with_defaults: typing.Any = { + "by_alias": True, + "exclude_unset": True, + **kwargs, + } + return super().json(**kwargs_with_defaults) + + def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]: + kwargs_with_defaults_exclude_unset: typing.Any = { + "by_alias": True, + "exclude_unset": True, + **kwargs, + } + kwargs_with_defaults_exclude_none: typing.Any = { + "by_alias": True, + "exclude_none": True, + **kwargs, + } + + return deep_union_pydantic_dicts( + super().dict(**kwargs_with_defaults_exclude_unset), + super().dict(**kwargs_with_defaults_exclude_none), + ) + + class Config: + frozen = True + smart_union = True + allow_population_by_field_name = True + populate_by_name = True + extra = pydantic_v1.Extra.allow + json_encoders = {dt.datetime: serialize_datetime} + + +ScoreV1 = typing.Union[ + ScoreV1_Numeric, ScoreV1_Categorical, ScoreV1_Boolean, ScoreV1_Text +] diff --git a/langfuse/api/resources/commons/types/text_score.py b/langfuse/api/resources/commons/types/text_score.py new file mode 100644 index 000000000..3fe17f06c --- /dev/null +++ b/langfuse/api/resources/commons/types/text_score.py @@ -0,0 +1,48 @@ +# This file was auto-generated by Fern from our API Definition. + +import datetime as dt +import typing + +from ....core.datetime_utils import serialize_datetime +from ....core.pydantic_utilities import deep_union_pydantic_dicts, pydantic_v1 +from .base_score import BaseScore + + +class TextScore(BaseScore): + string_value: str = pydantic_v1.Field(alias="stringValue") + """ + The string representation of the score value. Must be between 1 and 500 characters. + """ + + def json(self, **kwargs: typing.Any) -> str: + kwargs_with_defaults: typing.Any = { + "by_alias": True, + "exclude_unset": True, + **kwargs, + } + return super().json(**kwargs_with_defaults) + + def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]: + kwargs_with_defaults_exclude_unset: typing.Any = { + "by_alias": True, + "exclude_unset": True, + **kwargs, + } + kwargs_with_defaults_exclude_none: typing.Any = { + "by_alias": True, + "exclude_none": True, + **kwargs, + } + + return deep_union_pydantic_dicts( + super().dict(**kwargs_with_defaults_exclude_unset), + super().dict(**kwargs_with_defaults_exclude_none), + ) + + class Config: + frozen = True + smart_union = True + allow_population_by_field_name = True + populate_by_name = True + extra = pydantic_v1.Extra.allow + json_encoders = {dt.datetime: serialize_datetime} diff --git a/langfuse/api/resources/commons/types/text_score_v_1.py b/langfuse/api/resources/commons/types/text_score_v_1.py new file mode 100644 index 000000000..4315d906d --- /dev/null +++ b/langfuse/api/resources/commons/types/text_score_v_1.py @@ -0,0 +1,48 @@ +# This file was auto-generated by Fern from our API Definition. + +import datetime as dt +import typing + +from ....core.datetime_utils import serialize_datetime +from ....core.pydantic_utilities import deep_union_pydantic_dicts, pydantic_v1 +from .base_score_v_1 import BaseScoreV1 + + +class TextScoreV1(BaseScoreV1): + string_value: str = pydantic_v1.Field(alias="stringValue") + """ + The string representation of the score value. Must be between 1 and 500 characters. + """ + + def json(self, **kwargs: typing.Any) -> str: + kwargs_with_defaults: typing.Any = { + "by_alias": True, + "exclude_unset": True, + **kwargs, + } + return super().json(**kwargs_with_defaults) + + def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]: + kwargs_with_defaults_exclude_unset: typing.Any = { + "by_alias": True, + "exclude_unset": True, + **kwargs, + } + kwargs_with_defaults_exclude_none: typing.Any = { + "by_alias": True, + "exclude_none": True, + **kwargs, + } + + return deep_union_pydantic_dicts( + super().dict(**kwargs_with_defaults_exclude_unset), + super().dict(**kwargs_with_defaults_exclude_none), + ) + + class Config: + frozen = True + smart_union = True + allow_population_by_field_name = True + populate_by_name = True + extra = pydantic_v1.Extra.allow + json_encoders = {dt.datetime: serialize_datetime} diff --git a/langfuse/api/resources/ingestion/types/score_body.py b/langfuse/api/resources/ingestion/types/score_body.py index 8e72a5682..46932f2e5 100644 --- a/langfuse/api/resources/ingestion/types/score_body.py +++ b/langfuse/api/resources/ingestion/types/score_body.py @@ -46,7 +46,7 @@ class ScoreBody(pydantic_v1.BaseModel): value: CreateScoreValue = pydantic_v1.Field() """ - The value of the score. Must be passed as string for categorical scores, and numeric for boolean and numeric scores. Boolean score values must equal either 1 or 0 (true or false) + The value of the score. Must be passed as string for categorical and text scores, and numeric for boolean and numeric scores. Boolean score values must equal either 1 or 0 (true or false). Text score values must be between 1 and 500 characters. """ comment: typing.Optional[str] = None diff --git a/langfuse/api/resources/score/types/create_score_request.py b/langfuse/api/resources/score/types/create_score_request.py index 1f79f4a64..e2c252e05 100644 --- a/langfuse/api/resources/score/types/create_score_request.py +++ b/langfuse/api/resources/score/types/create_score_request.py @@ -36,7 +36,7 @@ class CreateScoreRequest(pydantic_v1.BaseModel): name: str value: CreateScoreValue = pydantic_v1.Field() """ - The value of the score. Must be passed as string for categorical scores, and numeric for boolean and numeric scores. Boolean score values must equal either 1 or 0 (true or false) + The value of the score. Must be passed as string for categorical and text scores, and numeric for boolean and numeric scores. Boolean score values must equal either 1 or 0 (true or false). Text score values must be between 1 and 500 characters. """ comment: typing.Optional[str] = None diff --git a/langfuse/api/resources/score_v_2/__init__.py b/langfuse/api/resources/score_v_2/__init__.py index 4e333a693..73aba551a 100644 --- a/langfuse/api/resources/score_v_2/__init__.py +++ b/langfuse/api/resources/score_v_2/__init__.py @@ -7,10 +7,12 @@ GetScoresResponseDataCategorical, GetScoresResponseDataCorrection, GetScoresResponseDataNumeric, + GetScoresResponseDataText, GetScoresResponseData_Boolean, GetScoresResponseData_Categorical, GetScoresResponseData_Correction, GetScoresResponseData_Numeric, + GetScoresResponseData_Text, GetScoresResponseTraceData, ) @@ -21,9 +23,11 @@ "GetScoresResponseDataCategorical", "GetScoresResponseDataCorrection", "GetScoresResponseDataNumeric", + "GetScoresResponseDataText", "GetScoresResponseData_Boolean", "GetScoresResponseData_Categorical", "GetScoresResponseData_Correction", "GetScoresResponseData_Numeric", + "GetScoresResponseData_Text", "GetScoresResponseTraceData", ] diff --git a/langfuse/api/resources/score_v_2/types/__init__.py b/langfuse/api/resources/score_v_2/types/__init__.py index d08e687ef..2e68e389e 100644 --- a/langfuse/api/resources/score_v_2/types/__init__.py +++ b/langfuse/api/resources/score_v_2/types/__init__.py @@ -7,11 +7,13 @@ GetScoresResponseData_Categorical, GetScoresResponseData_Correction, GetScoresResponseData_Numeric, + GetScoresResponseData_Text, ) from .get_scores_response_data_boolean import GetScoresResponseDataBoolean from .get_scores_response_data_categorical import GetScoresResponseDataCategorical from .get_scores_response_data_correction import GetScoresResponseDataCorrection from .get_scores_response_data_numeric import GetScoresResponseDataNumeric +from .get_scores_response_data_text import GetScoresResponseDataText from .get_scores_response_trace_data import GetScoresResponseTraceData __all__ = [ @@ -21,9 +23,11 @@ "GetScoresResponseDataCategorical", "GetScoresResponseDataCorrection", "GetScoresResponseDataNumeric", + "GetScoresResponseDataText", "GetScoresResponseData_Boolean", "GetScoresResponseData_Categorical", "GetScoresResponseData_Correction", "GetScoresResponseData_Numeric", + "GetScoresResponseData_Text", "GetScoresResponseTraceData", ] diff --git a/langfuse/api/resources/score_v_2/types/get_scores_response_data.py b/langfuse/api/resources/score_v_2/types/get_scores_response_data.py index 4f73fbcae..045e2085c 100644 --- a/langfuse/api/resources/score_v_2/types/get_scores_response_data.py +++ b/langfuse/api/resources/score_v_2/types/get_scores_response_data.py @@ -208,6 +208,71 @@ class Config: json_encoders = {dt.datetime: serialize_datetime} +class GetScoresResponseData_Text(pydantic_v1.BaseModel): + trace: typing.Optional[GetScoresResponseTraceData] = None + string_value: str = pydantic_v1.Field(alias="stringValue") + id: str + trace_id: typing.Optional[str] = pydantic_v1.Field(alias="traceId", default=None) + session_id: typing.Optional[str] = pydantic_v1.Field( + alias="sessionId", default=None + ) + observation_id: typing.Optional[str] = pydantic_v1.Field( + alias="observationId", default=None + ) + dataset_run_id: typing.Optional[str] = pydantic_v1.Field( + alias="datasetRunId", default=None + ) + name: str + source: ScoreSource + timestamp: dt.datetime + created_at: dt.datetime = pydantic_v1.Field(alias="createdAt") + updated_at: dt.datetime = pydantic_v1.Field(alias="updatedAt") + author_user_id: typing.Optional[str] = pydantic_v1.Field( + alias="authorUserId", default=None + ) + comment: typing.Optional[str] = None + metadata: typing.Any + config_id: typing.Optional[str] = pydantic_v1.Field(alias="configId", default=None) + queue_id: typing.Optional[str] = pydantic_v1.Field(alias="queueId", default=None) + environment: str + data_type: typing.Literal["TEXT"] = pydantic_v1.Field( + alias="dataType", default="TEXT" + ) + + def json(self, **kwargs: typing.Any) -> str: + kwargs_with_defaults: typing.Any = { + "by_alias": True, + "exclude_unset": True, + **kwargs, + } + return super().json(**kwargs_with_defaults) + + def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]: + kwargs_with_defaults_exclude_unset: typing.Any = { + "by_alias": True, + "exclude_unset": True, + **kwargs, + } + kwargs_with_defaults_exclude_none: typing.Any = { + "by_alias": True, + "exclude_none": True, + **kwargs, + } + + return deep_union_pydantic_dicts( + super().dict(**kwargs_with_defaults_exclude_unset), + super().dict(**kwargs_with_defaults_exclude_none), + ) + + class Config: + frozen = True + smart_union = True + allow_population_by_field_name = True + populate_by_name = True + extra = pydantic_v1.Extra.allow + json_encoders = {dt.datetime: serialize_datetime} + + class GetScoresResponseData_Correction(pydantic_v1.BaseModel): trace: typing.Optional[GetScoresResponseTraceData] = None value: float @@ -278,5 +343,6 @@ class Config: GetScoresResponseData_Numeric, GetScoresResponseData_Categorical, GetScoresResponseData_Boolean, + GetScoresResponseData_Text, GetScoresResponseData_Correction, ] diff --git a/langfuse/api/resources/score_v_2/types/get_scores_response_data_text.py b/langfuse/api/resources/score_v_2/types/get_scores_response_data_text.py new file mode 100644 index 000000000..d51385788 --- /dev/null +++ b/langfuse/api/resources/score_v_2/types/get_scores_response_data_text.py @@ -0,0 +1,46 @@ +# This file was auto-generated by Fern from our API Definition. + +import datetime as dt +import typing + +from ....core.datetime_utils import serialize_datetime +from ....core.pydantic_utilities import deep_union_pydantic_dicts, pydantic_v1 +from ...commons.types.text_score import TextScore +from .get_scores_response_trace_data import GetScoresResponseTraceData + + +class GetScoresResponseDataText(TextScore): + trace: typing.Optional[GetScoresResponseTraceData] = None + + def json(self, **kwargs: typing.Any) -> str: + kwargs_with_defaults: typing.Any = { + "by_alias": True, + "exclude_unset": True, + **kwargs, + } + return super().json(**kwargs_with_defaults) + + def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]: + kwargs_with_defaults_exclude_unset: typing.Any = { + "by_alias": True, + "exclude_unset": True, + **kwargs, + } + kwargs_with_defaults_exclude_none: typing.Any = { + "by_alias": True, + "exclude_none": True, + **kwargs, + } + + return deep_union_pydantic_dicts( + super().dict(**kwargs_with_defaults_exclude_unset), + super().dict(**kwargs_with_defaults_exclude_none), + ) + + class Config: + frozen = True + smart_union = True + allow_population_by_field_name = True + populate_by_name = True + extra = pydantic_v1.Extra.allow + json_encoders = {dt.datetime: serialize_datetime} diff --git a/langfuse/experiment.py b/langfuse/experiment.py index 00c54fe74..db1d70dc3 100644 --- a/langfuse/experiment.py +++ b/langfuse/experiment.py @@ -19,7 +19,7 @@ Union, ) -from langfuse.api import ScoreDataType +from langfuse.types import ExperimentScoreType if TYPE_CHECKING: from langfuse._client.datasets import DatasetItemClient @@ -188,7 +188,7 @@ def __init__( value: Union[int, float, str, bool], comment: Optional[str] = None, metadata: Optional[Dict[str, Any]] = None, - data_type: Optional[ScoreDataType] = None, + data_type: Optional[ExperimentScoreType] = None, config_id: Optional[str] = None, ): """Initialize an Evaluation with the provided data. diff --git a/langfuse/types.py b/langfuse/types.py index 32ebb32d4..a1fe75273 100644 --- a/langfuse/types.py +++ b/langfuse/types.py @@ -41,7 +41,9 @@ def my_evaluator(*, output: str, **kwargs) -> Evaluation: SpanLevel = Literal["DEBUG", "DEFAULT", "WARNING", "ERROR"] -ScoreDataType = Literal["NUMERIC", "CATEGORICAL", "BOOLEAN"] +ScoreDataType = Literal["NUMERIC", "CATEGORICAL", "BOOLEAN", "TEXT"] + +ExperimentScoreType = Literal["NUMERIC", "CATEGORICAL", "BOOLEAN"] class TraceMetadata(TypedDict): @@ -106,6 +108,7 @@ class TraceContext(TypedDict): __all__ = [ "SpanLevel", "ScoreDataType", + "ExperimentScoreType", "TraceMetadata", "ObservationParams", "MaskFunction", diff --git a/tests/test_core_sdk.py b/tests/test_core_sdk.py index 2888c0554..63bdaeecc 100644 --- a/tests/test_core_sdk.py +++ b/tests/test_core_sdk.py @@ -9,6 +9,11 @@ from langfuse import Langfuse from langfuse._client.resource_manager import LangfuseResourceManager from langfuse._utils import _get_timestamp +from langfuse.api import ( + CreateScoreConfigRequest, + ScoreConfigDataType, + UpdateScoreConfigRequest, +) from tests.api_wrapper import LangfuseAPI from tests.utils import ( create_uuid, @@ -257,6 +262,94 @@ def test_create_categorical_score(): assert trace["scores"][0]["stringValue"] == "high score" +def test_create_text_score(): + langfuse = Langfuse() + api_wrapper = LangfuseAPI() + + # Create a span and set trace properties + with langfuse.start_as_current_span(name="test-span") as span: + span.update_trace( + name="this-is-so-great-new", + user_id="test", + metadata="test", + ) + # Get trace ID for later use + trace_id = span.trace_id + + # Ensure data is sent + langfuse.flush() + sleep(2) + + # Create a text score + score_id = create_uuid() + langfuse.create_score( + score_id=score_id, + trace_id=trace_id, + name="this-is-a-score", + value="this is a text score", + data_type="TEXT", + ) + + # Create a generation in the same trace + generation = langfuse.start_generation( + name="yet another child", metadata="test", trace_context={"trace_id": trace_id} + ) + generation.end() + + # Ensure data is sent + langfuse.flush() + sleep(2) + + # Retrieve and verify + trace = api_wrapper.get_trace(trace_id) + + assert trace["scores"][0]["id"] == score_id + assert trace["scores"][0]["dataType"] == "TEXT" + assert trace["scores"][0]["value"] is None + assert trace["scores"][0]["stringValue"] == "this is a text score" + + +def test_create_and_list_text_score_config(): + api = get_api() + score_config_name = f"text-score-config-{create_uuid()}" + + score_config = api.score_configs.create( + request=CreateScoreConfigRequest( + name=score_config_name, + data_type=ScoreConfigDataType.TEXT, + ) + ) + + try: + matching_score_config = None + score_configs_response = api.score_configs.get(page=1, limit=100) + + for page in range(1, score_configs_response.meta.total_pages + 1): + if page > 1: + score_configs_response = api.score_configs.get(page=page, limit=100) + + matching_score_config = next( + ( + config + for config in score_configs_response.data + if config.id == score_config.id + ), + None, + ) + + if matching_score_config is not None: + break + + assert matching_score_config is not None + assert matching_score_config.name == score_config_name + assert matching_score_config.data_type == ScoreConfigDataType.TEXT + finally: + api.score_configs.update( + score_config.id, + request=UpdateScoreConfigRequest(is_archived=True), + ) + + def test_create_score_with_custom_timestamp(): langfuse = Langfuse() api_wrapper = LangfuseAPI() From c1b1b38b938d9743bba5168138febbae62709192 Mon Sep 17 00:00:00 2001 From: Tobias Wochinger Date: Wed, 20 May 2026 10:42:07 +0200 Subject: [PATCH 2/5] ci: update pnpm action setup --- .github/workflows/ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 74013b0b6..00054d71e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -75,9 +75,9 @@ jobs: name: Test on Python version ${{ matrix.python-version }} steps: - uses: actions/checkout@v3 - - uses: pnpm/action-setup@v3 + - uses: pnpm/action-setup@739bfe42ca9233c5e6aca07c1a25a9d34aca49b0 # v6.0.7 with: - version: 9.5.0 + version: 11.1.3 - name: Clone langfuse server run: | From f322849a8592e6b1e878bd3475409916591ac86c Mon Sep 17 00:00:00 2001 From: Tobias Wochinger Date: Wed, 20 May 2026 10:57:21 +0200 Subject: [PATCH 3/5] test: fix v3 stable CI failures Backport dataset archived-item assertions and update flaky live test expectations. --- tests/test_core_sdk.py | 50 ++++++++++++++++++++++++------------------ tests/test_datasets.py | 20 +++++++++++++++-- tests/test_openai.py | 5 +++-- 3 files changed, 50 insertions(+), 25 deletions(-) diff --git a/tests/test_core_sdk.py b/tests/test_core_sdk.py index 63bdaeecc..ecf0a0967 100644 --- a/tests/test_core_sdk.py +++ b/tests/test_core_sdk.py @@ -143,10 +143,11 @@ def test_create_numeric_score(): # Create a numeric score score_id = create_uuid() + score_name = f"score-{create_uuid()[:8]}" langfuse.create_score( score_id=score_id, trace_id=trace_id, - name="this-is-a-score", + name=score_name, value=1, ) @@ -163,10 +164,11 @@ def test_create_numeric_score(): # Retrieve and verify trace = api_wrapper.get_trace(trace_id) - assert trace["scores"][0]["id"] == score_id - assert trace["scores"][0]["value"] == 1 - assert trace["scores"][0]["dataType"] == "NUMERIC" - assert trace["scores"][0]["stringValue"] is None + created_score = next((s for s in trace["scores"] if s["name"] == score_name), None) + assert created_score is not None, "Score not found in trace" + assert created_score["value"] == 1 + assert created_score["dataType"] == "NUMERIC" + assert created_score["stringValue"] is None def test_create_boolean_score(): @@ -189,10 +191,11 @@ def test_create_boolean_score(): # Create a boolean score score_id = create_uuid() + score_name = f"score-{create_uuid()[:8]}" langfuse.create_score( score_id=score_id, trace_id=trace_id, - name="this-is-a-score", + name=score_name, value=1, data_type="BOOLEAN", ) @@ -210,10 +213,11 @@ def test_create_boolean_score(): # Retrieve and verify trace = api_wrapper.get_trace(trace_id) - assert trace["scores"][0]["id"] == score_id - assert trace["scores"][0]["dataType"] == "BOOLEAN" - assert trace["scores"][0]["value"] == 1 - assert trace["scores"][0]["stringValue"] == "True" + created_score = next((s for s in trace["scores"] if s["name"] == score_name), None) + assert created_score is not None, "Score not found in trace" + assert created_score["dataType"] == "BOOLEAN" + assert created_score["value"] == 1 + assert created_score["stringValue"] == "True" def test_create_categorical_score(): @@ -236,10 +240,11 @@ def test_create_categorical_score(): # Create a categorical score score_id = create_uuid() + score_name = f"score-{create_uuid()[:8]}" langfuse.create_score( score_id=score_id, trace_id=trace_id, - name="this-is-a-score", + name=score_name, value="high score", ) @@ -256,10 +261,11 @@ def test_create_categorical_score(): # Retrieve and verify trace = api_wrapper.get_trace(trace_id) - assert trace["scores"][0]["id"] == score_id - assert trace["scores"][0]["dataType"] == "CATEGORICAL" - assert trace["scores"][0]["value"] == 0 - assert trace["scores"][0]["stringValue"] == "high score" + created_score = next((s for s in trace["scores"] if s["name"] == score_name), None) + assert created_score is not None, "Score not found in trace" + assert created_score["dataType"] == "CATEGORICAL" + assert created_score["value"] == 0 + assert created_score["stringValue"] == "high score" def test_create_text_score(): @@ -282,10 +288,11 @@ def test_create_text_score(): # Create a text score score_id = create_uuid() + score_name = f"score-{create_uuid()[:8]}" langfuse.create_score( score_id=score_id, trace_id=trace_id, - name="this-is-a-score", + name=score_name, value="this is a text score", data_type="TEXT", ) @@ -303,15 +310,16 @@ def test_create_text_score(): # Retrieve and verify trace = api_wrapper.get_trace(trace_id) - assert trace["scores"][0]["id"] == score_id - assert trace["scores"][0]["dataType"] == "TEXT" - assert trace["scores"][0]["value"] is None - assert trace["scores"][0]["stringValue"] == "this is a text score" + created_score = next((s for s in trace["scores"] if s["name"] == score_name), None) + assert created_score is not None, "Score not found in trace" + assert created_score["dataType"] == "TEXT" + assert created_score["value"] is None + assert created_score["stringValue"] == "this is a text score" def test_create_and_list_text_score_config(): api = get_api() - score_config_name = f"text-score-config-{create_uuid()}" + score_config_name = f"text-score-config-{create_uuid()[:8]}" score_config = api.score_configs.create( request=CreateScoreConfigRequest( diff --git a/tests/test_datasets.py b/tests/test_datasets.py index f20c76f24..a28233c1a 100644 --- a/tests/test_datasets.py +++ b/tests/test_datasets.py @@ -124,7 +124,6 @@ def test_upsert_and_get_dataset_item(): input=new_input, id=item.id, expected_output=new_input, - status=DatasetStatus.ARCHIVED, ) # Refresh dataset and find updated item @@ -139,7 +138,24 @@ def test_upsert_and_get_dataset_item(): assert get_new_item.input == new_input assert get_new_item.id == item.id assert get_new_item.expected_output == new_input - assert get_new_item.status == DatasetStatus.ARCHIVED + assert get_new_item.status == DatasetStatus.ACTIVE + + langfuse.create_dataset_item( + dataset_name=name, + input=new_input, + id=item.id, + expected_output=new_input, + status=DatasetStatus.ARCHIVED, + ) + + dataset = langfuse.get_dataset(name) + assert all(dataset_item.id != item.id for dataset_item in dataset.items) + + archived_item = langfuse.api.dataset_items.get(item.id) + assert archived_item.input == new_input + assert archived_item.id == item.id + assert archived_item.expected_output == new_input + assert archived_item.status == DatasetStatus.ARCHIVED def test_dataset_run_with_metadata_and_description(): diff --git a/tests/test_openai.py b/tests/test_openai.py index f24bf93cf..ab7332073 100644 --- a/tests/test_openai.py +++ b/tests/test_openai.py @@ -1220,13 +1220,14 @@ def test_audio_input_and_output(openai): client = openai.OpenAI() openai.langfuse_debug = True generation_name = "test_audio_input_and_output" + create_uuid()[:8] + model = "gpt-4o-mini-audio-preview" content_path = "static/joke_prompt.wav" base64_string = encode_file_to_base64(content_path) client.chat.completions.create( name=generation_name, - model="gpt-4o-audio-preview", + model=model, modalities=["text", "audio"], audio={"voice": "alloy", "format": "wav"}, messages=[ @@ -1260,7 +1261,7 @@ def test_audio_input_and_output(openai): in generation.data[0].input[0]["content"][1]["input_audio"]["data"] ) assert generation.data[0].type == "GENERATION" - assert "gpt-4o-audio-preview" in generation.data[0].model + assert model in generation.data[0].model assert generation.data[0].start_time is not None assert generation.data[0].end_time is not None assert generation.data[0].start_time < generation.data[0].end_time From 3d1b0ae212142cc3bc1b8065da9d8c846863181c Mon Sep 17 00:00:00 2001 From: Tobias Wochinger Date: Wed, 20 May 2026 11:07:56 +0200 Subject: [PATCH 4/5] test: address v3 review feedback Update generated export ordering and use the requested audio model. --- langfuse/api/__init__.py | 4 ++-- langfuse/api/resources/__init__.py | 2 +- tests/test_openai.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/langfuse/api/__init__.py b/langfuse/api/__init__.py index d366774d5..d3eaa2218 100644 --- a/langfuse/api/__init__.py +++ b/langfuse/api/__init__.py @@ -215,9 +215,9 @@ Session, SessionWithTraces, Sort, + TextPrompt, TextScore, TextScoreV1, - TextPrompt, Trace, TraceBody, TraceEvent, @@ -482,9 +482,9 @@ "Session", "SessionWithTraces", "Sort", + "TextPrompt", "TextScore", "TextScoreV1", - "TextPrompt", "Trace", "TraceBody", "TraceEvent", diff --git a/langfuse/api/resources/__init__.py b/langfuse/api/resources/__init__.py index b54d98098..1bc2d0222 100644 --- a/langfuse/api/resources/__init__.py +++ b/langfuse/api/resources/__init__.py @@ -502,9 +502,9 @@ "Session", "SessionWithTraces", "Sort", + "TextPrompt", "TextScore", "TextScoreV1", - "TextPrompt", "Trace", "TraceBody", "TraceEvent", diff --git a/tests/test_openai.py b/tests/test_openai.py index ab7332073..d8068c5ee 100644 --- a/tests/test_openai.py +++ b/tests/test_openai.py @@ -1220,7 +1220,7 @@ def test_audio_input_and_output(openai): client = openai.OpenAI() openai.langfuse_debug = True generation_name = "test_audio_input_and_output" + create_uuid()[:8] - model = "gpt-4o-mini-audio-preview" + model = "gpt-audio-mini-2025-12-15" content_path = "static/joke_prompt.wav" base64_string = encode_file_to_base64(content_path) From f9ba977dfb1bc622c40b5e1299780d72e3467afc Mon Sep 17 00:00:00 2001 From: Tobias Wochinger Date: Wed, 20 May 2026 11:27:24 +0200 Subject: [PATCH 5/5] test: tolerate omitted text score value Allow the TEXT score response to omit null numeric values while asserting stringValue. --- tests/test_core_sdk.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_core_sdk.py b/tests/test_core_sdk.py index ecf0a0967..30b052928 100644 --- a/tests/test_core_sdk.py +++ b/tests/test_core_sdk.py @@ -313,7 +313,7 @@ def test_create_text_score(): created_score = next((s for s in trace["scores"] if s["name"] == score_name), None) assert created_score is not None, "Score not found in trace" assert created_score["dataType"] == "TEXT" - assert created_score["value"] is None + assert created_score.get("value") is None assert created_score["stringValue"] == "this is a text score"