Skip to content

Get-Strings Indexer

Semantic selector for self or MetaFrame rows and columns.

The _GetStringIndexer object is returned when accessing the .gs or .q property of a DataFrame. It provides string-based data access methods for selecting or updating rows and columns using simplified "get-string" expressions.

If query is True, use regular pandas query instead of custom get-strings.

Notes

For more information, see the 'Get-Strings' wiki page!

Source code in metaframe/src/indexer/getstring.py
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
class _GetStringIndexer:
    """
    Semantic selector for self or MetaFrame rows and columns.

    The `_GetStringIndexer` object is returned when accessing the `.gs` or `.q`
    property of a `DataFrame`. It provides string-based data access methods
    for selecting or updating rows and columns using simplified "get-string" expressions.

    If query is True, use regular pandas query instead of custom get-strings.

    Parameters
    ----------
    df : DataFrame
        The frame the indexer reads from and writes to.
    query : bool, default False
        When True, expressions are forwarded to ``.query(...)``; otherwise
        they are parsed and evaluated as get-strings.

    Notes
    -----
    For more information, see the 'Get-Strings' wiki page!
    """

    def __init__(self, df: 'DataFrame', query: bool=False): # type: ignore  # noqa: F821
        self.df = df
        self.query = query

    def __getitem__(self, 
                    key: Union[
                        Union[str, slice], 
                        Tuple[Union[str, slice], Union[str, slice]]
                    ]
                ) -> 'DataFrame': # type: ignore  # noqa: F821
        """
        Select rows and/or columns using get-string expressions.

        Allows selection by string-based patterns applied to DataFrame index
        or column. Use a string for single-axis filtering or a tuple of
        (row_getstring, col_getstring) for two-dimensional selection.

        Only strings and empty slices ([:]) are accepted.

        Parameters
        ----------
        key : str, slice, or tuple of (str or slice)
            - A single string or slice selects rows first, falling back to columns if needed.
            - A 2-tuple specifies row and column selections.
            - Only empty slices ([:]) are allowed for unfiltered dimensions.

        Returns
        -------
        DataFrame
            A new DataFrame filtered according to the provided get-string(s).

        Raises
        ------
        ValueError
            If an invalid key type, a tuple that does not have exactly two
            elements, or a non-empty slice is provided.
        Exception
            If no matching rows or columns are found for the get-string expression.

        Examples
        --------
        >>> from metaframe.testing import metaframe_row
        >>> metaframe_row
           floats   bool  group
        0     1.1  False      0
        1     2.2  False      0
        2     3.3   True      2
        3     4.4   True      1
        >>> metaframe_row.gs["floats:<2 or bool:True"]
           floats   bool  group
        0     1.1  False      0
        2     3.3   True      2
        3     4.4   True      1

        >>> from metaframe.testing import dataframe
        >>> dataframe
        strings             f  g     h
        group               1  0     1
        floats bool  group
        1.1    False 0      1  A     2
        2.2    False 0      2  B  None
        3.3    True  2      3  C     B
        4.4    True  1      4  D  None
        >>> dataframe.gs["bool:False", "group:1 and strings:h"]
        strings                h
        group                  1
        floats bool  group
        1.1    False 0         2
        2.2    False 0      None
        """
        # Input validity checks
        if isinstance(key, tuple):
            # Check the length explicitly so a wrong-sized tuple reports the
            # accepted forms instead of a bare tuple-unpacking error.
            if len(key) != 2:
                raise ValueError(f"Invalid input: {key}. Only strings, empty slice or 2 elements tuple of such are accepted.")
            row, col = key
            for e in [row, col]:
                if not isinstance(e, (str, slice)):
                    raise ValueError(f"Invalid input: {e}. Only strings or slice are accepted.")
        elif isinstance(key, (str, slice)):
            row = key
            col = None
        else:
            raise ValueError(f"Invalid input: {key}. Only strings, empty slice or 2 elements tuple of such are accepted.")
        for e in [row, col]:
            if isinstance(e, slice) and e != slice(None):
                raise ValueError(f"Invalid slice {e}. Only empty slice is accepted ([:])!")
        if self.query:
            return self._get_query(row, col)
        return self._get_gs(row, col)

    def _get_query(self, row, col) -> 'DataFrame':  # type: ignore # noqa: F821
        """
        Resolve a selection through ``.query(...)`` expressions.

        ``row`` and ``col`` are each a query string, an empty slice or None.
        When only ``row`` is given (``col`` is None) and the row query fails,
        the same expression is retried as a column query.
        """
        res = self.df   # Result of DataFrame selection (final result)
        ## ROW
        if isinstance(row, str):
            # Try a query call on index
            try:
                res = res.mfiloc[((res if res.is_table else res.mf(axis=0)).query(row).index.tolist(),),]
            except Exception:
                # Only ordinary errors trigger the fallback; KeyboardInterrupt
                # and SystemExit must propagate instead of being retried.
                # Try to call the query on columns if obj.q[<...>] was used
                if col is not None:
                    raise
                col = row
                row = None
        ## COL
        if isinstance(col, str):
            # Query call on columns
            # NOTE(review): the key passed to `mfiloc` has the same shape as
            # the row selection above - confirm it addresses the column axis.
            res = res.mfiloc[((res if res.is_table else res.mf(axis=1)).query(col).index.tolist(),),]
        return res


    def _get_gs(self, row, col) -> 'DataFrame': # type: ignore # noqa: F821
        """
        Resolve a selection through get-string expressions.

        String inputs are first passed through ``parse_get_string``. When only
        ``row`` is given (``col`` is None) and evaluating it on axis 0 fails,
        the same expression is retried on axis 1.
        """
        # Parse get-strings
        row, col = [parse_get_string(e, 'df') if isinstance(e, str) else e for e in [row, col]]
        res = self.df   # Result of DataFrame selection (final result)
        ## ROW
        if isinstance(row, str):
            # Try a get-string call on index
            try:
                res = res._eval_get_string(row, 'df', axis=0)
            except Exception:
                # Only ordinary errors trigger the fallback; KeyboardInterrupt
                # and SystemExit must propagate instead of being retried.
                # Try to call the get-string on columns if obj.gs[<...>] was used
                if col is not None:
                    raise
                col = row
                row = None
        ## COL
        if isinstance(col, str):
            # Get-string call on columns
            res = res._eval_get_string(col, 'df', axis=1)
        return res

    def __setitem__(self, 
                    key: Union[
                        Union[str, slice], 
                        Tuple[Union[str, slice], Union[str, slice]], 
                        Tuple[Union[str, slice], Union[str, slice], str]
                    ], 
                    value: Any
                ) -> None:
        """
        Assign values using get-string expressions.

        Supports updating existing DataFrame values or assigning new
        columns in a MetaFrame based on string-based indexed selection.

        Two main forms are supported:

        1. **DataFrame update:** using 1 or 2 get-strings or empty slices
           to directly modify data values.
        2. **MetaFrame column creation/update:** using a 3-element tuple
           `(row_getstring, col_getstring, new_colname)`.

        Parameters
        ----------
        key : str, slice, tuple
            Key defining target cells or MetaFrame fields.
            - For DataFrame updates: one or two get-strings.
            - For MetaFrame updates: a tuple of `(row_getstring, col_getstring, new_column_name)`.

        value : any
            The value(s) to assign to the matched entries.

        Raises
        ------
        ValueError
            If the third element of a 3-tuple is not a string, or when a row
            get-string is used in a 3-tuple while the index is not a
            multi-index (only MetaFrames allow column creation).

        Examples
        --------
        >>> from metaframe.testing import dataframe
        >>> dataframe
        strings             f  g     h
        group               1  0     1
        floats bool  group
        1.1    False 0      1  A     2
        2.2    False 0      2  B  None
        3.3    True  2      3  C     B
        4.4    True  1      4  D  None
        >>> dataframe.gs["bool:False", "group:1"] = -1
        >>> dataframe
        strings              f  g     h
        group                1  0     1
        floats bool  group
        1.1    False 0      -1  A    -1
        2.2    False 0      -1  B    -1
        3.3    True  2       3  C     B
        4.4    True  1       4  D  None

        >>> dataframe.gs["group:!0", :, "foo"] = "bar"
        >>> dataframe
        strings                  f  g     h
        group                    1  0     1
        floats bool  group foo
        1.1    False 0     nan  -1  A    -1
        2.2    False 0     nan  -1  B    -1
        3.3    True  2     bar   3  C     B
        4.4    True  1     bar   4  D  None
        """
        if isinstance(key, tuple) and len(key) == 3:
            if not isinstance(key[2], str):
                raise ValueError("The third element of the tuple must be a column name (str)!")
            if isinstance(key[0], str):
                if not is_mi(self.df.index):
                    raise ValueError("Can not set MetaFrame column in a simple index!")
                # Edit the row MetaFrame, then write it back onto the frame.
                mfr = self.df.mf(axis=0)
                idx = mfr.q[:, key[0]].index if self.query else mfr.gs[:, key[0]].index
                mfr.loc[idx, key[2]] = value
                self.df.mfr = mfr
            if isinstance(key[1], str):
                mi_col = is_mi(self.df.columns)
                if mi_col:
                    # NOTE(review): the result of this selection is never used
                    # (`idx` is reassigned below); the call is kept because it
                    # may raise for a non-matching get-string - confirm intent.
                    idx = self[:, key[1]]
                # With simple columns the frame itself is the assignment target.
                mfc = self.df.mf(axis=1) if mi_col else self.df
                idx = mfc.q[:, key[1]].index if self.query else mfc.gs[:, key[1]].index
                mfc.loc[idx, key[2]] = value
                if mi_col:
                    self.df.mfc = mfc
        # For all other cases
        else:
            df = self[key]
            self.df.loc[df.index, df.columns] = value

__getitem__(key)

Select rows and/or columns using get-string expressions.

Allows selection by string-based patterns applied to DataFrame index or column. Use a string for single-axis filtering or a tuple of (row_getstring, col_getstring) for two-dimensional selection.

Only strings and empty slices ([:]) are accepted.

Parameters:

Name Type Description Default
key str, slice, or tuple of (str or slice)
  • A single string or slice selects rows first, falling back to columns if needed.
  • A 2-tuple specifies row and column selections.
  • Only empty slices ([:]) are allowed for unfiltered dimensions.
required

Returns:

Type Description
DataFrame

A new DataFrame filtered according to the provided get-string(s).

Raises:

Type Description
ValueError

If an invalid key type or a non-empty slice is provided.

Exception

If no matching rows or columns are found for the get-string expression.

Examples:

>>> from metaframe.testing import metaframe_row
>>> metaframe_row
   floats   bool  group
0     1.1  False      0
1     2.2  False      0
2     3.3   True      2
3     4.4   True      1
>>> metaframe_row.gs["floats:<2 or bool:True"]
   floats   bool  group
0     1.1  False      0
2     3.3   True      2
3     4.4   True      1
>>> from metaframe.testing import dataframe
>>> dataframe
strings             f  g     h
group               1  0     1
floats bool  group            
1.1    False 0      1  A     2
2.2    False 0      2  B  None
3.3    True  2      3  C     B
4.4    True  1      4  D  None
>>> dataframe.gs["bool:False", "group:1 and strings:h"]
strings                h
group                  1
floats bool  group      
1.1    False 0         2
2.2    False 0      None
Source code in metaframe/src/indexer/getstring.py
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
def __getitem__(self, 
                key: Union[
                    Union[str, slice], 
                    Tuple[Union[str, slice], Union[str, slice]]
                ]
            ) -> 'DataFrame': # type: ignore  # noqa: F821
    """
    Select rows and/or columns using get-string expressions.

    Allows selection by string-based patterns applied to DataFrame index
    or column. Use a string for single-axis filtering or a tuple of
    (row_getstring, col_getstring) for two-dimensional selection.

    Only strings and empty slices ([:]) are accepted.

    Parameters
    ----------
    key : str, slice, or tuple of (str or slice)
        - A single string or slice selects rows first, falling back to columns if needed.
        - A 2-tuple specifies row and column selections.
        - Only empty slices ([:]) are allowed for unfiltered dimensions.

    Returns
    -------
    DataFrame
        A new DataFrame filtered according to the provided get-string(s).

    Raises
    ------
    ValueError
        If an invalid key type, a tuple that does not have exactly two
        elements, or a non-empty slice is provided.
    Exception
        If no matching rows or columns are found for the get-string expression.

    Examples
    --------
    >>> from metaframe.testing import metaframe_row
    >>> metaframe_row
       floats   bool  group
    0     1.1  False      0
    1     2.2  False      0
    2     3.3   True      2
    3     4.4   True      1
    >>> metaframe_row.gs["floats:<2 or bool:True"]
       floats   bool  group
    0     1.1  False      0
    2     3.3   True      2
    3     4.4   True      1

    >>> from metaframe.testing import dataframe
    >>> dataframe
    strings             f  g     h
    group               1  0     1
    floats bool  group
    1.1    False 0      1  A     2
    2.2    False 0      2  B  None
    3.3    True  2      3  C     B
    4.4    True  1      4  D  None
    >>> dataframe.gs["bool:False", "group:1 and strings:h"]
    strings                h
    group                  1
    floats bool  group
    1.1    False 0         2
    2.2    False 0      None
    """
    # Input validity checks
    if isinstance(key, tuple):
        # Check the length explicitly so a wrong-sized tuple reports the
        # accepted forms instead of a bare tuple-unpacking error.
        if len(key) != 2:
            raise ValueError(f"Invalid input: {key}. Only strings, empty slice or 2 elements tuple of such are accepted.")
        row, col = key
        for e in [row, col]:
            if not isinstance(e, (str, slice)):
                raise ValueError(f"Invalid input: {e}. Only strings or slice are accepted.")
    elif isinstance(key, (str, slice)):
        row = key
        col = None
    else:
        raise ValueError(f"Invalid input: {key}. Only strings, empty slice or 2 elements tuple of such are accepted.")
    for e in [row, col]:
        if isinstance(e, slice) and e != slice(None):
            raise ValueError(f"Invalid slice {e}. Only empty slice is accepted ([:])!")
    # Dispatch on the indexer mode: pandas-style query vs. get-string.
    if self.query:
        return self._get_query(row, col)
    return self._get_gs(row, col)

__setitem__(key, value)

Assign values using get-string expressions.

Supports updating existing DataFrame values or assigning new columns in a MetaFrame based on string-based indexed selection.

Two main forms are supported: (1) DataFrame update, using 1 or 2 get-strings or empty slices to directly modify data values; (2) MetaFrame column creation/update, using a 3-element tuple (row_getstring, col_getstring, new_colname).

Parameters:

Name Type Description Default
key (str, slice, tuple)

Key defining target cells or MetaFrame fields. - For DataFrame updates: one or two get-strings. - For MetaFrame updates: a tuple of (row_getstring, col_getstring, new_column_name).

required
value any

The value(s) to assign to the matched entries.

required

Raises:

Type Description
ValueError

If the tuple has invalid elements, or when attempting to create columns in a plain DataFrame (only MetaFrames allow this).

Examples:

>>> from metaframe.testing import dataframe
>>> dataframe
strings             f  g     h
group               1  0     1
floats bool  group            
1.1    False 0      1  A     2
2.2    False 0      2  B  None
3.3    True  2      3  C     B
4.4    True  1      4  D  None
>>> dataframe.gs["bool:False", "group:1"] = -1
>>> dataframe
strings              f  g     h
group                1  0     1
floats bool  group             
1.1    False 0      -1  A    -1
2.2    False 0      -1  B    -1
3.3    True  2       3  C     B
4.4    True  1       4  D  None
>>> dataframe.gs["group:!0", :, "foo"] = "bar"
>>> dataframe
strings                  f  g     h
group                    1  0     1
floats bool  group foo             
1.1    False 0     nan  -1  A    -1
2.2    False 0     nan  -1  B    -1
3.3    True  2     bar   3  C     B
4.4    True  1     bar   4  D  None
Source code in metaframe/src/indexer/getstring.py
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
def __setitem__(self, 
                key: Union[
                    Union[str, slice], 
                    Tuple[Union[str, slice], Union[str, slice]], 
                    Tuple[Union[str, slice], Union[str, slice], str]
                ], 
                value: Any
            ) -> None:
    """
    Write values into the frame through get-string expressions.

    Two kinds of key are understood:

    1. **DataFrame update:** a single get-string / empty slice, or a pair of
       them. The matching cells of the data are overwritten with ``value``.
    2. **MetaFrame column creation/update:** a 3-element tuple
       ``(row_getstring, col_getstring, new_colname)``. ``value`` is written
       to the column ``new_colname`` of the row and/or column MetaFrame, for
       the entries matched by the corresponding get-string.

    Parameters
    ----------
    key : str, slice, tuple
        Target description, in one of the two forms listed above.
    value : any
        The value(s) to assign to the matched entries.

    Raises
    ------
    ValueError
        If the third element of a 3-tuple is not a string, or if a row
        get-string is given in a 3-tuple while the index is not a
        multi-index.

    Examples
    --------
    >>> from metaframe.testing import dataframe
    >>> dataframe
    strings             f  g     h
    group               1  0     1
    floats bool  group
    1.1    False 0      1  A     2
    2.2    False 0      2  B  None
    3.3    True  2      3  C     B
    4.4    True  1      4  D  None
    >>> dataframe.gs["bool:False", "group:1"] = -1
    >>> dataframe
    strings              f  g     h
    group                1  0     1
    floats bool  group
    1.1    False 0      -1  A    -1
    2.2    False 0      -1  B    -1
    3.3    True  2       3  C     B
    4.4    True  1       4  D  None

    >>> dataframe.gs["group:!0", :, "foo"] = "bar"
    >>> dataframe
    strings                  f  g     h
    group                    1  0     1
    floats bool  group foo
    1.1    False 0     nan  -1  A    -1
    2.2    False 0     nan  -1  B    -1
    3.3    True  2     bar   3  C     B
    4.4    True  1     bar   4  D  None
    """
    is_metaframe_key = isinstance(key, tuple) and len(key) == 3

    # Plain data update: select the target cells, then overwrite them.
    if not is_metaframe_key:
        selection = self[key]
        self.df.loc[selection.index, selection.columns] = value
        return

    row_expr, col_expr, new_name = key
    if not isinstance(new_name, str):
        raise ValueError("The third element of the tuple must be a column name (str)!")

    # Row MetaFrame: only touched when a row expression was supplied.
    if isinstance(row_expr, str):
        if not is_mi(self.df.index):
            raise ValueError("Can not set MetaFrame column in a simple index!")
        row_meta = self.df.mf(axis=0)
        row_indexer = row_meta.q if self.query else row_meta.gs
        matched = row_indexer[:, row_expr].index
        row_meta.loc[matched, new_name] = value
        self.df.mfr = row_meta

    # Column MetaFrame (or the frame itself when columns are a simple index).
    if isinstance(col_expr, str):
        multi_columns = is_mi(self.df.columns)
        if multi_columns:
            # The selection result is discarded; the call itself is retained
            # so that any error it raises still surfaces here.
            _unused = self[:, col_expr]
            col_meta = self.df.mf(axis=1)
        else:
            col_meta = self.df
        col_indexer = col_meta.q if self.query else col_meta.gs
        matched = col_indexer[:, col_expr].index
        col_meta.loc[matched, new_name] = value
        if multi_columns:
            self.df.mfc = col_meta