LCOV - CPython 3.12 LCOV report [commit acb105a7c1f] - Objects/stringlib/unicode

LCOV - code coverage report

Current view:	top level - Objects/stringlib - unicode_format.h (source / functions)		Hit	Total	Coverage
Test:	CPython 3.12 LCOV report [commit acb105a7c1f]	Lines:	471	501	94.0 %
Date:	2022-07-20 13:12:14	Functions:	32	32	100.0 %
		Branches:	235	266	88.3 %

           Branch data     Line data    Source code

       1                 :            : /*
       2                 :            :     unicode_format.h -- implementation of str.format().
       3                 :            : */
       4                 :            : 
       5                 :            : #include "pycore_floatobject.h"   // _PyFloat_FormatAdvancedWriter()
       6                 :            : 
       7                 :            : /************************************************************************/
       8                 :            : /***********   Global data structures and forward declarations  *********/
       9                 :            : /************************************************************************/
      10                 :            : 
      11                 :            : /*
      12                 :            :    A SubString consists of the characters of the unicode object 'str'
      13                 :            :    from index 'start' (inclusive) up to index 'end' (exclusive).
      14                 :            : */
      15                 :            : typedef struct {
      16                 :            :     PyObject *str; /* borrowed reference */
      17                 :            :     Py_ssize_t start, end;
      18                 :            : } SubString;
      19                 :            : 
      20                 :            : 
      21                 :            : typedef enum {
      22                 :            :     ANS_INIT,
      23                 :            :     ANS_AUTO,
      24                 :            :     ANS_MANUAL
      25                 :            : } AutoNumberState;   /* Keep track if we're auto-numbering fields */
      26                 :            : 
      27                 :            : /* Keeps track of our auto-numbering state, and which number field we're on */
      28                 :            : typedef struct {
      29                 :            :     AutoNumberState an_state;
      30                 :            :     int an_field_number;
      31                 :            : } AutoNumber;
      32                 :            : 
      33                 :            : 
      34                 :            : /* forward declaration for recursion */
      35                 :            : static PyObject *
      36                 :            : build_string(SubString *input, PyObject *args, PyObject *kwargs,
      37                 :            :              int recursion_depth, AutoNumber *auto_number);
      38                 :            : 
      39                 :            : 
      40                 :            : 
      41                 :            : /************************************************************************/
      42                 :            : /**************************  Utility  functions  ************************/
      43                 :            : /************************************************************************/
      44                 :            : 
      45                 :            : static void
      46                 :     494337 : AutoNumber_Init(AutoNumber *auto_number)
      47                 :            : {
      48                 :     494337 :     auto_number->an_state = ANS_INIT;
      49                 :     494337 :     auto_number->an_field_number = 0;
      50                 :     494337 : }
      51                 :            : 
      52                 :            : /* fill in a SubString from a string object and start/end indices */
      53                 :            : Py_LOCAL_INLINE(void)
      54                 :    9580283 : SubString_init(SubString *str, PyObject *s, Py_ssize_t start, Py_ssize_t end)
      55                 :            : {
      56                 :    9580283 :     str->str = s;
      57                 :    9580283 :     str->start = start;
      58                 :    9580283 :     str->end = end;
      59                 :    9580283 : }
      60                 :            : 
      61                 :            : /* return a new string.  if str->str is NULL, return None */
      62                 :            : Py_LOCAL_INLINE(PyObject *)
      63                 :      28691 : SubString_new_object(SubString *str)
      64                 :            : {
      65         [ +  + ]:      28691 :     if (str->str == NULL)
      66                 :        298 :         Py_RETURN_NONE;
      67                 :      28393 :     return PyUnicode_Substring(str->str, str->start, str->end);
      68                 :            : }
      69                 :            : 
      70                 :            : /* return a new string.  if str->str is NULL, return a new empty string */
      71                 :            : Py_LOCAL_INLINE(PyObject *)
      72                 :        290 : SubString_new_object_or_empty(SubString *str)
      73                 :            : {
      74         [ +  + ]:        290 :     if (str->str == NULL) {
      75                 :        261 :         return PyUnicode_New(0, 0);
      76                 :            :     }
      77                 :         29 :     return SubString_new_object(str);
      78                 :            : }
      79                 :            : 
      80                 :            : /* Return 1 if an error has been detected switching between automatic
      81                 :            :    field numbering and manual field specification, else return 0. Set
      82                 :            :    ValueError on error. */
      83                 :            : static int
      84                 :    1024383 : autonumber_state_error(AutoNumberState state, int field_name_is_empty)
      85                 :            : {
      86         [ +  + ]:    1024383 :     if (state == ANS_MANUAL) {
      87         [ +  + ]:     153598 :         if (field_name_is_empty) {
      88                 :          2 :             PyErr_SetString(PyExc_ValueError, "cannot switch from "
      89                 :            :                             "manual field specification to "
      90                 :            :                             "automatic field numbering");
      91                 :          2 :             return 1;
      92                 :            :         }
      93                 :            :     }
      94                 :            :     else {
      95         [ +  + ]:     870785 :         if (!field_name_is_empty) {
      96                 :          2 :             PyErr_SetString(PyExc_ValueError, "cannot switch from "
      97                 :            :                             "automatic field numbering to "
      98                 :            :                             "manual field specification");
      99                 :          2 :             return 1;
     100                 :            :         }
     101                 :            :     }
     102                 :    1024379 :     return 0;
     103                 :            : }
     104                 :            : 
     105                 :            : 
     106                 :            : /************************************************************************/
     107                 :            : /***********  Format string parsing -- integers and identifiers *********/
     108                 :            : /************************************************************************/
     109                 :            : 
     110                 :            : static Py_ssize_t
     111                 :    1047714 : get_integer(const SubString *str)
     112                 :            : {
     113                 :    1047714 :     Py_ssize_t accumulator = 0;
     114                 :            :     Py_ssize_t digitval;
     115                 :            :     Py_ssize_t i;
     116                 :            : 
     117                 :            :     /* empty string is an error */
     118         [ +  + ]:    1047714 :     if (str->start >= str->end)
     119                 :     870785 :         return -1;
     120                 :            : 
     121         [ +  + ]:     331126 :     for (i = str->start; i < str->end; i++) {
     122                 :     176986 :         digitval = Py_UNICODE_TODECIMAL(PyUnicode_READ_CHAR(str->str, i));
     123         [ +  + ]:     176986 :         if (digitval < 0)
     124                 :      22786 :             return -1;
     125                 :            :         /*
     126                 :            :            Detect possible overflow before it happens:
     127                 :            : 
     128                 :            :               accumulator * 10 + digitval > PY_SSIZE_T_MAX if and only if
     129                 :            :               accumulator > (PY_SSIZE_T_MAX - digitval) / 10.
     130                 :            :         */
     131         [ +  + ]:     154200 :         if (accumulator > (PY_SSIZE_T_MAX - digitval) / 10) {
     132                 :          3 :             PyErr_Format(PyExc_ValueError,
     133                 :            :                          "Too many decimal digits in format string");
     134                 :          3 :             return -1;
     135                 :            :         }
     136                 :     154197 :         accumulator = accumulator * 10 + digitval;
     137                 :            :     }
     138                 :     154140 :     return accumulator;
     139                 :            : }
     140                 :            : 
     141                 :            : /************************************************************************/
     142                 :            : /******** Functions to get field objects and specification strings ******/
     143                 :            : /************************************************************************/
     144                 :            : 
     145                 :            : /* do the equivalent of obj.name */
     146                 :            : static PyObject *
     147                 :       4844 : getattr(PyObject *obj, SubString *name)
     148                 :            : {
     149                 :            :     PyObject *newobj;
     150                 :       4844 :     PyObject *str = SubString_new_object(name);
     151         [ -  + ]:       4844 :     if (str == NULL)
     152                 :          0 :         return NULL;
     153                 :       4844 :     newobj = PyObject_GetAttr(obj, str);
     154                 :       4844 :     Py_DECREF(str);
     155                 :       4844 :     return newobj;
     156                 :            : }
     157                 :            : 
     158                 :            : /* do the equivalent of obj[idx], where obj is a sequence */
     159                 :            : static PyObject *
     160                 :        495 : getitem_sequence(PyObject *obj, Py_ssize_t idx)
     161                 :            : {
     162                 :        495 :     return PySequence_GetItem(obj, idx);
     163                 :            : }
     164                 :            : 
     165                 :            : /* do the equivalent of obj[idx], where obj is not a sequence */
     166                 :            : static PyObject *
     167                 :          1 : getitem_idx(PyObject *obj, Py_ssize_t idx)
     168                 :            : {
     169                 :            :     PyObject *newobj;
     170                 :          1 :     PyObject *idx_obj = PyLong_FromSsize_t(idx);
     171         [ -  + ]:          1 :     if (idx_obj == NULL)
     172                 :          0 :         return NULL;
     173                 :          1 :     newobj = PyObject_GetItem(obj, idx_obj);
     174                 :          1 :     Py_DECREF(idx_obj);
     175                 :          1 :     return newobj;
     176                 :            : }
     177                 :            : 
     178                 :            : /* do the equivalent of obj[name] */
     179                 :            : static PyObject *
     180                 :         37 : getitem_str(PyObject *obj, SubString *name)
     181                 :            : {
     182                 :            :     PyObject *newobj;
     183                 :         37 :     PyObject *str = SubString_new_object(name);
     184         [ -  + ]:         37 :     if (str == NULL)
     185                 :          0 :         return NULL;
     186                 :         37 :     newobj = PyObject_GetItem(obj, str);
     187                 :         37 :     Py_DECREF(str);
     188                 :         37 :     return newobj;
     189                 :            : }
     190                 :            : 
     191                 :            : typedef struct {
     192                 :            :     /* the entire string we're parsing.  we assume that someone else
     193                 :            :        is managing its lifetime, and that it will exist for the
     194                 :            :        lifetime of the iterator.  can be empty */
     195                 :            :     SubString str;
     196                 :            : 
     197                 :            :     /* index to where we are inside field_name */
     198                 :            :     Py_ssize_t index;
     199                 :            : } FieldNameIterator;
     200                 :            : 
     201                 :            : 
     202                 :            : static int
     203                 :    1047173 : FieldNameIterator_init(FieldNameIterator *self, PyObject *s,
     204                 :            :                        Py_ssize_t start, Py_ssize_t end)
     205                 :            : {
     206                 :    1047173 :     SubString_init(&self->str, s, start, end);
     207                 :    1047173 :     self->index = start;
     208                 :    1047173 :     return 1;
     209                 :            : }
     210                 :            : 
     211                 :            : static int
     212                 :       4852 : _FieldNameIterator_attr(FieldNameIterator *self, SubString *name)
     213                 :            : {
     214                 :            :     Py_UCS4 c;
     215                 :            : 
     216                 :       4852 :     name->str = self->str.str;
     217                 :       4852 :     name->start = self->index;
     218                 :            : 
     219                 :            :     /* return everything until '.' or '[' */
     220         [ +  + ]:      29122 :     while (self->index < self->str.end) {
     221                 :      24278 :         c = PyUnicode_READ_CHAR(self->str.str, self->index++);
     222         [ +  + ]:      24278 :         switch (c) {
     223                 :          8 :         case '[':
     224                 :            :         case '.':
     225                 :            :             /* back up so that this character will be seen next time */
     226                 :          8 :             self->index--;
     227                 :          8 :             break;
     228                 :      24270 :         default:
     229                 :      24270 :             continue;
     230                 :            :         }
     231                 :          8 :         break;
     232                 :            :     }
     233                 :            :     /* end of string is okay */
     234                 :       4852 :     name->end = self->index;
     235                 :       4852 :     return 1;
     236                 :            : }
     237                 :            : 
     238                 :            : static int
     239                 :        541 : _FieldNameIterator_item(FieldNameIterator *self, SubString *name)
     240                 :            : {
     241                 :        541 :     int bracket_seen = 0;
     242                 :            :     Py_UCS4 c;
     243                 :            : 
     244                 :        541 :     name->str = self->str.str;
     245                 :        541 :     name->start = self->index;
     246                 :            : 
     247                 :            :     /* return everything until ']' */
     248         [ +  - ]:       1302 :     while (self->index < self->str.end) {
     249                 :       1302 :         c = PyUnicode_READ_CHAR(self->str.str, self->index++);
     250         [ +  + ]:       1302 :         switch (c) {
     251                 :        541 :         case ']':
     252                 :        541 :             bracket_seen = 1;
     253                 :        541 :             break;
     254                 :        761 :         default:
     255                 :        761 :             continue;
     256                 :            :         }
     257                 :        541 :         break;
     258                 :            :     }
     259                 :            :     /* make sure we ended with a ']' */
     260         [ -  + ]:        541 :     if (!bracket_seen) {
     261                 :          0 :         PyErr_SetString(PyExc_ValueError, "Missing ']' in format string");
     262                 :          0 :         return 0;
     263                 :            :     }
     264                 :            : 
     265                 :            :     /* a closing ']' was seen; self->index is now one past it */
     266                 :            :     /* don't include the ']' */
     267                 :        541 :     name->end = self->index-1;
     268                 :        541 :     return 1;
     269                 :            : }
     270                 :            : 
     271                 :            : /* returns 0 on error, 1 on non-error termination, and 2 if it returns a value */
     272                 :            : static int
     273                 :    1052530 : FieldNameIterator_next(FieldNameIterator *self, int *is_attribute,
     274                 :            :                        Py_ssize_t *name_idx, SubString *name)
     275                 :            : {
     276                 :            :     /* check at end of input */
     277         [ +  + ]:    1052530 :     if (self->index >= self->str.end)
     278                 :    1047135 :         return 1;
     279                 :            : 
     280      [ +  +  + ]:       5395 :     switch (PyUnicode_READ_CHAR(self->str.str, self->index++)) {
     281                 :       4852 :     case '.':
     282                 :       4852 :         *is_attribute = 1;
     283         [ -  + ]:       4852 :         if (_FieldNameIterator_attr(self, name) == 0)
     284                 :          0 :             return 0;
     285                 :       4852 :         *name_idx = -1;
     286                 :       4852 :         break;
     287                 :        541 :     case '[':
     288                 :        541 :         *is_attribute = 0;
     289         [ -  + ]:        541 :         if (_FieldNameIterator_item(self, name) == 0)
     290                 :          0 :             return 0;
     291                 :        541 :         *name_idx = get_integer(name);
     292   [ +  +  +  + ]:        541 :         if (*name_idx == -1 && PyErr_Occurred())
     293                 :          1 :             return 0;
     294                 :        540 :         break;
     295                 :          2 :     default:
     296                 :            :         /* Invalid character follows ']' */
     297                 :          2 :         PyErr_SetString(PyExc_ValueError, "Only '.' or '[' may "
     298                 :            :                         "follow ']' in format field specifier");
     299                 :          2 :         return 0;
     300                 :            :     }
     301                 :            : 
     302                 :            :     /* empty string is an error */
     303         [ +  + ]:       5392 :     if (name->start == name->end) {
     304                 :          3 :         PyErr_SetString(PyExc_ValueError, "Empty attribute in format string");
     305                 :          3 :         return 0;
     306                 :            :     }
     307                 :            : 
     308                 :       5389 :     return 2;
     309                 :            : }
     310                 :            : 
     311                 :            : 
     312                 :            : /* input: field_name
     313                 :            :    output: 'first' points to the part before the first '[' or '.'
     314                 :            :            'first_idx' is -1 if 'first' is not an integer, otherwise
     315                 :            :                        it's the value of first converted to an integer
     316                 :            :            'rest' is an iterator to return the rest
     317                 :            : */
     318                 :            : static int
     319                 :    1047173 : field_name_split(PyObject *str, Py_ssize_t start, Py_ssize_t end, SubString *first,
     320                 :            :                  Py_ssize_t *first_idx, FieldNameIterator *rest,
     321                 :            :                  AutoNumber *auto_number)
     322                 :            : {
     323                 :            :     Py_UCS4 c;
     324                 :    1047173 :     Py_ssize_t i = start;
     325                 :            :     int field_name_is_empty;
     326                 :            :     int using_numeric_index;
     327                 :            : 
     328                 :            :     /* find the part up until the first '.' or '[' */
     329         [ +  + ]:    1365340 :     while (i < end) {
     330         [ +  + ]:     323539 :         switch (c = PyUnicode_READ_CHAR(str, i++)) {
     331                 :       5372 :         case '[':
     332                 :            :         case '.':
     333                 :            :             /* back up so that this character is available to the
     334                 :            :                "rest" iterator */
     335                 :       5372 :             i--;
     336                 :       5372 :             break;
     337                 :     318167 :         default:
     338                 :     318167 :             continue;
     339                 :            :         }
     340                 :       5372 :         break;
     341                 :            :     }
     342                 :            : 
     343                 :            :     /* set up the return values */
     344                 :    1047173 :     SubString_init(first, str, start, i);
     345                 :    1047173 :     FieldNameIterator_init(rest, str, i, end);
     346                 :            : 
     347                 :            :     /* see if "first" is an integer, in which case it's used as an index */
     348                 :    1047173 :     *first_idx = get_integer(first);
     349   [ +  +  +  + ]:    1047173 :     if (*first_idx == -1 && PyErr_Occurred())
     350                 :          2 :         return 0;
     351                 :            : 
     352                 :    1047171 :     field_name_is_empty = first->start >= first->end;
     353                 :            : 
     354                 :            :     /* If the field name is omitted or if we have a numeric index
     355                 :            :        specified, then we're doing numeric indexing into args. */
     356   [ +  +  +  + ]:    1047171 :     using_numeric_index = field_name_is_empty || *first_idx != -1;
     357                 :            : 
     358                 :            :     /* We always get here exactly one time for each field we're
     359                 :            :        processing. And we get here in field order (counting by left
     360                 :            :        braces). So this is the perfect place to handle automatic field
     361                 :            :        numbering if the field name is omitted. */
     362                 :            : 
     363                 :            :     /* Check if we need to do the auto-numbering. It's not needed if
     364                 :            :        we're called from string.Formatter routines, because it's handled
     365                 :            :        in that class by itself. */
     366         [ +  + ]:    1047171 :     if (auto_number) {
     367                 :            :         /* Initialize our auto numbering state if this is the first
     368                 :            :            time we're either auto-numbering or manually numbering. */
     369   [ +  +  +  + ]:    1046931 :         if (auto_number->an_state == ANS_INIT && using_numeric_index)
     370                 :     482198 :             auto_number->an_state = field_name_is_empty ?
     371         [ +  + ]:     482198 :                 ANS_AUTO : ANS_MANUAL;
     372                 :            : 
     373                 :            :         /* Make sure our state is consistent with what we're doing
     374                 :            :            this time through. Only check if we're using a numeric
     375                 :            :            index. */
     376         [ +  + ]:    1046931 :         if (using_numeric_index)
     377         [ +  + ]:    1024383 :             if (autonumber_state_error(auto_number->an_state,
     378                 :            :                                        field_name_is_empty))
     379                 :          4 :                 return 0;
     380                 :            :         /* Zero length field means we want to do auto-numbering of the
     381                 :            :            fields. */
     382         [ +  + ]:    1046927 :         if (field_name_is_empty)
     383                 :     870783 :             *first_idx = (auto_number->an_field_number)++;
     384                 :            :     }
     385                 :            : 
     386                 :    1047167 :     return 1;
     387                 :            : }
     388                 :            : 
     389                 :            : 
     390                 :            : /*
     391                 :            :     get_field_object returns the object inside {}, before the
     392                 :            :     format_spec.  It handles getindex and getattr lookups and consumes
     393                 :            :     the entire input string.
     394                 :            : */
     395                 :            : static PyObject *
     396                 :    1046933 : get_field_object(SubString *input, PyObject *args, PyObject *kwargs,
     397                 :            :                  AutoNumber *auto_number)
     398                 :            : {
     399                 :    1046933 :     PyObject *obj = NULL;
     400                 :            :     int ok;
     401                 :            :     int is_attribute;
     402                 :            :     SubString name;
     403                 :            :     SubString first;
     404                 :            :     Py_ssize_t index;
     405                 :            :     FieldNameIterator rest;
     406                 :            : 
     407         [ +  + ]:    1046933 :     if (!field_name_split(input->str, input->start, input->end, &first,
     408                 :            :                           &index, &rest, auto_number)) {
     409                 :          6 :         goto error;
     410                 :            :     }
     411                 :            : 
     412         [ +  + ]:    1046927 :     if (index == -1) {
     413                 :            :         /* look up in kwargs */
     414                 :      22548 :         PyObject *key = SubString_new_object(&first);
     415         [ -  + ]:      22548 :         if (key == NULL) {
     416                 :          0 :             goto error;
     417                 :            :         }
     418         [ -  + ]:      22548 :         if (kwargs == NULL) {
     419                 :          0 :             PyErr_SetObject(PyExc_KeyError, key);
     420                 :          0 :             Py_DECREF(key);
     421                 :          0 :             goto error;
     422                 :            :         }
     423                 :            :         /* Use PyObject_GetItem instead of PyDict_GetItem because this
     424                 :            :            code is no longer just used with kwargs. It might be passed
     425                 :            :            a non-dict when called through format_map. */
     426                 :      22548 :         obj = PyObject_GetItem(kwargs, key);
     427                 :      22548 :         Py_DECREF(key);
     428         [ +  + ]:      22548 :         if (obj == NULL) {
     429                 :         10 :             goto error;
     430                 :            :         }
     431                 :            :     }
     432                 :            :     else {
     433                 :            :         /* If args is NULL, we have a format string with a positional field
     434                 :            :            with only kwargs to retrieve it from. This can only happen when
     435                 :            :            used with format_map(), where positional arguments are not
     436                 :            :            allowed. */
     437         [ +  + ]:    1024379 :         if (args == NULL) {
     438                 :          3 :             PyErr_SetString(PyExc_ValueError, "Format string contains "
     439                 :            :                             "positional fields");
     440                 :          3 :             goto error;
     441                 :            :         }
     442                 :            : 
     443                 :            :         /* look up in args */
     444                 :    1024376 :         obj = PySequence_GetItem(args, index);
     445         [ +  + ]:    1024376 :         if (obj == NULL) {
     446                 :          6 :             PyErr_Format(PyExc_IndexError,
     447                 :            :                          "Replacement index %zd out of range for positional "
     448                 :            :                          "args tuple",
     449                 :            :                          index);
     450                 :          6 :              goto error;
     451                 :            :         }
     452                 :            :     }
     453                 :            : 
     454                 :            :     /* iterate over the rest of the field_name */
     455                 :    1052284 :     while ((ok = FieldNameIterator_next(&rest, &is_attribute, &index,
     456         [ +  + ]:    1052284 :                                         &name)) == 2) {
     457                 :            :         PyObject *tmp;
     458                 :            : 
     459         [ +  + ]:       5377 :         if (is_attribute)
     460                 :            :             /* getattr lookup "." */
     461                 :       4844 :             tmp = getattr(obj, &name);
     462                 :            :         else
     463                 :            :             /* getitem lookup "[]" */
     464         [ +  + ]:        533 :             if (index == -1)
     465                 :         37 :                 tmp = getitem_str(obj, &name);
     466                 :            :             else
     467         [ +  + ]:        496 :                 if (PySequence_Check(obj))
     468                 :        495 :                     tmp = getitem_sequence(obj, index);
     469                 :            :                 else
     470                 :            :                     /* not a sequence */
     471                 :          1 :                     tmp = getitem_idx(obj, index);
     472         [ +  + ]:       5377 :         if (tmp == NULL)
     473                 :          1 :             goto error;
     474                 :            : 
     475                 :            :         /* assign to obj */
     476                 :       5376 :         Py_DECREF(obj);
     477                 :       5376 :         obj = tmp;
     478                 :            :     }
     479                 :            :     /* end of iterator, this is the non-error case */
     480         [ +  + ]:    1046907 :     if (ok == 1)
     481                 :    1046901 :         return obj;
     482                 :          6 : error:
     483                 :         32 :     Py_XDECREF(obj);
     484                 :         32 :     return NULL;
     485                 :            : }
     486                 :            : 
     487                 :            : /************************************************************************/
     488                 :            : /*****************  Field rendering functions  **************************/
     489                 :            : /************************************************************************/
     490                 :            : 
     491                 :            : /*
     492                 :            :     render_field() is the main function in this section.  It takes the
     493                 :            :     field object and field specification string generated by
     494                 :            :     get_field_and_spec, and renders the field into the output string.
     495                 :            : 
     496                 :            :     render_field calls fieldobj.__format__(format_spec) method, and
     497                 :            :     appends to the output.  Returns 1 on success and 0 on failure.
     498                 :            : */
     499                 :            : static int
     500                 :    1045605 : render_field(PyObject *fieldobj, SubString *format_spec, _PyUnicodeWriter *writer)
     501                 :            : {
     502                 :    1045605 :     int ok = 0;  /* result of the generic path; set to 1 only after a successful write */
     503                 :    1045605 :     PyObject *result = NULL;
     504                 :    1045605 :     PyObject *format_spec_object = NULL;
     505                 :    1045605 :     int (*formatter) (_PyUnicodeWriter*, PyObject *, PyObject *, Py_ssize_t, Py_ssize_t) = NULL;
     506                 :            :     int err;
     507                 :            : 
     508                 :            :     /* If we know the type exactly, skip the lookup of __format__ and just
     509                 :            :        call the formatter directly.  Subclasses fail the exact checks
     510         [ +  + ]:    1045605 :     and go through the generic path below. */ if (PyUnicode_CheckExact(fieldobj))
     511                 :     548435 :         formatter = _PyUnicode_FormatAdvancedWriter;
     512         [ +  + ]:     497170 :     else if (PyLong_CheckExact(fieldobj))
     513                 :     480256 :         formatter = _PyLong_FormatAdvancedWriter;
     514         [ +  + ]:      16914 :     else if (PyFloat_CheckExact(fieldobj))
     515                 :      16011 :         formatter = _PyFloat_FormatAdvancedWriter;
     516         [ +  + ]:        903 :     else if (PyComplex_CheckExact(fieldobj))
     517                 :          6 :         formatter = _PyComplex_FormatAdvancedWriter;
     518                 :            : 
     519         [ +  + ]:    1045605 :     if (formatter) {
     520                 :            :         /* we know exactly which formatter will be called when __format__ is
     521                 :            :            looked up, so call it directly, instead. */
     522                 :    1044708 :         err = formatter(writer, fieldobj, format_spec->str,
     523                 :            :                         format_spec->start, format_spec->end);
     524                 :    1044708 :         return (err == 0);  /* a zero err is reported to the caller as success (1) */
     525                 :            :     }
     526                 :            :     else {
     527                 :            :         /* We need to create an object out of the pointers we have, because
     528                 :            :            __format__ takes a string/unicode object for format_spec. */
     529         [ +  + ]:        897 :         if (format_spec->str)
     530                 :         88 :             format_spec_object = PyUnicode_Substring(format_spec->str,
     531                 :            :                                                      format_spec->start,
     532                 :            :                                                      format_spec->end);
     533                 :            :         else
     534                 :        809 :             format_spec_object = PyUnicode_New(0, 0);  /* no spec was given: pass an empty string */
     535         [ -  + ]:        897 :         if (format_spec_object == NULL)
     536                 :          0 :             goto done;
     537                 :            : 
     538                 :        897 :         result = PyObject_Format(fieldobj, format_spec_object);
     539                 :            :     }
     540         [ +  + ]:        897 :     if (result == NULL)
     541                 :          5 :         goto done;
     542                 :            : 
     543         [ -  + ]:        892 :     if (_PyUnicodeWriter_WriteStr(writer, result) == -1)
     544                 :          0 :         goto done;
     545                 :        892 :     ok = 1;
     546                 :            : 
     547                 :        897 : done:  /* cleanup for the generic path only; the fast path returns above */
     548                 :        897 :     Py_XDECREF(format_spec_object);
     549                 :        897 :     Py_XDECREF(result);
     550                 :        897 :     return ok;
     551                 :            : }
     552                 :            : 
     553                 :            : static int
     554                 :    1047240 : parse_field(SubString *str, SubString *field_name, SubString *format_spec,
     555                 :            :             int *format_spec_needs_expanding, Py_UCS4 *conversion)
     556                 :            : {
     557                 :            :     /* Parse one replacement field.  On entry str->start is just past
     558                 :            :        the opening '{'; it is advanced as characters are consumed.
     559                 :            :        Returns 1 on success, or 0 with ValueError set.  A zero length
     560                 :            :        field name is fine here; it is handled later, in field_name_split. */
     561                 :    1047240 :     Py_UCS4 c = 0;
     562                 :            : 
     563                 :            :     /* initialize these, as they may be empty */
     564                 :    1047240 :     *conversion = '\0';
     565                 :    1047240 :     SubString_init(format_spec, NULL, 0, 0);
     566                 :            : 
     567                 :            :     /* Search for the field name.  It's terminated by the end of
     568                 :            :        the string, or a '}', ':' or '!' */
     569                 :    1047240 :     field_name->str = str->str;
     570                 :    1047240 :     field_name->start = str->start;
     571         [ +  + ]:    1395967 :     while (str->start < str->end) {
     572   [ +  +  +  + ]:    1395958 :         switch ((c = PyUnicode_READ_CHAR(str->str, str->start++))) {
     573                 :          2 :         case '{':
     574                 :          2 :             PyErr_SetString(PyExc_ValueError, "unexpected '{' in field name");
     575                 :          2 :             return 0;
     576                 :        552 :         case '[':  /* skip ahead to ']' so terminators inside an index are ignored */
     577         [ +  + ]:       1329 :             for (; str->start < str->end; str->start++)
     578         [ +  + ]:       1324 :                 if (PyUnicode_READ_CHAR(str->str, str->start) == ']')
     579                 :        547 :                     break;
     580                 :        552 :             continue;
     581                 :    1047229 :         case '}':
     582                 :            :         case ':':
     583                 :            :         case '!':
     584                 :    1047229 :             break;
     585                 :     348175 :         default:
     586                 :     348175 :             continue;
     587                 :            :         }
     588                 :    1047229 :         break;  /* only reached for a terminator: leave the while loop */
     589                 :            :     }
     590                 :            : 
     591                 :    1047238 :     field_name->end = str->start - 1;  /* drop the character that was read last */
     592   [ +  +  +  + ]:    1047238 :     if (c == '!' || c == ':') {
     593                 :            :         Py_ssize_t count;
     594                 :            :         /* we have a format specifier and/or a conversion */
     595                 :            :         /* don't include the last character */
     596                 :            : 
     597                 :            :         /* see if there's a conversion specifier */
     598         [ +  + ]:     338812 :         if (c == '!') {
     599                 :            :             /* there must be another character present */
     600         [ -  + ]:     147144 :             if (str->start >= str->end) {
     601                 :          0 :                 PyErr_SetString(PyExc_ValueError,
     602                 :            :                                 "end of string while looking for conversion "
     603                 :            :                                 "specifier");
     604                 :          0 :                 return 0;
     605                 :            :             }
     606                 :     147144 :             *conversion = PyUnicode_READ_CHAR(str->str, str->start++);
     607                 :            : 
     608         [ +  + ]:     147144 :             if (str->start < str->end) {
     609                 :     147142 :                 c = PyUnicode_READ_CHAR(str->str, str->start++);
     610         [ +  + ]:     147142 :                 if (c == '}')
     611                 :     147119 :                     return 1;  /* conversion only, no format spec */
     612         [ +  + ]:         23 :                 if (c != ':') {
     613                 :          2 :                     PyErr_SetString(PyExc_ValueError,
     614                 :            :                                     "expected ':' after conversion specifier");
     615                 :          2 :                     return 0;
     616                 :            :                 }
     617                 :            :             }
     618                 :            :         }
     619                 :     191691 :         format_spec->str = str->str;
     620                 :     191691 :         format_spec->start = str->start;
     621                 :     191691 :         count = 1;  /* brace nesting depth; the field's own '{' counts as 1 */
     622         [ +  + ]:     729146 :         while (str->start < str->end) {
     623      [ +  +  + ]:     729142 :             switch ((c = PyUnicode_READ_CHAR(str->str, str->start++))) {
     624                 :      18313 :             case '{':
     625                 :      18313 :                 *format_spec_needs_expanding = 1;  /* only ever set here; never cleared by this function */
     626                 :      18313 :                 count++;
     627                 :      18313 :                 break;
     628                 :     210000 :             case '}':
     629                 :     210000 :                 count--;
     630         [ +  + ]:     210000 :                 if (count == 0) {
     631                 :     191687 :                     format_spec->end = str->start - 1;  /* exclude the closing '}' */
     632                 :     191687 :                     return 1;
     633                 :            :                 }
     634                 :      18313 :                 break;
     635                 :     500829 :             default:
     636                 :     500829 :                 break;
     637                 :            :             }
     638                 :            :         }
     639                 :            : 
     640                 :          4 :         PyErr_SetString(PyExc_ValueError, "unmatched '{' in format spec");
     641                 :          4 :         return 0;
     642                 :            :     }
     643         [ +  + ]:     708426 :     else if (c != '}') {
     644                 :          9 :         PyErr_SetString(PyExc_ValueError, "expected '}' before end of string");
     645                 :          9 :         return 0;
     646                 :            :     }
     647                 :            : 
     648                 :     708417 :     return 1;
     649                 :            : }
     650                 :            : 
     651                 :            : /************************************************************************/
     652                 :            : /******* Output string allocation and escape-to-markup processing  ******/
     653                 :            : /************************************************************************/
     654                 :            : 
     655                 :            : /* MarkupIterator breaks the string into pieces of either literal
     656                 :            :    text, or things inside {} that need to be marked up.  It is
     657                 :            :    designed to make it easy to wrap a Python iterator around it, for
     658                 :            :    use with the Formatter class. */
     659                 :            : 
     660                 :            : typedef struct {
     661                 :            :     SubString str;  /* input still to be scanned; str.start moves forward as pieces are consumed */
     662                 :            : } MarkupIterator;
     663                 :            : 
     664                 :            : static int  /* always returns 1 */
     665                 :     513064 : MarkupIterator_init(MarkupIterator *self, PyObject *str,
     666                 :            :                     Py_ssize_t start, Py_ssize_t end)
     667                 :            : {
     668                 :     513064 :     SubString_init(&self->str, str, start, end);  /* presumably records str/start/end as the range to iterate -- SubString_init is defined outside this view */
     669                 :     513064 :     return 1;
     670                 :            : }
     671                 :            : 
     672                 :            : /* Fetch the next piece of the format string.  Returns 0 on error, 1 on non-error termination, and 2 if it got a
     673                 :            :    string (or something to be expanded); *field_present tells whether field_name, format_spec and conversion were parsed. */
     674                 :            : static int
     675                 :    1804341 : MarkupIterator_next(MarkupIterator *self, SubString *literal,
     676                 :            :                     int *field_present, SubString *field_name,
     677                 :            :                     SubString *format_spec, Py_UCS4 *conversion,
     678                 :            :                     int *format_spec_needs_expanding)
     679                 :            : {
     680                 :            :     int at_end;
     681                 :    1804341 :     Py_UCS4 c = 0;
     682                 :            :     Py_ssize_t start;
     683                 :            :     Py_ssize_t len;
     684                 :    1804341 :     int markup_follows = 0;
     685                 :            : 
     686                 :            :     /* initialize all of the output variables */
     687                 :    1804341 :     SubString_init(literal, NULL, 0, 0);
     688                 :    1804341 :     SubString_init(field_name, NULL, 0, 0);
     689                 :    1804341 :     SubString_init(format_spec, NULL, 0, 0);
     690                 :    1804341 :     *conversion = '\0';
     691                 :    1804341 :     *format_spec_needs_expanding = 0;
     692                 :    1804341 :     *field_present = 0;
     693                 :            : 
     694                 :            :     /* No more input, end of iterator.  This is the normal exit
     695                 :            :        path. */
     696         [ +  + ]:    1804341 :     if (self->str.start >= self->str.end)
     697                 :     511665 :         return 1;
     698                 :            : 
     699                 :    1292676 :     start = self->str.start;
     700                 :            : 
     701                 :            :     /* First read any literal text. Read until the end of string, an
     702                 :            :        escaped '{' or '}', or an unescaped '{'.  In order to never
     703                 :            :        allocate memory and so that pointers can just be passed around, if
     704                 :            :        there's an escaped '{' or '}' then we'll return the literal
     705                 :            :        including the brace, but no format object.  The next time
     706                 :            :        through, we'll return the rest of the literal, skipping past
     707                 :            :        the second consecutive brace. */
     708         [ +  + ]:   10151209 :     while (self->str.start < self->str.end) {
     709         [ +  + ]:    9909617 :         switch (c = PyUnicode_READ_CHAR(self->str.str, self->str.start++)) {
     710                 :    1051084 :         case '{':
     711                 :            :         case '}':
     712                 :    1051084 :             markup_follows = 1;  /* provisional; cleared below if the brace turns out to be escaped */
     713                 :    1051084 :             break;
     714                 :    8858533 :         default:
     715                 :    8858533 :             continue;
     716                 :            :         }
     717                 :    1051084 :         break;
     718                 :            :     }
     719                 :            : 
     720                 :    1292676 :     at_end = self->str.start >= self->str.end;  /* nothing left after the character read last */
     721                 :    1292676 :     len = self->str.start - start;  /* still counts the brace, if the scan stopped on one */
     722                 :            : 
     723   [ +  +  +  +  :    1294591 :     if ((c == '}') && (at_end ||
                   +  + ]
     724                 :       1915 :                        (c != PyUnicode_READ_CHAR(self->str.str,
     725                 :            :                                                  self->str.start)))) {  /* a '}' not followed by another '}' */
     726                 :         11 :         PyErr_SetString(PyExc_ValueError, "Single '}' encountered "
     727                 :            :                         "in format string");
     728                 :         11 :         return 0;
     729                 :            :     }
     730   [ +  +  +  + ]:    1292665 :     if (at_end && c == '{') {  /* a '{' as the very last character */
     731                 :          4 :         PyErr_SetString(PyExc_ValueError, "Single '{' encountered "
     732                 :            :                         "in format string");
     733                 :          4 :         return 0;
     734                 :            :     }
     735         [ +  + ]:    1292661 :     if (!at_end) {
     736         [ +  + ]:    1051069 :         if (c == PyUnicode_READ_CHAR(self->str.str, self->str.start)) {
     737                 :            :             /* escaped } or {, skip it in the input.  there is no
     738                 :            :                markup object following us, just this literal text */
     739                 :       3829 :             self->str.start++;
     740                 :       3829 :             markup_follows = 0;
     741                 :            :         }
     742                 :            :         else
     743                 :    1047240 :             len--;  /* the '{' opens a field: keep it out of the literal */
     744                 :            :     }
     745                 :            : 
     746                 :            :     /* record the literal text */
     747                 :    1292661 :     literal->str = self->str.str;
     748                 :    1292661 :     literal->start = start;
     749                 :    1292661 :     literal->end = start + len;
     750                 :            : 
     751         [ +  + ]:    1292661 :     if (!markup_follows)
     752                 :     245421 :         return 2;
     753                 :            : 
     754                 :            :     /* this is markup; parse the field */
     755                 :    1047240 :     *field_present = 1;
     756         [ +  + ]:    1047240 :     if (!parse_field(&self->str, field_name, format_spec,
     757                 :            :                      format_spec_needs_expanding, conversion))
     758                 :         17 :         return 0;
     759                 :    1047223 :     return 2;
     760                 :            : }
     761                 :            : 
     762                 :            : 
     763                 :            : /* do the !r, !s or !a conversion on obj; any other specifier sets ValueError and returns NULL */
     764                 :            : static PyObject *
     765                 :     147110 : do_conversion(PyObject *obj, Py_UCS4 conversion)
     766                 :            : {
     767                 :            :     /* XXX in pre-3.0, do we need to convert this to unicode, since it
     768                 :            :        might have returned a string? */
     769   [ +  +  +  + ]:     147110 :     switch (conversion) {
     770                 :     147028 :     case 'r':
     771                 :     147028 :         return PyObject_Repr(obj);
     772                 :          8 :     case 's':
     773                 :          8 :         return PyObject_Str(obj);
     774                 :         73 :     case 'a':
     775                 :         73 :         return PyObject_ASCII(obj);
     776                 :          1 :     default:
     777   [ +  -  +  - ]:          1 :         if (conversion > 32 && conversion < 127) {
     778                 :            :                 /* It's the ASCII subrange (33..126, so no space or
     779                 :            :                    control characters); casting to char is safe (assuming
     780                 :            :                    the execution character set is an ASCII superset). */
     781                 :          1 :                 PyErr_Format(PyExc_ValueError,
     782                 :            :                      "Unknown conversion specifier %c",
     783                 :          1 :                      (char)conversion);
     784                 :            :         } else  /* anything else is reported by its code point in hex */
     785                 :          0 :                 PyErr_Format(PyExc_ValueError,
     786                 :            :                      "Unknown conversion specifier \\x%x",
     787                 :            :                      (unsigned int)conversion);
     788                 :          1 :         return NULL;
     789                 :            :     }
     790                 :            : }
     791                 :            : 
     792                 :            : /* given:
     793                 :            : 
     794                 :            :    {field_name!conversion:format_spec}
     795                 :            : 
     796                 :            :    compute the result and write it to output.
     797                 :            :    format_spec_needs_expanding is an optimization.  if it's false,
     798                 :            :    format_spec is used as is, otherwise it is first expanded
     799                 :            :    recursively with build_string.  Returns 1 on success, 0 on error.
     800                 :            : 
     801                 :            :    field_name is allowed to be zero length, in which case we
     802                 :            :    are doing auto field numbering.
     803                 :            : */
     804                 :            : 
     805                 :            : static int
     806                 :    1046933 : output_markup(SubString *field_name, SubString *format_spec,
     807                 :            :               int format_spec_needs_expanding, Py_UCS4 conversion,
     808                 :            :               _PyUnicodeWriter *writer, PyObject *args, PyObject *kwargs,
     809                 :            :               int recursion_depth, AutoNumber *auto_number)
     810                 :            : {
     811                 :    1046933 :     PyObject *tmp = NULL;
     812                 :    1046933 :     PyObject *fieldobj = NULL;
     813                 :            :     SubString expanded_format_spec;
     814                 :            :     SubString *actual_format_spec;
     815                 :    1046933 :     int result = 0;  /* stays 0 on every early exit to done */
     816                 :            : 
     817                 :            :     /* convert field_name to an object */
     818                 :    1046933 :     fieldobj = get_field_object(field_name, args, kwargs, auto_number);
     819         [ +  + ]:    1046933 :     if (fieldobj == NULL)
     820                 :         32 :         goto done;
     821                 :            : 
     822         [ +  + ]:    1046901 :     if (conversion != '\0') {
     823                 :     147110 :         tmp = do_conversion(fieldobj, conversion);
     824   [ +  +  -  + ]:     147110 :         if (tmp == NULL || PyUnicode_READY(tmp) == -1)
     825                 :       1290 :             goto done;
     826                 :            : 
     827                 :            :         /* do the assignment, transferring ownership: fieldobj = tmp */
     828                 :     145820 :         Py_DECREF(fieldobj);
     829                 :     145820 :         fieldobj = tmp;
     830                 :     145820 :         tmp = NULL;  /* so the cleanup at done does not release it a second time */
     831                 :            :     }
     832                 :            : 
     833                 :            :     /* if needed, recursively compute the format_spec */
     834         [ +  + ]:    1045611 :     if (format_spec_needs_expanding) {
     835                 :      18279 :         tmp = build_string(format_spec, args, kwargs, recursion_depth-1,  /* the nested expansion gets one level less */
     836                 :            :                            auto_number);
     837   [ +  +  -  + ]:      18279 :         if (tmp == NULL || PyUnicode_READY(tmp) == -1)
     838                 :          6 :             goto done;
     839                 :            : 
     840                 :            :         /* note that in the case we're expanding the format string,
     841                 :            :            tmp must be kept around until after the call to
     842                 :            :            render_field, because expanded_format_spec refers to it. */
     843                 :      18273 :         SubString_init(&expanded_format_spec, tmp, 0, PyUnicode_GET_LENGTH(tmp));
     844                 :      18273 :         actual_format_spec = &expanded_format_spec;
     845                 :            :     }
     846                 :            :     else
     847                 :    1027332 :         actual_format_spec = format_spec;
     848                 :            : 
     849         [ +  + ]:    1045605 :     if (render_field(fieldobj, actual_format_spec, writer) == 0)
     850                 :         17 :         goto done;
     851                 :            : 
     852                 :    1045588 :     result = 1;
     853                 :            : 
     854                 :    1046933 : done:  /* common exit: both pointers may still be NULL here */
     855                 :    1046933 :     Py_XDECREF(fieldobj);
     856                 :    1046933 :     Py_XDECREF(tmp);
     857                 :            : 
     858                 :    1046933 :     return result;
     859                 :            : }
     860                 :            : 
     861                 :            : /*
     862                 :            :     do_markup is the top-level loop for the format() method.  It
     863                 :            :     searches through the format string for escapes to markup codes, and
     864                 :            :     calls other functions to move non-markup text to the output,
     865                 :            :     and to perform the markup to the output.  Returns 1 on success, 0 on error.
     866                 :            : */
     867                 :            : static int
     868                 :     512614 : do_markup(SubString *input, PyObject *args, PyObject *kwargs,
     869                 :            :           _PyUnicodeWriter *writer, int recursion_depth, AutoNumber *auto_number)
     870                 :            : {
     871                 :            :     MarkupIterator iter;
     872                 :            :     int format_spec_needs_expanding;
     873                 :            :     int result;
     874                 :            :     int field_present;
     875                 :            :     SubString literal;
     876                 :            :     SubString field_name;
     877                 :            :     SubString format_spec;
     878                 :            :     Py_UCS4 conversion;
     879                 :            : 
     880                 :     512614 :     MarkupIterator_init(&iter, input->str, input->start, input->end);
     881                 :    2316088 :     while ((result = MarkupIterator_next(&iter, &literal, &field_present,
     882                 :            :                                          &field_name, &format_spec,
     883                 :            :                                          &conversion,
     884         [ +  + ]:    1803474 :                                          &format_spec_needs_expanding)) == 2) {
     885         [ +  + ]:    1292205 :         if (literal.end != literal.start) {
     886   [ +  +  +  + ]:    1111855 :             if (!field_present && iter.str.start == iter.str.end)
     887                 :     242482 :                 writer->overallocate = 0;  /* input is exhausted and no field follows: this literal is the last write */
     888         [ -  + ]:    1111855 :             if (_PyUnicodeWriter_WriteSubstring(writer, literal.str,
     889                 :            :                                                 literal.start, literal.end) < 0)
     890                 :          0 :                 return 0;
     891                 :            :         }
     892                 :            : 
     893         [ +  + ]:    1292205 :         if (field_present) {
     894         [ +  + ]:    1046933 :             if (iter.str.start == iter.str.end)
     895                 :     268849 :                 writer->overallocate = 0;  /* input is exhausted: this field is the last write */
     896         [ +  + ]:    1046933 :             if (!output_markup(&field_name, &format_spec,
     897                 :            :                                format_spec_needs_expanding, conversion, writer,
     898                 :            :                                args, kwargs, recursion_depth, auto_number))
     899                 :       1345 :                 return 0;
     900                 :            :         }
     901                 :            :     }
     902                 :     511269 :     return result;  /* the iterator's final status: 1 at end of input, 0 on a parse error */
     903                 :            : }
     904                 :            : 
     905                 :            : 
     906                 :            : /*
     907                 :            :     build_string allocates the output string and then
     908                 :            :     calls do_markup to do the heavy lifting.
     909                 :            : */
     910                 :            : static PyObject *
     911                 :     512616 : build_string(SubString *input, PyObject *args, PyObject *kwargs,
     912                 :            :              int recursion_depth, AutoNumber *auto_number)
     913                 :            : {
     914                 :            :     _PyUnicodeWriter writer;
     915                 :            : 
     916                 :            :     /* check the recursion level */
     917         [ +  + ]:     512616 :     if (recursion_depth <= 0) {
     918                 :          2 :         PyErr_SetString(PyExc_ValueError,
     919                 :            :                         "Max string recursion exceeded");
     920                 :          2 :         return NULL;
     921                 :            :     }
     922                 :            : 
     923                 :     512614 :     _PyUnicodeWriter_Init(&writer);
     924                 :     512614 :     writer.overallocate = 1;
     925                 :     512614 :     writer.min_length = PyUnicode_GET_LENGTH(input->str) + 100;
     926                 :            : 
     927         [ +  + ]:     512614 :     if (!do_markup(input, args, kwargs, &writer, recursion_depth,
     928                 :            :                    auto_number)) {
     929                 :       1372 :         _PyUnicodeWriter_Dealloc(&writer);
     930                 :       1372 :         return NULL;
     931                 :            :     }
     932                 :            : 
     933                 :     511242 :     return _PyUnicodeWriter_Finish(&writer);
     934                 :            : }
     935                 :            : 
     936                 :            : /************************************************************************/
     937                 :            : /*********** main routine ***********************************************/
     938                 :            : /************************************************************************/
     939                 :            : 
     940                 :            : /* this is the main entry point */
     941                 :            : static PyObject *
     942                 :     494337 : do_string_format(PyObject *self, PyObject *args, PyObject *kwargs)
     943                 :            : {
     944                 :            :     SubString input;
     945                 :            : 
     946                 :            :     /* PEP 3101 says only 2 levels, so that
     947                 :            :        "{0:{1}}".format('abc', 's')            # works
     948                 :            :        "{0:{1:{2}}}".format('abc', 's', '')    # fails
     949                 :            :     */
     950                 :     494337 :     int recursion_depth = 2;
     951                 :            : 
     952                 :            :     AutoNumber auto_number;
     953                 :            : 
     954         [ -  + ]:     494337 :     if (PyUnicode_READY(self) == -1)
     955                 :          0 :         return NULL;
     956                 :            : 
     957                 :     494337 :     AutoNumber_Init(&auto_number);
     958                 :     494337 :     SubString_init(&input, self, 0, PyUnicode_GET_LENGTH(self));
     959                 :     494337 :     return build_string(&input, args, kwargs, recursion_depth, &auto_number);
     960                 :            : }
     961                 :            : 
     962                 :            : static PyObject *
     963                 :        616 : do_string_format_map(PyObject *self, PyObject *obj)
     964                 :            : {
     965                 :        616 :     return do_string_format(self, NULL, obj);
     966                 :            : }
     967                 :            : 
     968                 :            : 
     969                 :            : /************************************************************************/
     970                 :            : /*********** formatteriterator ******************************************/
     971                 :            : /************************************************************************/
     972                 :            : 
     973                 :            : /* This is used to implement string.Formatter.parse().  It exists so
     974                 :            :    Formatter can share code with the built-in str.format() method.
     975                 :            :    It's really just a wrapper around MarkupIterator that is callable
     976                 :            :    from Python. */
     977                 :            : 
     978                 :            : typedef struct {
     979                 :            :     PyObject_HEAD
     980                 :            :     PyObject *str;
     981                 :            :     MarkupIterator it_markup;
     982                 :            : } formatteriterobject;
     983                 :            : 
     984                 :            : static void
     985                 :        450 : formatteriter_dealloc(formatteriterobject *it)
     986                 :            : {
     987                 :        450 :     Py_XDECREF(it->str);
     988                 :        450 :     PyObject_Free(it);
     989                 :        450 : }
     990                 :            : 
     991                 :            : /* returns a tuple:
     992                 :            :    (literal, field_name, format_spec, conversion)
     993                 :            : 
     994                 :            :    literal is any literal text to output.  might be zero length
     995                 :            :    field_name is the string before the ':'.  might be None
     996                 :            :    format_spec is the string after the ':'.  might be None
     997                 :            :    conversion is either None, or the string after the '!'
     998                 :            : */
     999                 :            : static PyObject *
    1000                 :        867 : formatteriter_next(formatteriterobject *it)
    1001                 :            : {
    1002                 :            :     SubString literal;
    1003                 :            :     SubString field_name;
    1004                 :            :     SubString format_spec;
    1005                 :            :     Py_UCS4 conversion;
    1006                 :            :     int format_spec_needs_expanding;
    1007                 :            :     int field_present;
    1008                 :        867 :     int result = MarkupIterator_next(&it->it_markup, &literal, &field_present,
    1009                 :            :                                      &field_name, &format_spec, &conversion,
    1010                 :            :                                      &format_spec_needs_expanding);
    1011                 :            : 
    1012                 :            :     /* all of the SubString objects point into it->str, so no
    1013                 :            :        memory management needs to be done on them */
    1014                 :            :     assert(0 <= result && result <= 2);
    1015   [ +  +  +  + ]:        867 :     if (result == 0 || result == 1)
    1016                 :            :         /* if 0, error has already been set, if 1, iterator is empty */
    1017                 :        428 :         return NULL;
    1018                 :            :     else {
    1019                 :        439 :         PyObject *literal_str = NULL;
    1020                 :        439 :         PyObject *field_name_str = NULL;
    1021                 :        439 :         PyObject *format_spec_str = NULL;
    1022                 :        439 :         PyObject *conversion_str = NULL;
    1023                 :        439 :         PyObject *tuple = NULL;
    1024                 :            : 
    1025                 :        439 :         literal_str = SubString_new_object(&literal);
    1026         [ -  + ]:        439 :         if (literal_str == NULL)
    1027                 :          0 :             goto done;
    1028                 :            : 
    1029                 :        439 :         field_name_str = SubString_new_object(&field_name);
    1030         [ -  + ]:        439 :         if (field_name_str == NULL)
    1031                 :          0 :             goto done;
    1032                 :            : 
    1033                 :            :         /* if a replacement field is present, return a string for
    1034                 :            :            format_spec (even if zero length), else return None */
    1035                 :        439 :         format_spec_str = (field_present ?
    1036         [ +  + ]:        439 :                            SubString_new_object_or_empty :
    1037                 :            :                            SubString_new_object)(&format_spec);
    1038         [ -  + ]:        439 :         if (format_spec_str == NULL)
    1039                 :          0 :             goto done;
    1040                 :            : 
    1041                 :            :         /* if the conversion is not specified, return a None,
    1042                 :            :            otherwise create a one length string with the conversion
    1043                 :            :            character */
    1044         [ +  + ]:        439 :         if (conversion == '\0') {
    1045                 :        410 :             conversion_str = Py_None;
    1046                 :        410 :             Py_INCREF(conversion_str);
    1047                 :            :         }
    1048                 :            :         else
    1049                 :         29 :             conversion_str = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
    1050                 :            :                                                        &conversion, 1);
    1051         [ -  + ]:        439 :         if (conversion_str == NULL)
    1052                 :          0 :             goto done;
    1053                 :            : 
    1054                 :        439 :         tuple = PyTuple_Pack(4, literal_str, field_name_str, format_spec_str,
    1055                 :            :                              conversion_str);
    1056                 :        439 :     done:
    1057                 :        439 :         Py_XDECREF(literal_str);
    1058                 :        439 :         Py_XDECREF(field_name_str);
    1059                 :        439 :         Py_XDECREF(format_spec_str);
    1060                 :        439 :         Py_XDECREF(conversion_str);
    1061                 :        439 :         return tuple;
    1062                 :            :     }
    1063                 :            : }
    1064                 :            : 
    1065                 :            : static PyMethodDef formatteriter_methods[] = {
    1066                 :            :     {NULL,              NULL}           /* sentinel */
    1067                 :            : };
    1068                 :            : 
    1069                 :            : static PyTypeObject PyFormatterIter_Type = {
    1070                 :            :     PyVarObject_HEAD_INIT(&PyType_Type, 0)
    1071                 :            :     "formatteriterator",                /* tp_name */
    1072                 :            :     sizeof(formatteriterobject),        /* tp_basicsize */
    1073                 :            :     0,                                  /* tp_itemsize */
    1074                 :            :     /* methods */
    1075                 :            :     (destructor)formatteriter_dealloc,  /* tp_dealloc */
    1076                 :            :     0,                                  /* tp_vectorcall_offset */
    1077                 :            :     0,                                  /* tp_getattr */
    1078                 :            :     0,                                  /* tp_setattr */
    1079                 :            :     0,                                  /* tp_as_async */
    1080                 :            :     0,                                  /* tp_repr */
    1081                 :            :     0,                                  /* tp_as_number */
    1082                 :            :     0,                                  /* tp_as_sequence */
    1083                 :            :     0,                                  /* tp_as_mapping */
    1084                 :            :     0,                                  /* tp_hash */
    1085                 :            :     0,                                  /* tp_call */
    1086                 :            :     0,                                  /* tp_str */
    1087                 :            :     PyObject_GenericGetAttr,            /* tp_getattro */
    1088                 :            :     0,                                  /* tp_setattro */
    1089                 :            :     0,                                  /* tp_as_buffer */
    1090                 :            :     Py_TPFLAGS_DEFAULT,                 /* tp_flags */
    1091                 :            :     0,                                  /* tp_doc */
    1092                 :            :     0,                                  /* tp_traverse */
    1093                 :            :     0,                                  /* tp_clear */
    1094                 :            :     0,                                  /* tp_richcompare */
    1095                 :            :     0,                                  /* tp_weaklistoffset */
    1096                 :            :     PyObject_SelfIter,                  /* tp_iter */
    1097                 :            :     (iternextfunc)formatteriter_next,   /* tp_iternext */
    1098                 :            :     formatteriter_methods,              /* tp_methods */
    1099                 :            :     0,
    1100                 :            : };
    1101                 :            : 
    1102                 :            : /* formatter_parser is used to implement
    1103                 :            :    string.Formatter.vformat.  It parses a string and returns an iterator
    1104                 :            :    of tuples describing the parsed elements.  It's a wrapper around
    1105                 :            :    stringlib/unicode_format.h's MarkupIterator */
    1106                 :            : static PyObject *
    1107                 :        451 : formatter_parser(PyObject *ignored, PyObject *self)
    1108                 :            : {
    1109                 :            :     formatteriterobject *it;
    1110                 :            : 
    1111         [ +  + ]:        451 :     if (!PyUnicode_Check(self)) {
    1112                 :          1 :         PyErr_Format(PyExc_TypeError, "expected str, got %s", Py_TYPE(self)->tp_name);
    1113                 :          1 :         return NULL;
    1114                 :            :     }
    1115                 :            : 
    1116         [ -  + ]:        450 :     if (PyUnicode_READY(self) == -1)
    1117                 :          0 :         return NULL;
    1118                 :            : 
    1119                 :        450 :     it = PyObject_New(formatteriterobject, &PyFormatterIter_Type);
    1120         [ -  + ]:        450 :     if (it == NULL)
    1121                 :          0 :         return NULL;
    1122                 :            : 
    1123                 :            :     /* take ownership, give the object to the iterator */
    1124                 :        450 :     Py_INCREF(self);
    1125                 :        450 :     it->str = self;
    1126                 :            : 
    1127                 :            :     /* initialize the contained MarkupIterator */
    1128                 :        450 :     MarkupIterator_init(&it->it_markup, (PyObject*)self, 0, PyUnicode_GET_LENGTH(self));
    1129                 :        450 :     return (PyObject *)it;
    1130                 :            : }
    1131                 :            : 
    1132                 :            : 
    1133                 :            : /************************************************************************/
    1134                 :            : /*********** fieldnameiterator ******************************************/
    1135                 :            : /************************************************************************/
    1136                 :            : 
    1137                 :            : 
    1138                 :            : /* This is used to implement string.Formatter.get_field().  It parses the
    1139                 :            :    field name into attribute and item values.  It's a Python-callable
    1140                 :            :    wrapper around FieldNameIterator */
    1141                 :            : 
    1142                 :            : typedef struct {
    1143                 :            :     PyObject_HEAD
    1144                 :            :     PyObject *str;
    1145                 :            :     FieldNameIterator it_field;
    1146                 :            : } fieldnameiterobject;
    1147                 :            : 
    1148                 :            : static void
    1149                 :        240 : fieldnameiter_dealloc(fieldnameiterobject *it)
    1150                 :            : {
    1151                 :        240 :     Py_XDECREF(it->str);
    1152                 :        240 :     PyObject_Free(it);
    1153                 :        240 : }
    1154                 :            : 
    1155                 :            : /* returns a tuple:
    1156                 :            :    (is_attr, value)
    1157                 :            :    is_attr is true if we used attribute syntax (e.g., '.foo')
    1158                 :            :               false if we used index syntax (e.g., '[foo]')
    1159                 :            :    value is an integer or string
    1160                 :            : */
    1161                 :            : static PyObject *
    1162                 :        246 : fieldnameiter_next(fieldnameiterobject *it)
    1163                 :            : {
    1164                 :            :     int result;
    1165                 :            :     int is_attr;
    1166                 :            :     Py_ssize_t idx;
    1167                 :            :     SubString name;
    1168                 :            : 
    1169                 :        246 :     result = FieldNameIterator_next(&it->it_field, &is_attr,
    1170                 :            :                                     &idx, &name);
    1171   [ +  -  +  + ]:        246 :     if (result == 0 || result == 1)
    1172                 :            :         /* if 0, error has already been set, if 1, iterator is empty */
    1173                 :        234 :         return NULL;
    1174                 :            :     else {
    1175                 :         12 :         PyObject* result = NULL;
    1176                 :         12 :         PyObject* is_attr_obj = NULL;
    1177                 :         12 :         PyObject* obj = NULL;
    1178                 :            : 
    1179                 :         12 :         is_attr_obj = PyBool_FromLong(is_attr);
    1180         [ -  + ]:         12 :         if (is_attr_obj == NULL)
    1181                 :          0 :             goto done;
    1182                 :            : 
    1183                 :            :         /* either an integer or a string */
    1184         [ +  + ]:         12 :         if (idx != -1)
    1185                 :          4 :             obj = PyLong_FromSsize_t(idx);
    1186                 :            :         else
    1187                 :          8 :             obj = SubString_new_object(&name);
    1188         [ -  + ]:         12 :         if (obj == NULL)
    1189                 :          0 :             goto done;
    1190                 :            : 
    1191                 :            :         /* return a tuple of values */
    1192                 :         12 :         result = PyTuple_Pack(2, is_attr_obj, obj);
    1193                 :            : 
    1194                 :         12 :     done:
    1195                 :         12 :         Py_XDECREF(is_attr_obj);
    1196                 :         12 :         Py_XDECREF(obj);
    1197                 :         12 :         return result;
    1198                 :            :     }
    1199                 :            : }
    1200                 :            : 
    1201                 :            : static PyMethodDef fieldnameiter_methods[] = {
    1202                 :            :     {NULL,              NULL}           /* sentinel */
    1203                 :            : };
    1204                 :            : 
    1205                 :            : static PyTypeObject PyFieldNameIter_Type = {
    1206                 :            :     PyVarObject_HEAD_INIT(&PyType_Type, 0)
    1207                 :            :     "fieldnameiterator",                /* tp_name */
    1208                 :            :     sizeof(fieldnameiterobject),        /* tp_basicsize */
    1209                 :            :     0,                                  /* tp_itemsize */
    1210                 :            :     /* methods */
    1211                 :            :     (destructor)fieldnameiter_dealloc,  /* tp_dealloc */
    1212                 :            :     0,                                  /* tp_vectorcall_offset */
    1213                 :            :     0,                                  /* tp_getattr */
    1214                 :            :     0,                                  /* tp_setattr */
    1215                 :            :     0,                                  /* tp_as_async */
    1216                 :            :     0,                                  /* tp_repr */
    1217                 :            :     0,                                  /* tp_as_number */
    1218                 :            :     0,                                  /* tp_as_sequence */
    1219                 :            :     0,                                  /* tp_as_mapping */
    1220                 :            :     0,                                  /* tp_hash */
    1221                 :            :     0,                                  /* tp_call */
    1222                 :            :     0,                                  /* tp_str */
    1223                 :            :     PyObject_GenericGetAttr,            /* tp_getattro */
    1224                 :            :     0,                                  /* tp_setattro */
    1225                 :            :     0,                                  /* tp_as_buffer */
    1226                 :            :     Py_TPFLAGS_DEFAULT,                 /* tp_flags */
    1227                 :            :     0,                                  /* tp_doc */
    1228                 :            :     0,                                  /* tp_traverse */
    1229                 :            :     0,                                  /* tp_clear */
    1230                 :            :     0,                                  /* tp_richcompare */
    1231                 :            :     0,                                  /* tp_weaklistoffset */
    1232                 :            :     PyObject_SelfIter,                  /* tp_iter */
    1233                 :            :     (iternextfunc)fieldnameiter_next,   /* tp_iternext */
    1234                 :            :     fieldnameiter_methods,              /* tp_methods */
    1235                 :            :     0};
    1236                 :            : 
    1237                 :            : /* formatter_field_name_split is used to implement
    1238                 :            :    string.Formatter.vformat.  It takes a PEP 3101 "field name", and
    1239                 :            :    returns a tuple of (first, rest): "first", the part before the
    1240                 :            :    first '.' or '['; and "rest", an iterator for the rest of the field
    1241                 :            :    name.  It's a wrapper around stringlib/unicode_format.h's
    1242                 :            :    field_name_split.  The iterator it returns is a
    1243                 :            :    FieldNameIterator */
    1244                 :            : static PyObject *
    1245                 :        241 : formatter_field_name_split(PyObject *ignored, PyObject *self)
    1246                 :            : {
    1247                 :            :     SubString first;
    1248                 :            :     Py_ssize_t first_idx;
    1249                 :            :     fieldnameiterobject *it;
    1250                 :            : 
    1251                 :        241 :     PyObject *first_obj = NULL;
    1252                 :        241 :     PyObject *result = NULL;
    1253                 :            : 
    1254         [ +  + ]:        241 :     if (!PyUnicode_Check(self)) {
    1255                 :          1 :         PyErr_Format(PyExc_TypeError, "expected str, got %s", Py_TYPE(self)->tp_name);
    1256                 :          1 :         return NULL;
    1257                 :            :     }
    1258                 :            : 
    1259         [ -  + ]:        240 :     if (PyUnicode_READY(self) == -1)
    1260                 :          0 :         return NULL;
    1261                 :            : 
    1262                 :        240 :     it = PyObject_New(fieldnameiterobject, &PyFieldNameIter_Type);
    1263         [ -  + ]:        240 :     if (it == NULL)
    1264                 :          0 :         return NULL;
    1265                 :            : 
    1266                 :            :     /* take ownership, give the object to the iterator.  this is
    1267                 :            :        just to keep the field_name alive */
    1268                 :        240 :     Py_INCREF(self);
    1269                 :        240 :     it->str = self;
    1270                 :            : 
    1271                 :            :     /* Pass in auto_number = NULL. We'll return an empty string for
    1272                 :            :        first_obj in that case. */
    1273         [ -  + ]:        240 :     if (!field_name_split((PyObject*)self, 0, PyUnicode_GET_LENGTH(self),
    1274                 :            :                           &first, &first_idx, &it->it_field, NULL))
    1275                 :          0 :         goto done;
    1276                 :            : 
    1277                 :            :     /* first becomes an integer, if possible; else a string */
    1278         [ +  + ]:        240 :     if (first_idx != -1)
    1279                 :         42 :         first_obj = PyLong_FromSsize_t(first_idx);
    1280                 :            :     else
    1281                 :            :         /* convert "first" into a string object */
    1282                 :        198 :         first_obj = SubString_new_object(&first);
    1283         [ -  + ]:        240 :     if (first_obj == NULL)
    1284                 :          0 :         goto done;
    1285                 :            : 
    1286                 :            :     /* return a tuple of values */
    1287                 :        240 :     result = PyTuple_Pack(2, first_obj, it);
    1288                 :            : 
    1289                 :        240 : done:
    1290                 :        240 :     Py_XDECREF(it);
    1291                 :        240 :     Py_XDECREF(first_obj);
    1292                 :        240 :     return result;
    1293                 :            : }

Generated by: LCOV version 1.14