@@ -380,7 +380,22 @@ binary_reader_open(PyObject *path)
380380 Py_fclose (fp );
381381 goto error ;
382382 }
383+ if (st .st_size < 0 ) {
384+ PyErr_SetString (PyExc_IOError , "Invalid negative file size" );
385+ Py_fclose (fp );
386+ goto error ;
387+ }
388+ if ((uintmax_t )st .st_size > SIZE_MAX ) {
389+ PyErr_SetString (PyExc_OverflowError , "File is too large to map" );
390+ Py_fclose (fp );
391+ goto error ;
392+ }
383393 reader -> mapped_size = st .st_size ;
394+ if (reader -> mapped_size == 0 ) {
395+ PyErr_SetString (PyExc_ValueError , "File too small for header" );
396+ Py_fclose (fp );
397+ goto error ;
398+ }
384399
385400 /* Map the file into memory.
386401 * MAP_POPULATE (Linux-only) pre-faults all pages at mmap time, which:
@@ -424,7 +439,10 @@ binary_reader_open(PyObject *path)
424439 }
425440#endif
426441
427- (void )Py_fclose (fp );
442+ if (Py_fclose (fp ) != 0 ) {
443+ PyErr_SetFromErrno (PyExc_IOError );
444+ goto error ;
445+ }
428446
429447 uint8_t * data = reader -> mapped_data ;
430448 size_t file_size = reader -> mapped_size ;
@@ -444,7 +462,15 @@ binary_reader_open(PyObject *path)
444462 PyErr_SetFromErrno (PyExc_IOError );
445463 goto error ;
446464 }
465+ if ((uint64_t )file_size_off > SIZE_MAX ) {
466+ PyErr_SetString (PyExc_OverflowError , "File is too large to read" );
467+ goto error ;
468+ }
447469 reader -> file_size = (size_t )file_size_off ;
470+ if (reader -> file_size == 0 ) {
471+ PyErr_SetString (PyExc_ValueError , "File too small for header" );
472+ goto error ;
473+ }
448474 if (FSEEK64 (reader -> fp , 0 , SEEK_SET ) != 0 ) {
449475 PyErr_SetFromErrno (PyExc_IOError );
450476 goto error ;
@@ -456,8 +482,18 @@ binary_reader_open(PyObject *path)
456482 goto error ;
457483 }
458484
459- if (fread (reader -> file_data , 1 , reader -> file_size , reader -> fp ) != reader -> file_size ) {
460- PyErr_SetFromErrno (PyExc_IOError );
485+ size_t nread = fread (reader -> file_data , 1 , reader -> file_size , reader -> fp );
486+ if (nread != reader -> file_size ) {
487+ int err = errno ;
488+ if (ferror (reader -> fp ) && err != 0 ) {
489+ errno = err ;
490+ PyErr_SetFromErrno (PyExc_IOError );
491+ }
492+ else {
493+ PyErr_Format (PyExc_ValueError ,
494+ "Unexpected end of file: read %zu of %zu bytes" ,
495+ nread , reader -> file_size );
496+ }
461497 goto error ;
462498 }
463499
@@ -944,10 +980,15 @@ invoke_progress_callback(PyObject *callback, Py_ssize_t current, uint32_t total)
944980Py_ssize_t
945981binary_reader_replay (BinaryReader * reader , PyObject * collector , PyObject * progress_callback )
946982{
947- if (!PyObject_HasAttrString (collector , "collect" )) {
948- PyErr_SetString (PyExc_TypeError , "Collector must have a collect() method" );
983+ PyObject * collect_method = PyObject_GetAttrString (collector , "collect" );
984+ if (collect_method == NULL ) {
985+ if (PyErr_ExceptionMatches (PyExc_AttributeError )) {
986+ PyErr_Clear ();
987+ PyErr_SetString (PyExc_TypeError , "Collector must have a collect() method" );
988+ }
949989 return -1 ;
950990 }
991+ Py_DECREF (collect_method );
951992
952993 /* Get module state for struct sequence types */
953994 PyObject * module = PyImport_ImportModule ("_remote_debugging" );
@@ -973,7 +1014,10 @@ binary_reader_replay(BinaryReader *reader, PyObject *collector, PyObject *progre
9731014 while (offset < reader -> sample_data_size ) {
9741015 /* Read thread_id (8 bytes) + interpreter_id (4 bytes) + encoding byte */
9751016 if (reader -> sample_data_size - offset < SAMPLE_HEADER_FIXED_SIZE ) {
976- break ; /* End of data */
1017+ PyErr_Format (PyExc_ValueError ,
1018+ "Truncated sample data: %zu trailing bytes" ,
1019+ reader -> sample_data_size - offset );
1020+ return -1 ;
9771021 }
9781022
9791023 /* Use memcpy to avoid strict aliasing violations, then byte-swap if needed */
@@ -1019,6 +1063,11 @@ binary_reader_replay(BinaryReader *reader, PyObject *collector, PyObject *progre
10191063 count , max_possible_samples );
10201064 return -1 ;
10211065 }
1066+ if ((uint64_t )count > (uint64_t )PY_SSIZE_T_MAX - (uint64_t )replayed ) {
1067+ PyErr_SetString (PyExc_OverflowError ,
1068+ "Sample count exceeds Py_ssize_t maximum" );
1069+ return -1 ;
1070+ }
10221071
10231072 reader -> stats .repeat_records ++ ;
10241073 reader -> stats .repeat_samples += count ;
@@ -1149,6 +1198,11 @@ binary_reader_replay(BinaryReader *reader, PyObject *collector, PyObject *progre
11491198 return -1 ;
11501199 }
11511200 Py_DECREF (timestamps_list );
1201+ if (replayed == PY_SSIZE_T_MAX ) {
1202+ PyErr_SetString (PyExc_OverflowError ,
1203+ "Sample count exceeds Py_ssize_t maximum" );
1204+ return -1 ;
1205+ }
11521206 replayed ++ ;
11531207 reader -> stats .total_samples ++ ;
11541208 break ;
@@ -1167,6 +1221,13 @@ binary_reader_replay(BinaryReader *reader, PyObject *collector, PyObject *progre
11671221 }
11681222 }
11691223
1224+ if ((uint64_t )replayed != reader -> sample_count ) {
1225+ PyErr_Format (PyExc_ValueError ,
1226+ "Sample count mismatch: header declares %u samples but replay decoded %zd" ,
1227+ reader -> sample_count , replayed );
1228+ return -1 ;
1229+ }
1230+
11701231 /* Final progress callback at 100% */
11711232 if (invoke_progress_callback (progress_callback , replayed , reader -> sample_count ) < 0 ) {
11721233 return -1 ;
0 commit comments