Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 5 additions & 4 deletions quaddtype/numpy_quaddtype/_quaddtype_main.pyi
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from typing import Any, Literal, TypeAlias, final, overload

import builtins
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why the explicit import? We use bool without the builtins.bool elsewhere in the stubs

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copied from the NumPy's stubs, I think both are just same.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

in numpy it's needed because bool is shadowed by np.bool. But that shouldn't be problem here, so no need for the builtins._

import numpy as np
from numpy._typing import _128Bit # pyright: ignore[reportPrivateUsage]
from typing_extensions import Never, Self, override
Expand Down Expand Up @@ -157,9 +157,10 @@ class QuadPrecision(np.floating[_128Bit]):
# NOTE: is_integer() and as_integer_ratio() are defined on numpy.floating in the
# stubs, but don't exist at runtime. And because QuadPrecision does not implement
# them, we use this hacky workaround to emulate their absence.
# TODO: Remove after https://github.com/numpy/numpy-user-dtypes/issues/216
is_integer: Never # pyright: ignore[reportIncompatibleMethodOverride]
as_integer_ratio: Never # pyright: ignore[reportIncompatibleMethodOverride]
@override
def is_integer(self, /) -> builtins.bool: ...
@override
def as_integer_ratio(self, /) -> tuple[int, int]: ...

#
def is_longdouble_128() -> bool: ...
Expand Down
202 changes: 202 additions & 0 deletions quaddtype/numpy_quaddtype/src/scalar.c
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,18 @@
// src: https://en.wikipedia.org/wiki/Quadruple-precision_floating-point_format
#define SLEEF_QUAD_DECIMAL_DIG 36

#if PY_VERSION_HEX < 0x30d00b3
static PyThread_type_lock sleef_lock;
#define LOCK_SLEEF PyThread_acquire_lock(sleef_lock, WAIT_LOCK)
#define UNLOCK_SLEEF PyThread_release_lock(sleef_lock)
#else
static PyMutex sleef_lock = {0};
#define LOCK_SLEEF PyMutex_Lock(&sleef_lock)
#define UNLOCK_SLEEF PyMutex_Unlock(&sleef_lock)
#endif




QuadPrecisionObject *
QuadPrecision_raw_new(QuadBackendType backend)
Expand Down Expand Up @@ -383,6 +395,188 @@ QuadPrecision_get_imag(QuadPrecisionObject *self, void *closure)
return (PyObject *)QuadPrecision_raw_new(self->backend);
}

// Method implementations for float compatibility
static PyObject *
QuadPrecision_is_integer(QuadPrecisionObject *self, PyObject *Py_UNUSED(ignored))
{
Sleef_quad value;

if (self->backend == BACKEND_SLEEF) {
value = self->value.sleef_value;
}
else {
// lets also tackle ld from sleef functions as well
value = Sleef_cast_from_doubleq1((double)self->value.longdouble_value);
}

if(Sleef_iunordq1(value, value)) {
Py_RETURN_FALSE;
}

// Check if value is finite (not inf or nan)
Sleef_quad abs_value = Sleef_fabsq1(value);
Sleef_quad pos_inf = sleef_q(+0x1000000000000LL, 0x0000000000000000ULL, 16384);
int32_t is_finite = Sleef_icmpltq1(abs_value, pos_inf);

if (!is_finite) {
Py_RETURN_FALSE;
}

// Check if value equals its truncated version
Sleef_quad truncated = Sleef_truncq1(value);
int32_t is_equal = Sleef_icmpeqq1(value, truncated);

if (is_equal) {
Py_RETURN_TRUE;
}
else {
Py_RETURN_FALSE;
}
}

PyObject* quad_to_pylong(Sleef_quad value)
{
char buffer[128];

// Sleef_snprintf call is thread-unsafe
LOCK_SLEEF;
// Format as integer (%.0Qf gives integer with no decimal places)
// Q modifier means pass Sleef_quad by value
int written = Sleef_snprintf(buffer, sizeof(buffer), "%.0Qf", value);
UNLOCK_SLEEF;
if (written < 0 || written >= sizeof(buffer)) {
PyErr_SetString(PyExc_RuntimeError, "Failed to convert quad to string");
return NULL;
}

PyObject *result = PyLong_FromString(buffer, NULL, 10);

if (result == NULL) {
PyErr_SetString(PyExc_RuntimeError, "Failed to parse integer string");
return NULL;
}

return result;
}

// inspired by the CPython implementation
// https://github.com/python/cpython/blob/ac1ffd77858b62d169a08040c08aa5de26e145ac/Objects/floatobject.c#L1503C1-L1572C2
// NOTE: a 128-bit
static PyObject *
QuadPrecision_as_integer_ratio(QuadPrecisionObject *self, PyObject *Py_UNUSED(ignored))
{

Sleef_quad value;
Sleef_quad pos_inf = sleef_q(+0x1000000000000LL, 0x0000000000000000ULL, 16384);
const int FLOAT128_PRECISION = 113;

if (self->backend == BACKEND_SLEEF) {
value = self->value.sleef_value;
}
else {
// lets also tackle ld from sleef functions as well
value = Sleef_cast_from_doubleq1((double)self->value.longdouble_value);
}

if(Sleef_iunordq1(value, value)) {
PyErr_SetString(PyExc_ValueError, "Cannot convert NaN to integer ratio");
return NULL;
}
if(Sleef_icmpgeq1(Sleef_fabsq1(value), pos_inf)) {
PyErr_SetString(PyExc_OverflowError, "Cannot convert infinite value to integer ratio");
return NULL;
}

// Sleef_value == float_part * 2**exponent exactly
int exponent;
Sleef_quad mantissa = Sleef_frexpq1(value, &exponent); // within [0.5, 1.0)

/*
CPython loops for 300 (some huge number) to make sure
float_part gets converted to the floor(float_part) i.e. near integer as

for (i=0; i<300 && float_part != floor(float_part) ; i++) {
float_part *= 2.0;
exponent--;
}

It seems highly inefficient from performance perspective, maybe they pick 300 for future-proof
or If FLT_RADIX != 2, the 300 steps may leave a tiny fractional part

Another way can be doing as:
```
mantissa = ldexpq(mantissa, FLOAT128_PRECISION);
exponent -= FLOAT128_PRECISION;
```
This should work but give non-simplified, huge integers (although they also come down to same representation)
We can also do gcd to find simplified values, but it'll add more O(log(N))
For the sake of simplicity and fixed 128-bit nature, we will loop till 113 only
*/

for (int i = 0; i < FLOAT128_PRECISION && !Sleef_icmpeqq1(mantissa, Sleef_floorq1(mantissa)); i++) {
mantissa = Sleef_mulq1_u05(mantissa, Sleef_cast_from_doubleq1(2.0));
exponent--;
}

// numerator and denominators can't fit in int
// convert items to PyLongObject from string instead

PyObject *py_exp = PyLong_FromLongLong(Py_ABS(exponent));
if(py_exp == NULL)
{
return NULL;
}

PyObject *numerator = quad_to_pylong(mantissa);
if(numerator == NULL)
{
Py_DECREF(numerator);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

While it is annoying, testing error cases catches stuff like this...

Suggested change
Py_DECREF(numerator);
Py_DECREF(py_exp);

return NULL;
}
PyObject *denominator = PyLong_FromLong(1);
if (denominator == NULL) {
Py_DECREF(numerator);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Consider adding a goto error for this since the cleanup is repeated below.

Suggested change
Py_DECREF(numerator);
Py_DECREF(py_exp);
Py_DECREF(numerator);

return NULL;
}

// fold in 2**exponent
if(exponent > 0)
{
PyObject *new_num = PyNumber_Lshift(numerator, py_exp);
Py_DECREF(numerator);
if(new_num == NULL)
{
Py_DECREF(denominator);
Py_DECREF(py_exp);
return NULL;
}
numerator = new_num;
}
else
{
PyObject *new_denom = PyNumber_Lshift(denominator, py_exp);
Py_DECREF(denominator);
if(new_denom == NULL)
{
Py_DECREF(numerator);
Py_DECREF(py_exp);
return NULL;
}
denominator = new_denom;
}

Py_DECREF(py_exp);
return PyTuple_Pack(2, numerator, denominator);
}

static PyMethodDef QuadPrecision_methods[] = {
{"is_integer", (PyCFunction)QuadPrecision_is_integer, METH_NOARGS,
"Return True if the value is an integer."},
{"as_integer_ratio", (PyCFunction)QuadPrecision_as_integer_ratio, METH_NOARGS,
"Return a pair of integers whose ratio is exactly equal to the original value."},
{NULL, NULL, 0, NULL} /* Sentinel */
};

static PyGetSetDef QuadPrecision_getset[] = {
{"real", (getter)QuadPrecision_get_real, NULL, "Real part of the scalar", NULL},
{"imag", (getter)QuadPrecision_get_imag, NULL, "Imaginary part of the scalar (always 0 for real types)", NULL},
Expand All @@ -400,12 +594,20 @@ PyTypeObject QuadPrecision_Type = {
.tp_as_number = &quad_as_scalar,
.tp_as_buffer = &QuadPrecision_as_buffer,
.tp_richcompare = (richcmpfunc)quad_richcompare,
.tp_methods = QuadPrecision_methods,
.tp_getset = QuadPrecision_getset,
};

int
init_quadprecision_scalar(void)
{
#if PY_VERSION_HEX < 0x30d00b3
sleef_lock = PyThread_allocate_lock();
if (sleef_lock == NULL) {
PyErr_NoMemory();
return -1;
}
#endif
QuadPrecision_Type.tp_base = &PyFloatingArrType_Type;
return PyType_Ready(&QuadPrecision_Type);
}
8 changes: 6 additions & 2 deletions quaddtype/reinstall.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,11 @@ if [ -d "build/" ]; then
rm -rf subprojects/sleef
fi

# export CFLAGS="-g -O0"
# export CXXFLAGS="-g -O0"
python -m pip uninstall -y numpy_quaddtype
python -m pip install . -vv --no-build-isolation 2>&1 | tee build_log.txt

# for debugging and TSAN builds, comment the above line and uncomment all below:
# export CFLAGS="-fsanitize=thread -g -O0"
# export CXXFLAGS="-fsanitize=thread -g -O0"
# export LDFLAGS="-fsanitize=thread"
# python -m pip install . -vv --no-build-isolation -Csetup-args=-Db_sanitize=thread 2>&1 | tee build_log.txt
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think you also need to pass --no-binary :all: as well, otherwise you'll get a numpy wheel that doesn't have TSan instrumentation. I'd also suggest using a TSan build of CPython and link to https://py-free-threading.github.io/thread_sanitizer/#compiling-cpython-and-foundational-packages-with-thread-sanitizer.

A somewhat more verbose but perhaps clearer way to do this is to explicitly install all the build deps, then build quaddtype with --no-build-isolation:

# either on the cpython_sanity docker image or 
# after building and installing a TSan Python build
python -m pip install meson meson-python wheel ninja
python -m pip install "numpy @ git+https://github.com/numpy/numpy" -C'setup-args=-Db_sanitize=thread'
python -m pip install . --no-build-isolation -C'setup-args=-Db_sanitize=thread'

No need to pass any special arguments for pure-python dependencies.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

After I figured out how to compile SLEEF with TSan instrumentation, I got this race report in the new test you added:

quaddtype/tests/test_multithreading.py::test_as_integer_ratio_reconstruction ==================
WARNING: ThreadSanitizer: data race (pid=13598)
  Read of size 8 at 0x00015ef30f18 by thread T14:
    #0 Sleef_iunordq1 <null> (_quaddtype_main.cpython-314t-darwin.so:arm64+0xb8ffc)
    #1 QuadPrecision_as_integer_ratio <null> (_quaddtype_main.cpython-314t-darwin.so:arm64+0xdb6c)
    #2 method_vectorcall_NOARGS descrobject.c:448 (libpython3.14t.dylib:arm64+0xa2218)
    #3 PyObject_Vectorcall call.c:327 (libpython3.14t.dylib:arm64+0x8ae78)
    #4 _PyEval_EvalFrameDefault generated_cases.c.h:1619 (libpython3.14t.dylib:arm64+0x27e560)
    #5 _PyEval_Vector ceval.c:1965 (libpython3.14t.dylib:arm64+0x27a2fc)
    #6 _PyFunction_Vectorcall call.c (libpython3.14t.dylib:arm64+0x8b4b8)
    #7 method_vectorcall classobject.c:73 (libpython3.14t.dylib:arm64+0x8f924)
    #8 context_run context.c:728 (libpython3.14t.dylib:arm64+0x2c5404)
    #9 method_vectorcall_FASTCALL_KEYWORDS descrobject.c:421 (libpython3.14t.dylib:arm64+0xa2124)
    #10 PyObject_Vectorcall call.c:327 (libpython3.14t.dylib:arm64+0x8ae78)
    #11 _PyEval_EvalFrameDefault generated_cases.c.h:1619 (libpython3.14t.dylib:arm64+0x27e560)
    #12 _PyEval_Vector ceval.c:1965 (libpython3.14t.dylib:arm64+0x27a2fc)
    #13 _PyFunction_Vectorcall call.c (libpython3.14t.dylib:arm64+0x8b4b8)
    #14 method_vectorcall classobject.c:73 (libpython3.14t.dylib:arm64+0x8f924)
    #15 _PyObject_Call call.c:348 (libpython3.14t.dylib:arm64+0x8b12c)
    #16 PyObject_Call call.c:373 (libpython3.14t.dylib:arm64+0x8b1a0)
    #17 thread_run _threadmodule.c:359 (libpython3.14t.dylib:arm64+0x41ae78)
    #18 pythread_wrapper thread_pthread.h:242 (libpython3.14t.dylib:arm64+0x36aeb0)

  Previous write of size 8 at 0x00015ef30f18 by thread T74:
    #0 disp_iunordq1 <null> (_quaddtype_main.cpython-314t-darwin.so:arm64+0xbada8)
    #1 Sleef_iunordq1 <null> (_quaddtype_main.cpython-314t-darwin.so:arm64+0xb9014)
    #2 QuadPrecision_as_integer_ratio <null> (_quaddtype_main.cpython-314t-darwin.so:arm64+0xdb6c)
    #3 method_vectorcall_NOARGS descrobject.c:448 (libpython3.14t.dylib:arm64+0xa2218)
    #4 PyObject_Vectorcall call.c:327 (libpython3.14t.dylib:arm64+0x8ae78)
    #5 _PyEval_EvalFrameDefault generated_cases.c.h:1619 (libpython3.14t.dylib:arm64+0x27e560)
    #6 _PyEval_Vector ceval.c:1965 (libpython3.14t.dylib:arm64+0x27a2fc)
    #7 _PyFunction_Vectorcall call.c (libpython3.14t.dylib:arm64+0x8b4b8)
    #8 method_vectorcall classobject.c:73 (libpython3.14t.dylib:arm64+0x8f924)
    #9 context_run context.c:728 (libpython3.14t.dylib:arm64+0x2c5404)
    #10 method_vectorcall_FASTCALL_KEYWORDS descrobject.c:421 (libpython3.14t.dylib:arm64+0xa2124)
    #11 PyObject_Vectorcall call.c:327 (libpython3.14t.dylib:arm64+0x8ae78)
    #12 _PyEval_EvalFrameDefault generated_cases.c.h:1619 (libpython3.14t.dylib:arm64+0x27e560)
    #13 _PyEval_Vector ceval.c:1965 (libpython3.14t.dylib:arm64+0x27a2fc)
    #14 _PyFunction_Vectorcall call.c (libpython3.14t.dylib:arm64+0x8b4b8)
    #15 method_vectorcall classobject.c:73 (libpython3.14t.dylib:arm64+0x8f924)
    #16 _PyObject_Call call.c:348 (libpython3.14t.dylib:arm64+0x8b12c)
    #17 PyObject_Call call.c:373 (libpython3.14t.dylib:arm64+0x8b1a0)
    #18 thread_run _threadmodule.c:359 (libpython3.14t.dylib:arm64+0x41ae78)
    #19 pythread_wrapper thread_pthread.h:242 (libpython3.14t.dylib:arm64+0x36aeb0)

  Location is global 'pnt_iunordq1' at 0x00015ef30f18 (_quaddtype_main.cpython-314t-darwin.so+0x104f18)

  Thread T14 (tid=223552319, running) created by main thread at:
    #0 pthread_create <null> (libclang_rt.tsan_osx_dynamic.dylib:arm64e+0x2f708)
    #1 do_start_joinable_thread thread_pthread.h:289 (libpython3.14t.dylib:arm64+0x369b70)
    #2 PyThread_start_joinable_thread thread_pthread.h:331 (libpython3.14t.dylib:arm64+0x3699b8)
    #3 do_start_new_thread _threadmodule.c:1868 (libpython3.14t.dylib:arm64+0x41aa2c)
    #4 thread_PyThread_start_joinable_thread _threadmodule.c:1991 (libpython3.14t.dylib:arm64+0x4197b8)
    #5 cfunction_call methodobject.c:564 (libpython3.14t.dylib:arm64+0x12d2e4)
    #6 _PyObject_MakeTpCall call.c:242 (libpython3.14t.dylib:arm64+0x8a324)
    #7 PyObject_Vectorcall call.c:327 (libpython3.14t.dylib:arm64+0x8af14)
    #8 _PyEval_EvalFrameDefault generated_cases.c.h:3227 (libpython3.14t.dylib:arm64+0x28309c)
    #9 _PyEval_Vector ceval.c:1965 (libpython3.14t.dylib:arm64+0x27a2fc)
    #10 _PyFunction_Vectorcall call.c (libpython3.14t.dylib:arm64+0x8b4b8)
    #11 method_vectorcall classobject.c:95 (libpython3.14t.dylib:arm64+0x8f9c0)
    #12 _PyObject_Call call.c:348 (libpython3.14t.dylib:arm64+0x8b12c)
    #13 PyObject_Call call.c:373 (libpython3.14t.dylib:arm64+0x8b1a0)
    #14 _PyEval_EvalFrameDefault generated_cases.c.h:2654 (libpython3.14t.dylib:arm64+0x28128c)
    #15 _PyEval_Vector ceval.c:1965 (libpython3.14t.dylib:arm64+0x27a2fc)
    #16 _PyFunction_Vectorcall call.c (libpython3.14t.dylib:arm64+0x8b4b8)
    #17 _PyObject_VectorcallDictTstate call.c:146 (libpython3.14t.dylib:arm64+0x8a084)
    #18 _PyObject_Call_Prepend call.c:504 (libpython3.14t.dylib:arm64+0x8bac8)
    #19 call_method typeobject.c:2937 (libpython3.14t.dylib:arm64+0x197e5c)
    #20 slot_tp_call typeobject.c:10232 (libpython3.14t.dylib:arm64+0x197c7c)
    #21 _PyObject_MakeTpCall call.c:242 (libpython3.14t.dylib:arm64+0x8a324)
    #22 PyObject_Vectorcall call.c:327 (libpython3.14t.dylib:arm64+0x8af14)
    #23 _PyEval_EvalFrameDefault generated_cases.c.h:3227 (libpython3.14t.dylib:arm64+0x28309c)
    #24 _PyEval_Vector ceval.c:1965 (libpython3.14t.dylib:arm64+0x27a2fc)
    #25 _PyFunction_Vectorcall call.c (libpython3.14t.dylib:arm64+0x8b4b8)
    #26 _PyObject_VectorcallDictTstate call.c:146 (libpython3.14t.dylib:arm64+0x8a084)
    #27 _PyObject_Call_Prepend call.c:504 (libpython3.14t.dylib:arm64+0x8bac8)
    #28 call_method typeobject.c:2937 (libpython3.14t.dylib:arm64+0x197e5c)
    #29 slot_tp_call typeobject.c:10232 (libpython3.14t.dylib:arm64+0x197c7c)
    #30 _PyObject_Call call.c:361 (libpython3.14t.dylib:arm64+0x8b0ec)
    #31 PyObject_Call call.c:373 (libpython3.14t.dylib:arm64+0x8b1a0)
    #32 _PyEval_EvalFrameDefault generated_cases.c.h:2654 (libpython3.14t.dylib:arm64+0x28128c)
    #33 _PyEval_Vector ceval.c:1965 (libpython3.14t.dylib:arm64+0x27a2fc)
    #34 _PyFunction_Vectorcall call.c (libpython3.14t.dylib:arm64+0x8b4b8)
    #35 _PyObject_VectorcallDictTstate call.c:146 (libpython3.14t.dylib:arm64+0x8a084)
    #36 _PyObject_Call_Prepend call.c:504 (libpython3.14t.dylib:arm64+0x8bac8)
    #37 call_method typeobject.c:2937 (libpython3.14t.dylib:arm64+0x197e5c)
    #38 slot_tp_call typeobject.c:10232 (libpython3.14t.dylib:arm64+0x197c7c)
    #39 _PyObject_MakeTpCall call.c:242 (libpython3.14t.dylib:arm64+0x8a324)
    #40 PyObject_Vectorcall call.c:327 (libpython3.14t.dylib:arm64+0x8af14)
    #41 _PyEval_EvalFrameDefault generated_cases.c.h:3227 (libpython3.14t.dylib:arm64+0x28309c)
    #42 _PyEval_Vector ceval.c:1965 (libpython3.14t.dylib:arm64+0x27a2fc)
    #43 _PyFunction_Vectorcall call.c (libpython3.14t.dylib:arm64+0x8b4b8)
    #44 _PyObject_VectorcallDictTstate call.c:146 (libpython3.14t.dylib:arm64+0x8a084)
    #45 _PyObject_Call_Prepend call.c:504 (libpython3.14t.dylib:arm64+0x8bac8)
    #46 call_method typeobject.c:2937 (libpython3.14t.dylib:arm64+0x197e5c)
    #47 slot_tp_call typeobject.c:10232 (libpython3.14t.dylib:arm64+0x197c7c)
    #48 _PyObject_MakeTpCall call.c:242 (libpython3.14t.dylib:arm64+0x8a324)
    #49 PyObject_Vectorcall call.c:327 (libpython3.14t.dylib:arm64+0x8af14)
    #50 _PyEval_EvalFrameDefault generated_cases.c.h:2959 (libpython3.14t.dylib:arm64+0x282078)
    #51 _PyEval_Vector ceval.c:1965 (libpython3.14t.dylib:arm64+0x27a2fc)
    #52 _PyFunction_Vectorcall call.c (libpython3.14t.dylib:arm64+0x8b4b8)
    #53 _PyObject_VectorcallDictTstate call.c:146 (libpython3.14t.dylib:arm64+0x8a084)
    #54 _PyObject_Call_Prepend call.c:504 (libpython3.14t.dylib:arm64+0x8bac8)
    #55 call_method typeobject.c:2937 (libpython3.14t.dylib:arm64+0x197e5c)
    #56 slot_tp_call typeobject.c:10232 (libpython3.14t.dylib:arm64+0x197c7c)
    #57 _PyObject_MakeTpCall call.c:242 (libpython3.14t.dylib:arm64+0x8a324)
    #58 PyObject_Vectorcall call.c:327 (libpython3.14t.dylib:arm64+0x8af14)
    #59 _PyEval_EvalFrameDefault generated_cases.c.h:2959 (libpython3.14t.dylib:arm64+0x282078)
    #60 PyEval_EvalCode ceval.c:857 (libpython3.14t.dylib:arm64+0x279ed8)
    #61 run_mod pythonrun.c:1459 (libpython3.14t.dylib:arm64+0x348844)
    #62 _PyRun_SimpleFileObject pythonrun.c:521 (libpython3.14t.dylib:arm64+0x343f30)
    #63 _PyRun_AnyFileObject pythonrun.c:81 (libpython3.14t.dylib:arm64+0x343684)
    #64 pymain_run_file main.c:429 (libpython3.14t.dylib:arm64+0x384ea4)
    #65 Py_RunMain main.c:775 (libpython3.14t.dylib:arm64+0x3842d0)
    #66 pymain_main main.c:805 (libpython3.14t.dylib:arm64+0x38473c)
    #67 Py_BytesMain main.c:829 (libpython3.14t.dylib:arm64+0x384810)
    #68 main <null> (python3.14:arm64+0x10000073c)

  Thread T74 (tid=223552380, running) created by main thread at:
    #0 pthread_create <null> (libclang_rt.tsan_osx_dynamic.dylib:arm64e+0x2f708)
    #1 do_start_joinable_thread thread_pthread.h:289 (libpython3.14t.dylib:arm64+0x369b70)
    #2 PyThread_start_joinable_thread thread_pthread.h:331 (libpython3.14t.dylib:arm64+0x3699b8)
    #3 do_start_new_thread _threadmodule.c:1868 (libpython3.14t.dylib:arm64+0x41aa2c)
    #4 thread_PyThread_start_joinable_thread _threadmodule.c:1991 (libpython3.14t.dylib:arm64+0x4197b8)
    #5 cfunction_call methodobject.c:564 (libpython3.14t.dylib:arm64+0x12d2e4)
    #6 _PyObject_MakeTpCall call.c:242 (libpython3.14t.dylib:arm64+0x8a324)
    #7 PyObject_Vectorcall call.c:327 (libpython3.14t.dylib:arm64+0x8af14)
    #8 _PyEval_EvalFrameDefault generated_cases.c.h:3227 (libpython3.14t.dylib:arm64+0x28309c)
    #9 _PyEval_Vector ceval.c:1965 (libpython3.14t.dylib:arm64+0x27a2fc)
    #10 _PyFunction_Vectorcall call.c (libpython3.14t.dylib:arm64+0x8b4b8)
    #11 method_vectorcall classobject.c:95 (libpython3.14t.dylib:arm64+0x8f9c0)
    #12 _PyObject_Call call.c:348 (libpython3.14t.dylib:arm64+0x8b12c)
    #13 PyObject_Call call.c:373 (libpython3.14t.dylib:arm64+0x8b1a0)
    #14 _PyEval_EvalFrameDefault generated_cases.c.h:2654 (libpython3.14t.dylib:arm64+0x28128c)
    #15 _PyEval_Vector ceval.c:1965 (libpython3.14t.dylib:arm64+0x27a2fc)
    #16 _PyFunction_Vectorcall call.c (libpython3.14t.dylib:arm64+0x8b4b8)
    #17 _PyObject_VectorcallDictTstate call.c:146 (libpython3.14t.dylib:arm64+0x8a084)
    #18 _PyObject_Call_Prepend call.c:504 (libpython3.14t.dylib:arm64+0x8bac8)
    #19 call_method typeobject.c:2937 (libpython3.14t.dylib:arm64+0x197e5c)
    #20 slot_tp_call typeobject.c:10232 (libpython3.14t.dylib:arm64+0x197c7c)
    #21 _PyObject_MakeTpCall call.c:242 (libpython3.14t.dylib:arm64+0x8a324)
    #22 PyObject_Vectorcall call.c:327 (libpython3.14t.dylib:arm64+0x8af14)
    #23 _PyEval_EvalFrameDefault generated_cases.c.h:3227 (libpython3.14t.dylib:arm64+0x28309c)
    #24 _PyEval_Vector ceval.c:1965 (libpython3.14t.dylib:arm64+0x27a2fc)
    #25 _PyFunction_Vectorcall call.c (libpython3.14t.dylib:arm64+0x8b4b8)
    #26 _PyObject_VectorcallDictTstate call.c:146 (libpython3.14t.dylib:arm64+0x8a084)
    #27 _PyObject_Call_Prepend call.c:504 (libpython3.14t.dylib:arm64+0x8bac8)
    #28 call_method typeobject.c:2937 (libpython3.14t.dylib:arm64+0x197e5c)
    #29 slot_tp_call typeobject.c:10232 (libpython3.14t.dylib:arm64+0x197c7c)
    #30 _PyObject_Call call.c:361 (libpython3.14t.dylib:arm64+0x8b0ec)
    #31 PyObject_Call call.c:373 (libpython3.14t.dylib:arm64+0x8b1a0)
    #32 _PyEval_EvalFrameDefault generated_cases.c.h:2654 (libpython3.14t.dylib:arm64+0x28128c)
    #33 _PyEval_Vector ceval.c:1965 (libpython3.14t.dylib:arm64+0x27a2fc)
    #34 _PyFunction_Vectorcall call.c (libpython3.14t.dylib:arm64+0x8b4b8)
    #35 _PyObject_VectorcallDictTstate call.c:146 (libpython3.14t.dylib:arm64+0x8a084)
    #36 _PyObject_Call_Prepend call.c:504 (libpython3.14t.dylib:arm64+0x8bac8)
    #37 call_method typeobject.c:2937 (libpython3.14t.dylib:arm64+0x197e5c)
    #38 slot_tp_call typeobject.c:10232 (libpython3.14t.dylib:arm64+0x197c7c)
    #39 _PyObject_MakeTpCall call.c:242 (libpython3.14t.dylib:arm64+0x8a324)
    #40 PyObject_Vectorcall call.c:327 (libpython3.14t.dylib:arm64+0x8af14)
    #41 _PyEval_EvalFrameDefault generated_cases.c.h:3227 (libpython3.14t.dylib:arm64+0x28309c)
    #42 _PyEval_Vector ceval.c:1965 (libpython3.14t.dylib:arm64+0x27a2fc)
    #43 _PyFunction_Vectorcall call.c (libpython3.14t.dylib:arm64+0x8b4b8)
    #44 _PyObject_VectorcallDictTstate call.c:146 (libpython3.14t.dylib:arm64+0x8a084)
    #45 _PyObject_Call_Prepend call.c:504 (libpython3.14t.dylib:arm64+0x8bac8)
    #46 call_method typeobject.c:2937 (libpython3.14t.dylib:arm64+0x197e5c)
    #47 slot_tp_call typeobject.c:10232 (libpython3.14t.dylib:arm64+0x197c7c)
    #48 _PyObject_MakeTpCall call.c:242 (libpython3.14t.dylib:arm64+0x8a324)
    #49 PyObject_Vectorcall call.c:327 (libpython3.14t.dylib:arm64+0x8af14)
    #50 _PyEval_EvalFrameDefault generated_cases.c.h:2959 (libpython3.14t.dylib:arm64+0x282078)
    #51 _PyEval_Vector ceval.c:1965 (libpython3.14t.dylib:arm64+0x27a2fc)
    #52 _PyFunction_Vectorcall call.c (libpython3.14t.dylib:arm64+0x8b4b8)
    #53 _PyObject_VectorcallDictTstate call.c:146 (libpython3.14t.dylib:arm64+0x8a084)
    #54 _PyObject_Call_Prepend call.c:504 (libpython3.14t.dylib:arm64+0x8bac8)
    #55 call_method typeobject.c:2937 (libpython3.14t.dylib:arm64+0x197e5c)
    #56 slot_tp_call typeobject.c:10232 (libpython3.14t.dylib:arm64+0x197c7c)
    #57 _PyObject_MakeTpCall call.c:242 (libpython3.14t.dylib:arm64+0x8a324)
    #58 PyObject_Vectorcall call.c:327 (libpython3.14t.dylib:arm64+0x8af14)
    #59 _PyEval_EvalFrameDefault generated_cases.c.h:2959 (libpython3.14t.dylib:arm64+0x282078)
    #60 PyEval_EvalCode ceval.c:857 (libpython3.14t.dylib:arm64+0x279ed8)
    #61 run_mod pythonrun.c:1459 (libpython3.14t.dylib:arm64+0x348844)
    #62 _PyRun_SimpleFileObject pythonrun.c:521 (libpython3.14t.dylib:arm64+0x343f30)
    #63 _PyRun_AnyFileObject pythonrun.c:81 (libpython3.14t.dylib:arm64+0x343684)
    #64 pymain_run_file main.c:429 (libpython3.14t.dylib:arm64+0x384ea4)
    #65 Py_RunMain main.c:775 (libpython3.14t.dylib:arm64+0x3842d0)
    #66 pymain_main main.c:805 (libpython3.14t.dylib:arm64+0x38473c)
    #67 Py_BytesMain main.c:829 (libpython3.14t.dylib:arm64+0x384810)
    #68 main <null> (python3.14:arm64+0x10000073c)

SUMMARY: ThreadSanitizer: data race (_quaddtype_main.cpython-314t-darwin.so:arm64+0xb8ffc) in Sleef_iunordq1+0x38
==================

I think the locking that's going to be needed is probably not limited to snprintf. In this case it looks like there's global state in the iunordq1 implementation.

5 changes: 5 additions & 0 deletions quaddtype/subprojects/packagefiles/sleef/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ if host_machine.system() == 'windows'
parallel_flag = []
endif

# uncomment below lines for TSAN builds (in case compiler flags are not picked up from meson)
sleef_configure = run_command([
cmake,
'-S', meson.current_source_dir(),
Expand All @@ -22,6 +23,10 @@ sleef_configure = run_command([
'-DSLEEF_BUILD_TESTS=OFF',
'-DSLEEF_BUILD_INLINE_HEADERS=OFF',
'-DCMAKE_POSITION_INDEPENDENT_CODE=ON',
# '-DCMAKE_C_FLAGS=-fsanitize=thread -g',
# '-DCMAKE_CXX_FLAGS=-fsanitize=thread -g',
# '-DCMAKE_EXE_LINKER_FLAGS=-fsanitize=thread',
# '-DCMAKE_SHARED_LINKER_FLAGS=-fsanitize=thread',
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I poked a little at this. It looks like this doesn't get passed down into the subproject by default so this manual patching is necessary. Too bad...

Also maybe worth mentioning in the comment above that you also need to delete the sleef subproject completely for this change to have any impact unless you're starting from a fresh clone of the repo. At least that's what I had to do for this change to have any effect.

'-DCMAKE_INSTALL_PREFIX=' + meson.current_build_dir() / sleef_install_dir
], check: false, capture: true)

Expand Down
32 changes: 32 additions & 0 deletions quaddtype/tests/test_multithreading.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import concurrent.futures
import threading

import pytest

import numpy as np
from numpy._core import _rational_tests
from numpy._core.tests.test_stringdtype import random_unicode_string_list
from numpy.testing import IS_64BIT, IS_WASM
from numpy.testing._private.utils import run_threaded

if IS_WASM:
pytest.skip(allow_module_level=True, reason="no threading support in wasm")

from numpy_quaddtype import *


def test_as_integer_ratio_reconstruction():
"""Multi-threaded test that as_integer_ratio() can reconstruct the original value."""

def test(barrier):
barrier.wait() # All threads start simultaneously
values = ["3.14", "0.1", "1.414213562373095", "2.718281828459045",
"-1.23456789", "1000.001", "0.0001", "1e20", "1.23e15", "1e-30", pi]
for val in values:
quad_val = QuadPrecision(val)
num, denom = quad_val.as_integer_ratio()
# todo: can remove str converstion after merging PR #213
reconstructed = QuadPrecision(str(num)) / QuadPrecision(str(denom))
assert reconstructed == quad_val

run_threaded(test, pass_barrier=True, max_workers=64, outer_iterations=100)
Loading