Nuitka
The Python compiler
Loading...
Searching...
No Matches
HelpersDeepcopy.c
1// Copyright 2025, Kay Hayen, mailto:kay.hayen@gmail.com find license text at end of file
2
7// This file is included from another C file, help IDEs to still parse it on
8// its own.
9#ifdef __IDE_ONLY__
10#include "nuitka/prelude.h"
11#endif
12
#if PYTHON_VERSION >= 0x390
// Local view of a generic alias instance. This file casts objects whose type
// is "Py_GenericAliasType" to it in order to reach their "origin" and "args"
// members directly.
// NOTE(review): presumably this has to match the leading members of the
// structure CPython uses internally - confirm for newly supported versions.
typedef struct {
    PyObject_HEAD
    PyObject *origin;
    PyObject *args;
    PyObject *parameters;
} GenericAliasObject;
#endif
20
21typedef PyObject *(*copy_func)(PyThreadState *tstate, PyObject *);
22
23static PyObject *DEEP_COPY_ITEM(PyThreadState *tstate, PyObject *value, PyTypeObject **type, copy_func *copy_function);
24
25PyObject *DEEP_COPY_LIST(PyThreadState *tstate, PyObject *value) {
26 assert(PyList_CheckExact(value));
27
28 Py_ssize_t n = PyList_GET_SIZE(value);
29 PyObject *result = MAKE_LIST_EMPTY(tstate, n);
30
31 PyTypeObject *type = NULL;
32 copy_func copy_function = NULL;
33
34 for (Py_ssize_t i = 0; i < n; i++) {
35 PyObject *item = PyList_GET_ITEM(value, i);
36 if (i == 0) {
37 PyList_SET_ITEM(result, i, DEEP_COPY_ITEM(tstate, item, &type, &copy_function));
38 } else {
39 PyObject *new_item;
40
41 if (likely(type == Py_TYPE(item))) {
42 if (copy_function) {
43 new_item = copy_function(tstate, item);
44 } else {
45 new_item = item;
46 Py_INCREF(item);
47 }
48 } else {
49 new_item = DEEP_COPY_ITEM(tstate, item, &type, &copy_function);
50 }
51
52 PyList_SET_ITEM(result, i, new_item);
53 }
54 }
55
56 return result;
57}
58
59PyObject *DEEP_COPY_TUPLE(PyThreadState *tstate, PyObject *value) {
60 assert(PyTuple_CheckExact(value));
61
62 Py_ssize_t n = PyTuple_GET_SIZE(value);
63
64 PyObject *result = MAKE_TUPLE_EMPTY_VAR(tstate, n);
65
66 for (Py_ssize_t i = 0; i < n; i++) {
67 PyTuple_SET_ITEM(result, i, DEEP_COPY(tstate, PyTuple_GET_ITEM(value, i)));
68 }
69
70 return result;
71}
72
73static PyObject *_DEEP_COPY_SET(PyObject *value) {
74 // Sets cannot contain non-hashable types, so these all must be immutable,
75 // but the set itself might be changed, so we need to copy it.
76 return PySet_New(value);
77}
78
79PyObject *DEEP_COPY_SET(PyThreadState *tstate, PyObject *value) { return _DEEP_COPY_SET(value); }
80
#if PYTHON_VERSION >= 0x390
// Deep copy a value whose type is exactly "Py_GenericAliasType".
//
// Both the "args" and the "origin" of the value are passed through
// "DEEP_COPY". If neither of these gave a different object, "value" itself is
// returned with a new reference, otherwise a new generic alias is created
// from the copies and returned.
PyObject *DEEP_COPY_GENERICALIAS(PyThreadState *tstate, PyObject *value) {
    assert(Py_TYPE(value) == &Py_GenericAliasType);

    GenericAliasObject *generic_alias = (GenericAliasObject *)value;

    PyObject *args = DEEP_COPY(tstate, generic_alias->args);
    PyObject *origin = DEEP_COPY(tstate, generic_alias->origin);

    PyObject *result;

    if (generic_alias->args == args && generic_alias->origin == origin) {
        Py_INCREF(value);
        result = value;
    } else {
        result = Py_GenericAlias(origin, args);
    }

    // "DEEP_COPY" handed us owned references, and neither branch above takes
    // them over, "Py_GenericAlias" acquires its own. Release ours here, or
    // else every call would leak one reference to each of them.
    Py_DECREF(args);
    Py_DECREF(origin);

    return result;
}
#endif
98
99static PyObject *_deep_copy_dispatch = NULL;
100static PyObject *_deep_noop = NULL;
101
102static PyObject *Nuitka_CapsuleNew(void *pointer) {
103#if PYTHON_VERSION < 0x300
104 return PyCObject_FromVoidPtr(pointer, NULL);
105#else
106 return PyCapsule_New(pointer, "", NULL);
107#endif
108}
109
// Access to the pointer wrapped by "Nuitka_CapsuleNew".
#if PYTHON_VERSION < 0x300
#define Nuitka_CapsuleGetPointer(capsule) (PyCObject_AsVoidPtr(capsule))
#else
// Local view of a capsule object, used to read its pointer member directly
// rather than via a function call.
// NOTE(review): presumably this has to match the structure CPython uses
// internally for capsules - confirm for newly supported versions.
typedef struct {
    PyObject_HEAD
    void *pointer;
    const char *name;
    void *context;
    PyCapsule_Destructor destructor;
} Nuitka_PyCapsule;

#define Nuitka_CapsuleGetPointer(capsule) (((Nuitka_PyCapsule *)(capsule))->pointer)
#endif
123
#if PYTHON_VERSION >= 0x3a0
// The type of union values, "_initDeepCopy" determines it from a union value
// it creates for that purpose.
PyTypeObject *Nuitka_PyUnion_Type;
#endif
127
128static PyObject *_makeDeepCopyFunctionCapsule(copy_func func) { return Nuitka_CapsuleNew((void *)func); }
129
130static void _initDeepCopy(PyThreadState *tstate) {
131 // Once only
132 assert(_deep_copy_dispatch == NULL);
133
134 _deep_copy_dispatch = PyDict_New();
135 _deep_noop = Py_None;
136
137 CHECK_OBJECT(_deep_noop);
138
139 PyDict_SetItem(_deep_copy_dispatch, (PyObject *)&PyDict_Type, _makeDeepCopyFunctionCapsule(DEEP_COPY_DICT));
140 PyDict_SetItem(_deep_copy_dispatch, (PyObject *)&PyList_Type, _makeDeepCopyFunctionCapsule(DEEP_COPY_LIST));
141 PyDict_SetItem(_deep_copy_dispatch, (PyObject *)&PyTuple_Type, _makeDeepCopyFunctionCapsule(DEEP_COPY_TUPLE));
142 PyDict_SetItem(_deep_copy_dispatch, (PyObject *)&PySet_Type, _makeDeepCopyFunctionCapsule(DEEP_COPY_SET));
143 PyDict_SetItem(_deep_copy_dispatch, (PyObject *)&PyByteArray_Type, _makeDeepCopyFunctionCapsule(BYTEARRAY_COPY));
144
145#if PYTHON_VERSION >= 0x390
146 PyDict_SetItem(_deep_copy_dispatch, (PyObject *)&Py_GenericAliasType,
147 _makeDeepCopyFunctionCapsule(DEEP_COPY_GENERICALIAS));
148#endif
149
150#if PYTHON_VERSION >= 0x3a0
151 {
152 PyObject *args[2] = {(PyObject *)&PyFloat_Type, (PyObject *)&PyTuple_Type};
153 PyObject *args_tuple = MAKE_TUPLE(tstate, args, 2);
154 PyObject *union_value = MAKE_UNION_TYPE(args_tuple);
155
156 Nuitka_PyUnion_Type = Py_TYPE(union_value);
157
158 PyDict_SetItem(_deep_copy_dispatch, (PyObject *)Nuitka_PyUnion_Type, _deep_noop);
159
160 Py_DECREF(union_value);
161 Py_DECREF(args_tuple);
162 }
163
164#endif
165
166#if PYTHON_VERSION < 0x300
167 PyDict_SetItem(_deep_copy_dispatch, (PyObject *)&PyString_Type, _deep_noop);
168 PyDict_SetItem(_deep_copy_dispatch, (PyObject *)&PyInt_Type, _deep_noop);
169#else
170 PyDict_SetItem(_deep_copy_dispatch, (PyObject *)&PyBytes_Type, _deep_noop);
171#endif
172 PyDict_SetItem(_deep_copy_dispatch, (PyObject *)&PyUnicode_Type, _deep_noop);
173 PyDict_SetItem(_deep_copy_dispatch, (PyObject *)&PyLong_Type, _deep_noop);
174 PyDict_SetItem(_deep_copy_dispatch, (PyObject *)Py_TYPE(Py_None), _deep_noop);
175 PyDict_SetItem(_deep_copy_dispatch, (PyObject *)&PyBool_Type, _deep_noop);
176 PyDict_SetItem(_deep_copy_dispatch, (PyObject *)&PyFloat_Type, _deep_noop);
177 PyDict_SetItem(_deep_copy_dispatch, (PyObject *)&PyRange_Type, _deep_noop);
178 PyDict_SetItem(_deep_copy_dispatch, (PyObject *)&PyType_Type, _deep_noop);
179 PyDict_SetItem(_deep_copy_dispatch, (PyObject *)&PySlice_Type, _deep_noop);
180 PyDict_SetItem(_deep_copy_dispatch, (PyObject *)&PyComplex_Type, _deep_noop);
181 PyDict_SetItem(_deep_copy_dispatch, (PyObject *)&PyCFunction_Type, _deep_noop);
182 PyDict_SetItem(_deep_copy_dispatch, (PyObject *)Py_TYPE(Py_Ellipsis), _deep_noop);
183 PyDict_SetItem(_deep_copy_dispatch, (PyObject *)Py_TYPE(Py_NotImplemented), _deep_noop);
184
185 // Sets can be changed, but not a frozenset.
186 PyDict_SetItem(_deep_copy_dispatch, (PyObject *)&PyFrozenSet_Type, _deep_noop);
187}
188
189static PyObject *DEEP_COPY_ITEM(PyThreadState *tstate, PyObject *value, PyTypeObject **type, copy_func *copy_function) {
190 *type = Py_TYPE(value);
191
192 PyObject *dispatcher = DICT_GET_ITEM0(tstate, _deep_copy_dispatch, (PyObject *)*type);
193
194 if (unlikely(dispatcher == NULL)) {
195 NUITKA_CANNOT_GET_HERE("DEEP_COPY encountered unknown type");
196 }
197
198 if (dispatcher == Py_None) {
199 *copy_function = NULL;
200
201 Py_INCREF(value);
202 return value;
203 } else {
204 *copy_function = (copy_func)(Nuitka_CapsuleGetPointer(dispatcher));
205 return (*copy_function)(tstate, value);
206 }
207}
208
209PyObject *DEEP_COPY(PyThreadState *tstate, PyObject *value) {
210 PyObject *dispatcher = DICT_GET_ITEM0(tstate, _deep_copy_dispatch, (PyObject *)Py_TYPE(value));
211
212 if (unlikely(dispatcher == NULL)) {
213 NUITKA_CANNOT_GET_HERE("DEEP_COPY encountered unknown type");
214 }
215
216 if (dispatcher == Py_None) {
217 Py_INCREF(value);
218 return value;
219 } else {
220 copy_func copy_function = (copy_func)(Nuitka_CapsuleGetPointer(dispatcher));
221 return copy_function(tstate, value);
222 }
223}
224
225#ifndef __NUITKA_NO_ASSERT__
226
227static Py_hash_t DEEP_HASH_INIT(PyThreadState *tstate, PyObject *value) {
228 // To avoid warnings about reduced sizes, we put an intermediate value
229 // that is size_t.
230 size_t value2 = (size_t)value;
231 Py_hash_t result = (Py_hash_t)(value2);
232
233 if (Py_TYPE(value) != &PyType_Type) {
234 result ^= DEEP_HASH(tstate, (PyObject *)Py_TYPE(value));
235 }
236
237 return result;
238}
239
240static void DEEP_HASH_BLOB(Py_hash_t *hash, char const *s, Py_ssize_t size) {
241 while (size > 0) {
242 *hash = (1000003 * (*hash)) ^ (Py_hash_t)(*s++);
243 size--;
244 }
245}
246
247static void DEEP_HASH_CSTR(Py_hash_t *hash, char const *s) { DEEP_HASH_BLOB(hash, s, strlen(s)); }
248
249// Hash function that actually verifies things done to the bit level. Can be
250// used to detect corruption.
251Py_hash_t DEEP_HASH(PyThreadState *tstate, PyObject *value) {
252 assert(value != NULL);
253
254 if (PyType_Check(value)) {
255 Py_hash_t result = DEEP_HASH_INIT(tstate, value);
256
257 DEEP_HASH_CSTR(&result, ((PyTypeObject *)value)->tp_name);
258 return result;
259 } else if (PyDict_Check(value)) {
260 Py_hash_t result = DEEP_HASH_INIT(tstate, value);
261
262 Py_ssize_t pos = 0;
263 PyObject *key, *dict_value;
264
265 while (Nuitka_DictNext(value, &pos, &key, &dict_value)) {
266 if (key != NULL && value != NULL) {
267 result ^= DEEP_HASH(tstate, key);
268 result ^= DEEP_HASH(tstate, dict_value);
269 }
270 }
271
272 return result;
273 } else if (PyTuple_Check(value)) {
274 Py_hash_t result = DEEP_HASH_INIT(tstate, value);
275
276 Py_ssize_t n = PyTuple_GET_SIZE(value);
277
278 for (Py_ssize_t i = 0; i < n; i++) {
279 result ^= DEEP_HASH(tstate, PyTuple_GET_ITEM(value, i));
280 }
281
282 return result;
283 } else if (PyList_CheckExact(value)) {
284 Py_hash_t result = DEEP_HASH_INIT(tstate, value);
285
286 Py_ssize_t n = PyList_GET_SIZE(value);
287
288 for (Py_ssize_t i = 0; i < n; i++) {
289 result ^= DEEP_HASH(tstate, PyList_GET_ITEM(value, i));
290 }
291
292 return result;
293 } else if (PySet_Check(value) || PyFrozenSet_Check(value)) {
294 Py_hash_t result = DEEP_HASH_INIT(tstate, value);
295
296 PyObject *iterator = PyObject_GetIter(value);
297 CHECK_OBJECT(iterator);
298
299 while (true) {
300 PyObject *item = PyIter_Next(iterator);
301 if (!item)
302 break;
303
304 CHECK_OBJECT(item);
305
306 result ^= DEEP_HASH(tstate, item);
307
308 Py_DECREF(item);
309 }
310
311 Py_DECREF(iterator);
312
313 return result;
314 } else if (PyLong_Check(value)) {
315 Py_hash_t result = DEEP_HASH_INIT(tstate, value);
316
317 struct Nuitka_ExceptionPreservationItem saved_exception_state;
318
319 FETCH_ERROR_OCCURRED_STATE_UNTRACED(tstate, &saved_exception_state);
320
321 // Use string to hash the long value, which relies on that to not
322 // use the object address.
323 PyObject *str = PyObject_Str(value);
324 result ^= DEEP_HASH(tstate, str);
325 Py_DECREF(str);
326
327 RESTORE_ERROR_OCCURRED_STATE_UNTRACED(tstate, &saved_exception_state);
328
329 return result;
330 } else if (PyUnicode_Check(value)) {
331 Py_hash_t result = DEEP_HASH(tstate, (PyObject *)Py_TYPE(value));
332
333 struct Nuitka_ExceptionPreservationItem saved_exception_state;
334
335 FETCH_ERROR_OCCURRED_STATE_UNTRACED(tstate, &saved_exception_state);
336
337#if PYTHON_VERSION >= 0x300
338 char const *s = (char const *)PyUnicode_DATA(value);
339 Py_ssize_t size = PyUnicode_GET_LENGTH(value) * PyUnicode_KIND(value);
340
341 DEEP_HASH_BLOB(&result, s, size);
342#else
343 PyObject *str = PyUnicode_AsUTF8String(value);
344
345 if (str) {
346 result ^= DEEP_HASH(tstate, str);
347 }
348
349 Py_DECREF(str);
350#endif
351 RESTORE_ERROR_OCCURRED_STATE_UNTRACED(tstate, &saved_exception_state);
352
353 return result;
354 }
355#if PYTHON_VERSION < 0x300
356 else if (PyString_Check(value)) {
357 Py_hash_t result = DEEP_HASH(tstate, (PyObject *)Py_TYPE(value));
358
359 Py_ssize_t size;
360 char *s;
361
362 int res = PyString_AsStringAndSize(value, &s, &size);
363 assert(res != -1);
364
365 DEEP_HASH_BLOB(&result, s, size);
366
367 return result;
368 }
369#else
370 else if (PyBytes_Check(value)) {
371 Py_hash_t result = DEEP_HASH_INIT(tstate, value);
372
373 Py_ssize_t size;
374 char *s;
375
376 int res = PyBytes_AsStringAndSize(value, &s, &size);
377 assert(res != -1);
378
379 DEEP_HASH_BLOB(&result, s, size);
380
381 return result;
382 }
383#endif
384 else if (PyByteArray_Check(value)) {
385 Py_hash_t result = DEEP_HASH_INIT(tstate, value);
386
387 Py_ssize_t size = PyByteArray_Size(value);
388 assert(size >= 0);
389
390 char *s = PyByteArray_AsString(value);
391
392 DEEP_HASH_BLOB(&result, s, size);
393
394 return result;
395 } else if (value == Py_None || value == Py_Ellipsis || value == Py_NotImplemented) {
396 return DEEP_HASH_INIT(tstate, value);
397 } else if (PyComplex_Check(value)) {
398 Py_complex c = PyComplex_AsCComplex(value);
399
400 Py_hash_t result = DEEP_HASH_INIT(tstate, value);
401
402 Py_ssize_t size = sizeof(c);
403 char *s = (char *)&c;
404
405 DEEP_HASH_BLOB(&result, s, size);
406
407 return result;
408 } else if (PyFloat_Check(value)) {
409 double f = PyFloat_AsDouble(value);
410
411 Py_hash_t result = DEEP_HASH_INIT(tstate, value);
412
413 Py_ssize_t size = sizeof(f);
414 char *s = (char *)&f;
415
416 DEEP_HASH_BLOB(&result, s, size);
417
418 return result;
419 } else if (
420#if PYTHON_VERSION < 0x300
421 PyInt_Check(value) ||
422#endif
423 PyBool_Check(value) || PyRange_Check(value) || PySlice_Check(value) || PyCFunction_Check(value)) {
424 Py_hash_t result = DEEP_HASH_INIT(tstate, value);
425
426#if 0
427 printf("Too simple deep hash: %s\n", Py_TYPE(value)->tp_name);
428#endif
429
430 return result;
431#if PYTHON_VERSION >= 0x390
432 } else if (Py_TYPE(value) == &Py_GenericAliasType) {
433 Py_hash_t result = DEEP_HASH_INIT(tstate, value);
434
435 GenericAliasObject *generic_alias = (GenericAliasObject *)value;
436
437 result ^= DEEP_HASH(tstate, generic_alias->args);
438 result ^= DEEP_HASH(tstate, generic_alias->origin);
439
440 return result;
441#endif
442#if PYTHON_VERSION >= 0x3a0
443 } else if (Py_TYPE(value) == Nuitka_PyUnion_Type) {
444 Py_hash_t result = DEEP_HASH_INIT(tstate, value);
445
446 result ^= DEEP_HASH(tstate, LOOKUP_ATTRIBUTE(tstate, value, const_str_plain___args__));
447
448 return result;
449#endif
450 } else if (PyCode_Check(value)) {
451 return DEEP_HASH_INIT(tstate, value);
452 } else {
453 NUITKA_CANNOT_GET_HERE("Unknown type hashed");
454
455 return -1;
456 }
457}
458#endif
459
460// Note: Not recursion safe, cannot do this everywhere.
461void CHECK_OBJECT_DEEP(PyObject *value) {
462 CHECK_OBJECT(value);
463
464 if (PyTuple_Check(value)) {
465 for (Py_ssize_t i = 0, size = PyTuple_GET_SIZE(value); i < size; i++) {
466 PyObject *element = PyTuple_GET_ITEM(value, i);
467
468 CHECK_OBJECT_DEEP(element);
469 }
470 } else if (PyList_CheckExact(value)) {
471 for (Py_ssize_t i = 0, size = PyList_GET_SIZE(value); i < size; i++) {
472 PyObject *element = PyList_GET_ITEM(value, i);
473
474 CHECK_OBJECT_DEEP(element);
475 }
476 } else if (PyDict_Check(value)) {
477 Py_ssize_t pos = 0;
478 PyObject *dict_key, *dict_value;
479
480 while (Nuitka_DictNext(value, &pos, &dict_key, &dict_value)) {
481 CHECK_OBJECT_DEEP(dict_key);
482 CHECK_OBJECT_DEEP(dict_value);
483 }
484 }
485}
486
487void CHECK_OBJECTS_DEEP(PyObject *const *values, Py_ssize_t size) {
488 for (Py_ssize_t i = 0; i < size; i++) {
489 CHECK_OBJECT_DEEP(values[i]);
490 }
491}
492
493static PyObject *_DEEP_COPY_LIST_GUIDED(PyThreadState *tstate, PyObject *value, char const **guide);
494static PyObject *_DEEP_COPY_TUPLE_GUIDED(PyThreadState *tstate, PyObject *value, char const **guide);
495
496static PyObject *_DEEP_COPY_ELEMENT_GUIDED(PyThreadState *tstate, PyObject *value, char const **guide) {
497 char code = **guide;
498 *guide += 1;
499
500 switch (code) {
501 case 'i':
502 Py_INCREF(value);
503 return value;
504 case 'L':
505 return _DEEP_COPY_LIST_GUIDED(tstate, value, guide);
506 case 'l':
507 return LIST_COPY(tstate, value);
508 case 'T':
509 return _DEEP_COPY_TUPLE_GUIDED(tstate, value, guide);
510 case 't':
511 return TUPLE_COPY(tstate, value);
512 case 'D':
513 return DEEP_COPY_DICT(tstate, value);
514 case 'd':
515 return DICT_COPY(tstate, value);
516 case 'S':
517 return DEEP_COPY_SET(tstate, value);
518 case 'B':
519 return BYTEARRAY_COPY(tstate, value);
520 case '?':
521 return DEEP_COPY(tstate, value);
522 default:
523 NUITKA_CANNOT_GET_HERE("Illegal type guide");
524 abort();
525 }
526}
527
528static PyObject *_DEEP_COPY_LIST_GUIDED(PyThreadState *tstate, PyObject *value, char const **guide) {
529 assert(PyList_CheckExact(value));
530
531 Py_ssize_t size = PyList_GET_SIZE(value);
532
533 PyObject *result = MAKE_LIST_EMPTY(tstate, size);
534
535 for (Py_ssize_t i = 0; i < size; i++) {
536 PyObject *item = _DEEP_COPY_ELEMENT_GUIDED(tstate, PyList_GET_ITEM(value, i), guide);
537
538 PyList_SET_ITEM(result, i, item);
539 }
540
541 return result;
542}
543
544static PyObject *_DEEP_COPY_TUPLE_GUIDED(PyThreadState *tstate, PyObject *value, char const **guide) {
545 assert(PyTuple_CheckExact(value));
546
547 Py_ssize_t size = PyTuple_GET_SIZE(value);
548
549 // We cannot have size 0, so this is safe.
550 assert(size > 0);
551 PyObject *result = MAKE_TUPLE_EMPTY(tstate, size);
552
553 for (Py_ssize_t i = 0; i < size; i++) {
554 PyObject *item = _DEEP_COPY_ELEMENT_GUIDED(tstate, PyTuple_GET_ITEM(value, i), guide);
555
556 PyTuple_SET_ITEM(result, i, item);
557 }
558
559 return result;
560}
561
562PyObject *DEEP_COPY_LIST_GUIDED(PyThreadState *tstate, PyObject *value, char const *guide) {
563 PyObject *result = _DEEP_COPY_LIST_GUIDED(tstate, value, &guide);
564 assert(*guide == 0);
565 return result;
566}
567
568PyObject *DEEP_COPY_TUPLE_GUIDED(PyThreadState *tstate, PyObject *value, char const *guide) {
569 PyObject *result = _DEEP_COPY_TUPLE_GUIDED(tstate, value, &guide);
570 assert(*guide == 0);
571 return result;
572}
573
574// Part of "Nuitka", an optimizing Python compiler that is compatible and
575// integrates with CPython, but also works on its own.
576//
577// Licensed under the Apache License, Version 2.0 (the "License");
578// you may not use this file except in compliance with the License.
579// You may obtain a copy of the License at
580//
581// http://www.apache.org/licenses/LICENSE-2.0
582//
583// Unless required by applicable law or agreed to in writing, software
584// distributed under the License is distributed on an "AS IS" BASIS,
585// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
586// See the License for the specific language governing permissions and
587// limitations under the License.
Definition exceptions.h:712