Nuitka
The Python compiler
Loading...
Searching...
No Matches
HelpersConstantsBlob.c
1// Copyright 2025, Kay Hayen, mailto:kay.hayen@gmail.com find license text at end of file
2
12// This file is included from another C file, help IDEs to still parse it on
13// its own.
14#ifdef __IDE_ONLY__
15#include "nuitka/prelude.h"
16#endif
17
// Qualifier applied to the constants blob symbol: empty when the experimental
// writeable constants mode is enabled, "const" otherwise.
18#if _NUITKA_EXPERIMENTAL_WRITEABLE_CONSTANTS
19#define CONST_CONSTANT
20#else
21#define CONST_CONSTANT const
22#endif
23
// Definition of "constant_bin", the pointer to the start of the constants
// blob. Depending on the build mode it points to the externally provided
// "constant_bin_data" array, or starts out as NULL.
24#if defined(_NUITKA_CONSTANTS_FROM_LINKER)
25// Symbol as provided by the linker, different for C++ and C11 mode.
26#ifdef __cplusplus
27extern "C" CONST_CONSTANT unsigned char constant_bin_data[];
28#else
29extern CONST_CONSTANT unsigned char constant_bin_data[0];
30#endif
31
32unsigned char const *constant_bin = &constant_bin_data[0];
33
34#elif defined(_NUITKA_CONSTANTS_FROM_CODE)
// Symbol declared here only, its definition is not in this file.
35#ifdef __cplusplus
36extern "C" CONST_CONSTANT unsigned char constant_bin_data[];
37#else
38extern CONST_CONSTANT unsigned char constant_bin_data[];
39#endif
40
41unsigned char const *constant_bin = &constant_bin_data[0];
42#else
43// Symbol to be assigned locally.
44unsigned char const *constant_bin = NULL;
45#endif
46
// Accessor for the blob data in "incbin" mode, only declared here.
47#if defined(_NUITKA_CONSTANTS_FROM_INCBIN)
48extern unsigned const char *getConstantsBlobData(void);
49#endif
50
// Per type dictionaries used to de-duplicate unpacked constants. They are
// created in "initCaches" and filled through "insertToDictCache", which maps
// each value to itself, so an equal value unpacked later is replaced by the
// one already stored.

// Python2 only: cache for "int" values.
51#if PYTHON_VERSION < 0x300
52static PyObject *int_cache = NULL;
53#endif
54
55static PyObject *long_cache = NULL;
56
57static PyObject *float_cache = NULL;
58
// Python3 only: cache for "bytes" values.
59#if PYTHON_VERSION >= 0x300
60static PyObject *bytes_cache = NULL;
61#endif
62
// Python2 only: cache for "unicode" values.
63#if PYTHON_VERSION < 0x300
64static PyObject *unicode_cache = NULL;
65#endif
66
67static PyObject *tuple_cache = NULL;
68
69static PyObject *list_cache = NULL;
70
71static PyObject *dict_cache = NULL;
72
73static PyObject *set_cache = NULL;
74
75static PyObject *frozenset_cache = NULL;
76
77// Use our own non-random hash for some of the things to be fast. This is inspired
78// from the original Python2 hash func, but we are mostly using it on pointer values
79static Py_hash_t Nuitka_FastHashBytes(const void *value, Py_ssize_t size) {
80 if (unlikely(size == 0)) {
81 return 0;
82 }
83
84 unsigned char *w = (unsigned char *)value;
85 long x = *w << 7;
86
87 while (--size >= 0) {
88 x = (1000003 * x) ^ *w++;
89 }
90
91 x ^= size;
92
93 // The value -1 is reserved for errors.
94 if (x == -1) {
95 x = -2;
96 }
97
98 return x;
99}
100
101static Py_hash_t our_list_hash(PyListObject *list) {
102 return Nuitka_FastHashBytes(&list->ob_item[0], Py_SIZE(list) * sizeof(PyObject *));
103}
104
105static PyObject *our_list_tp_richcompare(PyListObject *list1, PyListObject *list2, int op) {
106 assert(op == Py_EQ);
107
108 PyObject *result;
109
110 if (list1 == list2) {
111 result = Py_True;
112 } else if (Py_SIZE(list1) != Py_SIZE(list2)) {
113 result = Py_False;
114 } else if (memcmp(&list1->ob_item[0], &list2->ob_item[0], Py_SIZE(list1) * sizeof(PyObject *)) == 0) {
115 result = Py_True;
116 } else {
117 result = Py_False;
118 }
119
120 Py_INCREF_IMMORTAL(result);
121 return result;
122}
123
124static Py_hash_t our_tuple_hash(PyTupleObject *tuple) {
125 return Nuitka_FastHashBytes(&tuple->ob_item[0], Py_SIZE(tuple) * sizeof(PyObject *));
126}
127
128static PyObject *our_tuple_tp_richcompare(PyTupleObject *tuple1, PyTupleObject *tuple2, int op) {
129 assert(op == Py_EQ);
130
131 PyObject *result;
132
133 if (tuple1 == tuple2) {
134 result = Py_True;
135 } else if (Py_SIZE(tuple1) != Py_SIZE(tuple2)) {
136 result = Py_False;
137 } else if (memcmp(&tuple1->ob_item[0], &tuple2->ob_item[0], Py_SIZE(tuple1) * sizeof(PyObject *)) == 0) {
138 result = Py_True;
139 } else {
140 result = Py_False;
141 }
142
143 Py_INCREF_IMMORTAL(result);
144 return result;
145}
146
147static Py_hash_t our_set_hash(PyObject *set) {
148 Py_hash_t result = 0;
149 PyObject *key;
150 Py_ssize_t pos = 0;
151
152#if PYTHON_VERSION < 0x300
153 // Same sized set, simply check if values are identical. Other reductions should
154 // make it identical, or else this won't have the effect intended.
155 while (_PySet_Next(set, &pos, &key)) {
156 result *= 1000003;
157 result ^= Nuitka_FastHashBytes(&key, sizeof(PyObject *));
158 }
159#else
160 Py_hash_t unused;
161
162 while (_PySet_NextEntry(set, &pos, &key, &unused)) {
163 result *= 1000003;
164 result ^= Nuitka_FastHashBytes(&key, sizeof(PyObject *));
165 }
166#endif
167
168 return result;
169}
170
171static PyObject *our_set_tp_richcompare(PyObject *set1, PyObject *set2, int op) {
172 assert(op == Py_EQ);
173
174 PyObject *result;
175
176 Py_ssize_t pos1 = 0, pos2 = 0;
177 PyObject *key1, *key2;
178
179 if (Py_SIZE(set1) != Py_SIZE(set2)) {
180 result = Py_False;
181 } else {
182 result = Py_True;
183
184#if PYTHON_VERSION < 0x300
185 // Same sized set, simply check if values are identical. Other reductions should
186 // make it identical, or else this won't have the effect intended.
187 while (_PySet_Next(set1, &pos1, &key1)) {
188 {
189 NUITKA_MAY_BE_UNUSED int res = _PySet_Next(set2, &pos2, &key2);
190 assert(res != 0);
191 }
192
193 if (key1 != key2) {
194 result = Py_False;
195 break;
196 }
197 }
198#else
199 Py_hash_t unused1, unused2;
200
201 // Same sized dictionary, simply check if values are identical. Other reductions should
202 // make it identical, or else this won't have the effect intended.
203 while (_PySet_NextEntry(set1, &pos1, &key1, &unused1)) {
204 {
205 NUITKA_MAY_BE_UNUSED int res = _PySet_NextEntry(set2, &pos2, &key2, &unused2);
206 assert(res != 0);
207 }
208
209 if (key1 != key2) {
210 result = Py_False;
211 break;
212 }
213 }
214#endif
215 }
216
217 Py_INCREF_IMMORTAL(result);
218 return result;
219}
220
221static PyObject *our_float_tp_richcompare(PyFloatObject *a, PyFloatObject *b, int op) {
222 assert(op == Py_EQ);
223
224 PyObject *result;
225
226 // Avoid the C math when comparing, for it makes too many values equal or unequal.
227 if (memcmp(&a->ob_fval, &b->ob_fval, sizeof(b->ob_fval)) == 0) {
228 result = Py_True;
229 } else {
230 result = Py_False;
231 }
232
233 Py_INCREF_IMMORTAL(result);
234 return result;
235}
236
237static Py_hash_t our_dict_hash(PyObject *dict) {
238 Py_hash_t result = 0;
239
240 Py_ssize_t pos = 0;
241 PyObject *key, *value;
242
243 while (Nuitka_DictNext(dict, &pos, &key, &value)) {
244 result *= 1000003;
245 result ^= Nuitka_FastHashBytes(&key, sizeof(PyObject *));
246 result *= 1000003;
247 result ^= Nuitka_FastHashBytes(&value, sizeof(PyObject *));
248 }
249
250 return result;
251}
252
253static PyObject *our_dict_tp_richcompare(PyObject *a, PyObject *b, int op) {
254 PyObject *result;
255
256 if (Py_SIZE(a) != Py_SIZE(b)) {
257 result = Py_False;
258 } else {
259 result = Py_True;
260
261 Py_ssize_t pos1 = 0, pos2 = 0;
262 PyObject *key1, *value1;
263 PyObject *key2, *value2;
264
265 // Same sized dictionary, simply check if key and values are identical.
266 // Other reductions should make it identical, or else this won't have the
267 // effect intended.
268 while (Nuitka_DictNext(a, &pos1, &key1, &value1)) {
269 {
270 NUITKA_MAY_BE_UNUSED int res = Nuitka_DictNext(b, &pos2, &key2, &value2);
271 assert(res != 0);
272 }
273
274 if (key1 != key2 || value1 != value2) {
275 result = Py_False;
276 break;
277 }
278 }
279 }
280
281 Py_INCREF_IMMORTAL(result);
282 return result;
283}
284
285// For creation of small long singleton long values as required by Python3.
// Only defined for Python3 before the version encoded as 0x3b0. From 0x390 on
// it is a pointer that "initCaches" assigns, before that it is an array with
// one slot per value that "initCaches" fills.
286#if PYTHON_VERSION < 0x3b0
287#if PYTHON_VERSION >= 0x390
288PyObject **Nuitka_Long_SmallValues;
289#elif PYTHON_VERSION >= 0x300
290PyObject *Nuitka_Long_SmallValues[NUITKA_STATIC_SMALLINT_VALUE_MAX - NUITKA_STATIC_SMALLINT_VALUE_MIN + 1];
291#endif
292#endif
293
294static void initCaches(void) {
295 static bool init_done = false;
296 if (init_done == true) {
297 return;
298 }
299
300#if PYTHON_VERSION < 0x300
301 int_cache = PyDict_New();
302#endif
303
304 long_cache = PyDict_New();
305
306 float_cache = PyDict_New();
307
308#if PYTHON_VERSION >= 0x300
309 bytes_cache = PyDict_New();
310#endif
311
312#if PYTHON_VERSION < 0x300
313 unicode_cache = PyDict_New();
314#endif
315
316 tuple_cache = PyDict_New();
317
318 list_cache = PyDict_New();
319
320 dict_cache = PyDict_New();
321
322 set_cache = PyDict_New();
323
324 frozenset_cache = PyDict_New();
325
326#if PYTHON_VERSION < 0x3b0
327#if PYTHON_VERSION >= 0x390
328 // On Python3.9+ these are exposed in the interpreter.
329 Nuitka_Long_SmallValues = (PyObject **)_PyInterpreterState_GET()->small_ints;
330#elif PYTHON_VERSION >= 0x300
331 for (long i = NUITKA_STATIC_SMALLINT_VALUE_MIN; i < NUITKA_STATIC_SMALLINT_VALUE_MAX; i++) {
332 // Have to use the original API here since out "Nuitka_PyLong_FromLong"
333 // would insist on using "Nuitka_Long_SmallValues" to produce it.
334 PyObject *value = PyLong_FromLong(i);
335 Nuitka_Long_SmallValues[NUITKA_TO_SMALL_VALUE_OFFSET(i)] = value;
336 }
337#endif
338#endif
339
340 init_done = true;
341}
342
343static void insertToDictCache(PyObject *dict, PyObject **value) {
344 PyObject *item = PyDict_GetItem(dict, *value);
345
346 if (item != NULL) {
347 *value = item;
348 } else {
349 PyDict_SetItem(dict, *value, *value);
350 }
351}
352
353static void insertToDictCacheForcedHash(PyObject *dict, PyObject **value, hashfunc tp_hash,
354 richcmpfunc tp_richcompare) {
355 hashfunc old_hash = Py_TYPE(*value)->tp_hash;
356 richcmpfunc old_richcmpfunc = Py_TYPE(*value)->tp_richcompare;
357
358 // Hash is optional, e.g. for floats we can spare us doing our own hash,
359 // but we do equality
360 if (tp_hash != NULL) {
361 Py_TYPE(*value)->tp_hash = tp_hash;
362 }
363 Py_TYPE(*value)->tp_richcompare = tp_richcompare;
364
365 insertToDictCache(dict, value);
366
367 Py_TYPE(*value)->tp_hash = old_hash;
368 Py_TYPE(*value)->tp_richcompare = old_richcmpfunc;
369}
370
// Read a 16 bit unsigned value in native byte order from "*data" and advance
// "*data" past it.
static uint16_t unpackValueUint16(unsigned char const **data) {
    assert(sizeof(uint16_t) == 2);

    uint16_t result;
    memcpy(&result, *data, sizeof(result));

    *data = *data + sizeof(result);

    return result;
}
382
// Read a 32 bit unsigned value in native byte order from "*data" and advance
// "*data" past it.
static uint32_t unpackValueUint32(unsigned char const **data) {
    assert(sizeof(uint32_t) == 4);

    uint32_t result;
    memcpy(&result, *data, sizeof(result));

    *data = *data + sizeof(result);

    return result;
}
394
// Read a "double" in native representation from "*data" and advance "*data"
// past it.
static double unpackValueFloat(unsigned char const **data) {
    double result;
    unsigned char const *source = *data;

    memcpy(&result, source, sizeof(result));
    *data = source + sizeof(result);

    return result;
}
403
// Skip over a zero terminated string starting at "data". Returns the position
// right after the terminating zero byte.
static unsigned char const *_unpackValueCString(unsigned char const *data) {
    unsigned char const *cursor = data;

    while (*cursor != 0) {
        cursor++;
    }

    return cursor + 1;
}
410
// Decoding Variable-length quantity values. Each byte contributes its lower 7
// bits, least significant group first, and a byte below 128 ends the value.
// Advances "*data" past the bytes used.
static uint64_t _unpackVariableLength(unsigned char const **data) {
    unsigned char const *cursor = *data;

    uint64_t result = 0;
    uint64_t factor = 1;
    unsigned char current;

    do {
        current = *cursor++;

        result += (current & 127) * factor;
        factor <<= 7;
    } while (current >= 128);

    *data = cursor;

    return result;
}
431
432static PyObject *_unpackAnonValue(unsigned char anon_index) {
433 switch (anon_index) {
434 case 0:
435 return (PyObject *)Py_TYPE(Py_None);
436 case 1:
437 return (PyObject *)&PyEllipsis_Type;
438 case 2:
439 return (PyObject *)Py_TYPE(Py_NotImplemented);
440 case 3:
441 return (PyObject *)&PyFunction_Type;
442 case 4:
443 return (PyObject *)&PyGen_Type;
444 case 5:
445 return (PyObject *)&PyCFunction_Type;
446 case 6:
447 return (PyObject *)&PyCode_Type;
448 case 7:
449 return (PyObject *)&PyModule_Type;
450
451#if PYTHON_VERSION < 0x300
452 case 8:
453 return (PyObject *)&PyFile_Type;
454 case 9:
455 return (PyObject *)&PyClass_Type;
456 case 10:
457 return (PyObject *)&PyInstance_Type;
458 case 11:
459 return (PyObject *)&PyMethod_Type;
460#endif
461
462 default:
463 PRINT_FORMAT("Missing anon value for %d\n", (int)anon_index);
464 NUITKA_CANNOT_GET_HERE("Corrupt constants blob");
465 }
466}
467
468PyObject *_unpackSpecialValue(unsigned char special_index) {
469 switch (special_index) {
470 case 0:
471 return PyObject_GetAttrString((PyObject *)builtin_module, "Ellipsis");
472 case 1:
473 return PyObject_GetAttrString((PyObject *)builtin_module, "NotImplemented");
474 case 2:
475 return Py_SysVersionInfo;
476 default:
477 PRINT_FORMAT("Missing special value for %d\n", (int)special_index);
478 NUITKA_CANNOT_GET_HERE("Corrupt constants blob");
479 }
480}
481
// Create the unicode object for "size" bytes at "data" and make it shared, in
// the way that the Python version allows. Python3 decodes as UTF8 with the
// "surrogatepass" error handler. The "is_ascii" flag is only looked at for
// Python3 versions before the one encoded as 0x3d0, where it decides if the
// string gets interned, and "tstate" is only used from that version on.
482static PyObject *_Nuitka_Unicode_ImmortalFromStringAndSize(PyThreadState *tstate, const char *data, Py_ssize_t size,
483 bool is_ascii) {
484#if PYTHON_VERSION < 0x300
485 PyObject *u = PyUnicode_FromStringAndSize((const char *)data, size);
486#else
487 // spell-checker: ignore surrogatepass
488 PyObject *u = PyUnicode_DecodeUTF8((const char *)data, size, "surrogatepass");
489#endif
490
 // From version 0x3d0 on, all strings are interned as immortal.
491#if PYTHON_VERSION >= 0x3d0
492 _PyUnicode_InternImmortal(tstate->interp, &u);
493#elif PYTHON_VERSION >= 0x3c0
494 if (is_ascii) {
495 PyUnicode_InternInPlace(&u);
496 }
497
 // NOTE(review): 0x3c7 presumably stands for Python 3.12.7, matching the
 // run time check of "Py_Version" below - confirm. The state is set for all
 // strings, even if not interned above.
498#if PYTHON_VERSION >= 0x3c7
499 _PyUnicode_STATE(u).interned = SSTATE_INTERNED_IMMORTAL_STATIC;
500
 // Outside of exe mode, the version is checked at run time as well before
 // marking the string as statically allocated.
501#if _NUITKA_EXE_MODE
502 _PyUnicode_STATE(u).statically_allocated = 1;
503#else
504 if (Py_Version >= 0x30c0700) {
505 _PyUnicode_STATE(u).statically_allocated = 1;
506 }
507#endif
508#endif
509
510#elif PYTHON_VERSION >= 0x300
511 if (is_ascii) {
512 PyUnicode_InternInPlace(&u);
513 }
514#else
 // Python2 unicode values are de-duplicated through our own cache.
515 insertToDictCache(unicode_cache, &u);
516#endif
517
518 // Make sure our strings are consistent.
519#if PYTHON_VERSION >= 0x3c0 && !defined(__NUITKA_NO_ASSERT__)
520 // Note: Setting to immortal happens last, but we want to check now.
521 Py_SET_REFCNT_IMMORTAL(u);
522
523 _PyUnicode_CheckConsistency(u, 1);
524#endif
525
526 return u;
527}
528
529static unsigned char const *_unpackBlobConstants(PyThreadState *tstate, PyObject **output, unsigned char const *data,
530 int count);
531
532static unsigned char const *_unpackBlobConstant(PyThreadState *tstate, PyObject **output, unsigned char const *data) {
533
534 // Make sure we discover failures to assign.
535 *output = NULL;
536 bool is_object;
537
538 char c = *((char const *)data++);
539#ifdef _NUITKA_EXPERIMENTAL_DEBUG_CONSTANTS
540 unsigned char const *data_old = data;
541 printf("Type %c:\n", c);
542#endif
543 switch (c) {
544
545 case 'p': {
546 *output = *(output - 1);
547 is_object = true;
548
549 break;
550 }
551 case 'T': {
552 int size = (int)_unpackVariableLength(&data);
553
554 PyObject *t = PyTuple_New(size);
555
556 if (size > 0) {
557 data = _unpackBlobConstants(tstate, &PyTuple_GET_ITEM(t, 0), data, size);
558 }
559
560 insertToDictCacheForcedHash(tuple_cache, &t, (hashfunc)our_tuple_hash, (richcmpfunc)our_tuple_tp_richcompare);
561
562 *output = t;
563 is_object = true;
564
565 break;
566 }
567 case 'L': {
568 int size = (int)_unpackVariableLength(&data);
569
570 PyObject *l = PyList_New(size);
571
572 if (size > 0) {
573 data = _unpackBlobConstants(tstate, &PyList_GET_ITEM(l, 0), data, size);
574 }
575
576 insertToDictCacheForcedHash(list_cache, &l, (hashfunc)our_list_hash, (richcmpfunc)our_list_tp_richcompare);
577
578 *output = l;
579 is_object = true;
580
581 break;
582 }
583 case 'D': {
584 int size = (int)_unpackVariableLength(&data);
585
586 PyObject *d = _PyDict_NewPresized(size);
587
588 if (size > 0) {
589 NUITKA_DYNAMIC_ARRAY_DECL(keys, PyObject *, size);
590 NUITKA_DYNAMIC_ARRAY_DECL(values, PyObject *, size);
591
592 data = _unpackBlobConstants(tstate, &keys[0], data, size);
593 data = _unpackBlobConstants(tstate, &values[0], data, size);
594
595 for (int i = 0; i < size; i++) {
596 PyDict_SetItem(d, keys[i], values[i]);
597 }
598 }
599
600 insertToDictCacheForcedHash(dict_cache, &d, (hashfunc)our_dict_hash, (richcmpfunc)our_dict_tp_richcompare);
601
602 *output = d;
603 is_object = true;
604
605 break;
606 }
607 case 'P':
608 case 'S': {
609 int size = (int)_unpackVariableLength(&data);
610
611 PyObject *s;
612
613 if (c == 'S') {
614 s = PySet_New(NULL);
615 } else {
616 if (size == 0) {
617 // Get at the frozenset singleton of CPython and use it too. Some things
618 // rely on it being a singleton across the board.
619 static PyObject *empty_frozenset = NULL;
620
621 if (empty_frozenset == NULL) {
622 empty_frozenset = CALL_FUNCTION_WITH_SINGLE_ARG(tstate, (PyObject *)&PyFrozenSet_Type,
623 Nuitka_Bytes_FromStringAndSize("", 0));
624 }
625
626 s = empty_frozenset;
627 } else {
628 s = PyFrozenSet_New(NULL);
629 }
630 }
631
632 if (size > 0) {
633 NUITKA_DYNAMIC_ARRAY_DECL(values, PyObject *, size);
634
635 data = _unpackBlobConstants(tstate, &values[0], data, size);
636
637 for (int i = 0; i < size; i++) {
638 PySet_Add(s, values[i]);
639 }
640 }
641
642 // sets are cached globally too.
643 if (c == 'S') {
644 insertToDictCacheForcedHash(set_cache, &s, (hashfunc)our_set_hash, (richcmpfunc)our_set_tp_richcompare);
645 } else {
646 insertToDictCacheForcedHash(frozenset_cache, &s, (hashfunc)our_set_hash,
647 (richcmpfunc)our_set_tp_richcompare);
648 }
649
650 *output = s;
651 is_object = true;
652
653 break;
654 }
655#if PYTHON_VERSION < 0x300
656 case 'I':
657 case 'i': {
658 long value = (long)_unpackVariableLength(&data);
659 if (c == 'I') {
660 value = -value;
661 }
662
663 PyObject *i = PyInt_FromLong(value);
664
665 insertToDictCache(int_cache, &i);
666
667 *output = i;
668 is_object = true;
669
670 break;
671 }
672#endif
673 case 'l':
674 case 'q': {
675 // Positive/negative integer value with abs value < 2**31
676 uint64_t value = _unpackVariableLength(&data);
677
678 PyObject *l = Nuitka_LongFromCLong((c == 'l') ? ((long)value) : (-(long)value));
679 assert(l != NULL);
680
681 // Avoid the long cache, won't do anything useful for small ints
682#if PYTHON_VERSION >= 0x300
683 if (value < NUITKA_STATIC_SMALLINT_VALUE_MIN || value >= NUITKA_STATIC_SMALLINT_VALUE_MAX)
684#endif
685 {
686 insertToDictCache(long_cache, &l);
687 }
688
689 *output = l;
690 is_object = true;
691
692 break;
693 }
694 case 'G':
695 case 'g': {
696 PyObject *result = Nuitka_PyLong_FromLong(0);
697
698 int size = (int)_unpackVariableLength(&data);
699
700 PyObject *shift = Nuitka_PyLong_FromLong(31);
701
702 for (int i = 0; i < size; i++) {
703 result = PyNumber_InPlaceLshift(result, shift);
704
705 uint64_t value = _unpackVariableLength(&data);
706 PyObject *part = Nuitka_LongFromCLong((long)value);
707 assert(part != NULL);
708 result = PyNumber_InPlaceAdd(result, part);
709 Py_DECREF(part);
710 }
711
712 Py_DECREF(shift);
713
714 if (c == 'G') {
715 Nuitka_LongSetSignNegative(result);
716 }
717
718 insertToDictCache(long_cache, &result);
719
720 *output = result;
721 is_object = true;
722
723 break;
724 }
725 case 'f': {
726 double value = unpackValueFloat(&data);
727
728 PyObject *f = PyFloat_FromDouble(value);
729
730 // Floats are cached globally too.
731 insertToDictCacheForcedHash(float_cache, &f, NULL, (richcmpfunc)our_float_tp_richcompare);
732
733 *output = f;
734 is_object = true;
735
736 break;
737 }
738 case 'j': {
739 double real = unpackValueFloat(&data);
740 double imag = unpackValueFloat(&data);
741
742 *output = PyComplex_FromDoubles(real, imag);
743 is_object = true;
744
745 break;
746 }
747 case 'J': {
748 PyObject *parts[2];
749
750 // Complex via float is done for ones that are 0, nan, float.
751 data = _unpackBlobConstants(tstate, &parts[0], data, 2);
752
753 *output = BUILTIN_COMPLEX2(tstate, parts[0], parts[1]);
754 is_object = true;
755
756 break;
757 }
758#if PYTHON_VERSION < 0x300
759 case 'a':
760 case 'c': {
761 // Python2 str, potentially attribute, zero terminated.
762 size_t size = strlen((const char *)data);
763
764 PyObject *s = PyString_FromStringAndSize((const char *)data, size);
765 CHECK_OBJECT(s);
766
767 data += size + 1;
768
769 if (c == 'a') {
770 PyString_InternInPlace(&s);
771 }
772
773 *output = s;
774 is_object = true;
775
776 break;
777 }
778#else
779 case 'c': {
780 // Python3 bytes, zero terminated.
781 size_t size = strlen((const char *)data);
782
783 PyObject *b = Nuitka_Bytes_FromStringAndSize((const char *)data, size);
784 CHECK_OBJECT(b);
785
786 data += size + 1;
787
788 // Empty bytes value is here as well.
789 if (size > 1) {
790 insertToDictCache(bytes_cache, &b);
791 }
792
793 *output = b;
794 is_object = true;
795
796 break;
797 }
798#endif
799 case 'd': {
800 // Python2 str length 1 str, potentially attribute, or Python3 single byte
801
802#if PYTHON_VERSION < 0x300
803 PyObject *s = PyString_FromStringAndSize((const char *)data, 1);
804 data += 1;
805 *output = s;
806#else
807 PyObject *b = Nuitka_Bytes_FromStringAndSize((const char *)data, 1);
808 data += 1;
809 *output = b;
810#endif
811
812 is_object = true;
813
814 break;
815 }
816 case 'w': {
817 // Python2 unicode, Python3 str length 1, potentially attribute in Python3
818 PyObject *u = _Nuitka_Unicode_ImmortalFromStringAndSize(tstate, (const char *)data, 1, true);
819 data += 1;
820
821 *output = u;
822 is_object = true;
823
824 break;
825 }
826 case 'b': {
827 // Python2 str or Python3 bytes, length indicated.
828 int size = (int)_unpackVariableLength(&data);
829 assert(size > 1);
830
831 PyObject *b = Nuitka_Bytes_FromStringAndSize((const char *)data, size);
832 CHECK_OBJECT(b);
833
834 data += size;
835
836#if PYTHON_VERSION >= 0x300
837 insertToDictCache(bytes_cache, &b);
838#endif
839
840 *output = b;
841 is_object = true;
842
843 break;
844 }
845
846 case 'B': {
847 int size = (int)_unpackVariableLength(&data);
848
849 PyObject *b = PyByteArray_FromStringAndSize((const char *)data, size);
850 data += size;
851
852 *output = b;
853 is_object = true;
854
855 break;
856 }
857#if PYTHON_VERSION >= 0x300
858 case 'a': // Python3 attributes
859#endif
860 case 'u': { // Python2 unicode, Python3 str, zero terminated.
861 size_t size = strlen((const char *)data);
862 assert(size != 0);
863
864 PyObject *u = _Nuitka_Unicode_ImmortalFromStringAndSize(tstate, (const char *)data, size, c == 'a');
865 data += size + 1;
866
867 *output = u;
868 is_object = true;
869
870 break;
871 }
872 case 'v': {
873 int size = (int)_unpackVariableLength(&data);
874 assert(size != 0);
875
876 PyObject *u = _Nuitka_Unicode_ImmortalFromStringAndSize(tstate, (const char *)data, size, false);
877 data += size;
878
879 *output = u;
880 is_object = true;
881
882 break;
883 }
884 case 'n': {
885 *output = Py_None;
886 is_object = true;
887
888 break;
889 }
890 case 's': {
891 *output = _Nuitka_Unicode_ImmortalFromStringAndSize(tstate, (const char *)data, 0, true);
892 is_object = true;
893
894 break;
895 }
896 case 't': {
897 *output = Py_True;
898 is_object = true;
899
900 break;
901 }
902 case 'F': {
903 *output = Py_False;
904 is_object = true;
905
906 break;
907 }
908 case ':': {
909 // Slice object
910 PyObject *items[3];
911 data = _unpackBlobConstants(tstate, &items[0], data, 3);
912
913 PyObject *s = MAKE_SLICE_OBJECT3(tstate, items[0], items[1], items[2]);
914
915 *output = s;
916 is_object = true;
917
918 break;
919 }
920 case ';': {
921 // (x)range objects
922 PyObject *items[3];
923 data = _unpackBlobConstants(tstate, &items[0], data, 3);
924#if PYTHON_VERSION < 0x300
925 assert(PyInt_CheckExact(items[0]));
926 assert(PyInt_CheckExact(items[1]));
927 assert(PyInt_CheckExact(items[2]));
928
929 long start = PyInt_AS_LONG(items[0]);
930 long stop = PyInt_AS_LONG(items[1]);
931 long step = PyInt_AS_LONG(items[2]);
932
933 PyObject *s = MAKE_XRANGE(tstate, start, stop, step);
934#else
935 PyObject *s = BUILTIN_XRANGE3(tstate, items[0], items[1], items[2]);
936#endif
937 *output = s;
938 is_object = true;
939
940 break;
941 }
942 case 'M': {
943 // Anonymous builtin by table index value.
944 unsigned char anon_index = *data++;
945
946 *output = _unpackAnonValue(anon_index);
947 is_object = true;
948
949 break;
950 }
951 case 'Q': {
952 // Anonymous builtin by table index value.
953 unsigned char special_index = *data++;
954
955 *output = _unpackSpecialValue(special_index);
956 is_object = true;
957
958 break;
959 }
960 case 'O': {
961 // Builtin by name. TODO: Define number table shared by C and Python
962 // serialization to avoid using strings here.
963 char const *builtin_name = (char const *)data;
964 data = _unpackValueCString(data);
965
966 *output = PyObject_GetAttrString((PyObject *)builtin_module, builtin_name);
967 is_object = true;
968
969 break;
970 }
971 case 'E': {
972 // Builtin exception by name. TODO: Define number table shared by C and Python
973 // serialization to avoid using strings here.
974 char const *builtin_exception_name = (char const *)data;
975 data = _unpackValueCString(data);
976
977 *output = PyObject_GetAttrString((PyObject *)builtin_module, builtin_exception_name);
978 is_object = true;
979
980 break;
981 }
982 case 'Z': {
983 unsigned char v = *data++;
984
985 PyObject *z = NULL;
986
987 switch (v) {
988 case 0: {
989 static PyObject *_const_float_0_0 = NULL;
990
991 if (_const_float_0_0 == NULL) {
992 _const_float_0_0 = PyFloat_FromDouble(0.0);
993 }
994 z = _const_float_0_0;
995
996 break;
997 }
998 case 1: {
999 static PyObject *_const_float_minus_0_0 = NULL;
1000
1001 if (_const_float_minus_0_0 == NULL) {
1002 _const_float_minus_0_0 = PyFloat_FromDouble(0.0);
1003
1004 // Older Python3 has variable signs from C, so be explicit about it.
1005 PyFloat_SET_DOUBLE(_const_float_minus_0_0, copysign(PyFloat_AS_DOUBLE(_const_float_minus_0_0), -1.0));
1006 }
1007 z = _const_float_minus_0_0;
1008
1009 break;
1010 }
1011
1012 case 2: {
1013 static PyObject *_const_float_plus_nan = NULL;
1014
1015 if (_const_float_plus_nan == NULL) {
1016 _const_float_plus_nan = PyFloat_FromDouble(Py_NAN);
1017
1018 // Older Python3 has variable signs for NaN from C, so be explicit about it.
1019 PyFloat_SET_DOUBLE(_const_float_plus_nan, copysign(PyFloat_AS_DOUBLE(_const_float_plus_nan), 1.0));
1020 }
1021 z = _const_float_plus_nan;
1022
1023 break;
1024 }
1025 case 3: {
1026 static PyObject *_const_float_minus_nan = NULL;
1027
1028 if (_const_float_minus_nan == NULL) {
1029 _const_float_minus_nan = PyFloat_FromDouble(Py_NAN);
1030
1031 // Older Python3 has variable signs for NaN from C, so be explicit about it.
1032 PyFloat_SET_DOUBLE(_const_float_minus_nan, copysign(PyFloat_AS_DOUBLE(_const_float_minus_nan), -1.0));
1033 }
1034 z = _const_float_minus_nan;
1035
1036 break;
1037 }
1038 case 4: {
1039 static PyObject *_const_float_plus_inf = NULL;
1040
1041 if (_const_float_plus_inf == NULL) {
1042 _const_float_plus_inf = PyFloat_FromDouble(Py_HUGE_VAL);
1043
1044 // Older Python3 has variable signs from C, so be explicit about it.
1045 PyFloat_SET_DOUBLE(_const_float_plus_inf, copysign(PyFloat_AS_DOUBLE(_const_float_plus_inf), 1.0));
1046 }
1047 z = _const_float_plus_inf;
1048
1049 break;
1050 }
1051 case 5: {
1052 static PyObject *_const_float_minus_inf = NULL;
1053
1054 if (_const_float_minus_inf == NULL) {
1055 _const_float_minus_inf = PyFloat_FromDouble(Py_HUGE_VAL);
1056
1057 // Older Python3 has variable signs from C, so be explicit about it.
1058 PyFloat_SET_DOUBLE(_const_float_minus_inf, copysign(PyFloat_AS_DOUBLE(_const_float_minus_inf), -1.0));
1059 }
1060 z = _const_float_minus_inf;
1061
1062 break;
1063 }
1064 default: {
1065 PRINT_FORMAT("Missing decoding for %d\n", (int)c);
1066 NUITKA_CANNOT_GET_HERE("Corrupt constants blob");
1067 }
1068 }
1069
1070 // Floats are cached globally too.
1071 insertToDictCacheForcedHash(float_cache, &z, NULL, (richcmpfunc)our_float_tp_richcompare);
1072
1073 *output = z;
1074 is_object = true;
1075
1076 break;
1077 }
1078 case 'X': {
1079 // Blob data pointer, user knowns size.
1080 uint64_t size = _unpackVariableLength(&data);
1081
1082 *output = (PyObject *)data;
1083 is_object = false;
1084
1085 data += size;
1086
1087 break;
1088 }
1089#if PYTHON_VERSION >= 0x390
1090 case 'A': {
1091 // GenericAlias object
1092 PyObject *items[2];
1093 data = _unpackBlobConstants(tstate, &items[0], data, 2);
1094
1095 PyObject *g = Py_GenericAlias(items[0], items[1]);
1096
1097 // TODO: Maybe deduplicate.
1098 *output = g;
1099
1100 is_object = true;
1101 break;
1102 }
1103#endif
1104#if PYTHON_VERSION >= 0x3a0
1105 case 'H': {
1106 // UnionType object
1107 PyObject *args;
1108 data = _unpackBlobConstants(tstate, &args, data, 1);
1109
1110 PyObject *union_type = MAKE_UNION_TYPE(args);
1111
1112 // TODO: Maybe deduplicate.
1113 *output = union_type;
1114
1115 is_object = true;
1116 break;
1117 }
1118#endif
1119 case 'C': {
1120 // Code object, without the filename, we let the module do that,
1121 // depending on the source mode and this is highly compact
1122 // representation of it.
1123
1124 // First, flags with the optional bits. It's handling
1125 // must match that of encoder 100%, the flag value to
1126 // use changes per version and mode.
1127 uint64_t flags = _unpackVariableLength(&data);
1128
1129 // Current flag indicator value, we trust the C compiler
1130 // to optimize it away among the ifdefs.
1131 uint64_t flag_base = 1;
1132
1133 // Code object flags as used by Python, encoded in the
1134 // flags as well.
1135 int co_flags = 0;
1136
1137 // Name is mandatory, no flag needed.
1138 PyObject *function_name;
1139 data = _unpackBlobConstant(tstate, &function_name, data);
1140
1141 // Line number is mandatory, no flag needed. Encoded values start at 0,
1142 // where 1 is what is normally used.
1143 int line_number = (int)_unpackVariableLength(&data) + 1;
1144
1145 // Right now this is only argument names, so argument count is implied,
1146 // it is mandatory so no flag is needed, empty value is very compact
1147 // anyway and rare.
1148 PyObject *arg_names;
1149 data = _unpackBlobConstant(tstate, &arg_names, data);
1150
1151 // TODO: Not sure if this is redundant potentially it can be derives
1152 // from the var names already. It might be possible to derive by other
1153 // means.
1154 int arg_count = (int)_unpackVariableLength(&data);
1155
1156 // It is version specific if we have this, and dependent on a flag, if
1157 // it's present at all.
1158#if PYTHON_VERSION >= 0x3b0
1159 PyObject *function_qualname;
1160
1161 if (flags & flag_base) {
1162 data = _unpackBlobConstant(tstate, &function_qualname, data);
1163 } else {
1164 function_qualname = function_name;
1165 }
1166 flag_base <<= 1;
1167#endif
1168
1169 // Free vars are optional.
1170 PyObject *free_vars = NULL;
1171
1172 if (flags & flag_base) {
1173 data = _unpackBlobConstant(tstate, &free_vars, data);
1174 }
1175 flag_base <<= 1;
1176
1177#if PYTHON_VERSION >= 0x300
1178 int kw_only_count = 0;
1179 if (flags & flag_base) {
1180 kw_only_count = (int)_unpackVariableLength(&data) + 1;
1181 }
1182 flag_base <<= 1;
1183 assert(kw_only_count >= 0);
1184#endif
1185
1186#if PYTHON_VERSION >= 0x380
1187 int pos_only_count = 0;
1188 if (flags & flag_base) {
1189 pos_only_count = (int)_unpackVariableLength(&data) + 1;
1190 }
1191 flag_base <<= 1;
1192 assert(pos_only_count >= 0);
1193#endif
1194
1195 // TODO: For pre-Python3.5 we could save one bit here, but not worth it
1196 // for now.
1197#if PYTHON_VERSION >= 0x360
1198 if ((flags & (flag_base * 3)) == (flag_base * 3)) {
1199 co_flags += CO_ASYNC_GENERATOR;
1200 } else
1201#endif
1202#if PYTHON_VERSION >= 0x350
1203 if ((flags & (flag_base * 2)) == (flag_base * 2)) {
1204 co_flags += CO_COROUTINE;
1205 } else
1206#endif
1207 if (flags & flag_base) {
1208 co_flags += CO_GENERATOR;
1209 }
1210
1211 flag_base <<= 2;
1212
1213 if (flags & flag_base) {
1214 co_flags += CO_OPTIMIZED;
1215 }
1216 flag_base <<= 1;
1217
1218 if (flags & flag_base) {
1219 co_flags += CO_NEWLOCALS;
1220 }
1221 flag_base <<= 1;
1222
1223 if (flags & flag_base) {
1224 co_flags += CO_VARARGS;
1225 }
1226 flag_base <<= 1;
1227
1228 if (flags & flag_base) {
1229 co_flags += CO_VARKEYWORDS;
1230 }
1231 flag_base <<= 1;
1232
1233#if PYTHON_VERSION < 0x300
1234 if (flags & flag_base) {
1235 co_flags += CO_FUTURE_DIVISION;
1236 }
1237 flag_base <<= 1;
1238#endif
1239
1240 if (flags & flag_base) {
1241 co_flags += CO_FUTURE_UNICODE_LITERALS;
1242 }
1243 flag_base <<= 1;
1244
1245#if PYTHON_VERSION < 0x300
1246 if (flags & flag_base) {
1247 co_flags += CO_FUTURE_PRINT_FUNCTION;
1248 }
1249 flag_base <<= 1;
1250#endif
1251
1252#if PYTHON_VERSION < 0x300
1253 if (flags & flag_base) {
1254 co_flags += CO_FUTURE_ABSOLUTE_IMPORT;
1255 }
1256 flag_base <<= 1;
1257#endif
1258
1259#if PYTHON_VERSION >= 0x350 && PYTHON_VERSION < 0x370
1260 if (flags & flag_base) {
1261 co_flags += CO_FUTURE_GENERATOR_STOP;
1262 }
1263 flag_base <<= 1;
1264#endif
1265
1266#if PYTHON_VERSION >= 0x370
1267 if (flags & flag_base) {
1268 co_flags += CO_FUTURE_ANNOTATIONS;
1269 }
1270 flag_base <<= 1;
1271#endif
1272
1273#if PYTHON_VERSION >= 0x300
1274 if (flags & flag_base) {
1275 co_flags += CO_FUTURE_BARRY_AS_BDFL;
1276 }
1277 flag_base <<= 1;
1278#endif
1279
1280 // Filename will be supplied later during usage.
1281 *output = (PyObject *)MAKE_CODE_OBJECT(Py_None, line_number, co_flags, function_name, function_qualname,
1282 arg_names, free_vars, arg_count, kw_only_count, pos_only_count);
1283
1284 CHECK_OBJECT(*output);
1285
1286 is_object = true;
1287 break;
1288 }
1289 case '.': {
1290 PRINT_STRING("Missing blob values\n");
1291 NUITKA_CANNOT_GET_HERE("Corrupt constants blob");
1292 }
1293 default:
1294 PRINT_FORMAT("Missing decoding for %d\n", (int)c);
1295 NUITKA_CANNOT_GET_HERE("Corrupt constants blob");
1296 }
1297
1298#ifdef _NUITKA_EXPERIMENTAL_DEBUG_CONSTANTS
1299 printf("Size for %c was %d\n", c, data - data_old);
1300#endif
1301
1302 // Discourage in-place operations from modifying these. These
1303 // might be put into containers, therefore take 2 refs to be
1304 // accounting for the container too.
1305 if (is_object == true) {
1306 CHECK_OBJECT(*output);
1307
1308#if PYTHON_VERSION < 0x3c0
1309 Py_INCREF(*output);
1310 Py_INCREF(*output);
1311#else
1312 Py_SET_REFCNT_IMMORTAL(*output);
1313#endif
1314 }
1315
1316 return data;
1317}
1318
1319static unsigned char const *_unpackBlobConstants(PyThreadState *tstate, PyObject **output, unsigned char const *data,
1320 int count) {
1321 for (int _i = 0; _i < count; _i++) {
1322 data = _unpackBlobConstant(tstate, output, data);
1323
1324 output += 1;
1325 }
1326
1327 return data;
1328}
1329
1330static void unpackBlobConstants(PyThreadState *tstate, PyObject **output, unsigned char const *data) {
1331 int count = (int)unpackValueUint16(&data);
1332
1333#ifdef _NUITKA_EXPERIMENTAL_DEBUG_CONSTANTS
1334 printf("unpackBlobConstants count %d\n", count);
1335#endif
1336 _unpackBlobConstants(tstate, output, data, count);
1337}
1338
1339#if _NUITKA_CONSTANTS_FROM_MACOS_SECTION
1340
1341#include <mach-o/getsect.h>
1342#include <mach-o/ldsyms.h>
1343
1344#if !_NUITKA_EXE_MODE
1345static int findMacOSDllImageId(void) {
1346 Dl_info where;
1347 int res = dladdr((void *)findMacOSDllImageId, &where);
1348 assert(res != 0);
1349
1350 char const *dll_filename = where.dli_fname;
1351
1352 unsigned long image_count = _dyld_image_count();
1353
1354 for (int i = 0; i < image_count; i++) {
1355 // Ignore entries without a header.
1356 struct mach_header const *header = _dyld_get_image_header(i);
1357 if (header == NULL) {
1358 continue;
1359 }
1360
1361 if (strcmp(dll_filename, _dyld_get_image_name(i)) == 0) {
1362 return i;
1363 }
1364 }
1365
1366 return -1;
1367}
1368#endif
1369
1370#ifdef __LP64__
1371#define mach_header_arch mach_header_64
1372#else
1373#define mach_header_arch mach_header
1374#endif
1375
// Look up the data of the section "constants" in the segment "constants" of
// the running binary and return the pointer "getsectiondata" gives for it.
//
// In executable mode the header of the main program is used, otherwise the
// header of the image that "findMacOSDllImageId" identifies.
unsigned char *findMacOSBinarySection(void) {
    struct mach_header_arch const *image_header;

#if _NUITKA_EXE_MODE
    image_header = &_mh_execute_header;
#else
    int const dll_image_id = findMacOSDllImageId();
    assert(dll_image_id != -1);

    image_header = (struct mach_header_arch const *)_dyld_get_image_header(dll_image_id);
#endif

    // The API wants a place to store the section size, the value is not used.
    unsigned long section_size;
    unsigned char *section_data = getsectiondata(image_header, "constants", "constants", &section_size);

    return section_data;
}
1389
1390#endif
1391
1392void loadConstantsBlob(PyThreadState *tstate, PyObject **output, char const *name) {
1393 static bool init_done = false;
1394
1395 if (init_done == false) {
1396 NUITKA_PRINT_TIMING("loadConstantsBlob(): One time init.");
1397
1398#ifdef _NUITKA_EXPERIMENTAL_DEBUG_CONSTANTS
1399 printf("loadConstantsBlob '%s' one time init\n", name);
1400#endif
1401
1402#if defined(_NUITKA_CONSTANTS_FROM_INCBIN)
1403 constant_bin = getConstantsBlobData();
1404#elif defined(_NUITKA_CONSTANTS_FROM_RESOURCE)
1405#if _NUITKA_EXE_MODE
1406 // Using NULL as this indicates running program.
1407 HMODULE handle = NULL;
1408#else
1409 HMODULE handle = getDllModuleHandle();
1410#endif
1411
1412 constant_bin = (const unsigned char *)LockResource(
1413 LoadResource(handle, FindResource(handle, MAKEINTRESOURCE(3), RT_RCDATA)));
1414
1415 assert(constant_bin);
1416#elif _NUITKA_CONSTANTS_FROM_MACOS_SECTION
1417 constant_bin = findMacOSBinarySection();
1418
1419 assert(constant_bin);
1420#endif
1421 NUITKA_PRINT_TIMING("loadConstantsBlob(): Found blob, decoding now.");
1422 DECODE(constant_bin);
1423
1424 NUITKA_PRINT_TIMING("loadConstantsBlob(): CRC32 that blob for correctness.");
1425 uint32_t hash = unpackValueUint32(&constant_bin);
1426 uint32_t size = unpackValueUint32(&constant_bin);
1427
1428#ifdef _NUITKA_EXPERIMENTAL_DEBUG_CONSTANTS
1429 printf("loadConstantsBlob '%u' hash value\n", hash);
1430 printf("loadConstantsBlob '%u' size value\n", size);
1431#endif
1432 if (calcCRC32(constant_bin, size) != hash) {
1433 puts("Error, corrupted constants object");
1434 abort();
1435 }
1436
1437#ifdef _NUITKA_EXPERIMENTAL_DEBUG_CONSTANTS
1438 printf("Checked CRC32 to match hash %u size %u\n", hash, size);
1439#endif
1440
1441 NUITKA_PRINT_TIMING("loadConstantsBlob(): One time init complete.");
1442
1443 init_done = true;
1444 }
1445
1446#ifdef _NUITKA_EXPERIMENTAL_DEBUG_CONSTANTS
1447 printf("Loading blob named '%s'\n", name);
1448#endif
1449 // Python 3.9 or higher cannot create dictionary before calling init so avoid it.
1450 if (strcmp(name, ".bytecode") != 0) {
1451 initCaches();
1452 }
1453
1454 unsigned char const *w = constant_bin;
1455
1456 for (;;) {
1457 int match = strcmp(name, (char const *)w);
1458 w += strlen((char const *)w) + 1;
1459
1460#ifdef _NUITKA_EXPERIMENTAL_DEBUG_CONSTANTS
1461 printf("offset of blob size %d\n", w - constant_bin);
1462#endif
1463
1464 uint32_t size = unpackValueUint32(&w);
1465
1466 if (match == 0) {
1467#ifdef _NUITKA_EXPERIMENTAL_DEBUG_CONSTANTS
1468 printf("Loading blob named '%s' with size %d\n", name, size);
1469#endif
1470 break;
1471 }
1472
1473 // Skip other module data.
1474 w += size;
1475 }
1476
1477 unpackBlobConstants(tstate, output, w);
1478}
1479
1480// Part of "Nuitka", an optimizing Python compiler that is compatible and
1481// integrates with CPython, but also works on its own.
1482//
1483// Licensed under the Apache License, Version 2.0 (the "License");
1484// you may not use this file except in compliance with the License.
1485// You may obtain a copy of the License at
1486//
1487// http://www.apache.org/licenses/LICENSE-2.0
1488//
1489// Unless required by applicable law or agreed to in writing, software
1490// distributed under the License is distributed on an "AS IS" BASIS,
1491// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1492// See the License for the specific language governing permissions and
1493// limitations under the License.