Nuitka
The Python compiler
Loading...
Searching...
No Matches
HelpersConstantsBlob.c
1// Copyright 2025, Kay Hayen, mailto:kay.hayen@gmail.com find license text at end of file
2
12// This file is included from another C file, help IDEs to still parse it on
13// its own.
14#ifdef __IDE_ONLY__
15#include "nuitka/prelude.h"
16#endif
17
// Qualifier applied to the constants blob data declarations below. It is
// "const" unless _NUITKA_EXPERIMENTAL_WRITEABLE_CONSTANTS is enabled, in
// which case it expands to nothing.
#if !_NUITKA_EXPERIMENTAL_WRITEABLE_CONSTANTS
#define CONST_CONSTANT const
#else
#define CONST_CONSTANT
#endif
23
// "constant_bin" points at the start of the constants blob. Depending on the
// build mode the blob is a symbol provided elsewhere, or the pointer starts
// out as NULL and is assigned later.
#if defined(_NUITKA_CONSTANTS_FROM_LINKER)
// Symbol as provided by the linker, different for C++ and C11 mode.
#ifndef __cplusplus
extern CONST_CONSTANT unsigned char constant_bin_data[0];
#else
extern "C" CONST_CONSTANT unsigned char constant_bin_data[];
#endif

unsigned char const *constant_bin = &constant_bin_data[0];

#elif defined(_NUITKA_CONSTANTS_FROM_CODE)
// Symbol declared here and defined in another translation unit.
#ifndef __cplusplus
extern CONST_CONSTANT unsigned char constant_bin_data[];
#else
extern "C" CONST_CONSTANT unsigned char constant_bin_data[];
#endif

unsigned char const *constant_bin = &constant_bin_data[0];
#else
// Symbol to be assigned locally.
unsigned char const *constant_bin = NULL;
#endif

#if defined(_NUITKA_CONSTANTS_FROM_INCBIN)
// Accessor for the blob data in the "incbin" mode, defined elsewhere.
extern unsigned char const *getConstantsBlobData(void);
#endif
50
51#if PYTHON_VERSION < 0x300
52static PyObject *int_cache = NULL;
53#endif
54
55static PyObject *long_cache = NULL;
56
57static PyObject *float_cache = NULL;
58
59#if PYTHON_VERSION >= 0x300
60static PyObject *bytes_cache = NULL;
61#endif
62
63#if PYTHON_VERSION < 0x300
64static PyObject *unicode_cache = NULL;
65#endif
66
67static PyObject *tuple_cache = NULL;
68
69static PyObject *list_cache = NULL;
70
71static PyObject *dict_cache = NULL;
72
73static PyObject *set_cache = NULL;
74
75static PyObject *frozenset_cache = NULL;
76
77// Use our own non-random hash for some of the things to be fast. This is inspired
78// from the original Python2 hash func, but we are mostly using it on pointer values
79static Py_hash_t Nuitka_FastHashBytes(const void *value, Py_ssize_t size) {
80 if (unlikely(size == 0)) {
81 return 0;
82 }
83
84 unsigned char *w = (unsigned char *)value;
85 long x = *w << 7;
86
87 while (--size >= 0) {
88 x = (1000003 * x) ^ *w++;
89 }
90
91 x ^= size;
92
93 // The value -1 is reserved for errors.
94 if (x == -1) {
95 x = -2;
96 }
97
98 return x;
99}
100
101static Py_hash_t our_list_hash(PyListObject *list) {
102 return Nuitka_FastHashBytes(&list->ob_item[0], Py_SIZE(list) * sizeof(PyObject *));
103}
104
105static PyObject *our_list_tp_richcompare(PyListObject *list1, PyListObject *list2, int op) {
106 assert(op == Py_EQ);
107
108 PyObject *result;
109
110 if (list1 == list2) {
111 result = Py_True;
112 } else if (Py_SIZE(list1) != Py_SIZE(list2)) {
113 result = Py_False;
114 } else if (memcmp(&list1->ob_item[0], &list2->ob_item[0], Py_SIZE(list1) * sizeof(PyObject *)) == 0) {
115 result = Py_True;
116 } else {
117 result = Py_False;
118 }
119
120 Py_INCREF_IMMORTAL(result);
121 return result;
122}
123
124static Py_hash_t our_tuple_hash(PyTupleObject *tuple) {
125 return Nuitka_FastHashBytes(&tuple->ob_item[0], Py_SIZE(tuple) * sizeof(PyObject *));
126}
127
128static PyObject *our_tuple_tp_richcompare(PyTupleObject *tuple1, PyTupleObject *tuple2, int op) {
129 assert(op == Py_EQ);
130
131 PyObject *result;
132
133 if (tuple1 == tuple2) {
134 result = Py_True;
135 } else if (Py_SIZE(tuple1) != Py_SIZE(tuple2)) {
136 result = Py_False;
137 } else if (memcmp(&tuple1->ob_item[0], &tuple2->ob_item[0], Py_SIZE(tuple1) * sizeof(PyObject *)) == 0) {
138 result = Py_True;
139 } else {
140 result = Py_False;
141 }
142
143 Py_INCREF_IMMORTAL(result);
144 return result;
145}
146
147static Py_hash_t our_set_hash(PyObject *set) {
148 Py_hash_t result = 0;
149 PyObject *key;
150 Py_ssize_t pos = 0;
151
152#if PYTHON_VERSION < 0x300
153 // Same sized set, simply check if values are identical. Other reductions should
154 // make it identical, or else this won't have the effect intended.
155 while (_PySet_Next(set, &pos, &key)) {
156 result *= 1000003;
157 result ^= Nuitka_FastHashBytes(&key, sizeof(PyObject *));
158 }
159#else
160 Py_hash_t unused;
161
162 while (_PySet_NextEntry(set, &pos, &key, &unused)) {
163 result *= 1000003;
164 result ^= Nuitka_FastHashBytes(&key, sizeof(PyObject *));
165 }
166#endif
167
168 return result;
169}
170
171static PyObject *our_set_tp_richcompare(PyObject *set1, PyObject *set2, int op) {
172 assert(op == Py_EQ);
173
174 PyObject *result;
175
176 Py_ssize_t pos1 = 0, pos2 = 0;
177 PyObject *key1, *key2;
178
179 if (Py_SIZE(set1) != Py_SIZE(set2)) {
180 result = Py_False;
181 } else {
182 result = Py_True;
183
184#if PYTHON_VERSION < 0x300
185 // Same sized set, simply check if values are identical. Other reductions should
186 // make it identical, or else this won't have the effect intended.
187 while (_PySet_Next(set1, &pos1, &key1)) {
188 {
189 NUITKA_MAY_BE_UNUSED int res = _PySet_Next(set2, &pos2, &key2);
190 assert(res != 0);
191 }
192
193 if (key1 != key2) {
194 result = Py_False;
195 break;
196 }
197 }
198#else
199 Py_hash_t unused1, unused2;
200
201 // Same sized dictionary, simply check if values are identical. Other reductions should
202 // make it identical, or else this won't have the effect intended.
203 while (_PySet_NextEntry(set1, &pos1, &key1, &unused1)) {
204 {
205 NUITKA_MAY_BE_UNUSED int res = _PySet_NextEntry(set2, &pos2, &key2, &unused2);
206 assert(res != 0);
207 }
208
209 if (key1 != key2) {
210 result = Py_False;
211 break;
212 }
213 }
214#endif
215 }
216
217 Py_INCREF_IMMORTAL(result);
218 return result;
219}
220
221static PyObject *our_float_tp_richcompare(PyFloatObject *a, PyFloatObject *b, int op) {
222 assert(op == Py_EQ);
223
224 PyObject *result;
225
226 // Avoid the C math when comparing, for it makes too many values equal or unequal.
227 if (memcmp(&a->ob_fval, &b->ob_fval, sizeof(b->ob_fval)) == 0) {
228 result = Py_True;
229 } else {
230 result = Py_False;
231 }
232
233 Py_INCREF_IMMORTAL(result);
234 return result;
235}
236
237static Py_hash_t our_dict_hash(PyObject *dict) {
238 Py_hash_t result = 0;
239
240 Py_ssize_t pos = 0;
241 PyObject *key, *value;
242
243 while (Nuitka_DictNext(dict, &pos, &key, &value)) {
244 result *= 1000003;
245 result ^= Nuitka_FastHashBytes(&key, sizeof(PyObject *));
246 result *= 1000003;
247 result ^= Nuitka_FastHashBytes(&value, sizeof(PyObject *));
248 }
249
250 return result;
251}
252
253static PyObject *our_dict_tp_richcompare(PyObject *a, PyObject *b, int op) {
254 PyObject *result;
255
256 if (Py_SIZE(a) != Py_SIZE(b)) {
257 result = Py_False;
258 } else {
259 result = Py_True;
260
261 Py_ssize_t pos1 = 0, pos2 = 0;
262 PyObject *key1, *value1;
263 PyObject *key2, *value2;
264
265 // Same sized dictionary, simply check if key and values are identical.
266 // Other reductions should make it identical, or else this won't have the
267 // effect intended.
268 while (Nuitka_DictNext(a, &pos1, &key1, &value1)) {
269 {
270 NUITKA_MAY_BE_UNUSED int res = Nuitka_DictNext(b, &pos2, &key2, &value2);
271 assert(res != 0);
272 }
273
274 if (key1 != key2 || value1 != value2) {
275 result = Py_False;
276 break;
277 }
278 }
279 }
280
281 Py_INCREF_IMMORTAL(result);
282 return result;
283}
284
// For creation of small long singleton long values as required by Python3.
// Before Python3.9 this is a table owned here, for Python3.9 and Python3.10 it
// is a pointer that "initCaches" assigns, and from Python3.11 on it is not
// defined here at all.
#if PYTHON_VERSION >= 0x390 && PYTHON_VERSION < 0x3b0
PyObject **Nuitka_Long_SmallValues;
#elif PYTHON_VERSION >= 0x300 && PYTHON_VERSION < 0x390
PyObject *Nuitka_Long_SmallValues[NUITKA_STATIC_SMALLINT_VALUE_MAX - NUITKA_STATIC_SMALLINT_VALUE_MIN + 1];
#endif
293
294static void initCaches(void) {
295 static bool init_done = false;
296 if (init_done == true) {
297 return;
298 }
299
300#if PYTHON_VERSION < 0x300
301 int_cache = PyDict_New();
302#endif
303
304 long_cache = PyDict_New();
305
306 float_cache = PyDict_New();
307
308#if PYTHON_VERSION >= 0x300
309 bytes_cache = PyDict_New();
310#endif
311
312#if PYTHON_VERSION < 0x300
313 unicode_cache = PyDict_New();
314#endif
315
316 tuple_cache = PyDict_New();
317
318 list_cache = PyDict_New();
319
320 dict_cache = PyDict_New();
321
322 set_cache = PyDict_New();
323
324 frozenset_cache = PyDict_New();
325
326#if PYTHON_VERSION < 0x3b0
327#if PYTHON_VERSION >= 0x390
328 // On Python3.9+ these are exposed in the interpreter.
329 Nuitka_Long_SmallValues = (PyObject **)_PyInterpreterState_GET()->small_ints;
330#elif PYTHON_VERSION >= 0x300
331 for (long i = NUITKA_STATIC_SMALLINT_VALUE_MIN; i < NUITKA_STATIC_SMALLINT_VALUE_MAX; i++) {
332 // Have to use the original API here since out "Nuitka_PyLong_FromLong"
333 // would insist on using "Nuitka_Long_SmallValues" to produce it.
334 PyObject *value = PyLong_FromLong(i);
335 Nuitka_Long_SmallValues[NUITKA_TO_SMALL_VALUE_OFFSET(i)] = value;
336 }
337#endif
338#endif
339
340 init_done = true;
341}
342
343static void insertToDictCache(PyObject *dict, PyObject **value) {
344 PyObject *item = PyDict_GetItem(dict, *value);
345
346 if (item != NULL) {
347 *value = item;
348 } else {
349 PyDict_SetItem(dict, *value, *value);
350 }
351}
352
353static void insertToDictCacheForcedHash(PyObject *dict, PyObject **value, hashfunc tp_hash,
354 richcmpfunc tp_richcompare) {
355 hashfunc old_hash = Py_TYPE(*value)->tp_hash;
356 richcmpfunc old_richcmpfunc = Py_TYPE(*value)->tp_richcompare;
357
358 // Hash is optional, e.g. for floats we can spare us doing our own hash,
359 // but we do equality
360 if (tp_hash != NULL) {
361 Py_TYPE(*value)->tp_hash = tp_hash;
362 }
363 Py_TYPE(*value)->tp_richcompare = tp_richcompare;
364
365 insertToDictCache(dict, value);
366
367 Py_TYPE(*value)->tp_hash = old_hash;
368 Py_TYPE(*value)->tp_richcompare = old_richcmpfunc;
369}
370
// Read a 16 bit unsigned value in native byte order from "*data" and advance
// "*data" past it. The copy is done with "memcpy", so the source does not
// need to be aligned.
static uint16_t unpackValueUint16(unsigned char const **data) {
    uint16_t decoded;

    assert(sizeof(decoded) == 2);

    memcpy(&decoded, *data, sizeof(decoded));
    *data = *data + sizeof(decoded);

    return decoded;
}
382
// Read a 32 bit unsigned value in native byte order from "*data" and advance
// "*data" past it. The copy is done with "memcpy", so the source does not
// need to be aligned.
static uint32_t unpackValueUint32(unsigned char const **data) {
    uint32_t decoded;

    assert(sizeof(decoded) == 4);

    memcpy(&decoded, *data, sizeof(decoded));
    *data = *data + sizeof(decoded);

    return decoded;
}
394
// Read a C "double" in its native in-memory representation from "*data" and
// advance "*data" past it.
static double unpackValueFloat(unsigned char const **data) {
    double decoded;

    memcpy(&decoded, *data, sizeof(decoded));
    *data = *data + sizeof(decoded);

    return decoded;
}
403
// Skip over a zero terminated string starting at "data", returning a pointer
// to the byte right after its terminator.
static unsigned char const *_unpackValueCString(unsigned char const *data) {
    unsigned char const *cursor = data;

    while (*cursor != 0) {
        cursor += 1;
    }

    // Step over the terminator as well.
    return cursor + 1;
}
410
// Decoding Variable-length quantity values. Each byte contributes its lower 7
// bits, least significant group first, and a byte below 128 ends the value.
// "*data" is advanced past all bytes consumed.
static uint64_t _unpackVariableLength(unsigned char const **data) {
    uint64_t total = 0;
    uint64_t scale = 1;
    unsigned char byte;

    do {
        byte = **data;
        *data += 1;

        total += (byte & 127) * scale;
        scale <<= 7;
    } while (byte >= 128);

    return total;
}
431
432static PyObject *_unpackAnonValue(unsigned char anon_index) {
433 switch (anon_index) {
434 case 0:
435 return (PyObject *)Py_TYPE(Py_None);
436 case 1:
437 return (PyObject *)&PyEllipsis_Type;
438 case 2:
439 return (PyObject *)Py_TYPE(Py_NotImplemented);
440 case 3:
441 return (PyObject *)&PyFunction_Type;
442 case 4:
443 return (PyObject *)&PyGen_Type;
444 case 5:
445 return (PyObject *)&PyCFunction_Type;
446 case 6:
447 return (PyObject *)&PyCode_Type;
448 case 7:
449 return (PyObject *)&PyModule_Type;
450
451#if PYTHON_VERSION < 0x300
452 case 8:
453 return (PyObject *)&PyFile_Type;
454 case 9:
455 return (PyObject *)&PyClass_Type;
456 case 10:
457 return (PyObject *)&PyInstance_Type;
458 case 11:
459 return (PyObject *)&PyMethod_Type;
460#endif
461
462 default:
463 PRINT_FORMAT("Missing anon value for %d\n", (int)anon_index);
464 NUITKA_CANNOT_GET_HERE("Corrupt constants blob");
465 }
466}
467
468PyObject *_unpackSpecialValue(unsigned char special_index) {
469 switch (special_index) {
470 case 0:
471 return PyObject_GetAttrString((PyObject *)builtin_module, "Ellipsis");
472 case 1:
473 return PyObject_GetAttrString((PyObject *)builtin_module, "NotImplemented");
474 case 2:
475 return Py_SysVersionInfo;
476 default:
477 PRINT_FORMAT("Missing special value for %d\n", (int)special_index);
478 NUITKA_CANNOT_GET_HERE("Corrupt constants blob");
479 }
480}
481
482static PyObject *_Nuitka_Unicode_ImmortalFromStringAndSize(PyThreadState *tstate, const char *data, Py_ssize_t size,
483 bool is_ascii) {
484#if PYTHON_VERSION < 0x300
485 PyObject *u = PyUnicode_FromStringAndSize((const char *)data, size);
486#else
487 // spell-checker: ignore surrogatepass
488 PyObject *u = PyUnicode_DecodeUTF8((const char *)data, size, "surrogatepass");
489#endif
490
491#if PYTHON_VERSION >= 0x3d0
492 _PyUnicode_InternImmortal(tstate->interp, &u);
493#elif PYTHON_VERSION >= 0x3c0
494 if (is_ascii) {
495 PyUnicode_InternInPlace(&u);
496 }
497
498#if PYTHON_VERSION >= 0x3c7
499 _PyUnicode_STATE(u).interned = SSTATE_INTERNED_IMMORTAL_STATIC;
500
501#if _NUITKA_EXE_MODE
502 _PyUnicode_STATE(u).statically_allocated = 1;
503#else
504 if (Py_Version >= 0x30c0700) {
505 _PyUnicode_STATE(u).statically_allocated = 1;
506 }
507#endif
508#endif
509
510#elif PYTHON_VERSION >= 0x300
511 if (is_ascii) {
512 PyUnicode_InternInPlace(&u);
513 }
514#else
515 insertToDictCache(unicode_cache, &u);
516#endif
517
518 // Make sure our strings are consistent.
519 // TODO: Check with an assertion making build of Python 3.13.0 if this is really true,
520 // for 3.14 it ought to not be done.
521#if PYTHON_VERSION >= 0x3c0 && PYTHON_VERSION < 0x3e0 && !defined(__NUITKA_NO_ASSERT__)
522 // Note: Setting to immortal happens last, but we want to check now.
523 Py_SET_REFCNT_IMMORTAL(u);
524
525 _PyUnicode_CheckConsistency(u, 1);
526#endif
527
528 return u;
529}
530
531static unsigned char const *_unpackBlobConstants(PyThreadState *tstate, PyObject **output, unsigned char const *data,
532 int count);
533
534static unsigned char const *_unpackBlobConstant(PyThreadState *tstate, PyObject **output, unsigned char const *data) {
535
536 // Make sure we discover failures to assign.
537 *output = NULL;
538 bool is_object;
539
540 char c = *((char const *)data++);
541#ifdef _NUITKA_EXPERIMENTAL_DEBUG_CONSTANTS
542 unsigned char const *data_old = data;
543 printf("Type %c:\n", c);
544#endif
545 switch (c) {
546
547 case 'p': {
548 *output = *(output - 1);
549 is_object = true;
550
551 break;
552 }
553 case 'T': {
554 int size = (int)_unpackVariableLength(&data);
555
556 PyObject *t = PyTuple_New(size);
557
558 if (size > 0) {
559 data = _unpackBlobConstants(tstate, &PyTuple_GET_ITEM(t, 0), data, size);
560 }
561
562 insertToDictCacheForcedHash(tuple_cache, &t, (hashfunc)our_tuple_hash, (richcmpfunc)our_tuple_tp_richcompare);
563
564 *output = t;
565 is_object = true;
566
567 break;
568 }
569 case 'L': {
570 int size = (int)_unpackVariableLength(&data);
571
572 PyObject *l = PyList_New(size);
573
574 if (size > 0) {
575 data = _unpackBlobConstants(tstate, &PyList_GET_ITEM(l, 0), data, size);
576 }
577
578 insertToDictCacheForcedHash(list_cache, &l, (hashfunc)our_list_hash, (richcmpfunc)our_list_tp_richcompare);
579
580 *output = l;
581 is_object = true;
582
583 break;
584 }
585 case 'D': {
586 int size = (int)_unpackVariableLength(&data);
587
588 PyObject *d = _PyDict_NewPresized(size);
589
590 if (size > 0) {
591 NUITKA_DYNAMIC_ARRAY_DECL(keys, PyObject *, size);
592 NUITKA_DYNAMIC_ARRAY_DECL(values, PyObject *, size);
593
594 data = _unpackBlobConstants(tstate, &keys[0], data, size);
595 data = _unpackBlobConstants(tstate, &values[0], data, size);
596
597 for (int i = 0; i < size; i++) {
598 PyDict_SetItem(d, keys[i], values[i]);
599 }
600 }
601
602 insertToDictCacheForcedHash(dict_cache, &d, (hashfunc)our_dict_hash, (richcmpfunc)our_dict_tp_richcompare);
603
604 *output = d;
605 is_object = true;
606
607 break;
608 }
609 case 'P':
610 case 'S': {
611 int size = (int)_unpackVariableLength(&data);
612
613 PyObject *s;
614
615 if (c == 'S') {
616 s = PySet_New(NULL);
617 } else {
618 if (size == 0) {
619 // Get at the frozenset singleton of CPython and use it too. Some things
620 // rely on it being a singleton across the board.
621 static PyObject *empty_frozenset = NULL;
622
623 if (empty_frozenset == NULL) {
624 empty_frozenset = CALL_FUNCTION_WITH_SINGLE_ARG(tstate, (PyObject *)&PyFrozenSet_Type,
625 Nuitka_Bytes_FromStringAndSize("", 0));
626 }
627
628 s = empty_frozenset;
629 } else {
630 s = PyFrozenSet_New(NULL);
631 }
632 }
633
634 if (size > 0) {
635 NUITKA_DYNAMIC_ARRAY_DECL(values, PyObject *, size);
636
637 data = _unpackBlobConstants(tstate, &values[0], data, size);
638
639 for (int i = 0; i < size; i++) {
640 PySet_Add(s, values[i]);
641 }
642 }
643
644 // sets are cached globally too.
645 if (c == 'S') {
646 insertToDictCacheForcedHash(set_cache, &s, (hashfunc)our_set_hash, (richcmpfunc)our_set_tp_richcompare);
647 } else {
648 insertToDictCacheForcedHash(frozenset_cache, &s, (hashfunc)our_set_hash,
649 (richcmpfunc)our_set_tp_richcompare);
650 }
651
652 *output = s;
653 is_object = true;
654
655 break;
656 }
657#if PYTHON_VERSION < 0x300
658 case 'I':
659 case 'i': {
660 long value = (long)_unpackVariableLength(&data);
661 if (c == 'I') {
662 value = -value;
663 }
664
665 PyObject *i = PyInt_FromLong(value);
666
667 insertToDictCache(int_cache, &i);
668
669 *output = i;
670 is_object = true;
671
672 break;
673 }
674#endif
675 case 'l':
676 case 'q': {
677 // Positive/negative integer value with abs value < 2**31
678 uint64_t value = _unpackVariableLength(&data);
679
680 PyObject *l = Nuitka_LongFromCLong((c == 'l') ? ((long)value) : (-(long)value));
681 assert(l != NULL);
682
683 // Avoid the long cache, won't do anything useful for small ints
684#if PYTHON_VERSION >= 0x300
685 if (value < NUITKA_STATIC_SMALLINT_VALUE_MIN || value >= NUITKA_STATIC_SMALLINT_VALUE_MAX)
686#endif
687 {
688 insertToDictCache(long_cache, &l);
689 }
690
691 *output = l;
692 is_object = true;
693
694 break;
695 }
696 case 'G':
697 case 'g': {
698 PyObject *result = Nuitka_PyLong_FromLong(0);
699
700 int size = (int)_unpackVariableLength(&data);
701
702 PyObject *shift = Nuitka_PyLong_FromLong(31);
703
704 for (int i = 0; i < size; i++) {
705 result = PyNumber_InPlaceLshift(result, shift);
706
707 uint64_t value = _unpackVariableLength(&data);
708 PyObject *part = Nuitka_LongFromCLong((long)value);
709 assert(part != NULL);
710 result = PyNumber_InPlaceAdd(result, part);
711 Py_DECREF(part);
712 }
713
714 Py_DECREF(shift);
715
716 if (c == 'G') {
717 Nuitka_LongSetSignNegative(result);
718 }
719
720 insertToDictCache(long_cache, &result);
721
722 *output = result;
723 is_object = true;
724
725 break;
726 }
727 case 'f': {
728 double value = unpackValueFloat(&data);
729
730 PyObject *f = PyFloat_FromDouble(value);
731
732 // Floats are cached globally too.
733 insertToDictCacheForcedHash(float_cache, &f, NULL, (richcmpfunc)our_float_tp_richcompare);
734
735 *output = f;
736 is_object = true;
737
738 break;
739 }
740 case 'j': {
741 double real = unpackValueFloat(&data);
742 double imag = unpackValueFloat(&data);
743
744 *output = PyComplex_FromDoubles(real, imag);
745 is_object = true;
746
747 break;
748 }
749 case 'J': {
750 PyObject *parts[2];
751
752 // Complex via float is done for ones that are 0, nan, float.
753 data = _unpackBlobConstants(tstate, &parts[0], data, 2);
754
755 *output = BUILTIN_COMPLEX2(tstate, parts[0], parts[1]);
756 is_object = true;
757
758 break;
759 }
760#if PYTHON_VERSION < 0x300
761 case 'a':
762 case 'c': {
763 // Python2 str, potentially attribute, zero terminated.
764 size_t size = strlen((const char *)data);
765
766 PyObject *s = PyString_FromStringAndSize((const char *)data, size);
767 CHECK_OBJECT(s);
768
769 data += size + 1;
770
771 if (c == 'a') {
772 PyString_InternInPlace(&s);
773 }
774
775 *output = s;
776 is_object = true;
777
778 break;
779 }
780#else
781 case 'c': {
782 // Python3 bytes, zero terminated.
783 size_t size = strlen((const char *)data);
784
785 PyObject *b = Nuitka_Bytes_FromStringAndSize((const char *)data, size);
786 CHECK_OBJECT(b);
787
788 data += size + 1;
789
790 // Empty bytes value is here as well.
791 if (size > 1) {
792 insertToDictCache(bytes_cache, &b);
793 }
794
795 *output = b;
796 is_object = true;
797
798 break;
799 }
800#endif
801 case 'd': {
802 // Python2 str length 1 str, potentially attribute, or Python3 single byte
803
804#if PYTHON_VERSION < 0x300
805 PyObject *s = PyString_FromStringAndSize((const char *)data, 1);
806 data += 1;
807 *output = s;
808#else
809 PyObject *b = Nuitka_Bytes_FromStringAndSize((const char *)data, 1);
810 data += 1;
811 *output = b;
812#endif
813
814 is_object = true;
815
816 break;
817 }
818 case 'w': {
819 // Python2 unicode, Python3 str length 1, potentially attribute in Python3
820 PyObject *u = _Nuitka_Unicode_ImmortalFromStringAndSize(tstate, (const char *)data, 1, true);
821 data += 1;
822
823 *output = u;
824 is_object = true;
825
826 break;
827 }
828 case 'b': {
829 // Python2 str or Python3 bytes, length indicated.
830 int size = (int)_unpackVariableLength(&data);
831 assert(size > 1);
832
833 PyObject *b = Nuitka_Bytes_FromStringAndSize((const char *)data, size);
834 CHECK_OBJECT(b);
835
836 data += size;
837
838#if PYTHON_VERSION >= 0x300
839 insertToDictCache(bytes_cache, &b);
840#endif
841
842 *output = b;
843 is_object = true;
844
845 break;
846 }
847
848 case 'B': {
849 int size = (int)_unpackVariableLength(&data);
850
851 PyObject *b = PyByteArray_FromStringAndSize((const char *)data, size);
852 data += size;
853
854 *output = b;
855 is_object = true;
856
857 break;
858 }
859#if PYTHON_VERSION >= 0x300
860 case 'a': // Python3 attributes
861#endif
862 case 'u': { // Python2 unicode, Python3 str, zero terminated.
863 size_t size = strlen((const char *)data);
864 assert(size != 0);
865
866 PyObject *u = _Nuitka_Unicode_ImmortalFromStringAndSize(tstate, (const char *)data, size, c == 'a');
867 data += size + 1;
868
869 *output = u;
870 is_object = true;
871
872 break;
873 }
874 case 'v': {
875 int size = (int)_unpackVariableLength(&data);
876 assert(size != 0);
877
878 PyObject *u = _Nuitka_Unicode_ImmortalFromStringAndSize(tstate, (const char *)data, size, false);
879 data += size;
880
881 *output = u;
882 is_object = true;
883
884 break;
885 }
886 case 'n': {
887 *output = Py_None;
888 is_object = true;
889
890 break;
891 }
892 case 's': {
893 *output = _Nuitka_Unicode_ImmortalFromStringAndSize(tstate, (const char *)data, 0, true);
894 is_object = true;
895
896 break;
897 }
898 case 't': {
899 *output = Py_True;
900 is_object = true;
901
902 break;
903 }
904 case 'F': {
905 *output = Py_False;
906 is_object = true;
907
908 break;
909 }
910 case ':': {
911 // Slice object
912 PyObject *items[3];
913 data = _unpackBlobConstants(tstate, &items[0], data, 3);
914
915 PyObject *s = MAKE_SLICE_OBJECT3(tstate, items[0], items[1], items[2]);
916
917 *output = s;
918 is_object = true;
919
920 break;
921 }
922 case ';': {
923 // (x)range objects
924 PyObject *items[3];
925 data = _unpackBlobConstants(tstate, &items[0], data, 3);
926#if PYTHON_VERSION < 0x300
927 assert(PyInt_CheckExact(items[0]));
928 assert(PyInt_CheckExact(items[1]));
929 assert(PyInt_CheckExact(items[2]));
930
931 long start = PyInt_AS_LONG(items[0]);
932 long stop = PyInt_AS_LONG(items[1]);
933 long step = PyInt_AS_LONG(items[2]);
934
935 PyObject *s = MAKE_XRANGE(tstate, start, stop, step);
936#else
937 PyObject *s = BUILTIN_XRANGE3(tstate, items[0], items[1], items[2]);
938#endif
939 *output = s;
940 is_object = true;
941
942 break;
943 }
944 case 'M': {
945 // Anonymous builtin by table index value.
946 unsigned char anon_index = *data++;
947
948 *output = _unpackAnonValue(anon_index);
949 is_object = true;
950
951 break;
952 }
953 case 'Q': {
954 // Anonymous builtin by table index value.
955 unsigned char special_index = *data++;
956
957 *output = _unpackSpecialValue(special_index);
958 is_object = true;
959
960 break;
961 }
962 case 'O': {
963 // Builtin by name. TODO: Define number table shared by C and Python
964 // serialization to avoid using strings here.
965 char const *builtin_name = (char const *)data;
966 data = _unpackValueCString(data);
967
968 *output = PyObject_GetAttrString((PyObject *)builtin_module, builtin_name);
969 is_object = true;
970
971 break;
972 }
973 case 'E': {
974 // Builtin exception by name. TODO: Define number table shared by C and Python
975 // serialization to avoid using strings here.
976 char const *builtin_exception_name = (char const *)data;
977 data = _unpackValueCString(data);
978
979 *output = PyObject_GetAttrString((PyObject *)builtin_module, builtin_exception_name);
980 is_object = true;
981
982 break;
983 }
984 case 'Z': {
985 unsigned char v = *data++;
986
987 PyObject *z = NULL;
988
989 switch (v) {
990 case 0: {
991 static PyObject *_const_float_0_0 = NULL;
992
993 if (_const_float_0_0 == NULL) {
994 _const_float_0_0 = PyFloat_FromDouble(0.0);
995 }
996 z = _const_float_0_0;
997
998 break;
999 }
1000 case 1: {
1001 static PyObject *_const_float_minus_0_0 = NULL;
1002
1003 if (_const_float_minus_0_0 == NULL) {
1004 _const_float_minus_0_0 = PyFloat_FromDouble(0.0);
1005
1006 // Older Python3 has variable signs from C, so be explicit about it.
1007 PyFloat_SET_DOUBLE(_const_float_minus_0_0, copysign(PyFloat_AS_DOUBLE(_const_float_minus_0_0), -1.0));
1008 }
1009 z = _const_float_minus_0_0;
1010
1011 break;
1012 }
1013
1014 case 2: {
1015 static PyObject *_const_float_plus_nan = NULL;
1016
1017 if (_const_float_plus_nan == NULL) {
1018 _const_float_plus_nan = PyFloat_FromDouble(Py_NAN);
1019
1020 // Older Python3 has variable signs for NaN from C, so be explicit about it.
1021 PyFloat_SET_DOUBLE(_const_float_plus_nan, copysign(PyFloat_AS_DOUBLE(_const_float_plus_nan), 1.0));
1022 }
1023 z = _const_float_plus_nan;
1024
1025 break;
1026 }
1027 case 3: {
1028 static PyObject *_const_float_minus_nan = NULL;
1029
1030 if (_const_float_minus_nan == NULL) {
1031 _const_float_minus_nan = PyFloat_FromDouble(Py_NAN);
1032
1033 // Older Python3 has variable signs for NaN from C, so be explicit about it.
1034 PyFloat_SET_DOUBLE(_const_float_minus_nan, copysign(PyFloat_AS_DOUBLE(_const_float_minus_nan), -1.0));
1035 }
1036 z = _const_float_minus_nan;
1037
1038 break;
1039 }
1040 case 4: {
1041 static PyObject *_const_float_plus_inf = NULL;
1042
1043 if (_const_float_plus_inf == NULL) {
1044 _const_float_plus_inf = PyFloat_FromDouble(Py_HUGE_VAL);
1045
1046 // Older Python3 has variable signs from C, so be explicit about it.
1047 PyFloat_SET_DOUBLE(_const_float_plus_inf, copysign(PyFloat_AS_DOUBLE(_const_float_plus_inf), 1.0));
1048 }
1049 z = _const_float_plus_inf;
1050
1051 break;
1052 }
1053 case 5: {
1054 static PyObject *_const_float_minus_inf = NULL;
1055
1056 if (_const_float_minus_inf == NULL) {
1057 _const_float_minus_inf = PyFloat_FromDouble(Py_HUGE_VAL);
1058
1059 // Older Python3 has variable signs from C, so be explicit about it.
1060 PyFloat_SET_DOUBLE(_const_float_minus_inf, copysign(PyFloat_AS_DOUBLE(_const_float_minus_inf), -1.0));
1061 }
1062 z = _const_float_minus_inf;
1063
1064 break;
1065 }
1066 default: {
1067 PRINT_FORMAT("Missing decoding for %d\n", (int)c);
1068 NUITKA_CANNOT_GET_HERE("Corrupt constants blob");
1069 }
1070 }
1071
1072 // Floats are cached globally too.
1073 insertToDictCacheForcedHash(float_cache, &z, NULL, (richcmpfunc)our_float_tp_richcompare);
1074
1075 *output = z;
1076 is_object = true;
1077
1078 break;
1079 }
1080 case 'X': {
1081 // Blob data pointer, user knowns size.
1082 uint64_t size = _unpackVariableLength(&data);
1083
1084 *output = (PyObject *)data;
1085 is_object = false;
1086
1087 data += size;
1088
1089 break;
1090 }
1091#if PYTHON_VERSION >= 0x390
1092 case 'A': {
1093 // GenericAlias object
1094 PyObject *items[2];
1095 data = _unpackBlobConstants(tstate, &items[0], data, 2);
1096
1097 PyObject *g = Py_GenericAlias(items[0], items[1]);
1098
1099 // TODO: Maybe deduplicate.
1100 *output = g;
1101
1102 is_object = true;
1103 break;
1104 }
1105#endif
1106#if PYTHON_VERSION >= 0x3a0
1107 case 'H': {
1108 // UnionType object
1109 PyObject *args;
1110 data = _unpackBlobConstants(tstate, &args, data, 1);
1111
1112 PyObject *union_type = MAKE_UNION_TYPE(args);
1113
1114 // TODO: Maybe deduplicate.
1115 *output = union_type;
1116
1117 is_object = true;
1118 break;
1119 }
1120#endif
1121 case 'C': {
1122 // Code object, without the filename, we let the module do that,
1123 // depending on the source mode and this is highly compact
1124 // representation of it.
1125
1126 // First, flags with the optional bits. It's handling
1127 // must match that of encoder 100%, the flag value to
1128 // use changes per version and mode.
1129 uint64_t flags = _unpackVariableLength(&data);
1130
1131 // Current flag indicator value, we trust the C compiler
1132 // to optimize it away among the ifdefs.
1133 uint64_t flag_base = 1;
1134
1135 // Code object flags as used by Python, encoded in the
1136 // flags as well.
1137 int co_flags = 0;
1138
1139 // Name is mandatory, no flag needed.
1140 PyObject *function_name;
1141 data = _unpackBlobConstant(tstate, &function_name, data);
1142
1143 // Line number is mandatory, no flag needed. Encoded values start at 0,
1144 // where 1 is what is normally used.
1145 int line_number = (int)_unpackVariableLength(&data) + 1;
1146
1147 // Right now this is only argument names, so argument count is implied,
1148 // it is mandatory so no flag is needed, empty value is very compact
1149 // anyway and rare.
1150 PyObject *arg_names;
1151 data = _unpackBlobConstant(tstate, &arg_names, data);
1152
1153 // TODO: Not sure if this is redundant potentially it can be derives
1154 // from the var names already. It might be possible to derive by other
1155 // means.
1156 int arg_count = (int)_unpackVariableLength(&data);
1157
1158 // It is version specific if we have this, and dependent on a flag, if
1159 // it's present at all.
1160#if PYTHON_VERSION >= 0x3b0
1161 PyObject *function_qualname;
1162
1163 if (flags & flag_base) {
1164 data = _unpackBlobConstant(tstate, &function_qualname, data);
1165 } else {
1166 function_qualname = function_name;
1167 }
1168 flag_base <<= 1;
1169#endif
1170
1171 // Free vars are optional.
1172 PyObject *free_vars = NULL;
1173
1174 if (flags & flag_base) {
1175 data = _unpackBlobConstant(tstate, &free_vars, data);
1176 }
1177 flag_base <<= 1;
1178
1179#if PYTHON_VERSION >= 0x300
1180 int kw_only_count = 0;
1181 if (flags & flag_base) {
1182 kw_only_count = (int)_unpackVariableLength(&data) + 1;
1183 }
1184 flag_base <<= 1;
1185 assert(kw_only_count >= 0);
1186#endif
1187
1188#if PYTHON_VERSION >= 0x380
1189 int pos_only_count = 0;
1190 if (flags & flag_base) {
1191 pos_only_count = (int)_unpackVariableLength(&data) + 1;
1192 }
1193 flag_base <<= 1;
1194 assert(pos_only_count >= 0);
1195#endif
1196
1197 // TODO: For pre-Python3.5 we could save one bit here, but not worth it
1198 // for now.
1199#if PYTHON_VERSION >= 0x360
1200 if ((flags & (flag_base * 3)) == (flag_base * 3)) {
1201 co_flags += CO_ASYNC_GENERATOR;
1202 } else
1203#endif
1204#if PYTHON_VERSION >= 0x350
1205 if ((flags & (flag_base * 2)) == (flag_base * 2)) {
1206 co_flags += CO_COROUTINE;
1207 } else
1208#endif
1209 if (flags & flag_base) {
1210 co_flags += CO_GENERATOR;
1211 }
1212
1213 flag_base <<= 2;
1214
1215 if (flags & flag_base) {
1216 co_flags += CO_OPTIMIZED;
1217 }
1218 flag_base <<= 1;
1219
1220 if (flags & flag_base) {
1221 co_flags += CO_NEWLOCALS;
1222 }
1223 flag_base <<= 1;
1224
1225 if (flags & flag_base) {
1226 co_flags += CO_VARARGS;
1227 }
1228 flag_base <<= 1;
1229
1230 if (flags & flag_base) {
1231 co_flags += CO_VARKEYWORDS;
1232 }
1233 flag_base <<= 1;
1234
1235#if PYTHON_VERSION < 0x300
1236 if (flags & flag_base) {
1237 co_flags += CO_FUTURE_DIVISION;
1238 }
1239 flag_base <<= 1;
1240#endif
1241
1242 if (flags & flag_base) {
1243 co_flags += CO_FUTURE_UNICODE_LITERALS;
1244 }
1245 flag_base <<= 1;
1246
1247#if PYTHON_VERSION < 0x300
1248 if (flags & flag_base) {
1249 co_flags += CO_FUTURE_PRINT_FUNCTION;
1250 }
1251 flag_base <<= 1;
1252#endif
1253
1254#if PYTHON_VERSION < 0x300
1255 if (flags & flag_base) {
1256 co_flags += CO_FUTURE_ABSOLUTE_IMPORT;
1257 }
1258 flag_base <<= 1;
1259#endif
1260
1261#if PYTHON_VERSION >= 0x350 && PYTHON_VERSION < 0x370
1262 if (flags & flag_base) {
1263 co_flags += CO_FUTURE_GENERATOR_STOP;
1264 }
1265 flag_base <<= 1;
1266#endif
1267
1268#if PYTHON_VERSION >= 0x370
1269 if (flags & flag_base) {
1270 co_flags += CO_FUTURE_ANNOTATIONS;
1271 }
1272 flag_base <<= 1;
1273#endif
1274
1275#if PYTHON_VERSION >= 0x300
1276 if (flags & flag_base) {
1277 co_flags += CO_FUTURE_BARRY_AS_BDFL;
1278 }
1279 flag_base <<= 1;
1280#endif
1281
1282 // Filename will be supplied later during usage.
1283 *output = (PyObject *)MAKE_CODE_OBJECT(Py_None, line_number, co_flags, function_name, function_qualname,
1284 arg_names, free_vars, arg_count, kw_only_count, pos_only_count);
1285
1286 CHECK_OBJECT(*output);
1287
1288 is_object = true;
1289 break;
1290 }
1291 case '.': {
1292 PRINT_STRING("Missing blob values\n");
1293 NUITKA_CANNOT_GET_HERE("Corrupt constants blob");
1294 }
1295 default:
1296 PRINT_FORMAT("Missing decoding for %d\n", (int)c);
1297 NUITKA_CANNOT_GET_HERE("Corrupt constants blob");
1298 }
1299
1300#ifdef _NUITKA_EXPERIMENTAL_DEBUG_CONSTANTS
1301 printf("Size for %c was %d\n", c, data - data_old);
1302#endif
1303
1304 // Discourage in-place operations from modifying these. These
1305 // might be put into containers, therefore take 2 refs to be
1306 // accounting for the container too.
1307 if (is_object == true) {
1308 CHECK_OBJECT(*output);
1309
1310#if PYTHON_VERSION < 0x3c0
1311 Py_INCREF(*output);
1312 Py_INCREF(*output);
1313#else
1314 Py_SET_REFCNT_IMMORTAL(*output);
1315#endif
1316 }
1317
1318 return data;
1319}
1320
1321static unsigned char const *_unpackBlobConstants(PyThreadState *tstate, PyObject **output, unsigned char const *data,
1322 int count) {
1323 for (int _i = 0; _i < count; _i++) {
1324 data = _unpackBlobConstant(tstate, output, data);
1325
1326 output += 1;
1327 }
1328
1329 return data;
1330}
1331
1332static void unpackBlobConstants(PyThreadState *tstate, PyObject **output, unsigned char const *data) {
1333 int count = (int)unpackValueUint16(&data);
1334
1335#ifdef _NUITKA_EXPERIMENTAL_DEBUG_CONSTANTS
1336 printf("unpackBlobConstants count %d\n", count);
1337#endif
1338 _unpackBlobConstants(tstate, output, data, count);
1339}
1340
1341#if _NUITKA_CONSTANTS_FROM_MACOS_SECTION
1342
1343#include <mach-o/getsect.h>
1344#include <mach-o/ldsyms.h>
1345
1346#if !_NUITKA_EXE_MODE
1347static int findMacOSDllImageId(void) {
1348 Dl_info where;
1349 int res = dladdr((void *)findMacOSDllImageId, &where);
1350 assert(res != 0);
1351
1352 char const *dll_filename = where.dli_fname;
1353
1354 unsigned long image_count = _dyld_image_count();
1355
1356 for (int i = 0; i < image_count; i++) {
1357 // Ignore entries without a header.
1358 struct mach_header const *header = _dyld_get_image_header(i);
1359 if (header == NULL) {
1360 continue;
1361 }
1362
1363 if (strcmp(dll_filename, _dyld_get_image_name(i)) == 0) {
1364 return i;
1365 }
1366 }
1367
1368 return -1;
1369}
1370#endif
1371
1372#ifdef __LP64__
1373#define mach_header_arch mach_header_64
1374#else
1375#define mach_header_arch mach_header
1376#endif
1377
// Look up the data of the section "constants" in the segment "constants" of
// our own Mach-O image. For executables that is the main program header, and
// otherwise the header of the image found by "findMacOSDllImageId". The result
// of "getsectiondata" is passed on unchanged.
unsigned char *findMacOSBinarySection(void) {
    const struct mach_header_arch *image_header;

#if _NUITKA_EXE_MODE
    image_header = &_mh_execute_header;
#else
    int const dll_image_index = findMacOSDllImageId();
    assert(dll_image_index != -1);

    image_header = (const struct mach_header_arch *)_dyld_get_image_header(dll_image_index);
#endif

    // The size is required by the API, but not used by us.
    unsigned long section_size;
    unsigned char *section_data = getsectiondata(image_header, "constants", "constants", &section_size);

    return section_data;
}
1391
1392#endif
1393
1394void loadConstantsBlob(PyThreadState *tstate, PyObject **output, char const *name) {
1395 static bool init_done = false;
1396
1397 if (init_done == false) {
1398 NUITKA_PRINT_TIMING("loadConstantsBlob(): One time init.");
1399
1400#ifdef _NUITKA_EXPERIMENTAL_DEBUG_CONSTANTS
1401 printf("loadConstantsBlob '%s' one time init\n", name);
1402#endif
1403
1404#if defined(_NUITKA_CONSTANTS_FROM_INCBIN)
1405 constant_bin = getConstantsBlobData();
1406#elif defined(_NUITKA_CONSTANTS_FROM_RESOURCE)
1407#if _NUITKA_EXE_MODE
1408 // Using NULL as this indicates running program.
1409 HMODULE handle = NULL;
1410#else
1411 HMODULE handle = getDllModuleHandle();
1412#endif
1413
1414 constant_bin = (const unsigned char *)LockResource(
1415 LoadResource(handle, FindResource(handle, MAKEINTRESOURCE(3), RT_RCDATA)));
1416
1417 assert(constant_bin);
1418#elif _NUITKA_CONSTANTS_FROM_MACOS_SECTION
1419 constant_bin = findMacOSBinarySection();
1420
1421 assert(constant_bin);
1422#endif
1423 NUITKA_PRINT_TIMING("loadConstantsBlob(): Found blob, decoding now.");
1424 DECODE(constant_bin);
1425
1426 NUITKA_PRINT_TIMING("loadConstantsBlob(): CRC32 that blob for correctness.");
1427 uint32_t hash = unpackValueUint32(&constant_bin);
1428 uint32_t size = unpackValueUint32(&constant_bin);
1429
1430#ifdef _NUITKA_EXPERIMENTAL_DEBUG_CONSTANTS
1431 printf("loadConstantsBlob '%u' hash value\n", hash);
1432 printf("loadConstantsBlob '%u' size value\n", size);
1433#endif
1434 if (calcCRC32(constant_bin, size) != hash) {
1435 puts("Error, corrupted constants object");
1436 abort();
1437 }
1438
1439#ifdef _NUITKA_EXPERIMENTAL_DEBUG_CONSTANTS
1440 printf("Checked CRC32 to match hash %u size %u\n", hash, size);
1441#endif
1442
1443 NUITKA_PRINT_TIMING("loadConstantsBlob(): One time init complete.");
1444
1445 init_done = true;
1446 }
1447
1448#ifdef _NUITKA_EXPERIMENTAL_DEBUG_CONSTANTS
1449 printf("Loading blob named '%s'\n", name);
1450#endif
1451 // Python 3.9 or higher cannot create dictionary before calling init so avoid it.
1452 if (strcmp(name, ".bytecode") != 0) {
1453 initCaches();
1454 }
1455
1456 unsigned char const *w = constant_bin;
1457
1458 for (;;) {
1459 int match = strcmp(name, (char const *)w);
1460 w += strlen((char const *)w) + 1;
1461
1462#ifdef _NUITKA_EXPERIMENTAL_DEBUG_CONSTANTS
1463 printf("offset of blob size %d\n", w - constant_bin);
1464#endif
1465
1466 uint32_t size = unpackValueUint32(&w);
1467
1468 if (match == 0) {
1469#ifdef _NUITKA_EXPERIMENTAL_DEBUG_CONSTANTS
1470 printf("Loading blob named '%s' with size %d\n", name, size);
1471#endif
1472 break;
1473 }
1474
1475 // Skip other module data.
1476 w += size;
1477 }
1478
1479 unpackBlobConstants(tstate, output, w);
1480}
1481
1482// Part of "Nuitka", an optimizing Python compiler that is compatible and
1483// integrates with CPython, but also works on its own.
1484//
1485// Licensed under the Apache License, Version 2.0 (the "License");
1486// you may not use this file except in compliance with the License.
1487// You may obtain a copy of the License at
1488//
1489// http://www.apache.org/licenses/LICENSE-2.0
1490//
1491// Unless required by applicable law or agreed to in writing, software
1492// distributed under the License is distributed on an "AS IS" BASIS,
1493// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1494// See the License for the specific language governing permissions and
1495// limitations under the License.