From 2bdca414d0f96deed57df852d7599362e6c29444 Mon Sep 17 00:00:00 2001 From: liquidev Date: Sat, 18 May 2024 13:41:43 +0200 Subject: [PATCH] lvalues --- content/programming.tree | 4 + content/programming/blog/lvalues.tree | 337 ++++++++++++++++++ .../treehouse/dev/syntax-highlighting.tree | 127 +++++++ content/treehouse/new.tree | 8 + static/css/main.css | 16 + static/syntax/c.json | 98 +++++ static/syntax/c.types.json | 106 ++++++ 7 files changed, 696 insertions(+) create mode 100644 content/programming/blog/lvalues.tree create mode 100644 static/syntax/c.json create mode 100644 static/syntax/c.types.json diff --git a/content/programming.tree b/content/programming.tree index c4cfb00..ecc4079 100644 --- a/content/programming.tree +++ b/content/programming.tree @@ -15,6 +15,10 @@ id = "01HPD4XQQ5WM0APCAX014HM43V" + featured tairu - an interactive exploration of 2D autotiling techniques + % content.link = "programming/blog/lvalues" + id = "01HY5R1ZW0M0Y5KQ1E8F0Q73ZT" + + places, or what is up with `*x` not always meaning the same thing + % content.link = "programming/blog/or-types" id = "01HTWN4XB2YMF3615BE8V6Y76A" + OR-types diff --git a/content/programming/blog/lvalues.tree b/content/programming/blog/lvalues.tree new file mode 100644 index 0000000..fdd50f5 --- /dev/null +++ b/content/programming/blog/lvalues.tree @@ -0,0 +1,337 @@ +%% title = "places, or what is up with *x not always meaning the same thing in different contexts" + +% id = "01HY5R1ZV9DD7BV0F66Y0DHAEA" +- I recently got a question from my someone telling me they doesn't understand why `*x` does not read from the pointer `x` when on the left-hand side of an assignment. + + % id = "01HY5R1ZV9G92SVA0XP7CG1X6K" + - as in this case: + ```c + void example(int *x) { + int y = *x; // (1) + *x = 10; // (2) + } + ``` + + % id = "01HY5R1ZV9MVWYF9VNPK403JDE" + - it seems pretty weird, right? why does it read from the pointer in declaration `(1)`, but not in the assignment `(2)`? + + % id = "01HY5R1ZV9N9AY0HAYEWC0RBFT" + - doesn't `*x` mean "read the value pointed to by `x`?" + +% id = "01HY5R1ZV9WJM9DMW5QGK04DCR" +- TL;DR: the deal with this example is that `*x` *does not mean* "read from the location pointed to by `x`", but _just_ "the location pointed to by `x`". + + % id = "01HY5R1ZV9JN9GZ8BECJ0KV1FC" + - **`*x` is not a _value_, it's a _memory location_, or _place_ in Rust parlance** + + % id = "01HY5R1ZV9QVH3W5BRS16V6EPG" + - same thing with `x` + + % id = "01HY5R1ZV9RQQCBNFHY0ZSX4RP" + - same thing with `some_struct.abc` or `some_struct->abc` + +% id = "01HY5R1ZV9BMQ7NZJ06B0W48HP" +- but instead of jumping to conclusions, let's go back to the beginning. +let's think, what is it that makes places _different_? + + % id = "01HY5R1ZV9JGE9FTN9CN736FJN" + - the main thing is that you can write to them and create references to them. + for instance, this doesn't work: + + ```c + void example(void) { + 1 = 2; // error! + } + ``` + + but this does: + + ```c + void example(void) { + int i; + i = 2; // fine! + } + ``` + + % id = "01HY5R1ZV91NN3YMXFNRACPDS7" + - so really, places are kind of a different _type_ - we can do certain additional operations with them, such as writing! + + % id = "01HY5R1ZV9KH99CGCFM7TM1ZZB" + - we'll call this type `place(T)`, where `T` is any arbitrary type that is stored at the memory location represented by the `place(T)`. + +% id = "01HY5R1ZV9F6BEQ5RHRPZPRVM6" +- `place(T)` behaves a bit weirdly compared to other types. +for starters, it is impossible to write the type down in C code, so we're always bound to turn `place(T)` into something else quickly after its creation. + + % id = "01HY5R1ZV92VA27TGY6DJPZNP9" + - for instance, in this example: + + ```c.types + void take_int(int x); + + void example(int x) { + take_int(x /*: place(int) */); + } + ``` + + the type of `x` being passed into the `take_int` function is `place(int)`, but since that function accepts an `int`, we convert from `place(int)` + to a regular `int`. + + % id = "01HY5R1ZV9ST0B5PHERGD4J4R8" + - this conversion happens implicitly and involves _reading_ from the place - + remember that places represent locations in memory, so they're a bit like pointers. + we have to read from them before we can access the value. + +% id = "01HY5R1ZV9E1NSCCTF1CXPMY3D" +- but there are operations in the language that _expect_ a `place(T)`, and therefore do not perform the implicit conversion. + + % id = "01HY5R1ZV95BHSV9MF4Z46M377" + + we're able to describe these operations as _functions_ which take in a type `T` and return a type `U` - written down like `T -> U`. + + % id = "01HY5R1ZV90Z3S1FZ65R977P8Y" + - this notation is taken from functional languages like Haskell. + + % id = "01HY5R1ZV9RERR87NHVSGS6TWP" + - the `->` operator is _right-associative_ - `T -> U -> V` is a function which returns a function `U -> V`, not a function that accepts a `T -> U`. + + % id = "01HY5R1ZV9CG67DEA8Y649D40T" + - one of these operations is _assignment_, which is like a function `place(T) -> T -> T`. + + it accepts a `place(T)` to write to, a `T` to write to that place, and returns the `T` written. + note that in that case no read occurs, since the implicit conversion described before does not apply. + + ```c.types + void example(void) { + int x = 0; + x /*: place(int) */ = 1 /*: int */; /*-> int (discarded) */ + } + ``` + + % id = "01HY5R1ZV9MNZKK381TS7K3VR2" + - another one of these operations is the _`&` reference operator_, which is like a function `place(T) -> T*`. + + it accepts a `place(T)` and returns a pointer `T*` that points to the place's memory location in exchange. + + ```c.types + void example(void) { + int x = 0; + int* p = &(x /*: place(T) */); + } + ``` + + % id = "01HY5R1ZV9YN7H33BMK0NPX4QT" + - and of course its analogue, the _`*` dereferencing operator_, which does not consume a place, but produces one. + + it accepts a `T*` and produces a `place(T)` that is placed at the pointer's memory location - it's the reverse of `&x`, `T* -> place(T)`. + + ```c + void example(int* x) { + int y = *x; + } + ``` + + % id = "01HY5R1ZV9KZZARNK8KXZTGAFX" + - another couple of operations that accept a `place(T)` is the _`.` and `[]` operators_, both of which can be used to refer to subplaces + within the place. + + % id = "01HY5R1ZV9N6PY2ANQC45NN27E" + - the difference is that `.` is a _static, compile-time known_ subplace, while `[]` may be _dynamic, runtime known_. + + % id = "01HY5R1ZV99N78RA7Z5GSVVPRB" + - the `.` operator takes in a `place(T)` and returns a `place(U)` depending on the type of structure field we're referencing. + + % id = "01HY5R1ZV99PZT6SDK2HRCRMAZ" + - since there is no type that represents the set of fields of a structure `S`, we'll invent a type `anyfield(S)` which represents that set. + + % id = "01HY5R1ZV9RYF2P5X53VW9S9K8" + - the type of a specific field `f` in the structure `S` is `field(S, f)`. + + % id = "01HY5R1ZV9PACB0HVE976TZZ69" + - we'll also introduce a type `fieldtype(F)` which is the type stored in the field `F`. + + % id = "01HY5R1ZV99M92Z0XH1H3385JE" + - given that, the type of the `.` operator is `place(T) -> F -> place(fieldtype(F))`, where `F` is an `anyfield(T)`. + + % id = "01HY5R1ZV9VZFHRKY4QWBDTFJ7" + - example: + ```c.types + void example(void) { + struct S { int x; } s; + + s /*: place(struct S) */; + s /*: place(struct S) */ .x /*: field(struct S, x) */; /*-> place(int) (discarded) */ + } + ``` + + % id = "01HY5R1ZV952B3C5AHFME5TJG4" + - the `[]` operator takes in a `T*`, a `ptrdiff_t` to offset the pointer by, and returns a `place(T)` whose memory location is the offset pointer. + the function signature is therefore `T* -> ptrdiff_t -> place(T)`. + + example: + ```c.types + void example(int* array) { + int* p = &((array /*: int* */)[123] /*: place(int) */); + } + ``` + + % id = "01HY5R1ZV9VC2NNFKY9W0AE8F1" + - we can actually think of this `a[i]` operator as syntax sugar for `*(a + i)`. + + % id = "01HY5R1ZV9SKX5QGJCBRG5QAD9" + - this has a funny consequence where `array[0]` is equivalent to `0[array]` - + offsetting a pointer is just addition, and addition is commutative. + therefore we can swap the operands to `[]` and it will work just fine! + + % id = "01HY5R1ZV9MQWK5VQZQG8JJ03J" + - I do wonder though why it doesn't produce a warning. + I'm no standards lawyer, but I *believe* this may have something to do with implicit type conversions - the `0` gets promoted to a + pointer as part of the desugared addition. + I really need to read up about C's type promotion rules. + +% id = "01HY5R1ZV9KA2Q3TKQWQPS19KV" +- now I have to confess, I lied to you. +there are no places in C. + + % id = "01HY5R1ZV9PAD1XRS29YV64GV2" + - the C standard actually calls this concept "lvalues", which comes from the fact that they are **values** which are valid + **l**eft-hand sides of assignment. + + % id = "01HY5R1ZV9BNZM45RK2AW8BF5N" + + however, I don't like that name since it's quite esoteric - if you tell a beginner "`x` is not an lvalue," they will look at you confused. + but if you tell a beginner "`x` is not a place in memory," then it's immediately more clear! + + so I will keep using the Rust name despite the name "lvalues" technically being more "correct" and standards-compliant. + + % id = "01HY5R1ZV9DXQ8AQJXVK1JE9XS" + - I'm putting "correct" in quotes because I don't believe this is a matter of correctness, just opinion. + +% id = "01HY5R1ZV9HP593J62VWDBWHK4" +- what's interesting about `place(T)` is that it's actually a real type in C++ - except under a different name: `T&`. + + % id = "01HY5R1ZV961P77W5TPDP9AMT9" + - references are basically a way of introducing places into the type system for real, which is nice, + but on the other hand having places bindable to names results in some weird holes in the language. + + % id = "01HY5R1ZV9F292M89VEBSMB80F" + - to begin with, in C we could assume that referencing any variable `T x` by its name `x` would produce a `place(T)`. + this is a simple and clear rule to understand. + + % id = "01HY5R1ZV9WPRVN3S84B7H9W25" + - in C++, this is no longer the case - referencing a variable `T x` by its name `x` produces a `T&`, + but referencing a variable `T& x` by its name `x` produces a `T&`, not a `T& &`! + + in layman's terms, C++ makes it impossible to rebind references to something else. + you can't make this variable point to `y`: + + + ```c + int x = 0; + int y = 1; + int& r = x; + r = y; // nope, this is just the same as x = y + ``` + + % id = "01HY5R1ZV9FQX72E6XDJ8BHY7K" + - and it's not like it could've been done any better - if we got a `T& &` instead, we'd be able to reassign a different place to + the variable, but then we'd get a type mismatch on something like `r = 1` + + % id = "01HY5R1ZV98W2X9HSG1KM7CDPN" + - because assignment is `T& -> T -> T`; + if our `T` is `int& &`, the expected signature is `int& & -> int& -> int&`, but we're providing an `int`, not an `int&` - + and we can't make a reference out of a value! + + % id = "01HY5R1ZV9AVSDRGP8XYG56KNK" + - so we'd need a way of doing `T& -> T`, but guess what: (almost) this already exists and is called "pointers" and "the unary `*` operator". + + % id = "01HY5R1ZV99T9X3ANYQJS40NBX" + - except of course, with pointers the signature is `T* -> T&`. + + % id = "01HY5R1ZV94MHBS9RERA4CWPTM" + - so by introducing references, C++ was actually made less consistent! + + % id = "01HY5R1ZV93GGZ7CPTEPY8CW8K" + - I actually kind of wish references were more like they are in Rust - basically just pointers but non-null and guaranteed to be aligne + +% id = "01HY5R1ZV9QWHZRJ5V53CVYG5V" +- anyways, as a final ~~boss~~ bonus of this blog post, I'd like to introduce you to the `x->y` operator (the C one) + + % id = "01HY5R1ZV9AYFWV96FC07WX68G" + - if you've been programmming C or C++ for a while, you'll know that it's pretty dangerous to just go pointer-[walkin'](https://www.youtube.com/watch?v=d_dLIy2gQGU) with the `->` operator + ```c + int* third(struct list* first) { + return &list->next->next->value; + } + ``` + + % id = "01HY5R1ZV9XR75M4TN7H08HYPF" + + there's a pretty high chance that using the `third` function will cause a crash for you if there are only two elements in the list. + + % id = "01HY5R1ZV97K8Y4S4V68555MKV" + - if it doesn't cause a crash, you may have more serious problems to worry about :kamien: + + % id = "01HY5R1ZV9EEN448SB62YMAGZP" + - but how does it cause a crash if we're taking the reference out of that whole `->` chain? shouldn't taking a reference not cause any reads? + + % id = "01HY5R1ZV977KHP1QDXWY7CTZJ" + - the secret lies in what the `x->y` operator really does. + basically, it's just convenience syntax for `(*x).y`. + + % id = "01HY5R1ZV9X8M5T64BD0MZ2WN2" + - let's start by dismantling the entire pointer access chain into separate expressions: + ```c + int* third(struct list* first) { + struct list* second = first->next; + struct list* third = second->next; + return &third->value; + } + ``` + + % id = "01HY5R1ZV9PGQMS2A6H5XTWYZX" + - now let's desugar the `->` operator: + ```c + int* third(struct list* first) { + struct list* second = (*first).next; + struct list* third = (*second).next; + return &(*third).value; + } + ``` + + % id = "01HY5R1ZV92CZAV13P4KFABTR1" + - and add some type annotations: + ```c.types + int* third(struct list* first) { + struct list* second = (*first).next /*: place(struct list*) */; + struct list* third = (*second).next /*: place(struct list*) */; + return &(*third).value; + } + ``` + + % id = "01HY5R1ZV9671KDJAATWBPGD2C" + - and now let's follow it line by line. + + % id = "01HY5R1ZV936BNY9MY4ANCXYH1" + - ```c.types + struct list* second = (*first).next /*: place(struct list*) */; + ``` + first we read the value of the `next` field from the structure pointed to by `first`. + assuming `first` is a valid pointer, this shouldn't fail. + + % id = "01HY5R1ZV9VNTRSCFWBW9BCTHJ" + - ```c.types + struct list* third = (*second).next /*: place(struct list*) */; + ``` + but now something bad happens: we don't know if the `second` pointer we just got a `place(T)` from is valid. + we offset it by `.next` and implicitly read from it, which is bad! + + % id = "01HY5R1ZV9S50S4MYYQ1Q5P00Z" + - at this point there's no point in analyzing the rest of the function - we've hit Undefined Behavior! + + % id = "01HY5R1ZV9A1FS9SRFJBM5NVSR" + - the conclusion here is that chaining `x->y` can be really dangerous if you don't check for the validity of each reference. + just doing one hop and a reference - `&x->y` - is fine, because we never end up reading from the invalid pointer - + it's like doing `&x[1]`. + but two hops is where it gets hairy - in `x->y->z`, the `->z` has to _read_ from `x->y` to know the pointer to read from. + +% id = "01HY5R1ZV9VSE2WWH93NCAGRS8" +- TODO: in the future I'd like to embed a C compiler here that will desugar all place operations into explicit ones. +stay tuned for that! diff --git a/content/treehouse/dev/syntax-highlighting.tree b/content/treehouse/dev/syntax-highlighting.tree index 44300d0..a1ea3e2 100644 --- a/content/treehouse/dev/syntax-highlighting.tree +++ b/content/treehouse/dev/syntax-highlighting.tree @@ -6,6 +6,133 @@ % id = "01HRT0DG7VF31P185J898QQH85" - really there's not much more to it, but I use it for debugging + with it you can get a general feel for how I highlight things in the treehouse +% id = "01HY5R1ZW5JFAYBFFT579HF1T4" +- design notes + + % id = "01HY5R1ZW5V5Q72QGP1RK13H10" + - don't do magic: stick to standards and community conventions. + + % id = "01HY5R1ZW578NT8G6BTNAN79QK" + - like in C, don't highlight uppercase identifiers. + those are not special in any way. + + % id = "01HY5R1ZW5MAS6K7K9QJT4HYQV" + - in Rust, we highlight identifiers starting with an uppercase letter, because that's the + style convention for user-defined types. + + % id = "01HY5R1ZW5NC96PA7VDKDVMEPX" + - keep it simple. + the highlighting doesn't have to be perfect. + you know what you're typing. + in case of user input, you have a compiler that will highlight the error anyways. + +% id = "01HY5R1ZW5R3808RNG6RAPC8H4" +- `c` + % id = "01HY5R1ZW51JDH26B27ZTGP9JA" + - NOTE: this is C23 so you may see some unfamiliar keywords + % id = "01HY5R1ZW5DDH54AFNJTFMKZSF" + - patterns + ```c + #include + #define SOMETHING_SOMETHING + + // a comment + /* a multiline + comment */ + + function() + struct S enum E union U + u8'ą' u'g' U'g' L'g' + u8"UTF-8" u"UTF-16" U"UTF-32" L"wchar_t" + ident + + 0b1010'1010 0b1010'1010u 0b1010'1010llu 0b1010'1010wb + 0xDeadBeef 012345l + 123ull 127wb + 3.14159265 3.141592f 1.0e-4d 0xa.dp+2 + + . -> + ++ -- & * + - ~ ! + / % << >> < > <= >= == != ^ | && || + ? : :: ; ... + = *= /= %= += -= <<= >>= &= ^= |= + , # ## + <: :> <% %> %: %:%: + ``` + + % id = "01HY5R1ZW5X2AVZFVHV0QR1J93" + - keywords + ```c + alignas alignof auto break case const constexpr continue default do else extern for goto if + inline register restrict return sizeof static static_assert switch thread_local typedef typeof + typeof_unqual volatile while _Generic _Noreturn + + bool char double float int long short signed struct unsigned union void _Atomic _BitInt _Complex + _Decimal128 _Decimal32 _Decimal64 _Imaginary + + nullptr false true + ``` + + % id = "01HY5R1ZW5PP1C00NSWAG5FA8B" + - sample + ```c + #include + + #include + + void bump_init(struct bump* a, void* slab, i32 size) + { + a->start = slab; + a->ptr = slab + size; + #if SNUGGLES_BUMP_TRACKING + a->tracker = null; + #endif + } + + void* bump_try_alloc(struct bump* a, i32 size, const char* what) + { + // Allocate n bytes and align the pointer to 16 bytes. + a->ptr = (void*)((long long)(a->ptr - size) & ~0xF); + + void* addr; + // TODO: Because this check is done after the allocation, this will eventually start + // overflowing. Not good, but not important either because most allocations + // use bump_alloc_or_panic. + if (a->ptr < a->start) { + addr = null; + } else { + addr = a->ptr; + } + + #if SNUGGLES_BUMP_TRACKING + if (a->tracker) { + (a->tracker)(addr, size, what); + } + #endif + + return addr; + } + + void* bump_alloc_or_panic(struct bump* a, i32 size, const char* what) + { + (void)what; // Currently unused, may use for panic message in the future. + + void* p = bump_try_alloc(a, size, what); + b32 allocation_succeeded = p != 0; + ASSERT(allocation_succeeded, "out of memory"); + return p; + } + ``` + + % id = "01HY5R1ZW5Y28GDH0YX46WV9KN" + - `.types` + + % id = "01HY5R1ZW5KYY6VCKWBHN1GF10" + - patterns + ```c.types + x /*: int */ + ``` + % id = "01HRT0DG7VN5TH971H7W8AT8YY" - `javascript` diff --git a/content/treehouse/new.tree b/content/treehouse/new.tree index 37613ea..11114f4 100644 --- a/content/treehouse/new.tree +++ b/content/treehouse/new.tree @@ -10,6 +10,14 @@ [read][page:programming/blog/tairu] +% id = "01HY5R1ZW2PYZSSP2J2KAA23DA" +- I recently got a question from my someone telling me they doesn't understand why `*x` does not read from the pointer `x` when on the left-hand side of an assignment. +and that made me think, + +### what's up with `*x` not always meaning the same thing in different contexts? + +[read][page:programming/blog/lvalues] + % id = "01HV1DGFHZ65GJVQRSREKR67J9" - I've been thinking recently how cool it is to be able to single-step into Unreal Engine's source code and edit it while you're working with it, and how uncool it is that I can't do the same thing easily in the Rust world. diff --git a/static/css/main.css b/static/css/main.css index 42be067..aa7c1f7 100644 --- a/static/css/main.css +++ b/static/css/main.css @@ -108,6 +108,13 @@ button { line-height: 1.5; } +pre, +code, +kbd, +button { + font-size: 100%; +} + :root { --recursive-mono: 0.0; --recursive-casl: 1.0; @@ -792,6 +799,15 @@ th-literate-program[data-mode="output"] { color: var(--error-color); text-decoration: wavy underline; } + + &.hidden { + display: none; + } + + &.type-hint { + color: var(--syntax-comment); + font-size: 80%; + } } .th-syntax-highlighting { diff --git a/static/syntax/c.json b/static/syntax/c.json new file mode 100644 index 0000000..fb47877 --- /dev/null +++ b/static/syntax/c.json @@ -0,0 +1,98 @@ +{ + "patterns": [ + { + "regex": "#include (<.+?>)", + "is": { "default": "keyword1", "captures": ["string"] } + }, + { "regex": "#[a-zA-Z0-9_]+", "is": "keyword1" }, + { "regex": "\\/\\/.*", "is": "comment" }, + { + "regex": "\\/\\*.*?\\*\\/", + "flags": ["dotMatchesNewline"], + "is": "comment" + }, + { + "regex": "[a-zA-Z_][a-zA-Z0-9_]*(\\()", + "is": { "default": "function", "captures": ["default"] } + }, + { + "regex": "(struct|enum|union)\\s+([a-zA-Z_][a-zA-Z0-9_]*)", + "is": { + "default": "default", + "captures": ["identifier", "keyword2"] + } + }, + { "regex": "(u8|u|U|L)'(\\\\'|[^'])'", "is": "string" }, + { "regex": "(u8|u|U|L)\"(\\\\\"|[^\"])*\"", "is": "string" }, + { "regex": "[a-zA-Z_][a-zA-Z0-9_]*", "is": "identifier" }, + { "regex": "0[bB][01']+[uUlLfFlLdDwWbB]*", "is": "literal" }, + { + "regex": "0[xX][0-9a-fA-F']+(\\.[0-9a-fA-F']*([pP][-+]?[0-9a-fA-F']+)?)?+[uUlLwWbB]*", + "is": "literal" + }, + { + "regex": "[0-9']+(\\.[0-9']*([eE][-+]?[0-9']+)?)?[uUlLfFlLdDwWbB]*", + "is": "literal" + }, + { "regex": "[+=/*^%<>!~|&\\.?:#-]+", "is": "operator" }, + { "regex": "[,;]", "is": "punct" } + ], + "keywords": { + "alignas": { "into": "keyword1" }, + "alignof": { "into": "keyword1" }, + "auto": { "into": "keyword1" }, + "break": { "into": "keyword1" }, + "case": { "into": "keyword1" }, + "const": { "into": "keyword1" }, + "constexpr": { "into": "keyword1" }, + "continue": { "into": "keyword1" }, + "default": { "into": "keyword1" }, + "do": { "into": "keyword1" }, + "else": { "into": "keyword1" }, + "extern": { "into": "keyword1" }, + "for": { "into": "keyword1" }, + "goto": { "into": "keyword1" }, + "if": { "into": "keyword1" }, + "inline": { "into": "keyword1" }, + "register": { "into": "keyword1" }, + "restrict": { "into": "keyword1" }, + "return": { "into": "keyword1" }, + "sizeof": { "into": "keyword1" }, + "static": { "into": "keyword1" }, + "static_assert": { "into": "keyword1" }, + "switch": { "into": "keyword1" }, + "thread_local": { "into": "keyword1" }, + "typedef": { "into": "keyword1" }, + "typeof": { "into": "keyword1" }, + "typeof_unqual": { "into": "keyword1" }, + "volatile": { "into": "keyword1" }, + "while": { "into": "keyword1" }, + "_Generic": { "into": "keyword1" }, + "_Noreturn": { "into": "keyword1" }, + + "bool": { "into": "keyword2" }, + "char": { "into": "keyword2" }, + "double": { "into": "keyword2" }, + "enum": { "into": "keyword2" }, + "float": { "into": "keyword2" }, + "int": { "into": "keyword2" }, + "long": { "into": "keyword2" }, + "short": { "into": "keyword2" }, + "signed": { "into": "keyword2" }, + "struct": { "into": "keyword2" }, + "unsigned": { "into": "keyword2" }, + "union": { "into": "keyword2" }, + "void": { "into": "keyword2" }, + "_Atomic": { "into": "keyword2" }, + "_BitInt": { "into": "keyword2" }, + "_Complex": { "into": "keyword2" }, + "_Decimal128": { "into": "keyword2" }, + "_Decimal32": { "into": "keyword2" }, + "_Decimal64": { "into": "keyword2" }, + "_Imaginary": { "into": "keyword2" }, + + "nullptr": { "into": "literal" }, + "false": { "into": "literal" }, + "true": { "into": "literal" } + } +} diff --git a/static/syntax/c.types.json b/static/syntax/c.types.json new file mode 100644 index 0000000..5453a6b --- /dev/null +++ b/static/syntax/c.types.json @@ -0,0 +1,106 @@ +{ + "patterns": [ + { + "regex": "#include (<.+?>)", + "is": { "default": "keyword1", "captures": ["string"] } + }, + { "regex": "#[a-zA-Z0-9_]+", "is": "keyword1" }, + { "regex": "\\/\\/.*", "is": "comment" }, + { + "regex": "\\/\\*((?::|->).*?)\\s*\\*\\/", + "flags": ["dotMatchesNewline"], + "is": { + "default": "hidden", + "captures": ["type-hint"] + } + }, + { + "regex": "\\/\\*.*?\\*\\/", + "flags": ["dotMatchesNewline"], + "is": "comment" + }, + { + "regex": "[a-zA-Z_][a-zA-Z0-9_]*(\\()", + "is": { "default": "function", "captures": ["default"] } + }, + { + "regex": "(struct|enum|union)\\s+([a-zA-Z_][a-zA-Z0-9_]*)", + "is": { + "default": "default", + "captures": ["identifier", "keyword2"] + } + }, + { "regex": "(u8|u|U|L)'(\\\\'|[^'])'", "is": "string" }, + { "regex": "(u8|u|U|L)\"(\\\\\"|[^\"])*\"", "is": "string" }, + { "regex": "[a-zA-Z_][a-zA-Z0-9_]*", "is": "identifier" }, + { "regex": "0[bB][01']+[uUlLfFlLdDwWbB]*", "is": "literal" }, + { + "regex": "0[xX][0-9a-fA-F']+(\\.[0-9a-fA-F']*([pP][-+]?[0-9a-fA-F']+)?)?+[uUlLwWbB]*", + "is": "literal" + }, + { + "regex": "[0-9']+(\\.[0-9']*([eE][-+]?[0-9']+)?)?[uUlLfFlLdDwWbB]*", + "is": "literal" + }, + { "regex": "[+=/*^%<>!~|&\\.?:#-]+", "is": "operator" }, + { "regex": "[,;]", "is": "punct" } + ], + "keywords": { + "alignas": { "into": "keyword1" }, + "alignof": { "into": "keyword1" }, + "auto": { "into": "keyword1" }, + "break": { "into": "keyword1" }, + "case": { "into": "keyword1" }, + "const": { "into": "keyword1" }, + "constexpr": { "into": "keyword1" }, + "continue": { "into": "keyword1" }, + "default": { "into": "keyword1" }, + "do": { "into": "keyword1" }, + "else": { "into": "keyword1" }, + "extern": { "into": "keyword1" }, + "for": { "into": "keyword1" }, + "goto": { "into": "keyword1" }, + "if": { "into": "keyword1" }, + "inline": { "into": "keyword1" }, + "register": { "into": "keyword1" }, + "restrict": { "into": "keyword1" }, + "return": { "into": "keyword1" }, + "sizeof": { "into": "keyword1" }, + "static": { "into": "keyword1" }, + "static_assert": { "into": "keyword1" }, + "switch": { "into": "keyword1" }, + "thread_local": { "into": "keyword1" }, + "typedef": { "into": "keyword1" }, + "typeof": { "into": "keyword1" }, + "typeof_unqual": { "into": "keyword1" }, + "volatile": { "into": "keyword1" }, + "while": { "into": "keyword1" }, + "_Generic": { "into": "keyword1" }, + "_Noreturn": { "into": "keyword1" }, + + "bool": { "into": "keyword2" }, + "char": { "into": "keyword2" }, + "double": { "into": "keyword2" }, + "enum": { "into": "keyword2" }, + "float": { "into": "keyword2" }, + "int": { "into": "keyword2" }, + "long": { "into": "keyword2" }, + "short": { "into": "keyword2" }, + "signed": { "into": "keyword2" }, + "struct": { "into": "keyword2" }, + "unsigned": { "into": "keyword2" }, + "union": { "into": "keyword2" }, + "void": { "into": "keyword2" }, + "_Atomic": { "into": "keyword2" }, + "_BitInt": { "into": "keyword2" }, + "_Complex": { "into": "keyword2" }, + "_Decimal128": { "into": "keyword2" }, + "_Decimal32": { "into": "keyword2" }, + "_Decimal64": { "into": "keyword2" }, + "_Imaginary": { "into": "keyword2" }, + + "nullptr": { "into": "literal" }, + "false": { "into": "literal" }, + "true": { "into": "literal" } + } +}