From eb6f2abdd56df0134b66c79a35a21219cf3cb939 Mon Sep 17 00:00:00 2001 From: lqdev Date: Thu, 20 Jun 2024 22:24:02 +0200 Subject: [PATCH] cxx: using shared/unique ptr to free C memory --- content/programming/languages/cxx.tree | 61 +-- .../cxx/access-modifiers-as-labels.tree | 56 +++ .../cxx/shared-unique-ptr-deleter.tree | 360 ++++++++++++++++++ 3 files changed, 421 insertions(+), 56 deletions(-) create mode 100644 content/programming/languages/cxx/access-modifiers-as-labels.tree create mode 100644 content/programming/languages/cxx/shared-unique-ptr-deleter.tree diff --git a/content/programming/languages/cxx.tree b/content/programming/languages/cxx.tree index a9c1517..b9aa476 100644 --- a/content/programming/languages/cxx.tree +++ b/content/programming/languages/cxx.tree @@ -1,62 +1,11 @@ %% title = "C++" % id = "01H9R1KJESR2F420HE67HW4AVR" -- design lessons from the best programming language of all time that everyone loves (not really) +- notes and design lessons from the best programming language of all time that everyone loves (not really) -% id = "programming/cxx/access-modifiers-as-labels" +% content.link = "programming/languages/cxx/access-modifiers-as-labels" + redirect_from = ["programming/cxx/access-modifiers-as-labels"] + :page: access modifiers as labels (`private:`, `protected:`, and `public:`) - % id = "01H9R1KJES39Z6RBCKY4E71PYD" - - although Java and C#'s approach to symbol privacy may be verbose, it has one great advantage: it is stateless. - - % id = "01H9R1KJES17626QXYEGM7XBC7" - - the way they're implemented in C++, it's essentially a bit more parsing state you have to keep track of - - % id = "01H9R1KJESG4K8T1K1G36T7HBP" - - and you know what other parsing state you have to keep track of in C++? - that's right, the preprocessor.\ - access modifiers, like all tokens, are affected by the preprocessor, and you have to take that into account. 
- - % id = "01H9R1KJESJ0G0VQAW994ZHR0S" - - take the following example: - ```cpp - class ComfyZone - { - std::vector _soft_beds; - - #if ENABLE_HUGS - - public: - void hug(Person& person); - - #endif - - int _remaining_hugs = 10; - }; - ``` - - % id = "01H9R1KJESDDX4089WVHVV8N3H" - + although quite contrived, it illustrates the problem pretty well - - % id = "01H9R1KJESD2KED5TAFBY426A6" - - (before you ask, `_remaining_hugs` needs to be always present because it has to be (de)serialized no matter if hugging functionality is compiled in. otherwise we'd get data loss.) - - % id = "01H9R1KJESES27VKVW4A0ZVM11" - - we intended for `_remaining_hugs` to be private, but if hugs are enabled, it becomes public. - - % id = "01H9R1KJESTKW90R788SSPMNC6" - - this can be _very_ hard to spot if you have a big class with lots of declarations inside. - - % id = "01H9R1KJESCJ3VC8ATPYFDCPSP" - - this can be worked around by banning access modifiers from appearing in `#ifdef`s, but you have to *realize* that this might happen - - % id = "01H9R1KJES4ZYHVADDF80WAXH6" - - and I've seen instances of this exact thing occurring in the Unreal Engine codebase, which is *full* of long lists of declarations (made even longer by the prevalence of `UPROPERTY()` specifiers) - - % id = "01H9R1KJES182MCV2V0A4VHKKX" - - even if we didn't have the preprocessor, that access modifier is state _you_ have to keep track of - - % id = "01H9R1KJESH7PWNKCKW3H0WJHW" - - I very often find myself needing to scroll upward after Ctrl-clicking on a field or function declaration, just to find out if I can use it - - % id = "01H9R1KJESFE6F1D4J5PA5Q381" - - (thankfully IDEs are helpful here and Rider shows you a symbol's visibility in the tooltip on hover, but I don't have Rider on code reviews) +% content.link = "programming/languages/cxx/shared-unique-ptr-deleter" ++ :page: freeing C memory automatically using `std::unique_ptr` and `std::shared_ptr` diff --git 
a/content/programming/languages/cxx/access-modifiers-as-labels.tree b/content/programming/languages/cxx/access-modifiers-as-labels.tree new file mode 100644 index 0000000..18953ae --- /dev/null +++ b/content/programming/languages/cxx/access-modifiers-as-labels.tree @@ -0,0 +1,56 @@ +%% title = "C++ syntactic pitfall: access modifiers as labels" + +% id = "01H9R1KJES39Z6RBCKY4E71PYD" +- although Java and C#'s approach to symbol privacy may be verbose, it has one great advantage: it is stateless. + +% id = "01H9R1KJES17626QXYEGM7XBC7" +- the way they're implemented in C++, it's essentially a bit more parsing state you have to keep track of + + % id = "01H9R1KJESG4K8T1K1G36T7HBP" + - and you know what other parsing state you have to keep track of in C++? - that's right, the preprocessor.\ + access modifiers, like all tokens, are affected by the preprocessor, and you have to take that into account. + + % id = "01H9R1KJESJ0G0VQAW994ZHR0S" + - take the following example: + ```cpp + class ComfyZone + { + std::vector _soft_beds; + + #if ENABLE_HUGS + + public: + void hug(Person& person); + + #endif + + int _remaining_hugs = 10; + }; + ``` + + % id = "01H9R1KJESDDX4089WVHVV8N3H" + + although quite contrived, it illustrates the problem pretty well + + % id = "01H9R1KJESD2KED5TAFBY426A6" + - (before you ask, `_remaining_hugs` needs to be always present because it has to be (de)serialized no matter if hugging functionality is compiled in. otherwise we'd get data loss.) + + % id = "01H9R1KJESES27VKVW4A0ZVM11" + - we intended for `_remaining_hugs` to be private, but if hugs are enabled, it becomes public. + + % id = "01H9R1KJESTKW90R788SSPMNC6" + - this can be _very_ hard to spot if you have a big class with lots of declarations inside. 
+ + % id = "01H9R1KJESCJ3VC8ATPYFDCPSP" + - this can be worked around by banning access modifiers from appearing in `#ifdef`s, but you have to *realize* that this might happen + + % id = "01H9R1KJES4ZYHVADDF80WAXH6" + - and I've seen instances of this exact thing occurring in the Unreal Engine codebase, which is *full* of long lists of declarations (made even longer by the prevalence of `UPROPERTY()` specifiers) + +% id = "01H9R1KJES182MCV2V0A4VHKKX" +- even if we didn't have the preprocessor, that access modifier is state _you_ have to keep track of + + % id = "01H9R1KJESH7PWNKCKW3H0WJHW" + - I very often find myself needing to scroll upward after Ctrl-clicking on a field or function declaration, just to find out if I can use it + + % id = "01H9R1KJESFE6F1D4J5PA5Q381" + - (thankfully IDEs are helpful here and Rider shows you a symbol's visibility in the tooltip on hover, but I don't have Rider on code reviews) diff --git a/content/programming/languages/cxx/shared-unique-ptr-deleter.tree b/content/programming/languages/cxx/shared-unique-ptr-deleter.tree new file mode 100644 index 0000000..3a569e7 --- /dev/null +++ b/content/programming/languages/cxx/shared-unique-ptr-deleter.tree @@ -0,0 +1,360 @@ +%% title = "freeing C memory automatically using `std::unique_ptr` and `std::shared_ptr`" + +- say you need to interface with a C library such as SDL2 in your C++ code + + - obviously the simplest way would be to just use the C library. + ```cpp + int main(void) + { + SDL_Init(SDL_INIT_VIDEO); + + SDL_Window* window = SDL_CreateWindow( + "Hello, world!", + SDL_WINDOWPOS_CENTERED, SDL_WINDOWPOS_CENTERED, + 800, 600, + 0 + ); + + bool running = true; + while (running) { + SDL_Event event; + while (SDL_PollEvent(&event)) { + if (event.type == SDL_QUIT) { + running = false; + } + } + } + + SDL_DestroyWindow(window); + } + ``` + + - this approach has the nice advantage of being really simple, but it doesn't work well if you build your codebase on RAII. 
+ + - and as much as I disagree with using it *everywhere* and injecting object-oriented design into everything, RAII is actually really useful for OS resources such as an `SDL_Window*`. + +- to make use of RAII you might be tempted to wrap your `SDL_Window*` in a class with a destructor… + +```cpp +struct window +{ + SDL_Window* raw = nullptr; + + window(const char* title, int x, int y, int w, int h, int flags) + : raw(SDL_CreateWindow(title, x, y, w, h, flags)) + {} + + ~window() + { + if (raw != nullptr) { + SDL_DestroyWindow(raw); + raw = nullptr; + } + } +}; +``` + + + but remember the rule of three - if you declare a destructor, you pretty much always also want to declare a copy constructor, and a copy assignment operator + + - the rule of three says that + + > If a class requires a user-defined destructor, a user-defined copy constructor, or a user-defined copy assignment operator, it almost certainly requires all three. + + from [cppreference.com](https://en.cppreference.com/w/cpp/language/rule_of_three#Rule_of_three), retrieved 2024-06-20 21:13 UTC+2 + + - imagine a situation where you have a class managing a raw pointer like our `window`. + + - what will happen with an explicit destructor, but a default copy constructor and copy assignment operator, is that upon copying an instance of the object, the new object will receive the same pointer as the original - + and _its_ destructor will run to delete the pointer, _in addition to_ the destructor that will run to delete our original object - causing a double free! + + - therefore we need a copy constructor to create a new allocation that will be freed by the second destructor. 
+ + - copying windows doesn't really make sense, so we can delete the copy constructor and copy assignment operator… + ```cpp + struct window + { + // -- snip -- + + window(const window&) = delete; + void operator=(const window&) = delete; + }; + ``` + + - that alone is cool, but it would be nice if we could move a `window` to a different location in memory instead of having to keep it in place. + + - having a copy constructor inhibits the compiler from creating a default move constructor and move assignment operator. + + - so we'll also want an explicit move constructor and a move assignment operator: + ```cpp + struct window + { + // -- snip -- + + window(window&& other) + { + raw = other.raw; + other.raw = nullptr; + } + + window& operator=(window&& other) + { + raw = other.raw; + other.raw = nullptr; + return *this; + } + }; + ``` + + + this fulfills the rule of five, which says that if you follow the rule of three and would like the object to be movable, you will want a move constructor and move assignment operator. 
+ + - > Because the presence of a user-defined (or `= default` or `= delete` declared) destructor, copy-constructor, or copy-assignment operator prevents implicit definition of the move constructor and the move assignment operator, any class for which move semantics are desirable, has to declare all five special member functions: […] + + from [cppreference.com](https://en.cppreference.com/w/cpp/language/rule_of_three#Rule_of_five), retrieved 2024-06-20 21:13 UTC+2 + + - with all of this combined, our final `window` class looks like this: + ```cpp + struct window + { + SDL_Window* raw = nullptr; + + window(const char* title, int x, int y, int w, int h, int flags) + : raw(SDL_CreateWindow(title, x, y, w, h, flags)) + {} + + ~window() + { + if (raw != nullptr) { + SDL_DestroyWindow(raw); + raw = nullptr; + } + } + + window(const window&) = delete; + void operator=(const window&) = delete; + + window(window&& other) + { + raw = other.raw; + other.raw = nullptr; + } + + window& operator=(window&& other) + { + raw = other.raw; + other.raw = nullptr; + return *this; + } + }; + ``` + + - and with this class, our simple _Hello, world!_ program becomes this: + + ```cpp + int main(void) + { + SDL_Init(SDL_INIT_VIDEO); + + window window{ + "Hello, world!", + SDL_WINDOWPOS_CENTERED, SDL_WINDOWPOS_CENTERED, + 800, 600, + 0, + }; + + bool running = true; + while (running) { + SDL_Event event; + while (SDL_PollEvent(&event)) { + if (event.type == SDL_QUIT) { + running = false; + } + } + } + } + ``` + + - quite a bit of boilerplate just to save a single line of code, isn't it? + + + we blew up our single line into 32. good job, young C++ programmer! + + - opinion time: you might be tempted to say that having this class makes it easy to provide functions that will query information about the window. + + - my argument is that in most cases you shouldn't create such functions, because the ones from SDL2 already exist. 
+ + - albeit I'll admit that writing + ```cpp + int width; + SDL_GetWindowSize(&window, &width, nullptr); + ``` + just to obtain the window width does _not_ spark joy. + + - on the other hand it being this verbose does suggest that _maybe_ it's a little expensive to call, so there's that. + + maybe save it somewhere and reuse it during a frame. + I dunno, I'm not your dad to be telling you what to do. + + neither have I read the SDL2 source code to know how expensive this function is, but the principle of least surprise tells me it should always return the _current_ window size, so I assume it always asks the OS. + +- but the fine folks designing the C++ standard library have already thought of this use case. +this is what _smart pointers_ are for after all - our good friends `std::shared_ptr` and `std::unique_ptr`, which `delete` things for us when they go out of scope, automatically! + +- let's start with `std::shared_ptr` because it's a bit simpler. + + - `std::shared_ptr` is a simple form of _garbage collection_ - it will free its associated allocation once there are no more referencers to it. + + - naturally it has to know _how_ to perform the freeing. + the standard library designers could have just assumed that all allocations are created with `new` and deleted with `delete`, but unfortunately the real world is not so simple. + we have C libraries to interface with after all, and there destruction is accomplished simply by calling functions! + + + not to mention polymorphism - `delete` does not have any metadata about the underlying type. it calls the destructor of the _static_ type, which wouldn't work very well if the actual type was something else. + + - (this is why having a `virtual` method in your polymorphic class requires your destructor to become `virtual`, too.) + + - because of this, `std::shared_ptr` actually stores a _deleter_ object, whose sole task is to destroy the shared pointer's contents once there are no more references to it. 
+ + - to set a custom deleter for an `std::shared_ptr`, we provide it as the 2nd argument of the constructor. + so to automatically free our `SDL_Window` pointer, we would do this: + ```cpp + int main(void) + { + SDL_Init(SDL_INIT_VIDEO); + + std::shared_ptr<SDL_Window> window{ + SDL_CreateWindow( + "Hello, world!", + SDL_WINDOWPOS_CENTERED, SDL_WINDOWPOS_CENTERED, + 800, 600, + 0 + ), + SDL_DestroyWindow, + }; + + bool running = true; + while (running) { + SDL_Event event; + while (SDL_PollEvent(&event)) { + if (event.type == SDL_QUIT) { + running = false; + } + } + } + } + ``` + and that's all there is to it! + + + this is pretty much the simplest solution to our problem - it does not require declaring any additional types or anything of that sort. + this is the solution I would go with in a production codebase. + + - this is despite `std::shared_ptr`'s extra reference counting semantics - + having formed some Good Memory Management habits in Rust, I tend to shape my memory layout into a _tree_ rather than a _graph_, so to pass the window to the rest of the program I would pass an `SDL_Window&` down in function arguments. + then only `main` has to concern itself with how the `SDL_Window`'s memory is managed. + + - using `std::shared_ptr` does have a downside though, and it's that there is some extra overhead associated with handling the shared pointer's _control block_. + + + the control block is an additional area in memory that stores metadata about the shared pointer - + the strong reference count, the [weak](https://en.cppreference.com/w/cpp/memory/weak_ptr) reference count, as well as our deleter. + + - an additional thing to note is that when you're constructing an `std::shared_ptr` from an existing raw pointer, C++ cannot allocate the control block together with the original allocation. + this can reduce cache locality if the allocator happens to place the control block very far from the allocation we want to manage through the shared pointer. 
+ +- we can avoid all of this overhead by using a `std::unique_ptr`, albeit not without some boilerplate. +(spoiler: it's still way better than our original example though!) + + - an `std::unique_ptr` stores which deleter to use as part of its template arguments - you may have never noticed, but `std::unique_ptr` is defined with an additional `Deleter` argument in its signature: + + ```cpp + template <typename T, typename Deleter = std::default_delete<T>> + class unique_ptr + { + // ... + }; + ``` + + - unfortunately for us, adding a deleter to an `std::unique_ptr` is not as simple as adding one to an `std::shared_ptr`, because it involves creating an additional type - + we cannot just pass `SDL_DestroyWindow` into that argument, because that's a _function_, not a _type_. + + - writing a little wrapper that will call `SDL_DestroyWindow` (or really any static function) for us is a pretty trivial task though: + + ```cpp + template <typename T, void (*Deleter)(T*)> + class function_delete + { + void operator()(void* allocation) const + { + Deleter(static_cast<T*>(allocation)); + } + }; + ``` + + - now we can delete an `SDL_Window` using our custom deleter like so: + + ```cpp + std::unique_ptr<SDL_Window, function_delete<SDL_Window, SDL_DestroyWindow>> window{ + SDL_CreateWindow( + "Hello, world!", + SDL_WINDOWPOS_CENTERED, SDL_WINDOWPOS_CENTERED, + 800, 600, + 0 + ), + }; + ``` + + - having to type this whole type out every single time we want to refer to an owned `SDL_Window` is a bit of a pain though, so we can create a type alias: + + ```cpp + namespace sdl + { + using window = std::unique_ptr<SDL_Window, function_delete<SDL_Window, SDL_DestroyWindow>>; + } + + sdl::window window{ + SDL_CreateWindow( + "Hello, world!", + SDL_WINDOWPOS_CENTERED, SDL_WINDOWPOS_CENTERED, + 800, 600, + 0 + ), + }; + ``` + + - and having to repeat `SDL_Window` twice in the type alias is no fun, so we can create a type alias for `std::unique_ptr<T, function_delete<T, Deleter>>` too: + + ```cpp + template <typename T, void (*Deleter)(T*)> + using c_unique_ptr = std::unique_ptr<T, function_delete<T, Deleter>>; + + namespace sdl + { + using window = c_unique_ptr<SDL_Window, SDL_DestroyWindow>; + } + ``` + + …you get the idea. + + - I'm calling it `c_unique_ptr` by the way because it's a _unique pointer to a C resource_. 
+ + - the unfortunate downside to this approach is that you can get pretty abysmal template error messages upon type mismatch: + + ```cpp + void example(const sdl::window& w); + + int main(void) + { + example(1); + + // ... + } + ``` + + ```diagnostics-clang + sdl2.cpp:36:5: error: no matching function for call to 'example' + 36 | example(1); + | ^~~~~~~ + sdl2.cpp:21:6: note: candidate function not viable: no known conversion from 'int' to 'const sdl::window' (aka 'const unique_ptr>') for 1st argument + 21 | void example(const sdl::window& w); + | ^ ~~~~~~~~~~~~~~~~~~~~ + 1 error generated. + ``` + + - but hey, at least you avoid the overhead of reference counting - by making it completely unnecessary! + move semantics ftw!