From: Thomas Walker Lynch Date: Sun, 23 Mar 2025 13:05:33 +0000 (+0000) Subject: yet another TM refactoring next .. X-Git-Url: https://git.reasoningtechnology.com/style/rt_dark_doc.css?a=commitdiff_plain;h=13e6356c08831e3e63d8d0fe437e3167db72bab2;p=N yet another TM refactoring next .. --- diff --git "a/developer/cc\360\237\226\211/TM.lib.c" "b/developer/cc\360\237\226\211/TM.lib.c" index 4580104..35a9ded 100644 --- "a/developer/cc\360\237\226\211/TM.lib.c" +++ "b/developer/cc\360\237\226\211/TM.lib.c" @@ -13,7 +13,18 @@ An Area with zero elements has 'length == 0' or is 'empty'. In contrast ,and area located (position specified) with a null pointer is said not to exist. - Template variable: CVT + ---- + + CVT is a template variable that affects type names that show on the interface. + They affect all implementations in the same way. + + There are multiple implementations, such as Array. Each has its own FG + table instance. For example Array·fg. Also each has its own Tableau type, for + example, `typedef struct { ... } Ξ(TM·Array ,CVT)` is the Tableau type for + the array implementation. + + This implementation uses pointer pairs to stand for 'TM type' that are + passed around. 
*/ @@ -47,80 +58,97 @@ #ifndef CVT typedef enum{ TM·Tape·Topo·mu = 0 - ,TM·Tape·Topo·nonexistent = 1 - ,TM·Tape·Topo·empty = 1 << 1 - ,TM·Tape·Topo·singleton = 1 << 2 - ,TM·Tape·Topo·segment = 1 << 3 - ,TM·Tape·Topo·circle = 1 << 4 - ,TM·Tape·Topo·tail_cyclic = 1 << 5 - ,TM·Tape·Topo·infinite = 1 << 6 + ,TM·Tape·Topo·empty = 1 + ,TM·Tape·Topo·singleton = 1 << 1 + ,TM·Tape·Topo·segment = 1 << 2 + ,TM·Tape·Topo·circle = 1 << 3 + ,TM·Tape·Topo·tail_cyclic = 1 << 4 + ,TM·Tape·Topo·infinite = 1 << 5 }TM·Tape·Topo; - const TM·Tape·Topo TM·Tape·Topo·finite_nz = - TM·Tape·Topo·singleton | TM·Tape·Topo·segment + const TM·Tape·Topo TM·Tape·Topo·bounded = + TM·Tape·Topo·singleton + | TM·Tape·Topo·segment ; // If tape machine does not support step left ,then Status·leftmost // will be reported as Status·interim. typedef enum{ TM·Head·Status·mu = 0 - ,TM·Head·Status·not_on_tape = 1 - ,TM·Head·Status·origin = 1 << 1 - ,TM·Head·Status·interim = 1 << 2 - ,TM·Head·Status·rightmost = 1 << 3 + ,TM·Head·Status·dismounted = 1 + ,TM·Head·Status·out_of_area = 1 << 1 + ,TM·Head·Status·leftmost = 1 << 2 + ,TM·Head·Status·interim = 1 << 3 + ,TM·Head·Status·rightmost = 1 << 4 } TM·Head·Status; const TM·Head·Status TM·Head·Status·on_tape = - TM·Head·Status·origin + TM·Head·Status·leftmost | TM·Head·Status·interim | TM·Head·Status·rightmost ; - typedef struct TM; + #endif // #ifndef CVT + + #ifdef CVT + + typedef Ξ(extent_t ,CVT) size_t; + + // instance struct with vtable pointer as first entry // tape and area are included with Tape Machine to facilitate abstract interfaces. 
typedef struct{ - Core·Status (*topo) (TM *tm ,TM·Tape·Topo *result); + TM·Tape·Topo (*Tape·topo)(TM *tm); + bool (*Tape·bounded)(TM *tm); + + TM·Head·Status (*Head·status)(TM *tm ,TM·Head·Status *status); + bool (*Head·on_tape)(TM *tm); + bool (*Head·on_leftmost) (TM *tm); + bool (*Head·on_rightmost)(TM *tm); // tape machine functions Core·Status (*mount) (TM *tm); Core·Status (*dismount)(TM *tm); - TM·Head·Status (*status) (TM *tm ,TM·Head·Status *status); - Core·Status (*head_on_format)(TM *tm ,bool *flag); - - bool (*can_read) (TM *tm); - bool (*on_leftmost) (TM *tm); - bool (*on_rightmost)(TM *tm); - void (*step) (TM *tm); void (*step_left) (TM *tm); - void (*step_right)(TM *tm); // Synonym for step void (*rewind) (TM *tm); - } TM·FG; - - #endif // #ifndef CVT + TM·FG TM·fg; // points to TM·FG instance + Ξ(extent_t ,CVT) (*extent)(TM *tm); + CVT (*read) (TM *tm); + void (*write)(TM *tm ,CVT *remote_pt); - #ifdef CVT + } Ξ(TM ,CVT)·FG; - typedef Ξ(extent_t ,CVT) size_t; + // array FG instance + + typedef struct Ξ(TM ,CVT)·Tableau; + typedef struct Ξ(TM·Array ,CVT)·Tableau; - // instance struct with vtable pointer as first entry + // `init` puts type consistent values in this struct typedef struct{ - Ξ(extent_t ,CVT) (*extent)(TM *tm); - CVT (*read) (TM *tm); - void (*write)(TM *tm ,CVT *remote_pt); + Ξ(TM ,CVT)·FG *fg; + Ξ(TM ,CVT)·Tableau *t; } Ξ(TM ,CVT); - // Array Initializers - typedef struct{ - TM *(*mount_pe)( Ξ(TM·Array ,CVT) *tm ,CVT position[] ,Ξ(extent_t ,CVT) extent ); - TM *(*mount_pp)( Ξ(TM·Array ,CVT) *tm ,CVT *position_left ,CVT *position_right ); - } Ξ(TM·Array ,CVT)·FG; + #define FG·call(tm, fn, ...) 
\ + ((tm)->fg->fn)((tm)->t, ##__VA_ARGS__) Ξ(TM ,CVT)·FG Ξ(TM·Array ,CVT)·fg; + + Ξ(TM ,CVT) Ξ(TM·Array ,CVT)·init_pe( + Ξ(TM·Array ,CVT)·Tableau t + ,CVT position[] + ,Ξ(extent_t ,CVT) extent + ); + + Ξ(TM ,CVT) Ξ(TM·Array ,CVT)·init_pp( + Ξ(TM·Array ,CVT)·Tableau t + ,CVT *position_left + ,CVT *position_right + ); #endif // #ifdef CVT @@ -157,185 +185,153 @@ "Null position.This is only possible when the tape machine has not been initialized."; const char *TM·Msg·flag="given NULL flag pointer"; const char *TM·Msg·result="given NULL result pointer"; + const char *TM·Msg·head="head not on tape"; - //----------------------------------- - // generic instance type, with vtable pointer at top + //---------------------------------------- + // TM Tableau, not CVT differentiated struct{ - TM·FG *fg; - } TM; + void *hd; + void *position; + }TM·Tableau; + + + //---------------------------------------- + // TM Array implementation, not CVT differentiated + + TM·Tape·Topo TM·Array·Tape·topo(TM·Tableau *t){ + if(!t || !t->position) return T·Tape·Topo·mu; + if(t->extent == 0) TM·Tape·Topo·singleton; + return TM·Tape·Topo·segment; + } + Local TM·Tape·Topo TM·Tape·bounded(TM·Tableau *t){ + return TM·tape_top(t) & TM·Tape·Topo·bounded; + } + + TM·Tape·Topo TM·Array·Tape·mount(TM·Tableau *t){ + if(!t || !t->position) return T·Tape·Topo·mu; + if(t->extent == 0) TM·Tape·Topo·singleton; + return TM·Tape·Topo·segment; + } + //----------------------------------- // generic call wrappers - Core·Status TM·topo(TM *tm ,TM·Tape·Topo *result){ - #ifdef TM·DEBUG - Core·Guard·init_count(chk); - Core·Guard·fg.check(&chk ,1 ,tm ,TM·Msg·tm); - Core·Guard·fg.check(&chk ,1 ,result ,TM·Msg·result); - Core·Guard·if_return(chk); - #endif - return tm->fg.topo(tm ,result); + Local TM·Tape·Topo TM·Tape·topo(TM *tm){ + return tm->fg.Tape·topo(tm); } - - // mount dismount - #define Core·Status_tm(name) \ - Local Core·Status TM##name##(TM *tm) { \ - #ifdef TM·DEBUG \ - Core·Guard·init_count(chk); \ - 
Core·Guard·fg.check(&chk, 1, tm, TM·Msg·tm); \ - Core·Guard·if_return(chk); \ - #endif \ - return tm->fg.##name##(tm); \ + Local TM·Tape·Topo TM·Tape·bounded(TM *tm){ + return TM·tape_top(tm) & TM·Tape·Topo·bounded); } - Core·Status_tm(mount); - Core·Status_tm(dismount); - - Local TM·Head·Status TM·status(TM *tm ,TM·Head·Status *result){ - #ifdef TM·DEBUG - Core·Guard·init_count(chk); - Core·Guard·fg.check(&chk ,1 ,tm ,TM·Msg·tm); - Core·Guard·fg.check(&chk ,1 ,result ,TM·Msg·result); - Core·Guard·if_return(chk); - #endif + Local TM·Head·Status TM·head_status(TM *tm){ return tm->fg.status(tm ,result); } - - // Stronger than `can_read`. Used mostly for debugging. - // as it checks for a legal head position. - Local Core·Status TM·head_on_format(TM *tm ,bool *flag){ - #ifdef TM·DEBUG - Core·Guard·init_count(chk); - Core·Guard·fg.check(&chk ,1 ,tm ,TM·Msg·tm); - Core·Guard·fg.check(&chk ,1 ,flag ,TM·Msg·flag); - Core·Guard·if_return(chk); - #endif - return tm->fg.head_on_format(tm ,flag); + Local TM·Head·Status TM·head_on_tape(TM *tm){ + return TM·status(tm) & TM·Head·Status·on_tape; } - bool bool_fn_tm(TM *tm){ - #ifdef TM·DEBUG - Core·Guard·init_count(chk); - Core·Guard·fg.check(&chk ,1 ,tm ,TM·Msg·tm); - Core·Guard·if_return(chk); - #endif - return tm->fg.read(tm); + // does nothing if the head is already mounted + Local Core·Status TM·mount(TM *tm){ + #ifdef TM·DEBUG + Core·Guard·init_count(chk); + Core·Guard·fg.check(&chk, 1, tm, TM·Msg·tm); + Core·Guard·if_return(chk); + #endif + if( !TM·head_on_tape(tm) ) return Core·Status·on_track; + return tm->fg.mount(tm); } - bool TM·on_leftmost(TM *tm){ - #ifdef TM·DEBUG - Core·Guard·init_count(chk); - Core·Guard·fg.check(&chk ,1 ,tm ,TM·Msg·tm); - Core·Guard·if_return(chk); + // does nothing if the head is already dismounted + Local Core·Status TM·dismount(TM *tm){ + #ifdef TM·DEBUG + Core·Guard·init_count(chk); + Core·Guard·fg.check(&chk, 1, tm, TM·Msg·tm); + Core·Guard·if_return(chk); #endif - return 
tm->fg.on_leftmost(tm); + if( TM·head_status(TM *tm) & TM·Head·Status·dismounted) ) return Core·Status·on_track; + return tm->fg.dismount(tm); } + + #define TM·macro·bool_tm(name) + Local bool name(TM *tm){ \ + #ifdef TM·DEBUG \ + Core·Guard·init_count(chk); \ + Core·Guard·fg.check( &chk ,1 ,TM·head_on_tape(tm) ,TM·Msg·head); \ + Core·Guard·assert(chk); \ + #endif \ + return tm->fg.name(tm); \ + } - bool TM·on_rigthmost(TM *tm){ - #ifdef TM·DEBUG - Core·Guard·init_count(chk); - Core·Guard·fg.check(&chk ,1 ,tm ,TM·Msg·tm); - Core·Guard·if_return(chk); - #endif - return tm->fg.on_rigthmost(tm); - } + Local bool TM·macro·bool_tm(on_leftmost); + Local bool TM·macro·bool_tm(on_rightmost); + Local bool TM·macro·bool_tm(step); + Local bool TM·macro·bool_tm(step_left); + Local bool TM·macro·bool_tm(rewind); + //---------------------------------------- + // Initialization for TM·fg + Local TM·FG TM·fg = { + .Tape·topo = TM·Tape·topo + ,.Tape·bounded = TM·Tape·bounded + ,.Head·status = TM·Head·status + ,.Head·on_tape = TM·Head·on_tape - Local bool TM·Given1(can_read); - Local bool TM·Given1(on_leftmost); - Local bool TM·Given1(on_rightmost); - Local bool TM·Given1(step); - Local bool TM·Given1(step_left); - Local bool TM·Given1(rewind); + ,.Head·on_leftmost = TM·Head·on_leftmost + ,.Head·on_rightmost = TM·Head·on_rightmost - // tm_can_read must be true for both machines. 
- void TM·copy_datum(TM *tm_read ,TM *tm_write){ - #ifdef TM·DEBUG - Core·Guard·init_count(chk); - bool flag = true ,s; - s = TM·head_on_format(tm_read ,flag) == Control·Status·on_track; - Core·Guard·fg.check(&chk ,1 ,s && flag ,"tm_read head off track"); - s = TM·head_on_format(tm_write ,flag) == Control·Status·on_track; - Core·Guard·fg.check(&chk ,1 ,s && flag ,"tm_write head off track"); - Core·Guard·assert(chk); - #endif + ,.mount = TM·mount + ,.dismount = TM·dismount - *(tm_write->hd) = *(tm_read->hd); - return Core·Status·on_track; - } + ,.step = TM·step + ,.step_left = TM·step_left + ,.step_right = TM·step // Synonym + ,.rewind = TM·rewind + + }; + + #endif // ifndef CVT + + //----------------------------------- + // CVT dependent functions - void TM·read(TM *tm ,CVT *read_pt){ + #ifdef CVT + + Local Ξ(extent_t ,CVT) Ξ(TM ,CVT)·extent(TM *tm){ #ifdef TM·DEBUG Core·Guard·init_count(chk); - bool flag = true ,s; - s = TM·head_on_format(tm ,flag) == Core·Status·on_track; - Core·Guard·fg.check(&chk ,1 ,s && flag ,"head off format"); + Core·Guard·fg.check(&chk ,1 ,TM·Tape·bounded(tm) ,"Tape is not bounded."); Core·Guard·assert(chk); #endif - - *read_pt = *(tm->hd); + return tm->fg.extent(tm); } - void TM·write(TM *tm ,CVT *write_pt){ + Local CVT TM·read(TM *tm){ #ifdef TM·DEBUG Core·Guard·init_count(chk); - bool flag = true ,s; - s = TM·head_on_format(tm ,flag) == Core·Status·on_track; - Core·Guard·fg.check(&chk ,1 ,s && flag ,"head off format"); + Core·Guard·fg.check( &chk ,1 ,TM·head_on_tape(tm) ,TM·Msg·head); Core·Guard·assert(chk); #endif - - *(tm->hd) = *write_pt; + return tm->fg.read(tm); } - // step_right is a synonym for step - - // check the topo to make sure tape has extent before calling this - // `extent·CVT` returns the index to the rightmost cell in the array. 
- Local Ξ(extent_t ,CVT) TM·extent(TM *tm){ + Local void TM·write(TM *tm ,CVT *write_pt){ #ifdef TM·DEBUG - Core·Guard·init_count(chk); - Core·Guard·fg.check(&chk ,1 ,tm ,TM·Msg·tm); + Core·Guard·init_count(chk); + Core·Guard·fg.check( &chk ,1 ,TM·head_on_tape(tm) ,TM·Msg·head); + Core·Guard·fg.check( &chk ,1 ,write_pt ,"Given NULL write_pt"); Core·Guard·assert(chk); #endif - return tm->fg.extent(tm); + return tm->fg.write(tm ,write_pt); } - - - //---------------------------------------- - // Initialization for TM·fg - - Local TM·FG TM·fg = { - .tape = { - .topo = TM·topo - .extent = TM·extent - } - - ,.area = { - .mount_pe = TM·mount_pe - ,.mount_pp = TM·mount_pp - } - - ,.mount = TM·mount - ,.dismount = TM·dismount - - ,.status = TM·status - ,.head_on_format = TM·head_on_format - - ,.can_read = TM·can_read - ,.on_origin = TM·on_origin - ,.on_rightmost = TM·on_rightmost - - ,.step = TM·step - ,.step_left = TM·step_left - ,.step_right = TM·step_right // Synonym for step - ,.rewind = TM·rewind - - ,.read = TM·read - ,.write = TM·write + Local Ξ(TM ,CVT)·FG Ξ(TM ,CVT)·fg = { + .parent = TM·fg + ,.extent = Ξ(TM ,CVT)·extent + ,.read = Ξ(TM ,CVT)·read + ,.write = Ξ(TM ,CVT)·write }; #endif // ifdef CVT @@ -371,8 +367,8 @@ const char *TM·Array·Msg·tm="given NULL tm"; const char *TM·Array·Msg·flag="given NULL flag pointer"; const char *TM·Array·Msg·result="given NULL result pointer"; - const char *TM·Array·Msg·position= - "Null position.This is only possible when the tape machine has not been initialized."; + const char *TM·Array·Msg·status="bad head status"; + #endif // #ifndef CVT @@ -387,37 +383,24 @@ //----------------------------------- // TM·Array.tape implementation - /* - For an Array Tape Machine ,a bound tape will be singleton or segment. - An initialized Array Tape Machine always has a bound tape. 
- */ - Core·Status Ξ(TM·Array ,CVT)·topo(Ξ(TM·Array ,CVT) *tm ,TM·Tape·Topo *result){ - #ifdef TM·DEBUG - Core·Guard·init_count(chk); - Core·Guard·fg.check(&chk ,1 ,tm ,TM·Array·Msg·tm); - if(tm) Core·Guard·fg.check(&chk ,1 ,tm->position ,TM·Array·Msg·position); - Core·Guard·fg.check(&chk ,1 ,result ,TM·Array·Msg·result); - Core·Guard·if_return(chk); - #endif - if(tm->extent == 0){ - *result = TM·Tape·Topo·singleton; - }else{ - *result = TM·Tape·Topo·segment; - } - return Core·Status·on_track; + // For an Array Tape Machine ,a bound tape will be singleton or segment. + TM·Tape·Topo Ξ(TM·Array ,CVT)·Tape·topo(Ξ(TM·Array ,CVT) *tm){ + if(!tm || !tm->position) return TM·Tape·Topo·mu; + if(tm->extent == 0) TM·Tape·Topo·singleton; + return TM·Tape·Topo·segment; } - // check the topo to make sure tape has extent before calling this + // check the Tape·topo to make sure tape has extent before calling this // `extent·CVT` returns the index to the rightmost cell in the array. Local Ξ(extent_t ,CVT) Ξ(TM·Array ,CVT)·extent(Ξ(TM·Array ,CVT) *tm){ #ifdef TM·DEBUG Core·Guard·init_count(chk); - Core·Tape·Topo topo = Core·Tape·Topo·mu; - Core·Status status = Ξ(TM·Array ,CVT)·topo(tm ,&topo); - bool good_topo = - (status == Core·Status·on_track) && (topo & Core·Tape·Topo·finite_nz) + Core·Tape·Topo Tape·topo = Core·Tape·Topo·mu; + Core·Status status = Ξ(TM·Array ,CVT)·Tape·topo(tm ,&Tape·topo); + bool good_Tape·topo = + (status == Core·Status·on_track) && (Tape·topo & Core·Tape·Topo·finite_nz) ; - Core·Guard·fg.check(&chk ,1 ,good_topo ,"Tape does not have an extent."); + Core·Guard·fg.check(&chk ,1 ,good_Tape·topo ,"Tape does not have an extent."); Core·Guard·assert(chk); #endif @@ -490,49 +473,17 @@ return Core·Status·on_track; } - Local TM·Head·Status Ξ(TM·Array ,CVT)·status( - Ξ(TM·Array ,CVT) *tm ,TM·Head·Status *status - ){ - #ifdef TM·DEBUG - Core·Guard·init_count(chk); - Core·Guard·fg.check(&chk ,1 ,tm ,TM·Array·Msg·tm); - Core·Guard·fg.check(&chk ,1 ,status ,"given NULL 
status pointer"); - Core·Guard·if_return(chk); - #endif - - if(tm->hd == NULL){ - *status = TM·Head·Status·not_on_tape; - }else if(tm->hd == tm->position){ - *status = TM·Head·Status·origin; - }else if(tm->hd == tm->position + tm->extent){ - *status = TM·Head·Status·rightmost; - }else{ - *status = TM·Head·Status·interim; - } - - return Core·Status·on_track; - } + Local TM·Head·Status Ξ(TM·Array ,CVT)·head_status(TM *tm){ + if(!tm || !tm->position) return TM·Head·Status·mu; + if(!tm->hd) return TM·Head·Status·dismounted; + if(tm->hd == tm->position) return TM·Head·Status·leftmost; - // Stronger than `can_read`. Used mostly for debugging. - // as it checks for a legal head position. - Local Core·Status Ξ(TM·Array ,CVT)·head_on_format( - Ξ(TM·Array ,CVT) *tm ,bool *flag - ){ - #ifdef TM·DEBUG - Core·Guard·init_count(chk); - Core·Guard·fg.check(&chk ,1 ,tm ,TM·Array·Msg·tm); - if(tm) Core·Guard·fg.check(&chk ,1 ,tm->position ,TM·Array·Msg·position); - Core·Guard·fg.check(&chk ,1 ,flag ,TM·Array·Msg·flag); - Core·Guard·if_return(chk); - #endif + CVT *rightmost_pt = tm->position + tm->extent; + if(tm->hd == rightmost_pt) TM·Head·Status·rightmost; + if(tm->hd < tm->position || tm->hd > rightmost_pt) + return TM·Head·Status·out_of_area; - *flag = - tm->hd - && tm->hd >= tm->position - && tm->hd - tm->position <= tm->extent - && ( (AU *)tm->hd - (AU *)tm->position ) % sizeof(CVT) == 0 // '%' expensive - ; - return Core·Status·on_track; + return TM·Head·Status·interim; } bool Ξ(TM·Array ,CVT)·can_read(Ξ(TM·Array ,CVT) *tm){ @@ -633,7 +584,7 @@ Local Ξ(TM·Array ,CVT)·FG Ξ(TM·Array ,CVT)·fg = { .tape = { - .topo = Ξ(TM·Array ,CVT)·topo + .Tape·topo = Ξ(TM·Array ,CVT)·Tape·topo .extent = Ξ(TM·Array ,CVT)·extent } diff --git "a/developer/document\360\237\226\211/generalizing_type.org" "b/developer/document\360\237\226\211/generalizing_type.org" deleted file mode 100644 index 73bba86..0000000 --- "a/developer/document\360\237\226\211/generalizing_type.org" +++ /dev/null @@ -1,109 
+0,0 @@ -* Generalizing type - -** Passing an TM FG table instance - -1. Suppose we have a generic TM FG table with the declared functions constituting the architecture (user's view, interface) of a tape machine. - -2. Instances of this TM FG table to represent different implementations of the architecture. Say TM·Array·fg for a TM implemented as an array, and TM·Function·fg for a function implementation (for example the Natural_Number in the Java implementation). - -3. Now suppose we have a function `map` that is given a TM_read, a TM_write, and a function. - The map does not care how the TM is implemented. - - If these first two arguments are declared as TM FG type, then they can accept any fg table whether it be an Array implementation, a Function implementation, etc. The map can then pick the functions it needs, such as step, and rightmost, from the - table, and thus get tape behavior. - - This achieves what we wanted, a map function that can use a tape machine and does not care what the implementation of that tape machine is. - - However, the TM functions that map calls require to be given a first argument of that holds the TM instance data. Without this it can not know which machine is being operated on. - Furthermore, the instance data type must match the TM fg table type. - - Given steps 1 through 3 we never passed in instance data to map, only a TM FG table insance. So map has a type, but not the data. - - -** Passing TM instance data, compile in the fg table. - -1. As noted, `map` is given a TM_read, a TM_write, and a function. For these first two - arguments, suppose we pass in the instance data, i.e. struct pointers. - -2. The TM·Array struct will hold a base pointer to the array, and the head location. The TM·Function struct would hold a handle recognized by the functions, or some state used by them. - -3. Then we have a map function for each architecture type, and we compile into each of these a call to the correct default TM·fg table. 
The map·Array will have compiled into it, TM·Array·fg, and the map·Function will have compiled into it TM·Function·fg etc. - -4. This approach works, but we duplicate the map code many times just to get the types to match the calls. - - -** Passing in both, the TM instance data, and corresponding TM FG tables instances. - -1. Say we have one generic `map` that given 5 arguments: TM_read (an instance), TM_read_fg, a TM_write (an instance), and TM_write_fg. - -2. The instance parameters must be declared `void *` to prevent argument checking on them, otherwise it will be necessary to implement many map functions for each instance type combination. - -3. This works, though we have no way to know if the correct type of instance data, that matches the fg table, was actually passed in. - -4. We could create many wrapper functions to accomplish the instance data type check, then to call a central map function with `void *` instance data pointers. With any luck the optimizer would make this extra layer go away, so it will be efficient. Still map now has 5 arguments instead the three that are natural for the problem. - - -** Using a "vtable" - -1. All TM instance types are structs with a first field that points to the fg table, - no matter the implementation type. This is not a serious requirement because - each implementation type already has an struct for its implementation data. However it does make an instance bigger, and it is not clear that the optimizer will be able to - discombobulation it all. - -2. Then a generic fg table is created, where the functions have the signatures as specified - in the FG table. However, each of these functions follows the fg table pointer in the - the instance data given to it as a first argument, and then calls that function. - -3. It works, and it is a tried and true method. - -** Using a pointer pair .. - -1. 
Instead of the instance data being called the 'TM type', a pair is the 'TM type', - the first member of the pair points to an instance, the second to a corresponding - fg table. - -2. Again there are some generic TM functions, each accepts a pair as a first argument, and then calls the corresponding fg table function while giving it the instance pointer - from the pair as the first argument. - -3. This approach packs the same information as the vtable approach. If there is a - one to one correspondence between instances and pairs, then it is larger by one pointer. - -4. This is a type of 'link' from our continuations, but the tableau pointer is gone due - to using a stack frame, and the continuation table pointer is gone due to flow through sequencing. Thus it is two pointers instead of 4. - -5. Hopefully the optimizer would recognize that the pair pointers are redundant. They are only needed to ensure that the instance and fg table go together. - -** Instance curried dispatch function - -1. In this case a pointer to a function is the 'TM type'. There is an enum that gives numeric tags to each of the functions in the FG struct instance. - -2. The tag for the desired fg function to be called is passed as the first argument to the dispatch function, and the remaining arguments are for the fg function. - -3. The dispatch function has the instance data and fg table pointer curried into it, so it then calls the correct fg function while handing it the correct instance data. - -4. In C it is not immediately obvious how to create a function on the fly each time an - instance is created, to curry the instance data into it, and then later how to call - the newly created function. - -** Self-typed Instance and dispatch function - -1. With this method, an extra field is added to the instance data that identifies its type. Then a globally available TM specific dispatch function is called to run the desired fg function on the data. 
It is similar to the dispatch function from above, but the instance data is an additional argument. - -2. The dispatch function uses the type information to know which fg table to call. This type information could be a pointer to the appropriate fg table. - -3. This approach differs from the vtable method in that all the arguments are given to the one TM dispatch function, and it figures out how to do the dispatch. There is not a wrapper-per-function. - -** Which approach? - -We want to be able to declare an instance as a local variable, where it will end up being on the stack frame. We want to avoid heap allocation for performance reasons, but not exclude use of heap memory. - -Dynamically created functions are out of the question. Dispatch approaches require the enum table so as to have tags. This is not show stopper. However, having one dispatch function means the argument types are not checked. That is a problem. - -Doubled up argument, one for the instance, the other for the table, either require wrappers, or not checking that an instance really goes with the data. Besides it is a lot of typing. - -The pointer pair approach requires the separate maintenance of the pair. When these are local variables, then the pair has to be tied to the instance. This would require the user to be aware of the existence of both the instance data and the pair, so as to pass them to an initializer. This could be avoided if the pair would combined with the instance, but that -is identical to the vtable approach (the instance pointer no longer being needed). - -So we are lead to the vtable. Though we are still hoping for the curried dispatch function - lol. 
- - diff --git "a/document\360\237\226\211/#Model.org#" "b/document\360\237\226\211/#Model.org#" new file mode 100644 index 0000000..83b69b5 --- /dev/null +++ "b/document\360\237\226\211/#Model.org#" @@ -0,0 +1,216 @@ +#+TITLE: Model +#+AUTHOR: Thomas +#+STARTUP: content + +* Tableau + +A Tableau is a type definition for a work area, typically implemented as a typedef struct in C. + +An instance of a Tableau is a tableau. There can be many instances of a given Tableau. + +There are multiple kinds of Tableau: + +- interface (also called a Face) +- state + +* ActionTable + + +An action is a function that operates on a tableau. + +Each action is an instance of an Action. An Action is a typedef for a function pointer. + +An ActionTable is a typedef of a struct, and an instance of it is said to be an action_table. +A given ActionTable can + +An instance of an ActionTable is an action_table. There can be many instances of a given ActionTable. + +Each function in an action_table is an action. + + +* Model + +A Model is a TableauFace, a TableauState, and an ActionTable. + +An instance of a Model is a model. + + +* Status bits or continuations? + +The problem I found with continuations is that when the library is +limited to a single status function for a model, there will necessarily +be many continuations from the status function, one for each shade of meaning that it can discern. To endow it with fewer would limit its usefulness for some scenario, perhaps rare, but still supported. + +In addition, not all statuses are independent. Thus there must be a priority scheme and then a choice of continuation, but the status function might not know the caller's +priorities. An alternative would be to take both continuations and break into multithreaded execution. However, many programs are designed to be single threaded. + +When status is used as an argument guard this often comes down to, "Is the passed in +status-bearing model operable or not?" 
Thus the tableau for the many-continuations status function will have many duplicate next pointers. + +If rather than returning continuations, a highly discerning status function were to return status bits, the caller could mask the bits and then choose among continuations. In the case of an argument guard, that means choosing between two paths. + +It is also a little funny when using a status function as an argument guard that the guard continues into the very function it is guarding. The tableau facilitates this +design pattern in that the input operands need not be written again. Though this +requires managing the tableau for the model whose status is being checked. + +An alternative to using status bits is to have more than one status function. One implementation is to embed the many-continuations status function inside the few-continuations status function, to perform the mask, and then pick the continuation. This might be a convenient function to have, but where a second status function really makes sense is where it reduces the amount of computation that need be done. + +* A Single Tableau per Model + +Inputs for actions are written to a tableau. A link currently holds an actions table pointer, and a tableau pointer, hence there is one tableau that holds both action inputs and model state variables. + +If this tableau is declared as part of the FACE then the user can access the model state variables. This would follow a philosophy of, "if they are useful, then so be it"; however exposing state variables limits both the number of models that can be made that provide the user with the same interface, and limits what can be done with maintenance edits without risking breaking the user's code. Recall that Model is an abstract type, and there can be many implementations for said type, i.e. many models for a given Model. + +C does not allow for public and private sections of a struct, or we could make the state variables private. 
+ +However, we can accomplish the same thing as public and private, perhaps in even a better way, by including the interface tableau in the full tableau, and have the code know it was given the full tableau: + +#+BEGIN_SRC c +typedef struct{ + ... +}X·TableauFace; + +// in the implementation + +typedef struct{ + X·TableauFace face; + ... +}X·Tableau; + +#+END_SRC + +The details of the TableauFace type are shown to the user. The actions are +declared to be given a TableauFace, so any direct calls will work. Inside each action the tableau is converted from TableauFace to Tableau. + +=> Core uses separate Face and State tableaux + +** Tableau structure + +The TableauFace has: +- all possible inputs +- results +- continuations + +Then the Tableau extension + +- state variables + + +* Continuation Link + +There are two dimensions of variation. 1) Multiple action_tables from a given ActionTable, and 2) multiple tableaux from a given Tableau. + +Each action_table instance of a given ActionTable can have very different function implementations, as only the function type signatures are specified in the ActionTable definition. + +The interface tableau, TableauFace, is designed against the ActionTable. Both deal with the user's view of the Model. In contrast each state tableau, TableauState is designed against a specific action_table. + +Hence, for each ActionTable declaration there will be a corresponding TableauFace declaration, unless by coincidence, an existing one can be recycled, though even then programmers would appreciate having matching names. This raises the question of whether the two structures can be combined into one. So doing would constrain our ability to make instances from them independently. + +A given action table can be used with multiple state tableaux. In other words, there can be multiple instances of the same type. + +Conversely a given tableau can be used with multiple action tables. 
This happens in +polymorphism, where a child tableau can be passed to a parent action. The child +parts are in an extended portion of the tableau, and thus not seen by the parent +action. + +In the one Tableau per model approach, each tableau instance is both state and inputs, hence each instance will have its own inputs. We would lose the ability to make independent state and interface instances. The interface tableau is playing the role +that is typically played by the stack frame. Hence there is a copy per call; with the one tableau per model approach, there would instead be a copy per instance. If left independent, there would be as many or few copies as the user determines is necessary, with the network of interface tableaux being connected by pointers tending to look like a circuit. + +** proposal 1 + +Place an action_table pointer in each single tableau per model tableau, then there would be many copies of the action_table pointer, one per tableau. This is the virtual table pointer approach from C++ or the delegate pointer from JavaScript. + +With this approach the action table can be reached through the tableau, so only the tableau, or references to it, need to be passed around as an instance of the model. This is appealing. + +However, such a link to such a tableau is not sufficient for knowing which action on the action table should be called. Hence a link to such an overloaded tableau can not function as a continuation link. + +Suppose that instead of an action_table link, a function link were embedded in the tableau, perhaps as a first member. Then the first member could be called and given the tableau as its only argument. This could function as a link, but then the state of the model is owned by a single function call. This looks problematic. + +** proposal 2 + +A link is a two pointer bundle. The first pointer to an action table, the second to a single tableau. Then there is a macro called `call` that is used for calling functions in the action table. 
``call(extent ,&tape_tableau)` + +The problem here is that such a link can not appear as a continuation because the link itself does not say which action is being invoked. That information is in the call. + +Attempts to save this proposal by including a 'field pointer' in the link, that would be applied to the action table are rather awkward in C, though it can be done by creating an enumeration of the struct offsets, and using those tags. + +**proposal 3 + +A three pointer bundle, the first pointer to the function being called, the second pointer to an interface tableau, the third pointer to the state tableau shared by the functions in the action_table the function comes from. + +In this approach there is not action_table pointer, though that is not a problem because if that information is needed it can be curried into the called function when it is written, though chances are if a given action needs to call another action function in the table, it will do so through a function pointer. The purposes of the action_table struct is then to document which actions share the corresponding tableau state, and to provide a namespace to put the actions in, which makes it conceptually nicer for the programmer. It would be even nicer if C supported Pascal's 'with' statement. + +Such a link could be reduced to two pointers if the single tableau variation is used. + +==> Core uses proposal 3 + + +* Mixing continuations with fall through execution. + +#+BEGIN_SRC c + Local void call(Core·Link *lnk){ + while(lnk) lnk = lnk->act(lnk); + } +#+END_SRC + +This implementation of call falls through when a null `lnk` pointer is returned. However the caller can not know anything about the status of the call. + +**proposal 1 + +Each continuation function that can return a NULL link, where the caller wants to know the status, also sets a `status` value at the top of the base tableau, that all FACE tableau's extend from. + +Disadvantage, all code must set status, so why have continuations? 
The optimizer will drop unused status setting code, so perhaps this is not serious. Though, on the other hand, for Local functions would the optimizer skip the status return and condition testing, and put the continuation in - thus making status setting the way to go as it must be coded anyway? + + +**proposal 2 + +Have actions that set status, link to them when status is desired. + +Actually we need default continuations anyway. In many cases these could +be generic. Argument guard continuations could be 'good' or 'bad'. + +=> Core uses option 2 + +* Continuations and linking + +**proposal 1 + +Put continuations on the interface tableau. The reasoning is that the programmer will want to set them, but actually this is not a general programming task. Rather it occurs up front as part of a 'wiring' phase, and is then typically it is not done again. + +Consider this example. + +#+BEGIN_SRC c + typedef struct{ + Core·Tape·Tableau·Face tape_tableau; + Core·Area *area; + void *position_left; + void *position_right; + AU *pt0; + AU *pt1 + Core·Area *a; + Core·Area *b; + bool q; // predicate -> q + Core·Link next; + }Core·Area·Tableau·Face; +#+END_SRC + +The idea is that there is one interface tableau used for all of the Area·Tableau actions, but it has a link field. Normally while the linked together functions are running, they can not set the 'next' link, as that information comes from a higher level. For example, after `encloses_pt_q` runs, it has no idea as to where the programmer wants control to flow next. + +**proposal 2 + +Add a fourth pointer to a link. This points to the continuations block. + +#+BEGIN_SRC c + typedef struct{ + Core·Tableau *face; + Core·Tableau *state; + Core·ActionTable *action; + Core·NextTable *next_table; + }Core·Link; +#+END_SRC + +We may then describe the program flow network as a series of next_tables, and then view the program as traversing a graph. 
In a database representation there is a set of nodes, and the dual graph is specified as a bridge table. When traversing a graph, it is more efficient to make the neighbor list a property of the node.
+ +Example: +#+BEGIN_SRC c +TM·x // variable x +TM·Array·i // variable i +#+END_SRC +In the first example `x` is a variable in the TM namespace. TM stands for Tape Machine. In the second example, `Array` is a namespace inside of the `TM` namespace, and `i` is a variable in the `Array` namespace. + +In general, in our shop, types and namespaces are written in PascalCase. Variable and function names are written in snake_case. When proper nouns, types, or namespacess, are represented in variable or function names, they maintain their capitalization, though appear with underscores. + +#+BEGIN_SRC c +char *person_Thomas = "Thomas"; +#+END_SRC + +** Comparison with C++ Namespaces + +Unlike C++ namespaces, our convention: +1. Does not provide actual scope isolation +2. Cannot be opened with `using` directives +3. Is purely a naming convention, not a language feature + +Despite these limitations, this convention provides many of the organizational benefits of true namespaces while remaining compatible with standard C. + +* Implementing Templates in C + +C lacks built-in support for templates or generics, unlike C++ or modern languages like Java or Rust. However, we can implement template-like functionality using the C preprocessor. This section explains the template system used throughout this document. + +** The Xi (Ξ) Macro System + +The cpp macro `Ξ` is used for putting identifiers into a namespace. Consider these variables within a namespace. + +#+BEGIN_SRC c +TM·x // variable x in namespace TM +TM·Array·i // variable i in namespace TM·Array +#+END_SRC + + +,#+BEGIN_SRC c +Ξ(TM ,x) // variable x in namespace TM +Ξ(TM·Array ,i) // variable i in namespace TM·Array +#+END_SRC + +The reason the macro is needed is that cpp will not replace a macro that is embedded in an identifier. 
With this macro we can do the following: + +#+BEGIN_SRC c + #define CVT uint32_t + CVT i; // -> uint32_t i + typedef struct Ξ(TM ,CVT) // -> typedef struct TM·uint32_t + #undef CVT + #define CVT string + typedef string char *; + CVT ch; // -> string ch; + typedef struct Ξ(TM ,CVT) // -> typedef struct TM·string +#+END_SRC + +This allows us to create type and function names that are specialized for specific types, while maintaining a consistent naming convention. + +*** Implementation Details + +The Ξ macro was initially defined for two operands, as shown below, though now it will take between zero and ten operands. With zero operands it expands to nothing, with one it echos the operand with no center dots, with more than that the operands are appended with a prior center dot. + +#+BEGIN_SRC c +#define _Ξ2(a, b) a##·##b +#define Ξ2(a, b) _Ξ2(a, b) +#+END_SRC + +** CVT: Cell Value Type + +Throughout our tape machine implementation, we use CVT (Cell Value Type) as a template parameter. This represents the type of data stored in each cell of the tape. + +The implementation uses conditional compilation to handle different CVT values: + +#+BEGIN_SRC c +#ifndef CVT + // Code for generic, non-specialized case +#endif + +#ifdef CVT + // Code specialized for a specific CVT +#endif +#+END_SRC + +When using the library, you include it multiple times with different CVT definitions: + +#+BEGIN_SRC c +// First include with CVT undefined for base definitions +#include "TM.lib.c" + +// Then include with CVT defined for each type specialization +#define CVT int +#include "TM.lib.c" +#undef CVT + +#define CVT float +#include "TM.lib.c" +#undef CVT +#+END_SRC + +* Tableau and FG Tables + +Two key concepts in our type abstraction approach are the "Tableau" and "Function Given (FG) Table." + +** Tableau + +A "Tableau" is our term for a data structure that holds the state of a particular instance. In the context of our tape machine example: + +1. 
**Definition**: A tableau is a struct that contains all the state variables needed for a specific implementation of an abstract interface. + +2. **Purpose**: The tableau serves as a shared workspace where functions can read and write state information. Think of it as a blackboard where functions can leave "notes" for each other. + +3. **Implementation-specific**: Each implementation of an abstract interface will have its own tableau structure with different fields appropriate to that implementation. + +For example, the tableau for an array-based tape machine might look like: + +#+BEGIN_SRC c +struct { + CVT *hd; // Current head position + CVT *position; // Base position for the tape (leftmost cell) + Ξ(extent_t, CVT) extent; // Largest index that can be used for accessing the tape +} Ξ(TM·Array, CVT); +#+END_SRC + +Notice this struct is templated on `CVT`. Suppose it were in the file `tape.lib.c`, then: + +#+BEGIN_SRC c +#define CVT uint8_t +#include `tape.lib.c` +typedef stirng char *; +#udef CVT +#define CVT string +#include `tape.lib.c` +#+END_SRC + +This will create two versions of said `struct` tableau with two different CVT types. + +** Function Given (FG) Tables + +The "Function Given" (FG) table is a collection of function pointers that implement operations on a specific tableau type. + +1. **Definition**: An FG table is a struct containing function pointers that all take a tableau as an argument (hence "Function Given"). + +2. **Purpose**: FG tables allow for polymorphic behavior in C by providing different implementations of the same interface. + +3. **Structure**: Each function in the FG table takes a pointer to a tableau as an argument, allowing it to access and modify the state. 
+ +A simplified example of an FG table for our tape machine might look like: + +#+BEGIN_SRC c +typedef struct { + TM·Tape·Topo (*Tape·topo)(TM *tm); + bool (*Tape·bounded)(TM *tm); + TM·Head·Status (*Head·status)(TM *tm); + Core·Status (*mount)(TM *tm); + void (*step)(TM *tm); + void (*rewind)(TM *tm); + // ... other functions +} TM·FG; +#+END_SRC + +When specialized for a specific cell value type (CVT), we might then have: + +#+BEGIN_SRC c +typedef struct { + TM·FG *fg; // Base function table + Ξ(extent_t, CVT) (*extent)(TM *tm); + CVT (*read)(TM *tm); + void (*write)(TM *tm, CVT *remote_pt); +} Ξ(TM, CVT)·FG; +#+END_SRC + +** The Relationship Between Tableaux and FG Tables + +The tableau and FG table work together to implement polymorphism: + +1. Each implementation provides its own tableau type and instance of an FG table. +2. The first field of a tableau is typically a pointer to its corresponding FG table. (When following the 'vtable' method.) +3. Functions in the FG table operate on the tableau, reading and writing its state. +4. Client code can work with any implementation by using the functions in the FG table without knowing the details of the tableau structure. + +This pattern allows us to achieve polymorphic behavior in C while maintaining type safety and avoiding the overhead of dynamic dispatch mechanisms like those used in object-oriented languages. + +** Template-Based Type Generation + +The template system generates several types of identifiers: + +1. **Type names**: `Ξ(TM, CVT)` becomes `TM·int` when CVT is defined as `int` +2. **Function names**: `Ξ(TM·Array, CVT)·init_pe` can become `TM·Array·int·init_pe` +3. **Variable names**: `Ξ(extent_t, CVT)` can become `extent_t·int` + +** Function Given (FG) Tables + +A key part of our implementation is the Function Given (FG) table pattern. Each implementation of the tape machine provides an FG table containing function pointers for its operations. 
+ +The FG tables are specialized based on the CVT: + +#+BEGIN_SRC c +typedef struct { + TM·FG *fg; // Base function table + Ξ(extent_t, CVT) (*extent)(TM *tm); + CVT (*read)(TM *tm); + void (*write)(TM *tm, CVT *remote_pt); +} Ξ(TM, CVT)·FG; +#+END_SRC + +This allows for type-safe operations while maintaining a consistent interface. + +** Benefits of This Approach + +1. **Type safety**: The compiler checks that the correct types are used with each specialized function +2. **Code reuse**: Common functionality is defined once and specialized for different types +3. **Consistent naming**: The Ξ macro ensures a consistent naming convention +4. **Modularity**: Different implementations can share the same interface + +** Limitations + +1. **Verbosity**: More verbose than native templates in C++ +2. **Complexity**: Requires understanding of preprocessor macros and conditional compilation +3. **Debugging**: Preprocessor-generated code can be harder to debug +4. **Limited IDE support**: Most IDEs don't understand this template system for code completion + +Despite these limitations, this approach provides a powerful way to implement generic, type-safe code in C without resorting to void pointers or code duplication. + +* Proposed approaches for generalizing type in C + +These are some of the approaches what were considered, or in some cases tried. + +** Using an TM FG table instance as an instance of 'TM type' + +1. Suppose we have a generic TM FG table with the declared functions constituting the architecture (user's view, interface) of a tape machine. + +2. Instances of this TM FG table represent different implementations of the architecture. For example, TM·Array·fg for a TM implemented as an array, and TM·Function·fg for a function implementation (similar to the Natural_Number in the Java implementation). + +3. Suppose we use an instance of a FG table to be the type for a tape machine. + +4. 
Now suppose we have a function called `map` that is given two Tape Machine arguments, say TM_read, and TM_write, as well as a function to map. + + The `map` only refers to the interfaces for the tape machines, and is agnostic as to how they are implemented. Hence, it works out fine that it is given two instances of the FG type. It uses the dot access on these the two given fg tables to find by name the tape machine functions it wants to call. While doing so it calls whichever function that happens to be implemented on the given fg table. The declared type signature match, so it has no problem to call them. + + This achieves what we wanted: a map function that can use a tape machine and does not care what the implementation of that tape machine is. + + However, the TM functions that map calls require having the shared tableau as one of their arguments. Without this, the called function cannot know which machine is being operated on. + Furthermore, the tableau type must match the TM fg table type. + + So giving a function, such as `map`, an FG table instance alone is not sufficient. It must + also have the tableau that does with the FG instance. But where to put it? There can be many tableau for a given FG table, each representing an instance of a tape machine, so we can not attach the tableau to the FG table. + + In conclusion this method does not work. + + +** The TM tableau instance as an instance of 'TM type' + +1. As noted in the prior section, our example `map` function is given three arguments, namely, TM_read, a TM_write, and a function. For these first two + arguments it was found not to be sufficient to pass the instance fg tables. Suppose that instead we make TM_read and TM_write the relative tableau. + +2. Now we have the opposite problem from before. We have the tableau for the function tables, but we do not have the function tables that are to be used with them. 
If we make the function tables global, then `map` will have to decide somehow which one to call, and make this decision at run time, as it can process instances of many types. In C this can not + be done from the type signature of the instance, because it is not available at run time. + +3. One solution is to keep the specific type on the tableau, and then have multiple `match functions` one per implementation (and per CVT template value), and then call the map function that goes with the tableau. The programmer picks the matching one, and the compiler verifies the types match at compile time. + +4. This approach works, but we duplicate the map code many times just to get the function type signatures to match the tableau type with the fg table used. + +** Using both the TM tableau instance, and the corresponding TM FG table instance + +1. Say we have one generic `map` that takes 5 arguments: TM_read (an tape machine tableau), TM_read_fg, a TM_write (a tape machine tableau), and TM_write_fg, and of course, all the a pointer to the function that map is to apply. + +2. The instance parameters must be declared `void *` to prevent argument checking on them; otherwise, it will be necessary to implement many map functions for each instance type combination. + +3. This works, though we have no way to know if the type of the given tableau goes with the type of the fg table, for each of the read tape machine and write tape machine. + +4. We could create many wrapper functions to accomplish the argument type checks, then call a central map function with `void *` instance data pointers, and with any luck, the optimizer would make this extra layer go away, so it would not be a run time performance drag. Still, by this method map has 5 arguments instead of the three that are natural for the problem. + +5. So this approach, with the delegation wrappers, does everything we want, but it feels unsatisfactory due to having to provide arguments in pairs. 
+ +** Tableau instances carries pointer to the matching FG table instance + +1. Each fg table is full of functions that are given a tableau of a specific type. Hence, it is practical to add a field to the tableau such that when an instance is made of the tableau, that this field is initialized with a pointer to its matching fg table. + +2. In practice then, the function that allocates a tape machine will actually be allocating a tableau of the correct type. This tableau would then be allocated on a stack frame or in a heap block. Following the allocation a type specific init function is called, and this function sets the fg pointer table, along with other variable initialization. + +3. Then a call to a function on the fg table looks like this: + + #+BEGIN_SRC c + + typedef struct{ + TM·FG fg; + } TM; + + typedef struct{ + TM tm; + uint8_t *hd; + uint8_t *position; + extent_t extent; + } TM·Array; + + ... + + TM *init(TM·Array *tm ,position ,extent); + + ... + + TM *tm_read; + TM·Array *tma; + + tm_read = *init(tma ,position ,extent); + + ... + + void map(TM *tm_read ...){ + ... + tm_read->fg->step(tm_read); + } + #+END_SRC + + Here `tma` has been allocated. `tm` is has not been allocated, but rather is a generic stand in for any tape machine tableau. + + `map` is given `TM *` type, so said tape machine could be of any implementation. All that matters is that the `TM` interface has been implemented. + +4. It works, and it is a tried and true method. It is a little funny that the programmer uses `tm_read` twice in the call, but at least the tm_read argument will be type checked. + +** Using a pointer pair as the `TM type'. + +1. Instead of the instance data being called the 'TM type', a pair is the 'TM type'. + The first member of the pair points to a tableau instance, the second to a matching FG + table instance. + +2. With this approach, generic TM functions each accept a pair as a TM argument. + +3. 
This approach encapsulates the same information as the vtable approach but keeps the relationship explicit. Note, if there is a one-to-one correspondence between tableaux and pairs, then the memory footprint is larger by one pointer per instance. + +4. An example + + #+BEGIN_SRC c + // Define the pair structure + typedef struct { + TM·Tableau *t; // Points to the implementation-specific tableau + TM·FG *fg; // Points to the corresponding FG table + } TM; + + // Implementation-specific tableau + typedef struct { + uint8_t *hd; + uint8_t *position; + extent_t extent; + } TM·Tableau·Array; + + // Function to initialize a pair + TM init_pair(TM·Tableau·Array *tma, uint8_t *position, extent_t extent) { + TM pair; + // Initialize the tableau + tma->hd = position; + tma->position = position; + tma->extent = extent; + + // Set up the pair + pair.t = (TM·Tableau *)tma; + pair.fg = &TM·Array·fg; + + return pair; + } + + // Using the pair in a function + void map(TM tm_read, ...) { + ... + + // Call functions through the pair + tm_read.fg->rewind(tm_read.t); + ... + + } + #+END_SRC + +5. Note that when using a pointer pair, the `init` function will be given the same arguments as for the vtable approach, but it will return the pair instead of a more generic pointer to the tableau. + +6. With this approach a pair of pointers is passed around instead of a single pointer. Though it might be more optimizer friendly, as it is easier to drop structures, such as the pair, than it is to rearrange structures as would have to be done to unpack the vtable pointer from the tableau structure. + +** Using a dispatch function as the `TM type'. + +1. In this case, a pointer to a function is the 'TM type'. There is an enum that gives numeric tags to each of the functions in the FG struct instance. + +2. The tag for the desired fg function to be called is passed as the first argument to the dispatch function, and the remaining arguments are for the fg function. + +3. 
The dispatch function has the instance data and fg table pointer curried into it, so it then calls the correct fg function while providing it with the correct instance data. + +4. In C, it is not immediately obvious how to create a function on the fly each time an + instance is created to curry the instance data into it, and then later how to call + the newly created function. + +** Dispatch function and Self-identified Tableau type. + +1. With this method, an extra field is added to each tableau instance to identify its type. Then a globally available TM-specific dispatch function is called to run the desired fg function on the data. It is similar to the dispatch function from above, but the instance data is an additional argument. + +2. The dispatch function uses the type information to know which fg table to call. This type information could be a pointer to the appropriate fg table. + +3. This approach differs from the vtable method in that all the arguments are given to the one TM dispatch function, and it figures out how to do the dispatch. There is not a wrapper-per-function. + +** Which approach? + +We want to be able to declare an instance as a local variable, where it will end up being on the stack frame. We want to avoid heap allocation for performance reasons, but not exclude the use of heap memory. + +Dynamically created functions are out of the question. Dispatch approaches require the enum table to have tags. This is not a show-stopper. However, having one dispatch function means the argument types are not checked. That is a problem. + +Doubled-up arguments (one for the instance, the other for the table) either require wrappers or do not check that an instance really goes with the data. Besides, this approach requires a lot of typing. + +The pointer pair approach becomes a maintenance problem, unless it is passed by value. When passed by value. When passed by value it is two pointers being copied instead of one. 
There is no corresponding savings on fewer levels of indirection on lookup. It does achieve separation of concerns, and would facilitate using the same instance with multiple fg tables, i.e. multiple 'views', that that is not a feature that is driving the decision process. + +So their are two practical approaches, the vtable, and the pointer pair. + +* Terminology + +** Model + +A 'model' consists of a Tableau definition, FG table definitions, and all of the type definitions that occur on the function signatures of those functions, all instances of the FG table, all while including the variations due to the template variables. This includes the function implementations, but does not include the values assigned to the tableaux. + +Going by this definition, it can be said that in the file TM.lib.c provides a Tape Machine model. + +** Architecture + +Architecture is the users view of the model. In the case of a software library, the user will be an application programmer. + +An architecture can also be a specification for which the model is an implementation. In which case, it says "The user must see this when using the model that is to be written." + +** FG table instances + +An FG table with template variabes, will expand out to one or more FG tables (without template variables), when those variables are bound to values. + +There can be multiple instances of an FG table. Each instance will consists of pointers to functions that have the type signature specified in the FG table. However, though all instances of the FG table share the same interface, their implementations can be completely different. + +Each template variable binding, and each FG table instance can have a different Tableau type as the given argument. For the Tape Machine, we give each tableau struct the name Ξ(TM·, CVT), where is the FG instance table type. For example, Ξ(TM·Array, CVT) is a struct of state variables for an Array implementation of the TM. 
+ +------------------------------------------------------------------- + +* Hierarchy + +The TM model has functions that are independent of the CVT parameter. + + +** One for all FG table + +The "TM.lib.c" file defines an FG type called Ξ(TM, CVT)·FG. Note that CVT is a template +parameter. Hence, giving different CVT bindings, there will be different FG tables. + +For each Ξ(TM, CVT)·FG, there can be a number of Ξ(TM·, CVT)·fg instances. For +example, Ξ(TM·Array, CVT)·fg. Each of these is a parameterized table. + +A user has an fg table to call functions from. This is either passed in or, more +likely, found at global scope after including "TM.lib.c", and then including it again for +each value of the CVT variable to be used. + +For each implementation type, there is a struct defined called Ξ(TM·, CVT), +note the lack of further suffix. Instances of this type are said to be instances of the tape +machine. `FG` stands for `function given`, and what each said function is given is one or more arguments of the Ξ(TM·, CVT) type. + +When a user wants a tape machine, the user makes an instance of a Ξ(TM·, CVT). +This is then handed to a function called Ξ(TM·, CVT)·init(). The init function is given whatever parameters are needed for initializing the instance, and it returns a more generalized pointer of type `Ξ(TM, CVT) *`. + +After initialization, the Ξ(TM·, CVT) instance will have a first field value that points to the Ξ(TM·, CVT)·fg table. However, the pointer to this instance will be of the type returned by init, which is `Ξ(TM, CVT) *` (notice it lost the implementation). This more generic pointer can now be used with the generic instance of the +Ξ(TM, CVT)·FG table, Ξ(TM, CVT)·fg, which holds wrapper functions that make use of the +first field value to find the real fg table and to call those functions. 
+ +Hence, after the user calls the correct init for the specific instance made, the resulting pointer points to the same type object independent of the implementation of the functions. Thus, code written to manipulate TMs can be written in a manner that is agnostic to the implementation of those TMs. + +When a function is called, say Ξ(TM, CVT)·fg.step(tm), it is passed the generic type `Ξ(TM, CVT) *`, tm. This will call the step wrapper function, which will in turn pass the same arguments (in this case, only `tm`) to the step function found in the fg table pointed to by the first member of the tm struct. + +** Separate FG tables + +In the flat fg table approach described in the prior section, each wrapper function table of the form Ξ(TM, CVT)·fg, obtained by setting the CVT template to a type value, will have a full set of wrapper function pointers. When a wrapper function is the same independent of the CVT variable, it still has a pointer copied into each fg table. Thus, there will be a lot of redundancy in the tables if many functions are not CVT-specific. + +For the layered fg table, there are separate wrapper function tables for the wrappers that are independent of CVT and those that are CVT-differentiated. The user then calls functions from the CVT-independent table when invoking those functions and from the CVT-differentiated functions table when calling those. So, for example, the user would call TM·fg.step(tm) to do a step, as that is CVT-independent, but call Ξ(TM, CVT)·fg.read(tm) to do a read, as the read value is CVT-differentiated. 
+ +** Direct access through instance field (Orrin suggestion) + +Instead of calling fg functions through a globally visible table such as: + +#+BEGIN_SRC c +fg->step(tm); // Wrapper style +#+END_SRC + +where fg is of type Ξ(TM, CVT)·FG and calls to it require globally accessible wrapper functions, + +we can use the instance's first field directly: + +#+BEGIN_SRC c +tm->fg->step(tm); +#+END_SRC + +Here, tm is a Ξ(TM, CVT) * — the generic type returned by init — and fg is the first field of the instance pointing to the correct FG table. + +This style removes the need for wrapper functions entirely. Each TM implementation embeds its corresponding FG table pointer directly in the instance. Thus, no global wrapper needs to be referenced, and function type-checking is preserved at compile time. + +If there are CVT-independent functions, we can use a layered FG table. Each CVT-specific FG table includes a pointer to a base table: + +#+BEGIN_SRC c +tm->fg->base->step(tm); // Access shared, CVT-independent function +tm->fg->read(tm); // Access CVT-specific function +#+END_SRC + +This structure mimics prototype delegation in JavaScript, except we resolve the chain explicitly with types, not dynamic name resolution. + +The benefit of this approach is that the user only needs to have the instance in scope — no global FG table declarations. All function dispatch is local and type-safe. + +The only remaining user burden is knowing whether a function is CVT-specific or not. This is made obvious by whether the function appears in the base or CVT-differentiated FG struct, and the compiler will catch mismatches. + +(Contribution by Orrin — co-author) + +* Observations + +C does not have automatic conversion of a child type to its parent; instead, it will give a type mismatch error. + +The vtable pattern with separate tables will have a CVT-differentiated fg table pointer on the tableau. 
Without the function wrappers, a call can occur like this: + +#+BEGIN_SRC c +tm->fg->step(tm) +#+END_SRC + +Or even with the wrappers and a globally available fg table: + +#+BEGIN_SRC c +fg->step(tm) +#+END_SRC + +In both of these, `step` will be the CVT-differentiated version of step, so types will match, and these calls work. + +However, things change when we want to call the non-CVT-differentiated shared table: + +#+BEGIN_SRC c +tm->TM·fg->mount(tm) +#+END_SRC + +Because `mount` is in the shared fg table of type `TM·FG`, it expects to be given arguments of type `TM *`, whereas `tm` has the CVT-differentiated type `Ξ(TM, CVT) *`, so there will be a type mismatch +error on the call. The programmer will be compelled to fix this with a cast: + +#+BEGIN_SRC c +tm->TM·fg->mount((TM *)tm) +#+END_SRC + +Requiring the programmer to defeat the type system by habit is not a good approach. + +So the one solution that does work is the one big table per type. All functions will then have correct signatures in the FG declaration. In the implementation, the more generic functions can be cast when assigned directly to the fg table function pointers, or wrappers that delegate can be written and assigned to the fg table function pointers. + +* Conclusion + +After examining various approaches to type abstraction in C, the vtable pattern emerges as the most practical solution for our tape machine implementation. While it has some overhead in terms of memory usage, it provides the best balance of type safety, ease of use, and performance. + +The direct access through instance field approach suggested by Orrin offers an elegant alternative that eliminates the need for wrapper functions while maintaining type safety. This approach is particularly appealing for its simplicity and the fact that it keeps all the necessary information localized to the instance.
+ +For projects where performance is critical, the "one big table per type" approach may be the most efficient, as it avoids the need for type casting while still providing a clean interface. + +Ultimately, the choice of approach depends on the specific requirements of the project, but the vtable pattern and its variations provide a solid foundation for type abstraction in C programming. diff --git "a/document\360\237\226\211/Abstracting_Type_in_C.pdf" "b/document\360\237\226\211/Abstracting_Type_in_C.pdf" new file mode 100644 index 0000000..ce5ac69 Binary files /dev/null and "b/document\360\237\226\211/Abstracting_Type_in_C.pdf" differ diff --git "a/document\360\237\226\211/Abstracting_Type_in_C.tex" "b/document\360\237\226\211/Abstracting_Type_in_C.tex" new file mode 100644 index 0000000..9c540d0 --- /dev/null +++ "b/document\360\237\226\211/Abstracting_Type_in_C.tex" @@ -0,0 +1,596 @@ +% Created 2025-03-23 Sun 06:28 +% Intended LaTeX compiler: pdflatex +\documentclass[11pt]{article} +\usepackage[utf8]{inputenc} +\usepackage[T1]{fontenc} +\usepackage{graphicx} +\usepackage{longtable} +\usepackage{wrapfig} +\usepackage{rotating} +\usepackage[normalem]{ulem} +\usepackage{amsmath} +\usepackage{amssymb} +\usepackage{capt-of} +\usepackage{hyperref} +\author{Thomas} +\date{\today} +\title{Abstracting Type in C} +\hypersetup{ + pdfauthor={Thomas}, + pdftitle={Abstracting Type in C}, + pdfkeywords={}, + pdfsubject={}, + pdfcreator={Emacs 29.4 (Org mode 9.6.15)}, + pdflang={English}} +\begin{document} + +\maketitle +\tableofcontents + + +\section{Introduction} +\label{sec:org8b18d97} + +Type abstraction is a fundamental challenge in C programming. Unlike object-oriented languages that provide built-in mechanisms for polymorphism, C requires explicit design patterns to achieve similar flexibility. This document explores various approaches to implementing type abstraction in C, using a tape machine (TM) as a concrete example. 
+ +The core problem we're addressing is how to write generic code that can operate on different implementations of the same abstract interface. Specifically, we want to create functions that can work with any tape machine implementation without knowing the implementation details. This separation of interface from implementation facilitates creating modular, maintainable code in C. + +Throughout this document, we use the notation Ξ(TM, CVT) to represent a generic type where TM is the tape machine and CVT is a template parameter for the cell value type. This allows us to discuss both implementation-agnostic and type-specific behaviors. + +\section{Namespaces in C} +\label{sec:org8165afa} + +C does not have built-in namespace support like C++ or other modern languages. However, we can simulate namespaces using naming conventions. In this document, we use the middle dot character (·) as a namespace separator to organize identifiers into logical groups. + +\subsection{The Middle Dot Convention} +\label{sec:org598d1d1} + +The middle dot (·) in identifiers like `TM·Array` or `TM·Function·fg` is not a special operator in C. It is literally part of the identifier name. This convention creates a visual hierarchy that mimics namespaces while remaining valid C code. 
+ +Examples: +```c +// TM is the primary namespace +// Array is a sub-namespace +// init\_pe is a function in that namespace +Ξ(TM·Array, CVT)·init\_pe + +// Core is the primary namespace +// Guard is a sub-namespace +// check is a function in that namespace +Core·Guard·fg.check(\&chk, 1, tm, TM·Msg·tm) +``` + +\subsection{Benefits of This Approach} +\label{sec:orgc15284c} + +\begin{enumerate} +\item \textbf{\textbf{Organization}}: Related functions and types are visually grouped together +\item \textbf{\textbf{Collision avoidance}}: Reduces the risk of name collisions in large codebases +\item \textbf{\textbf{Readability}}: Makes it clear which component a function or type belongs to +\item \textbf{\textbf{Consistency}}: Provides a uniform naming scheme across the codebase +\end{enumerate} + +\subsection{Implementation Details} +\label{sec:org4c7d92c} + +Since the middle dot is part of the identifier, the C compiler treats these names as atomic units. For example, `TM·Array` is a single identifier to the compiler, not two identifiers separated by an operator. + +The Ξ macro system, described later in this document, helps generate these namespace-like identifiers consistently: + +```c +// Generates TM·int +Ξ(TM, int) + +// Generates TM·Array·int·init\_pe +Ξ(TM·Array, int)·init\_pe +``` + +\subsection{Comparison with C++ Namespaces} +\label{sec:org420b759} + +Unlike C++ namespaces, our convention: +\begin{enumerate} +\item Does not provide actual scope isolation +\item Cannot be opened with `using` directives +\item Is purely a naming convention, not a language feature +\end{enumerate} + +Despite these limitations, this convention provides many of the organizational benefits of true namespaces while remaining compatible with standard C. + +\section{Tableau and FG Tables} +\label{sec:org8b1de5b} + +Two key concepts in our type abstraction approach are the "Tableau" and "Function Given (FG) Table."
Understanding these concepts is essential for following the rest of this document. + +\subsection{Tableau} +\label{sec:orgdef1786} + +A "Tableau" is our term for a data structure that holds the state of a particular instance. In the context of our tape machine example: + +\begin{enumerate} +\item \textbf{\textbf{Definition}}: A tableau is a struct that contains all the state variables needed for a specific implementation of an abstract interface. + +\item \textbf{\textbf{Purpose}}: The tableau serves as a shared workspace where functions can read and write state information. Think of it as a blackboard where functions can leave "notes" for each other. + +\item \textbf{\textbf{Implementation-specific}}: Each implementation of an abstract interface will have its own tableau structure with different fields appropriate to that implementation. +\end{enumerate} + +For example, the tableau for an array-based tape machine might look like: + +```c +struct \{ + CVT *hd; \emph{/ Current head position + CVT *position; /} Base position (leftmost cell) + Ξ(extent\textsubscript{t}, CVT) extent; // Number of cells in the tape +\} Ξ(TM·Array, CVT); +``` + +While a function-based implementation might have a completely different tableau structure. + +\subsection{Function Given (FG) Tables} +\label{sec:orgdfb6392} + +The "Function Given" (FG) table is a collection of function pointers that implement operations on a specific tableau type. + +\begin{enumerate} +\item \textbf{\textbf{Definition}}: An FG table is a struct containing function pointers that all take a tableau as their first argument (hence "Function Given"). + +\item \textbf{\textbf{Purpose}}: FG tables allow for polymorphic behavior in C by providing different implementations of the same interface. + +\item \textbf{\textbf{Structure}}: Each function in the FG table takes a pointer to a tableau as its first argument, allowing it to access and modify the state. 
+\end{enumerate} + +A simplified example of an FG table for our tape machine might look like: + +```c +typedef struct \{ + TM·Tape·Topo (*Tape·topo)(TM *tm); + bool (*Tape·bounded)(TM *tm); + TM·Head·Status (*Head·status)(TM *tm); + Core·Status (*mount)(TM *tm); + void (*step)(TM *tm); + void (*rewind)(TM *tm); + // \ldots{} other functions +\} TM·FG; +``` + +When specialized for a specific cell value type (CVT), we might have: + +```c +typedef struct \{ + TM·FG *fg; // Base function table + Ξ(extent\textsubscript{t}, CVT) (*extent)(TM *tm); + CVT (*read)(TM *tm); + void (*write)(TM *tm, CVT *remote\textsubscript{pt}); +\} Ξ(TM, CVT)·FG; +``` + +\subsection{The Relationship Between Tableaux and FG Tables} +\label{sec:org6e5cc1f} + +The tableau and FG table work together to implement polymorphism: + +\begin{enumerate} +\item Each implementation provides its own tableau type and corresponding FG table. +\item The first field of a tableau is typically a pointer to its corresponding FG table. +\item Functions in the FG table operate on the tableau, reading and writing its state. +\item Client code can work with any implementation by using the functions in the FG table without knowing the details of the tableau structure. +\end{enumerate} + +This pattern allows us to achieve polymorphic behavior in C while maintaining type safety and avoiding the overhead of dynamic dispatch mechanisms like those used in object-oriented languages. + +\section{Implementing Templates in C} +\label{sec:orge82e21e} + +C lacks built-in support for templates or generics, unlike C++ or modern languages like Java or Rust. However, we can implement template-like functionality using the C preprocessor. This section explains the template system used throughout this document. + +\subsection{The Xi (Ξ) Macro System} +\label{sec:org1ff0935} + +We use a special macro system, represented by the Greek letter Xi (Ξ), to create template-like behavior in C. 
This system allows us to parameterize types and functions, similar to templates in C++. + +The Ξ macro concatenates identifiers with a middle dot (·) separator: + +```c +Ξ(TM, CVT) -> TM·CVT +Ξ(TM·Array, CVT)·init -> TM·Array·CVT·init +``` + +This allows us to create type and function names that are specialized for specific types, while maintaining a consistent naming convention. + +\subsubsection{Implementation Details} +\label{sec:org08d9fe2} + +The Ξ macro system is implemented using variadic macros and the C preprocessor's token concatenation operator (\#\#): + +```c +\#define \_Ξ2(a, b) a\#\#·\#\#b +\#define Ξ2(a, b) \_Ξ2(a, b) +``` + +The system includes macros for different numbers of arguments (Ξ0 through Ξ10) and uses argument counting to automatically select the appropriate macro: + +```c +\#define Ξ(...) Ξ\_EXPAND(COUNT\_ARGS(\_\_VA\_ARGS\_\_), \_\_VA\_ARGS\_\_) +``` + +This allows for flexible usage with different numbers of template parameters. + +\subsection{CVT: Cell Value Type} +\label{sec:org8359158} + +Throughout our tape machine implementation, we use CVT (Cell Value Type) as a template parameter. This represents the type of data stored in each cell of the tape.
+ +The implementation uses conditional compilation to handle different CVT values: + +```c +\#ifndef CVT + // Code for generic, non-specialized case +\#endif + +\#ifdef CVT + // Code specialized for a specific CVT +\#endif +``` + +When using the library, you include it multiple times with different CVT definitions: + +```c +// First include with CVT undefined for base definitions +\#include "TM.lib.c" + +// Then include with CVT defined for each type specialization +\#define CVT int +\#include "TM.lib.c" +\#undef CVT + +\#define CVT float +\#include "TM.lib.c" +\#undef CVT +``` + +\subsection{Template-Based Type Generation} +\label{sec:orgcde6c14} + +The template system generates several types of identifiers: + +\begin{enumerate} +\item \textbf{\textbf{Type names}}: `Ξ(TM, CVT)` becomes `TM·int` when CVT is defined as `int` +\item \textbf{\textbf{Function names}}: `Ξ(TM·Array, CVT)·init\textsubscript{pe}` becomes `TM·Array·int·init\textsubscript{pe}` +\item \textbf{\textbf{Variable names}}: `Ξ(extent\textsubscript{t}, CVT)` becomes `extent\textsubscript{t}·int` +\end{enumerate} + +\subsection{Function Given (FG) Tables} +\label{sec:org691e6ba} + +A key part of our implementation is the Function Given (FG) table pattern. Each implementation of the tape machine provides an FG table containing function pointers for its operations. + +The FG tables are specialized based on the CVT: + +```c +typedef struct \{ + TM·FG *fg; // Base function table + Ξ(extent\textsubscript{t}, CVT) (*extent)(TM *tm); + CVT (*read)(TM *tm); + void (*write)(TM *tm, CVT *remote\textsubscript{pt}); +\} Ξ(TM, CVT)·FG; +``` + +This allows for type-safe operations while maintaining a consistent interface. 
+ +\subsection{Benefits of This Approach} +\label{sec:org5338608} + +\begin{enumerate} +\item \textbf{\textbf{Type safety}}: The compiler checks that the correct types are used with each specialized function +\item \textbf{\textbf{Code reuse}}: Common functionality is defined once and specialized for different types +\item \textbf{\textbf{Consistent naming}}: The Ξ macro ensures a consistent naming convention +\item \textbf{\textbf{Modularity}}: Different implementations can share the same interface +\end{enumerate} + +\subsection{Limitations} +\label{sec:org70b9fc6} + +\begin{enumerate} +\item \textbf{\textbf{Verbosity}}: More verbose than native templates in C++ +\item \textbf{\textbf{Complexity}}: Requires understanding of preprocessor macros and conditional compilation +\item \textbf{\textbf{Debugging}}: Preprocessor-generated code can be harder to debug +\item \textbf{\textbf{Limited IDE support}}: Most IDEs don't understand this template system for code completion +\end{enumerate} + +Despite these limitations, this approach provides a powerful way to implement generic, type-safe code in C without resorting to void pointers or code duplication. + +\section{Generalizing type} +\label{sec:orgbc78d46} + +\subsection{Passing a TM FG table instance} +\label{sec:org326c1a7} + +\begin{enumerate} +\item Suppose we have a generic TM FG table with the declared functions constituting the architecture (user's view, interface) of a tape machine. + +\item Instances of this TM FG table represent different implementations of the architecture. For example, TM·Array·fg for a TM implemented as an array, and TM·Function·fg for a function implementation (similar to the Natural\textsubscript{Number} in the Java implementation). + +\item Now suppose we have a function `map` that is given a TM\textsubscript{read}, a TM\textsubscript{write}, and a function. +The map does not care how the TM is implemented. 
+ +If these first two arguments are declared as TM FG type, then they can accept any fg table whether it be an Array implementation, a Function implementation, etc. The map can then pick the functions it needs, such as step and rightmost, from the +table, and thus get tape behavior. + +This achieves what we wanted: a map function that can use a tape machine and does not care what the implementation of that tape machine is. + +However, the TM functions that map calls require a first argument that holds the TM instance data. Without this, it cannot know which machine is being operated on. +Furthermore, the instance data type must match the TM fg table type. + +Given steps 1 through 3, we never passed in instance data to map, only a TM FG table instance. So map has a type, but not the data. +\end{enumerate} + + +\subsection{Passing TM instance data, compile in the fg table} +\label{sec:orged57256} + +\begin{enumerate} +\item As noted, `map` is given a TM\textsubscript{read}, a TM\textsubscript{write}, and a function. For these first two +arguments, suppose we pass in the instance data, i.e., struct pointers. + +\item The TM·Array struct will hold a base pointer to the array and the head location. The TM·Function struct would hold a handle recognized by the functions or some state used by them. + +\item Then we have a map function for each architecture type, and we compile into each of these a call to the correct default TM·fg table. The map·Array will have TM·Array·fg compiled into it, and the map·Function will have TM·Function·fg compiled into it, etc. + +\item This approach works, but we duplicate the map code many times just to get the types to match the calls. 
+\end{enumerate} + + +\subsection{Passing in both the TM instance data and corresponding TM FG tables instances} +\label{sec:orgf9f5316} + +\begin{enumerate} +\item Say we have one generic `map` that takes 5 arguments: TM\textsubscript{read} (an instance), TM\textsubscript{read}\textsubscript{fg}, a TM\textsubscript{write} (an instance), and TM\textsubscript{write}\textsubscript{fg}, plus the function to apply. + +\item The instance parameters must be declared `void *` to prevent argument checking on them; otherwise, it will be necessary to implement many map functions for each instance type combination. + +\item This works, though we have no way to know if the correct type of instance data that matches the fg table was actually passed in. + +\item We could create many wrapper functions to accomplish the instance data type check, then call a central map function with `void *` instance data pointers. With any luck, the optimizer would make this extra layer go away, so it will be efficient. Still, map now has 5 arguments instead of the three that are natural for the problem. +\end{enumerate} + + +\subsection{Using a "vtable"} +\label{sec:orga3c331a} + +\begin{enumerate} +\item All TM instance types are structs with a first field that points to the fg table, +regardless of the implementation type. This is not a serious requirement because +each implementation type already has a struct for its implementation data. However, it does make an instance bigger, and it is not clear that the optimizer will be able to +optimize it effectively. + +\item Then a generic fg table is created, where the functions have the signatures as specified +in the FG table. However, each of these functions follows the fg table pointer in the +instance data given to it as a first argument, and then calls that function. + +\item It works, and it is a tried and true method. 
+\end{enumerate} + +\subsection{Using a pointer pair} +\label{sec:orgc9d996e} + +\begin{enumerate} +\item Instead of the instance data being called the 'TM type', a pair is the 'TM type'. +The first member of the pair points to an instance, the second to a corresponding +fg table. + +\item Again there are some generic TM functions, each accepts a pair as a first argument, and then calls the corresponding fg table function while giving it the instance pointer +from the pair as the first argument. + +\item This approach packs the same information as the vtable approach. If there is a +one-to-one correspondence between instances and pairs, then it is larger by one pointer. + +\item This is a type of 'link' from our continuations, but the tableau pointer is gone due +to using a stack frame, and the continuation table pointer is gone due to flow-through sequencing. Thus it is two pointers instead of 4. + +\item Hopefully, the optimizer would recognize that the pair pointers are redundant. They are only needed to ensure that the instance and fg table go together. +\end{enumerate} + +\subsection{Instance curried dispatch function} +\label{sec:orga7d5cdc} + +\begin{enumerate} +\item In this case, a pointer to a function is the 'TM type'. There is an enum that gives numeric tags to each of the functions in the FG struct instance. + +\item The tag for the desired fg function to be called is passed as the first argument to the dispatch function, and the remaining arguments are for the fg function. + +\item The dispatch function has the instance data and fg table pointer curried into it, so it then calls the correct fg function while providing it with the correct instance data. + +\item In C, it is not immediately obvious how to create a function on the fly each time an +instance is created to curry the instance data into it, and then later how to call +the newly created function. 
+\end{enumerate} + +\subsection{Self-typed Instance and dispatch function} +\label{sec:orge6b7e0a} + +\begin{enumerate} +\item With this method, an extra field is added to the instance data that identifies its type. Then a globally available TM-specific dispatch function is called to run the desired fg function on the data. It is similar to the dispatch function from above, but the instance data is an additional argument. + +\item The dispatch function uses the type information to know which fg table to call. This type information could be a pointer to the appropriate fg table. + +\item This approach differs from the vtable method in that all the arguments are given to the one TM dispatch function, and it figures out how to do the dispatch. There is not a wrapper-per-function. +\end{enumerate} + +\subsection{Which approach?} +\label{sec:org6c9a58c} + +We want to be able to declare an instance as a local variable, where it will end up being on the stack frame. We want to avoid heap allocation for performance reasons, but not exclude the use of heap memory. + +Dynamically created functions are out of the question. Dispatch approaches require the enum table to have tags. This is not a show-stopper. However, having one dispatch function means the argument types are not checked. That is a problem. + +Doubled-up arguments (one for the instance, the other for the table) either require wrappers or do not check that an instance really goes with the data. Besides, it requires a lot of typing. + +The pointer pair approach requires the separate maintenance of the pair. When these are local variables, the pair has to be tied to the instance. This would require the user to be aware of the existence of both the instance data and the pair, to pass them to an initializer. This could be avoided if the pair were combined with the instance, but that +is identical to the vtable approach (the instance pointer no longer being needed). 
+ +So we are led to the vtable approach, though we are still hoping for the curried dispatch function - lol. + +\section{Implementation Details} +\label{sec:org009bd61} + +\subsection{Main objects} +\label{sec:org4c8ad02} + +\begin{enumerate} +\item Ξ(TM, CVT)·FG table (CVT is a template for the tape cell value type). +\item Ξ(TM, CVT) instance, typically declared as a local variable. +\item FG Ξ(TM·Array, CVT)·fg, which is a Ξ(TM, CVT)·FG type, provides pointers to function +wrappers that are given a Ξ(TM, CVT) instance. Each wrapper then calls the function found in the +instance's vtable. +\end{enumerate} + +\subsection{Declaration} +\label{sec:org2d5c70b} + +\begin{enumerate} +\item With CVT not defined, \#include "TM.lib.c" in FACE section. +\item With each different CVT value defined, \#include "TM.lib.c" in FACE section again. +\item Do the same in the IMPLEMENTATION section. +\end{enumerate} + +\subsection{Instantiation} +\label{sec:orgf6668e0} + +\begin{enumerate} +\item Define a local variable, or get from malloc, a Ξ(TM, CVT) type, say `tm`. +\item Call init(tm). +\end{enumerate} + +\subsection{Use} +\label{sec:orgcb2825f} + +\begin{enumerate} +\item Call, for example, Ξ(TM·Array, CVT)·fg.step(tm), where tm is a Ξ(TM, CVT) type. +\end{enumerate} + +\subsection{FG table instances} +\label{sec:org4267ad9} + +There can be multiple instances of an FG table, each with functions that are implemented differently, though they share the same FG table type. + +Each CVT and FG table instance type will have a different tableau struct. Each tableau struct will have the type Ξ(TM·\textit{Impl}, CVT), where \textit{Impl} is the FG instance table type. For example, Ξ(TM·Array, CVT) is a struct of state variables for an Array implementation of the TM. + +\subsection{One for all FG table} +\label{sec:orga349d27} + +The "TM.lib.c" file defines an FG type called Ξ(TM, CVT)·FG. Note that CVT is a template +parameter. Hence, given different CVT bindings, there will be different FG tables.
+ +For each Ξ(TM, CVT)·FG, there can be a number of Ξ(TM·, CVT)·fg instances. For +example, Ξ(TM·Array, CVT)·fg. Each of these is a parameterized table. + +A user has an fg table to call functions from. This is either passed in or, more +likely, found at global scope after including "TM.lib.c", and then including it again for +each value of the CVT variable to be used. + +For each implementation type, there is a struct defined called Ξ(TM·, CVT), +note the lack of further suffix. Instances of this type are said to be instances of the tape +machine. `FG` stands for `function given`, and what each said function is given is one or more arguments of the Ξ(TM·, CVT) type. + +When a user wants a tape machine, the user makes an instance of a Ξ(TM·, CVT). +This is then handed to a function called Ξ(TM·, CVT)·init(). The init function is given whatever parameters are needed for initializing the instance, and it returns a more generalized pointer of type `Ξ(TM, CVT) *`. + +After initialization, the Ξ(TM·, CVT) instance will have a first field value that points to the Ξ(TM·, CVT)·fg table. However, the pointer to this instance will be of the type returned by init, which is `Ξ(TM, CVT) *` (notice it lost the implementation). This more generic pointer can now be used with the generic instance of the +Ξ(TM, CVT)·FG table, Ξ(TM, CVT)·fg, which holds wrapper functions that make use of the +first field value to find the real fg table and to call those functions. + +Hence, after the user calls the correct init for the specific instance made, the resulting pointer points to the same type object independent of the implementation of the functions. Thus, code written to manipulate TMs can be written in a manner that is agnostic to the implementation of those TMs. + +When a function is called, say Ξ(TM, CVT)·fg.step(tm), it is passed the generic type `Ξ(TM, CVT) *`, tm. 
This will call the step wrapper function, which will in turn pass the same arguments (in this case, only `tm`) to the step function found in the fg table pointed to by the first member of the tm struct. + +\subsection{Separate FG tables} +\label{sec:orgafbf17c} + +In the flat fg table approach described in the prior section, each wrapper function table of the form Ξ(TM, CVT)·fg, obtained by setting the CVT template to a type value, will have a full set of wrapper function pointers. When a wrapper function is the same independent of the CVT variable, it still has a pointer copied into each fg table. Thus, there will be a lot of redundancy in the tables if many functions are not CVT-specific. + +For the layered fg table, there are separate wrapper function tables for the wrappers that are independent of CVT and those that are CVT-differentiated. The user then calls functions from the CVT-independent table when invoking those functions and from the CVT-differentiated functions table when calling those. So, for example, the user would call TM·fg.step(tm) to do a step, as that is CVT-independent, but call Ξ(TM, CVT)·fg.read(tm) to do a read, as the read value is CVT-differentiated. + +\subsection{Direct access through instance field (Orrin suggestion)} +\label{sec:orgfe58dd7} + +Instead of calling fg functions through a globally visible table such as: + +```c +fg->step(tm); // Wrapper style +``` + +where fg is of type Ξ(TM, CVT)·FG and calls to it require globally accessible wrapper functions, + +we can use the instance's first field directly: + +```c +tm->fg->step(tm); +``` + +Here, tm is a Ξ(TM, CVT) * — the generic type returned by init — and fg is the first field of the instance pointing to the correct FG table. + +This style removes the need for wrapper functions entirely. Each TM implementation embeds its corresponding FG table pointer directly in the instance. Thus, no global wrapper needs to be referenced, and function type-checking is preserved at compile time. 
+ +If there are CVT-independent functions, we can use a layered FG table. Each CVT-specific FG table includes a pointer to a base table: + +```c +tm->fg->base->step(tm); // Access shared, CVT-independent function +tm->fg->read(tm); // Access CVT-specific function +``` + +This structure mimics prototype delegation in JavaScript, except we resolve the chain explicitly with types, not dynamic name resolution. + +The benefit of this approach is that the user only needs to have the instance in scope — no global FG table declarations. All function dispatch is local and type-safe. + +The only remaining user burden is knowing whether a function is CVT-specific or not. This is made obvious by whether the function appears in the base or CVT-differentiated FG struct, and the compiler will catch mismatches. + +(Contribution by Orrin — co-author) + +\section{Observations} +\label{sec:org46e6336} + +C does not have automatic conversion of a child type to its parent; instead, it will give a type mismatch error. + +The vtable pattern with separate tables will have a CVT-differentiated fg table pointer on the tableau. Without the function wrappers, a call can occur like this: + +```c +tm->fg->step(tm) +``` + +Or even with the wrappers and a globally available fg table: + +```c +fg->step(tm) +``` + +In both of these, `step` will be the CVT-differentiated version of step, so types will match, and these calls work. + +However, things change when we want to call the non-CVT-differentiated shared table: + +```c +tm->TM·fg->mount(tm) +``` + +Because `mount` is in the shared fg table of type `TM·FG`, it expects to be given arguments of type `TM *`, whereas `tm` has the CVT-differentiated type `Ξ(TM, CVT) *`, so there will be a type mismatch +error on the call. The programmer will be compelled to fix this with a cast: + +```c +tm->TM·fg->mount((TM *)tm) +``` + +Requiring the programmer to defeat the type system by habit is not a good approach. + +So the one solution that does work is the one big table per type.
All functions will then have correct signatures in the FG declaration. In the implementation, the more generic functions can be cast when assigned directly to the fg table function pointers, or wrappers that delegate can be written and assigned to the fg table function pointers. + +\section{Conclusion} +\label{sec:org1380bd0} + +After examining various approaches to type abstraction in C, the vtable pattern emerges as the most practical solution for our tape machine implementation. While it has some overhead in terms of memory usage, it provides the best balance of type safety, ease of use, and performance. + +The direct access through instance field approach suggested by Orrin offers an elegant alternative that eliminates the need for wrapper functions while maintaining type safety. This approach is particularly appealing for its simplicity and the fact that it keeps all the necessary information localized to the instance. + +For projects where performance is critical, the "one big table per type" approach may be the most efficient, as it avoids the need for type casting while still providing a clean interface. + +Ultimately, the choice of approach depends on the specific requirements of the project, but the vtable pattern and its variations provide a solid foundation for type abstraction in C programming. +\end{document} diff --git "a/document\360\237\226\211/emacs_keys.el" "b/document\360\237\226\211/emacs_keys.el" new file mode 100644 index 0000000..566c808 --- /dev/null +++ "b/document\360\237\226\211/emacs_keys.el" @@ -0,0 +1,84 @@ +;; the sake of sanity... +;; +(global-set-key (kbd "C-z") nil) ;; turn off the poison C-z key. 
Use C-x C-z or the command suspend-emacs +(global-set-key (kbd "C-v") nil) ;; tempting to put paste (yank) for a common typo, but at least let's not jump down the page +;; would be nice to clear C-c but minor modes redefine it + + +;;-------------------------------------------------------------------------------- +;; extended character set for programming examples in the TTCA book +;; +;; preferable to use an Xcompose file definition when available +;; +(when t + + (global-set-key [f1] 'help-command) + (global-set-key "\C-h" 'nil) + (define-key key-translation-map (kbd "M-S") (kbd "§")) + + (global-set-key (kbd "C-x g copyright SPC") [?©]) + + (global-set-key (kbd "C-x g phi SPC") [?φ]) ; phi for phase + (global-set-key (kbd "C-x g Phi SPC") [?Φ]) + + (global-set-key (kbd "C-x g d SPC") [?δ]) + (global-set-key (kbd "C-x g D SPC") [?Δ]) ; this is 'delta', not 'increment'! + (global-set-key (kbd "C-x g delta SPC") [?δ]) + (global-set-key (kbd "C-x g Delta SPC") [?Δ]) ; this is 'delta', not 'increment'!
+ + (global-set-key (kbd "C-x g g SPC") [?γ]) + (global-set-key (kbd "C-x g G SPC") [?Γ]) + (global-set-key (kbd "C-x g gamma SPC") [?γ]) + (global-set-key (kbd "C-x g Gamma SPC") [?Γ]) + + (global-set-key (kbd "C-x g l SPC") [?λ]) + (global-set-key (kbd "C-x g L SPC") [?Λ]) + (global-set-key (kbd "C-x g lambda SPC") [?λ]) + (global-set-key (kbd "C-x g Lambda SPC") [?Λ]) + + (global-set-key (kbd "C-x g m SPC") [?μ]) + (global-set-key (kbd "C-x g M SPC") [?Μ]) + (global-set-key (kbd "C-x g mu SPC") [?μ]) + (global-set-key (kbd "C-x g Mu SPC") [?Μ]) + + (global-set-key (kbd "C-x g p SPC") [?π]) + (global-set-key (kbd "C-x g P SPC") [?Π]) + (global-set-key (kbd "C-x g pi SPC") [?π]) + (global-set-key (kbd "C-x g Pi SPC") [?Π]) + + (global-set-key (kbd "C-x g x SPC") [?ξ]) + (global-set-key (kbd "C-x g X SPC") [?Ξ]) + (global-set-key (kbd "C-x g xi SPC") [?ξ]) + (global-set-key (kbd "C-x g Xi SPC") [?Ξ]) + + (global-set-key (kbd "C-x g > = SPC") [?≥]) + (global-set-key (kbd "C-x g < = SPC") [?≤]) + (global-set-key (kbd "C-x g ! = SPC") [?≠]) + (global-set-key (kbd "C-x g neq SPC") [?≠]) + + (global-set-key (kbd "C-x g nil SPC") [?∅]) + + (global-set-key (kbd "C-x g not SPC") [?¬]) + + (global-set-key (kbd "C-x g and SPC") [?∧]) + (global-set-key (kbd "C-x g or SPC") [?∨]) + + (global-set-key (kbd "C-x g exists SPC") [?∃]) + (global-set-key (kbd "C-x g all SPC") [?∀]) + + (global-set-key (kbd "C-x g do SPC") [?⟳]) ; do + (global-set-key (kbd "C-x g rb SPC") [?◨]) + (global-set-key (kbd "C-x g lb SPC") [?◧]) + + (global-set-key (kbd "C-x g cont SPC") [?➜]) ; continue + (global-set-key (kbd "C-x g thread SPC") [?☥]) ; thread + + (global-set-key (kbd "C-x g in SPC") [?∈]) ; set membership + + (global-set-key (kbd "C-x g times SPC") [?×]) ; multiplication sign + + (global-set-key (kbd "C-x g cdot SPC") [?·]) ; scoping separator for gcc C + + (global-set-key (kbd "C-x g pencil SPC") [?🖉]) ; pencil glyph, as seen in directory names such as document🖉 + +)