From: Thomas Walker Lynch Date: Thu, 6 Mar 2025 10:48:16 +0000 (+0000) Subject: full refactored Core.lib.c X-Git-Url: https://git.reasoningtechnology.com/style/rt_dark_doc.css?a=commitdiff_plain;h=be0f73e123b15032b84134afe1b530535ebd8918;p=N full refactored Core.lib.c --- diff --git "a/developer/cc\360\237\226\211/Copy.lib.c" "b/developer/cc\360\237\226\211/Copy.lib.c" deleted file mode 100644 index f93e599..0000000 --- "a/developer/cc\360\237\226\211/Copy.lib.c" +++ /dev/null @@ -1,347 +0,0 @@ -/* - CoreCopy - Memory copy operations with attention to alignment. - Provides optimized copy and byte order reversal functions. - - 'ATP' At This Point in the code. Assertions follow. -*/ - -#define CoreCopy·DEBUG - -#ifndef FACE -#define CoreCopy·IMPLEMENTATION -#define FACE -#endif - -//-------------------------------------------------------------------------------- -// Interface - -#ifndef CoreCopy·FACE -#define CoreCopy·FACE - - #include - #include - - #define extentof(x) (sizeof(x) - 1) - #define extent_t size_t - - typedef struct{ - void *read0; - extent_t read_extent; - void *write0; - extent_t write_extent; - } CoreCopy·It; - - typedef enum{ - CoreCopy·It·Status·valid = 0 - ,CoreCopy·It·Status·null_read - ,CoreCopy·It·Status·null_write - ,CoreCopy·It·Status·overlap - } CoreCopy·It·Status; - - typedef enum{ - CoreCopy·Step·perfect_fit = 0 - ,CoreCopy·Step·argument_guard - ,CoreCopy·Step·read_surplus - ,CoreCopy·Step·read_surplus_write_gap - ,CoreCopy·Step·write_available - ,CoreCopy·Step·write_gap - } CoreCopy·Status; - - typedef struct{ - bool CoreCopy·IntervalPts·in(void *pt, void *pt0 ,void *pt1); - bool CoreCopy·IntervalPts·contains(void *pt00 ,void *pt01 ,void *pt10 ,void *pt11); - bool CoreCopy·IntervalPts·overlap(void *pt00 ,void *pt01, void *pt10 ,void *pt11); - - bool CoreCopy·IntervalPtSize·in(void *pt, void *pt0 ,size_t s); - bool CoreCopy·IntervalPtSize·overlap(void *pt00 ,size_t s0, void *pt10 ,size_t s1); - - CoreCopy·It·Status CoreCopy·wellformed_it(CoreCopy·It *it) - - void *identity(void *read0 ,void *read1 ,void *write0); - void *reverse_byte_order(void *read0 ,void *read1 ,void *write0); - - CoreCopy·Status CoreCopy·Step·identity(CoreCopy·It *it); - CoreCopy·Status CoreCopy·Step·reverse_order(CoreCopy·It *it); - CoreCopy·Status CoreCopy·Step·write_hex(CoreCopy·It *it); - CoreCopy·Status CoreCopy·Step·read_hex(CoreCopy·It *it); - } CoreCopy·M; - -#endif - -//-------------------------------------------------------------------------------- -// Implementation - -#ifdef CoreCopy·IMPLEMENTATION - - #ifdef CoreCopy·DEBUG - #include - #endif - - // this part goes into Copylib.a - // yes this is empty, so there is no Copylib.a - #ifndef LOCAL - #endif - - #ifdef LOCAL - - // Interval predicates. - // Intervals in Copy have inclusive bounds - - Local bool CoreCopy·aligned64(void *p){ - return ((uintptr_t)p & 0x7) == 0; - } - - Local bool CoreCopy·IntervalPts·in(void *pt, void *pt0 ,void *pt1){ - return pt >= pt0 && pt <= pt1; // Inclusive bounds - } - - Local bool CoreCopy·in_extent_interval(void *pt, void *pt0 ,extent_t e){ - return CoreCopy·IntervalPts·in(pt ,pt0 ,pt0 + e); - } - - // interval 0 contains interval 1, overlap on boundaries allowed. - Local bool CoreCopy·IntervalPts·contains( - void *pt00 ,void *pt01 ,void *pt10 ,void *pt11 - ){ - return pt10 >= pt00 && pt11 <= pt01; - } - - // interval 0 properly contains interval 1, overlap on boundaries not allowed. - Local bool CoreCopy·contains_proper_pt_interval( - void *pt00 ,void *pt01 ,void *pt10 ,void *pt11 - ){ - return pt10 > pt00 && pt11 < pt01; - } - - // Possible cases of overlap, including just touching - // 1. interval 0 to the right of interval 1, just touching p00 == p11 - // 2. interval 0 to the left of interval 1, just touching p01 == p10 - // 3. interval 0 wholly contained in interval 1 - // 4. interval 0 wholly contains interval 1 - Local bool CoreCopy·IntervalPts·overlap(void *pt00 ,void *pt01, void *pt10 ,void *pt11){ - return - CoreCopy·IntervalPts·in(pt00 ,pt10 ,pt11) // #1, #3 - || CoreCopy·IntervalPts·in(pt10 ,pt00 ,pt01) // #2, #4 - ; - } - - Local bool CoreCopy·overlap_extent_interval(void *pt00 ,extent_t e0, void *pt10 ,extent_t e1){ - return CoreCopy·IntervalPts·overlap(pt00 ,pt00 + e0 ,pt10 ,pt10 + e1); - } - - Local CoreCopy·It·Status CoreCopy·It·wellformed(CoreCopy·It *it){ - char *this_name = "CoreCopy·It·wellformed"; - CoreCopy·It·Status status = CoreCopy·It·Status·valid; - - if(it->read0 == NULL){ - fprintf(stderr, "%s: NULL read pointer\n", this_name); - status |= CoreCopy·It·Status·null_read; - } - - if(it->write0 == NULL){ - fprintf(stderr, "%s: NULL write pointer\n", this_name); - status |= CoreCopy·It·Status·null_write; - } - - if( - CoreCopy·overlap_extent_interval(it->read0 ,it->read_extent ,it->write0 ,it->write_extent) - ){ - fprintf(stderr, "%s: Read and write buffers overlap!\n", this_name); - status |= CoreCopy·It·Status·overlap; - } - - return status; - } - - // consider an 8 byte window that is aligned - // returns the byte pointer to the least address byte in the window - Local void *CoreCopy·floor64(void *p){ - return (uintptr_t)p & ~(uintptr_t)0x7; - } - - // consider an 8 byte window that is aligned - // returns the byte pointer to the greatest address byte in the window - Local void *CoreCopy·ceiling64(void *p){ - return (uintptr_t)p | 0x7; - } - - // byte array greatest address byte at p1 (inclusive) - // byte array least address byte at p0 (inclusive) - // returns pointer to the greatest full 64-bit word-aligned address that is ≤ p1 - // by contract, p1 must be >= p0 - Local uint64_t *CoreCopy·greatest_full_64(void *p0 ,void *p1){ - - // If p1 - 0x7 moves into a prior word while p0 does not, a prefetch hazard can occur. - // If p1 and p0 are more than 0x7 apart, they cannot be in the same word, - // but this does not guarantee a full 64-bit word exists in the range. - if((uintptr_t)p1 < (uintptr_t)p0 + 0x7) return NULL; - - // Compute the last fully aligned word at or before p1. - uint64_t *p1_64 = (void *)( ((uintptr_t)p1 - 0x7) & ~(uintptr_t)0x7 ); - - // If alignment rounds p1_64 below p0, there is no full word available. - if(p1_64 < p0) return NULL; - - return p1_64; - } - - // byte array greatest address byte at p1 (inclusive) - // byte array least address byte at p0 (inclusive) - // returns pointer to the least full 64-bit word-aligned address that is ≥ p0 - Local uint64_t *CoreCopy·least_full_64(void *p0 ,void *p1){ - - // If p0 + 0x7 moves into the next word while p1 does not, a prefetch hazard can occur. - // If p1 and p0 are more than 0x7 apart, they cannot be in the same word, - // but this does not guarantee a full 64-bit word exists in the range. - if(p1 - p0 < 0x7) return NULL; - - // Compute the first fully aligned word at or after p0. - uint64_t *p0_64 = (void *)( ((uintptr_t)p0 + 0x7) & ~(uintptr_t)0x7 ); - - // If alignment rounds p0_64 beyond p1, there is no full word available. - if(p0_64 > p1) return NULL; - - return p0_64; - } - - Local void *CoreCopy·inc64(void *p ,size_t Δ){ - return (void *)((uint64_t *)p) + Δ; - } - - Local uint64_t CoreCopy·read_word_fwd(uint64_t *r){ - return *r; - } - - Local uint64_t CoreCopy·read_word_rev(uint64_t *r0, uint64_t *r1, uint64_t *r){ - return __builtin_bswap64(*(CoreCopy·floor64(r0 + (r1 - r)))); - } - - Local void *CoreCopy·byte( - uint8_t *r0 ,uint8_t *r1 ,uint8_t *w0 ,bool reverse - ){ - //---------------------------------------- - // Argument guard - // - - if(r1read0; - uint8_t *w = (uint8_t *)it->write0; - - extent_t re = it->read_extent; - extent_t we = it->write_extent; - - if(we >= re){ - CoreCopy·bytes(r ,r + re ,w); - it->read0 += re; // Fixed stepping logic - it->read_extent = 0; - it->write0 += re; - it->write_extent -= re; - if(we == re) return CoreCopy·Step·perfect_fit; - return CoreCopy·Step·write_available; - } - - CoreCopy·bytes(r ,r + we ,w); - it->read0 += we; // Fixed stepping logic - it->read_extent -= we; - it->write_extent = 0; - it->write0 += we; - return CoreCopy·Step·read_surplus; - } - - #endif // LOCAL - -#endif // IMPLEMENTATION diff --git "a/developer/cc\360\237\226\211/Core.lib.c" "b/developer/cc\360\237\226\211/Core.lib.c" index 8fbb851..2790089 100644 --- "a/developer/cc\360\237\226\211/Core.lib.c" +++ "b/developer/cc\360\237\226\211/Core.lib.c" @@ -33,7 +33,7 @@ void *write0; // write0 = NULL means no buffer or empty buffer. extent_t write_extent; bool reverse_byte_order; - } Core·AreaPairing; + } Core·AreaPairinng; typedef enum{ Core·AreaPairing·Status·valid = 0 @@ -53,31 +53,33 @@ ,Core·Step·write_gap } Core·Step·Status; - typedef struct{ + typedef Core·Step·Fn (*Core·Step·Fn)(); - bool Area·encloses_pt(void *pt ,void *pt0 ,size_t s); - bool Area·encloses_pt_strictly(void *pt ,void *pt0 ,size_t s); - bool Area·encloses(void *pt00 ,size_t e0 ,void *pt10 ,size_t e1); - bool Area·encloses_strictly(void *pt00 ,size_t e0 ,void *pt10 ,size_t e1); - bool Area·overlap(void *pt00 ,size_t s0 ,void *pt10 ,size_t s1); + typedef struct{ - uint64_t *greatest_full_64(void *p0 ,void *p1); - uint64_t *least_full_64(void *p0 ,void *p1); + // Area predicates + bool Area·encloses_pt(void *pt ,void *pt0 ,extent_t e); + bool Area·encloses_pt_strictly(void *pt ,void *pt0 ,extent_t e); + bool Area·encloses(void *pt00 ,extent_t e0 ,void *pt10 ,extent_t e1); + bool Area·encloses_strictly(void *pt00 ,extent_t e0 ,void *pt10 ,extent_t e1); + bool Area·overlap(void *pt00 ,extent_t s0 ,void *pt10 ,extent_t e1); + // 64 bit word operations bool is_aligned_64(void *p); void *floor_64(void *p); void *ceiling_64(void *p); + uint64_t *greatest_full_64(void *p0 ,void *p1); + uint64_t *least_full_64(void *p0 ,void *p1); void *inc_64(void *p ,size_t Δ); - Core·AreaPairing·Status wellformed_it(Core·AreaPairing *it); + Core·AreaPairing·Status wellformed_it(Core·AreaPairing *ap); + + Core·Step·Status Core·step(Core·Step·Fn fn ,Core·AreaPairing *ap); + Core·Step·Fn copy_8; + Core·Step·Fn copy_16; + - void *identity(void *read0 ,void *read1 ,void *write0); - void *reverse_byte_order(void *read0 ,void *read1 ,void *write0); - Core·Step·Status Step·identity(Core·AreaPairing *it); - Core·Step·Status Step·reverse_order(Core·AreaPairing *it); - Core·Step·Status Step·write_hex(Core·AreaPairing *it); - Core·Step·Status Step·read_hex(Core·AreaPairing *it); } Core·M; #endif @@ -98,7 +100,7 @@ typedef struct { Core·Step·Status status; - Core·AreaPairing *it; + Core·AreaPairing *ap; struct { ReadFn8 read_fn; } copy_8; @@ -130,10 +132,10 @@ //---------------------------------------- // Area predicates - Local bool Core·Area·encloses_point(void *pt ,void *pt0 ,extent_t e){ + Local bool Core·Area·encloses_pt(void *pt ,void *pt0 ,extent_t e){ return (pt >= pt0) && (pt <= pt0 + e); // Inclusive bounds } - Local bool Core·Area·encloses_point_strictly(void *pt ,void *pt0 ,extent_t e){ + Local bool Core·Area·encloses_pt_strictly(void *pt ,void *pt0 ,extent_t e){ return (pt > pt0) && (pt < pt0 + e); // Strictly inside } // Area 0 encloses Area 1 @@ -237,11 +239,9 @@ } //---------------------------------------- - // iterator - // An iterator is used to fill buffers in a bucket brigade fashion - // Each buffer is passed as an address to the least byte and an extent + // AreaPairing - Local Core·AreaPairing·Status Core·AreaPairing·wellformed(Copy·it *it){ + Local Core·AreaPairing·Status Core·AreaPairing·wellformed(Core·AreaPairing *ap){ bool print = false; #ifdef Core·DEBUG @@ -256,15 +256,15 @@ return Core·AreaPairing·Status·null; } - if(it->read0 == NULL){ + if(ap->read0 == NULL){ if(print) fprintf( stderr ,"%s: empty read buffer\n" ,this_name ); status |= Copy·WFIt·Status·empty_read_buffer; } - if(it->write0 == NULL){ + if(ap->write0 == NULL){ if(print) fprintf( stderr ,"%s: empty write buffer\n" ,this_name ); status |= Copy·WFIt·Status·empty_write_buffer; } - if( Copy·overlap_size_interval(it->read0 ,it->read_size ,it->write0 ,it->write_size) ){ + if( Copy·overlap_size_interval(ap->read0 ,ap->read_size ,ap->write0 ,ap->write_size) ){ if(print) fprintf( stderr ,"%s: Read and write buffers overlap!\n" ,this_name ); status |= Copy·WFIt·Status·overlap; } @@ -280,7 +280,7 @@ typedef Core·Step·Fn (*Core·Step·Fn)(); // Step function using trampoline execution model - Local Core·Step·Status Core·step(Core·Step·Fn fn ,Core·AreaPairing *it){ + Local Core·Step·Status Core·step(Core·Step·Fn fn ,Core·AreaPairing *ap){ if( fn != Core·copy_64 && fn != Core·copy_8 || @@ -302,81 +302,83 @@ Local Core·Step·Fn Core·copy_8(){ // Assign the correct read function based on byte order - if(Core·tableau.it->reverse_byte_order) + if(Core·tableau.ap->reverse_byte_order) Core·tableau.copy_8.read_fn = Core·read_8_rev; else Core·tableau.copy_8.read_fn = Core·read_8_fwd; // Determine the appropriate case and dispatch - if(Core·tableau.it->read_extent == Core·tableau.it->write_extent) + if(Core·tableau.ap->read_extent == Core·tableau.ap->write_extent) return Core·Copy8·perfect_fit; - if(Core·tableau.it->read_extent > Core·tableau.it->write_extent) + if(Core·tableau.ap->read_extent > Core·tableau.ap->write_extent) return Core·Copy8·read_surplus; return Core·Copy8·write_available; } Local Core·Step·Fn Core·Copy8·perfect_fit(){ - uint8_t *r = (uint8_t *) Core·tableau.it->read0; - uint8_t *r1 = (uint8_t *) (r + Core·tableau.it->read_extent); - uint8_t *w = (uint8_t *) Core·tableau.it->write0; + uint8_t *r = (uint8_t *) Core·tableau.ap->read0; + uint8_t *r1 = (uint8_t *) (r + Core·tableau.ap->read_extent); + uint8_t *w = (uint8_t *) Core·tableau.ap->write0; do{ - *w = Core·tableau.copy_8.read_fn(Core·tableau.it->read0, r1, r); + *w = Core·tableau.copy_8.read_fn(Core·tableau.ap->read0, r1, r); if(r == r1) break; r++; w++; }while(true); - Core·tableau.it->read0 = NULL; // Buffer exhausted - Core·tableau.it->write0 = NULL; // Buffer exhausted + Core·tableau.ap->read0 = NULL; // Buffer exhausted + Core·tableau.ap->write0 = NULL; // Buffer exhausted Core·tableau.status = Core·Step·perfect_fit; return NULL; } Local Core·Step·Fn Core·Copy8·read_surplus(){ - uint8_t *r = (uint8_t *) Core·tableau.it->read0; - uint8_t *r1 = (uint8_t *) (r + Core·tableau.it->write_extent); - uint8_t *w = (uint8_t *) Core·tableau.it->write0; + uint8_t *r = (uint8_t *) Core·tableau.ap->read0; + uint8_t *r1 = (uint8_t *) (r + Core·tableau.ap->write_extent); + uint8_t *w = (uint8_t *) Core·tableau.ap->write0; do{ - *w = Core·tableau.copy_8.read_fn(Core·tableau.it->read0, r1, r); + *w = Core·tableau.copy_8.read_fn(Core·tableau.ap->read0, r1, r); if(r == r1) break; r++; w++; }while(true); - Core·tableau.it->read0 = r; // Advance read pointer - Core·tableau.it->read_extent -= Core·tableau.it->write_extent; - Core·tableau.it->write0 = NULL; // Write buffer exhausted + Core·tableau.ap->read0 = r; // Advance read pointer + Core·tableau.ap->read_extent -= Core·tableau.ap->write_extent; + Core·tableau.ap->write0 = NULL; // Write buffer exhausted Core·tableau.status = Core·Step·read_surplus; return NULL; } Local Core·Step·Fn Core·Copy8·write_available(){ - uint8_t *r = (uint8_t *) Core·tableau.it->read0; - uint8_t *r1 = (uint8_t *) (r + Core·tableau.it->read_extent); - uint8_t *w = (uint8_t *) Core·tableau.it->write0; - uint8_t *w1 = (uint8_t *) (w + Core·tableau.it->write_extent); + uint8_t *r = (uint8_t *) Core·tableau.ap->read0; + uint8_t *r1 = (uint8_t *) (r + Core·tableau.ap->read_extent); + uint8_t *w = (uint8_t *) Core·tableau.ap->write0; + uint8_t *w1 = (uint8_t *) (w + Core·tableau.ap->write_extent); do{ - *w = Core·tableau.copy_8.read_fn(Core·tableau.it->read0, r1, r); + *w = Core·tableau.copy_8.read_fn(Core·tableau.ap->read0, r1, r); if(w == w1) break; r++; w++; }while(true); - Core·tableau.it->write0 = w; // Advance write pointer - Core·tableau.it->write_extent -= Core·tableau.it->read_extent; - Core·tableau.it->read0 = NULL; // Read buffer exhausted + Core·tableau.ap->write0 = w; // Advance write pointer + Core·tableau.ap->write_extent -= Core·tableau.ap->read_extent; + Core·tableau.ap->read0 = NULL; // Read buffer exhausted Core·tableau.status = Core·Step·write_available; return NULL; } + //---------------------------------------- - // copy_64 buffer fill step + // copy_64 + // 64-bit copy function with updated AreaPairing terminology Core·Step·Fn Core·copy_64; Core·Step·Fn Core·Copy64·leadin; Core·Step·Fn Core·Copy64·bulk; @@ -386,113 +388,112 @@ Local Core·Step·Fn Core·copy_64(){ // Assign the correct read function based on byte order - if(Core·tableau.it->reverse_byte_order) + if(Core·tableau.ap->reverse_byte_order) Core·tableau.copy_64.read_fn = Core·read_64_rev; else Core·tableau.copy_64.read_fn = Core·read_64_fwd; // Determine aligned 64-bit word boundaries Core·tableau.copy_64.r0_64 = Core·least_full_64( - Core·tableau.it->read0, Core·tableau.it->read0 + Core·tableau.it->read_extent - ); + Core·tableau.ap->read0, Core·tableau.ap->read0 + Core·tableau.ap->read_extent + ); Core·tableau.copy_64.r1_64 = Core·greatest_full_64( - Core·tableau.it->read0, Core·tableau.it->read0 + Core·tableau.it->read_extent - ); + Core·tableau.ap->read0, Core·tableau.ap->read0 + Core·tableau.ap->read_extent + ); // Choose the correct function based on alignment if(Core·tableau.copy_64.r0_64 == NULL) return Core·Copy64·tail; - if(Core·is_aligned_64(Core·tableau.it->read0)) return Core·Copy64·bulk; + if(Core·is_aligned_64(Core·tableau.ap->read0)) return Core·Copy64·bulk; return Core·Copy64·leadin; } // Lead-in byte copy (until alignment) Local Core·Step·Fn Core·Copy64·leadin(){ - uint8_t *r = (uint8_t *) Core·tableau.it->read0; - uint8_t *w = (uint8_t *) Core·tableau.it->write0; + uint8_t *r = (uint8_t *) Core·tableau.ap->read0; + uint8_t *w = (uint8_t *) Core·tableau.ap->write0; uint8_t *r0_64 = (uint8_t *) Core·tableau.copy_64.r0_64; do{ - *w++ = Core·tableau.copy_8.read_fn(Core·tableau.it->read0, r0_64, r); + *w++ = Core·tableau.copy_8.read_fn(Core·tableau.ap->read0, r0_64, r); if(r == r0_64) break; r++; }while(1); - Core·tableau.it->read0 = r; - Core·tableau.it->write0 = w; + Core·tableau.ap->read0 = r; + Core·tableau.ap->write0 = w; return Core·Copy64·bulk; } // Bulk word copy Local Core·Step·Fn Core·Copy64·bulk(){ - uint64_t *r64 = (uint64_t *) Core·tableau.it->read0; - uint64_t *w64 = (uint64_t *) Core·tableau.it->write0; + uint64_t *r64 = (uint64_t *) Core·tableau.ap->read0; + uint64_t *w64 = (uint64_t *) Core·tableau.ap->write0; uint64_t *r1_64 = Core·tableau.copy_64.r1_64; do{ *w64++ = Core·tableau.copy_64.read_fn( - Core·tableau.copy_64.r0_64, Core·tableau.copy_64.r1_64, r64 - ); + Core·tableau.copy_64.r0_64, Core·tableau.copy_64.r1_64, r64 + ); if(r64 == r1_64) break; r64++; }while(1); - Core·tableau.it->read0 = r64; - Core·tableau.it->write0 = w64; + Core·tableau.ap->read0 = r64; + Core·tableau.ap->write0 = w64; return Core·Copy64·tail; } // Tail byte copy (unaligned trailing bytes) Local Core·Step·Fn Core·Copy64·tail(){ - uint8_t *r = (uint8_t *) Core·tableau.it->read0; - uint8_t *w = (uint8_t *) Core·tableau.it->write0; + uint8_t *r = (uint8_t *) Core·tableau.ap->read0; + uint8_t *w = (uint8_t *) Core·tableau.ap->write0; uint8_t *r1 = (uint8_t *) Core·tableau.copy_64.r1_64; do{ - *w++ = Core·tableau.copy_8.read_fn(Core·tableau.it->read0, r1, r); + *w++ = Core·tableau.copy_8.read_fn(Core·tableau.ap->read0, r1, r); if(r == r1) break; r++; }while(1); - Core·tableau.it->read0 = r; - Core·tableau.it->write0 = w; + Core·tableau.ap->read0 = r; + Core·tableau.ap->write0 = w; Core·tableau.status = Core·Step·perfect_fit; return NULL; } - //---------------------------------------- + // step write hex + // Forward Declarations - Core·Step·Fn Core·write_hex; - Core·Step·Fn Core·write_hex_bulk; - Core·Step·Fn Core·write_hex_read_surplus; - Core·Step·Fn Core·write_hex_write_available; + Core·Step·Fn Core·Step·write_hex; + Core·Step·Fn Core·Step·write_hex_bulk; + Core·Step·Fn Core·Step·write_hex_read_surplus; + Core·Step·Fn Core·Step·write_hex_write_available; - Core·Step·Fn Core·read_hex; - Core·Step·Fn Core·read_hex_bulk; - Core·Step·Fn Core·read_hex_read_surplus; - Core·Step·Fn Core·read_hex_write_available; + Core·Step·Fn Core·Step·read_hex; + Core·Step·Fn Core·Step·read_hex_bulk; + Core·Step·Fn Core·Step·read_hex_read_surplus; + Core·Step·Fn Core·Step·read_hex_write_available; - //---------------------------------------- // Hex Encoding: Initialize Step - Local Core·Step·Fn Core·write_hex(){ - if(Core·tableau.it->read_extent == (Core·tableau.it->write_extent >> 1)){ - return Core·write_hex_bulk; + Local Core·Step·Fn Core·Step·write_hex(){ + if(Core·tableau.area_pairing->read_extent == (Core·tableau.area_pairing->write_extent >> 1)){ + return Core·Step·write_hex_bulk; } - if(Core·tableau.it->read_extent > (Core·tableau.it->write_extent >> 1)){ - return Core·write_hex_read_surplus; + if(Core·tableau.area_pairing->read_extent > (Core·tableau.area_pairing->write_extent >> 1)){ + return Core·Step·write_hex_read_surplus; } - return Core·write_hex_write_available; + return Core·Step·write_hex_write_available; } - //---------------------------------------- // Hex Encoding: Bulk Processing (Perfect Fit) - Local Core·Step·Fn Core·write_hex_bulk(){ - uint8_t *r = (uint8_t *)Core·tableau.it->read0; - uint8_t *r1 = r + Core·tableau.it->read_extent; - uint8_t *w = (uint8_t *)Core·tableau.it->write0; + Local Core·Step·Fn Core·Step·write_hex_bulk(){ + uint8_t *r = (uint8_t *)Core·tableau.area_pairing->read0; + uint8_t *r1 = r + Core·tableau.area_pairing->read_extent; + uint8_t *w = (uint8_t *)Core·tableau.area_pairing->write0; do { *(uint16_t *)w = Core·tableau.hex.convert.byte_to_hex(*r); @@ -501,20 +502,19 @@ w += 2; } while(1); - Core·tableau.it->read0 = NULL; - Core·tableau.it->write0 = NULL; - Core·tableau.it->read_extent = 0; - Core·tableau.it->write_extent = 0; + Core·tableau.area_pairing->read0 = NULL; + Core·tableau.area_pairing->write0 = NULL; + Core·tableau.area_pairing->read_extent = 0; + Core·tableau.area_pairing->write_extent = 0; Core·tableau.status = Core·Step·perfect_fit; return NULL; } - //---------------------------------------- // Hex Encoding: Read Surplus - Local Core·Step·Fn Core·write_hex_read_surplus(){ - uint8_t *r = (uint8_t *)Core·tableau.it->read0; - uint8_t *w = (uint8_t *)Core·tableau.it->write0; - size_t limit = Core·tableau.it->write_extent >> 1; + Local Core·Step·Fn Core·Step·write_hex_read_surplus(){ + uint8_t *r = (uint8_t *)Core·tableau.area_pairing->read0; + uint8_t *w = (uint8_t *)Core·tableau.area_pairing->write0; + size_t limit = Core·tableau.area_pairing->write_extent >> 1; uint8_t *r1 = r + limit; do { @@ -524,20 +524,19 @@ w += 2; } while(1); - Core·tableau.it->read0 = r + 1; - Core·tableau.it->read_extent -= limit; - Core·tableau.it->write0 = NULL; - Core·tableau.it->write_extent = 0; + Core·tableau.area_pairing->read0 = r + 1; + Core·tableau.area_pairing->read_extent -= limit; + Core·tableau.area_pairing->write0 = NULL; + Core·tableau.area_pairing->write_extent = 0; Core·tableau.status = Core·Step·read_surplus; return NULL; } - //---------------------------------------- // Hex Encoding: Write Available - Local Core·Step·Fn Core·write_hex_write_available(){ - uint8_t *r = (uint8_t *)Core·tableau.it->read0; - uint8_t *w = (uint8_t *)Core·tableau.it->write0; - size_t limit = Core·tableau.it->read_extent; + Local Core·Step·Fn Core·Step·write_hex_write_available(){ + uint8_t *r = (uint8_t *)Core·tableau.area_pairing->read0; + uint8_t *w = (uint8_t *)Core·tableau.area_pairing->write0; + size_t limit = Core·tableau.area_pairing->read_extent; uint8_t *r1 = r + limit; do { @@ -547,32 +546,32 @@ w += 2; } while(1); - Core·tableau.it->read0 = NULL; - Core·tableau.it->read_extent = 0; - Core·tableau.it->write0 = w + 2; - Core·tableau.it->write_extent -= limit << 1; + Core·tableau.area_pairing->read0 = NULL; + Core·tableau.area_pairing->read_extent = 0; + Core·tableau.area_pairing->write0 = w + 2; + Core·tableau.area_pairing->write_extent -= limit << 1; Core·tableau.status = Core·Step·write_available; return NULL; } //---------------------------------------- - // Hex Decoding: Initialize Step - Local Core·Step·Fn Core·read_hex(){ - if((Core·tableau.it->read_extent >> 1) == Core·tableau.it->write_extent){ - return Core·read_hex_bulk; + // step read hex + + Local Core·Step·Fn Core·Step·read_hex(){ + if((Core·tableau.area_pairing->read_extent >> 1) == Core·tableau.area_pairing->write_extent){ + return Core·Step·read_hex_bulk; } - if((Core·tableau.it->read_extent >> 1) > Core·tableau.it->write_extent){ - return Core·read_hex_read_surplus; + if((Core·tableau.area_pairing->read_extent >> 1) > Core·tableau.area_pairing->write_extent){ + return Core·Step·read_hex_read_surplus; } - return Core·read_hex_write_available; + return Core·Step·read_hex_write_available; } - //---------------------------------------- // Hex Decoding: Bulk Processing (Perfect Fit) - Local Core·Step·Fn Core·read_hex_bulk(){ - uint8_t *r = (uint8_t *)Core·tableau.it->read0; - uint8_t *r1 = r + Core·tableau.it->read_extent; - uint8_t *w = (uint8_t *)Core·tableau.it->write0; + Local Core·Step·Fn Core·Step·read_hex_bulk(){ + uint8_t *r = (uint8_t *)Core·tableau.area_pairing->read0; + uint8_t *r1 = r + Core·tableau.area_pairing->read_extent; + uint8_t *w = (uint8_t *)Core·tableau.area_pairing->write0; do { *w = Core·tableau.hex.convert.hex_to_byte(*(uint16_t *)r); @@ -581,20 +580,19 @@ w++; } while(1); - Core·tableau.it->read0 = NULL; - Core·tableau.it->write0 = NULL; - Core·tableau.it->read_extent = 0; - Core·tableau.it->write_extent = 0; + Core·tableau.area_pairing->read0 = NULL; + Core·tableau.area_pairing->write0 = NULL; + Core·tableau.area_pairing->read_extent = 0; + Core·tableau.area_pairing->write_extent = 0; Core·tableau.status = Core·Step·perfect_fit; return NULL; } - //---------------------------------------- // Hex Decoding: Read Surplus - Local Core·Step·Fn Core·read_hex_read_surplus(){ - uint8_t *r = (uint8_t *)Core·tableau.it->read0; - uint8_t *w = (uint8_t *)Core·tableau.it->write0; - size_t limit = Core·tableau.it->write_extent; + Local Core·Step·Fn Core·Step·read_hex_read_surplus(){ + uint8_t *r = (uint8_t *)Core·tableau.area_pairing->read0; + uint8_t *w = (uint8_t *)Core·tableau.area_pairing->write0; + size_t limit = Core·tableau.area_pairing->write_extent; uint8_t *r1 = r + (limit << 1); do { @@ -604,20 +602,19 @@ w++; } while(1); - Core·tableau.it->read0 = r + 2; - Core·tableau.it->read_extent -= limit << 1; - Core·tableau.it->write0 = NULL; - Core·tableau.it->write_extent = 0; + Core·tableau.area_pairing->read0 = r + 2; + Core·tableau.area_pairing->read_extent -= limit << 1; + Core·tableau.area_pairing->write0 = NULL; + Core·tableau.area_pairing->write_extent = 0; Core·tableau.status = Core·Step·read_surplus; return NULL; } - //---------------------------------------- // Hex Decoding: Write Available - Local Core·Step·Fn Core·read_hex_write_available(){ - uint8_t *r = (uint8_t *)Core·tableau.it->read0; - uint8_t *w = (uint8_t *)Core·tableau.it->write0; - size_t limit = Core·tableau.it->read_extent >> 1; + Local Core·Step·Fn Core·Step·read_hex_write_available(){ + uint8_t *r = (uint8_t *)Core·tableau.area_pairing->read0; + uint8_t *w = (uint8_t *)Core·tableau.area_pairing->write0; + size_t limit = Core·tableau.area_pairing->read_extent >> 1; uint8_t *r1 = r + (limit << 1); do { @@ -627,15 +624,14 @@ w++; } while(1); - Core·tableau.it->read0 = NULL; - Core·tableau.it->read_extent = 0; - Core·tableau.it->write0 = w + 1; - Core·tableau.it->write_extent -= limit; + Core·tableau.area_pairing->read0 = NULL; + Core·tableau.area_pairing->read_extent = 0; + Core·tableau.area_pairing->write0 = w + 1; + Core·tableau.area_pairing->write_extent -= limit; Core·tableau.status = Core·Step·write_available; return NULL; } - #endif // LOCAL #endif // IMPLEMENTATION diff --git "a/developer/cc\360\237\226\211/core_work.c" "b/developer/cc\360\237\226\211/core_work.c" deleted file mode 100644 index 3ceb615..0000000 --- "a/developer/cc\360\237\226\211/core_work.c" +++ /dev/null @@ -1,324 +0,0 @@ -/* - Core - core memory operations. - - 'ATP' 'At This Point' in the code. Used in comments to state assertions. - by definition an 'extent' is one less than a 'size'. -*/ - -#define Core·DEBUG - -#ifndef FACE -#define Core·IMPLEMENTATION -#define FACE -#endif - -//-------------------------------------------------------------------------------- -// Interface - -#ifndef Core·FACE -#define Core·FACE - - #include - #include - - #define extentof(x) (sizeof(x) - 1) - #define extent_t size_t - - typedef struct{ - void *read0; - extent_t read_extent; - void *write0; - extent_t write_extent; - } Core·It; - - typedef enum{ - Core·It·Status·valid = 0 - ,Core·It·Status·null - ,Core·It·Status·null_read - ,Core·It·Status·null_write - ,Core·It·Status·overlap - } Core·It·Status; - - typedef enum{ - Core·Step·Status·perfect_fit = 0 - ,Core·Step·Status·argument_guard // something wrong with the arguments to step - ,Core·Step·Status·read_surplus - ,Core·Step·Status·read_surplus_write_gap - ,Core·Step·Status·write_available - ,Core·Step·Status·write_gap - } Core·Step·Status; - - typedef struct{ - bool Core·IntervalPts·in(void *pt ,void *pt0 ,void *pt1); - bool Core·IntervalPts·contains(void *pt00 ,void *pt01 ,void *pt10 ,void *pt11); - bool Core·IntervalPts·overlap(void *pt00 ,void *pt01 ,void *pt10 ,void *pt11); - - bool Core·IntervalPtSize·in(void *pt ,void *pt0 ,size_t s); - bool Core·IntervalPtSize·overlap(void *pt00 ,size_t s0 ,void *pt10 ,size_t s1); - - Core·It·Status Core·wellformed_it(Core·It *it) - - void *identity(void *read0 ,void *read1 ,void *write0); - void *reverse_byte_order(void *read0 ,void *read1 ,void *write0); - - Core·Status Core·Step·identity(Core·It *it); - Core·Status Core·Step·reverse_order(Core·It *it); - Core·Status Core·Step·write_hex(Core·It *it); - Core·Status Core·Step·read_hex(Core·It *it); - } Core·M; - -#endif - -//-------------------------------------------------------------------------------- -// Implementation - -#ifdef Core·IMPLEMENTATION - - #ifdef Core·DEBUG - #include - #endif - - // this part goes into Copylib.a - // yes this is empty, so there is no Copylib.a - #ifndef LOCAL - #endif - - #ifdef LOCAL - - // Interval predicates. - // Intervals in Copy have inclusive bounds - - Local bool Core·aligned64(void *p){ - return ((uintptr_t)p & 0x7) == 0; - } - - Local bool Core·IntervalPts·in(void *pt ,void *pt0 ,void *pt1){ - return pt >= pt0 && pt <= pt1; // Inclusive bounds - } - - Local bool Core·IntervalPtExtent·in(void *pt ,void *pt0 ,extent_t e){ - return Core·IntervalPts·in(pt ,pt0 ,pt0 + e); - } - - // interval 0 contains interval 1, overlap on boundaries allowed. - Local bool Core·IntervalPts·contains( - void *pt00 ,void *pt01 ,void *pt10 ,void *pt11 - ){ - return pt10 >= pt00 && pt11 <= pt01; - } - - Local bool Core·IntervalPtExtent·contains( - void *pt00 ,size_t e0 ,void *pt10 ,size_t e1 - ){ - contains(pt00 ,pt00 + e0 ,pt10 ,pt10 + e1) - } - - // interval 0 properly contains interval 1, overlap on boundaries not allowed. - Local bool Core·IntervalPts·contains_proper( - void *pt00 ,void *pt01 ,void *pt10 ,void *pt11 - ){ - return pt10 > pt00 && pt11 < pt01; - } - Local bool Core·IntervalPtExtent·contains_proper( - void *pt00 ,size_t e0 ,void *pt10 ,size_t e1 - ){ - contains_proper(pt00 ,pt00 + e0 ,pt10 ,pt10 + 1) - } - - - // Possible cases of overlap, including just touching - // 1. interval 0 to the right of interval 1, just touching p00 == p11 - // 2. interval 0 to the left of interval 1, just touching p01 == p10 - // 3. interval 0 wholly contained in interval 1 - // 4. interval 0 wholly contains interval 1 - Local bool Core·IntervalPts·overlap(void *pt00 ,void *pt01 ,void *pt10 ,void *pt11){ - return - Core·IntervalPts·in(pt00 ,pt10 ,pt11) // #1, #3 - || Core·IntervalPts·in(pt10 ,pt00 ,pt01) // #2, #4 - ; - } - - Local bool Core·IntervalPtExtent·overlap( - void *pt00 ,extent_t e0 ,void *pt10 ,extent_t e1 - ){ - return Core·IntervalPts·overlap(pt00 ,pt00 + e0 ,pt10 ,pt10 + e1); - } - - Local Copy·It·Status Copy·wellformed_it(Copy·it *it){ - - bool print = false; - #ifdef Core·DEBUG - print = true; - #endif - - char *this_name = "Copy·wellformed_it"; - Copy·WFIt·Status status = Copy·WFIt·Status·valid; - - if(it == NULL){ - if(print) fprintf( stderr ,"%s: NULL read pointer\n" ,this_name ); - return Core·It·Status·null; - } - - if(it->read0 == NULL){ - if(print) fprintf( stderr ,"%s: NULL read pointer\n" ,this_name ); - status |= Copy·WFIt·Status·null_read; - } - - if(it->write0 == NULL){ - if(print) fprintf( stderr ,"%s: NULL write pointer\n" ,this_name ); - status |= Copy·WFIt·Status·null_write; - } - - if(it->read_size == 0){ - if(print) fprintf( stderr ,"%s: Zero-sized read buffer\n" ,this_name ); - status |= Copy·WFIt·Status·zero_read_buffer; - } - - if(it->write_size == 0){ - if(print) fprintf( stderr ,"%s: Zero-sized write buffer\n" ,this_name ); - status |= Copy·WFIt·Status·zero_write_buffer; - } - - if( Copy·overlap_size_interval(it->read0 ,it->read_size ,it->write0 ,it->write_size) ){ - if(print) fprintf( stderr ,"%s: Read and write buffers overlap!\n" ,this_name ); - status |= Copy·WFIt·Status·overlap; - } - - return status; - } - - // consider an 8 byte window that is aligned - // returns the byte pointer to the least address byte in the window - Local void *Core·floor64(void *p){ - return (uintptr_t)p & ~(uintptr_t)0x7; - } - - // consider an 8 byte window that is aligned - // returns the byte pointer to the greatest address byte in the window - Local void *Core·ceiling64(void *p){ - return (uintptr_t)p | 0x7; - } - - // byte array greatest address byte at p1 (inclusive) - // byte array least address byte at p0 (inclusive) - // returns pointer to the greatest full 64-bit word-aligned address that is ≤ p1 - // by contract, p1 must be >= p0 - Local uint64_t *Core·greatest_full_64(void *p0 ,void *p1){ - - // If p1 - 0x7 moves into a prior word while p0 does not, a prefetch hazard can occur. - // If p1 and p0 are more than 0x7 apart, they cannot be in the same word, - // but this does not guarantee a full 64-bit word exists in the range. - if((uintptr_t)p1 < (uintptr_t)p0 + 0x7) return NULL; - - // Compute the last fully aligned word at or before p1. - uint64_t *p1_64 = (void *)( ((uintptr_t)p1 - 0x7) & ~(uintptr_t)0x7 ); - - // If alignment rounds p1_64 below p0, there is no full word available. - if(p1_64 < p0) return NULL; - - return p1_64; - } - - // byte array greatest address byte at p1 (inclusive) - // byte array least address byte at p0 (inclusive) - // returns pointer to the least full 64-bit word-aligned address that is ≥ p0 - Local uint64_t *Core·least_full_64(void *p0 ,void *p1){ - - // If p0 + 0x7 moves into the next word while p1 does not, a prefetch hazard can occur. - // If p1 and p0 are more than 0x7 apart, they cannot be in the same word, - // but this does not guarantee a full 64-bit word exists in the range. - if(p1 - p0 < 0x7) return NULL; - - // Compute the first fully aligned word at or after p0. - uint64_t *p0_64 = (void *)( ((uintptr_t)p0 + 0x7) & ~(uintptr_t)0x7 ); - - // If alignment rounds p0_64 beyond p1, there is no full word available. - if(p0_64 > p1) return NULL; - - return p0_64; - } - - Local void *Core·inc64(void *p ,size_t Δ){ - return (void *)((uint64_t *)p) + Δ; - } - - Local uint64_t Core·read_word_fwd(uint64_t *r){ - return *r; - } - - Local uint64_t Core·read_word_rev(uint64_t *r0 ,uint64_t *r1 ,uint64_t *r){ - return __builtin_bswap64(*(Core·floor64(r0 + (r1 - r)))); - } - - - -typedef void *(*step_fn_t)(Core·It *it ,void *tableau); - -// Function prototypes, for forward referencing -step_fn_t Core·CopyWord64·init ,Core·CopyWord64·leadin ,Core·CopyWord64·bulk ,Core·CopyWord64·tail; - -// copy_word64 tableau structure -typedef struct{ - Core·Step·Status status -} Core·Step·tableau_t; - -typedef struct{ - Core·Step·Step·Status status - ,uint64_t *r0_64 - ,uint64_t *r1_64 -} Core·CopyWord64·tableau_t; - - -// Initialize the copy_word64 -copy_fn_t Core·Step·CopyWord64·init(Core·Step·It it ,Core·Step·tableau_t *t0){ - copy_step_tableau_t *t = (copy_step_tableau_t *) t0; - // if iterator not well formed set status and return NULL - // initialize the tableau struct from the iterator .. - // ATP we know at least one byte must be copied - // if r0_64 or r1_64 are NULL, copy the bytes, set status, and return NULL - return Core·Step·CopyWord64·leadin; -} - -// Lead-in byte copy (until alignment) -void *Core·Step·CopyWord64·leadin(Core·Step·It it ,Core·Step·tableau_t *t0){ - copy_step_tableau_t *t = (copy_step_tableau_t *)t0; - while(r < (uint8_t *)tableau->r0_64){ - *w++ = *r++; - } - return Core·Step·CopyWord64·bulk; -} - -// Bulk word copy -void *Core·Step·CopyWord64·bulk(Core·Step·It it ,Core·Step·tableau_t *t0){ - copy_step_tableau_t *t = (copy_step_tableau_t *)t0; - uint64_t *r64 = (uint64_t *)r; - uint64_t *r1_64 = tableau->r1_64; - uint64_t *w64 = (uint64_t *)w; - - while(r64 <= r1_64){ - *w64++ = *r64++; - } - // check if read1 is aligned if so, set status and return NULL otherwise - return Core·Step·CopyWord64·tail; -} - -// Tail byte copy -void *Core·Step·CopyWord64·tail(Core·Step·It it ,Core·Step·tableau_t *t){ - while(r <= r1){ - *w++ = *r++; - } - // set status on the tableau - return NULL; -} - -// Step function -Core·Step·Status step(Core·Step·It it ,step_fn_t fn ,Core·Step·tableau_t *t){ - while(fn(it ,t)); - return t->status; -} - - - - #endif // LOCAL - -#endif // IMPLEMENTATION diff --git "a/developer/deprecated\360\237\226\211/Copy.lib_2.c" "b/developer/deprecated\360\237\226\211/Copy.lib_2.c" new file mode 100644 index 0000000..f93e599 --- /dev/null +++ "b/developer/deprecated\360\237\226\211/Copy.lib_2.c" @@ -0,0 +1,347 @@ +/* + CoreCopy - Memory copy operations with attention to alignment. + Provides optimized copy and byte order reversal functions. + + 'ATP' At This Point in the code. Assertions follow. +*/ + +#define CoreCopy·DEBUG + +#ifndef FACE +#define CoreCopy·IMPLEMENTATION +#define FACE +#endif + +//-------------------------------------------------------------------------------- +// Interface + +#ifndef CoreCopy·FACE +#define CoreCopy·FACE + + #include + #include + + #define extentof(x) (sizeof(x) - 1) + #define extent_t size_t + + typedef struct{ + void *read0; + extent_t read_extent; + void *write0; + extent_t write_extent; + } CoreCopy·It; + + typedef enum{ + CoreCopy·It·Status·valid = 0 + ,CoreCopy·It·Status·null_read + ,CoreCopy·It·Status·null_write + ,CoreCopy·It·Status·overlap + } CoreCopy·It·Status; + + typedef enum{ + CoreCopy·Step·perfect_fit = 0 + ,CoreCopy·Step·argument_guard + ,CoreCopy·Step·read_surplus + ,CoreCopy·Step·read_surplus_write_gap + ,CoreCopy·Step·write_available + ,CoreCopy·Step·write_gap + } CoreCopy·Status; + + typedef struct{ + bool CoreCopy·IntervalPts·in(void *pt, void *pt0 ,void *pt1); + bool CoreCopy·IntervalPts·contains(void *pt00 ,void *pt01 ,void *pt10 ,void *pt11); + bool CoreCopy·IntervalPts·overlap(void *pt00 ,void *pt01, void *pt10 ,void *pt11); + + bool CoreCopy·IntervalPtSize·in(void *pt, void *pt0 ,size_t s); + bool CoreCopy·IntervalPtSize·overlap(void *pt00 ,size_t s0, void *pt10 ,size_t s1); + + CoreCopy·It·Status CoreCopy·wellformed_it(CoreCopy·It *it) + + void *identity(void *read0 ,void *read1 ,void *write0); + void *reverse_byte_order(void *read0 ,void *read1 ,void *write0); + + CoreCopy·Status CoreCopy·Step·identity(CoreCopy·It *it); + CoreCopy·Status CoreCopy·Step·reverse_order(CoreCopy·It *it); + CoreCopy·Status CoreCopy·Step·write_hex(CoreCopy·It *it); + CoreCopy·Status CoreCopy·Step·read_hex(CoreCopy·It *it); + } CoreCopy·M; + +#endif + +//-------------------------------------------------------------------------------- +// Implementation + +#ifdef CoreCopy·IMPLEMENTATION + + #ifdef CoreCopy·DEBUG + #include + #endif + + // this part goes into Copylib.a + // yes this is empty, so there is no Copylib.a + #ifndef LOCAL + #endif + + #ifdef LOCAL + + // Interval predicates. + // Intervals in Copy have inclusive bounds + + Local bool CoreCopy·aligned64(void *p){ + return ((uintptr_t)p & 0x7) == 0; + } + + Local bool CoreCopy·IntervalPts·in(void *pt, void *pt0 ,void *pt1){ + return pt >= pt0 && pt <= pt1; // Inclusive bounds + } + + Local bool CoreCopy·in_extent_interval(void *pt, void *pt0 ,extent_t e){ + return CoreCopy·IntervalPts·in(pt ,pt0 ,pt0 + e); + } + + // interval 0 contains interval 1, overlap on boundaries allowed. + Local bool CoreCopy·IntervalPts·contains( + void *pt00 ,void *pt01 ,void *pt10 ,void *pt11 + ){ + return pt10 >= pt00 && pt11 <= pt01; + } + + // interval 0 properly contains interval 1, overlap on boundaries not allowed. + Local bool CoreCopy·contains_proper_pt_interval( + void *pt00 ,void *pt01 ,void *pt10 ,void *pt11 + ){ + return pt10 > pt00 && pt11 < pt01; + } + + // Possible cases of overlap, including just touching + // 1. interval 0 to the right of interval 1, just touching p00 == p11 + // 2. interval 0 to the left of interval 1, just touching p01 == p10 + // 3. interval 0 wholly contained in interval 1 + // 4. interval 0 wholly contains interval 1 + Local bool CoreCopy·IntervalPts·overlap(void *pt00 ,void *pt01, void *pt10 ,void *pt11){ + return + CoreCopy·IntervalPts·in(pt00 ,pt10 ,pt11) // #1, #3 + || CoreCopy·IntervalPts·in(pt10 ,pt00 ,pt01) // #2, #4 + ; + } + + Local bool CoreCopy·overlap_extent_interval(void *pt00 ,extent_t e0, void *pt10 ,extent_t e1){ + return CoreCopy·IntervalPts·overlap(pt00 ,pt00 + e0 ,pt10 ,pt10 + e1); + } + + Local CoreCopy·It·Status CoreCopy·It·wellformed(CoreCopy·It *it){ + char *this_name = "CoreCopy·It·wellformed"; + CoreCopy·It·Status status = CoreCopy·It·Status·valid; + + if(it->read0 == NULL){ + fprintf(stderr, "%s: NULL read pointer\n", this_name); + status |= CoreCopy·It·Status·null_read; + } + + if(it->write0 == NULL){ + fprintf(stderr, "%s: NULL write pointer\n", this_name); + status |= CoreCopy·It·Status·null_write; + } + + if( + CoreCopy·overlap_extent_interval(it->read0 ,it->read_extent ,it->write0 ,it->write_extent) + ){ + fprintf(stderr, "%s: Read and write buffers overlap!\n", this_name); + status |= CoreCopy·It·Status·overlap; + } + + return status; + } + + // consider an 8 byte window that is aligned + // returns the byte pointer to the least address byte in the window + Local void *CoreCopy·floor64(void *p){ + return (uintptr_t)p & ~(uintptr_t)0x7; + } + + // consider an 8 byte window that is aligned + // returns the byte pointer to the greatest address byte in the window + Local void *CoreCopy·ceiling64(void *p){ + return (uintptr_t)p | 0x7; + } + + // byte array greatest address byte at p1 (inclusive) + // byte array least address byte at p0 (inclusive) + // returns pointer to the greatest full 64-bit word-aligned address that is ≤ p1 + // by contract, p1 must be >= p0 + Local uint64_t *CoreCopy·greatest_full_64(void *p0 ,void *p1){ + + // If p1 - 0x7 moves into a prior word while p0 does not, a prefetch hazard can occur. + // If p1 and p0 are more than 0x7 apart, they cannot be in the same word, + // but this does not guarantee a full 64-bit word exists in the range. + if((uintptr_t)p1 < (uintptr_t)p0 + 0x7) return NULL; + + // Compute the last fully aligned word at or before p1. + uint64_t *p1_64 = (void *)( ((uintptr_t)p1 - 0x7) & ~(uintptr_t)0x7 ); + + // If alignment rounds p1_64 below p0, there is no full word available. + if(p1_64 < p0) return NULL; + + return p1_64; + } + + // byte array greatest address byte at p1 (inclusive) + // byte array least address byte at p0 (inclusive) + // returns pointer to the least full 64-bit word-aligned address that is ≥ p0 + Local uint64_t *CoreCopy·least_full_64(void *p0 ,void *p1){ + + // If p0 + 0x7 moves into the next word while p1 does not, a prefetch hazard can occur. + // If p1 and p0 are more than 0x7 apart, they cannot be in the same word, + // but this does not guarantee a full 64-bit word exists in the range. + if(p1 - p0 < 0x7) return NULL; + + // Compute the first fully aligned word at or after p0. + uint64_t *p0_64 = (void *)( ((uintptr_t)p0 + 0x7) & ~(uintptr_t)0x7 ); + + // If alignment rounds p0_64 beyond p1, there is no full word available. + if(p0_64 > p1) return NULL; + + return p0_64; + } + + Local void *CoreCopy·inc64(void *p ,size_t Δ){ + return (void *)((uint64_t *)p) + Δ; + } + + Local uint64_t CoreCopy·read_word_fwd(uint64_t *r){ + return *r; + } + + Local uint64_t CoreCopy·read_word_rev(uint64_t *r0, uint64_t *r1, uint64_t *r){ + return __builtin_bswap64(*(CoreCopy·floor64(r0 + (r1 - r)))); + } + + Local void *CoreCopy·byte( + uint8_t *r0 ,uint8_t *r1 ,uint8_t *w0 ,bool reverse + ){ + //---------------------------------------- + // Argument guard + // + + if(r1read0; + uint8_t *w = (uint8_t *)it->write0; + + extent_t re = it->read_extent; + extent_t we = it->write_extent; + + if(we >= re){ + CoreCopy·bytes(r ,r + re ,w); + it->read0 += re; // Fixed stepping logic + it->read_extent = 0; + it->write0 += re; + it->write_extent -= re; + if(we == re) return CoreCopy·Step·perfect_fit; + return CoreCopy·Step·write_available; + } + + CoreCopy·bytes(r ,r + we ,w); + it->read0 += we; // Fixed stepping logic + it->read_extent -= we; + it->write_extent = 0; + it->write0 += we; + return CoreCopy·Step·read_surplus; + } + + #endif // LOCAL + +#endif // IMPLEMENTATION