+++ /dev/null
-/*
- CoreCopy - Memory copy operations with attention to alignment.
- Provides optimized copy and byte order reversal functions.
-
- 'ATP' At This Point in the code. Assertions follow.
-*/
-
-#define CoreCopy·DEBUG
-
-#ifndef FACE
-#define CoreCopy·IMPLEMENTATION
-#define FACE
-#endif
-
-//--------------------------------------------------------------------------------
-// Interface
-
-#ifndef CoreCopy·FACE
-#define CoreCopy·FACE
-
- #include <stdint.h>
- #include <stddef.h>
-
- #define extentof(x) (sizeof(x) - 1)
- #define extent_t size_t
-
- typedef struct{
- void *read0;
- extent_t read_extent;
- void *write0;
- extent_t write_extent;
- } CoreCopy·It;
-
- typedef enum{
- CoreCopy·It·Status·valid = 0
- ,CoreCopy·It·Status·null_read
- ,CoreCopy·It·Status·null_write
- ,CoreCopy·It·Status·overlap
- } CoreCopy·It·Status;
-
- typedef enum{
- CoreCopy·Step·perfect_fit = 0
- ,CoreCopy·Step·argument_guard
- ,CoreCopy·Step·read_surplus
- ,CoreCopy·Step·read_surplus_write_gap
- ,CoreCopy·Step·write_available
- ,CoreCopy·Step·write_gap
- } CoreCopy·Status;
-
- typedef struct{
- bool CoreCopy·IntervalPts·in(void *pt, void *pt0 ,void *pt1);
- bool CoreCopy·IntervalPts·contains(void *pt00 ,void *pt01 ,void *pt10 ,void *pt11);
- bool CoreCopy·IntervalPts·overlap(void *pt00 ,void *pt01, void *pt10 ,void *pt11);
-
- bool CoreCopy·IntervalPtSize·in(void *pt, void *pt0 ,size_t s);
- bool CoreCopy·IntervalPtSize·overlap(void *pt00 ,size_t s0, void *pt10 ,size_t s1);
-
- CoreCopy·It·Status CoreCopy·wellformed_it(CoreCopy·It *it)
-
- void *identity(void *read0 ,void *read1 ,void *write0);
- void *reverse_byte_order(void *read0 ,void *read1 ,void *write0);
-
- CoreCopy·Status CoreCopy·Step·identity(CoreCopy·It *it);
- CoreCopy·Status CoreCopy·Step·reverse_order(CoreCopy·It *it);
- CoreCopy·Status CoreCopy·Step·write_hex(CoreCopy·It *it);
- CoreCopy·Status CoreCopy·Step·read_hex(CoreCopy·It *it);
- } CoreCopy·M;
-
-#endif
-
-//--------------------------------------------------------------------------------
-// Implementation
-
-#ifdef CoreCopy·IMPLEMENTATION
-
- #ifdef CoreCopy·DEBUG
- #include <stdio.h>
- #endif
-
- // this part goes into Copylib.a
- // yes this is empty, so there is no Copylib.a
- #ifndef LOCAL
- #endif
-
- #ifdef LOCAL
-
- // Interval predicates.
- // Intervals in Copy have inclusive bounds
-
- Local bool CoreCopy·aligned64(void *p){
- return ((uintptr_t)p & 0x7) == 0;
- }
-
- Local bool CoreCopy·IntervalPts·in(void *pt, void *pt0 ,void *pt1){
- return pt >= pt0 && pt <= pt1; // Inclusive bounds
- }
-
- Local bool CoreCopy·in_extent_interval(void *pt, void *pt0 ,extent_t e){
- return CoreCopy·IntervalPts·in(pt ,pt0 ,pt0 + e);
- }
-
- // interval 0 contains interval 1, overlap on boundaries allowed.
- Local bool CoreCopy·IntervalPts·contains(
- void *pt00 ,void *pt01 ,void *pt10 ,void *pt11
- ){
- return pt10 >= pt00 && pt11 <= pt01;
- }
-
- // interval 0 properly contains interval 1, overlap on boundaries not allowed.
- Local bool CoreCopy·contains_proper_pt_interval(
- void *pt00 ,void *pt01 ,void *pt10 ,void *pt11
- ){
- return pt10 > pt00 && pt11 < pt01;
- }
-
- // Possible cases of overlap, including just touching
- // 1. interval 0 to the right of interval 1, just touching p00 == p11
- // 2. interval 0 to the left of interval 1, just touching p01 == p10
- // 3. interval 0 wholly contained in interval 1
- // 4. interval 0 wholly contains interval 1
- Local bool CoreCopy·IntervalPts·overlap(void *pt00 ,void *pt01, void *pt10 ,void *pt11){
- return
- CoreCopy·IntervalPts·in(pt00 ,pt10 ,pt11) // #1, #3
- || CoreCopy·IntervalPts·in(pt10 ,pt00 ,pt01) // #2, #4
- ;
- }
-
- Local bool CoreCopy·overlap_extent_interval(void *pt00 ,extent_t e0, void *pt10 ,extent_t e1){
- return CoreCopy·IntervalPts·overlap(pt00 ,pt00 + e0 ,pt10 ,pt10 + e1);
- }
-
- Local CoreCopy·It·Status CoreCopy·It·wellformed(CoreCopy·It *it){
- char *this_name = "CoreCopy·It·wellformed";
- CoreCopy·It·Status status = CoreCopy·It·Status·valid;
-
- if(it->read0 == NULL){
- fprintf(stderr, "%s: NULL read pointer\n", this_name);
- status |= CoreCopy·It·Status·null_read;
- }
-
- if(it->write0 == NULL){
- fprintf(stderr, "%s: NULL write pointer\n", this_name);
- status |= CoreCopy·It·Status·null_write;
- }
-
- if(
- CoreCopy·overlap_extent_interval(it->read0 ,it->read_extent ,it->write0 ,it->write_extent)
- ){
- fprintf(stderr, "%s: Read and write buffers overlap!\n", this_name);
- status |= CoreCopy·It·Status·overlap;
- }
-
- return status;
- }
-
- // consider an 8 byte window that is aligned
- // returns the byte pointer to the least address byte in the window
- Local void *CoreCopy·floor64(void *p){
- return (uintptr_t)p & ~(uintptr_t)0x7;
- }
-
- // consider an 8 byte window that is aligned
- // returns the byte pointer to the greatest address byte in the window
- Local void *CoreCopy·ceiling64(void *p){
- return (uintptr_t)p | 0x7;
- }
-
- // byte array greatest address byte at p1 (inclusive)
- // byte array least address byte at p0 (inclusive)
- // returns pointer to the greatest full 64-bit word-aligned address that is ≤ p1
- // by contract, p1 must be >= p0
- Local uint64_t *CoreCopy·greatest_full_64(void *p0 ,void *p1){
-
- // If p1 - 0x7 moves into a prior word while p0 does not, a prefetch hazard can occur.
- // If p1 and p0 are more than 0x7 apart, they cannot be in the same word,
- // but this does not guarantee a full 64-bit word exists in the range.
- if((uintptr_t)p1 < (uintptr_t)p0 + 0x7) return NULL;
-
- // Compute the last fully aligned word at or before p1.
- uint64_t *p1_64 = (void *)( ((uintptr_t)p1 - 0x7) & ~(uintptr_t)0x7 );
-
- // If alignment rounds p1_64 below p0, there is no full word available.
- if(p1_64 < p0) return NULL;
-
- return p1_64;
- }
-
- // byte array greatest address byte at p1 (inclusive)
- // byte array least address byte at p0 (inclusive)
- // returns pointer to the least full 64-bit word-aligned address that is ≥ p0
- Local uint64_t *CoreCopy·least_full_64(void *p0 ,void *p1){
-
- // If p0 + 0x7 moves into the next word while p1 does not, a prefetch hazard can occur.
- // If p1 and p0 are more than 0x7 apart, they cannot be in the same word,
- // but this does not guarantee a full 64-bit word exists in the range.
- if(p1 - p0 < 0x7) return NULL;
-
- // Compute the first fully aligned word at or after p0.
- uint64_t *p0_64 = (void *)( ((uintptr_t)p0 + 0x7) & ~(uintptr_t)0x7 );
-
- // If alignment rounds p0_64 beyond p1, there is no full word available.
- if(p0_64 > p1) return NULL;
-
- return p0_64;
- }
-
- Local void *CoreCopy·inc64(void *p ,size_t Δ){
- return (void *)((uint64_t *)p) + Δ;
- }
-
- Local uint64_t CoreCopy·read_word_fwd(uint64_t *r){
- return *r;
- }
-
- Local uint64_t CoreCopy·read_word_rev(uint64_t *r0, uint64_t *r1, uint64_t *r){
- return __builtin_bswap64(*(CoreCopy·floor64(r0 + (r1 - r))));
- }
-
- Local void *CoreCopy·byte(
- uint8_t *r0 ,uint8_t *r1 ,uint8_t *w0 ,bool reverse
- ){
- //----------------------------------------
- // Argument guard
- //
-
- if(r1<r0) return NULL;
-
- //----------------------------------------
- // Setup pointers
- //
-
- uint8_t *r = r0;
- uint8_t *w = w0;
-
- // Function pointer for dynamic read behavior
- uint8_t (*read_byte)(
- uint8_t * ,uint8_t * ,uint8_t *
- ) = reverse ? CoreCopy·read_byte_rev : CoreCopy·read_byte_fwd;
-
- //----------------------------------------
- // Byte-wise copy
- //
-
- do{
- *w = read_byte(r0 ,r1 ,r);
- if(r==r1) break;
- w++;
- r++;
- }while(true);
-
- return w;
- }
-
- Local void *CoreCopy·word64(void *read0 ,void *read1 ,void *write0 ,bool reverse){
-
- //----------------------------------------
- // Argument guard
-
- if(read1 < read0) return NULL;
-
- //----------------------------------------
- // Setup pointers
-
- // the read interval, for byte arrays
- uint8_t *r0 = (uint8_t *)read0;
- uint8_t *r1 = (uint8_t *)read1; // inclusive upper bound
- uint8_t *w0 = (uint8_t *)write0;
-
- // the contained word interval, inclusive bounds
- uint64_t *r0_64 = CoreCopy·least_full_64(r0 ,r1);
- uint64_t *r1_64 = CoreCopy·greatest_full_64(r0 ,r1);
-
- // swap byte order done by overloading the read function
- uint8_t (*read_byte)(uint8_t * ,uint8_t * ,uint8_t *)
- = reverse ? CoreCopy·read_byte_rev : CoreCopy·read_byte_fwd;
-
- uint64_t (*read_word)(uint64_t * ,uint64_t * ,uint64_t *)
- = reverse ? CoreCopy·read_word_rev : CoreCopy·read_word_fwd;
-
- // If no full words ,perform byte-wise copy
- if(r0_64 == NULL || r1_64 == NULL) return CoreCopy·byte(r0 ,r1 ,w0 ,reverse);
-
- //----------------------------------------
- // Align `r` to first full 64-bit word boundary
-
- uint8_t *w=w0;
- if( !CoreCopy·aligned64(r0) ){
- w = CoreCopy·byte(r0 ,r0_64 - 1 ,w ,reverse);
- }
- uint8_t *r = r0_64;
-
- //----------------------------------------
- // Bulk word-wise copy
-
- do{
- *(uint64_t *)w = read_word(r0_64 ,r1_64 ,(uint64_t *)r);
- if(r == (uint8_t *)r1_64) break;
- w = CoreCopy·inc64(w ,1);
- r = CoreCopy·inc64(r ,1);
- }while(true);
-
- // If r1 was aligned ,we're done
- if(CoreCopy·aligned64(r1)) return w;
- w = CoreCopy·inc64(w ,1);
- r = CoreCopy·inc64(r ,1);
-
- //----------------------------------------
- // Ragged tail (byte-wise copy)
-
- return CoreCopy·byte(r ,r1 ,w ,reverse);
-
- }
-
-
- /*
- Read and write pointers are incremented by `extent + 1`, ensuring they do not skip
- past the last valid byte. The previous `+1` was incorrect in cases where
- stepping already processed the last byte.
- */
- Local CoreCopy·Status CoreCopy·Step·identity(CoreCopy·It *it){
- uint8_t *r = (uint8_t *)it->read0;
- uint8_t *w = (uint8_t *)it->write0;
-
- extent_t re = it->read_extent;
- extent_t we = it->write_extent;
-
- if(we >= re){
- CoreCopy·bytes(r ,r + re ,w);
- it->read0 += re; // Fixed stepping logic
- it->read_extent = 0;
- it->write0 += re;
- it->write_extent -= re;
- if(we == re) return CoreCopy·Step·perfect_fit;
- return CoreCopy·Step·write_available;
- }
-
- CoreCopy·bytes(r ,r + we ,w);
- it->read0 += we; // Fixed stepping logic
- it->read_extent -= we;
- it->write_extent = 0;
- it->write0 += we;
- return CoreCopy·Step·read_surplus;
- }
-
- #endif // LOCAL
-
-#endif // IMPLEMENTATION
void *write0; // write0 = NULL means no buffer or empty buffer.
extent_t write_extent;
bool reverse_byte_order;
- } Core·AreaPairing;
+ } Core·AreaPairing;
typedef enum{
Core·AreaPairing·Status·valid = 0
,Core·Step·write_gap
} Core·Step·Status;
- typedef struct{
+ typedef Core·Step·Fn (*Core·Step·Fn)();
- bool Area·encloses_pt(void *pt ,void *pt0 ,size_t s);
- bool Area·encloses_pt_strictly(void *pt ,void *pt0 ,size_t s);
- bool Area·encloses(void *pt00 ,size_t e0 ,void *pt10 ,size_t e1);
- bool Area·encloses_strictly(void *pt00 ,size_t e0 ,void *pt10 ,size_t e1);
- bool Area·overlap(void *pt00 ,size_t s0 ,void *pt10 ,size_t s1);
+ typedef struct{
- uint64_t *greatest_full_64(void *p0 ,void *p1);
- uint64_t *least_full_64(void *p0 ,void *p1);
+ // Area predicates
+ bool Area·encloses_pt(void *pt ,void *pt0 ,extent_t e);
+ bool Area·encloses_pt_strictly(void *pt ,void *pt0 ,extent_t e);
+ bool Area·encloses(void *pt00 ,extent_t e0 ,void *pt10 ,extent_t e1);
+ bool Area·encloses_strictly(void *pt00 ,extent_t e0 ,void *pt10 ,extent_t e1);
+ bool Area·overlap(void *pt00 ,extent_t e0 ,void *pt10 ,extent_t e1);
+ // 64 bit word operations
bool is_aligned_64(void *p);
void *floor_64(void *p);
void *ceiling_64(void *p);
+ uint64_t *greatest_full_64(void *p0 ,void *p1);
+ uint64_t *least_full_64(void *p0 ,void *p1);
void *inc_64(void *p ,size_t Δ);
- Core·AreaPairing·Status wellformed_it(Core·AreaPairing *it);
+ Core·AreaPairing·Status wellformed_it(Core·AreaPairing *ap);
+
+ Core·Step·Status Core·step(Core·Step·Fn fn ,Core·AreaPairing *ap);
+ Core·Step·Fn copy_8;
+ Core·Step·Fn copy_16;
+
- void *identity(void *read0 ,void *read1 ,void *write0);
- void *reverse_byte_order(void *read0 ,void *read1 ,void *write0);
- Core·Step·Status Step·identity(Core·AreaPairing *it);
- Core·Step·Status Step·reverse_order(Core·AreaPairing *it);
- Core·Step·Status Step·write_hex(Core·AreaPairing *it);
- Core·Step·Status Step·read_hex(Core·AreaPairing *it);
} Core·M;
#endif
typedef struct {
Core·Step·Status status;
- Core·AreaPairing *it;
+ Core·AreaPairing *ap;
struct {
ReadFn8 read_fn;
} copy_8;
//----------------------------------------
// Area predicates
- Local bool Core·Area·encloses_point(void *pt ,void *pt0 ,extent_t e){
+ Local bool Core·Area·encloses_pt(void *pt ,void *pt0 ,extent_t e){
return (pt >= pt0) && (pt <= pt0 + e); // Inclusive bounds
}
- Local bool Core·Area·encloses_point_strictly(void *pt ,void *pt0 ,extent_t e){
+ Local bool Core·Area·encloses_pt_strictly(void *pt ,void *pt0 ,extent_t e){
return (pt > pt0) && (pt < pt0 + e); // Strictly inside
}
// Area 0 encloses Area 1
}
//----------------------------------------
- // iterator
- // An iterator is used to fill buffers in a bucket brigade fashion
- // Each buffer is passed as an address to the least byte and an extent
+ // AreaPairing
- Local Core·AreaPairing·Status Core·AreaPairing·wellformed(Copy·it *it){
+ Local Core·AreaPairing·Status Core·AreaPairing·wellformed(Core·AreaPairing *ap){
bool print = false;
#ifdef Core·DEBUG
return Core·AreaPairing·Status·null;
}
- if(it->read0 == NULL){
+ if(ap->read0 == NULL){
if(print) fprintf( stderr ,"%s: empty read buffer\n" ,this_name );
status |= Copy·WFIt·Status·empty_read_buffer;
}
- if(it->write0 == NULL){
+ if(ap->write0 == NULL){
if(print) fprintf( stderr ,"%s: empty write buffer\n" ,this_name );
status |= Copy·WFIt·Status·empty_write_buffer;
}
- if( Copy·overlap_size_interval(it->read0 ,it->read_size ,it->write0 ,it->write_size) ){
+ if( Copy·overlap_size_interval(ap->read0 ,ap->read_size ,ap->write0 ,ap->write_size) ){
if(print) fprintf( stderr ,"%s: Read and write buffers overlap!\n" ,this_name );
status |= Copy·WFIt·Status·overlap;
}
typedef Core·Step·Fn (*Core·Step·Fn)();
// Step function using trampoline execution model
- Local Core·Step·Status Core·step(Core·Step·Fn fn ,Core·AreaPairing *it){
+ Local Core·Step·Status Core·step(Core·Step·Fn fn ,Core·AreaPairing *ap){
if(
fn != Core·copy_64 && fn != Core·copy_8
||
Local Core·Step·Fn Core·copy_8(){
// Assign the correct read function based on byte order
- if(Core·tableau.it->reverse_byte_order)
+ if(Core·tableau.ap->reverse_byte_order)
Core·tableau.copy_8.read_fn = Core·read_8_rev;
else
Core·tableau.copy_8.read_fn = Core·read_8_fwd;
// Determine the appropriate case and dispatch
- if(Core·tableau.it->read_extent == Core·tableau.it->write_extent)
+ if(Core·tableau.ap->read_extent == Core·tableau.ap->write_extent)
return Core·Copy8·perfect_fit;
- if(Core·tableau.it->read_extent > Core·tableau.it->write_extent)
+ if(Core·tableau.ap->read_extent > Core·tableau.ap->write_extent)
return Core·Copy8·read_surplus;
return Core·Copy8·write_available;
}
Local Core·Step·Fn Core·Copy8·perfect_fit(){
- uint8_t *r = (uint8_t *) Core·tableau.it->read0;
- uint8_t *r1 = (uint8_t *) (r + Core·tableau.it->read_extent);
- uint8_t *w = (uint8_t *) Core·tableau.it->write0;
+ uint8_t *r = (uint8_t *) Core·tableau.ap->read0;
+ uint8_t *r1 = (uint8_t *) (r + Core·tableau.ap->read_extent);
+ uint8_t *w = (uint8_t *) Core·tableau.ap->write0;
do{
- *w = Core·tableau.copy_8.read_fn(Core·tableau.it->read0, r1, r);
+ *w = Core·tableau.copy_8.read_fn(Core·tableau.ap->read0, r1, r);
if(r == r1) break;
r++;
w++;
}while(true);
- Core·tableau.it->read0 = NULL; // Buffer exhausted
- Core·tableau.it->write0 = NULL; // Buffer exhausted
+ Core·tableau.ap->read0 = NULL; // Buffer exhausted
+ Core·tableau.ap->write0 = NULL; // Buffer exhausted
Core·tableau.status = Core·Step·perfect_fit;
return NULL;
}
Local Core·Step·Fn Core·Copy8·read_surplus(){
- uint8_t *r = (uint8_t *) Core·tableau.it->read0;
- uint8_t *r1 = (uint8_t *) (r + Core·tableau.it->write_extent);
- uint8_t *w = (uint8_t *) Core·tableau.it->write0;
+ uint8_t *r = (uint8_t *) Core·tableau.ap->read0;
+ uint8_t *r1 = (uint8_t *) (r + Core·tableau.ap->write_extent);
+ uint8_t *w = (uint8_t *) Core·tableau.ap->write0;
do{
- *w = Core·tableau.copy_8.read_fn(Core·tableau.it->read0, r1, r);
+ *w = Core·tableau.copy_8.read_fn(Core·tableau.ap->read0, r1, r);
if(r == r1) break;
r++;
w++;
}while(true);
- Core·tableau.it->read0 = r; // Advance read pointer
- Core·tableau.it->read_extent -= Core·tableau.it->write_extent;
- Core·tableau.it->write0 = NULL; // Write buffer exhausted
+ Core·tableau.ap->read0 = r; // Advance read pointer
+ Core·tableau.ap->read_extent -= Core·tableau.ap->write_extent;
+ Core·tableau.ap->write0 = NULL; // Write buffer exhausted
Core·tableau.status = Core·Step·read_surplus;
return NULL;
}
Local Core·Step·Fn Core·Copy8·write_available(){
- uint8_t *r = (uint8_t *) Core·tableau.it->read0;
- uint8_t *r1 = (uint8_t *) (r + Core·tableau.it->read_extent);
- uint8_t *w = (uint8_t *) Core·tableau.it->write0;
- uint8_t *w1 = (uint8_t *) (w + Core·tableau.it->write_extent);
+ uint8_t *r = (uint8_t *) Core·tableau.ap->read0;
+ uint8_t *r1 = (uint8_t *) (r + Core·tableau.ap->read_extent);
+ uint8_t *w = (uint8_t *) Core·tableau.ap->write0;
+ uint8_t *w1 = (uint8_t *) (w + Core·tableau.ap->write_extent);
do{
- *w = Core·tableau.copy_8.read_fn(Core·tableau.it->read0, r1, r);
+ *w = Core·tableau.copy_8.read_fn(Core·tableau.ap->read0, r1, r);
if(w == w1) break;
r++;
w++;
}while(true);
- Core·tableau.it->write0 = w; // Advance write pointer
- Core·tableau.it->write_extent -= Core·tableau.it->read_extent;
- Core·tableau.it->read0 = NULL; // Read buffer exhausted
+ Core·tableau.ap->write0 = w; // Advance write pointer
+ Core·tableau.ap->write_extent -= Core·tableau.ap->read_extent;
+ Core·tableau.ap->read0 = NULL; // Read buffer exhausted
Core·tableau.status = Core·Step·write_available;
return NULL;
}
+
//----------------------------------------
- // copy_64 buffer fill step
+ // copy_64
+ // Trampolined steps: byte-wise lead-in until 64-bit alignment, word-wise bulk, then byte-wise tail
Core·Step·Fn Core·copy_64;
Core·Step·Fn Core·Copy64·leadin;
Core·Step·Fn Core·Copy64·bulk;
Local Core·Step·Fn Core·copy_64(){
// Assign the correct read function based on byte order
- if(Core·tableau.it->reverse_byte_order)
+ if(Core·tableau.ap->reverse_byte_order)
Core·tableau.copy_64.read_fn = Core·read_64_rev;
else
Core·tableau.copy_64.read_fn = Core·read_64_fwd;
// Determine aligned 64-bit word boundaries
Core·tableau.copy_64.r0_64 = Core·least_full_64(
- Core·tableau.it->read0, Core·tableau.it->read0 + Core·tableau.it->read_extent
- );
+ Core·tableau.ap->read0, Core·tableau.ap->read0 + Core·tableau.ap->read_extent
+ );
Core·tableau.copy_64.r1_64 = Core·greatest_full_64(
- Core·tableau.it->read0, Core·tableau.it->read0 + Core·tableau.it->read_extent
- );
+ Core·tableau.ap->read0, Core·tableau.ap->read0 + Core·tableau.ap->read_extent
+ );
// Choose the correct function based on alignment
if(Core·tableau.copy_64.r0_64 == NULL) return Core·Copy64·tail;
- if(Core·is_aligned_64(Core·tableau.it->read0)) return Core·Copy64·bulk;
+ if(Core·is_aligned_64(Core·tableau.ap->read0)) return Core·Copy64·bulk;
return Core·Copy64·leadin;
}
// Lead-in byte copy (until alignment)
Local Core·Step·Fn Core·Copy64·leadin(){
- uint8_t *r = (uint8_t *) Core·tableau.it->read0;
- uint8_t *w = (uint8_t *) Core·tableau.it->write0;
+ uint8_t *r = (uint8_t *) Core·tableau.ap->read0;
+ uint8_t *w = (uint8_t *) Core·tableau.ap->write0;
uint8_t *r0_64 = (uint8_t *) Core·tableau.copy_64.r0_64;
do{
- *w++ = Core·tableau.copy_8.read_fn(Core·tableau.it->read0, r0_64, r);
+ *w++ = Core·tableau.copy_8.read_fn(Core·tableau.ap->read0, r0_64, r);
if(r == r0_64) break;
r++;
}while(1);
- Core·tableau.it->read0 = r;
- Core·tableau.it->write0 = w;
+ Core·tableau.ap->read0 = r;
+ Core·tableau.ap->write0 = w;
return Core·Copy64·bulk;
}
// Bulk word copy
Local Core·Step·Fn Core·Copy64·bulk(){
- uint64_t *r64 = (uint64_t *) Core·tableau.it->read0;
- uint64_t *w64 = (uint64_t *) Core·tableau.it->write0;
+ uint64_t *r64 = (uint64_t *) Core·tableau.ap->read0;
+ uint64_t *w64 = (uint64_t *) Core·tableau.ap->write0;
uint64_t *r1_64 = Core·tableau.copy_64.r1_64;
do{
*w64++ = Core·tableau.copy_64.read_fn(
- Core·tableau.copy_64.r0_64, Core·tableau.copy_64.r1_64, r64
- );
+ Core·tableau.copy_64.r0_64, Core·tableau.copy_64.r1_64, r64
+ );
if(r64 == r1_64) break;
r64++;
}while(1);
- Core·tableau.it->read0 = r64;
- Core·tableau.it->write0 = w64;
+ Core·tableau.ap->read0 = r64;
+ Core·tableau.ap->write0 = w64;
return Core·Copy64·tail;
}
// Tail byte copy (unaligned trailing bytes)
Local Core·Step·Fn Core·Copy64·tail(){
- uint8_t *r = (uint8_t *) Core·tableau.it->read0;
- uint8_t *w = (uint8_t *) Core·tableau.it->write0;
+ uint8_t *r = (uint8_t *) Core·tableau.ap->read0;
+ uint8_t *w = (uint8_t *) Core·tableau.ap->write0;
uint8_t *r1 = (uint8_t *) Core·tableau.copy_64.r1_64;
do{
- *w++ = Core·tableau.copy_8.read_fn(Core·tableau.it->read0, r1, r);
+ *w++ = Core·tableau.copy_8.read_fn(Core·tableau.ap->read0, r1, r);
if(r == r1) break;
r++;
}while(1);
- Core·tableau.it->read0 = r;
- Core·tableau.it->write0 = w;
+ Core·tableau.ap->read0 = r;
+ Core·tableau.ap->write0 = w;
Core·tableau.status = Core·Step·perfect_fit;
return NULL;
}
-
//----------------------------------------
+ // step write hex
+
// Forward Declarations
- Core·Step·Fn Core·write_hex;
- Core·Step·Fn Core·write_hex_bulk;
- Core·Step·Fn Core·write_hex_read_surplus;
- Core·Step·Fn Core·write_hex_write_available;
+ Core·Step·Fn Core·Step·write_hex;
+ Core·Step·Fn Core·Step·write_hex_bulk;
+ Core·Step·Fn Core·Step·write_hex_read_surplus;
+ Core·Step·Fn Core·Step·write_hex_write_available;
- Core·Step·Fn Core·read_hex;
- Core·Step·Fn Core·read_hex_bulk;
- Core·Step·Fn Core·read_hex_read_surplus;
- Core·Step·Fn Core·read_hex_write_available;
+ Core·Step·Fn Core·Step·read_hex;
+ Core·Step·Fn Core·Step·read_hex_bulk;
+ Core·Step·Fn Core·Step·read_hex_read_surplus;
+ Core·Step·Fn Core·Step·read_hex_write_available;
- //----------------------------------------
// Hex Encoding: Initialize Step
- Local Core·Step·Fn Core·write_hex(){
- if(Core·tableau.it->read_extent == (Core·tableau.it->write_extent >> 1)){
- return Core·write_hex_bulk;
+ Local Core·Step·Fn Core·Step·write_hex(){
+ // Dispatch on read_extent versus half of write_extent.
+ // The pairing is reached through the tableau member `ap`, as in the copy steps.
+ if(Core·tableau.ap->read_extent == (Core·tableau.ap->write_extent >> 1)){
+ return Core·Step·write_hex_bulk;
}
- if(Core·tableau.it->read_extent > (Core·tableau.it->write_extent >> 1)){
- return Core·write_hex_read_surplus;
+ if(Core·tableau.ap->read_extent > (Core·tableau.ap->write_extent >> 1)){
+ return Core·Step·write_hex_read_surplus;
}
- return Core·write_hex_write_available;
+ return Core·Step·write_hex_write_available;
}
- //----------------------------------------
// Hex Encoding: Bulk Processing (Perfect Fit)
- Local Core·Step·Fn Core·write_hex_bulk(){
- uint8_t *r = (uint8_t *)Core·tableau.it->read0;
- uint8_t *r1 = r + Core·tableau.it->read_extent;
- uint8_t *w = (uint8_t *)Core·tableau.it->write0;
+ Local Core·Step·Fn Core·Step·write_hex_bulk(){
+ // The pairing is reached through the tableau member `ap`, as in the copy steps.
+ uint8_t *r = (uint8_t *)Core·tableau.ap->read0;
+ uint8_t *r1 = r + Core·tableau.ap->read_extent;
+ uint8_t *w = (uint8_t *)Core·tableau.ap->write0;
do {
*(uint16_t *)w = Core·tableau.hex.convert.byte_to_hex(*r);
w += 2;
} while(1);
- Core·tableau.it->read0 = NULL;
- Core·tableau.it->write0 = NULL;
- Core·tableau.it->read_extent = 0;
- Core·tableau.it->write_extent = 0;
+ // Both areas are consumed: clear pointers and extents.
+ Core·tableau.ap->read0 = NULL;
+ Core·tableau.ap->write0 = NULL;
+ Core·tableau.ap->read_extent = 0;
+ Core·tableau.ap->write_extent = 0;
Core·tableau.status = Core·Step·perfect_fit;
return NULL;
}
- //----------------------------------------
// Hex Encoding: Read Surplus
- Local Core·Step·Fn Core·write_hex_read_surplus(){
- uint8_t *r = (uint8_t *)Core·tableau.it->read0;
- uint8_t *w = (uint8_t *)Core·tableau.it->write0;
- size_t limit = Core·tableau.it->write_extent >> 1;
+ Local Core·Step·Fn Core·Step·write_hex_read_surplus(){
+ // The pairing is reached through the tableau member `ap`, as in the copy steps.
+ uint8_t *r = (uint8_t *)Core·tableau.ap->read0;
+ uint8_t *w = (uint8_t *)Core·tableau.ap->write0;
+ size_t limit = Core·tableau.ap->write_extent >> 1;
uint8_t *r1 = r + limit;
do {
w += 2;
} while(1);
- Core·tableau.it->read0 = r + 1;
- Core·tableau.it->read_extent -= limit;
- Core·tableau.it->write0 = NULL;
- Core·tableau.it->write_extent = 0;
+ // The write area is exhausted; the read area keeps its remainder.
+ Core·tableau.ap->read0 = r + 1;
+ Core·tableau.ap->read_extent -= limit;
+ Core·tableau.ap->write0 = NULL;
+ Core·tableau.ap->write_extent = 0;
Core·tableau.status = Core·Step·read_surplus;
return NULL;
}
- //----------------------------------------
// Hex Encoding: Write Available
- Local Core·Step·Fn Core·write_hex_write_available(){
- uint8_t *r = (uint8_t *)Core·tableau.it->read0;
- uint8_t *w = (uint8_t *)Core·tableau.it->write0;
- size_t limit = Core·tableau.it->read_extent;
+ Local Core·Step·Fn Core·Step·write_hex_write_available(){
+ // The pairing is reached through the tableau member `ap`, as in the copy steps.
+ uint8_t *r = (uint8_t *)Core·tableau.ap->read0;
+ uint8_t *w = (uint8_t *)Core·tableau.ap->write0;
+ size_t limit = Core·tableau.ap->read_extent;
uint8_t *r1 = r + limit;
do {
w += 2;
} while(1);
- Core·tableau.it->read0 = NULL;
- Core·tableau.it->read_extent = 0;
- Core·tableau.it->write0 = w + 2;
- Core·tableau.it->write_extent -= limit << 1;
+ // The read area is exhausted; the write area keeps its remainder.
+ Core·tableau.ap->read0 = NULL;
+ Core·tableau.ap->read_extent = 0;
+ Core·tableau.ap->write0 = w + 2;
+ Core·tableau.ap->write_extent -= limit << 1;
Core·tableau.status = Core·Step·write_available;
return NULL;
}
//----------------------------------------
- // Hex Decoding: Initialize Step
- Local Core·Step·Fn Core·read_hex(){
- if((Core·tableau.it->read_extent >> 1) == Core·tableau.it->write_extent){
- return Core·read_hex_bulk;
+ // step read hex
+
+ Local Core·Step·Fn Core·Step·read_hex(){
+ // Dispatch on half of read_extent versus write_extent.
+ // The pairing is reached through the tableau member `ap`, as in the copy steps.
+ if((Core·tableau.ap->read_extent >> 1) == Core·tableau.ap->write_extent){
+ return Core·Step·read_hex_bulk;
}
- if((Core·tableau.it->read_extent >> 1) > Core·tableau.it->write_extent){
- return Core·read_hex_read_surplus;
+ if((Core·tableau.ap->read_extent >> 1) > Core·tableau.ap->write_extent){
+ return Core·Step·read_hex_read_surplus;
}
- return Core·read_hex_write_available;
+ return Core·Step·read_hex_write_available;
}
- //----------------------------------------
// Hex Decoding: Bulk Processing (Perfect Fit)
- Local Core·Step·Fn Core·read_hex_bulk(){
- uint8_t *r = (uint8_t *)Core·tableau.it->read0;
- uint8_t *r1 = r + Core·tableau.it->read_extent;
- uint8_t *w = (uint8_t *)Core·tableau.it->write0;
+ Local Core·Step·Fn Core·Step·read_hex_bulk(){
+ // The pairing is reached through the tableau member `ap`, as in the copy steps.
+ uint8_t *r = (uint8_t *)Core·tableau.ap->read0;
+ uint8_t *r1 = r + Core·tableau.ap->read_extent;
+ uint8_t *w = (uint8_t *)Core·tableau.ap->write0;
do {
*w = Core·tableau.hex.convert.hex_to_byte(*(uint16_t *)r);
w++;
} while(1);
- Core·tableau.it->read0 = NULL;
- Core·tableau.it->write0 = NULL;
- Core·tableau.it->read_extent = 0;
- Core·tableau.it->write_extent = 0;
+ // Both areas are consumed: clear pointers and extents.
+ Core·tableau.ap->read0 = NULL;
+ Core·tableau.ap->write0 = NULL;
+ Core·tableau.ap->read_extent = 0;
+ Core·tableau.ap->write_extent = 0;
Core·tableau.status = Core·Step·perfect_fit;
return NULL;
}
- //----------------------------------------
// Hex Decoding: Read Surplus
- Local Core·Step·Fn Core·read_hex_read_surplus(){
- uint8_t *r = (uint8_t *)Core·tableau.it->read0;
- uint8_t *w = (uint8_t *)Core·tableau.it->write0;
- size_t limit = Core·tableau.it->write_extent;
+ Local Core·Step·Fn Core·Step·read_hex_read_surplus(){
+ // The pairing is reached through the tableau member `ap`, as in the copy steps.
+ uint8_t *r = (uint8_t *)Core·tableau.ap->read0;
+ uint8_t *w = (uint8_t *)Core·tableau.ap->write0;
+ size_t limit = Core·tableau.ap->write_extent;
uint8_t *r1 = r + (limit << 1);
do {
w++;
} while(1);
- Core·tableau.it->read0 = r + 2;
- Core·tableau.it->read_extent -= limit << 1;
- Core·tableau.it->write0 = NULL;
- Core·tableau.it->write_extent = 0;
+ // The write area is exhausted; the read area keeps its remainder.
+ Core·tableau.ap->read0 = r + 2;
+ Core·tableau.ap->read_extent -= limit << 1;
+ Core·tableau.ap->write0 = NULL;
+ Core·tableau.ap->write_extent = 0;
Core·tableau.status = Core·Step·read_surplus;
return NULL;
}
- //----------------------------------------
// Hex Decoding: Write Available
- Local Core·Step·Fn Core·read_hex_write_available(){
- uint8_t *r = (uint8_t *)Core·tableau.it->read0;
- uint8_t *w = (uint8_t *)Core·tableau.it->write0;
- size_t limit = Core·tableau.it->read_extent >> 1;
+ Local Core·Step·Fn Core·Step·read_hex_write_available(){
+ // The pairing is reached through the tableau member `ap`, as in the copy steps.
+ uint8_t *r = (uint8_t *)Core·tableau.ap->read0;
+ uint8_t *w = (uint8_t *)Core·tableau.ap->write0;
+ size_t limit = Core·tableau.ap->read_extent >> 1;
uint8_t *r1 = r + (limit << 1);
do {
w++;
} while(1);
- Core·tableau.it->read0 = NULL;
- Core·tableau.it->read_extent = 0;
- Core·tableau.it->write0 = w + 1;
- Core·tableau.it->write_extent -= limit;
+ // The read area is exhausted; the write area keeps its remainder.
+ Core·tableau.ap->read0 = NULL;
+ Core·tableau.ap->read_extent = 0;
+ Core·tableau.ap->write0 = w + 1;
+ Core·tableau.ap->write_extent -= limit;
Core·tableau.status = Core·Step·write_available;
return NULL;
}
-
#endif // LOCAL
#endif // IMPLEMENTATION
+++ /dev/null
-/*
- Core - core memory operations.
-
- 'ATP' 'At This Point' in the code. Used in comments to state assertions.
- by definition an 'extent' is one less than a 'size'.
-*/
-
-#define Core·DEBUG
-
-#ifndef FACE
-#define Core·IMPLEMENTATION
-#define FACE
-#endif
-
-//--------------------------------------------------------------------------------
-// Interface
-
-#ifndef Core·FACE
-#define Core·FACE
-
- #include <stdint.h>
- #include <stddef.h>
-
- #define extentof(x) (sizeof(x) - 1)
- #define extent_t size_t
-
- typedef struct{
- void *read0;
- extent_t read_extent;
- void *write0;
- extent_t write_extent;
- } Core·It;
-
- typedef enum{
- Core·It·Status·valid = 0
- ,Core·It·Status·null
- ,Core·It·Status·null_read
- ,Core·It·Status·null_write
- ,Core·It·Status·overlap
- } Core·It·Status;
-
- typedef enum{
- Core·Step·Status·perfect_fit = 0
- ,Core·Step·Status·argument_guard // something wrong with the arguments to step
- ,Core·Step·Status·read_surplus
- ,Core·Step·Status·read_surplus_write_gap
- ,Core·Step·Status·write_available
- ,Core·Step·Status·write_gap
- } Core·Step·Status;
-
- typedef struct{
- bool Core·IntervalPts·in(void *pt ,void *pt0 ,void *pt1);
- bool Core·IntervalPts·contains(void *pt00 ,void *pt01 ,void *pt10 ,void *pt11);
- bool Core·IntervalPts·overlap(void *pt00 ,void *pt01 ,void *pt10 ,void *pt11);
-
- bool Core·IntervalPtSize·in(void *pt ,void *pt0 ,size_t s);
- bool Core·IntervalPtSize·overlap(void *pt00 ,size_t s0 ,void *pt10 ,size_t s1);
-
- Core·It·Status Core·wellformed_it(Core·It *it)
-
- void *identity(void *read0 ,void *read1 ,void *write0);
- void *reverse_byte_order(void *read0 ,void *read1 ,void *write0);
-
- Core·Status Core·Step·identity(Core·It *it);
- Core·Status Core·Step·reverse_order(Core·It *it);
- Core·Status Core·Step·write_hex(Core·It *it);
- Core·Status Core·Step·read_hex(Core·It *it);
- } Core·M;
-
-#endif
-
-//--------------------------------------------------------------------------------
-// Implementation
-
-#ifdef Core·IMPLEMENTATION
-
- #ifdef Core·DEBUG
- #include <stdio.h>
- #endif
-
- // this part goes into Copylib.a
- // yes this is empty, so there is no Copylib.a
- #ifndef LOCAL
- #endif
-
- #ifdef LOCAL
-
- // Interval predicates.
- // Intervals in Copy have inclusive bounds
-
- Local bool Core·aligned64(void *p){
- return ((uintptr_t)p & 0x7) == 0;
- }
-
- Local bool Core·IntervalPts·in(void *pt ,void *pt0 ,void *pt1){
- return pt >= pt0 && pt <= pt1; // Inclusive bounds
- }
-
- Local bool Core·IntervalPtExtent·in(void *pt ,void *pt0 ,extent_t e){
- return Core·IntervalPts·in(pt ,pt0 ,pt0 + e);
- }
-
- // interval 0 contains interval 1, overlap on boundaries allowed.
- Local bool Core·IntervalPts·contains(
- void *pt00 ,void *pt01 ,void *pt10 ,void *pt11
- ){
- return pt10 >= pt00 && pt11 <= pt01;
- }
-
- Local bool Core·IntervalPtExtent·contains(
- void *pt00 ,size_t e0 ,void *pt10 ,size_t e1
- ){
- contains(pt00 ,pt00 + e0 ,pt10 ,pt10 + e1)
- }
-
- // interval 0 properly contains interval 1, overlap on boundaries not allowed.
- Local bool Core·IntervalPts·contains_proper(
- void *pt00 ,void *pt01 ,void *pt10 ,void *pt11
- ){
- return pt10 > pt00 && pt11 < pt01;
- }
- Local bool Core·IntervalPtExtent·contains_proper(
- void *pt00 ,size_t e0 ,void *pt10 ,size_t e1
- ){
- contains_proper(pt00 ,pt00 + e0 ,pt10 ,pt10 + 1)
- }
-
-
- // Possible cases of overlap, including just touching
- // 1. interval 0 to the right of interval 1, just touching p00 == p11
- // 2. interval 0 to the left of interval 1, just touching p01 == p10
- // 3. interval 0 wholly contained in interval 1
- // 4. interval 0 wholly contains interval 1
- Local bool Core·IntervalPts·overlap(void *pt00 ,void *pt01 ,void *pt10 ,void *pt11){
- return
- Core·IntervalPts·in(pt00 ,pt10 ,pt11) // #1, #3
- || Core·IntervalPts·in(pt10 ,pt00 ,pt01) // #2, #4
- ;
- }
-
- Local bool Core·IntervalPtExtent·overlap(
- void *pt00 ,extent_t e0 ,void *pt10 ,extent_t e1
- ){
- return Core·IntervalPts·overlap(pt00 ,pt00 + e0 ,pt10 ,pt10 + e1);
- }
-
- Local Copy·It·Status Copy·wellformed_it(Copy·it *it){
-
- bool print = false;
- #ifdef Core·DEBUG
- print = true;
- #endif
-
- char *this_name = "Copy·wellformed_it";
- Copy·WFIt·Status status = Copy·WFIt·Status·valid;
-
- if(it == NULL){
- if(print) fprintf( stderr ,"%s: NULL read pointer\n" ,this_name );
- return Core·It·Status·null;
- }
-
- if(it->read0 == NULL){
- if(print) fprintf( stderr ,"%s: NULL read pointer\n" ,this_name );
- status |= Copy·WFIt·Status·null_read;
- }
-
- if(it->write0 == NULL){
- if(print) fprintf( stderr ,"%s: NULL write pointer\n" ,this_name );
- status |= Copy·WFIt·Status·null_write;
- }
-
- if(it->read_size == 0){
- if(print) fprintf( stderr ,"%s: Zero-sized read buffer\n" ,this_name );
- status |= Copy·WFIt·Status·zero_read_buffer;
- }
-
- if(it->write_size == 0){
- if(print) fprintf( stderr ,"%s: Zero-sized write buffer\n" ,this_name );
- status |= Copy·WFIt·Status·zero_write_buffer;
- }
-
- if( Copy·overlap_size_interval(it->read0 ,it->read_size ,it->write0 ,it->write_size) ){
- if(print) fprintf( stderr ,"%s: Read and write buffers overlap!\n" ,this_name );
- status |= Copy·WFIt·Status·overlap;
- }
-
- return status;
- }
-
- // consider an 8 byte window that is aligned
- // returns the byte pointer to the least address byte in the window
- Local void *Core·floor64(void *p){
- return (uintptr_t)p & ~(uintptr_t)0x7;
- }
-
- // consider an 8 byte window that is aligned
- // returns the byte pointer to the greatest address byte in the window
- Local void *Core·ceiling64(void *p){
- return (uintptr_t)p | 0x7;
- }
-
- // byte array greatest address byte at p1 (inclusive)
- // byte array least address byte at p0 (inclusive)
- // returns pointer to the greatest full 64-bit word-aligned address that is ≤ p1
- // by contract, p1 must be >= p0
- Local uint64_t *Core·greatest_full_64(void *p0 ,void *p1){
-
- // If p1 - 0x7 moves into a prior word while p0 does not, a prefetch hazard can occur.
- // If p1 and p0 are more than 0x7 apart, they cannot be in the same word,
- // but this does not guarantee a full 64-bit word exists in the range.
- if((uintptr_t)p1 < (uintptr_t)p0 + 0x7) return NULL;
-
- // Compute the last fully aligned word at or before p1.
- uint64_t *p1_64 = (void *)( ((uintptr_t)p1 - 0x7) & ~(uintptr_t)0x7 );
-
- // If alignment rounds p1_64 below p0, there is no full word available.
- if(p1_64 < p0) return NULL;
-
- return p1_64;
- }
-
- // byte array greatest address byte at p1 (inclusive)
- // byte array least address byte at p0 (inclusive)
- // returns pointer to the least full 64-bit word-aligned address that is ≥ p0
- Local uint64_t *Core·least_full_64(void *p0 ,void *p1){
-
- // If p0 + 0x7 moves into the next word while p1 does not, a prefetch hazard can occur.
- // If p1 and p0 are more than 0x7 apart, they cannot be in the same word,
- // but this does not guarantee a full 64-bit word exists in the range.
- if(p1 - p0 < 0x7) return NULL;
-
- // Compute the first fully aligned word at or after p0.
- uint64_t *p0_64 = (void *)( ((uintptr_t)p0 + 0x7) & ~(uintptr_t)0x7 );
-
- // If alignment rounds p0_64 beyond p1, there is no full word available.
- if(p0_64 > p1) return NULL;
-
- return p0_64;
- }
-
- Local void *Core·inc64(void *p ,size_t Δ){
- return (void *)((uint64_t *)p) + Δ;
- }
-
- Local uint64_t Core·read_word_fwd(uint64_t *r){
- return *r;
- }
-
- Local uint64_t Core·read_word_rev(uint64_t *r0 ,uint64_t *r1 ,uint64_t *r){
- return __builtin_bswap64(*(Core·floor64(r0 + (r1 - r))));
- }
-
-
-
-typedef void *(*step_fn_t)(Core·It *it ,void *tableau);
-
-// Function prototypes, for forward referencing
-step_fn_t Core·CopyWord64·init ,Core·CopyWord64·leadin ,Core·CopyWord64·bulk ,Core·CopyWord64·tail;
-
-// copy_word64 tableau structure
-typedef struct{
- Core·Step·Status status
-} Core·Step·tableau_t;
-
-typedef struct{
- Core·Step·Step·Status status
- ,uint64_t *r0_64
- ,uint64_t *r1_64
-} Core·CopyWord64·tableau_t;
-
-
-// Initialize the copy_word64
-copy_fn_t Core·Step·CopyWord64·init(Core·Step·It it ,Core·Step·tableau_t *t0){
- copy_step_tableau_t *t = (copy_step_tableau_t *) t0;
- // if iterator not well formed set status and return NULL
- // initialize the tableau struct from the iterator ..
- // ATP we know at least one byte must be copied
- // if r0_64 or r1_64 are NULL, copy the bytes, set status, and return NULL
- return Core·Step·CopyWord64·leadin;
-}
-
-// Lead-in byte copy (until alignment)
-void *Core·Step·CopyWord64·leadin(Core·Step·It it ,Core·Step·tableau_t *t0){
- copy_step_tableau_t *t = (copy_step_tableau_t *)t0;
- while(r < (uint8_t *)tableau->r0_64){
- *w++ = *r++;
- }
- return Core·Step·CopyWord64·bulk;
-}
-
-// Bulk word copy
-void *Core·Step·CopyWord64·bulk(Core·Step·It it ,Core·Step·tableau_t *t0){
- copy_step_tableau_t *t = (copy_step_tableau_t *)t0;
- uint64_t *r64 = (uint64_t *)r;
- uint64_t *r1_64 = tableau->r1_64;
- uint64_t *w64 = (uint64_t *)w;
-
- while(r64 <= r1_64){
- *w64++ = *r64++;
- }
- // check if read1 is aligned if so, set status and return NULL otherwise
- return Core·Step·CopyWord64·tail;
-}
-
-// Tail byte copy
-void *Core·Step·CopyWord64·tail(Core·Step·It it ,Core·Step·tableau_t *t){
- while(r <= r1){
- *w++ = *r++;
- }
- // set status on the tableau
- return NULL;
-}
-
-// Step function
-Core·Step·Status step(Core·Step·It it ,step_fn_t fn ,Core·Step·tableau_t *t){
- while(fn(it ,t));
- return t->status;
-}
-
-
-
- #endif // LOCAL
-
-#endif // IMPLEMENTATION
--- /dev/null
+/*
+ CoreCopy - Memory copy operations with attention to alignment.
+ Provides optimized copy and byte order reversal functions.
+
+ 'ATP' At This Point in the code. Assertions follow.
+*/
+
+#define CoreCopy·DEBUG
+
+#ifndef FACE
+#define CoreCopy·IMPLEMENTATION
+#define FACE
+#endif
+
+//--------------------------------------------------------------------------------
+// Interface
+
+#ifndef CoreCopy·FACE
+#define CoreCopy·FACE
+
+ #include <stdbool.h>
+ #include <stddef.h>
+ #include <stdint.h>
+ #include <string.h>
+
+ #define extentof(x) (sizeof(x) - 1)
+ #define extent_t size_t
+
+ typedef struct{ // pairs a read area with a write area; taken by CoreCopy·It·wellformed and CoreCopy·Step·identity
+ void *read0; // first byte of the read area
+ extent_t read_extent; // offset from read0 to the last byte of the read area; presumably size - 1 ,as in extentof
+ void *write0; // first byte of the write area
+ extent_t write_extent; // offset from write0 to the last byte of the write area; presumably size - 1 ,as in extentof
+ } CoreCopy·It;
+
+ // Well-formedness report for a CoreCopy·It.
+ // CoreCopy·It·wellformed accumulates faults with `|=` ,so every fault needs a bit of its
+ // own. With consecutive values (1 ,2 ,3) the pair null_read|null_write was
+ // indistinguishable from overlap. `valid` stays 0 ,i.e. no fault bit set.
+ typedef enum{
+   CoreCopy·It·Status·valid = 0
+   ,CoreCopy·It·Status·null_read = 1 << 0
+   ,CoreCopy·It·Status·null_write = 1 << 1
+   ,CoreCopy·It·Status·overlap = 1 << 2
+ } CoreCopy·It·Status;
+
+ typedef enum{ // result of a step; note the enumerators are spelled CoreCopy·Step·… while the type is CoreCopy·Status
+ CoreCopy·Step·perfect_fit = 0 // CoreCopy·Step·identity returns this when the write extent equals the read extent
+ ,CoreCopy·Step·argument_guard // not produced by any code visible in this file — presumably flags bad step arguments ,TODO confirm
+ ,CoreCopy·Step·read_surplus // CoreCopy·Step·identity returns this when the read extent exceeds the write extent
+ ,CoreCopy·Step·read_surplus_write_gap // not produced by any code visible in this file
+ ,CoreCopy·Step·write_available // CoreCopy·Step·identity returns this when the write extent exceeds the read extent
+ ,CoreCopy·Step·write_gap // not produced by any code visible in this file
+ } CoreCopy·Status;
+
+ // Function table for the CoreCopy interface.
+ // A struct cannot hold function declarations ,so every entry is a pointer to a function
+ // with the listed signature. Member names are kept exactly as they were.
+ // NOTE(review): several entries are spelled differently from the implementations further
+ // down (e.g. IntervalPtSize·in here ,in_extent_interval there) — confirm the intended names.
+ typedef struct{
+   // closed-interval predicates on pointer pairs
+   bool (*CoreCopy·IntervalPts·in)(void *pt, void *pt0 ,void *pt1);
+   bool (*CoreCopy·IntervalPts·contains)(void *pt00 ,void *pt01 ,void *pt10 ,void *pt11);
+   bool (*CoreCopy·IntervalPts·overlap)(void *pt00 ,void *pt01, void *pt10 ,void *pt11);
+
+   // the same predicates on (pointer ,size) pairs
+   bool (*CoreCopy·IntervalPtSize·in)(void *pt, void *pt0 ,size_t s);
+   bool (*CoreCopy·IntervalPtSize·overlap)(void *pt00 ,size_t s0, void *pt10 ,size_t s1);
+
+   // iterator validation
+   CoreCopy·It·Status (*CoreCopy·wellformed_it)(CoreCopy·It *it);
+
+   // whole-interval copies over [read0 ,read1]
+   void *(*identity)(void *read0 ,void *read1 ,void *write0);
+   void *(*reverse_byte_order)(void *read0 ,void *read1 ,void *write0);
+
+   // iterator-driven steps
+   CoreCopy·Status (*CoreCopy·Step·identity)(CoreCopy·It *it);
+   CoreCopy·Status (*CoreCopy·Step·reverse_order)(CoreCopy·It *it);
+   CoreCopy·Status (*CoreCopy·Step·write_hex)(CoreCopy·It *it);
+   CoreCopy·Status (*CoreCopy·Step·read_hex)(CoreCopy·It *it);
+ } CoreCopy·M;
+
+#endif
+
+//--------------------------------------------------------------------------------
+// Implementation
+
+#ifdef CoreCopy·IMPLEMENTATION
+
+ #ifdef CoreCopy·DEBUG
+ #include <stdio.h>
+ #endif
+
+ // this part goes into Copylib.a
+ // yes this is empty, so there is no Copylib.a
+ #ifndef LOCAL
+ #endif
+
+ #ifdef LOCAL
+
+ // Interval predicates.
+ // Intervals in Copy have inclusive bounds
+
+ // True when the address held in p is a multiple of 8 ,i.e. its low three bits are clear.
+ Local bool CoreCopy·aligned64(void *p){
+   uintptr_t low_bits = (uintptr_t)p & 0x7;
+   return low_bits == 0;
+ }
+
+ // Membership test on the closed interval [pt0 ,pt1]: both end points count as inside.
+ Local bool CoreCopy·IntervalPts·in(void *pt, void *pt0 ,void *pt1){
+   if(pt < pt0) return false; // below the lower bound
+   if(pt > pt1) return false; // above the upper bound
+   return true;
+ }
+
+ // Membership test on the closed interval that starts at pt0 and reaches e bytes further ,
+ // i.e. [pt0 ,pt0 + e]. The offset is applied through a byte pointer because arithmetic
+ // on `void *` is a compiler extension ,not ISO C.
+ Local bool CoreCopy·in_extent_interval(void *pt, void *pt0 ,extent_t e){
+   uint8_t *first = (uint8_t *)pt0;
+   return CoreCopy·IntervalPts·in(pt ,first ,first + e);
+ }
+
+ // Closed-interval containment: [pt10 ,pt11] lies within [pt00 ,pt01].
+ // Shared end points are accepted.
+ Local bool CoreCopy·IntervalPts·contains(
+   void *pt00 ,void *pt01 ,void *pt10 ,void *pt11
+ ){
+   bool lower_ok = pt10 >= pt00;
+   bool upper_ok = pt11 <= pt01;
+   return lower_ok && upper_ok;
+ }
+
+ // Proper containment: [pt10 ,pt11] lies strictly inside [pt00 ,pt01].
+ // An end point shared with the outer interval disqualifies.
+ Local bool CoreCopy·contains_proper_pt_interval(
+   void *pt00 ,void *pt01 ,void *pt10 ,void *pt11
+ ){
+   bool touches_or_escapes = pt10 <= pt00 || pt11 >= pt01;
+   return !touches_or_escapes;
+ }
+
+ // Two closed intervals overlap — merely touching counts — exactly when the lower bound
+ // of one of them falls inside the other:
+ //   pt00 inside [pt10 ,pt11]: interval 0 starts within interval 1
+ //     (touching at pt00 == pt11 ,or interval 0 wholly contained in interval 1)
+ //   pt10 inside [pt00 ,pt01]: interval 1 starts within interval 0
+ //     (touching at pt01 == pt10 ,or interval 0 wholly contains interval 1)
+ Local bool CoreCopy·IntervalPts·overlap(void *pt00 ,void *pt01, void *pt10 ,void *pt11){
+   if( CoreCopy·IntervalPts·in(pt00 ,pt10 ,pt11) ) return true;
+   return CoreCopy·IntervalPts·in(pt10 ,pt00 ,pt01);
+ }
+
+ // Overlap test for two closed intervals given as (first byte ,extent):
+ // [pt00 ,pt00 + e0] against [pt10 ,pt10 + e1]. Touching counts as overlap.
+ // Offsets are applied through byte pointers because arithmetic on `void *` is a
+ // compiler extension ,not ISO C.
+ Local bool CoreCopy·overlap_extent_interval(void *pt00 ,extent_t e0, void *pt10 ,extent_t e1){
+   uint8_t *first0 = (uint8_t *)pt00;
+   uint8_t *first1 = (uint8_t *)pt10;
+   return CoreCopy·IntervalPts·overlap(first0 ,first0 + e0 ,first1 ,first1 + e1);
+ }
+
+ // Checks an iterator before it is used for copying.
+ //   it: the iterator to examine; may be NULL.
+ // Returns CoreCopy·It·Status·valid (0) when nothing is wrong ,otherwise the fault values
+ // OR-ed together. A NULL `it` is reported as both null_read and null_write.
+ // Diagnostics go to stderr only when CoreCopy·DEBUG is defined ,because that is the only
+ // configuration in which this file includes <stdio.h>.
+ Local CoreCopy·It·Status CoreCopy·It·wellformed(CoreCopy·It *it){
+   #ifdef CoreCopy·DEBUG
+     const char *this_name = "CoreCopy·It·wellformed";
+   #endif
+   CoreCopy·It·Status status = CoreCopy·It·Status·valid;
+
+   if(it == NULL){
+     #ifdef CoreCopy·DEBUG
+       fprintf(stderr, "%s: NULL iterator\n", this_name);
+     #endif
+     return CoreCopy·It·Status·null_read | CoreCopy·It·Status·null_write;
+   }
+
+   if(it->read0 == NULL){
+     #ifdef CoreCopy·DEBUG
+       fprintf(stderr, "%s: NULL read pointer\n", this_name);
+     #endif
+     status |= CoreCopy·It·Status·null_read;
+   }
+
+   if(it->write0 == NULL){
+     #ifdef CoreCopy·DEBUG
+       fprintf(stderr, "%s: NULL write pointer\n", this_name);
+     #endif
+     status |= CoreCopy·It·Status·null_write;
+   }
+
+   // The overlap test offsets both pointers by their extents ,which is meaningless (and
+   // undefined) for a NULL pointer ,so it only runs when both areas are present.
+   if(
+     it->read0 != NULL
+     && it->write0 != NULL
+     && CoreCopy·overlap_extent_interval(it->read0 ,it->read_extent ,it->write0 ,it->write_extent)
+   ){
+     #ifdef CoreCopy·DEBUG
+       fprintf(stderr, "%s: Read and write buffers overlap!\n", this_name);
+     #endif
+     status |= CoreCopy·It·Status·overlap;
+   }
+
+   return status;
+ }
+
+ // Consider the aligned 8 byte window that holds p.
+ // Returns a pointer to the least-address byte of that window (p with its low three bits
+ // cleared). The integer result is cast back explicitly: converting an integer to a
+ // pointer without a cast is a constraint violation in C.
+ Local void *CoreCopy·floor64(void *p){
+   return (void *)( (uintptr_t)p & ~(uintptr_t)0x7 );
+ }
+
+ // Consider the aligned 8 byte window that holds p.
+ // Returns a pointer to the greatest-address byte of that window (p with its low three
+ // bits set). The integer result is cast back explicitly: converting an integer to a
+ // pointer without a cast is a constraint violation in C.
+ Local void *CoreCopy·ceiling64(void *p){
+   return (void *)( (uintptr_t)p | (uintptr_t)0x7 );
+ }
+
+ // For the byte array whose least-address byte is at p0 and whose greatest-address byte
+ // is at p1 (both inclusive) ,finds the highest 8-byte-aligned word that ends at or
+ // before p1. By contract p1 >= p0.
+ // Returns NULL when the array spans fewer than 8 bytes ,or when the word found would
+ // start below p0.
+ Local uint64_t *CoreCopy·greatest_full_64(void *p0 ,void *p1){
+   uintptr_t lo = (uintptr_t)p0;
+   uintptr_t hi = (uintptr_t)p1;
+
+   // Fewer than 8 bytes between the bounds: no whole word can fit.
+   if(hi < lo + 0x7) return NULL;
+
+   // Step back to where a word ending at p1 would begin ,then round down to alignment.
+   uintptr_t start = (hi - 0x7) & ~(uintptr_t)0x7;
+   uint64_t *word = (void *)start;
+
+   // Rounding down may have carried the word below the array.
+   if((void *)word < p0) return NULL;
+
+   return word;
+ }
+
+ // For the byte array whose least-address byte is at p0 and whose greatest-address byte
+ // is at p1 (both inclusive) ,finds the lowest 8-byte-aligned word that lies entirely
+ // inside the array.
+ // Returns NULL when p1 precedes p0 ,when the array spans fewer than 8 bytes ,or when the
+ // first aligned word at or after p0 would run past p1.
+ //
+ // All arithmetic is done on integer addresses: subtracting `void *` values is a compiler
+ // extension. The final test checks the END of the word against p1; checking only its
+ // start accepted e.g. p0 = 1 ,p1 = 8 and returned the word at 8 ,which reaches to 15.
+ Local uint64_t *CoreCopy·least_full_64(void *p0 ,void *p1){
+   uintptr_t lo = (uintptr_t)p0;
+   uintptr_t hi = (uintptr_t)p1;
+
+   // Fewer than 8 bytes between the bounds: no whole word can fit.
+   if(hi < lo || hi - lo < 0x7) return NULL;
+
+   // First aligned address at or after p0.
+   uintptr_t start = (lo + 0x7) & ~(uintptr_t)0x7;
+
+   // ATP hi - lo >= 7. The word fits iff start + 7 <= hi ,written here as offsets from lo
+   // so that neither side can wrap.
+   if(start - lo > (hi - lo) - 0x7) return NULL;
+
+   return (uint64_t *)start;
+ }
+
+ // Advances p by Δ 64-bit words (Δ * 8 bytes) and returns the result.
+ // The addition has to happen on the `uint64_t *`: a cast binds tighter than `+` ,so
+ // `(void *)((uint64_t *)p) + Δ` added Δ to a `void *` — Δ bytes under the GNU
+ // extension ,and not valid ISO C at all.
+ Local void *CoreCopy·inc64(void *p ,size_t Δ){
+   return (void *)( (uint64_t *)p + Δ );
+ }
+
+ // Forward word read: hands back the 64-bit value stored at r ,byte order untouched.
+ // Takes only the read position ,unlike CoreCopy·read_word_rev ,which also takes the
+ // bounds of the word interval.
+ Local uint64_t CoreCopy·read_word_fwd(uint64_t *r){
+   uint64_t word = r[0];
+   return word;
+ }
+
+ // Reverse word read for the word interval [r0 ,r1] (inclusive ,in 64-bit words).
+ //   r: the current forward position within the interval.
+ // Reads the word at the mirrored position r0 + (r1 - r) ,rounded down to an 8 byte
+ // boundary ,and returns it with its bytes swapped.
+ // CoreCopy·floor64 returns `void *` ,which cannot be dereferenced; the result is
+ // converted back to a word pointer first.
+ Local uint64_t CoreCopy·read_word_rev(uint64_t *r0, uint64_t *r1, uint64_t *r){
+   uint64_t *mirror = (uint64_t *)CoreCopy·floor64(r0 + (r1 - r));
+   return __builtin_bswap64(*mirror);
+ }
+
+ // Byte-at-a-time copy of the inclusive read interval [r0 ,r1] to w0 and upward.
+ // Every source byte is obtained through a reader chosen by `reverse`
+ // (CoreCopy·read_byte_rev when set ,CoreCopy·read_byte_fwd otherwise); the reader is
+ // handed the interval bounds together with the current read position.
+ // Returns NULL when r1 precedes r0 ,otherwise the address of the last byte written.
+ Local void *CoreCopy·byte(
+   uint8_t *r0 ,uint8_t *r1 ,uint8_t *w0 ,bool reverse
+ ){
+   // argument guard
+   if(r1 < r0) return NULL;
+
+   // pick the reader once ,outside the loop
+   uint8_t (*fetch)(uint8_t * ,uint8_t * ,uint8_t *);
+   if(reverse) fetch = CoreCopy·read_byte_rev;
+   else fetch = CoreCopy·read_byte_fwd;
+
+   // ATP at least one byte is to be copied. The exit test sits after the store so that
+   // neither cursor is stepped past the final byte.
+   uint8_t *dst = w0;
+   for(uint8_t *src = r0 ;; src++ ,dst++){
+     *dst = fetch(r0 ,r1 ,src);
+     if(src == r1) return dst;
+   }
+ }
+
+ // Copies the inclusive byte interval [read0 ,read1] to write0 and upward ,moving whole
+ // 64-bit words wherever the read side is 8-byte aligned and single bytes elsewhere.
+ //   reverse: selects the reversing readers (CoreCopy·read_word_rev ,and the reversing
+ //            byte reader inside CoreCopy·byte) instead of the forward ones.
+ // Returns NULL when read1 precedes read0 ,otherwise the address of the last byte
+ // written — the same convention as CoreCopy·byte ,in every branch.
+ //
+ // Corrections relative to the earlier draft:
+ //   - the lead-in ends at the BYTE before the first full word (`r0_64 - 1` on a
+ //     `uint64_t *` stepped back a whole word);
+ //   - the write cursor moves past the last lead-in byte before the first word is stored;
+ //   - a tail exists whenever bytes remain after the last full word (the old test ,
+ //     `aligned64(r1)` ,is true exactly when one tail byte is still pending);
+ //   - the forward and reverse word readers have different signatures ,so they are called
+ //     directly instead of through one function pointer;
+ //   - words are stored with memcpy because the write cursor need not be 8-byte aligned.
+ //
+ // NOTE(review): with `reverse` set ,lead-in ,bulk and tail are each passed to the
+ // reversing readers with their own bounds; whether that adds up to a reversal of the
+ // whole interval depends on CoreCopy·read_byte_rev ,which is not visible here — confirm.
+ Local void *CoreCopy·word64(void *read0 ,void *read1 ,void *write0 ,bool reverse){
+
+   //----------------------------------------
+   // Argument guard
+
+   if(read1 < read0) return NULL;
+
+   //----------------------------------------
+   // Setup pointers
+
+   uint8_t *r0 = (uint8_t *)read0;
+   uint8_t *r1 = (uint8_t *)read1; // inclusive upper bound
+   uint8_t *w = (uint8_t *)write0;
+
+   // the contained word interval ,inclusive bounds
+   uint64_t *r0_64 = CoreCopy·least_full_64(r0 ,r1);
+   uint64_t *r1_64 = CoreCopy·greatest_full_64(r0 ,r1);
+
+   // If no full words ,perform byte-wise copy
+   if(r0_64 == NULL || r1_64 == NULL) return CoreCopy·byte(r0 ,r1 ,w ,reverse);
+
+   //----------------------------------------
+   // Lead-in: bytes before the first full word
+
+   if( !CoreCopy·aligned64(r0) ){
+     w = (uint8_t *)CoreCopy·byte(r0 ,(uint8_t *)r0_64 - 1 ,w ,reverse);
+     w++; // CoreCopy·byte reports the last byte written; continue just after it
+   }
+
+   //----------------------------------------
+   // Bulk word-wise copy over [r0_64 ,r1_64]
+
+   uint64_t *r_64 = r0_64;
+   while(true){
+     uint64_t word = reverse
+       ? CoreCopy·read_word_rev(r0_64 ,r1_64 ,r_64)
+       : CoreCopy·read_word_fwd(r_64);
+     memcpy(w ,&word ,sizeof word);
+     w += sizeof word;
+     if(r_64 == r1_64) break;
+     r_64++;
+   }
+   // ATP w is one past the last byte written.
+
+   //----------------------------------------
+   // Ragged tail (byte-wise copy)
+
+   uint8_t *tail0 = (uint8_t *)(r1_64 + 1);
+   if(tail0 > r1) return w - 1; // the last full word ended exactly at read1
+
+   return CoreCopy·byte(tail0 ,r1 ,w ,reverse);
+ }
+
+
+ /*
+   Copies ,unchanged ,as many bytes as both areas of the iterator allow ,then steps the
+   iterator past what was consumed.
+
+   Extents are inclusive: an area of extent e holds e + 1 bytes. A copy limited by extent
+   e therefore moves e + 1 bytes ,and the side that still has room continues at
+   `pointer + e + 1` with its extent reduced by `e + 1`. An inclusive extent cannot
+   express "nothing left" ,so a side that has been used up is reported as a NULL pointer
+   with extent 0.
+
+   Returns
+     CoreCopy·Step·perfect_fit      both areas used up together
+     CoreCopy·Step·write_available  read area used up ,write area has room left
+     CoreCopy·Step·read_surplus     write area used up ,read area has bytes left
+
+   `it` is not validated here.
+ */
+ Local CoreCopy·Status CoreCopy·Step·identity(CoreCopy·It *it){
+   uint8_t *r = (uint8_t *)it->read0;
+   uint8_t *w = (uint8_t *)it->write0;
+
+   extent_t re = it->read_extent;
+   extent_t we = it->write_extent;
+
+   // The smaller extent bounds the copy; forward (non-reversing) byte copy.
+   extent_t e = we < re ? we : re;
+   CoreCopy·byte(r ,r + e ,w ,false);
+
+   if(we == re){
+     it->read0 = NULL;
+     it->read_extent = 0;
+     it->write0 = NULL;
+     it->write_extent = 0;
+     return CoreCopy·Step·perfect_fit;
+   }
+
+   if(we > re){
+     it->read0 = NULL;
+     it->read_extent = 0;
+     it->write0 = w + re + 1;
+     it->write_extent = we - re - 1;
+     return CoreCopy·Step·write_available;
+   }
+
+   // ATP we < re
+   it->write0 = NULL;
+   it->write_extent = 0;
+   it->read0 = r + we + 1;
+   it->read_extent = re - we - 1;
+   return CoreCopy·Step·read_surplus;
+ }
+
+ #endif // LOCAL
+
+#endif // IMPLEMENTATION