From 8b4f5c3702f0b400b5fd2922236042482b3f298e Mon Sep 17 00:00:00 2001 From: Tom Maiaroto Date: Sat, 16 Aug 2025 09:09:44 -0700 Subject: [PATCH 1/6] adjustments made to getting unique row id --- ai_plans/FIX_DUPLICATE_ROWS.md | 37 ++++++++++++++++++++++++++++++++ column.go | 17 ++++++++------- load.go | 15 +++++++++---- load_test.go | 12 +++++------ mapper_test.go | 39 ++++++++++++++++++++++++++++++++++ 5 files changed, 102 insertions(+), 18 deletions(-) create mode 100644 ai_plans/FIX_DUPLICATE_ROWS.md diff --git a/ai_plans/FIX_DUPLICATE_ROWS.md b/ai_plans/FIX_DUPLICATE_ROWS.md new file mode 100644 index 0000000..2ed92d1 --- /dev/null +++ b/ai_plans/FIX_DUPLICATE_ROWS.md @@ -0,0 +1,37 @@ +# Problem: Incorrect De-duplication When Mapping to Basic Slices + +## Summary +The `carta` library is designed to de-duplicate entities when mapping SQL rows to slices of structs (e.g., `[]User`). This is achieved by generating a unique ID for each entity based on the content of its primary key columns. This behavior is correct for handling `JOIN`s where a single entity might appear across multiple rows. + +However, this same logic is incorrectly applied when the mapping destination is a slice of a basic type (e.g., `[]string`, `[]int`). In this scenario, rows with duplicate values are treated as the same entity and are de-duplicated, which is incorrect. The desired behavior is to preserve every row from the result set, including duplicates. + +This issue is the root cause for the following problems: +1. The `if m.IsBasic` code path in `load.go` lacks test coverage because no tests exist for mapping to basic slices. +2. Attempts to write such tests lead to infinite loops and incorrect behavior because the column allocation and unique ID generation logic are not designed to handle this case. + +## Proposed Solution +The solution is to create a distinct execution path for "basic mappers" (`m.IsBasic == true`) that ensures every row is treated as a unique element. + +This will be accomplished in two main steps: + +### 1. Fix Column Allocation (`allocateColumns`) +The logic will be modified to enforce a clear rule for basic slices: the source SQL query must return **exactly one column**. + +- If `m.IsBasic` is true, the function will bypass the existing name-matching logic. +- It will validate that only one column is present in the query result. +- This single column will be assigned as the `PresentColumn` for the mapper. +- If more than one column is found, the function will return an error to prevent ambiguity. + +### 2. Fix Unique ID Generation (`loadRow`) +The logic will be modified to generate a unique ID based on the row's position rather than its content. + +- If `m.IsBasic` is true, the call to `getUniqueId(row, m)` will be bypassed. +- A new, position-based unique ID will be generated for each row (e.g., using a simple counter that increments with each row processed). +- This ensures that every row, regardless of its content, is treated as a distinct element to be added to the destination slice. + +This approach preserves the existing, correct behavior for struct mapping while introducing a new, robust path for handling basic slices correctly. + +## Plan +1. **Modify `column.go`**: Update the `allocateColumns` function to implement the single-column rule for basic mappers. +2. **Modify `load.go`**: Update the `loadRow` function to use a position-based counter for unique ID generation when `m.IsBasic` is true. +3. **Add Tests**: Create a new test case in `mapper_test.go` that maps a query result to a slice of a basic type (e.g., `[]string`) to validate the fix and provide coverage for the `m.IsBasic` code path. \ No newline at end of file diff --git a/column.go b/column.go index e265bc6..010a3d3 100644 --- a/column.go +++ b/column.go @@ -2,6 +2,7 @@ package carta import ( "database/sql" + "errors" "sort" "strings" ) @@ -17,16 +18,16 @@ type column struct { func allocateColumns(m *Mapper, columns map[string]column) error { presentColumns := map[string]column{} if m.IsBasic { - candidates := getColumnNameCandidates("", m.AncestorNames, m.Delimiter) + if len(columns) != 1 { + return errors.New("carta: when mapping to a slice of a basic type, the query must return exactly one column") + } for cName, c := range columns { - if _, ok := candidates[cName]; ok { - presentColumns[cName] = column{ - typ: c.typ, - name: cName, - columnIndex: c.columnIndex, - } - delete(columns, cName) // dealocate claimed column + presentColumns[cName] = column{ + typ: c.typ, + name: cName, + columnIndex: c.columnIndex, } + delete(columns, cName) } } else { for i, field := range m.Fields { diff --git a/load.go b/load.go index 4921606..c067c51 100644 --- a/load.go +++ b/load.go @@ -18,6 +18,7 @@ func (m *Mapper) loadRows(rows *sql.Rows, colTyps []*sql.ColumnType) (*resolver, colTypNames[i] = colTyps[i].DatabaseTypeName() } rsv := newResolver() + rowCount := 0 for rows.Next() { for i := 0; i < len(colTyps); i++ { row[i] = value.NewCell(colTypNames[i]) @@ -25,9 +26,10 @@ func (m *Mapper) loadRows(rows *sql.Rows, colTyps []*sql.ColumnType) (*resolver, if err = rows.Scan(row...); err != nil { return nil, err } - if err = loadRow(m, row, rsv); err != nil { + if err = loadRow(m, row, rsv, rowCount); err != nil { return nil, err } + rowCount++ } if err := rows.Err(); err != nil { return nil, err @@ -56,16 +58,21 @@ func (m *Mapper) loadRows(rows *sql.Rows, colTyps []*sql.ColumnType) (*resolver, // for example, if a blog has many Authors // // rows are actually []*Cell, theu are passed here as interface since sql scan requires []interface{} -func loadRow(m *Mapper, row []interface{}, rsv *resolver) error { +func loadRow(m *Mapper, row []interface{}, rsv *resolver, rowCount int) error { var ( err error dstField reflect.Value // destination field to be set with cell *value.Cell elem *element found bool + uid uniqueValId ) - uid := getUniqueId(row, m) + if m.IsBasic { + uid = uniqueValId(fmt.Sprintf("row-%d", rowCount)) + } else { + uid = getUniqueId(row, m) + } if elem, found = rsv.elements[uid]; !found { // unique row mapping found, new object @@ -216,7 +223,7 @@ func loadRow(m *Mapper, row []interface{}, rsv *resolver) error { if subMap.isNil(row) { continue } - if err = loadRow(subMap, row, elem.subMaps[i]); err != nil { + if err = loadRow(subMap, row, elem.subMaps[i], rowCount); err != nil { return err } } diff --git a/load_test.go b/load_test.go index a1bdcb8..91bc891 100644 --- a/load_test.go +++ b/load_test.go @@ -49,7 +49,7 @@ func TestLoadRow(t *testing.T) { } rsv := newResolver() - err = loadRow(m, row, rsv) + err = loadRow(m, row, rsv, 0) if err != nil { t.Fatalf("error loading row: %s", err) } @@ -98,7 +98,7 @@ func TestLoadRowNullValue(t *testing.T) { } rsv := newResolver() - err = loadRow(m, row, rsv) + err = loadRow(m, row, rsv, 0) if err != nil { t.Fatalf("error loading row: %s", err) } @@ -167,7 +167,7 @@ func TestLoadRowDataTypes(t *testing.T) { } rsv := newResolver() - err = loadRow(m, row, rsv) + err = loadRow(m, row, rsv, 0) if err != nil { t.Fatalf("error loading row: %s", err) } @@ -225,7 +225,7 @@ func TestLoadRowConversionError(t *testing.T) { } rsv := newResolver() - err = loadRow(m, row, rsv) + err = loadRow(m, row, rsv, 0) if err == nil { t.Fatalf("expected a conversion error, but got nil") } @@ -263,7 +263,7 @@ func TestLoadRowNullToNonNull(t *testing.T) { } rsv := newResolver() - err = loadRow(m, row, rsv) + err = loadRow(m, row, rsv, 0) if err == nil { t.Fatalf("expected an error when loading a null value to a non-nullable field, but got nil") } @@ -297,7 +297,7 @@ func TestLoadRowNullTypes(t *testing.T) { } rsv := newResolver() - err = loadRow(m, row, rsv) + err = loadRow(m, row, rsv, 0) if err != nil { t.Fatalf("error loading row: %s", err) } diff --git a/mapper_test.go b/mapper_test.go index c85c976..aa639ee 100644 --- a/mapper_test.go +++ b/mapper_test.go @@ -508,3 +508,42 @@ func TestMapDeeplyNestedNoLabels(t *testing.T) { t.Errorf("there were unfulfilled expectations: %s", err) } } + +func TestMapToBasicSlice(t *testing.T) { + db, mock, err := sqlmock.New() + if err != nil { + t.Fatalf("an error '%s' was not expected when opening a stub database connection", err) + } + defer db.Close() + + rows := sqlmock.NewRows([]string{"tag"}). + AddRow("tag1"). + AddRow("tag2"). + AddRow("tag1") // Duplicate tag + + mock.ExpectQuery("SELECT (.+) FROM tags").WillReturnRows(rows) + + sqlRows, err := db.Query("SELECT * FROM tags") + if err != nil { + t.Fatalf("error '%s' was not expected when querying rows", err) + } + + var tags []string + err = Map(sqlRows, &tags) + if err != nil { + t.Errorf("error was not expected while mapping rows: %s", err) + } + + if len(tags) != 3 { + t.Fatalf("expected 3 tags, got %d", len(tags)) + } + + expectedTags := []string{"tag1", "tag2", "tag1"} + if !reflect.DeepEqual(tags, expectedTags) { + t.Errorf("expected tags to be %+v, but got %+v", expectedTags, tags) + } + + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("there were unfulfilled expectations: %s", err) + } +} From 0553cf81ca3eb1b25043e68662f1d2584c7111af Mon Sep 17 00:00:00 2001 From: Tom Maiaroto Date: Sat, 16 Aug 2025 10:10:28 -0700 Subject: [PATCH 2/6] optimizations and more test coverage as per coderabbits good suggestions --- column.go | 5 +++-- load.go | 6 +++--- mapper_test.go | 24 ++++++++++++++++++++++++ 3 files changed, 30 insertions(+), 5 deletions(-) diff --git a/column.go b/column.go index 010a3d3..5c8b88b 100644 --- a/column.go +++ b/column.go @@ -2,7 +2,7 @@ package carta import ( "database/sql" - "errors" + "fmt" "sort" "strings" ) @@ -19,7 +19,7 @@ func allocateColumns(m *Mapper, columns map[string]column) error { presentColumns := map[string]column{} if m.IsBasic { if len(columns) != 1 { - return errors.New("carta: when mapping to a slice of a basic type, the query must return exactly one column") + return fmt.Errorf("carta: when mapping to a slice of a basic type, the query must return exactly one column (got %d)", len(columns)) } for cName, c := range columns { presentColumns[cName] = column{ @@ -28,6 +28,7 @@ func allocateColumns(m *Mapper, columns map[string]column) error { columnIndex: c.columnIndex, } delete(columns, cName) + break } } else { for i, field := range m.Fields { diff --git a/load.go b/load.go index c067c51..e33680c 100644 --- a/load.go +++ b/load.go @@ -2,9 +2,9 @@ package carta import ( "database/sql" - "errors" "fmt" "reflect" + "strconv" "github.com/hackafterdark/carta/value" ) @@ -69,7 +69,7 @@ func loadRow(m *Mapper, row []interface{}, rsv *resolver, rowCount int) error { ) if m.IsBasic { - uid = uniqueValId(fmt.Sprintf("row-%d", rowCount)) + uid = uniqueValId("row-" + strconv.Itoa(rowCount)) } else { uid = getUniqueId(row, m) } @@ -110,7 +110,7 @@ func loadRow(m *Mapper, row []interface{}, rsv *resolver, rowCount int) error { if cell.IsNull() { _, nullable := value.NullableTypes[typ] if !(isDstPtr || nullable) { - return errors.New(fmt.Sprintf("carta: cannot load null value to type %s for column %s", typ, col.name)) + return fmt.Errorf("carta: cannot load null value to type %s for column %s", typ, col.name) } // no need to set destination if cell is null } else { diff --git a/mapper_test.go b/mapper_test.go index aa639ee..3247f53 100644 --- a/mapper_test.go +++ b/mapper_test.go @@ -547,3 +547,27 @@ func TestMapToBasicSlice(t *testing.T) { t.Errorf("there were unfulfilled expectations: %s", err) } } + +func TestMapToBasicSlice_MultipleColumnsError(t *testing.T) { + db, mock, err := sqlmock.New() + if err != nil { + t.Fatal(err) + } + defer db.Close() + + rows := sqlmock.NewRows([]string{"tag", "extra"}). + AddRow("tag1", "x") + + mock.ExpectQuery("SELECT (.+) FROM tags").WillReturnRows(rows) + + sqlRows, err := db.Query("SELECT * FROM tags") + if err != nil { + t.Fatal(err) + } + + var tags []string + err = Map(sqlRows, &tags) + if err == nil { + t.Fatalf("expected error when mapping to []string with multiple columns, got nil") + } +} From c5e661c2dc53440bbed989663f04f447bfac91a8 Mon Sep 17 00:00:00 2001 From: "coderabbitai[bot]" <136622811+coderabbitai[bot]@users.noreply.github.com> Date: Sat, 16 Aug 2025 17:15:04 +0000 Subject: [PATCH 3/6] =?UTF-8?q?=F0=9F=93=9D=20Add=20docstrings=20to=20`fix?= =?UTF-8?q?-duplicate-rows`?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Docstrings generation was requested by @tmaiaroto. * https://github.com/hackafterdark/carta/pull/1#issuecomment-3193762122 The following files were modified: * `column.go` * `load.go` --- column.go | 15 +++++++++++++++ load.go | 13 ++++++++++++- 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/column.go b/column.go index 5c8b88b..64aa52f 100644 --- a/column.go +++ b/column.go @@ -15,6 +15,21 @@ type column struct { i fieldIndex } +// allocateColumns maps result set columns into the given Mapper's fields and its sub-mappers. +// It populates m.PresentColumns and m.SortedColumnIndexes, sets AncestorNames on sub-maps, +// and removes claimed entries from the provided columns map. +// +// For a mapper marked IsBasic, the function requires exactly one remaining column in +// columns and binds that single column to the mapper; otherwise it returns an error. +// For non-basic mappers, it matches basic fields by name using getColumnNameCandidates +// (honoring the mapper/sub-map delimiter and ancestor names) and records each matched +// column (including the field index). After collecting direct-field mappings it sorts +// the resulting column indexes for m.SortedColumnIndexes and then recursively allocates +// columns for each sub-map. +// +// The function mutates the Mapper structures and the input columns map. It returns any +// error returned by recursive allocation or an error when the IsBasic column constraint +// is violated. func allocateColumns(m *Mapper, columns map[string]column) error { presentColumns := map[string]column{} if m.IsBasic { diff --git a/load.go b/load.go index e33680c..74cc663 100644 --- a/load.go +++ b/load.go @@ -57,7 +57,18 @@ func (m *Mapper) loadRows(rows *sql.Rows, colTyps []*sql.ColumnType) (*resolver, // // for example, if a blog has many Authors // -// rows are actually []*Cell, theu are passed here as interface since sql scan requires []interface{} +// loadRow maps a single scanned SQL row into the resolver using the provided Mapper. +// +// It creates or reuses an element in rsv based on a computed unique id: +// - For basic mappers (m.IsBasic) the id is "row-" (ensures per-row identity). +// - For non-basic mappers the id is derived from the row values via getUniqueId. +// +// The function expects row to contain the scanned values as []*value.Cell (passed as []interface{} because sql.Scan requires that shape). +// For each present column it converts the corresponding Cell into the destination field (handling pointers, nullable types, basic primitives, and known struct wrappers such as Time, NullBool, NullString, etc.). +// If a column is NULL, loadRow enforces that the destination is either a pointer or a type listed in value.NullableTypes; otherwise it returns an error. +// After populating the element it initializes per-submap resolvers (if any) and recursively calls loadRow for each non-nil subMap, passing the same rowCount. +// +// Returns an error on conversion failures, attempts to load null into non-nullable destinations, or on any recursive loadRow error. func loadRow(m *Mapper, row []interface{}, rsv *resolver, rowCount int) error { var ( err error From 53061117cbcb1932715afb6d3170f67a96d8d75c Mon Sep 17 00:00:00 2001 From: Tom Maiaroto Date: Sat, 16 Aug 2025 10:17:45 -0700 Subject: [PATCH 4/6] update readme to note the change in de-duplication behavior --- README.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 11e7a34..00719ff 100644 --- a/README.md +++ b/README.md @@ -236,8 +236,9 @@ go get -u github.com/hackafterdark/carta ## Important Notes -Carta removes any duplicate rows. This is a side effect of the data mapping as it is unclear which object to instantiate if the same data arrives more than once. -If this is not a desired outcome, you should include a uniquely identifiable columns in your query and the corresponding fields in your structs. +When mapping to **slices of structs**, Carta removes duplicate entities. This is a side effect of the data mapping process, which merges rows that identify the same entity (e.g., a `Blog` with the same ID appearing in multiple rows due to a `JOIN`). To ensure correct mapping, you should always include uniquely identifiable columns (like a primary key) in your query for each struct entity. + +When mapping to **slices of basic types** (e.g., `[]string`, `[]int`), every row from the query is treated as a unique element, and **no de-duplication occurs**. To prevent relatively expensive reflect operations, carta caches the structure of your struct using the column mames of your query response as well as the type of your struct. From 62bb4de460bb2c690b4c3443d34dfb183e11f413 Mon Sep 17 00:00:00 2001 From: Tom Maiaroto Date: Sat, 16 Aug 2025 10:57:10 -0700 Subject: [PATCH 5/6] account for the issue coderabbit found with exactly one column errors. update the ai plan documentation for historic documentation. adjust the readme to add a new design philosophies section to further help illustrate, document, and guide. --- DESIGN_PHILOSOPHIES.md | 47 +++++++++++++++++ README.md | 28 +++------- ai_plans/FIX_DUPLICATE_ROWS.md | 87 ++++++++++++++++++------------- column.go | 41 +++++++++++++-- mapper.go | 2 +- mapper_test.go | 95 ++++++++++++++++++++++++++++++++++ 6 files changed, 236 insertions(+), 64 deletions(-) create mode 100644 DESIGN_PHILOSOPHIES.md diff --git a/DESIGN_PHILOSOPHIES.md b/DESIGN_PHILOSOPHIES.md new file mode 100644 index 0000000..5f53966 --- /dev/null +++ b/DESIGN_PHILOSOPHIES.md @@ -0,0 +1,47 @@ +# Design Philosophies + +This document includes information around design philosophies and decisions made to help document and illustrate scenarios one may encounter when using this package. + +## Approach +Carta adopts the "database mapping" approach (described in Martin Fowler's [book](https://books.google.com/books?id=FyWZt5DdvFkC&lpg=PA1&dq=Patterns%20of%20Enterprise%20Application%20Architecture%20by%20Martin%20Fowler&pg=PT187#v=onepage&q=active%20record&f=false)) which is useful among organizations with strict code review processes. + +## Comparison to Related Projects + +#### GORM +Carta is NOT an an object-relational mapper(ORM). + +#### sqlx +Sqlx does not track has-many relationships when mapping SQL data. This works fine when all your relationships are at most has-one (Blog has one Author) ie, each SQL row corresponds to one struct. However, handling has-many relationships (Blog has many Posts), requires running many queries or running manual post-processing of the result. Carta handles these complexities automatically. + +## Protection vs. Graceful Handling + +A core design principle of the `carta` mapper is to prioritize **user protection and clarity** over attempting a "graceful" but potentially incorrect guess. The library's guiding philosophy is to only proceed if the user's intent is perfectly clear. If there is any ambiguity in the mapping operation, `carta` will **fail fast** by returning an error, forcing the developer to be more explicit. + +Making a guess might seem helpful, but it can hide serious, silent bugs. The following scenarios illustrate the balance between failing on ambiguous operations (Protection) and handling well-defined transformations (Graceful Handling). + +--- + +### Scenario 1: Multi-column Query to a Basic Slice (Protection) + +- **Query:** `SELECT name, email FROM users` +- **Destination:** `var data []string` +- **Behavior:** `carta.Map` **returns an error immediately**: `carta: when mapping to a slice of a basic type, the query must return exactly one column (got 2)`. +- **Why this is Protection:** The library has no way of knowing if the user intended to map the `name` or the `email` column. A "graceful" solution might be to arbitrarily pick the first column, but this could lead to the wrong data being silently loaded into the slice. By failing fast, `carta` forces the developer to write an unambiguous query (e.g., `SELECT name FROM users`), ensuring the result is guaranteed to be correct. + +--- + +### Scenario 2: SQL `NULL` to a Non-nullable Go Field (Protection) + +- **Query:** `SELECT id, NULL AS name FROM users` +- **Destination:** `var users []User` (where `User.Name` is a `string`) +- **Behavior:** `carta.Map` **returns an error during scanning**: `carta: cannot load null value to type string for column name`. +- **Why this is Protection:** A standard Go `string` cannot represent a `NULL` value. A "graceful" but incorrect solution would be to use the zero value (`""`), which is valid data and semantically different from "no data". This can cause subtle bugs in application logic. By failing, `carta` forces the developer to explicitly handle nullability in their Go struct by using a pointer (`*string`) or a nullable type (`sql.NullString`), making the code more robust and correct. + +--- + +### Scenario 3: Merging `JOIN`ed Rows into Structs (Graceful Handling) + +- **Query:** `SELECT b.id, p.id FROM blogs b JOIN posts p ON b.id = p.blog_id` +- **Destination:** `var blogs []BlogWithPosts` +- **Behavior:** `carta` **gracefully handles** the fact that the same blog ID appears in multiple rows. It creates one `Blog` object and appends each unique `Post` to its `Posts` slice. +- **Why this is Graceful:** This is the core purpose of the library. There is no ambiguity. The library uses the unique ID of the `Blog` (the `b.id` column) to understand that these rows all describe the same parent entity. This is a well-defined transformation, not a guess. diff --git a/README.md b/README.md index 00719ff..a8fb7ff 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,9 @@ # Carta [![codecov](https://codecov.io/github/hackafterdark/carta/graph/badge.svg?token=TYvbPGGlcL)](https://codecov.io/github/hackafterdark/carta) -Dead simple SQL data mapper for complex Go structs. +A simple SQL data mapper for complex Go structs. Load SQL data onto Go structs while keeping track of has-one and has-many relationships. -Load SQL data onto Go structs while keeping track of has-one and has-many relationships +Carta is not an object-relational mapper(ORM). With large and complex datasets, using ORMs becomes restrictive and reduces performance when working with complex queries. [Read more about the design philosophy.](#design-philosophy) ## Examples Using carta is very simple. All you need to do is: @@ -93,15 +93,6 @@ blogs: }] ``` - -## Comparison to Related Projects - -#### GORM -Carta is NOT an an object-relational mapper(ORM). Read more in [Approach](#Approach) - -#### sqlx -Sqlx does not track has-many relationships when mapping SQL data. This works fine when all your relationships are at most has-one (Blog has one Author) ie, each SQL row corresponds to one struct. However, handling has-many relationships (Blog has many Posts), requires running many queries or running manual post-processing of the result. Carta handles these complexities automatically. - ## Guide ### Column and Field Names @@ -233,19 +224,14 @@ Other types, such as TIME, will will be converted from plain text in future vers go get -u github.com/hackafterdark/carta ``` +## Design Philosophy -## Important Notes +The `carta` package follows a "fail-fast" philosophy to ensure that mapping operations are unambiguous and to protect users from silent bugs. For a detailed explanation of the error handling approach and the balance between user protection and graceful handling, please see the [Design Philosophies](./DESIGN_PHILOSOPHIES.md) document. + +## Important Notes When mapping to **slices of structs**, Carta removes duplicate entities. This is a side effect of the data mapping process, which merges rows that identify the same entity (e.g., a `Blog` with the same ID appearing in multiple rows due to a `JOIN`). To ensure correct mapping, you should always include uniquely identifiable columns (like a primary key) in your query for each struct entity. When mapping to **slices of basic types** (e.g., `[]string`, `[]int`), every row from the query is treated as a unique element, and **no de-duplication occurs**. -To prevent relatively expensive reflect operations, carta caches the structure of your struct using the column mames of your query response as well as the type of your struct. - -## Approach -Carta adopts the "database mapping" approach (described in Martin Fowler's [book](https://books.google.com/books?id=FyWZt5DdvFkC&lpg=PA1&dq=Patterns%20of%20Enterprise%20Application%20Architecture%20by%20Martin%20Fowler&pg=PT187#v=onepage&q=active%20record&f=false)) which is useful among organizations with strict code review processes. - -Carta is not an object-relational mapper(ORM). With large and complex datasets, using ORMs becomes restrictive and reduces performance when working with complex queries. - -### License -Apache License +To prevent relatively expensive reflect operations, carta caches the structure of your struct using the column mames of your query response as well as the type of your struct. \ No newline at end of file diff --git a/ai_plans/FIX_DUPLICATE_ROWS.md b/ai_plans/FIX_DUPLICATE_ROWS.md index 2ed92d1..23f0202 100644 --- a/ai_plans/FIX_DUPLICATE_ROWS.md +++ b/ai_plans/FIX_DUPLICATE_ROWS.md @@ -1,37 +1,50 @@ -# Problem: Incorrect De-duplication When Mapping to Basic Slices - -## Summary -The `carta` library is designed to de-duplicate entities when mapping SQL rows to slices of structs (e.g., `[]User`). This is achieved by generating a unique ID for each entity based on the content of its primary key columns. This behavior is correct for handling `JOIN`s where a single entity might appear across multiple rows. - -However, this same logic is incorrectly applied when the mapping destination is a slice of a basic type (e.g., `[]string`, `[]int`). In this scenario, rows with duplicate values are treated as the same entity and are de-duplicated, which is incorrect. The desired behavior is to preserve every row from the result set, including duplicates. - -This issue is the root cause for the following problems: -1. The `if m.IsBasic` code path in `load.go` lacks test coverage because no tests exist for mapping to basic slices. -2. Attempts to write such tests lead to infinite loops and incorrect behavior because the column allocation and unique ID generation logic are not designed to handle this case. - -## Proposed Solution -The solution is to create a distinct execution path for "basic mappers" (`m.IsBasic == true`) that ensures every row is treated as a unique element. - -This will be accomplished in two main steps: - -### 1. Fix Column Allocation (`allocateColumns`) -The logic will be modified to enforce a clear rule for basic slices: the source SQL query must return **exactly one column**. - -- If `m.IsBasic` is true, the function will bypass the existing name-matching logic. -- It will validate that only one column is present in the query result. -- This single column will be assigned as the `PresentColumn` for the mapper. -- If more than one column is found, the function will return an error to prevent ambiguity. - -### 2. Fix Unique ID Generation (`loadRow`) -The logic will be modified to generate a unique ID based on the row's position rather than its content. - -- If `m.IsBasic` is true, the call to `getUniqueId(row, m)` will be bypassed. -- A new, position-based unique ID will be generated for each row (e.g., using a simple counter that increments with each row processed). -- This ensures that every row, regardless of its content, is treated as a distinct element to be added to the destination slice. - -This approach preserves the existing, correct behavior for struct mapping while introducing a new, robust path for handling basic slices correctly. - -## Plan -1. **Modify `column.go`**: Update the `allocateColumns` function to implement the single-column rule for basic mappers. -2. **Modify `load.go`**: Update the `loadRow` function to use a position-based counter for unique ID generation when `m.IsBasic` is true. -3. **Add Tests**: Create a new test case in `mapper_test.go` that maps a query result to a slice of a basic type (e.g., `[]string`) to validate the fix and provide coverage for the `m.IsBasic` code path. \ No newline at end of file +# Plan: Fix Incorrect De-duplication for Basic Slices + +## 1. Problem Summary +The `carta` library was incorrectly de-duplicating rows when mapping to a slice of a basic type (e.g., `[]string`). The logic, designed to merge `JOIN`ed rows for slices of structs, was misapplied, causing data loss. This also meant the `m.IsBasic` code path was entirely untested. + +The goal was to modify the library to correctly preserve all rows, including duplicates, when mapping to a basic slice, and to add the necessary test coverage. + +## 2. Evolution of the Solution + +The final solution was reached through an iterative process of implementation and refinement based on code review feedback. + +### Initial Implementation +The first version of the fix introduced two key changes: +1. **Position-Based Unique IDs:** In `load.go`, the `loadRow` function was modified. When `m.IsBasic` is true, it now generates a unique ID based on the row's position in the result set (e.g., "row-0", "row-1") instead of its content. This ensures every row is treated as a unique element. +2. **Single-Column Rule:** In `column.go`, the `allocateColumns` function was updated to enforce a strict rule: if the destination is a basic slice, the SQL query must return **exactly one column**. This prevents ambiguity. + +### Refinements from Code Review +Feedback from a code review (via Coderabbit) prompted several improvements: +- **Performance:** In `load.go`, `fmt.Sprintf` was replaced with the more performant `strconv.Itoa` for generating the position-based unique ID. +- **Idiomatic Go:** Error creation was changed from `errors.New(fmt.Sprintf(...))` to the more idiomatic `fmt.Errorf`. +- **Clearer Errors:** The error message for the single-column rule was improved to include the actual number of columns found, aiding debugging. +- **Test Coverage:** A negative test case was added to `mapper_test.go` to ensure the single-column rule correctly returns an error. + +### Final Fix: Handling Nested Basic Mappers +The most critical refinement came from identifying a flaw in the single-column rule: it did not correctly handle **nested** basic slices (e.g., a struct field like `Tags []string`). The initial logic would have incorrectly failed if other columns for the parent struct were present. + +The final patch corrected this by making the logic in `allocateColumns` more nuanced: +- **For top-level basic slices** (`len(m.AncestorNames) == 0`), the query must still contain exactly one column overall. +- **For nested basic slices**, the function now searches the remaining columns for exactly one that matches the ancestor-qualified name (e.g., `tags`). It returns an error if zero or more than one match is found. + +This final change ensures the logic is robust for both top-level and nested use cases. + +## 3. Summary of Changes Executed +1. **Modified `load.go`**: + - Updated `loadRow` to accept a `rowCount` parameter. + - Implemented logic to generate a unique ID from `rowCount` when `m.IsBasic` is true. + - Refactored error handling and string formatting based on code review feedback. +2. **Modified `column.go`**: + - Updated `allocateColumns` to differentiate between top-level and nested basic mappers, enforcing the correct single-column matching rule for each. + - Improved the error message to be more descriptive. +3. **Modified `mapper.go`**: + - Corrected the logic in `determineFieldsNames` to properly handle casing in `carta` tags, ensuring ancestor names are generated correctly. +4. **Added Tests to `mapper_test.go`**: + - Added a test for a top-level basic slice (`[]string`) to verify that duplicates are preserved. + - Added a negative test to ensure an error is returned for a multi-column query to a top-level basic slice. + - Added a test for a nested basic slice (`PostWithTags.Tags []string`) to verify correct mapping. + - Added negative tests to ensure errors are returned for nested basic slices with zero or multiple matching columns. +5. **Updated Documentation**: + - Updated `README.md` to clarify the difference in de-duplication behavior. + - Created `DESIGN_PHILOSOPHIES.md` to document the "fail-fast" error handling approach. \ No newline at end of file diff --git a/column.go b/column.go index 5c8b88b..a80fe76 100644 --- a/column.go +++ b/column.go @@ -18,17 +18,48 @@ type column struct { func allocateColumns(m *Mapper, columns map[string]column) error { presentColumns := map[string]column{} if m.IsBasic { - if len(columns) != 1 { - return fmt.Errorf("carta: when mapping to a slice of a basic type, the query must return exactly one column (got %d)", len(columns)) - } - for cName, c := range columns { + if len(m.AncestorNames) == 0 { + // Top-level basic mapper: must map exactly one column overall + if len(columns) != 1 { + return fmt.Errorf( + "carta: when mapping to a slice of a basic type, "+ + "the query must return exactly one column (got %d)", + len(columns), + ) + } + for cName, c := range columns { + presentColumns[cName] = column{ + typ: c.typ, + name: cName, + columnIndex: c.columnIndex, + } + delete(columns, cName) + break + } + } else { + // Nested basic mapper: pick exactly one matching ancestor-qualified column + candidates := getColumnNameCandidates("", m.AncestorNames, m.Delimiter) + var matched []string + for cName := range columns { + if candidates[cName] { + matched = append(matched, cName) + } + } + if len(matched) != 1 { + return fmt.Errorf( + "carta: basic sub-mapper for %v expected exactly one matching column "+ + "(ancestors %v), got %d matches", + m.Typ, m.AncestorNames, len(matched), + ) + } + cName := matched[0] + c := columns[cName] presentColumns[cName] = column{ typ: c.typ, name: cName, columnIndex: c.columnIndex, } delete(columns, cName) - break } } else { for i, field := range m.Fields { diff --git a/mapper.go b/mapper.go index 2c0c282..1f66bd8 100644 --- a/mapper.go +++ b/mapper.go @@ -244,7 +244,7 @@ func determineFieldsNames(m *Mapper) error { if tag := nameFromTag(field.Tag, CartaTagKey); tag != "" { subMap.Delimiter = "->" parts := strings.Split(tag, ",") - name = parts[0] + name = strings.TrimSpace(parts[0]) if len(parts) > 1 { for _, part := range parts[1:] { option := strings.Split(part, "=") diff --git a/mapper_test.go b/mapper_test.go index 3247f53..8a49d63 100644 --- a/mapper_test.go +++ b/mapper_test.go @@ -571,3 +571,98 @@ func TestMapToBasicSlice_MultipleColumnsError(t *testing.T) { t.Fatalf("expected error when mapping to []string with multiple columns, got nil") } } + +type PostWithTags struct { + ID int `db:"id"` + Tags []string `carta:"Tags"` +} + +func TestNestedBasicSliceMap(t *testing.T) { + db, mock, err := sqlmock.New() + if err != nil { + t.Fatalf("an error '%s' was not expected when opening a stub database connection", err) + } + defer db.Close() + + rows := sqlmock.NewRows([]string{"id", "Tags"}). + AddRow(1, "tag1"). + AddRow(1, "tag2") + + mock.ExpectQuery("SELECT (.+) FROM posts").WillReturnRows(rows) + + sqlRows, err := db.Query("SELECT * FROM posts") + if err != nil { + t.Fatalf("error '%s' was not expected when querying rows", err) + } + + var posts []PostWithTags + err = Map(sqlRows, &posts) + if err != nil { + t.Errorf("error was not expected while mapping rows: %s", err) + } + + if len(posts) != 1 { + t.Fatalf("expected 1 post, got %d", len(posts)) + } + + if len(posts[0].Tags) != 2 { + t.Fatalf("expected 2 tags, got %d", len(posts[0].Tags)) + } + + expectedTags := []string{"tag1", "tag2"} + if !reflect.DeepEqual(posts[0].Tags, expectedTags) { + t.Errorf("expected tags to be %+v, but got %+v", expectedTags, posts[0].Tags) + } + + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("there were unfulfilled expectations: %s", err) + } +} + +func TestNestedBasicSliceMap_NoMatchingColumnsError(t *testing.T) { + db, mock, err := sqlmock.New() + if err != nil { + t.Fatalf("an error '%s' was not expected when opening a stub database connection", err) + } + defer db.Close() + + rows := sqlmock.NewRows([]string{"id", "other_column"}). + AddRow(1, "value") + + mock.ExpectQuery("SELECT (.+) FROM posts").WillReturnRows(rows) + + sqlRows, err := db.Query("SELECT * FROM posts") + if err != nil { + t.Fatalf("error '%s' was not expected when querying rows", err) + } + + var posts []PostWithTags + err = Map(sqlRows, &posts) + if err == nil { + t.Errorf("expected an error when mapping a nested basic slice with no matching columns, but got nil") + } +} + +func TestNestedBasicSliceMap_MultipleMatchingColumnsError(t *testing.T) { + db, mock, err := sqlmock.New() + if err != nil { + t.Fatalf("an error '%s' was not expected when opening a stub database connection", err) + } + defer db.Close() + + rows := sqlmock.NewRows([]string{"id", "tags", "Tags"}). + AddRow(1, "tag1", "tag2") + + mock.ExpectQuery("SELECT (.+) FROM posts").WillReturnRows(rows) + + sqlRows, err := db.Query("SELECT * FROM posts") + if err != nil { + t.Fatalf("error '%s' was not expected when querying rows", err) + } + + var posts []PostWithTags + err = Map(sqlRows, &posts) + if err == nil { + t.Errorf("expected an error when mapping a nested basic slice with multiple matching columns, but got nil") + } +} From 229337885813153582990d862237c7078a487de2 Mon Sep 17 00:00:00 2001 From: Tom Maiaroto Date: Sat, 16 Aug 2025 11:23:32 -0700 Subject: [PATCH 6/6] suggseted rabbit changes and enhancment for trimming tag whitespace --- DESIGN_PHILOSOPHIES.md | 6 +++--- README.md | 8 ++++---- ai_plans/FIX_DUPLICATE_ROWS.md | 26 ++++++++++++------------- column.go | 8 ++++++-- mapper.go | 2 +- mapper_test.go | 35 ++++++++++++++++++++++++++++++++++ 6 files changed, 62 insertions(+), 23 deletions(-) diff --git a/DESIGN_PHILOSOPHIES.md b/DESIGN_PHILOSOPHIES.md index 5f53966..eb4f79c 100644 --- a/DESIGN_PHILOSOPHIES.md +++ b/DESIGN_PHILOSOPHIES.md @@ -8,7 +8,7 @@ Carta adopts the "database mapping" approach (described in Martin Fowler's [book ## Comparison to Related Projects #### GORM -Carta is NOT an an object-relational mapper(ORM). +Carta is NOT an object-relational mapper (ORM). #### sqlx Sqlx does not track has-many relationships when mapping SQL data. This works fine when all your relationships are at most has-one (Blog has one Author) ie, each SQL row corresponds to one struct. However, handling has-many relationships (Blog has many Posts), requires running many queries or running manual post-processing of the result. Carta handles these complexities automatically. @@ -26,7 +26,7 @@ Making a guess might seem helpful, but it can hide serious, silent bugs. The fol - **Query:** `SELECT name, email FROM users` - **Destination:** `var data []string` - **Behavior:** `carta.Map` **returns an error immediately**: `carta: when mapping to a slice of a basic type, the query must return exactly one column (got 2)`. -- **Why this is Protection:** The library has no way of knowing if the user intended to map the `name` or the `email` column. A "graceful" solution might be to arbitrarily pick the first column, but this could lead to the wrong data being silently loaded into the slice. By failing fast, `carta` forces the developer to write an unambiguous query (e.g., `SELECT name FROM users`), ensuring the result is guaranteed to be correct. +- **Why this is Protection:** The library has no way of knowing if the user intended to map the `name` or the `email` column. A "graceful" solution might be to pick the first column arbitrarily, but this could lead to the wrong data being silently loaded into the slice. By failing fast, `carta` forces the developer to write an unambiguous query (e.g., `SELECT name FROM users`), ensuring the result is guaranteed to be correct. --- @@ -34,7 +34,7 @@ Making a guess might seem helpful, but it can hide serious, silent bugs. The fol - **Query:** `SELECT id, NULL AS name FROM users` - **Destination:** `var users []User` (where `User.Name` is a `string`) -- **Behavior:** `carta.Map` **returns an error during scanning**: `carta: cannot load null value to type string for column name`. +- **Behavior:** `carta.Map` **returns an error during scanning** (e.g., `carta: cannot load NULL into non-nullable type string for column name`). - **Why this is Protection:** A standard Go `string` cannot represent a `NULL` value. A "graceful" but incorrect solution would be to use the zero value (`""`), which is valid data and semantically different from "no data". This can cause subtle bugs in application logic. By failing, `carta` forces the developer to explicitly handle nullability in their Go struct by using a pointer (`*string`) or a nullable type (`sql.NullString`), making the code more robust and correct. --- diff --git a/README.md b/README.md index a8fb7ff..2cda51f 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ A simple SQL data mapper for complex Go structs. Load SQL data onto Go structs while keeping track of has-one and has-many relationships. -Carta is not an object-relational mapper(ORM). With large and complex datasets, using ORMs becomes restrictive and reduces performance when working with complex queries. [Read more about the design philosophy.](#design-philosophy) +Carta is not an object-relational mapper (ORM). With large and complex datasets, using ORMs becomes restrictive and reduces performance when working with complex queries. [Read more about the design philosophy.](#design-philosophy) ## Examples Using carta is very simple. All you need to do is: @@ -20,11 +20,11 @@ blogs := []Blog{} carta.Map(rows, &blogs) ``` -Assume that in above exmple, we are using a schema containing has-one and has-many relationships: +Assume that in the above example, we are using a schema containing has-one and has-many relationships: ![schema](https://i.ibb.co/SPH3zhQ/Schema.png) -And here is our SQL query along with the corresponging Go struct: +And here is our SQL query along with the corresponding Go struct: ``` select b.id, @@ -234,4 +234,4 @@ When mapping to **slices of structs**, Carta removes duplicate entities. This is When mapping to **slices of basic types** (e.g., `[]string`, `[]int`), every row from the query is treated as a unique element, and **no de-duplication occurs**. -To prevent relatively expensive reflect operations, carta caches the structure of your struct using the column mames of your query response as well as the type of your struct. \ No newline at end of file +To prevent relatively expensive reflect operations, carta caches the structure of your struct using the column names of your query response as well as the type of your struct. \ No newline at end of file diff --git a/ai_plans/FIX_DUPLICATE_ROWS.md b/ai_plans/FIX_DUPLICATE_ROWS.md index 23f0202..4a194b8 100644 --- a/ai_plans/FIX_DUPLICATE_ROWS.md +++ b/ai_plans/FIX_DUPLICATE_ROWS.md @@ -11,29 +11,29 @@ The final solution was reached through an iterative process of implementation an ### Initial Implementation The first version of the fix introduced two key changes: -1. **Position-Based Unique IDs:** In `load.go`, the `loadRow` function was modified. When `m.IsBasic` is true, it now generates a unique ID based on the row's position in the result set (e.g., "row-0", "row-1") instead of its content. This ensures every row is treated as a unique element. +1. **Position-Based Unique IDs:** In `load.go`, when `m.IsBasic` is true, `loadRow` generates a per-result-set unique ID (e.g., "row-0", "row-1") from the zero-based row index, ensuring every row is treated as unique within that mapping operation. 2. **Single-Column Rule:** In `column.go`, the `allocateColumns` function was updated to enforce a strict rule: if the destination is a basic slice, the SQL query must return **exactly one column**. This prevents ambiguity. ### Refinements from Code Review -Feedback from a code review (via Coderabbit) prompted several improvements: -- **Performance:** In `load.go`, `fmt.Sprintf` was replaced with the more performant `strconv.Itoa` for generating the position-based unique ID. -- **Idiomatic Go:** Error creation was changed from `errors.New(fmt.Sprintf(...))` to the more idiomatic `fmt.Errorf`. -- **Clearer Errors:** The error message for the single-column rule was improved to include the actual number of columns found, aiding debugging. -- **Test Coverage:** A negative test case was added to `mapper_test.go` to ensure the single-column rule correctly returns an error. +Feedback from code review prompted several improvements: +- **Performance:** In `load.go`, `fmt.Sprintf` was replaced with `strconv.Itoa` for generating the position-based unique ID. +- **Idiomatic Go:** Error creation now uses `fmt.Errorf` instead of `errors.New(fmt.Sprintf(...))`. +- **Clearer errors:** The single-column rule error message includes the actual number of columns found. +- **Test coverage:** A negative test was added to `mapper_test.go` to ensure the single-column rule correctly returns an error. ### Final Fix: Handling Nested Basic Mappers The most critical refinement came from identifying a flaw in the single-column rule: it did not correctly handle **nested** basic slices (e.g., a struct field like `Tags []string`). The initial logic would have incorrectly failed if other columns for the parent struct were present. The final patch corrected this by making the logic in `allocateColumns` more nuanced: -- **For top-level basic slices** (`len(m.AncestorNames) == 0`), the query must still contain exactly one column overall. +- **For top-level basic slices** (`len(m.AncestorNames) == 0`), the result set must contain exactly one projected column (as labeled by the driver after alias resolution). Expressions are allowed if aliased to a single column. - **For nested basic slices**, the function now searches the remaining columns for exactly one that matches the ancestor-qualified name (e.g., `tags`). It returns an error if zero or more than one match is found. This final change ensures the logic is robust for both top-level and nested use cases. ## 3. Summary of Changes Executed 1. **Modified `load.go`**: - - Updated `loadRow` to accept a `rowCount` parameter. - - Implemented logic to generate a unique ID from `rowCount` when `m.IsBasic` is true. + - Updated `loadRow` to accept a `rowCount` parameter and propagate it to nested mappers. + - For `m.IsBasic`, generate a per-row unique ID from `rowCount` to preserve duplicates (applies to nested basics as well). - Refactored error handling and string formatting based on code review feedback. 2. **Modified `column.go`**: - Updated `allocateColumns` to differentiate between top-level and nested basic mappers, enforcing the correct single-column matching rule for each. @@ -41,10 +41,10 @@ This final change ensures the logic is robust for both top-level and nested use 3. **Modified `mapper.go`**: - Corrected the logic in `determineFieldsNames` to properly handle casing in `carta` tags, ensuring ancestor names are generated correctly. 4. **Added Tests to `mapper_test.go`**: - - Added a test for a top-level basic slice (`[]string`) to verify that duplicates are preserved. - - Added a negative test to ensure an error is returned for a multi-column query to a top-level basic slice. - - Added a test for a nested basic slice (`PostWithTags.Tags []string`) to verify correct mapping. - - Added negative tests to ensure errors are returned for nested basic slices with zero or multiple matching columns. + - Top-level `[]string`: verifies duplicates are preserved. + - Top-level `[]string` (negative): multi-column queries produce an error. + - Nested `PostWithTags.Tags []string`: verifies correct column matching and mapping. + - Nested (negative): zero or multiple matching columns produce an error. 5. **Updated Documentation**: - Updated `README.md` to clarify the difference in de-duplication behavior. - Created `DESIGN_PHILOSOPHIES.md` to document the "fail-fast" error handling approach. \ No newline at end of file diff --git a/column.go b/column.go index 492b998..8630ad0 100644 --- a/column.go +++ b/column.go @@ -19,8 +19,12 @@ type column struct { // It populates m.PresentColumns and m.SortedColumnIndexes, sets AncestorNames on sub-maps, // and removes claimed entries from the provided columns map. // -// For a mapper marked IsBasic, the function requires exactly one remaining column in -// columns and binds that single column to the mapper; otherwise it returns an error. +// For a mapper marked IsBasic: +// - Top-level (no ancestors): requires exactly one remaining column overall and binds it. +// - Nested (has ancestors): requires exactly one matching ancestor-qualified column among +// the remaining columns and binds it. +// +// Otherwise it returns an error. // For non-basic mappers, it matches basic fields by name using getColumnNameCandidates // (honoring the mapper/sub-map delimiter and ancestor names) and records each matched // column (including the field index). After collecting direct-field mappings it sorts diff --git a/mapper.go b/mapper.go index 1f66bd8..a769f97 100644 --- a/mapper.go +++ b/mapper.go @@ -258,7 +258,7 @@ func determineFieldsNames(m *Mapper) error { } } else { if tag := nameFromTag(field.Tag, DbTagKey); tag != "" { - name = tag + name = strings.TrimSpace(tag) } else { name = field.Name } diff --git a/mapper_test.go b/mapper_test.go index 8a49d63..d5d141a 100644 --- a/mapper_test.go +++ b/mapper_test.go @@ -666,3 +666,38 @@ func TestNestedBasicSliceMap_MultipleMatchingColumnsError(t *testing.T) { t.Errorf("expected an error when mapping a nested basic slice with multiple matching columns, but got nil") } } + +func TestNestedBasicSliceMap_TagWhitespace(t *testing.T) { + type PostWithTagsWS struct { + ID int `db:"id"` + Tags []string `carta:" Tags "` // intentional whitespace + } + + db, mock, err := sqlmock.New() + if err != nil { + t.Fatal(err) + } + defer db.Close() + + rows := sqlmock.NewRows([]string{"id", "Tags"}). + AddRow(1, "tag1"). + AddRow(1, "tag2") + + mock.ExpectQuery("SELECT (.+) FROM posts").WillReturnRows(rows) + + sqlRows, err := db.Query("SELECT * FROM posts") + if err != nil { + t.Fatal(err) + } + + var posts []PostWithTagsWS + if err := Map(sqlRows, &posts); err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(posts) != 1 || len(posts[0].Tags) != 2 { + t.Fatalf("unexpected result: %#v", posts) + } + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("unfulfilled expectations: %s", err) + } +}