Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[BUG] Reverse nested context, while shifting from nested to root and from root to nested path. #17109

Open
Shivacharangoud opened this issue Jan 24, 2025 · 0 comments
Labels

Comments

@Shivacharangoud
Copy link

Shivacharangoud commented Jan 24, 2025

Describe the bug

mapping:

PUT reverse_nested
{
    "mappings": {
      "properties": {
        "priority": {
          "type": "long"
        },
        "teams": {
          "type":"nested",
          "properties": {
            "hours": {
              "type": "long"
            },
            "name": {
              "type": "text",
              "fields": {
                "keyword": {
                  "type": "keyword",
                  "ignore_above": 256
                }
              }
            }
          }
        }
      }
    }
  }

**data:**

PUT reverse_nested/_doc/1
{
  "priority":1,
  "teams":[
    {"name":"team1", "hours":10},
    {"name":"team2", "hours":20}
  ]
}

**query:**

GET reverse_nested/_search
{
  "aggs": {
    "group_by_team": {
      "nested": {
        "path": "teams"
      },
      "aggs": {
        "group_by_teamm": {
          "terms": {
            "field": "teams.name.keyword",
            "size": 10
          },
          "aggs": {
            "sum_of_hours": {
              "sum": {
                "field": "teams.hours"
              }
            },
            "reverse_to_base":{
              "reverse_nested": {
              
              },
              "aggs": {
                "group_by_priority": {
                  "terms": {
                    "field": "priority"
                  },
                  "aggs":{
                    "nested_again":{
                      "nested": {
                        "path": "teams"
                      },
                      "aggs": {
                        "sum_of_hours": {
                          "sum": {
                            "field": "teams.hours"
                          }
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }
    }
  }
}

**and the response:**
{
  "took": 10,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": {
      "value": 1,
      "relation": "eq"
    },
    "max_score": 1,
    "hits": [
      {
        "_index": "reverse_nested",
        "_id": "1",
        "_score": 1,
        "_source": {
          "priority": 1,
          "teams": [
            {
              "name": "team1",
              "hours": 10
            },
            {
              "name": "team2",
              "hours": 20
            }
          ]
        }
      }
    ]
  },
  "aggregations": {
    "group_by_team": {
      "doc_count": 2,
      "group_by_teamm": {
        "doc_count_error_upper_bound": 0,
        "sum_other_doc_count": 0,
        "buckets": [
          {
            "key": "team1",
            "doc_count": 1,
            "reverse_to_base": {
              "doc_count": 1,
              "group_by_priority": {
                "doc_count_error_upper_bound": 0,
                "sum_other_doc_count": 0,
                "buckets": [
                  {
                    "key": 1,
                    "doc_count": 1,
                    "nested_again": {
                      "doc_count": 2,
                      "sum_of_hours": {
                        "value": 30
                      }
                    }
                  }
                ]
              }
            },
            "sum_of_hours": {
              "value": 10
            }
          },
          {
            "key": "team2",
            "doc_count": 1,
            "reverse_to_base": {
              "doc_count": 1,
              "group_by_priority": {
                "doc_count_error_upper_bound": 0,
                "sum_other_doc_count": 0,
                "buckets": [
                  {
                    "key": 1,
                    "doc_count": 1,
                    "nested_again": {
                      "doc_count": 2,
                      "sum_of_hours": {
                        "value": 30
                      }
                    }
                  }
                ]
              }
            },
            "sum_of_hours": {
              "value": 20
            }
          }
        ]
      }
    }
  }
}

Let's try to produce a report that gives the number of hours by team and by priority. Doing a nested aggregation on teams.name, then a reverse_nested aggregation to group on priority, and then a nested aggregation again to sum teams.hours double-counts the hours. This happens because the second nested aggregation on teams knows nothing about the upstream nesting: it is executed in the context of the request document, and as a result it lumps together the hours of every team on the request under each top-level team bucket. The sum of hours directly under each team is 10 and 20, but when we group again by priority (under each team) and sum the hours, the result is 30, which means it is considering both nested team objects. How can this issue be solved?

Related component

Search:Aggregations

To Reproduce

mapping:

PUT reverse_nested
{
    "mappings": {
      "properties": {
        "priority": {
          "type": "long"
        },
        "teams": {
          "type":"nested",
          "properties": {
            "hours": {
              "type": "long"
            },
            "name": {
              "type": "text",
              "fields": {
                "keyword": {
                  "type": "keyword",
                  "ignore_above": 256
                }
              }
            }
          }
        }
      }
    }
  }

**data:**

PUT reverse_nested/_doc/1
{
  "priority":1,
  "teams":[
    {"name":"team1", "hours":10},
    {"name":"team2", "hours":20}
  ]
}

**query:**

GET reverse_nested/_search
{
  "aggs": {
    "group_by_team": {
      "nested": {
        "path": "teams"
      },
      "aggs": {
        "group_by_teamm": {
          "terms": {
            "field": "teams.name.keyword",
            "size": 10
          },
          "aggs": {
            "sum_of_hours": {
              "sum": {
                "field": "teams.hours"
              }
            },
            "reverse_to_base":{
              "reverse_nested": {
              
              },
              "aggs": {
                "group_by_priority": {
                  "terms": {
                    "field": "priority"
                  },
                  "aggs":{
                    "nested_again":{
                      "nested": {
                        "path": "teams"
                      },
                      "aggs": {
                        "sum_of_hours": {
                          "sum": {
                            "field": "teams.hours"
                          }
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }
    }
  }
}

Expected behavior

expected result:

{
  "took": 10,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": {
      "value": 1,
      "relation": "eq"
    },
    "max_score": 1,
    "hits": [
      {
        "_index": "reverse_nested",
        "_id": "1",
        "_score": 1,
        "_source": {
          "priority": 1,
          "teams": [
            {
              "name": "team1",
              "hours": 10
            },
            {
              "name": "team2",
              "hours": 20
            }
          ]
        }
      }
    ]
  },
  "aggregations": {
    "group_by_team": {
      "doc_count": 2,
      "group_by_teamm": {
        "doc_count_error_upper_bound": 0,
        "sum_other_doc_count": 0,
        "buckets": [
          {
            "key": "team1",
            "doc_count": 1,
            "reverse_to_base": {
              "doc_count": 1,
              "group_by_priority": {
                "doc_count_error_upper_bound": 0,
                "sum_other_doc_count": 0,
                "buckets": [
                  {
                    "key": 1,
                    "doc_count": 1,
                    "nested_again": {
                      "doc_count": 2,
                      "sum_of_hours": {
                        "value": 10
                      }
                    }
                  }
                ]
              }
            },
            "sum_of_hours": {
              "value": 10
            }
          },
          {
            "key": "team2",
            "doc_count": 1,
            "reverse_to_base": {
              "doc_count": 1,
              "group_by_priority": {
                "doc_count_error_upper_bound": 0,
                "sum_other_doc_count": 0,
                "buckets": [
                  {
                    "key": 1,
                    "doc_count": 1,
                    "nested_again": {
                      "doc_count": 2,
                      "sum_of_hours": {
                        "value": 20
                      }
                    }
                  }
                ]
              }
            },
            "sum_of_hours": {
              "value": 20
            }
          }
        ]
      }
    }
  }
}

Additional Details

  • OS: AWS managed OpenSearch Service.
  • Version: 2.1
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
Projects
Status: 🆕 New
Development

No branches or pull requests

1 participant