Skip to content

Commit

Permalink
#9 Compatibility with ES 6.2.0
Browse files Browse the repository at this point in the history
  • Loading branch information
mbok committed Apr 24, 2018
1 parent 7aa4779 commit 40e7efb
Show file tree
Hide file tree
Showing 21 changed files with 337 additions and 347 deletions.
68 changes: 60 additions & 8 deletions README.adoc
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ by the error term. This may help
[[aggregations]]
## Aggregations
== Aggregations
Both aggregations are numeric aggregations that estimate the linear regression coefficients
image:http://latex.codecogs.com/gif.latex?\theta_0,%20\theta_1,%20\theta_2,.%20.%20.,%20\theta_C%20[]
based on document results of a search query. Each search result
Expand Down Expand Up @@ -159,15 +159,68 @@ and the last for the response variable. The above request returns the following
}
--------------------------------------------------

=== Data conditions
[[scoring]]
== Scoring documents by error term
[source,js]
--------------------------------------------------
/houses/_search?size=100
{
"query": {
"function_score": {
"query": {
"match" : {
"location" : "Morro Bay"
}
},
"linreg_error": {
"fields": ["size", "bedrooms", "bathrooms", "price"],
"coefficients": [227990.63952712028, 248.92285661317254, -68297.7720278421, 64406.52205356777],
"modifier": "abs"
}
}
}
}
--------------------------------------------------

[source,js]
--------------------------------------------------
{
...
"hits":{
"total": 17,
"max_score": 1997108.1,
"hits":[
{
"_index": "houses",
"_type": "prices",
"_id": "da0772IBbA54ATAiBVjR",
"_score": 1997108.1,
"_source":{"message": "144316,Morro Bay,1045000.00,3,3,2100,497.62,Foreclosure\r", "bathrooms": 3, "bedrooms": 3, "path": "/home/mbok/linreg/RealEstate.csv",…}
},
...,
{
"_index": "houses",
"_type": "prices",
"_id": "uq0772IBbA54ATAiBVjw",
"_score": 94489.49,
"_source":{"message": "137159,Morro Bay,999000.00,4,3,3360,297.32,Short Sale\r", "bathrooms": 3, "bedrooms": 4, "path": "/home/mbok/linreg/RealEstate.csv",…}
}
]
}
}
--------------------------------------------------

Supported values for `modifier`: [none, abs, square, reciprocal, abs_reciprocal, square_reciprocal]

== Data conditions
Due to algorithmic constraints, both aggregations return an empty response if

* the search result size is less than or equal to the number of indicated explanatory variables,
* the values of the explanatory variables in the search result set are linearly dependent (that is,
one column can be written as a linear combination of the other columns).


## Algorithm
== Algorithm
This implementation is based on a new parallel, single-pass OLS estimation algorithm for multiple linear regression
(not yet published). By aggregating
over the data only once and in parallel the algorithm is ideally suited for large-scale, distributed data sets and
Expand All @@ -177,9 +230,8 @@ The overall complexity of the implemented algorithm to estimate the regression c
`N` denotes the size of the training data set (the number of documents in the search result set) and `C` the number
of the indicated explanatory variables (fields).

## Installation
== Installation

### Elasticsearch 5.x
To install this plugin, first choose the version in the compatibility
matrix that matches your Elasticsearch version, and use its download link in the following command.

Expand All @@ -202,8 +254,8 @@ Do not forget to restart the node after installing.
| https://github.com/scaleborn/elasticsearch-linear-regression/releases/download/5.3.0.1/elasticsearch-linear-regression-5.3.0.1.zip[5.3.0.1] | 5.3.0 | Jun 30, 2017
|===

## Examples
### Predicting house prices
== Examples
=== Predicting house prices
The idea is very simple. We have data in our Elasticsearch index representing
sold house prices in our region with some features like square footage of
the house, # of bathrooms, # of bedrooms etc. Now we want to find out which
Expand Down Expand Up @@ -360,7 +412,7 @@ the most expensive region for our dream house:
--------------------------------------------------


## License
== License
Copyright 2017 Scaleborn UG (haftungsbeschränkt).

Licensed under the Apache License 2.0.
6 changes: 3 additions & 3 deletions gradle.properties
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
elasticsearch.version=5.5.2
log4j.version=2.8.2
elasticsearch.version=6.2.0
log4j.version=2.9.1
wagon-ssh-external.version=2.10
commons-math3.version=3.6.1
group=org.scaleborn.elasticsearch.plugin
name=elasticsearch-linear-regression
version=5.5.2.2
version=6.2.0.0
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,10 @@
package org.elasticsearch.search.aggregations.support;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import org.apache.lucene.index.LeafReaderContext;
import org.elasticsearch.index.fielddata.NumericDoubleValues;
import org.elasticsearch.search.MultiValueMode;
import org.elasticsearch.search.aggregations.support.ValuesSource.Numeric;

/**
* Class to encapsulate a set of ValuesSource objects labeled by field name
Expand All @@ -35,70 +33,68 @@ public abstract class MultiValuesSource<VS extends ValuesSource> {
protected String[] names;
protected VS[] values;

/**
 * Records the field names and the multi-value mode for this set of values sources.
 * <p>
 * Only {@code names} and {@code multiValueMode} are assigned here; {@code values} is
 * not touched by this constructor.
 *
 * @param valuesSources map whose keys are used as the field names; may be {@code null},
 *        in which case {@code names} is left unassigned by this constructor
 * @param multiValueMode mode stored in {@code this.multiValueMode}
 */
private MultiValuesSource(final Map<String, ?> valuesSources,
final MultiValueMode multiValueMode) {
if (valuesSources != null) {
// The order of the names follows the iteration order of the map's key set.
this.names = valuesSources.keySet().toArray(new String[0]);
}
this.multiValueMode = multiValueMode;
}

/**
 * Reports whether any entry of {@code values} needs scores.
 *
 * @return {@code true} if {@code needsScores()} returns {@code true} for at least one
 *         element of {@code values}, {@code false} otherwise
 */
public boolean needsScores() {
boolean needsScores = false;
// Every element is visited; the results are OR-ed together without short-circuiting.
for (final ValuesSource value : this.values) {
needsScores |= value.needsScores();
}
return needsScores;
}

/**
 * Returns the field names held by this instance.
 *
 * @return the internal {@code names} array itself (no copy is made)
 */
public String[] fieldNames() {
return this.names;
}

public static class NumericMultiValuesSource extends MultiValuesSource<ValuesSource.Numeric> {

public NumericMultiValuesSource(List<NamedValuesSourceSpec<Numeric>> valuesSources,
MultiValueMode multiValueMode) {
super(valuesSources, multiValueMode, new ValuesSource.Numeric[0]);
public NumericMultiValuesSource(final Map<String, ValuesSource.Numeric> valuesSources,
final MultiValueMode multiValueMode) {
super(valuesSources, multiValueMode);
if (valuesSources != null) {
this.values = valuesSources.values().toArray(new ValuesSource.Numeric[0]);
} else {
this.values = new ValuesSource.Numeric[0];
}
}

public NumericDoubleValues getField(final int ordinal, LeafReaderContext ctx)
public NumericDoubleValues getField(final int ordinal, final LeafReaderContext ctx)
throws IOException {
if (ordinal > names.length) {
if (ordinal > this.names.length) {
throw new IndexOutOfBoundsException(
"ValuesSource array index " + ordinal + " out of bounds");
}
return multiValueMode.select(values[ordinal].doubleValues(ctx), Double.NEGATIVE_INFINITY);
return this.multiValueMode
.select(this.values[ordinal].doubleValues(ctx), Double.NEGATIVE_INFINITY);
}
}

public static class BytesMultiValuesSource extends MultiValuesSource<ValuesSource.Bytes> {

public BytesMultiValuesSource(List<NamedValuesSourceSpec<ValuesSource.Bytes>> valuesSources,
MultiValueMode multiValueMode) {
super(valuesSources, multiValueMode, new ValuesSource.Bytes[0]);
public BytesMultiValuesSource(final Map<String, ValuesSource.Bytes> valuesSources,
final MultiValueMode multiValueMode) {
super(valuesSources, multiValueMode);
this.values = valuesSources.values().toArray(new ValuesSource.Bytes[0]);
}

public Object getField(final int ordinal, LeafReaderContext ctx) throws IOException {
return values[ordinal].bytesValues(ctx);
public Object getField(final int ordinal, final LeafReaderContext ctx) throws IOException {
return this.values[ordinal].bytesValues(ctx);
}
}

public static class GeoPointValuesSource extends MultiValuesSource<ValuesSource.GeoPoint> {

public GeoPointValuesSource(List<NamedValuesSourceSpec<ValuesSource.GeoPoint>> valuesSources,
MultiValueMode multiValueMode) {
super(valuesSources, multiValueMode, new ValuesSource.GeoPoint[0]);
public GeoPointValuesSource(final Map<String, ValuesSource.GeoPoint> valuesSources,
final MultiValueMode multiValueMode) {
super(valuesSources, multiValueMode);
this.values = valuesSources.values().toArray(new ValuesSource.GeoPoint[0]);
}
}

private MultiValuesSource(List<? extends NamedValuesSourceSpec<VS>> valuesSources,
MultiValueMode multiValueMode, VS[] emptyArray) {
if (valuesSources != null) {
this.names = new String[valuesSources.size()];
List<VS> valuesList = new ArrayList<VS>(valuesSources.size());
int i = 0;
for (NamedValuesSourceSpec<VS> spec : valuesSources) {
this.names[i++] = spec.getName();
valuesList.add(spec.getValuesSource());
}
this.values = valuesList.toArray(emptyArray);
} else {
this.names = new String[0];
this.values = emptyArray;
}
this.multiValueMode = multiValueMode;
}

public boolean needsScores() {
boolean needsScores = false;
for (ValuesSource value : values) {
needsScores |= value.needsScores();
}
return needsScores;
}

public String[] fieldNames() {
return this.names;
}
}
Loading

0 comments on commit 40e7efb

Please sign in to comment.