Skip to content

Commit

Permalink
Merge pull request #14 from milosimpson/shiftr-hash4
Browse files Browse the repository at this point in the history
New feature and two bug fixes
  • Loading branch information
milosimpson committed May 13, 2013
2 parents 6ceb403 + 726834d commit ded10c3
Show file tree
Hide file tree
Showing 25 changed files with 542 additions and 59 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ They all follow the pattern :
}
```

The best way to learn the transform syntax, is to really look at the input and output json, get an understanding of how data is moving, and then look at the transform spec and see how it facilitates it.
Look at the "input" and "output" json, get an understanding of how data is moving, and then look at the transform spec to see how it facilitates it.

For reference, [this](https://github.com/bazaarvoice/jolt/blob/master/jolt-core/src/test/resources/json/shiftr/firstSample.json) was the very first test we wrote.

Expand Down
22 changes: 18 additions & 4 deletions jolt-core/src/main/java/com/bazaarvoice/jolt/Shiftr.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import com.bazaarvoice.jolt.exception.SpecException;
import com.bazaarvoice.jolt.shiftr.WalkedPath;
import com.bazaarvoice.jolt.shiftr.pathelement.LiteralPathElement;
import com.bazaarvoice.jolt.shiftr.spec.CompositeSpec;

import java.util.HashMap;
Expand Down Expand Up @@ -196,10 +197,10 @@
*
* '$' Wildcard
* Valid only on the LHS of the spec.
* The existence of this wildcard is a reflection of the fact that the "data" of the input Json, can be both in the "values",
* but also can be encoded in the "keys" of the input JSON
* The existence of this wildcard is a reflection of the fact that the "data" of the input Json, can be both in the "values"
* and the "keys" of the input JSON
*
* The base case operation of Shiftr is to operate on input JSON "values", thus we need a way to specify that we want to operate on the input JSON "key".
* The base case operation of Shiftr is to copy input JSON "values", thus we need a way to specify that we want to copy the input JSON "key" instead.
*
* Thus '$' specifies that we want to use an input key, or input key derived value, as the data to be placed in the output JSON.
* '$' has the same syntax as the '&' wildcard, and can be read as, dereference to get a value, and then use that value as the data to be output.
Expand Down Expand Up @@ -240,6 +241,13 @@
* }
* </pre>
*
* '#' Wildcard
* Valid only on the RHS of the spec, nested in an array, like "[#2]"
* This wildcard is useful if you want to take a JSON map and turn it into a JSON array, and you do not care about the order of the array.
*
* While Shiftr is doing its parallel tree walk of the input data and the spec, it tracks how many matches it has processed at each level
* of the spec tree.
*
*
* '|' Wildcard
* Valid only on the LHS of the spec.
Expand Down Expand Up @@ -442,7 +450,13 @@ public Shiftr( Object spec ) {
public Object transform( Object input ) {

Map<String,Object> output = new HashMap<String,Object>();
rootSpec.apply( ROOT_KEY, input, new WalkedPath(), output );

// Create a root LiteralPathElement so that # is useful at the root level
LiteralPathElement rootLpe = new LiteralPathElement( ROOT_KEY );
WalkedPath walkedPath = new WalkedPath();
walkedPath.add( rootLpe );

rootSpec.apply( ROOT_KEY, input, walkedPath, output );

return output.get( ROOT_KEY );
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,16 @@
import com.bazaarvoice.jolt.exception.SpecException;
import com.bazaarvoice.jolt.shiftr.reference.AmpReference;
import com.bazaarvoice.jolt.shiftr.WalkedPath;
import com.bazaarvoice.jolt.shiftr.reference.HashReference;
import com.bazaarvoice.jolt.shiftr.reference.PathAndGroupReference;
import com.bazaarvoice.jolt.shiftr.reference.PathReference;

public class ArrayPathElement extends BasePathElement implements MatchablePathElement, EvaluatablePathElement {

public enum ArrayPathType { AUTO_EXPAND, REFERENCE, EXPLICIT_INDEX }
public enum ArrayPathType { AUTO_EXPAND, REFERENCE, HASH, EXPLICIT_INDEX }

private final ArrayPathType arrayPathType;
private final AmpReference ref;
private final PathReference ref;

private final String canonicalForm;
private final String arrayIndex;
Expand All @@ -22,7 +25,7 @@ public ArrayPathElement( String key ) {
}

ArrayPathType apt;
AmpReference r = null;
PathReference r = null;
String aI = "";

if ( key.length() == 2 ) {
Expand All @@ -32,12 +35,18 @@ public ArrayPathElement( String key ) {
else {
String meat = key.substring( 1, key.length() - 1 ); // trim the [ ]

if ( meat.contains( "&" ) ) {
if ( AmpReference.TOKEN.equals( meat.charAt( 0 ) ) ) {
r = new AmpReference( meat );
apt = ArrayPathType.REFERENCE;

canonicalForm = "[" + r.getCanonicalForm() + "]";
}
else if ( HashReference.TOKEN.equals( meat.charAt( 0 ) ) ) {
r = new HashReference( meat );
apt = ArrayPathType.HASH;

canonicalForm = "[" + r.getCanonicalForm() + "]";
}
else {
try {
Integer.parseInt( meat );
Expand Down Expand Up @@ -73,9 +82,21 @@ public String evaluate( WalkedPath walkedPath ) {
case EXPLICIT_INDEX:
return arrayIndex;

case HASH:
LiteralPathElement element = walkedPath.elementFromEnd( ref.getPathIndex() );
Integer index = element.getHashCount();
return index.toString();

case REFERENCE:
LiteralPathElement lpe = walkedPath.elementFromEnd( ref.getPathIndex() );
String keyPart = lpe.getSubKeyRef( ref.getKeyGroup() );
String keyPart;

if ( ref instanceof PathAndGroupReference ) {
keyPart = lpe.getSubKeyRef( ( (PathAndGroupReference) ref).getKeyGroup() );
}
else {
keyPart = lpe.getSubKeyRef( 0 );
}
try
{
Integer.parseInt( keyPart );
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ public class LiteralPathElement extends BasePathElement implements MatchablePath

private final List<String> subKeys;

private int hashCount = 0;

public LiteralPathElement( String key ) {
super(key);

Expand Down Expand Up @@ -58,4 +60,12 @@ public String getSubKeyRef( int index ) {
public int getSubKeyCount(){
return subKeys.size();
}

/**
 * @return the current value of this element's hash counter, which starts at 0 and
 *         grows by one on every call to {@link #incrementHashCount()}
 */
public int getHashCount() {
    return this.hashCount;
}

/**
 * Bumps this element's hash counter by one.
 */
public void incrementHashCount() {
    this.hashCount += 1;
}
}
Original file line number Diff line number Diff line change
@@ -1,20 +1,20 @@
package com.bazaarvoice.jolt.shiftr.reference;

import java.util.regex.Pattern;

/**
* This class parses the Jolt & syntax into useful programmatic constructs.
*
* Valid Syntax is : & &1 &(1) &(1,1)
*/
public class AmpReference extends BaseReference {
public class AmpReference extends BasePathAndGroupReference {

public static final Character TOKEN = '&';

public AmpReference( String refStr ) {
super(refStr);
}

@Override
protected char getToken() {
return '&';
return TOKEN;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,14 @@
* In the CanonicalForm the first entry is how far up the WalkedPath to look for a LiteralPathElement,
* and the second entry is which part of that LiteralPathElement to ask for.
*/
public abstract class BaseReference implements Reference {
public abstract class BasePathAndGroupReference implements PathAndGroupReference {

private final int pathIndex; // equals 0 for "&" "&0" and "&(0,x)"
private final int keyGroup; // equals 0 for "&" "&0" and "&(x,0)"
private final int pathIndex; // equals 0 for "&" "&0" and "&(0,x)"

protected abstract char getToken();

public BaseReference( String refStr ) {
public BasePathAndGroupReference( String refStr ) {

if ( refStr == null || refStr.length() == 0 || getToken() != refStr.charAt( 0 ) ) {
throw new SpecException( "Invalid reference key=" + refStr + " either blank or doesn't start with correct character=" + getToken() );
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
package com.bazaarvoice.jolt.shiftr.reference;

import com.bazaarvoice.jolt.exception.SpecException;

/**
 * Base class for references that carry only a path index, written as the token
 * character optionally followed by an integer (for example "#" or "#2").
 */
public abstract class BasePathReference implements PathReference {

    // 0 for a bare token such as "#" (and for "#0"); otherwise the number that follows the token
    private final int pathIndex;

    /**
     * @return the single character that every reference string of this type must start with
     */
    protected abstract char getToken();

    /**
     * Parses a reference string made of the token character followed by an optional integer.
     *
     * @param refStr the raw reference text, e.g. "#" or "#2"
     * @throws SpecException if refStr is null, empty, does not start with the token,
     *                       has a suffix that is not an integer, or has a negative suffix
     */
    public BasePathReference( String refStr ) {

        boolean missing = refStr == null || refStr.length() == 0;
        if ( missing || refStr.charAt( 0 ) != getToken() ) {
            throw new SpecException( "Invalid reference key=" + refStr + " either blank or doesn't start with correct character=" + getToken() );
        }

        // A bare token with nothing after it means index 0
        int parsedIndex = 0;

        if ( refStr.length() > 1 ) {
            String suffix = refStr.substring( 1 );
            try {
                parsedIndex = Integer.parseInt( suffix );
            }
            catch( NumberFormatException nfe ) {
                throw new SpecException( "Unable to parse '" + getToken() + "' reference key:" + refStr, nfe );
            }
        }

        if ( parsedIndex < 0 ) {
            throw new SpecException( "Reference:" + refStr + " can not have a negative value." );
        }

        this.pathIndex = parsedIndex;
    }

    @Override
    public int getPathIndex() {
        return pathIndex;
    }

    /**
     * Builds the non-syntactic sugar / maximally expanded and unique form of this reference.
     *
     * @return canonical form : the token followed by the path index, aka "#" -> "#0"
     */
    @Override
    public String getCanonicalForm() {
        return String.valueOf( getToken() ) + pathIndex;
    }
}
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
package com.bazaarvoice.jolt.shiftr.reference;

public class DollarReference extends BaseReference {
public class DollarReference extends BasePathAndGroupReference {

public static final Character TOKEN = '$';

public DollarReference( String refStr ) {
super(refStr);
}

@Override
protected char getToken() {
return '$';
return TOKEN;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
package com.bazaarvoice.jolt.shiftr.reference;

/**
 * Reference for the '#' token, e.g. "#" or "#2".
 *
 * TODO : Refactor this out to its own class, as it really isn't a "Reference".
 * This is just a cheap hack at the moment.
 */
public class HashReference extends BasePathReference {

    public static final Character TOKEN = '#';

    /**
     * @param refStr the raw reference text, which is handed unchanged to the superclass constructor
     */
    public HashReference( String refStr ) {
        super( refStr );
    }

    /**
     * @return the '#' character held in {@link #TOKEN}
     */
    @Override
    protected char getToken() {
        return TOKEN.charValue();
    }
}
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
package com.bazaarvoice.jolt.shiftr.reference;


/**
* Reference is used by Shiftr when looking up values from a WalkedPath (list of LiteralPathElements).
*
Expand All @@ -20,19 +19,7 @@
* y : keyGroup : where 0 is the whole key, and 1 thru n smaller captured parts of the key
*
*/
public interface Reference {

public int getPathIndex();
public interface PathAndGroupReference extends PathReference {

public int getKeyGroup();

/**
* Get the canonical form of this Reference.
*
* One of the uses of this method is to ensure that spec, does not contain "duplicate" keys, aka
* two keys that when you unroll the syntactic sugar, are the same thing.
*
* @return fully expanded String representation of this Reference
*/
public String getCanonicalForm();
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
package com.bazaarvoice.jolt.shiftr.reference;


/**
* Reference is used by Shiftr when lookup up values from a WalkedPath (list of LiteralPathElements).
*
* #, #0 are the same
*
* The "canonical form" is "Cx", where :
* C : the character used to determine the type of Reference
* x : pathIndex : which is how far up the walkedPath the look
*
*/
public interface PathReference {

public int getPathIndex();

/**
* Get the canonical form of this Reference.
*
* One of the uses of this method is to ensure that spec, does not contain "duplicate" keys, aka
* two keys that when you unroll the syntactic sugar, are the same thing.
*
* @return fully expanded String representation of this Reference
*/
public String getCanonicalForm();
}
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,7 @@ public boolean apply( String inputKey, Object input, WalkedPath walkedPath, Map<
return false;
}

// add ourselves to the path, so that our children can reference us
walkedPath.add( thisLevel );

// Handle any special / key based children first, but don't have them block anything
Expand All @@ -176,7 +177,12 @@ public boolean apply( String inputKey, Object input, WalkedPath walkedPath, Map<
// Handle the rest of the children
executionStrategy.process( this, input, walkedPath, output );

// We are done, so remove ourselves from the walkedPath
walkedPath.removeLast();

// we matched so increment the matchCount of our parent
walkedPath.lastElement().incrementHashCount();

return true;
}

Expand Down Expand Up @@ -206,7 +212,18 @@ void processMap( CompositeSpec spec, Map<String, Object> inputMap, WalkedPath wa
void processList( CompositeSpec spec, List<Object> inputList, WalkedPath walkedPath, Map<String, Object> output ) {

for( String key : spec.literalChildren.keySet() ) {
int keyInt = Integer.parseInt( key );

int keyInt = Integer.MAX_VALUE;

try {
keyInt = Integer.parseInt( key );
}
catch( NumberFormatException nfe ) {
// If the data is an Array, but the spec keys are Non-Integer Strings,
// we are annoyed, but we don't stop the whole transform.
// Just this part of the Transform won't work.
}

if ( keyInt < inputList.size() ) {

Object subInput = inputList.get( keyInt );
Expand Down
Loading

0 comments on commit ded10c3

Please sign in to comment.