Skip to content

Commit

Permalink
Merge 'develop' for gtools-1.1.2; improved variable parsing
Browse files Browse the repository at this point in the history
Enhancements

- Improved variable parsing in general.
- Error message when variable not found now explicit.
- If `-` is found, a warning message notes that the default is to
  interpret it as negative, not as part of a varlist.
- `ds` and `nods` control parsing options for `-`
  • Loading branch information
mcaceresb committed Nov 26, 2018
2 parents 032796d + d270e9a commit cb4d199
Show file tree
Hide file tree
Showing 30 changed files with 760 additions and 170 deletions.
2 changes: 1 addition & 1 deletion .appveyor.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
version: "generic-1.1.1-{build}"
version: "generic-1.1.2-{build}"

environment:
matrix:
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ to provide massive speed improvements to common Stata commands,
including: collapse, pctile, xtile, contract, egen, isid, levelsof,
duplicates, and unique/distinct.

![Stable Version](https://img.shields.io/badge/stable-v1.1.1-blue.svg?longCache=true&style=flat-square)
![Stable Version](https://img.shields.io/badge/stable-v1.1.2-blue.svg?longCache=true&style=flat-square)
![Supported Platforms](https://img.shields.io/badge/platforms-linux--64%20%7C%20osx--64%20%7C%20win--64-blue.svg?longCache=true&style=flat-square)
[![Travis Build Status](https://img.shields.io/travis/mcaceresb/stata-gtools/master.svg?longCache=true&style=flat-square&label=linux)](https://travis-ci.org/mcaceresb/stata-gtools)
[![Travis Build Status](https://img.shields.io/travis/mcaceresb/stata-gtools/master.svg?longCache=true&style=flat-square&label=osx)](https://travis-ci.org/mcaceresb/stata-gtools)
Expand Down
82 changes: 63 additions & 19 deletions build/_gtools_internal.ado
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
*! version 1.1.1 14Nov2018 Mauricio Caceres Bravo, [email protected]
*! version 1.1.2 16Nov2018 Mauricio Caceres Bravo, [email protected]
*! gtools function internals

* rc 17000
Expand Down Expand Up @@ -102,6 +102,7 @@ program _gtools_internal, rclass
unsorted /// Do not sort hash values; faster
countmiss /// count # missing in output
/// (only w/certain targets)
NODS DS /// Parse - as varlist (ds) or negative (nods)
///
/// Generic stats options
/// ---------------------
Expand Down Expand Up @@ -1027,31 +1028,74 @@ program _gtools_internal, rclass
* internally later on.
*
* Last, we parse whether or not to invert the sort order of a given
* by variable ("-" preceding it).

if ( "`anything'" != "" ) {
local clean_anything `anything'
local clean_anything: subinstr local clean_anything "+" "", all
local clean_anything: subinstr local clean_anything "-" "", all
local clean_anything `clean_anything'
cap ds `clean_anything'
if ( _rc | ("`clean_anything'" == "") ) {
local rc = _rc
di as err "Invalid call/varlist: '`anything''"
di as err "Syntax: {+|-}varname [{+|-}varname ...]"
clean_all 111
exit 111
* by variable ("-" preceding it). If option -ds- is passed, then "-"
* is interpreted as the "to" operator in Stata's varlist notation.

if ( `"`anything'"' != "" ) {
local clean_anything: copy local anything
local clean_anything: subinstr local clean_anything "+" " ", all
if ( strpos(`"`clean_anything'"', "-") & ("`ds'`nods'" == "") ) {
disp as txt "'-' interpreted as negative; use option -ds- to interpret as varlist"
disp as txt "(to suppress this warning, use option -nods-)"
}
if ( "`ds'" != "" ) {
local clean_anything `clean_anything'
if ( "`clean_anything'" == "" ) {
di as err "Invalid varlist: `anything'"
clean_all 198
exit 198
}
cap ds `clean_anything'
if ( _rc ) {
cap noi ds `clean_anything'
local rc = _rc
clean_all `rc'
exit `rc'
}
local clean_anything `r(varlist)'
}
else {
local clean_anything: subinstr local clean_anything "-" " ", all
local clean_anything `clean_anything'
if ( "`clean_anything'" == "" ) {
di as err "Invalid list: '`anything''"
di as err "Syntax: [+|-]varname [[+|-]varname ...]"
clean_all 198
exit 198
}
cap ds `clean_anything'
if ( _rc ) {
local notfound
foreach var of local clean_anything {
cap confirm var `var'
if ( _rc ) {
local notfound `notfound' `var'
}
}
if ( `:list sizeof notfound' > 0 ) {
if ( `:list sizeof notfound' > 1 ) {
di as err "Variables not found: `notfound'"
}
else {
di as err "Variable `notfound' not found"
}
}
clean_all 111
exit 111
}
qui ds `clean_anything'
local clean_anything `r(varlist)'
}
local clean_anything `r(varlist)'
cap noi check_matsize `clean_anything'
if ( _rc ) {
local rc = _rc
clean_all `rc'
exit `rc'
}
}
if ( "`ds'" == "" ) local nods nods

local opts `compress' `forcestrl' glevelsof(`glevelsof')
local opts `compress' `forcestrl' glevelsof(`glevelsof') `ds'
cap noi parse_by_types `anything' `ifin', clean_anything(`clean_anything') `opts'
if ( _rc ) {
local rc = _rc
Expand Down Expand Up @@ -2566,7 +2610,7 @@ end

capture program drop parse_by_types
program parse_by_types, rclass
syntax [anything] [if] [in], [clean_anything(str) compress forcestrl glevelsof(str)]
syntax [anything] [if] [in], [clean_anything(str) compress forcestrl glevelsof(str) ds]

local ifin `if' `in'
if ( "`anything'" == "" ) {
Expand Down Expand Up @@ -2600,7 +2644,7 @@ program parse_by_types, rclass
local varlist ""
local skip = 0
local invert = 0
if ( strpos("`anything'", "-") ) {
if ( strpos("`anything'", "-") & ("`ds'" == "") ) {
while ( trim("`parse'") != "" ) {
gettoken var parse: parse, p(" -+")
if inlist("`var'", "-", "+") {
Expand Down
10 changes: 10 additions & 0 deletions build/changelog.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,16 @@
Change Log
==========

## gtools-1.1.2 (2018-11-16)

### Enhancements

- Improved variable parsing in general.
- Error message when variable not found now explicit.
- If `-` is found, a warning message notes that the default is to
interpret it as negative, not as part of a varlist.
- `ds` and `nods` control parsing options for `-`

## gtools-1.1.1 (2018-11-14)

### Features
Expand Down
87 changes: 72 additions & 15 deletions build/gcollapse.ado
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
*! version 1.1.1 14Nov2018 Mauricio Caceres Bravo, [email protected]
*! version 1.1.2 16Nov2018 Mauricio Caceres Bravo, [email protected]
*! -collapse- implementation using C for faster processing

capture program drop gcollapse
Expand Down Expand Up @@ -36,6 +36,7 @@ program gcollapse, rclass
forcemem /// Use memory for writing/reading collapsed data
double /// Generate all targets as doubles
sumcheck /// Check whether sum will overflow
NODS DS /// Parse - as varlist (ds) or negative (nods)
///
compress /// Try to compress strL variables
forcestrl /// Force reading strL variables (stata 14 and above only)
Expand Down Expand Up @@ -71,24 +72,71 @@ program gcollapse, rclass
local replaceby = cond("`debug_replaceby'" == "", "", "replaceby")
local gfallbackok = `"`replaceby'`replace'`freq'`merge'`labelformat'`labelprogram'`rawstat'"' == `""'

if ( ("`ds'" != "") & ("`nods'" != "") ) {
di as err "-ds- and -nods- mutually exclusive"
exit 198
}

* Parse by call (make sure varlist is valid)
* ------------------------------------------

if ( "`by'" != "" ) {
local clean_by `by'
local clean_by: subinstr local clean_by "+" "", all
local clean_by: subinstr local clean_by "-" "", all
local clean_by `clean_by'
cap ds `clean_by'
if ( _rc | ("`clean_by'" == "") ) {
local rc = _rc
di as err "Malformed call: by(`by')"
di as err "Syntax: by([+|-]varname [[+|-]varname ...])"
CleanExit
exit 111
if ( `"`by'"' != "" ) {
local clean_by: copy local by
local clean_by: subinstr local clean_by "+" " ", all
if ( strpos(`"`clean_by'"', "-") & ("`ds'`nods'" == "") ) {
disp as txt "'-' interpreted as negative; use option -ds- to interpret as varlist"
disp as txt "(to suppress this warning, use option -nods-)"
}
if ( "`ds'" != "" ) {
local clean_by `clean_by'
if ( "`clean_by'" == "" ) {
di as err "Invalid varlist: `by'"
clean_all 198
exit 198
}
cap ds `clean_by'
if ( _rc ) {
cap noi ds `clean_by'
local rc = _rc
clean_all `rc'
exit `rc'
}
local clean_by `r(varlist)'
}
else {
local clean_by: subinstr local clean_by "-" " ", all
local clean_by `clean_by'
if ( "`clean_by'" == "" ) {
di as err "Invalid list: `by'"
di as err "Syntax: [+|-]varname [[+|-]varname ...]"
CleanExit
exit 198
}
cap ds `clean_by'
if ( _rc ) {
local notfound
foreach var of local clean_by {
cap confirm var `var'
if ( _rc ) {
local notfound `notfound' `var'
}
}
if ( `:list sizeof notfound' > 0 ) {
if ( `:list sizeof notfound' > 1 ) {
di as err "Variables not found: `notfound'"
}
else {
di as err "Variable `notfound' not found"
}
}
CleanExit
exit 111
}
qui ds `clean_by'
local clean_by `r(varlist)'
}
local clean_by `r(varlist)'
}
if ( "`ds'" == "" ) local nods nods

if ( `debug_level' ) {
disp as txt `""'
Expand Down Expand Up @@ -172,6 +220,15 @@ program gcollapse, rclass
exit 198
}

foreach var of local __gtools_gc_uniq_vars {
cap noi confirm numeric variable `var'
if ( _rc ) {
local rc = _rc
CleanExit
exit `rc'
}
}

if ( `debug_level' ) {
disp as txt `""'
disp as txt "{cmd:gcollapse} debug level `debug_level'"
Expand Down Expand Up @@ -491,7 +548,7 @@ program gcollapse, rclass
local sources sources(`__gtools_gc_vars')
local stats stats(`__gtools_gc_stats')
local targets targets(`__gtools_gc_targets')
local opts missing replace `keepmissing' `compress' `forcestrl' `_subtract'
local opts missing replace `keepmissing' `compress' `forcestrl' `_subtract' `ds' `nods'
local opts `opts' `verbose' `benchmark' `benchmarklevel' `hashmethod'
local opts `opts' `hashlib' `oncollision' debug(`debug_level') `rawstat'
local action `sources' `targets' `stats'
Expand Down
73 changes: 59 additions & 14 deletions build/gcontract.ado
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
*! version 1.0.0 20Sep2018 Mauricio Caceres Bravo, [email protected]
*! version 1.0.1 16Nov2018 Mauricio Caceres Bravo, [email protected]
*! Frequency counts using C-plugins for a speedup.

cap program drop gcontract
Expand All @@ -21,6 +21,7 @@ program gcontract, rclass
FORMat(string) /// Format for percentage variables
Zero /// Include varlist combinations with 0 frequency
noMISS /// Exclude rows with missing values in varlist
NODS DS /// Parse - as varlist (ds) or negative (nods)
///
fast /// Do not preserve and restore the original dataset. Saves speed
/// but leaves data unusable if the user hits Break.
Expand All @@ -41,6 +42,11 @@ program gcontract, rclass
local benchmarklevel benchmarklevel(`benchmarklevel')
local missing = cond("`miss'" == "nomiss", "", "missing")

if ( ("`ds'" != "") & ("`nods'" != "") ) {
di as err "-ds- and -nods- mutually exclusive"
exit 198
}

* Set type and format for generated numeric variables
* ---------------------------------------------------

Expand Down Expand Up @@ -136,19 +142,58 @@ program gcontract, rclass
* Get varlist
* -----------

if ( "`anything'" != "" ) {
local varlist `anything'
local varlist: subinstr local varlist "+" "", all
local varlist: subinstr local varlist "-" "", all
cap ds `varlist'
if ( _rc | ("`varlist'" == "") ) {
local rc = _rc
di as err "Malformed call: '`anything''"
di as err "Syntax: [+|-]varname [[+|-]varname ...]"
exit 111
if ( `"`anything'"' != "" ) {
local varlist: copy local anything
local varlist: subinstr local varlist "+" " ", all
if ( strpos(`"`varlist'"', "-") & ("`ds'`nods'" == "") ) {
disp as txt "'-' interpreted as negative; use option -ds- to interpret as varlist"
disp as txt "(to suppress this warning, use option -nods-)"
}
if ( "`ds'" != "" ) {
local varlist `varlist'
if ( "`varlist'" == "" ) {
di as err "Invalid varlist: `anything'"
exit 198
}
cap ds `varlist'
if ( _rc ) {
cap noi ds `varlist'
exit _rc
}
local varlist `r(varlist)'
}
else {
local varlist: subinstr local varlist "-" " ", all
local varlist `varlist'
if ( "`varlist'" == "" ) {
di as err "Invalid list: `anything'"
di as err "Syntax: [+|-]varname [[+|-]varname ...]"
exit 198
}
cap ds `varlist'
if ( _rc ) {
local notfound
foreach var of local varlist {
cap confirm var `var'
if ( _rc ) {
local notfound `notfound' `var'
}
}
if ( `:list sizeof notfound' > 0 ) {
if ( `:list sizeof notfound' > 1 ) {
di as err "Variables not found: `notfound'"
}
else {
di as err "Variable `notfound' not found"
}
}
exit 111
}
qui ds `varlist'
local varlist `r(varlist)'
}
local varlist `r(varlist)'
}
if ( "`ds'" == "" ) local nods nods

* Create variables
* ----------------
Expand Down Expand Up @@ -181,7 +226,7 @@ program gcontract, rclass
* Call the plugin
* ---------------

local opts `weights' `missing' `unsorted' `compress' `forcestrl'
local opts `weights' `missing' `unsorted' `compress' `forcestrl' `ds' `nods'
local opts `opts' `verbose' `benchmark' `benchmarklevel'
local opts `opts' `hashlib' `oncollision' `hashmethod' `debug'

Expand All @@ -191,7 +236,7 @@ program gcontract, rclass
local rc = _rc
global GTOOLS_CALLER ""
if ( `rc' == 17999 ) {
if strpos("`anything'", "-") {
if ( strpos("`anything'", "-") & ("`ds'" == "") ) {
di as err "Cannot use fallback with inverted sorting."
exit 17000
}
Expand Down
Loading

0 comments on commit cb4d199

Please sign in to comment.