Skip to content

Commit

Permalink
getVariance1()
Browse files Browse the repository at this point in the history
  • Loading branch information
mzusin committed Mar 5, 2024
1 parent 77121ac commit c8debeb
Show file tree
Hide file tree
Showing 9 changed files with 63 additions and 69 deletions.
4 changes: 2 additions & 2 deletions dist/mz-math.esm.js

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions dist/mz-math.esm.js.map

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions dist/mz-math.min.js

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions dist/mz-math.min.js.map

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions dist/mz-math.node.cjs

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions dist/mz-math.node.cjs.map

Large diffs are not rendered by default.

54 changes: 1 addition & 53 deletions src/main/ml.ts
Original file line number Diff line number Diff line change
Expand Up @@ -79,56 +79,4 @@ export const mlStandardizeTestData = (data: number[], decimalPlaces = Infinity):
*/
export const mlStandardizeUnseenData = (
    data: number[],
    mean: number,
    stdDev: number,
    decimalPlaces = Infinity
): number[] =>
    // Pure delegation: the caller-supplied mean and standard deviation are
    // forwarded unchanged to mlStandardizeArray together with the data.
    mlStandardizeArray(data, mean, stdDev, decimalPlaces);

/*
// which means that 1 in 5 patients will miss their scheduled appointment.
def calc_prevalence(y):
return (sum(y)/len(y))
*/

/*
df['ScheduledDay_year'] = df['ScheduledDay'].dt.year
df['ScheduledDay_month'] = df['ScheduledDay'].dt.month
df['ScheduledDay_week'] = df['ScheduledDay'].dt.week
df['ScheduledDay_day'] = df['ScheduledDay'].dt.day
df['ScheduledDay_hour'] = df['ScheduledDay'].dt.hour
df['ScheduledDay_minute'] = df['ScheduledDay'].dt.minute
df['ScheduledDay_dayofweek'] = df['ScheduledDay'].dt.dayofweek
*/

// I would never use Year as a feature (but showed it anyways) because presumably we want to use this predictive model in the future and those future years wouldn’t be included in the dataset.

// However, I am kind of disappointed that the months are just for a few months of the year. This means month (and consequently week of year) probably shouldn’t be used as a feature either.

/*
// the number of days between the scheduled date and the appointment date.
df['delta_days'] = (df['AppointmentDay'] - df['ScheduledDay']).dt.total_seconds() / (60 * 60 * 24)
*/

// Another thing you may want to do is convert the dayofweek into a categorical variable via one-hot encoding.

// This shows that delta_days is basically the only feature used in the model. It confirms our suspicion above that the model will likely struggle because of those same-day appointments.

/*
When working with months (and days) as integers you are losing some information about them, because December (12) is far away from January (1) but in a sense they should be connected as one precedes the other. One approach to solve this problem would be projecting the months into a circle, then each month would have a corresponding angle “theta” and then you can store the “sin” and “cos” of said angle.
*/

/*
Possible suggestions of features include:
weekends vs weekdays
business hours and time of day
seasons
week of year number
month
year
beginning/end of month (pay days)
quarter
days to/from an action event(distance)
missing or incomplete data
etc.
All this depends on the data set and most won't apply.
*/
};
16 changes: 16 additions & 0 deletions src/main/statistics.ts
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ TODO:

/**
* Dispersion: the average square distance from the mean.
* Sum of (x - mean)^2 / N
*/
export const getVariance = (data: number[], decimalPlaces = Infinity) : number|undefined => {
if(!data || data.length <= 0) return undefined;
Expand All @@ -89,6 +90,21 @@ export const getVariance = (data: number[], decimalPlaces = Infinity) : number|u
return setDecimalPlaces(sum / data.length, decimalPlaces);
};

/**
 * Population variance computed with the alternative identity
 * (Sum of x^2) / N - (mean ^ 2)
 *
 * Applying that identity to the raw values subtracts two nearly equal large
 * numbers whenever the mean is large compared to the spread, which loses most
 * significant digits and can even yield a negative result. Variance does not
 * change when a constant is subtracted from every value, so the identity is
 * applied to the values shifted by the first element instead.
 *
 * @param data - Values to measure.
 * @param decimalPlaces - Passed through to setDecimalPlaces for the final result.
 * @returns The variance, or undefined when data is missing/empty or the mean
 * cannot be computed.
 */
export const getVariance1 = (data: number[], decimalPlaces = Infinity) : number|undefined => {
    if(!data || data.length <= 0) return undefined;

    const mean = getArithmeticMean(data);
    if(mean === undefined) return undefined;

    // The array is non-empty here; `?? 0` only satisfies strict index checking.
    const shift = data[0] ?? 0;
    const shiftedMean = mean - shift;

    const sumOfSquares = data.reduce((acc, val) => acc + ((val - shift) ** 2), 0);
    const variance = (sumOfSquares / data.length) - (shiftedMean ** 2);

    // Rounding can still leave a tiny negative residue; a variance is never
    // below zero. NaN is not less than 0, so it is passed through unchanged.
    return setDecimalPlaces(variance < 0 ? 0 : variance, decimalPlaces);
};

export const getStandardDeviation = (data: number[], decimalPlaces = Infinity) => {
const variance = getVariance(data) ?? 0;
return setDecimalPlaces(Math.sqrt(variance), decimalPlaces);
Expand Down
32 changes: 31 additions & 1 deletion test/statistics.test.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,11 @@
import { getArithmeticMean, getMedian, getMode, getVariance, getStandardDeviation } from '../src/main/statistics';
import {
getArithmeticMean,
getMedian,
getMode,
getVariance,
getVariance1,
getStandardDeviation
} from '../src/main/statistics';

describe('Statistics', () => {

Expand Down Expand Up @@ -104,6 +111,29 @@ describe('Statistics', () => {
});
});

describe('getVariance1()', () => {

    // Each row: input data, expected variance, optional decimalPlaces argument.
    const cases: [number[], number | undefined, number?][] = [
        [[], undefined],
        [[2], 0],
        [[2, 2, 3, 3], 0.25],
        [[0, 0, 5, 5], 6.25],
        [[1, 2, 3, 8, 7], 7.76, 2],
    ];

    for (const [data, expected, decimalPlaces] of cases) {
        // The title reproduces the input literal, e.g. '[2, 2, 3, 3]'.
        test(`[${ data.join(', ') }]`, () => {
            // An undefined decimalPlaces falls back to the parameter's default.
            expect(getVariance1(data, decimalPlaces)).toStrictEqual(expected);
        });
    }
});

describe('getStandardDeviation()', () => {

it('returns 0 when data is empty', () => {
Expand Down

0 comments on commit c8debeb

Please sign in to comment.