Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

LDA Chart and Routine #1797

Merged
merged 14 commits into from
Dec 6, 2023
Merged
1 change: 1 addition & 0 deletions config.json
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
"hello-world",
"istex-facet",
"labeled-resources",
"lda",
"pairing-with",
"percentage-of",
"refs",
Expand Down
58 changes: 58 additions & 0 deletions packages/ezsLodex/src/LodexExtractOutput.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
import get from 'lodash.get';

/**
 * Write selected fields of the first received object as one JSON object.
 *
 * Only the first chunk is inspected: every path listed in `extract` that
 * resolves to a value in that chunk becomes a key at the root of the output.
 * The following chunks add nothing to the output, and the closing brace is
 * written when the end of the stream is signalled.
 *
 * @example <caption>Input</caption>
 * [
 *   { _id: 1, value: 2, total: 2 },
 *   { _id: 2, value: 4, total: 2 }
 * ]
 *
 * @example <caption>Script</caption>
 * .pipe(ezs('LodexExtractOutput', { extract: 'total' }))
 *
 * @example <caption>Output</caption>
 * { "total":2}
 *
 * @name LodexExtractOutput
 * @param {boolean} [indent=false] indent or not
 * @param {string[]} [extract] fields to put at the root of the output
 * object
 * @returns {string}
 */
function LodexExtractOutput(data, feed) {
    const indent = this.getParam('indent', false);
    const extract = this.getParam('extract');
    const extracts = Array.isArray(extract) ? extract : [extract];
    const keys = extracts.filter(x => x);

    const json = d => JSON.stringify(d, null, indent ? 4 : null);

    if (this.isLast()) {
        feed.write('}\n');
        return feed.close();
    }
    if (this.isFirst()) {
        feed.write('{');
        let hasWrittenKey = false;
        keys.forEach(key => {
            const value = get(data, key);
            // Skip only absent values: 0, false and '' are legitimate
            // results (e.g. `total: 0`) and must reach the output.
            if (value === undefined || value === null) {
                return;
            }
            feed.write(hasWrittenKey ? ',' : ' ');
            hasWrittenKey = true;
            feed.write(json(key));
            feed.write(':');
            feed.write(json(value));
        });
    }
    // Chunks after the first one write nothing: emitting a separator here
    // would leave a dangling comma before the closing brace and make the
    // whole document invalid JSON.
    return feed.end();
}
export default LodexExtractOutput;
2 changes: 2 additions & 0 deletions packages/ezsLodex/src/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ import labelizeFieldID from './labelizeFieldID';
import buildContext from './buildContext';
import aggregateQuery from './aggregateQuery';
import LodexJoinQuery from './joinQuery';
import LodexExtractOutput from './LodexExtractOutput';

const funcs = {
flattenPatch,
Expand Down Expand Up @@ -63,6 +64,7 @@ const funcs = {
aggregateQuery,
writeTurtle,
LodexJoinQuery,
LodexExtractOutput,
// aliases
fixFlatten: flattenPatch.flattenPatch,
LodexContext: disabled.disabled,
Expand Down
8 changes: 8 additions & 0 deletions src/app/js/formats/formats.js
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ import istexCitation from './istexCitation';
import istexRefbibs from './istexRefbibs';
import bubblePlot from './vega-lite/component/bubble-plot';
import flowMap from './vega/component/flow-map';
import ldaChart from './vega-lite/component/lda-chart';

export const FORMATS_CATALOG = [
{
Expand Down Expand Up @@ -346,4 +347,11 @@ export const FORMATS_CATALOG = [
component: flowMap,
type: 'chart',
},
{
name: 'formatLdaChart',
description: 'formatLdaChartDescription',
componentName: 'ldaChart',
component: ldaChart,
type: 'chart',
},
];
63 changes: 63 additions & 0 deletions src/app/js/formats/vega-lite/component/lda-chart/LdaChart.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
import { CustomActionVegaLite } from '../vega-lite-component';
import { VEGA_LITE_DATA_INJECT_TYPE_A } from '../../../chartsUtils';
import PropTypes from 'prop-types';
import React, { useMemo } from 'react';

const LdaChart = ({ data, title, colors }) => {
const spec = useMemo(() => {
return {
$schema: 'https://vega.github.io/schema/vega-lite/v5.json',
config: { legend: { disable: true } },
title: title,
encoding: {
y: { field: 'word', type: 'nominal', sort: null },
x: { field: 'word_weight', type: 'quantitative' },
},
layer: [
{
mark: 'bar',
encoding: {
color: {
field: 'word_weight',
scale: { range: colors.split(' ') },
},
},
},
{
mark: {
type: 'text',
align: 'left',
baseline: 'middle',
dx: 3,
},
encoding: {
text: {
field: 'word_weight',
type: 'quantitative',
format: '.2f',
},
},
},
],
width: 'container',
height: { step: 20 },
};
}, [data, title, colors]);
return (
<CustomActionVegaLite
spec={spec}
data={{
values: data,
}}
injectType={VEGA_LITE_DATA_INJECT_TYPE_A}
/>
);
};

LdaChart.propTypes = {
data: PropTypes.any.isRequired,
title: PropTypes.string.isRequired,
colors: PropTypes.string.isRequired,
};

export default LdaChart;
51 changes: 51 additions & 0 deletions src/app/js/formats/vega-lite/component/lda-chart/LdaChartAdmin.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
import React from 'react';
import translate from 'redux-polyglot/translate';
import PropTypes from 'prop-types';
import { Box } from '@mui/material';

import { polyglot as polyglotPropTypes } from '../../../../propTypes';
import ColorPickerParamsAdmin from '../../../shared/ColorPickerParamsAdmin';
import { MONOCHROMATIC_DEFAULT_COLORSET } from '../../../colorUtils';
import updateAdminArgs from '../../../shared/updateAdminArgs';

// Default arguments of the LDA chart format; `colors` is also the fallback
// applied when the color picker reports an empty value.
export const defaultArgs = {
colors: MONOCHROMATIC_DEFAULT_COLORSET,
};

const LdaChartAdmin = props => {
const { args, p } = props;
const { colors } = args;

const handleColors = colors => {
updateAdminArgs('colors', colors || defaultArgs.colors, props);
};

return (
<Box
display="flex"
flexWrap="wrap"
justifyContent="space-between"
gap={2}
>
<ColorPickerParamsAdmin
colors={colors}
onChange={handleColors}
polyglot={p}
/>
</Box>
);
};

LdaChartAdmin.defaultProps = {
args: defaultArgs,
};

LdaChartAdmin.propTypes = {
args: PropTypes.shape({
colors: PropTypes.string,
}),
onChange: PropTypes.func.isRequired,
p: polyglotPropTypes.isRequired,
};

export default translate(LdaChartAdmin);
126 changes: 126 additions & 0 deletions src/app/js/formats/vega-lite/component/lda-chart/LdaChartView.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
import React, { useMemo } from 'react';
import compose from 'recompose/compose';
import { connect } from 'react-redux';
import PropTypes from 'prop-types';
import _ from 'lodash';
import { Grid, Paper } from '@mui/material';

import injectData from '../../../injectData';
import { field as fieldPropTypes } from '../../../../propTypes';
import LdaChart from './LdaChart';

const LdaChartView = props => {
const { values, topics } = useMemo(() => {
const rawValues = props.data.values ?? [];

const topics = _.chain(rawValues)
.flatMap(o => Object.keys(o.value.topics))
.uniq()
.sort((a, b) =>
a.localeCompare(b, 'fr', {
sensitivity: 'accent',
numeric: true,
usage: 'sort',
ignorePunctuation: true,
}),
)
.value();

/**
* @type {Map<string, {word: string, word_weight: string}[]>}
*/
const values = new Map();

for (const rawValue of rawValues) {
/**
* @type {any}
*/
const topicsValues = rawValue.value.topics;
Object.entries(topicsValues).forEach(entry => {
const topic = entry[0];
/**
* @type {{word: string, word_weight: string}[]}
*/
let currentWords = entry[1].words;
const previousWords = values.get(topic);

if (previousWords) {
currentWords = currentWords.map(word => {
const preWord = previousWords.find(
preV => preV.word === word.word,
);
return {
word: word.word,
word_weight:
parseFloat(word.word_weight) +
parseFloat(preWord.word_weight),
};
});
currentWords.sort((a, b) => {
if (a.word_weight > b.word_weight) {
return -1;
}
if (a.word_weight < b.word_weight) {
return 1;
}
return 0;
});
}

values.set(topic, currentWords);
});
}

return {
values: Object.fromEntries(values),
topics,
};
}, [props.data]);

return (
<div style={{ margin: '12px' }}>
<Grid
container
justifyContent="center"
rowSpacing={1}
columnSpacing={1}
>
{topics.map(topic => (
<Grid key={topic} item xs={6}>
<Paper style={{ padding: '6px' }}>
<LdaChart
data={values[topic]}
title={topic}
colors={props.colors}
/>
</Paper>
</Grid>
))}
</Grid>
</div>
);
};

/**
 * Expose the injected `formatData` to the view as `data.values`.
 *
 * Any falsy `formatData` (not loaded yet, null, ...) becomes an empty list.
 *
 * @param {object} state redux state (unused)
 * @param {object} ownProps props received from the parent
 * @returns {{data: {values: any}}}
 */
const mapStateToProps = (state, ownProps) => {
    const { formatData } = ownProps;
    const values = formatData ? formatData : [];

    return { data: { values } };
};

// Props contract of the view. Only `data` and `colors` are read by
// LdaChartView itself; `field` and `resource` are presumably consumed by the
// injectData() wrapper — verify against injectData.
LdaChartView.propTypes = {
field: fieldPropTypes.isRequired,
resource: PropTypes.object.isRequired,
data: PropTypes.any,
colors: PropTypes.string.isRequired,
};

// The exported component is LdaChartView wrapped by injectData() and by
// connect(mapStateToProps), which turns `formatData` into `data.values`.
export default compose(injectData(), connect(mapStateToProps))(LdaChartView);
12 changes: 12 additions & 0 deletions src/app/js/formats/vega-lite/component/lda-chart/index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
import DefaultFormat from '../../../DefaultFormat';
import Icon from '../../VegaLiteIcon';
import Component from './LdaChartView';
import AdminComponent, { defaultArgs } from './LdaChartAdmin';

// Format definition of the LDA chart: starts from DefaultFormat, then sets
// the view component, the admin component, the default arguments and the icon.
export default {
...DefaultFormat,
Component,
AdminComponent,
defaultArgs,
Icon,
};
38 changes: 38 additions & 0 deletions workers/routines/lda.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
; suppress inspection "DuplicateSectionInFile" for whole file
; suppress inspection "DuplicateKeyInSection" for whole file
; Export and format LDA precompute data into a valid format for vega-lite
prepend = delegate?file=../worker.ini
mimeType = application/json
label = lda

; Load the plugins whose statements are used below
[use]
plugin = basics
plugin = lodex
plugin = analytics

[buildContext]
connectionStringURI = get('connectionStringURI')

; Remember the first requested field as the name of the precomputed data
[env]
path = precomputeName
value = get('field.0')

; Build the query filter: match on the precomputed name
[assign]
path = precomputeFilter
value = fix({ name: env('precomputeName') })

; Run the query against the "precomputed" collection
[LodexRunQuery]
collection = precomputed
filter = get('precomputeFilter')

; Store the size of the `data` field as `total`
[assign]
path = total
value = get('data').size()

; Drop every field except `data` and `total`
[keep]
path = data
path = total

; Write `data` and `total` at the root of the JSON output
[LodexExtractOutput]
extract = data
extract = total