Skip to content

Commit

Permalink
add toloka scripts
Browse files Browse the repository at this point in the history
  • Loading branch information
fonhorst committed Jul 6, 2023
1 parent 7d618a6 commit 4d89d9a
Show file tree
Hide file tree
Showing 7 changed files with 1,015 additions and 0 deletions.
32 changes: 32 additions & 0 deletions toloka/Estimate_topics_interpretability/input-data.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
{
"exp_id": {
"type": "string",
"hidden": true,
"required": true
},
"wordset": {
"type": "string",
"hidden": false,
"required": true
},
"model_id": {
"type": "string",
"hidden": true,
"required": true
},
"topic_id": {
"type": "string",
"hidden": true,
"required": true
},
"dataset_name": {
"type": "string",
"hidden": true,
"required": true
},
"correct_bad_words": {
"type": "json",
"hidden": false,
"required": false
}
}
1 change: 1 addition & 0 deletions toloka/Estimate_topics_interpretability/instructions.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Read the set of words. Try to understand if they correspond to a single common topic (possibly with the exception of a few words that don't).<div><div>&nbsp;If they do, name the topic with one word or a short phrase.</div><div>Optionally, identify the words in the wordset that don't belong to the common topic.</div><div><br></div><div>Examples:&nbsp;&nbsp;<br> <h3><ul><li><b>Good topic</b>: <br>run, swimming, athlete, ski, training, exercises, paddle, referee<br><br><b>Topic name</b>: sport</li><li>Good topic (but with "bad" words that don't belong to the common topic):<br>greenhouse, chicken, tomato, eggs,&nbsp;<br><br>Topic name:&nbsp;</li><li>Bad topic (cannot be identified)</li><li>Bad topic (mixing of two or more topics)</li></ul></h3><div><br></div><div><br></div><div><br></div> <ul><li><b>Advertising and spam</b>. This category includes texts that ask users to go to an external resource or buy a product, offer earnings, or advertise dating sites. Such messages often contain shortened links to various sites.</li><li><b>Nonsense</b>. The text is a meaningless set of characters or words. Emoticons and emojis, nicknames and hashtags don't belong to this category.</li><li><b>Insults</b>. This category includes insults and threats that are clearly targeted at a user.</li><li><b>Violation of the law</b>. The text incites extremist activities, violence, criminal activity, or hatred based on gender, race, nationality, or belonging to a social group; or the text promotes suicide, drugs, or the sale of weapons.</li><li><b>Profanity</b>. This category includes comments that contain obscenities or profanity.</li></ul></div></div>
28 changes: 28 additions & 0 deletions toloka/Estimate_topics_interpretability/output-data.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
{
"quality": {
"type": "string",
"hidden": false,
"required": true
},
"bad_words": {
"type": "json",
"hidden": false,
"required": true
},
"topic_name": {
"type": "string",
"hidden": false,
"required": false
},
"golden_bad_words": {
"type": "boolean",
"hidden": false,
"required": true
},
"golden_binary_quality": {
"type": "boolean",
"hidden": false,
"required": true
}
}

91 changes: 91 additions & 0 deletions toloka/Estimate_topics_interpretability/task.css
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
/* Task on the page */
/* Fixed-width card; the mobile media query further down switches it to full width. */
.task {
border: 1px solid #ccc;
width: 500px;
padding: 15px;
display: inline-block;
}

/* Generic block of the task; task.html uses it for the wordset box and for both question groups. */
.tsk-block {
border-radius: 3px;
margin-bottom: 10px;
}

/* Box with the wordset. The wide left padding leaves room for the absolutely positioned .quote-sign icon,
   and position: relative makes this box the positioning context for that icon. */
.obj-text {
border: 1px solid #ccc;
padding: 15px 15px 15px 71px;
position: relative;
background-color: #e6f7dc;
}

/* Quotation mark */
/* Decorative 36x36 icon drawn from an inline SVG data URI and pinned to the top-left corner of .obj-text. */
.quote-sign {
background-image: url('data:image/svg+xml;utf8,<svg width="36" height="36" viewBox="0 0 1792 1792" xmlns="http://www.w3.org/2000/svg"><path fill="silver" d="M832 960v384q0 80-56 136t-136 56h-384q-80 0-136-56t-56-136v-704q0-104 40.5-198.5t109.5-163.5 163.5-109.5 198.5-40.5h64q26 0 45 19t19 45v128q0 26-19 45t-45 19h-64q-106 0-181 75t-75 181v32q0 40 28 68t68 28h224q80 0 136 56t56 136zm896 0v384q0 80-56 136t-136 56h-384q-80 0-136-56t-56-136v-704q0-104 40.5-198.5t109.5-163.5 163.5-109.5 198.5-40.5h64q26 0 45 19t19 45v128q0 26-19 45t-45 19h-64q-106 0-181 75t-75 181v32q0 40 28 68t68 28h224q80 0 136 56t56 136z"/></svg>');
background-position: center center;
background-repeat: no-repeat;
background-size: contain;
width: 36px;
height: 36px;
position: absolute;
top: 7px;
left: 15px;
}

/* Frame around a group of answer fields */
.tsk-block fieldset {
padding: 10px 20px;
border-radius: 3px;
border: 1px solid #ccc;
margin: 0;
}

/* Question text shown as the caption of a field group */
.tsk-block legend {
font-weight: bold;
padding: 0 6px;
}

/* One checkbox per line. NOTE(review): .field_type_checkbox is presumably generated by the platform for
   checkbox fields - it is not declared in task.html; confirm. */
.field_type_checkbox {
display: block;
}

/* NOTE(review): .task__error is not declared in task.html either; presumably the platform's error popup - confirm. */
.task__error {
border-radius: 3px;
}

/* Second question block (topic name + out-of-topic words). Hidden by default;
   task.js sets its inline display to 'block' or 'none' in setSolution and onRender. */
.second_scale {
display: none;
}

/* Displaying task content on mobile devices */
/* Overrides for viewports up to 600px wide: full-width card, tighter spacing, smaller quote icon. */
@media screen and (max-width: 600px) {
.task-suite {
padding: 0;
}

/* Replaces the fixed 500px desktop width */
.task {
width: 100%;
margin: 0;
}

.task-suite div:not(:last-child) {
margin-bottom: 10px;
}

/* NOTE(review): hint labels and hotkey badges are hidden here, presumably because hotkeys are of no use
   on touch devices - confirm. */
.hint_label,
.field__hotkey {
display: none;
}

/* Let long checkbox labels wrap on narrow screens */
.field_type_checkbox {
white-space: normal;
}

/* Smaller icon ... */
.quote-sign {
width: 20px;
height: 20px;
top: 13px;
}

/* ... and correspondingly smaller left padding of the wordset box */
.obj-text {
padding: 15px 10px 15px 41px;
}
}
29 changes: 29 additions & 0 deletions toloka/Estimate_topics_interpretability/task.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
<div class="tsk-block obj-text">
    <!-- The wordset to evaluate. The triple-curly-bracket component inserts the value unescaped, so the wordset may carry formatted text (HTML tags and styles) -->
    <div class="quote-sign"></div>
    {{{wordset}}}
</div>

<!-- Radio buttons: overall interpretability of the wordset. Every option has its own hotkey (1-4) -->
<div class="tsk-block">
    <fieldset>
        <legend>Can you name a single topic that is represented by this set of words?</legend>
        {{field type="radio" name="quality" value="good" size="L" label="Yes" hotkey="1" class="yes"}}
        {{field type="radio" name="quality" value="rather_good" size="L" label="Rather yes" hotkey="2" class="yes"}}
        {{field type="radio" name="quality" value="rather_bad" size="L" label="Rather no" hotkey="3" class="no"}}
        {{field type="radio" name="quality" value="bad" size="L" label="No" hotkey="4" class="no"}}
    </fieldset>
</div>

<!-- Second question block: topic name and out-of-topic words. Hidden by task.css; task.js shows it for the "Yes" / "Rather yes" answers and in review / read-only mode -->
<div class="tsk-block second_scale">
    <legend>Give a name in one or few words to the topic described with the wordset</legend>
    {{field type="input" name="topic_name" value="" placeholder="Topic name" validation-show="right-center"}}

    <!-- One checkbox per word; the words list is built from the wordset in getTemplateData of task.js -->
    <fieldset>
        <legend>Which words are out of the topic?</legend>
        {{#each words}}
            {{field type="checkbox" name=(concat "bad_words." this.name) label=this.title}}
        {{/each}}
    </fieldset>
</div>
152 changes: 152 additions & 0 deletions toloka/Estimate_topics_interpretability/task.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
exports.Task = extend(TolokaHandlebarsTask, function(options) {
TolokaHandlebarsTask.call(this, options);
}, {
is_good_topic: function(solution) {
const positives = ['good', 'rather_good']
return positives.includes(solution.output_values['quality'])
},

intersection: function(setA, setB) {
return new Set(
[...setA].filter(element => setB.has(element))
);
},

union: function(setA, setB) {
return new Set(
[...setA, ...setB]
);
},

bad_words_set: function(obj) {
let badWordsSet = new Set();
for (var prop in obj) {
if (Object.prototype.hasOwnProperty.call(obj, prop)) {
let word = prop;
let is_bad = obj[word];
if (is_bad) {
badWordsSet.add(word);
}
}
}

return badWordsSet;
},

setSolution: function(solution) {
TolokaHandlebarsTask.prototype.setSolution.apply(this, arguments);
var workspaceOptions = this.getWorkspaceOptions();

var tname = solution.output_values['topic_name'] || "";
this.setSolutionOutputValue("topic_name", tname);

if (this.rendered) {
if (!workspaceOptions.isReviewMode && !workspaceOptions.isReadOnly) {
// Show a set of checkboxes if the answer "There are violations" (BAD) is selected. Otherwise, hide it
if (solution.output_values['quality']) {

var row = this.getDOMElement().querySelector('.second_scale');
row.style.display = this.is_good_topic(solution) ? 'block' : 'none';

if (!this.is_good_topic(solution)) {
let data = this.getTemplateData();
let words_out = {};
for (let i = 0; i < data.words.length; i++) {
words_out[data.words[i].name] = false;
}

this.setSolutionOutputValue("bad_words", words_out);

this.setSolutionOutputValue("topic_name", "");

}
}
}
}
},

getTemplateData: function() {
let data = TolokaHandlebarsTask.prototype.getTemplateData.call(this);

const words = data.wordset.split(" ");
let word_outs = [];
for (let i = 0; i < words.length; i++) {
word_outs.push({'name': words[i], 'title': words[i]});
}

data.words = word_outs;

return data;
},

// Error message processing
addError: function(message, field, errors) {
errors || (errors = {
task_id: this.getOptions().task.id,
errors: {}
});
errors.errors[field] = {
message: message
};

return errors;
},

// Checking the answers: if the answer "There are violations" is selected, at least one checkbox must be checked
validate: function(solution) {
var errors = null;
var topic_name = solution.output_values.topic_name;
topic_name = typeof topic_name !== 'undefined' ? topic_name.trim() : "";
let bad_topic_name = topic_name.length < 3 || topic_name.length > 50

if (this.is_good_topic(solution) && bad_topic_name) {
errors = this.addError("Topic name is less than 3 symbols or more than 50", '__TASK__', errors);
}

var correctBadWords = this.getTask().input_values.correct_bad_words;
var golden;
if (!correctBadWords) {
golden = false;
} else {
var badWords = solution.output_values.bad_words;

let correctBadWordsSet = this.bad_words_set(correctBadWords);
let badWordsSet = this.bad_words_set(badWords);

var intersection = this.intersection(correctBadWordsSet, badWordsSet) ;
var union = this.union(correctBadWordsSet, badWordsSet);
var golden = intersection.size / union.size >= 0.8 ? true : false;
}
this.setSolutionOutputValue("golden_bad_words", golden);

var goldenBinaryQuality = this.is_good_topic(solution);
this.setSolutionOutputValue("golden_binary_quality", goldenBinaryQuality);

return errors || TolokaHandlebarsTask.prototype.validate.apply(this, arguments);
},

// Open the second question block in verification mode to see the checkboxes marked by the performer
onRender: function() {
var workspaceOptions = this.getWorkspaceOptions();

if (workspaceOptions.isReviewMode || workspaceOptions.isReadOnly || this.is_good_topic(this.getSolution())){
var row = this.getDOMElement().querySelector('.second_scale');
row.style.display = 'block';
}

this.rendered = true;
}
});

function extend(ParentClass, constructorFunction, prototypeHash) {
constructorFunction = constructorFunction || function() {
};
prototypeHash = prototypeHash || {};
if (ParentClass) {
constructorFunction.prototype = Object.create(ParentClass.prototype);
}
for (var i in prototypeHash) {
constructorFunction.prototype[i] = prototypeHash[i];
}
return constructorFunction;
}
Loading

0 comments on commit 4d89d9a

Please sign in to comment.