Malaya $label classification
+$p
+diff --git a/README.rst b/README.rst index 56167ccd..2f4d6d9e 100644 --- a/README.rst +++ b/README.rst @@ -42,7 +42,7 @@ Features - **Emotion Analysis** - From BERT, Fast-Text, Dynamic-Memory Network, Sparse Tensorflow, Attention Neural Network to build deep emotion analysis models. + From Attention-Recurrent model, Sparse Tensorflow, Self-Attention to build deep emotion analysis models. - **Entities Recognition** Latest state-of-art CRF deep learning models to do Naming Entity Recognition. @@ -66,16 +66,19 @@ Features - **ELMO (biLM)** Provide pretrained bahasa wikipedia and bahasa news ELMO, with easy interface and visualization. +- **Relevancy Analysis** + + From Dilated Convolutional Neural Network and Self-Attention to build deep relevancy analysis models. - **Sentiment Analysis** - From BERT, Fast-Text, Dynamic-Memory Network, Sparse Tensorflow, Attention Neural Network to build deep sentiment analysis models. + From Attention-Recurrent model, Sparse Tensorflow and Self-Attention to build deep sentiment analysis models. - **Spell Correction** Using local Malaysia NLP researches to auto-correct any bahasa words. - Stemmer - **Subjectivity Analysis** - From BERT, Fast-Text, Dynamic-Memory Network, Sparse Tensorflow, Attention Neural Network to build deep subjectivity analysis models. + From Attention-Recurrent model, Sparse Tensorflow and Self-Attention to build deep subjectivity analysis models. - **Summarization** Using skip-thought with attention state-of-art to give precise unsupervised summarization. @@ -84,7 +87,7 @@ Features Provide LDA2Vec, LDA, NMF and LSA interface for easy topic modelling with topics visualization. - **Toxicity Analysis** - From BERT, Fast-Text, Dynamic-Memory Network, Attention Neural Network to build deep toxicity analysis models. + From Attention-Recurrent model, Self-Attention to build deep toxicity analysis models. - **Word2Vec** Provide pretrained bahasa wikipedia and bahasa news Word2Vec, with easy interface and visualization. diff --git a/docs/Api.rst b/docs/Api.rst index 109f9390..8eccc823 100644 --- a/docs/Api.rst +++ b/docs/Api.rst @@ -69,9 +69,6 @@ malaya.normalize .. automodule:: malaya.normalize :members: -.. autoclass:: malaya.normalize._DEEP_NORMALIZER() - :members: - .. autoclass:: malaya.normalize._SPELL_NORMALIZE() :members: @@ -96,6 +93,12 @@ malaya.preprocessing .. automodule:: malaya.preprocessing :members: +malaya.relevancy +------------------ + +.. automodule:: malaya.relevancy + :members: + malaya.sentiment ----------------- diff --git a/docs/Dataset.rst b/docs/Dataset.rst index f0b373dd..69dfe8b1 100644 --- a/docs/Dataset.rst +++ b/docs/Dataset.rst @@ -1,6 +1,14 @@ Dataset ======= +.. raw:: html + +
+ + + +
+ We want to make sure not just the code we open-sourced, but also goes to dataset, so everyone can validate. @@ -8,252 +16,6 @@ You can check in `Malaya-Dataset$p
+$p
+$p
+a&&(a=u),n.push(u)}
+for(var h=0;ha&&(a=d)}
+return r.y0=o,r.max=a,r}
+function km(t){var e=0;d(t.children,function(t){km(t);var i=t.value;y(i)&&(i=i[0]),e+=i});var i=t.value;y(i)&&(i=i[0]),(null==i||isNaN(i))&&(i=e),i<0&&(i=0),y(t.value)?t.value[0]=i:t.value=i}
+function Pm(t,e,i){function n(){r.ignore=r.hoverIgnore}
+function o(){r.ignore=r.normalIgnore}
+Zw.call(this);var a=new QS({z2:xP});a.seriesIndex=e.seriesIndex;var r=new qS({z2:_P,silent:t.getModel("label").get("silent")});this.add(a),this.add(r),this.updateData(!0,t,"normal",e,i),this.on("emphasis",n).on("normal",o).on("mouseover",n).on("mouseout",o)}
+function Nm(t,e,i){var n=t.getVisual("color"),o=t.getVisual("visualMeta");o&&0!==o.length||(n=null);var a=t.getModel("itemStyle").get("color");if(a)return a;if(n)return n;if(0===t.depth)return i.option.color[0];var r=i.option.color.length;return a=i.option.color[Om(t)%r]}
+function Om(t){for(var e=t;e.depth>1;)e=e.parentNode;return l(t.getAncestors()[0].children,e)}
+function Em(t,e,i){return i!==yP.NONE&&(i===yP.SELF?t===e:i===yP.ANCESTOR?t===e||t.isAncestorOf(e):t===e||t.isDescendantOf(e))}
+function Rm(t,e){var i=t.children||[];t.children=zm(i,e),i.length&&d(t.children,function(t){Rm(t,e)})}
+function zm(t,e){if("function"==typeof e)return t.sort(e);var i="asc"===e;return t.sort(function(t,e){var n=(t.getValue()-e.getValue())*(i?1:-1);return 0===n?(t.dataIndex-e.dataIndex)*(i?-1:1):n})}
+function Bm(t,e){return e=e||[0,0],f(["x","y"],function(i,n){var o=this.getAxis(i),a=e[n],r=t[n]/2;return"category"===o.type?o.getBandWidth():Math.abs(o.dataToCoord(a-r)-o.dataToCoord(a+r))},this)}
+function Vm(t,e){return e=e||[0,0],f([0,1],function(i){var n=e[i],o=t[i]/2,a=[],r=[];return a[i]=n-o,r[i]=n+o,a[1-i]=r[1-i]=e[1-i],Math.abs(this.dataToPoint(a)[i]-this.dataToPoint(r)[i])},this)}
+function Gm(t,e){var i=this.getAxis(),n=e instanceof Array?e[0]:e,o=(t instanceof Array?t[0]:t)/2;return"category"===i.type?i.getBandWidth():Math.abs(i.dataToCoord(n-o)-i.dataToCoord(n+o))}
+function Fm(t,e){return f(["Radius","Angle"],function(i,n){var o=this["get"+i+"Axis"](),a=e[n],r=t[n]/2,s="dataTo"+i,l="category"===o.type?o.getBandWidth():Math.abs(o[s](a-r)-o[s](a+r));return"Angle"===i&&(l=l*Math.PI/180),l},this)}
+function Wm(t){var e,i=t.type;if("path"===i){var n=t.shape,o=null!=n.width&&null!=n.height?{x:n.x||0,y:n.y||0,width:n.width,height:n.height}:null,a=tv(n);(e=Wn(a,null,o,n.layout||"center")).__customPathData=a}else "image"===i?(e=new hi({})).__customImagePath=t.style.image:"text"===i?(e=new qS({})).__customText=t.style.text:e=new(0,bM[i.charAt(0).toUpperCase()+i.slice(1)]);return e.__customGraphicType=i,e.name=t.name,e}
+function Hm(t,e,n,o,a,r,s){var l={},u=n.style||{};if(n.shape&&(l.shape=i(n.shape)),n.position&&(l.position=n.position.slice()),n.scale&&(l.scale=n.scale.slice()),n.origin&&(l.origin=n.origin.slice()),n.rotation&&(l.rotation=n.rotation),"image"===t.type&&n.style){h=l.style={};d(["x","y","width","height"],function(e){Zm(e,h,u,t.style,r)})}
+if("text"===t.type&&n.style){var h=l.style={};d(["x","y"],function(e){Zm(e,h,u,t.style,r)}),!u.hasOwnProperty("textFill")&&u.fill&&(u.textFill=u.fill),!u.hasOwnProperty("textStroke")&&u.stroke&&(u.textStroke=u.stroke)}
+if("group"!==t.type&&(t.useStyle(u),r)){t.style.opacity=0;var c=u.opacity;null==c&&(c=1),So(t,{style:{opacity:c}},o,e)}
+r?t.attr(l):bo(t,l,o,e),n.hasOwnProperty("z2")&&t.attr("z2",n.z2||0),n.hasOwnProperty("silent")&&t.attr("silent",n.silent),n.hasOwnProperty("invisible")&&t.attr("invisible",n.invisible),n.hasOwnProperty("ignore")&&t.attr("ignore",n.ignore),n.hasOwnProperty("info")&&t.attr("info",n.info);var f=n.styleEmphasis,p=!1===f;t.__cusHasEmphStl&&null==f||!t.__cusHasEmphStl&&p||(no(t,f),t.__cusHasEmphStl=!p),s&&ho(t,!p)}
+function Zm(t,e,i,n,o){null==i[t]||o||(e[t]=i[t],i[t]=n[t])}
+function Um(t,e,i,n){function o(t){null==t&&(t=h),v&&(c=e.getItemModel(t),d=c.getModel(AP),f=c.getModel(DP),p=e.getItemVisual(t,"color"),v=!1)}
+var s=t.get("renderItem"),l=t.coordinateSystem,u={};l&&(u=l.prepareCustoms?l.prepareCustoms():LP[l.type](l));var h,c,d,f,p,g=r({getWidth:n.getWidth,getHeight:n.getHeight,getZr:n.getZr,getDevicePixelRatio:n.getDevicePixelRatio,value:function(t,i){return null==i&&(i=h),e.get(e.getDimension(t||0),i)},style:function(i,n){null==n&&(n=h),o(n);var r=c.getModel(IP).getItemStyle();null!=p&&(r.fill=p);var s=e.getItemVisual(n,"opacity");return null!=s&&(r.opacity=s),fo(r,d,null,{autoColor:p,isRectText:!0}),r.text=d.getShallow("show")?A(t.getFormattedLabel(n,"normal"),gu(e,n)):null,i&&a(r,i),r},styleEmphasis:function(i,n){null==n&&(n=h),o(n);var r=c.getModel(TP).getItemStyle();return fo(r,f,null,{isRectText:!0},!0),r.text=f.getShallow("show")?D(t.getFormattedLabel(n,"emphasis"),t.getFormattedLabel(n,"normal"),gu(e,n)):null,i&&a(r,i),r},visual:function(t,i){return null==i&&(i=h),e.getItemVisual(i,t)},barLayout:function(t){if(l.getBaseAxis)return Tl(r({axis:l.getBaseAxis()},t),n)},currentSeriesIndices:function(){return i.getCurrentSeriesIndices()},font:function(t){return _o(t,i)}},u.api||{}),m={context:{},seriesId:t.id,seriesName:t.name,seriesIndex:t.seriesIndex,coordSys:u.coordSys,dataInsideLength:e.count(),encode:Xm(t.getData())},v=!0;return function(t,i){return h=t,v=!0,s&&s(r({dataIndexInside:t,dataIndex:e.getRawIndex(t),actionType:i?i.type:null},m),g)}}
+function Xm(t){var e={};return d(t.dimensions,function(i,n){var o=t.getDimensionInfo(i);if(!o.isExtraCoord){var a=o.coordDim;(e[a]=e[a]||[])[o.coordDimIndex]=n}}),e}
+function jm(t,e,i,n,o,a){return(t=Ym(t,e,i,n,o,a,!0))&&a.setItemGraphicEl(e,t),t}
+function Ym(t,e,i,n,o,a,r){var s=!i,l=(i=i||{}).type,u=i.shape,h=i.style;if(t&&(s||null!=l&&l!==t.__customGraphicType||"path"===l&&ev(u)&&tv(u)!==t.__customPathData||"image"===l&&iv(h,"image")&&h.image!==t.__customImagePath||"text"===l&&iv(u,"text")&&h.text!==t.__customText)&&(o.remove(t),t=null),!s){var c=!t;return!t&&(t=Wm(i)),Hm(t,e,i,n,a,c,r),"group"===l&&qm(t,e,i,n,a),o.add(t),t}}
+function qm(t,e,i,n,o){var a=i.children,r=a?a.length:0,s=i.$mergeChildren,l="byName"===s||i.diffChildrenByName,u=!1===s;if(r||l||u)
+if(l)Km({oldChildren:t.children()||[],newChildren:a||[],dataIndex:e,animatableModel:n,group:t,data:o});else{u&&t.removeAll();for(var h=0;h
":"",m=g+f.join(g||", ");return{renderMode:n,content:m,style:l}}(g):o(f?hr(h,t,c[0]):m?g[0]:g)).content,_=a.seriesIndex+"at"+u,b=ia({color:v,type:"item",renderMode:n,markerId:_});l[_]=v,++u;var S=h.getName(t),M=this.name;Oi(this)||(M=""),M=M?Qo(M)+(e?": ":r):"";var I="string"==typeof b?b:b.content;return{html:e?I+M+x:M+I+(S?Qo(S)+": "+x:x),markers:l}},isAnimationEnabled:function(){if(z_.node)return!1;var t=this.getShallow("animation");return t&&this.getData().count()>this.getShallow("animationThreshold")&&(t=!1),t},restoreData:function(){this.dataTask.dirty()},getColorFromPalette:function(t,e,i){var n=this.ecModel,o=JM.getColorFromPalette.call(this,t,e,i);return o||(o=n.getColorFromPalette(t,e,i)),o},coordDimToDataDim:function(t){return this.getRawData().mapDimension(t,!0)},getProgressive:function(){return this.get("progressive")},getProgressiveThreshold:function(){return this.get("progressiveThreshold")},getAxisTooltipData:null,getTooltipPosition:null,pipeTask:null,preventIncremental:null,pipelineContext:null});h(OI,LI),h(OI,JM);var EI=function(){this.group=new Zw,this.uid=No("viewComponent")};EI.prototype={constructor:EI,init:function(t,e){},render:function(t,e,i,n){},dispose:function(){},filterForExposedEvent:null};var RI=EI.prototype;RI.updateView=RI.updateLayout=RI.updateVisual=function(t,e,i,n){},Xi(EI),Ki(EI,{registerWhenExtend:!0});var zI=function(){var t=Bi();return function(e){var i=t(e),n=e.pipelineContext,o=i.large,a=i.progressiveRender,r=i.large=n.large,s=i.progressiveRender=n.progressiveRender;return!!(o^r||a^s)&&"reset"}},BI=Bi(),VI=zI();Mr.prototype={type:"chart",init:function(t,e){},render:function(t,e,i,n){},highlight:function(t,e,i,n){Tr(t.getData(),n,"emphasis")},downplay:function(t,e,i,n){Tr(t.getData(),n,"normal")},remove:function(t,e){this.group.removeAll()},dispose:function(){},incrementalPrepareRender:null,incrementalRender:null,updateTransform:null,filterForExposedEvent:null};var GI=Mr.prototype;GI.updateView=GI.updateLayout=GI.updateVisual=function(t,e,i,n){this.render(t,e,i,n)},Xi(Mr),Ki(Mr,{registerWhenExtend:!0}),Mr.markUpdateMethod=function(t,e){BI(t).updateMethod=e};var FI={incrementalPrepareRender:{progress:function(t,e){e.view.incrementalRender(t,e.model,e.ecModel,e.api,e.payload)}},render:{forceFirstProgress:!0,progress:function(t,e){e.view.render(e.model,e.ecModel,e.api,e.payload)}}},WI="\0__throttleOriginMethod",HI="\0__throttleRate",ZI="\0__throttleType",UI={createOnAllSeries:!0,performRawSeries:!0,reset:function(t,e){var i=t.getData(),n=(t.visualColorAccessPath||"itemStyle.color").split("."),o=t.get(n)||t.getColorFromPalette(t.name,null,e.getSeriesCount());if(i.setVisual("color",o),!e.isSeriesFiltered(t)){"function"!=typeof o||o instanceof cM||i.each(function(e){i.setItemVisual(e,"color",o(t.getDataParams(e)))});return{dataEach:i.hasItemOption?function(t,e){var i=t.getItemModel(e).get(n,!0);null!=i&&t.setItemVisual(e,"color",i)}:null}}}},XI={toolbox:{brush:{title:{rect:"Rectangular selection",polygon:"Circle selection",lineX:"Horizontal selection",lineY:"Vertical selection",keep:"Keep",clear:"Clear"}},dataView:{title:"Data View",lang:["Data View","Shut Down","Refresh"]},dataZoom:{title:{zoom:"Zoom",back:"Back"}},magicType:{title:{line:"Switch to a line chart",bar:"Switch to histogram",stack:"Switch to stack",tiled:"Switch to tiled"}},restore:{title:"Restore"},saveAsImage:{title:"Save as Image",lang:["Right click to save as picture"]}},series:{typeNames:{pie:"Piechart",bar:"Barplot",line:"Lineplot",scatter:"Scatterplot",effectScatter:"EffectScatter",radar:"Radar",tree:"Tree",treemap:"Treemap",boxplot:"Boxplot",candlestick:"Candlestick",k:"K-linechart",heatmap:"Heatmap",map:"Map",parallel:"Parallel",lines:"Lines",graph:"Relation",sankey:"Sankey",funnel:"Funnel",gauge:"Gauge",pictorialBar:"PictorialBar",themeRiver:"themeRiver",sunburst:"Sunburst"}},aria:{general:{withTitle:"This is a chart about '{title}'.",withoutTitle:"This is a chart, "},series:{single:{prefix:"",withName:"The chart type is {seriesType}, which means {seriesName}.",withoutName:"The chart type is {seriesType}."},multiple:{prefix:"It consists of {seriesCount} chart series.",withName:"The {seriesId} series is a {seriesType} representing {seriesName}, ",withoutName:"The {seriesId} series is a {seriesType}, ",separator:{middle:";",end:"。"}}},data:{allData:"Its data is -",partialData:"Among them, the first {displayCnt} item is -",withName:"The data for {name} is {value}",withoutName:"{value}",separator:{middle:",",end:""}}}},jI=function(t,e){function i(t,e){if("string"!=typeof t)return t;var i=t;return d(e,function(t,e){i=i.replace(new RegExp("\\{\\s*"+e+"\\s*\\}","g"),t)}),i}
+function n(t){var e=a.get(t);if(null==e){for(var i=t.split("."),n=XI.aria,o=0;oC[1]&&(C[1]=D)}
+if(!o.pure){var L=u[b];if(w&&null==L)
+if(null!=w.name)u[b]=L=w.name;else if(null!=i){var k=r[i],P=a[k][S];if(P){L=P[M];var N=l[k].ordinalMeta;N&&N.categories.length&&(L=N.categories[L])}}
+var O=null==w?null:w.id;null==O&&null!=L&&(d[L]=d[L]||0,O=L,d[L]>0&&(O+="__ec__"+d[L]),d[L]++),null!=O&&(h[b]=O)}}!o.persistent&&o.clean&&o.clean(),this._rawCount=this._count=e,this._extent={},$s(this)}},iA.count=function(){return this._count},iA.getIndices=function(){var t=this._indices;if(t){var e=t.constructor,i=this._count;if(e===Array){n=new e(i);for(o=0;o=0&&e=0&&a<0)&&(o=u,a=l,n.length=0),n.push(r))}
+return n},iA.getRawIndex=Qs,iA.getRawDataItem=function(t){if(this._rawData.persistent)return this._rawData.getItem(this.getRawIndex(t));for(var e=[],i=0;ic[1]&&(c[1]=x),d[f++]=_}
+return o._count=f,o._indices=d,o.getRawIndex=tl,o},iA.getItemModel=function(t){var e=this.hostModel;return new Lo(this.getRawDataItem(t),e,e&&e.ecModel)},iA.diff=function(t){var e=this;return new Hs(t?t.getIndices():[],this.getIndices(),function(e){return el(t,e)},function(t){return el(e,t)})},iA.getVisual=function(t){var e=this._visual;return e&&e[t]},iA.setVisual=function(t,e){if(YT(t))
+for(var i in t)t.hasOwnProperty(i)&&this.setVisual(i,t[i]);else this._visual=this._visual||{},this._visual[t]=e},iA.setLayout=function(t,e){if(YT(t))
+for(var i in t)t.hasOwnProperty(i)&&this.setLayout(i,t[i]);else this._layout[t]=e},iA.getLayout=function(t){return this._layout[t]},iA.getItemLayout=function(t){return this._itemLayouts[t]},iA.setItemLayout=function(t,e,i){this._itemLayouts[t]=i?a(this._itemLayouts[t]||{},e):e},iA.clearItemLayouts=function(){this._itemLayouts.length=0},iA.getItemVisual=function(t,e,i){var n=this._itemVisuals[t],o=n&&n[e];return null!=o||i?o:this.getVisual(e)},iA.setItemVisual=function(t,e,i){var n=this._itemVisuals[t]||{},o=this.hasItemVisual;if(this._itemVisuals[t]=n,YT(e))
+for(var a in e)e.hasOwnProperty(a)&&(n[a]=e[a],o[a]=!0);else n[e]=i,o[e]=!0},iA.clearAllVisual=function(){this._visual={},this._itemVisuals=[],this.hasItemVisual={}};var nA=function(t){t.seriesIndex=this.seriesIndex,t.dataIndex=this.dataIndex,t.dataType=this.dataType};iA.setItemGraphicEl=function(t,e){var i=this.hostModel;e&&(e.dataIndex=t,e.dataType=this.dataType,e.seriesIndex=i&&i.seriesIndex,"group"===e.type&&e.traverse(nA,e)),this._graphicEls[t]=e},iA.getItemGraphicEl=function(t){return this._graphicEls[t]},iA.eachItemGraphicEl=function(t,e){d(this._graphicEls,function(i,n){i&&t&&t.call(e,i,n)})},iA.cloneShallow=function(t){if(!t){var e=f(this.dimensions,this.getDimensionInfo,this);t=new eA(e,this.hostModel)}
+if(t._storage=this._storage,Ks(t,this),this._indices){var i=this._indices.constructor;t._indices=new i(this._indices)}else t._indices=null;return t.getRawIndex=t._indices?tl:Qs,t},iA.wrapMethod=function(t,e){var i=this[t];"function"==typeof i&&(this.__wrappedMethods=this.__wrappedMethods||[],this.__wrappedMethods.push(t),this[t]=function(){var t=i.apply(this,arguments);return e.apply(this,[t].concat(C(arguments)))})},iA.TRANSFERABLE_METHODS=["cloneShallow","downSample","map"],iA.CHANGABLE_METHODS=["filterSelf","selectRange"];var oA=function(t,e){return e=e||{},rl(e.coordDimensions||[],t,{dimsDef:e.dimensionsDefine||t.dimensionsDefine,encodeDef:e.encodeDefine||t.encodeDefine,dimCount:e.dimensionsCount,generateCoord:e.generateCoord,generateCoordCount:e.generateCoordCount})};gl.prototype.parse=function(t){return t},gl.prototype.getSetting=function(t){return this._setting[t]},gl.prototype.contain=function(t){var e=this._extent;return t>=e[0]&&t<=e[1]},gl.prototype.normalize=function(t){var e=this._extent;return e[1]===e[0]?.5:(t-e[0])/(e[1]-e[0])},gl.prototype.scale=function(t){var e=this._extent;return t*(e[1]-e[0])+e[0]},gl.prototype.unionExtent=function(t){var e=this._extent;t[0]