From 760e6e7312ff9f6f6d2c0e02447186092b429feb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacek=20Kr=C3=B3likowski?= Date: Wed, 15 Mar 2017 14:13:01 +0000 Subject: [PATCH] Add Elixir language support (#478) * First, crude version of the elixir language handler. * multiple elixir plugin improvements - attributes, atoms, strings, ... * elixir-lang - added support for number literals with underscores * elixir-lang: added support for atoms as keys in keyword lists * Elixir: added support for sigils, simplified the keywords regex * Elixir: added more keywords from Kernel.SpecialForms * Elixir: adding missing keywords and more permissive atom/variable names * Elixir: better support for binaries/bitstrings * Elixir: added support for the `iex` prompt for interactive examples * added a failing test for elixir * Elixir: fixed the failing test for elixir syntax highlighting * Elixir: Highlighting constructs like `%{"THIS": :foo}` as atoms * Elixir: fixed false-positive highlighting for 0XFF and 0O77 * Elixir: fixing the license text. * Elixir: fixing multiline charlists * Elixir: more robust multiline strings and charlists * Elixir: making string recognition simpler and more correct --- src/lang-ex.js | 82 ++++++++++++++++++++++++++++++++++++++ tests/prettify_test_2.html | 57 ++++++++++++++++++++++++++ tests/prettify_test_2.js | 54 ++++++++++++++++++++++++- 3 files changed, 192 insertions(+), 1 deletion(-) create mode 100644 src/lang-ex.js diff --git a/src/lang-ex.js b/src/lang-ex.js new file mode 100644 index 0000000..9d0aa64 --- /dev/null +++ b/src/lang-ex.js @@ -0,0 +1,82 @@ +/** + * @license + * Copyright (C) 2017 Jacek Królikowski + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @fileoverview + * Registers a language handler for Elixir. + * + * @author nietaki@gmail.com + */ + +PR['registerLangHandler']( + PR['createSimpleLexer']( + [ + [PR['PR_PLAIN'], /^[\t\n\r \xA0]+/, null, '\t\n\r \xA0'], + // # comments + [PR['PR_COMMENT'], /^#.*/, null, '#'], + // a (possibly multiline) charlist + [PR['PR_LITERAL'], /^'(?:[^'\\]|\\(?:.|\n|\r))*'?/, null, '\''], + // @attributes + [PR['PR_ATTRIB_NAME'], /^@\w+/, null, '@'], + [PR['PR_PUNCTUATION'], /^[!%&()*+,\-;<=>?\[\\\]^{|}]+/, null, + '!%&()*+,-;<=>?[\\]^{|}'], + // Borrowed from lang-erlang.js: + [PR['PR_LITERAL'], + /^(?:0o[0-7](?:[0-7]|_[0-7])*|0x[\da-fA-F](?:[\da-fA-F]|_[\da-fA-F])*|\d(?:\d|_\d)*(?:\.\d(?:\d|_\d)*)?(?:[eE][+\-]?\d(?:\d|_\d)*)?)/, + null, '0123456789'] + ], + [ + // the iex> prompt for interactive examples + [PR['PR_ATTRIB_NAME'], /^iex(?:\(\d+\))?> /], + // special case for binaries, so that they don't get presented like atoms + [PR['PR_PUNCTUATION'], /^::/], + // atoms - :__a_word or :"colon followed by a string" + [PR['PR_LITERAL'], /^:(?:\w+[\!\?\@]?|"(?:[^"\\]|\\.)*"?)/], + // compile-time information + [PR['PR_ATTRIB_NAME'], /^(?:__(?:CALLER|ENV|MODULE|DIR)__)/], + // keywords + [PR['PR_KEYWORD'], + /^(?:alias|case|catch|def(?:delegate|exception|impl|macrop?|module|overridable|p?|protocol|struct)|do|else|end|fn|for|if|in|import|quote|raise|require|rescue|super|throw|try|unless|unquote(?:_splicing)?|use|when|with|yield)\b/], + [PR['PR_LITERAL'], /^(?:true|false|nil)\b/], + // atoms as keyword list keys + // NOTE: this does also handle the 
%{"I'm an atom": :foo} case + // + // Contains negative lookahead to handle <<foo::binary>> + [PR['PR_LITERAL'], /^(?:\w+[\!\?\@]?|"(?:[^"\\]|\\.)*"):(?!:)/], + // heredoc: triple double-quoted multi-line string. + // + // NOTE: the opening """ needs to be followed by a newline + [PR['PR_STRING'], + /^"""\s*(\r|\n)+(?:""?(?!")|[^\\"]|\\(?:.|\n|\r))*"{0,3}/], + // A double-quoted multi-line string + [PR['PR_STRING'], + /^"(?:[^"\\]|\\(?:.|\n|\r))*"?(?!")/], + // types + [PR['PR_TYPE'], /^[A-Z]\w*/], + // variables not meant to be used or private functions + [PR['PR_COMMENT'], /^_\w*/], + // plain: variables, functions, ... + [PR['PR_PLAIN'], /^[$a-z]\w*[\!\?]?/], + // sigils with the same starting and ending character. + // Key part: X(?:[^X\r\n\\]|\\.)+X where X is the sigil character + [PR['PR_ATTRIB_VALUE'], /^~[A-Z](?:\/(?:[^\/\r\n\\]|\\.)+\/|\|(?:[^\|\r\n\\]|\\.)+\||"(?:[^"\r\n\\]|\\.)+"|'(?:[^'\r\n\\]|\\.)+')[A-Z]*/i], + // sigils with a different starting and ending character. + // Key part: X(?:[^Y\r\n\\]|\\.)+Y where X and Y are the starting and ending characters + [PR['PR_ATTRIB_VALUE'], /^~[A-Z](?:\((?:[^\)\r\n\\]|\\.)+\)|\[(?:[^\]\r\n\\]|\\.)+\]|\{(?:[^\}\r\n\\]|\\.)+\}|\<(?:[^\>\r\n\\]|\\.)+\>)[A-Z]*/i], + [PR['PR_PUNCTUATION'], /^(?:\.+|\/|[:~])/] + ]), + ['ex','exs']); diff --git a/tests/prettify_test_2.html b/tests/prettify_test_2.html index ca40403..3ad9d72 100644 --- a/tests/prettify_test_2.html +++ b/tests/prettify_test_2.html @@ -25,6 +25,7 @@ 'lang-clj.js', 'lang-css.js', 'lang-dart.js', + 'lang-ex.js', 'lang-lisp.js', 'lang-llvm.js', 'lang-matlab.js', @@ -1083,5 +1084,61 @@

MATLAB

%} +

Elixir

+
+defmodule Foo.Bar do
+  @moduledoc """
+  Tests syntax highlighting for Elixir
+  """
+
+  use Bitwise
+  require Logger
+  alias __MODULE__, as: This
+
+  @default_token_length 10_000
+
+
+  @spec token(length :: integer) :: String.t
+
+  def token(length \\ @default_token_length), do: String.duplicate("x", length)
+
+
+  defp _not_exported(), do: 0xFF + 0xF_F - 0xff
+
+
+  def other(foo, bar) do
+    fun = fn{_a, b} -> b + 1_3.1_4 end
+    fun.(1.0e+20)
+    _str = "string without #{inspect(42)} interpolation" <> " some more
+    with newlines \
+    and newlines"
+    charlist = 'some\'chars
+    with newlines \
+    and newlines'
+    <<x::utf8, _y::size(8), data::binary>> = "fooo"
+    ls = [1 | [2, 3]]
+    map = %{"baz" => "ban"}
+    map = %{foo: :bar, "yes, this compiles": :"also an atom"}
+    :erlang.time()
+    case {foo, bar} do
+      {1, 2} -> 3
+      _something_else -> :error
+      _ -> :"this won't match"
+    end
+    r = 2
+    _bitwise_not = ~~~r
+
+    ~r/foo/iu # regex sigils are treated as normal ones
+    ~S|we have "quotes" and 'quotes' and| <> " more string"
+    ~c"custom sigil char \"is\" fine too"
+    ~r'hello'
+    ~w[hell\] #o] #sigil does not expand to the comment
+    ~w{hello}
+    ~C<hello>
+  end
+
+end
+
+ diff --git a/tests/prettify_test_2.js b/tests/prettify_test_2.js index 3ad054f..239ca88 100644 --- a/tests/prettify_test_2.js +++ b/tests/prettify_test_2.js @@ -956,5 +956,57 @@ var goldens = { '%%}`END`PLN\n' + '`ENDy`END`PUN=`END`LIT20`END`PUN;`END`PLN\n' + '`END`COM%}`END' - ) + ), + elixir: ('`KWDdefmodule`END`PLN `END`TYPFoo`END`PUN.`END`TYPBar`END`PLN `END`KWDdo`END`PLN\n' + +' `END`ATN@moduledoc`END`PLN `END`STR"""\n' + +' Tests syntax highlighting for Elixir\n' + +' """`END`PLN\n' + +'\n' + +' `END`KWDuse`END`PLN `END`TYPBitwise`END`PLN\n' + +' `END`KWDrequire`END`PLN `END`TYPLogger`END`PLN\n' + +' `END`KWDalias`END`PLN `END`ATN__MODULE__`END`PUN,`END`PLN `END`LITas:`END`PLN `END`TYPThis`END`PLN\n' + +'\n' + +' `END`ATN@default_token_length`END`PLN `END`LIT10_000`END`PLN\n' + +'\n' + +'\n' + +' `END`ATN@spec`END`PLN token`END`PUN(`END`PLNlength `END`PUN::`END`PLN integer`END`PUN)`END`PLN `END`PUN::`END`PLN `END`TYPString`END`PUN.`END`PLNt\n' + +'\n' + +' `END`KWDdef`END`PLN token`END`PUN(`END`PLNlength `END`PUN\\\\`END`PLN `END`ATN@default_token_length`END`PUN),`END`PLN `END`KWDdo`END`PUN:`END`PLN `END`TYPString`END`PUN.`END`PLNduplicate`END`PUN(`END`STR"x"`END`PUN,`END`PLN length`END`PUN)`END`PLN\n' + +'\n' + +'\n' + +' `END`KWDdefp`END`PLN `END`COM_not_exported`END`PUN(),`END`PLN `END`KWDdo`END`PUN:`END`PLN `END`LIT0xFF`END`PLN `END`PUN+`END`PLN `END`LIT0xF_F`END`PLN `END`PUN-`END`PLN `END`LIT0xff`END`PLN\n' + +'\n' + +'\n' + +' `END`KWDdef`END`PLN other`END`PUN(`END`PLNfoo`END`PUN,`END`PLN bar`END`PUN)`END`PLN `END`KWDdo`END`PLN\n' + +' fun `END`PUN=`END`PLN `END`KWDfn`END`PUN{`END`COM_a`END`PUN,`END`PLN b`END`PUN}`END`PLN `END`PUN->`END`PLN b `END`PUN+`END`PLN `END`LIT1_3.1_4`END`PLN `END`KWDend`END`PLN\n' + +' fun`END`PUN.(`END`LIT1.0e+20`END`PUN)`END`PLN\n' + +' `END`COM_str`END`PLN `END`PUN=`END`PLN `END`STR"string without #{inspect(42)} interpolation"`END`PLN `END`PUN<>`END`PLN `END`STR" some more\n' + +' with newlines \\\n' + +' and 
newlines"`END`PLN\n' + +' charlist `END`PUN=`END`PLN `END`LIT\'some\\\'chars\n' + +' with newlines \\\n' + +' and newlines\'`END`PLN\n' + +' `END`PUN<<`END`PLNx`END`PUN::`END`PLNutf8`END`PUN,`END`PLN `END`COM_y`END`PUN::`END`PLNsize`END`PUN(`END`LIT8`END`PUN),`END`PLN data`END`PUN::`END`PLNbinary`END`PUN>>`END`PLN `END`PUN=`END`PLN `END`STR"fooo"`END`PLN\n' + +' ls `END`PUN=`END`PLN `END`PUN[`END`LIT1`END`PLN `END`PUN|`END`PLN `END`PUN[`END`LIT2`END`PUN,`END`PLN `END`LIT3`END`PUN]]`END`PLN\n' + +' map `END`PUN=`END`PLN `END`PUN%{`END`STR"baz"`END`PLN `END`PUN=>`END`PLN `END`STR"ban"`END`PUN}`END`PLN\n' + +' map `END`PUN=`END`PLN `END`PUN%{`END`LITfoo:`END`PLN `END`LIT:bar`END`PUN,`END`PLN `END`LIT"yes, this compiles":`END`PLN `END`LIT:"also an atom"`END`PUN}`END`PLN\n' + +' `END`LIT:erlang`END`PUN.`END`PLNtime`END`PUN()`END`PLN\n' + +' `END`KWDcase`END`PLN `END`PUN{`END`PLNfoo`END`PUN,`END`PLN bar`END`PUN}`END`PLN `END`KWDdo`END`PLN\n' + +' `END`PUN{`END`LIT1`END`PUN,`END`PLN `END`LIT2`END`PUN}`END`PLN `END`PUN->`END`PLN `END`LIT3`END`PLN\n' + +' `END`COM_something_else`END`PLN `END`PUN->`END`PLN `END`LIT:error`END`PLN\n' + +' `END`COM_`END`PLN `END`PUN->`END`PLN `END`LIT:"this won\'t match"`END`PLN\n' + +' `END`KWDend`END`PLN\n' + +' r `END`PUN=`END`PLN `END`LIT2`END`PLN\n' + +' `END`COM_bitwise_not`END`PLN `END`PUN=`END`PLN `END`PUN~~~`END`PLNr\n' + +'\n' + +' `END`ATV~r/foo/iu`END`PLN `END`COM# regex sigils are treated as normal ones`END`PLN\n' + +' `END`ATV~S|we have "quotes" and \'quotes\' and|`END`PLN `END`PUN<>`END`PLN `END`STR" more string"`END`PLN\n' + +' `END`ATV~c"custom sigil char \\"is\\" fine too"`END`PLN\n' + +' `END`ATV~r\'hello\'`END`PLN\n' + +' `END`ATV~w[hell\\] #o]`END`PLN `END`COM#sigil does not expand to the comment`END`PLN\n' + +' `END`ATV~w{hello}`END`PLN\n' + +' `END`ATV~C<hello>`END`PLN\n' + +' `END`KWDend`END`PLN\n' + +'\n' + +'`END`KWDend`END') };