Skip to content

Commit

Permalink
port sqlite extension to libpcre2
Browse files Browse the repository at this point in the history
  • Loading branch information
obfusk committed Aug 31, 2024
1 parent 02e6255 commit fd2df02
Show file tree
Hide file tree
Showing 11 changed files with 146 additions and 137 deletions.
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,6 @@ __pycache__
/htmlcov

/jiten/.version
/jiten/_sqlite3_pcre.*.so
/jiten/_sqlite3_pcre*.so
/tmp
/tmp-html
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ coverage: tmp-html

clean: cleanup
rm -f jiten/res/*.sqlite3
rm -f jiten/_sqlite3_pcre.*.so
rm -f jiten/_sqlite3_pcre*.so
rm -fr jiten.egg-info/
$(MAKE) -C jiten/res/jmdict clean
$(MAKE) -C jiten/res/sentences clean
Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -233,15 +233,15 @@ $ make
* Python >= 3.8 + Flask + click +
[kanjidraw](https://github.com/obfusk/kanjidraw).
* To build the SQLite PCRE C extension: a C compiler (e.g.
`gcc`/`clang`) and the `sqlite3` & `pcre` libraries & headers.
`gcc`/`clang`) and the `sqlite3` & `pcre2` libraries & headers.
* Basic build tools like `make` and `patch`.
* To run the WebView GUI:
[pywebview](https://pywebview.flowrl.com) >= 3.3.5.

### Debian/Ubuntu

```bash
$ apt install python3-dev build-essential libsqlite3-dev libpcre3-dev
$ apt install python3-dev build-essential libsqlite3-dev libpcre2-dev
$ apt install python3-flask # optional: Flask & click w/o pip
```

Expand Down
6 changes: 3 additions & 3 deletions jiten/jmdict.py
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,7 @@
from . import misc as M
from . import pitch as P
from .kana import katakana2hiragana
from .sql import sqlite_do, load_pcre_extension
from .sql import sqlite_do, load_pcre2_extension

DBVERSION = 14 # NB: update this when data/schema changes
SQLITE_FILE = M.resource_path("res/jmdict.sqlite3")
Expand Down Expand Up @@ -678,7 +678,7 @@ def search(q, langs = [LANGS[0]], max_results = None, # {{{1
""".format(fltr, ordr, limit),
dict(q = "%"+q+"%", si = sinfo)) # safe!
elif M.q2like(q):
load_pcre_extension(c.connection)
load_pcre2_extension(c.connection)
prms = dict(q = M.q2like(q), re = M.q2rx(q), si = sinfo)
query = ("""
SELECT rank, seq, jlpt FROM (
Expand All @@ -697,7 +697,7 @@ def search(q, langs = [LANGS[0]], max_results = None, # {{{1
{} {} {}
""".format(fltr, ordr, limit), prms) # safe!
else:
load_pcre_extension(c.connection)
load_pcre2_extension(c.connection)
query = ("""
SELECT rank, seq, jlpt FROM (
SELECT entry FROM kanji WHERE elem REGEXP :re
Expand Down
4 changes: 2 additions & 2 deletions jiten/kanji.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@

from . import jmdict as J
from . import misc as M
from .sql import sqlite_do, load_pcre_extension
from .sql import sqlite_do, load_pcre2_extension

SQLITE_FILE = M.resource_path("res/kanji.sqlite3")
KANJIDIC_FILE = M.resource_path("res/jmdict/kanjidic2.xml.gz")
Expand Down Expand Up @@ -407,7 +407,7 @@ def search(q, max_results = None, level = None, jlpt = None,
for r in c.execute("SELECT * FROM entry WHERE code = ?", (ord(char),)):
yield row2entry(r) # #=1
else:
load_pcre_extension(c.connection)
load_pcre2_extension(c.connection)
for r in c.execute(f"""
SELECT * FROM entry WHERE (
on_ REGEXP :re OR
Expand Down
2 changes: 1 addition & 1 deletion jiten/misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,7 @@ def download_file(url, file, sha512 = None, tmp = ".tmp"): # {{{1
),

libffi = dict(url = "https://github.com/libffi/libffi"),
libpcre = dict(url = "https://www.pcre.org"),
libpcre2 = dict(url = "https://www.pcre.org"),
openssl = dict(url = "https://www.openssl.org"),
pyjnius = dict(url = "https://github.com/kivy/pyjnius"),
python3 = dict(url = "https://www.python.org"),
Expand Down
6 changes: 3 additions & 3 deletions jiten/sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,9 @@ def sqlite_do(file, write = False):
finally:
conn.close()

def load_pcre_extension(conn):
spec = importlib.util.find_spec("jiten._sqlite3_pcre")
if spec is None: raise RuntimeError("jiten._sqlite3_pcre not found")
def load_pcre2_extension(conn):
spec = importlib.util.find_spec("jiten._sqlite3_pcre2")
if spec is None: raise RuntimeError("jiten._sqlite3_pcre2 not found")
conn.enable_load_extension(True)
if hasattr(conn, "load_extension"):
conn.load_extension(spec.origin)
Expand Down
2 changes: 1 addition & 1 deletion jiten/templates/_help.html
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,7 @@ <h5 class="mb-0">
target="_blank" rel="noopener">Python Regex Syntax</a>.
<br/>
For when the details matter:
<a href="https://www.pcre.org/original/doc/html/pcresyntax.html"
<a href="https://www.pcre.org/current/doc/html/pcre2syntax.html"
target="_blank" rel="noopener">PCRE2 Syntax</a>
(what jiten actually uses).
<h6 class="mt-4">Prefix "Commands"</h6>
Expand Down
6 changes: 3 additions & 3 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@
clean = "clean" in sys.argv[1:]

info = Path(__file__).with_name("README.md").read_text(encoding = "utf8")
pcre = setuptools.Extension("jiten._sqlite3_pcre", ["sqlite3-pcre.c"],
libraries = "pcre sqlite3".split())
pcre2 = setuptools.Extension("jiten._sqlite3_pcre2", ["sqlite3-pcre2.c"],
libraries = "pcre2-8 sqlite3".split())

data = [ "res/jlpt/N" + l + "-" + x for l in "12345"
for x in "kanji vocab-eng vocab-hiragana".split() ] \
Expand Down Expand Up @@ -98,5 +98,5 @@
python_requires = ">=3.8",
install_requires = ["Flask", "click>=6.0", "kanjidraw>=0.2.1"],
extras_require = dict(gui = ["pywebview>=3.3.5"]),
ext_modules = [pcre],
ext_modules = [pcre2],
)
120 changes: 0 additions & 120 deletions sqlite3-pcre.c

This file was deleted.

129 changes: 129 additions & 0 deletions sqlite3-pcre2.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
/*
* Written by Alexey Tourbin <[email protected]>.
* Modified by FC (Fay) Stegerman <[email protected]>.
*
* The author has dedicated the code to the public domain. Anyone is free
* to copy, modify, publish, use, compile, sell, or distribute the original
* code, either in source code form or as a compiled binary, for any purpose,
* commercial or non-commercial, and by any means.
*/
#define PCRE2_CODE_UNIT_WIDTH 8
#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include <pcre2.h>
#include <sqlite3ext.h>
SQLITE_EXTENSION_INIT1

/*
 * One slot of the compiled-pattern cache used by regexp().
 * The cache array is zero-initialised (calloc), so an unused slot has
 * all three members NULL.
 */
typedef struct {
char *str; /* heap copy (strdup) of the pattern source; NULL marks an unused slot */
pcre2_code *code; /* compiled form of str, released with pcre2_code_free() */
pcre2_match_data *match_data; /* match block created from code, reused for every match */
} cache_entry;

/*
 * Number of slots in the compiled-pattern cache. The #ifndef guard lets
 * the build override it (e.g. -DCACHE_SIZE=32); 16 is the default.
 */
#ifndef CACHE_SIZE
#define CACHE_SIZE 16
#endif

/*
 * SQL function implementing REGEXP(pattern, subject).
 *
 * argv[0] is the regular expression and argv[1] the subject string (SQLite
 * rewrites "X REGEXP Y" into regexp(Y, X)). The result is the integer 1 if
 * the pattern matches anywhere in the subject and 0 otherwise.
 *
 * Compiled patterns are kept in a small most-recently-used-first cache
 * (CACHE_SIZE entries) obtained from sqlite3_user_data(), so that a query
 * applying the same pattern to many rows compiles it only once.
 *
 * An SQL error is raised when either argument is NULL, when the pattern
 * fails to compile, or when memory runs out.
 */
static void regexp(sqlite3_context *ctx, int argc, sqlite3_value **argv)
{
    const char *re, *str;
    pcre2_code *code;
    pcre2_match_data *match_data;

    assert(argc == 2);

    re = (const char *) sqlite3_value_text(argv[0]);
    if (re == NULL) {
        sqlite3_result_error(ctx, "[REGEXP] no regexp", -1);
        return;
    }

    str = (const char *) sqlite3_value_text(argv[1]);
    if (str == NULL) {
        sqlite3_result_error(ctx, "[REGEXP] no string", -1);
        return;
    }

    /* simple LRU cache: slot 0 is always the most recently used pattern */
    {
        int i;
        int found = 0;
        cache_entry *cache = sqlite3_user_data(ctx);

        assert(cache != NULL);

        /* used slots form a prefix of the array, so stop at the first empty one */
        for (i = 0; i < CACHE_SIZE && cache[i].str; i++) {
            if (strcmp(re, cache[i].str) == 0) {
                found = 1;
                break;
            }
        }
        if (found) {
            /* hit: rotate the entry to the front unless it is already there */
            if (i > 0) {
                cache_entry c = cache[i];
                memmove(cache + 1, cache, i * sizeof(cache_entry));
                cache[0] = c;
            }
        }
        else {
            /* miss: compile the pattern, then evict the last slot if occupied */
            cache_entry c;
            int err_no;
            PCRE2_SIZE err_off;

            c.code = pcre2_compile((PCRE2_SPTR) re, PCRE2_ZERO_TERMINATED,
                                   PCRE2_UTF | PCRE2_UCP, &err_no, &err_off, NULL);
            if (c.code == NULL) {
                PCRE2_UCHAR err[256];
                char *e;

                /* the buffer length is given in code units, not bytes */
                pcre2_get_error_message(err_no, err, sizeof(err) / sizeof(err[0]));
                /*
                 * err_off is a PCRE2_SIZE (size_t); passing it to a %d
                 * conversion is a varargs type mismatch, so convert it
                 * explicitly and use the matching 64-bit conversion.
                 */
                e = sqlite3_mprintf("[REGEXP] %s: %s (offset %lld)",
                                    re, (const char *) err, (sqlite3_int64) err_off);
                if (e == NULL) {
                    sqlite3_result_error_nomem(ctx);
                    return;
                }
                sqlite3_result_error(ctx, e, -1);
                sqlite3_free(e);
                return;
            }
            c.match_data = pcre2_match_data_create_from_pattern(c.code, NULL);
            if (c.match_data == NULL) {
                sqlite3_result_error(ctx, "[REGEXP] pcre2_match_data_create_from_pattern: ENOMEM", -1);
                pcre2_code_free(c.code);
                return;
            }
            c.str = strdup(re);
            if (c.str == NULL) {
                sqlite3_result_error(ctx, "[REGEXP] strdup: ENOMEM", -1);
                pcre2_match_data_free(c.match_data);
                pcre2_code_free(c.code);
                return;
            }
            i = CACHE_SIZE - 1;
            if (cache[i].str) {
                /* cache is full: drop the least recently used entry */
                free(cache[i].str);
                assert(cache[i].code != NULL);
                pcre2_match_data_free(cache[i].match_data);
                pcre2_code_free(cache[i].code);
            }
            memmove(cache + 1, cache, i * sizeof(cache_entry));
            cache[0] = c;
        }
        code = cache[0].code;
        match_data = cache[0].match_data;
    }

    {
        int rc;

        assert(code != NULL);
        rc = pcre2_match(code, (PCRE2_SPTR) str, PCRE2_ZERO_TERMINATED, 0, 0, match_data, NULL);
        /*
         * Any negative return (PCRE2_ERROR_NOMATCH as well as real matching
         * errors) is deliberately reported as "no match" rather than as an
         * SQL error, so a single bad row cannot abort the whole query.
         */
        sqlite3_result_int(ctx, rc >= 0);
        return;
    }
}

int sqlite3_extension_init(sqlite3 *db, char **err, const sqlite3_api_routines *api)
{
SQLITE_EXTENSION_INIT2(api)
cache_entry *cache = calloc(CACHE_SIZE, sizeof(cache_entry));
if (cache == NULL) {
*err = "[REGEXP] calloc: ENOMEM";
return 1;
}
sqlite3_create_function(db, "REGEXP", 2, SQLITE_UTF8, cache, regexp, NULL, NULL);
return 0;
}

0 comments on commit fd2df02

Please sign in to comment.