-
-
Notifications
You must be signed in to change notification settings - Fork 47
/
Copy pathorb-core.el
609 lines (538 loc) · 23.9 KB
/
orb-core.el
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
;;; orb-core.el --- Org Roam BibTeX: core library -*- lexical-binding: t -*-
;; Copyright © 2020-2022 Mykhailo Shevchuk
;; Copyright © 2020 Leo Vivier
;; Author: Mykhailo Shevchuk <[email protected]>
;; Leo Vivier <[email protected]>
;; URL: https://github.com/org-roam/org-roam-bibtex
;; This file is NOT part of GNU Emacs.
;; This program is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;;
;; This program is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;; GNU General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License along with
;; this program; see the file LICENSE. If not, visit
;; <https://www.gnu.org/licenses/>.
;;; Commentary:
;;
;; This file provides org-roam-bibtex' dependencies and thus should
;; normally be required by org-roam-bibtex feature libraries. It
;; defines customize groups and provides general utility functions
;; that depend on extra features provided through org-roam,
;; bibtex-completion and their dependencies.
;;; Code:
;; ============================================================================
;;; Dependencies
;; ============================================================================
(require 'orb-utils)
(require 'orb-compat)
(eval-when-compile
(require 'cl-macs)
(require 'subr-x)
(require 'rx))
;; Tell the byte-compiler about functions defined in `bibtex-completion',
;; which is loaded elsewhere.  `declare-function' takes (FN FILE &optional
;; ARGLIST); the FILE argument is mandatory, otherwise the arglist ends up
;; in the FILE slot and `check-declare' cannot verify the declaration.
(declare-function
 bibtex-completion-get-entry "bibtex-completion" (entry-key))
(declare-function
 bibtex-completion-get-value "bibtex-completion" (field entry &optional default))
(declare-function
 bibtex-completion-find-pdf "bibtex-completion"
 (key-or-entry &optional find-additional))
;; ============================================================================
;;; Customize groups
;; ============================================================================
;;
;; All modules should put their `defgroup' definitions here
;; Defcustom definitions should stay in respective files
;; Root customization group; every ORB module group below is its child.
(defgroup org-roam-bibtex nil
  "Org-roam integration with BibTeX software."
  :prefix "orb-"
  :group 'org-roam)

;; Options of the note actions module.
(defgroup orb-note-actions nil
  "Orb Note Actions - run actions in note's context."
  :prefix "orb-note-actions-"
  :group 'org-roam-bibtex)

;; Options of the PDF reference scraper.
(defgroup orb-pdf-scrapper nil
  "Orb PDF Scrapper - retrieve references from PDF."
  :prefix "orb-pdf-scrapper-"
  :group 'org-roam-bibtex)

;; Options of the anystyle-cli wrapper.
(defgroup orb-anystyle nil
  "Elisp interface to `anystyle-cli'."
  :prefix "orb-anystyle-"
  :group 'org-roam-bibtex)

;; Options of the citation key generator.
(defgroup orb-autokey nil
  "Automatic generation of BibTeX citation keys."
  :prefix "orb-autokey-"
  :group 'org-roam-bibtex)
;; ============================================================================
;;; BibTeX fields and their special handling
;; ============================================================================
(defcustom orb-bibtex-field-aliases
  '(("=type=" . "entry-type")
    ("=key=" . "citekey")
    ("=has-pdf=" . "pdf?")
    ("=has-note=" . "note?")
    ("citation-number" . "#"))
  "Alist mapping BibTeX field names to ORB-specific aliases.
Each element has the form (FIELD . ALIAS).  Wherever ORB's
configuration expects a field name, the ALIAS may be written in
place of the FIELD.  The main purpose is to offer friendlier
names for the virtual fields used internally by
`bibtex-completion', whose names are wrapped in equals signs,
such as \"=key=\"."
  :type '(repeat
          (cons (string :tag "Field name")
                (string :tag "Alias name")))
  :group 'org-roam-bibtex)
(defcustom orb-attached-file-extensions '("pdf")
  "File extensions accepted when retrieving an attached file.
The value is a list of extensions given as strings without the
leading dot; they are compared case-insensitively.  Files whose
extension is not in the list are discarded.

A nil value disables the filtering, so all file names are kept
regardless of their extensions.

Attached files are looked up in BibTeX entries according to
`bibtex-completion-pdf-field' (default `file') and in the
BibDesk-specific `Bdsk-File-N' fields."
  :type '(repeat :tag "List of extensions" (string))
  :group 'org-roam-bibtex)
(defcustom orb-abbreviate-file-name t
  "Whether `orb-get-attached-file' abbreviates the file name it returns.
If non-nil, the returned file name is passed through
`abbreviate-file-name': the home directory part is shortened to
`~/' and symlinked directories are abbreviated according to
`directory-abbrev-alist'.

If nil, the file name is returned as is."
  :type '(choice
          (const :tag "Yes" t)
          (const :tag "No" nil))
  :group 'org-roam-bibtex)
(defcustom orb-open-attached-file-as-truename t
  "Whether `orb-open-attached-file' opens files by their true names.
If non-nil, the file name is resolved with `file-truename' before
it is handed over to the opening function.  Consider setting this
to nil if you use file symlinks and run into problems such as the
one discussed here:

https://github.com/org-roam/org-roam-bibtex/issues/259

If nil, the file name is used as is."
  :type '(choice
          (const :tag "Yes" t)
          (const :tag "No" nil))
  :group 'org-roam-bibtex)
(defcustom orb-use-bibdesk-attachments nil
  "How to treat the BibDesk-specific file fields `Bdsk-File'.
nil     Ignore BibDesk fields; rely on `bibtex-completion-find-pdf'.
t       Use BibDesk fields in addition to the files found through
        `bibtex-completion-find-pdf'.
`only'  Use BibDesk fields exclusively and do not call
        `bibtex-completion-find-pdf'.

The setting applies when performing a template expansion, opening
an attachment with `orb-note-actions' or scraping a PDF with
`orb-pdf-scrapper'.

With the value t, duplicates are resolved.  Since the comparison
is done on `file-truename' results, symlink paths, if used for
example in the normal BibTeX `file' field, will be expanded.  See
also `orb-abbreviate-file-name' on how to abbreviate the
retrieved file names."
  :type '(choice
          (const :tag "Yes" t)
          (const :tag "BibDesk only" only)
          (const :tag "No" nil))
  :group 'org-roam-bibtex)
(defsubst orb-resolve-field-alias (alias)
  "Translate ALIAS into the field name it stands for.
The lookup is done by value in `orb-bibtex-field-aliases'.  When
no element has ALIAS as its alias, return ALIAS itself."
  (let ((field (car (rassoc alias orb-bibtex-field-aliases))))
    (if field
        field
      alias)))
(defun orb-get-bibdesk-filenames (entry)
  "Collect file names stored in the BibDesk fields \"Bdsk-File-N\" of ENTRY.
ENTRY is a BibTeX entry as returned by `bibtex-completion-get-entry'.

The value of every field whose name matches \"Bdsk-File\" is
stripped of its enclosing quotes or braces and base64-decoded.
The decoded data is then searched for a path that starts with
\"Users/\" and ends with one of `orb-attached-file-extensions',
or with \"pdf\" when that list is nil.

Return a list of absolute file names, possibly empty."
  ;; NOTE: Mac-specific, hard-coded
  (let ((value-rx
         ;; field value without the surrounding \"...\" or {...}
         (rx (seq (opt (in "\"{"))
                  (group (* (not (in "\"{}"))))
                  (opt (in "\"}")))))
        (path-rx
         (concat
          (rx (seq "Users/" (* anychar)))
          (if orb-attached-file-extensions
              (regexp-opt orb-attached-file-extensions t)
            "pdf")))
        (found nil))
    (dolist (cell entry)
      (when (string-match-p "Bdsk-File" (car cell))
        (let ((raw (cdr cell)))
          (when (string-match value-rx raw)
            (let ((decoded (base64-decode-string (match-string 1 raw))))
              ;; the match lacks the leading slash, put it back
              (when (string-match path-rx decoded)
                (push (concat "/" (match-string 0 decoded)) found)))))))
    (nreverse found)))
;;;###autoload
(defun orb-get-attached-file (citekey)
  "Look up files associated with a BibTeX entry identified by CITEKEY.
Files are searched for using `bibtex-completion-find-pdf',
meaning that Mendeley, Zotero and plain file paths are all
supported, and variables `bibtex-completion-pdf-field',
`bibtex-completion-library-path' and
`bibtex-completion-find-additional-pdfs' are respected.
Additionally, the BibTeX entry is searched for BibDesk-specific
file fields `Bdsk-File-N'.

If `orb-attached-file-extensions' is non-nil, return only file paths
matching the respective extensions.

If `orb-abbreviate-file-name' is non-nil, force an abbreviated
file name.

Depending on the value of `orb-use-bibdesk-attachments', the
BibDesk-specific file fields `Bdsk-File-N' may or may not be used
for the lookup.

If multiple files have been found, the user will be prompted to
select one.

Return the file name as a string, or nil if the entry or a
suitable file was not found.  Errors signaled during the lookup
are reported with `orb-warning' and nil is returned."
  (condition-case err
      (when-let* ((entry (bibtex-completion-get-entry citekey))
                  (paths
                   (--> (pcase orb-use-bibdesk-attachments
                          (`nil (bibtex-completion-find-pdf
                                 entry bibtex-completion-find-additional-pdfs))
                          (`only (orb-get-bibdesk-filenames entry))
                          (_
                           (-->
                            ;; `append' rather than `nconc': do not
                            ;; destructively modify a list we do not own.
                            ;; Additional PDFs are honored here exactly as
                            ;; in the nil branch above.
                            (append (bibtex-completion-find-pdf
                                     entry
                                     bibtex-completion-find-additional-pdfs)
                                    (orb-get-bibdesk-filenames entry))
                            ;; true names make duplicates comparable
                            (-map #'file-truename it)
                            (-uniq it))))
                        (if (not orb-attached-file-extensions)
                            it          ; do not filter by extensions
                          ;; filter by extensions
                          (--filter
                           (when-let* ((ext (file-name-extension it)))
                             (member-ignore-case
                              ext orb-attached-file-extensions))
                           it))))
                  ;; ask the user only when there is a choice
                  (path (if (cdr paths)
                            (completing-read "File to use: " paths)
                          (car paths))))
        (if orb-abbreviate-file-name
            (abbreviate-file-name path)
          path))
    ;; ignore any errors that may be thrown by `bibtex-completion-find-pdf'
    ;; don't stop the capture process
    (error
     (orb-warning
      (format "error in `orb-get-attached-file`: %s %s"
              (car err) (cdr err)))
     ;; Return nil explicitly so that callers never mistake the return
     ;; value of `orb-warning' for a file name.
     nil)))
;;;###autoload
(defun orb-open-attached-file (citekey)
  "Open the file attached to the BibTeX entry given by CITEKEY.
CITEKEY must be a list whose first element is the citation key;
this mirrors the calling convention of `bibtex-completion'
functions, which also expect a list.

This is a modified and simplified version of
`bibtex-completion-open-pdf'.  The file is looked up with
`orb-get-attached-file', which in turn can use
`orb-get-bibdesk-filenames', so BibDesk attachments are
supported.  The file is opened with the function set in
`bibtex-completion-pdf-open-function'; it receives the true name
of the file when `orb-open-attached-file-as-truename' is non-nil.

If no file was found, only a message is displayed.

The intended primary use is with `orb-note-actions'."
  (let* ((key (car citekey))
         (file (orb-get-attached-file key)))
    (cond
     ((null file)
      (message "No PDF(s) found for this entry: %s" key))
     (orb-open-attached-file-as-truename
      (funcall bibtex-completion-pdf-open-function (file-truename file)))
     (t
      (funcall bibtex-completion-pdf-open-function file)))))
;; ============================================================================
;;;; Orb autokey
;; ============================================================================
(defcustom orb-autokey-format "%a%y%T[4][1]"
  "Format string for automatically generated citation keys.

Supported wildcards:

Basic
==========

 %a         |author| - first author's (or editor's) last name
 %t         |title | - first word of title
 %f{field}  |field | - first word of arbitrary field
 %y         |year  | - year YYYY
 %p         |page  | - first page
 %e{(expr)} |elisp | - execute elisp expression

Extended
==========

1. Capitalized versions:

 %A        |author| >
 %T        |title | > Same as %a,%t,%f{field} but
 %F{field} |field | > preserve original capitalization

2. Starred versions

 %a*, %A* |author| - include author's (editor's) initials
 %t*, %T* |title | - do not ignore words in `orb-autokey-titlewords-ignore'
 %y*      |year  | - year's last two digits __YY
 %p*      |page  | - use \"pagetotal\" field instead of default \"pages\"

3. Optional parameters

 %a[N][M][D]        |author| >
 %t[N][M][D]        |title | > include first N words/names
 %f{field}[N][M][D] |field | > include at most M first characters of word/name
 %p[D]              |page  | > put delimiter D between words

N and M should be a single digit 1-9.  Putting more digits or any
other symbols will lead to ignoring the optional parameter and
those following it altogether.  D should be a single alphanumeric
symbol or one of `-_.:|'.

Optional parameters work both with capitalized and starred
versions where applicable.

4. Elisp expression

 - can be anything
 - should return a string or nil
 - will be evaluated before expanding other wildcards and therefore
   can insert other wildcards
 - will have `entry' variable bound to the value of BibTeX entry the key
   is being generated for, as returned by `bibtex-completion-get-entry'.
   The variable may be safely manipulated in a destructive manner.

 %e{(or (bibtex-completion-get-value \"volume\" entry) \"N/A\")}
 %e{(my-function entry)}

Key generation is performed by `orb-autokey-generate-key'."
  ;; the format may contain arbitrary elisp to be evaluated (%e{...})
  :risky t
  :type 'string
  ;; Same group as every other `orb-autokey-' option; the group itself is
  ;; a child of `org-roam-bibtex', so the option stays reachable from there.
  :group 'orb-autokey)
(defcustom orb-autokey-titlewords-ignore
  '("A" "An" "On" "The" "Eine?" "Der" "Die" "Das"
    "[^[:upper:]].*" ".*[^[:upper:][:lower:]0-9].*")
  "Regular expressions for title words to skip during key generation.
A title word that matches any element of this list is left out
when a citation key is generated automatically.  Matching is
case sensitive."
  ;; Default value was taken from `bibtex-autokey-titleword-ignore'.
  :group 'orb-autokey
  :type '(repeat :tag "Regular expression" regexp))
(defcustom orb-autokey-empty-field-token "N/A"
  "Placeholder inserted into the key for a nil or empty BibTeX field."
  :group 'orb-autokey
  :type 'string)
(defcustom orb-autokey-invalid-symbols
  " \"'()={},~#%\\"
  "Characters that must not appear in a BibTeX key.
Every occurrence of these characters is removed from the
generated key."
  :group 'orb-autokey
  :type 'string)
(defun orb--autokey-format-field (field &rest specs)
  "Return BibTeX FIELD formatted according to plist SPECS.

Recognized keys:
==========
:entry       - BibTeX entry to use
:value       - Value of BibTeX field to use
               instead of retrieving it from :entry
:capital     - capitalized version
:starred     - starred version
:words       - first optional parameter (number of words)
:characters  - second optional parameter (number of characters)
:delimiter   - third optional parameter (delimiter)

All values should be strings, including those representing numbers.

FIELD is a field name as a string.  The special name \"=name=\"
stands for the author or, if there is no author, the editor.

Return a string stripped of `orb-autokey-invalid-symbols'.  If
the field is missing or empty, the string is built from
`orb-autokey-empty-field-token'.  If no word of the field remains
after filtering, the result is an empty string.

The match data is preserved.

This function is used internally by `orb-autokey-generate-key'."
  (declare (indent 1))
  ;; `orb-autokey-generate-key' calls `replace-match' right after this
  ;; function returns, so nothing in here may leak changed match data.
  (save-match-data
    (-let* (((&plist :entry entry
                     :value value
                     :capital capital
                     :starred starred
                     :words words
                     :characters chars
                     :delimiter delim) specs)
            ;; field values will be split into a list of words.  `separator'
            ;; is a regexp for word separators: either a whitespace, one or
            ;; more dashes, or en dash, or em dash
            (separator "\\([ \n\t]\\|[-]+\\|[—–]\\)")
            (invalid-chars-rx
             (rx-to-string `(any ,orb-autokey-invalid-symbols) t))
            (delim (or delim ""))
            result)
      ;; 0. virtual field "=name=" is used internally here and in
      ;; `orb-autokey-generate-key'; it stands for author or editor
      (if (string= field "=name=")
          ;; in name fields, logical words are full names consisting of
          ;; several words and containing spaces and punctuation, separated
          ;; by a logical separator, the word "and"
          (setq separator " and "
                value (or value
                          (bibtex-completion-get-value "author" entry)
                          (bibtex-completion-get-value "editor" entry)))
        ;; otherwise proceed with value or get it from entry
        (setq value (or value
                        (bibtex-completion-get-value field entry))))
      (if (or (not value)
              (string-empty-p value))
          (setq result orb-autokey-empty-field-token)
        ;; 1. split field into words
        (setq result (split-string value separator t "[ ,.;:-]+"))
        ;; 1a) only for title;
        ;; STARRED = include words from `orb-autokey-titlewords-ignore'
        ;; unstarred version filters the keywords, starred skips this block
        (when (and (string= field "title")
                   (not starred))
          (let ((ignore-rx (concat "\\`\\(?:"
                                   (mapconcat #'identity
                                              orb-autokey-titlewords-ignore
                                              "\\|")
                                   "\\)\\'"))
                ;; `orb-autokey-titlewords-ignore' is documented as case
                ;; sensitive, do not depend on the caller's setting
                (case-fold-search nil))
            (setq result
                  (delq nil
                        (mapcar (lambda (word)
                                  (unless (string-match-p ignore-rx word)
                                    word))
                                result)))))
        ;; 2. take number of words equal to WORDS if that is set
        ;; or just the first word; also 0 = 1.
        (setq result
              (if words
                  (-take (min (max 1 (string-to-number words))
                              (length result))
                         result)
                ;; keep the list empty when there are no words at all,
                ;; a list holding nil would break the steps below
                (and result (list (car result)))))
        ;; 2a) only for "=name=" field, i.e. author or editor
        ;; STARRED = include initials
        (when (string= field "=name=")
          ;; NOTE: here we expect name field 'Doe, J. B.'
          ;; should ideally be able to handle 'Doe, John M. Longname, Jr'
          (let ((r-x (if starred
                         "[ ,.\t\n]"
                       "\\`\\(.*?\\),.*\\'"))
                (rep (if starred "" "\\1")))
            (setq result
                  (mapcar (lambda (name)
                            (s-replace-regexp r-x rep name))
                          result))))
        ;; 3. take at most CHARS number of characters from every word
        (when chars
          (let ((limit (string-to-number chars)))
            (setq result
                  (mapcar (lambda (word)
                            (substring word 0 (min limit (length word))))
                          result))))
        ;; 4. almost there: concatenate words, include DELIMiter
        (setq result (mapconcat #'identity result delim))
        ;; 5. CAPITAL = preserve case
        (unless capital
          (setq result (downcase result))))
      ;; return result stripped of the invalid characters
      (s-replace-regexp invalid-chars-rx "" result t))))
(defun orb--autokey-evaluate-expression (expr &optional entry)
  "Read the string EXPR as an elisp form, evaluate it and return the result.
During the evaluation the variable `entry' is bound to a copy of
ENTRY, which should be a BibTeX entry as returned by
`bibtex-completion-get-entry'.

The form must evaluate to a string or nil; nil is returned as an
empty string.  Any other value signals a `user-error'."
  (let* ((entry-copy (copy-tree entry))
         (form (read expr))
         ;; the copy is quoted so that it is not evaluated itself
         (value (eval `(let ((entry ',entry-copy))
                         ,form))))
    (cond
     ((null value) "")
     ((stringp value) value)
     (t
      (user-error
       "Result: %s, invalid type. Expression must be string or nil"
       value)))))
;;;###autoload
(defun orb-autokey-generate-key (entry &optional control-string)
  "Generate citation key from ENTRY according to `orb-autokey-format'.
ENTRY is a BibTeX entry as returned by `bibtex-completion-get-entry'.
If optional CONTROL-STRING is non-nil, use it instead of
`orb-autokey-format'.

Wildcards are expanded in the following order: elisp expressions
\(%e), author (%a), title (%t), arbitrary field (%f), pages (%p)
and year (%y).  Matching of wildcards is case sensitive.  Any
number of %e{(...)} wildcards may be present; the result of each
expression is inserted literally and is itself scanned for
wildcards.

Return the key as a string."
  (let* ((case-fold-search nil)
         (str (or control-string orb-autokey-format))
         ;; star regexp: group 3!
         (star '(opt (group-n 3 "*")))
         ;; optional parameters: regexp groups 4-6!
         (opt1 '(opt (and "[" (opt (group-n 4 digit)) "]")))
         (opt2 '(opt (and "[" (opt (group-n 5 digit)) "]")))
         (opt3 '(opt (and "[" (opt (group-n 6 (any alnum "_.:|-"))) "]")))
         ;; capital letters: regexp group 2!
         ;; author wildcard regexp
         (a-rx (rx-to-string
                `(group-n 1 (or "%a" (group-n 2 "%A"))
                          ,star ,opt1 ,opt2 ,opt3)
                t))
         ;; title wildcard regexp
         (t-rx (rx-to-string
                `(group-n 1 (or "%t" (group-n 2 "%T"))
                          ,star ,opt1 ,opt2 ,opt3)
                t))
         ;; any field wildcard regexp
         ;; required parameter: group 7!
         (f-rx (rx-to-string
                `(group-n 1 (or "%f" (group-n 2 "%F"))
                          (and "{" (group-n 7 (1+ letter)) "}")
                          ,opt1 ,opt2 ,opt3)
                t))
         ;; year wildcard regexp
         (y-rx (rx (group-n 1 "%y" (opt (group-n 3 "*")))))
         ;; page wildcard regexp
         (p-rx (rx-to-string `(group-n 1 "%p" ,star ,opt3) t)))
    ;; Evaluating elisp expression should go the first because it can produce
    ;; additional wildcards.  The extent of the expression is determined by
    ;; the Lisp reader rather than by a regexp, so that several %e{(...)}
    ;; wildcards in one format string are expanded independently.
    (let ((pos 0))
      (while (string-match "%e{(" str pos)
        (let* ((wild-beg (match-beginning 0))
               ;; the expression starts at the opening parenthesis
               (sexp-beg (1- (match-end 0)))
               ;; index right after the expression, or nil if unreadable
               (sexp-end (condition-case nil
                             (cdr (read-from-string str sexp-beg))
                           ((end-of-file invalid-read-syntax) nil))))
          (if (and sexp-end
                   (< sexp-end (length str))
                   (eq (aref str sexp-end) ?}))
              ;; splice the result in place of the whole %e{(...)} and
              ;; rescan from the same position: the result may contain
              ;; wildcards of its own
              (setq str (concat
                         (substring str 0 wild-beg)
                         (orb--autokey-evaluate-expression
                          (substring str sexp-beg sexp-end) entry)
                         (substring str (1+ sexp-end)))
                    pos wild-beg)
            ;; not a well-formed wildcard, leave the text as is
            (setq pos (match-end 0))))))
    ;; Expanding all other wildcards are actually
    ;; variations of calls to `orb--autokey-format-field' with many
    ;; commonalities, so we wrap it into a macro
    (cl-macrolet
        ((expand
          (wildcard &key field value entry capital
                    starred words characters delimiter)
          ;; unless given explicitly, the parameters are taken from the
          ;; numbered groups of the most recent match
          (let ((cap (or capital '(match-string 2 str)))
                (star (or starred '(match-string 3 str)))
                (opt1 (or words '(match-string 4 str)))
                (opt2 (or characters '(match-string 5 str)))
                (opt3 (or delimiter '(match-string 6 str))))
            `(while (string-match ,wildcard str)
               (setq str (replace-match
                          ;; we can safely pass nil values
                          ;; `orb--autokey-format-field' should
                          ;; handle them correctly
                          (orb--autokey-format-field ,field
                            :entry ,entry :value ,value
                            :capital ,cap :starred ,star
                            :words ,opt1 :characters ,opt2 :delimiter ,opt3)
                          t nil str 1))))))
      ;; Handle author wildcards
      (expand a-rx
              :field "=name="
              :value (or (bibtex-completion-get-value "author" entry)
                         (bibtex-completion-get-value "editor" entry)))
      ;; Handle title wildcards
      (expand t-rx
              :field "title"
              :value (or (bibtex-completion-get-value "title" entry) ""))
      ;; Handle custom field wildcards
      (expand f-rx
              :field (match-string 7 str)
              :entry entry)
      ;; Handle pages wildcards %p*[-]
      (expand p-rx
              :field (if (match-string 3 str)
                         "pagetotal" "pages")
              :entry entry
              :words "1"))
    ;; Handle year wildcards
    ;; it's simple, so we do not use `orb--autokey-format-field' here
    ;; year should be well-formed: YYYY
    ;; TODO: put year into cl-macrolet
    (let ((year (or (bibtex-completion-get-value "year" entry)
                    (bibtex-completion-get-value "date" entry))))
      (if (or (not year)
              (string-empty-p year)
              (string= year orb-autokey-empty-field-token))
          (while (string-match y-rx str)
            (setq str (replace-match orb-autokey-empty-field-token
                                     t nil str 1)))
        ;; normalize to at least four digits once, before the loop
        (let ((yyyy (format "%04d" (string-to-number year))))
          (while (string-match y-rx str)
            (setq str (replace-match
                       ;; starred version: the last two digits only
                       (if (match-string 3 str)
                           (substring yyyy 2 4)
                         (substring yyyy 0 4))
                       t nil str 1))))))
    str))
;; Announce the feature so that `(require 'orb-core)' succeeds.
(provide 'orb-core)
;;; orb-core.el ends here
;; Local Variables:
;; coding: utf-8
;; fill-column: 79
;; End: