-
Notifications
You must be signed in to change notification settings - Fork 0
/
wmk_utils.py
1269 lines (1155 loc) · 48.8 KB
/
wmk_utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
import os
import re
import datetime
import unicodedata
import sqlite3
import hashlib
import json
import locale
from mako.exceptions import TemplateLookupException
def slugify(s):
    """
    Make a 'slug' from the given string. If it seems to end with a file
    extension, remove that first and re-append a lower case version of it
    before returning the result. Probably only works for Latin text.
    """
    if not isinstance(s, str):
        s = str(s)
    # Detach a trailing file extension (1-8 alphanumerics) if present.
    ext = ''
    ext_re = r'(\.[a-zA-Z0-9]{1,8})$'
    ext_match = re.search(ext_re, s)
    if ext_match:
        ext = ext_match.group(1).lower()
        s = re.sub(ext_re, '', s)
    # Single quotes become dashes.
    s = re.sub(r"[']+", '-', s)
    # Strip combining marks (accents) after canonical decomposition.
    s = ''.join(ch for ch in unicodedata.normalize('NFD', s)
                if unicodedata.category(ch) != 'Mn')
    s = s.lower()
    # Normalization may leave extra quotes (?)
    s = re.sub(r"[']+", '-', s)
    # Transliterate a few special (non-decomposable) characters.
    for special, replacement in (('þ', 'th'), ('æ', 'ae'), ('ð', 'd')):
        s = s.replace(unicodedata.normalize('NFKD', special), replacement)
    # Keep [-a-z0-9]; silently drop stray accent marks; everything else
    # becomes a dash.
    kept = []
    for ch in s:
        if re.match(r'^[-a-z0-9]$', ch):
            kept.append(ch)
        elif not re.match(r'^[´¨`]$', ch):
            kept.append('-')
    # Collapse runs of dashes and trim leading/trailing ones.
    slug = re.sub(r'--+', '-', ''.join(kept)).strip('-')
    return slug + ext
class attrdict(dict):
    """
    Dict with the keys as attributes (or member variables), for nicer-looking
    and more convenient lookups.

    If the encapsulated dict has keys corresponding to the built-in attributes
    of dict, i.e. one of 'clear', 'copy', 'fromkeys', 'get', 'items', 'keys',
    'pop', 'popitem', 'setdefault', 'update', or 'values', these will be
    renamed so as to have a leading underscore.

    An attempt to access a non-existing key as an attribute results in an
    empty attrdict. Chained attrdict access is provided to dict values of keys
    in the original dict (so, e.g., `page.more.nested.val` does not raise an
    error even if the `more` key is not present).
    """
    # Names that would shadow dict's own API; keys with these names are
    # stored with a leading underscore instead.
    __reserved = dir(dict())
    __reserved.append('__reserved')
    __reserved = set(__reserved)

    def __init__(self, *args, **kwargs):
        # Allow construction from a single mapping argument.
        if len(args) == 1 and isinstance(args[0], dict) and not kwargs:
            kwargs = args[0]
        # Rename keys that collide with dict's built-in attributes.
        for k in attrdict.__reserved:
            if k in kwargs:
                kwargs['_'+k] = kwargs.pop(k)
        dict.__init__(self, *args, **kwargs)
        # Recursively wrap plain dict values (including dicts inside lists)
        # so that chained attribute access works all the way down.
        for k in self:
            if isinstance(self[k], dict) and not isinstance(self[k], attrdict):
                self[k] = attrdict(self[k])
            elif isinstance(self[k], list):
                for i, it in enumerate(self[k]):
                    if isinstance(it, dict) and not isinstance(it, attrdict):
                        self[k][i] = attrdict(it)
        # Make item storage and attribute storage one and the same.
        self.__dict__ = self

    def __setitem__(self, k, v):
        if k in attrdict.__reserved:
            super().__setitem__('_'+k, v)
        else:
            super().__setitem__(k, v)

    def __setattr__(self, k, v):
        if k in attrdict.__reserved:
            super().__setattr__('_'+k, v)
        else:
            super().__setattr__(k, v)

    def __getattr__(self, k):
        # Only called when normal attribute lookup fails. Reserved names are
        # normally found via dict's own attributes, so this branch is mostly
        # defensive. Bugfix: the original called the non-existent
        # `super().__getattr(k)`, which would raise AttributeError.
        if k in attrdict.__reserved:
            return super().__getattribute__(k)
        try:
            return self.__dict__[k]
        except KeyError:
            # Missing keys yield an empty attrdict so chained lookups
            # (e.g. `page.more.nested.val`) do not raise.
            return attrdict({})

    def __call__(self):
        # For Jinja2
        return attrdict({})
class MDContentList(list):
    """
    Filterable MDCONTENT, for ease of list components.

    Each item is a dict with the keys 'source_file', 'source_file_short',
    'target', 'template', 'data' (the template context, containing 'page',
    'DATE', 'MTIME', etc.), 'doc' (markdown source), 'url' and 'rendered'.
    """

    def match_entry(self, pred):
        """
        Filter by all available info: source_file, source_file_short, target,
        template, data (i.e. template_context), doc (markdown source),
        url, rendered (html fragment, i.e. CONTENT).
        """
        return MDContentList([_ for _ in self if pred(_)])

    def match_ctx(self, pred):
        "Filter by template context (page, site, MTIME, SELF_URL, etc.)"
        return MDContentList([_ for _ in self if pred(_['data'])])

    def match_page(self, pred):
        "Filter by page variables"
        return MDContentList([_ for _ in self if pred(_['data']['page'])])

    def match_doc(self, pred):
        "Filter by Markdown body"
        return MDContentList([_ for _ in self if pred(_['doc'])])

    def group_by(self, pred, normalize=None, keep_empty=False):
        """
        Group items in an MDContentList using a given criterion.

        - `pred`: A callable receiving a content item and returning a string or
          a list of strings. For convenience, `pred` may also be specified as
          a string and is then interpreted as the value of the named `page`
          variable, e.g. `category`.
        - `normalize`: a callable that transforms the grouping values, e.g.
          to lowercase.
        - `keep_empty`: Normally items are omitted if their predicate evaluates
          to the empty string. This can be overridden by setting this to True.

        Returns a dict whose keys are strings and whose values are
        MDContentList instances.
        """
        if isinstance(pred, str):
            pagekey = pred
            pred = lambda x: x['data']['page'].get(pagekey, '')
        found = {}
        for it in self:
            keys = pred(it)
            if not isinstance(keys, list):
                keys = [keys]
            if normalize:
                keys = list(set([normalize(_) for _ in keys]))
            for k in keys:
                if not k and not keep_empty:
                    continue
                if k in found:
                    found[k].append(it)
                else:
                    found[k] = MDContentList([it])
        return found

    def sorted_by(self, key, reverse=False, default_val=-1):
        "Sort by a `page` variable; string defaults trigger locale-aware sorting."
        if isinstance(default_val, str):
            k = lambda x: locale.strxfrm(
                x['data']['page'].get(key, default_val))
        else:
            k = lambda x: x['data']['page'].get(key, default_val)
        return MDContentList(sorted(self, key=k, reverse=reverse))

    def sorted_by_date(self, newest_first=True, date_key='DATE'):
        "Sort by a date field; DATE and MTIME live in `data`, others in `page`."
        k = lambda x: str(
            x['data'][date_key]
            if date_key in ('DATE', 'MTIME') \
            else x['data']['page'].get(date_key, x['data']['DATE']))
        return MDContentList(sorted(self, key=k, reverse=newest_first))

    def sorted_by_title(self, reverse=False):
        "Sort by page title (locale-aware); untitled pages sort last."
        return self.sorted_by('title', reverse=reverse, default_val='ZZZ')

    def in_date_range(self, start, end, date_key='DATE'):
        "Items whose date (ISO comparison) falls between `start` and `end`."
        std = lambda ts: str(ts).replace(' ', 'T')  # standard ISO fmt
        def found(x):
            pg = x['page']
            date = x[date_key] if date_key in ('DATE', 'MTIME') else pg.get(date_key, x['DATE'])
            return std(start) <= std(date) <= std(end)
        return self.match_ctx(found)

    def posts(self, ordered=True):
        """
        Posts, i.e. blog entries, are defined as content in specific
        directories (posts, blog) or having a 'type' attribute of 'post',
        'blog', 'blog-entry' or 'blog_entry'.
        """
        is_post = lambda x: (x['source_file_short'].strip('/').startswith(('posts/', 'blog/'))
                             or x['data']['page'].get('type', '') in (
                                 'post', 'blog', 'blog-entry', 'blog_entry'))
        ret = self.match_entry(is_post)
        return ret.sorted_by_date() if ordered else ret

    def non_posts(self):
        """
        'Pages', i.e. all entries that are NOT posts/blog entries.
        """
        not_post = lambda x: not (
            x['source_file_short'].strip('/').startswith(('posts/', 'blog/'))
            or x['data']['page'].get('type', '') in (
                'post', 'blog', 'blog-entry', 'blog_entry'))
        return self.match_entry(not_post)

    def has_slug(self, sluglist):
        """
        Pages with any of the given slugs.
        """
        if isinstance(sluglist, str):
            sluglist = (sluglist, )
        slugpred = lambda x: x.slug in sluglist
        return self.match_page(slugpred)

    def has_id(self, idlist):
        """
        Pages with any of the given ids.
        """
        if isinstance(idlist, str):
            idlist = (idlist, )
        idpred = lambda x: x['id'] in idlist
        return self.match_page(idpred)

    def url_match(self, url_pred):
        "Filter by a predicate applied to the target URL."
        # Bugfix: the original referenced an undefined name `urlpred`,
        # raising NameError on every call.
        return self.match_entry(lambda x: url_pred(x['url']))

    def path_match(self, src_pred):
        "Filter by a predicate applied to the short markdown source path."
        return self.match_entry(lambda x: src_pred(x['source_file_short']))

    def get_content_taxonomies(self):
        """
        Get information about the taxonomies that have been singled out in
        content files using the TAXONOMY feature.
        """
        ret = []
        for it in self:
            pg = it['data']['page']
            tx = pg.TAXONOMY
            if tx:
                tx_name = tx.name or (
                    tx.taxon[0] if isinstance(tx.taxon, list) else tx.taxon)
                item_urlpat = re.sub(r'/index.html$', '/{}/index.html', it['url'])
                ret.append({
                    'taxon': tx.taxon,
                    'order': tx.order or 'name',
                    'name': tx_name,
                    'name_singular': tx.name_singular or None,
                    'name_plural': tx.name_plural or None,
                    'list_url': it['url'],
                    'item_url_pattern': item_urlpat,  # expand with .format(slugify(item))
                    'page_id': pg['id'],
                })
        return ret

    def get_used_taxonomies(self):
        """
        Get all known taxonomies that are present in this MDContent list. These
        are (1) the standard taxonomies tags, sections, categories and authors;
        and (2) anything defined under as a TAXONOMY in a content page. Returns
        a list of dicts with the keys 'taxon', 'name', 'name_singular' and
        'name_plural'. If the taxonomy belongs to the latter group, then
        'order', 'list_url', 'item_url_pattern' and 'page_id' will be present
        as well. If a standard taxonomy has been handled as a content page
        TAXONOMY, the latter type takes precedence (i.e. the standard one is
        omitted from the list).
        """
        standard = [
            {
                'name': 'tags',
                'name_singular': 'tag',
                'name_plural': 'tags',
                'taxon': ['tags', 'tag'],
            },
            {
                'name': 'section',
                'name_singular': 'section',
                'name_plural': 'sections',
                'taxon': ['section', 'sections'],
            },
            {
                'name': 'category',
                'name_singular': 'category',
                'name_plural': 'categories',
                'taxon': ['category', 'categories'],
            },
            {
                'name': 'author',
                'name_singular': 'author',
                'name_plural': 'authors',
                'taxon': ['author', 'authors'],
            },
        ]
        found = self.get_content_taxonomies() or []
        known = set()
        for it in found:
            tx = it['taxon'] if isinstance(it['taxon'], (list, tuple)) else [it['taxon']]
            for ti in tx:
                known.add(ti)
        for it in self:
            pg = it['data']['page']
            for std in standard:
                if tuple(std['taxon']) in known:
                    continue
                is_known = False
                is_present = False
                for ti in std['taxon']:
                    if ti in known:
                        is_known = True
                        break
                    if pg.get(ti):
                        is_present = True
                if is_present and not is_known:
                    found.append(std)
                    known.add(tuple(std['taxon']))
        return found

    def has_taxonomy(self, haystack_keys, needles):
        """
        Look for instances of `needles` (e.g. `['good', 'bad']`) in the
        taxonomy characterized by `haystack_keys` (e.g. `['tag', 'tags']`).
        """
        if not needles:
            return MDContentList([])
        if not isinstance(needles, (list, tuple)):
            needles = [needles]
        # A single True means "has any value at all for this taxonomy".
        is_bool = len(needles) == 1 and isinstance(needles[0], bool) and needles[0]
        if not is_bool:
            needles = [_.lower() for _ in needles]
        def found(x):
            for k in haystack_keys:
                if k in x:
                    if is_bool and x[k]:
                        return True  # at least one tag/category/etc. is present
                    if isinstance(x[k], (list, tuple)):
                        for _ in x[k]:
                            if not _:
                                continue
                            if _.lower() in needles:
                                return True
                    elif x[k] and x[k].lower() in needles:
                        return True
            return False
        return self.match_page(found)

    def taxonomy_info(self, keys, order='count', tostring=None):
        """
        A list of values for any of the keys in `keys`. The values are assumed
        to be strings/ints or lists of strings/ints. Example usage:
            tags = MDCONTENT.taxonomy_info(['tag', 'tags'])
        Each record in the returned list looks like
            {'name': f1,         # First found form of this taxon
             'slug': slug,       # result of slugifying first-found-item
             'forms': [f1,f2...],# Different forms of this taxon found (e.g. lower/uppercase)
             'count': n,         # how many documents match
             'items': items, }   # MDContentList object
        TODO: Add more ordering options.
        """
        if isinstance(keys, str):
            keys = [keys]
        if not keys:
            return []
        taxons = {}
        slug2name = {}
        seen_urls = set()
        def _additem(tx, item):
            # seen_urls holds "slug:url" keys so the same page is only added
            # once per taxon regardless of spelling variants.
            seen_key = ':'.join([slugify(tx), item['url']])
            if tx in taxons:
                taxons[tx]['count'] += 1
                # Bugfix: the original checked `item['url'] in seen_urls`,
                # but the set only ever contains seen_key values, so a page
                # repeating the same taxon value got duplicate items.
                if seen_key not in seen_urls:
                    taxons[tx]['items'].append(item)
                    seen_urls.add(seen_key)
            else:
                slug = slugify(tx)
                if slug in slug2name:
                    taxons[slug2name[slug]]['count'] += 1
                    if seen_key not in seen_urls:
                        taxons[slug2name[slug]]['items'].append(item)
                        seen_urls.add(seen_key)
                    if tx not in taxons[slug2name[slug]]['forms']:
                        taxons[slug2name[slug]]['forms'].append(tx)
                else:
                    taxons[tx] = {
                        'name': tx,
                        'slug': slug,
                        'forms': [tx],
                        'count': 1,
                        'items': MDContentList([item]),
                    }
                    seen_urls.add(seen_key)
                    slug2name[slug] = tx
        for it in self:
            pg = it['data']['page']
            for k in keys:
                if k in pg:
                    if isinstance(pg[k], (str, int)):
                        _additem(pg[k], it)
                    elif isinstance(pg[k], (list, tuple)):
                        for tx in pg[k]:
                            if tostring and not isinstance(tx, (str, int)):
                                _additem(tostring(tx), it)
                            else:
                                _additem(tx, it)
                    elif tostring:
                        _additem(tostring(pg[k]), it)
        found = list(taxons.values())
        if order == 'count':
            found.sort(key=lambda x: x['count'], reverse=True)
        elif order in ('name', 'slug'):
            found.sort(key=lambda x: locale.strxfrm(x[order]), reverse=False)
        return found

    def get_categories(self, order='name'):
        "Categories along with list of pages/posts in them."
        return self.taxonomy_info(['category', 'categories'], order)

    def get_tags(self, order='name'):
        "Tags along with list of pages/posts tagged with them."
        return self.taxonomy_info(['tag', 'tags'], order)

    def get_sections(self, order='name'):
        "Sections along with list of pages/posts in them."
        return self.taxonomy_info(['section', 'sections'], order)

    def get_authors(self, order='name', tostring=None):
        """
        Authors along with list of pages/posts created by them.
        NOTE: If there are any authors that are not specified as a string,
        you must pass a `tostring` callable (e.g. `lambda x: x['name']`) so as
        to coerce it into one.
        """
        return self.taxonomy_info(['author', 'authors'], order, tostring=tostring)

    def in_category(self, catlist):
        "Pages/posts in any of the listed categories."
        return self.has_taxonomy(['category', 'categories'], catlist)

    def has_tag(self, taglist):
        "Pages/posts having any of the given tags."
        return self.has_taxonomy(['tag', 'tags'], taglist)

    def in_section(self, sectionlist):
        "Pages/posts in any of the given sections."
        return self.has_taxonomy(['section', 'sections'], sectionlist)

    def page_match(self, match_expr, ordering=None, limit=None, inverse=False):
        """
        The `match_expr` is either a dict or a list of dicts. Each dict
        contains one or more of the following keys, all of which must match.
        If a list of dicts is given, the union of matching entries from all
        dicts is returned.

        - `title`: A regular expression which will be applied to the page title.
        - `slug`: A regular expression which will be applied to the slug.
        - `id`: A string or list of strings which must match the id exactly.
        - `url`: A regular expression which will be applied to the target URL.
        - `path`: A regular expression which will be applied to the path to the
          markdown source file (i.e. the `source_file_short` field).
        - `doc`: A regular expression which will be applied to the body of the
          markdown source document.
        - `date_range`: A list containing two ISO-formatted dates and
          optionally a date key (`DATE` by default)
        - `has_attrs`: A list of frontmatter variable names. Matching pages
          must have a non-empty value for each of them.
        - `attrs`: A dict where each key is the name of a frontmatter variable
          and the value is the value of that attribute. If the value is a
          string, it will be matched case-insensitively. All key-value pairs
          must match.
        - `has_tag`, `in_section`, `in_category`: The values are lists of tags,
          sections or categories, respectively, at least one of which must
          match (case-insensitively).
        - `is_post`: If set to True, will match if the page is a blog post; if
          set to False will match if the page is not a blog post.
        - `exclude_url`: The page with this URL should be omitted (normally the
          calling page).

        `inverse` means that all the above conditions except `exclude_url`
        will be negated, i.e. will NOT match the specified title, slug, url,
        etc.

        The `ordering` parameter, if specified, should be either `title`,
        `slug`, `url`, `weight` or `date`, with an optional `-` in front to
        indicate reverse ordering. The `limit`, if specified, indicates the
        maximum number of pages to return.
        """
        found = MDContentList([])
        known_conds = (
            'title', 'slug', 'id', 'url', 'path', 'doc', 'date_range',
            'has_attrs', 'attrs', 'has_tag', 'in_section', 'in_category',
            'is_post', 'exclude_url')
        boolval = lambda x: not bool(x) if inverse else bool(x)
        if isinstance(match_expr, dict):
            if not match_expr:
                raise Exception('No condition for page_match')
            for k in match_expr:
                if k not in known_conds:
                    raise Exception('Unknown condition for page_match: %s' % k)
            def pred(c):
                x = match_expr
                p = c['data']['page']
                if 'exclude_url' in x:
                    # Normalize both URLs somewhat
                    c_url = c['url'].replace('/index.html', '/')
                    x_url = x['exclude_url'].replace('/index.html', '/')
                    if c_url == x_url:
                        return False
                for k in ('title', 'slug'):
                    if k in x and not boolval(re.search(x[k], p.get(k, ''), flags=re.I)):
                        return False
                if 'id' in x:
                    idlist = (x['id'], ) if isinstance(x['id'], str) else x['id']
                    if not boolval(p['id'] in idlist):
                        return False
                if 'url' in x and not boolval(re.search(x['url'], c['url'], flags=re.I)):
                    return False
                if 'path' in x and not boolval(re.search(x['path'], c['source_file_short'], flags=re.I)):
                    return False
                if 'doc' in x and not boolval(re.search(x['doc'], c['doc'], flags=re.I)):
                    return False
                if 'has_attrs' in x:
                    for a in x['has_attrs']:
                        if not boolval(p.get(a)):
                            return False
                if 'attrs' in x:
                    for k in x['attrs']:
                        if not boolval(str(p.get(k, '')).lower() == str(x['attrs'][k]).lower()):
                            return False
                if 'has_tag' in x:
                    if not boolval(MDContentList([c]).has_tag(x['has_tag'])):
                        return False
                if 'in_section' in x:
                    if not boolval(MDContentList([c]).in_section(x['in_section'])):
                        return False
                if 'in_category' in x:
                    if not boolval(MDContentList([c]).in_category(x['in_category'])):
                        return False
                if 'is_post' in x:
                    posts = MDContentList([c]).posts(ordered=False)
                    if x['is_post'] and not boolval(posts):
                        return False
                    elif not x['is_post'] and boolval(posts):
                        return False
                if 'date_range' in x and not boolval(MDContentList([c]).in_date_range(*x['date_range'])):
                    return False
                return True
            found = self.match_entry(pred)
        elif isinstance(match_expr, (list, tuple)):
            # Union of all sub-expressions, de-duplicated by URL.
            accum = {}
            for exp in match_expr:
                partial = self.page_match(exp)
                for it in partial:
                    if it['url'] in accum:
                        continue
                    accum[it['url']] = it
            found = MDContentList(list(accum.values()))
        else:
            raise Exception(
                'page_match: the match_expr must be either a dict or a list of dicts')
        if ordering and found:
            # title,slug,url,date
            reverse = False
            if ordering[0] == '-':
                reverse = True
                ordering = ordering[1:]
            if ordering in ('title', 'slug'):
                found = found.sorted_by(ordering, reverse, 'ZZZ')
            elif ordering == 'url':
                k = lambda x: x.get('url', 'zzz')
                found = MDContentList(sorted(found, key=k, reverse=reverse))
            elif ordering == 'weight':
                k = lambda x: int(x['data']['page'].get('weight', 999999))
                found = MDContentList(sorted(found, key=k, reverse=reverse))
            elif ordering.startswith('date'):
                # 'date:FIELD' sorts on a custom page date field.
                if ':' in ordering:
                    _, datefield = ordering.split(':')
                else:
                    datefield = 'DATE'
                found = found.sorted_by_date(
                    newest_first=reverse, date_key=datefield)
            else:
                raise Exception('Unknown ordering for page_match: %s' % ordering)
        if limit and len(found) > limit:
            found = MDContentList(found[:limit])
        return found

    def write_to(self, dest, context, extra_kwargs=None, template=None):
        """
        Add self to the context as 'CHUNK' and call the calling template again
        (or a different template if 'template' is specified), putting the
        result in dest. Directories are created if necessary. Useful for tag
        pages and such. Minimal usage in a template:

            mdcontent_chunk.write_to('/my/path/index.html', context)

        Note that the calling template must be careful to avoid infinite
        loops.
        """
        if extra_kwargs is None:
            extra_kwargs = {}
        if template is None:
            template = context.get('SELF_TEMPLATE')
        full_path = os.path.join(context.get('WEBROOT'), dest.strip('/'))
        dest_dir = re.sub(r'/[^/]+$', '', full_path)
        if not os.path.exists(dest_dir):
            os.makedirs(dest_dir)
        lookup = context.get('LOOKUP') or context.lookup
        try:
            tpl = lookup.get_template(template)
        except TemplateLookupException:
            tpl = lookup.get_template('base/' + template)
        # Mako contexts have .kwargs; Jinja passes a plain dict.
        kw = dict(**context.kwargs) if hasattr(context, 'kwargs') else context
        kw['SELF_URL'] = dest
        kw['CHUNK'] = self
        with open(full_path, 'w') as f:
            f.write(tpl.render(**kw, **extra_kwargs))

    def paginate(self, pagesize=5, context=None):
        """
        Divides the page list into chunks of size `pagesize` and returns
        a tuple consisting of the chunks and a list of page_urls (one for each
        page, in order). If an appropriate template context is provided, pages
        2 and up will be written to the webroot output directory. Without the
        context, the page_urls will be None.

        NOTE: It is the responsibility of the calling template to check the
        '_page' variable for the current page to be rendered (this defaults to
        1). Each iteration will get all chunks and must use this variable to
        limit itself appropriately.

        TODO: Rewrite this in terms of write_to().
        """
        page_urls = None
        chunks = [self[i:i+pagesize] for i in range(0, len(self), pagesize)] or [[]]
        if len(chunks) < 2:
            # We only have one page -- no need to do anything further
            if context:
                page_urls = [context.get('SELF_URL')]
            return (chunks, page_urls)
        elif context:
            # We have the context and can thus write the output for pages 2 and
            # up. We need the template, the template lookup object, the _page,
            # the webroot and the self_url of the caller.
            curpage = int(context.get('_page', 1))
            self_url = context.get('SELF_URL')
            page_urls = [self_url]
            if self_url.endswith('/'):
                self_url += 'index.html'
            # TODO: make url/output path configurable (creating directories if needed)
            url_pat = re.sub(r'\.html$', r'__page_{}.html', self_url)
            for i in range(2, len(chunks)+1):
                page_urls.append(url_pat.format(i))
            if curpage == 1:
                # So as only to write the output once, we do it for all pages
                # > 1 only on page 1.
                self_tpl = context.get('SELF_TEMPLATE')
                webroot = context.get('WEBROOT')
                lookup = context.get('LOOKUP') or context.lookup
                page_template = lookup.get_template(self_tpl)
                for pg in range(2, len(chunks)+1):
                    # For compatibility with Jinja
                    kw = dict(**context.kwargs) if hasattr(context, 'kwargs') else dict(**context)
                    kw['_page'] = pg
                    output_fn = os.path.join(webroot, url_pat.format(pg).strip('/'))
                    with open(output_fn, 'w') as fpg:
                        fpg.write(page_template.render(**kw))
            return (chunks, page_urls)
        else:
            # We cannot write output since we lack context.
            # Return all chunks along with their length. A page_urls value of
            # None means that the caller must take care of writing the output
            # (and that all chunks are present in the
            # first item in the return value).
            return (chunks, None)

    def get_db(self):
        """
        Get a connection to an in-memory SQLite database representing the
        pages in the MDContent list. The connection is cached on the instance.
        """
        if hasattr(self, '_db'):
            return self._db
        db = sqlite3.connect(':memory:')
        db.row_factory = sqlite3.Row
        def _locale_collation(a, b):
            va = locale.strxfrm(a)
            vb = locale.strxfrm(b)
            return 1 if va > vb else -1 if va < vb else 0
        db.create_collation('locale', _locale_collation)
        _casefold = lambda x: x.casefold() if isinstance(x, str) else str(x or '').casefold()
        db.create_function('casefold', 1, _casefold, deterministic=True)
        cur = db.cursor()
        fixed_cols = [
            'url', 'source_file', 'source_file_short', 'target',
            'template', 'MTIME', 'DATE', 'doc', 'rendered', ]
        # Discover page_* columns and guess each column's SQL type from the
        # first value seen for that key.
        page_cols = set()
        guess_type = {}
        for it in self:
            pg = it['data']['page']
            valid_keys = [_ for _ in pg.keys() if re.match(r'^[a-z][a-zA-Z0-9_]*$', _)]
            for k in valid_keys:
                page_cols.add('page_' + k)
                if k not in guess_type:
                    if isinstance(pg[k], bool):
                        guess_type[k] = 'bool'
                    elif isinstance(pg[k], int):
                        guess_type[k] = 'int'
                    elif isinstance(pg[k], float):
                        guess_type[k] = 'numeric'
                    elif isinstance(pg[k], datetime.date):
                        guess_type[k] = 'date'
                    elif isinstance(pg[k], datetime.datetime):
                        guess_type[k] = 'timestamp'
                    elif isinstance(pg[k], (list, dict)):
                        guess_type[k] = 'json'
                    else:
                        guess_type[k] = 'text'
        sql = """
          CREATE TABLE content (
            url text,
            source_file text,
            source_file_short text,
            target text,
            template text,
            mtime timestamp,
            "date" timestamp,
            doc text,
            rendered text"""
        for pc in page_cols:
            sql += ',\n    %s %s' % (pc, guess_type[pc[5:]])
        sql += "\n);"
        cur.execute(sql)
        page_cols_list = list(page_cols)
        all_cols = fixed_cols + page_cols_list
        ins_sql = "INSERT INTO content (%s) VALUES (%s)" % (
            ', '.join(all_cols), ', '.join([':'+_ for _ in all_cols]))
        def _val(v):
            # Coerce page values into something SQLite can bind.
            if not v and isinstance(v, dict):
                return None
            elif isinstance(v, (bool, int, float, str)):
                return v
            elif isinstance(v, (datetime.date, datetime.datetime)):
                return str(v)
            elif v is None:
                return None
            else:
                return json.dumps(v, default=str, ensure_ascii=False)
        for it in self:
            rec = {}
            for k in fixed_cols:
                # All-uppercase keys (MTIME, DATE) live in `data`.
                rec[k] = it['data'][k] if k.upper() == k else it[k]
            for pc in page_cols_list:
                k = pc[5:]
                rec[pc] = _val(it['data']['page'].get(k))
            cur.execute(ins_sql, rec)
        db.commit()
        self._db = db
        return db

    def get_db_columns(self):
        """
        Gets a list of columns in the content table of the SQLite database
        provided by the .get_db() method (some columns are fixed, but many
        of the `page_*` columns depend upon the metadata of the content items).
        """
        db = self.get_db()
        cur = db.cursor()
        cur.execute("select * from content where 0=1")
        return [_[0] for _ in cur.description]

    def page_match_sql(self, where_clause=None, bind=None,
                       order_by=None, limit=None, offset=None,
                       raw_sql=None, raw_result=False, first=False):
        """
        Filter this MDContentList by a SQL SELECT statement run against the
        SQLite database generated by self.get_db(). Parameters: `where_clause`
        (string), `bind` (bind values for the where clause), `order_by`
        (string), `limit` (int), `offset` (int), `raw_sql` (string),
        `raw_result` (string), `first` (bool). Either `where_clause` or
        `raw_sql` must be specified. If `first` is True, only the first item in
        the result is returned (or None, if the list of results is empty).
        """
        db = self.get_db()
        cur = db.cursor()
        if not (where_clause or raw_sql):
            raise Exception('Need either where_clause or raw_sql')
        if raw_sql:
            sql = raw_sql
            # source_file is the key used to map rows back to content items.
            if not raw_result and 'source_file' not in sql.lower():
                raise Exception(
                    'The raw_sql has no source_file column')
        else:
            sql = "SELECT {0} FROM content WHERE {1}".format(
                '*' if raw_result else 'source_file', where_clause)
        if order_by:
            sql += ' ORDER BY {}'.format(order_by)
        if limit:
            sql += ' LIMIT {}'.format(int(limit))
        if offset:
            sql += ' OFFSET {}'.format(int(offset))
        res = cur.execute(sql, bind) if bind else cur.execute(sql)
        if raw_result:
            return res.fetchone() if first else res
        else:
            self_as_dict = dict([(_['source_file'], _) for _ in self])
            if first:
                it = res.fetchone()
                return self_as_dict[it['source_file']] if it else it
            res_as_list = [self_as_dict[_['source_file']] for _ in res.fetchall()]
            return MDContentList(res_as_list)
class RenderCache:
    """
    Extremely simple cache for rendered HTML, keyed on a SHA1 hash of the
    markdown contents and the serialized rendering options.
    May become invalid if shortcodes change without changes in the markdown
    source.
    """
    SQL_INIT = """
      CREATE TABLE cache (
        key varchar not null primary key,
        val text,
        creat int not null default (strftime('%s', 'now')),
        upd int not null default (strftime('%s', 'now'))
      );
    """
    SQL_GETROW = "SELECT val FROM cache WHERE key = :key"
    SQL_INS = "INSERT INTO cache (key, val) VALUES (:key, :val)"
    SQL_UPD = "UPDATE cache SET val = :val, upd = strftime('%s', 'now') WHERE key = :key"

    def __init__(self, doc, optstr='', projdir=None):
        """
        Open (creating and initializing if necessary) the per-user cache
        database and compute the cache key for this doc/options pair.
        """
        if projdir:
            cachedir = os.path.join(projdir, 'tmp')
            if not os.path.exists(cachedir):
                os.mkdir(cachedir)
        else:
            cachedir = '/tmp'
        fn_template = os.path.join(cachedir, 'wmk_render_cache.%d.db')
        self.filename = fn_template % os.getuid()
        need_init = not os.path.exists(self.filename)
        self.db = sqlite3.connect(self.filename)
        self.cur = self.db.cursor()
        self.in_cache = False
        if need_init:
            self.cur.execute(self.SQL_INIT)
        payload = doc.encode('utf-8') + str(optstr).encode('utf-8')
        self.key = hashlib.sha1(payload).hexdigest()

    def get_cache(self):
        "Return the cached HTML for this key (None on miss); records hit status."
        row = self.cur.execute(self.SQL_GETROW, {'key': self.key}).fetchone()
        self.in_cache = bool(row)
        return row[0] if row else None

    def write_cache(self, html):
        "Store `html` under this key unless an identical value is already cached."
        if self.in_cache:
            return
        existing = self.get_cache()
        if existing is None:
            self.cur.execute(self.SQL_INS, {'key': self.key, 'val': html})
            self.cur.execute('COMMIT')
            self.in_cache = True
        elif existing != html:
            # An update should actually never happen; if it does, the optstr
            # will not have been based on all relevant options
            self.cur.execute(self.SQL_UPD, {'key': self.key, 'val': html})
            self.cur.execute('COMMIT')
class NavBase:
    """
    Common base for navigation tree nodes (root, sections and links).
    Provides construction from a formatted list (`nav_item_list`) and
    recursive search (`find_item`).
    """
    # NOTE(review): these are class-level defaults; `children` and `attr` are
    # mutable and shared until shadowed by an instance attribute — presumably
    # subclasses assign per-instance values in __init__ (not visible here).
    is_root = False
    is_section = False
    is_link = False
    title = None
    url = None  # only applicable to links
    parent = None
    children = []  # empty for links
    level = 0
    next = None  # only applicable to *local* links
    previous = None  # only applicable to *local* links
    attr = {}  # things like link_target, css_class, css_id...

    def nav_item_list(self, items, level=-1):
        """
        For assembling a nav from a properly formatted list.
        Each item is either {'title':..., 'children':...} (section as dict),
        or a single-key dict mapping a title to a list (section), a string
        (link URL) or a dict with a 'url' key (link with extra attributes).
        """
        ret = []
        for it in items:
            if not isinstance(it, dict):
                raise ValueError('Bad input; not dict: ' + str(it))
            # Special case: Section as dict rather than list
            if 'title' in it and 'children' in it:
                title = it.pop('title')
                children = it.pop('children')
                url = it.pop('url') if 'url' in it else None
                if children:
                    ret.append(
                        NavSection(
                            title=title, children=children,
                            parent=self, level=level+1, url=url, attr=it))
                else:
                    ret.append(
                        NavLink(
                            title=title, url=url, parent=self,
                            level=level+1, attr=it))
                continue
            elif len(it) != 1:
                raise ValueError('Bad input: ' + str(it))
            for title in it:
                if isinstance(it[title], list):
                    # A title may embed its URL as 'Title [url=/path/]'
                    if ' [url=' in title:
                        _title, url = title.split(' [url=')
                        url = url.rstrip(']')
                    else:
                        _title = title
                        url = None
                    ret.append(
                        NavSection(title=_title, children=it[title],
                                   parent=self, level=level+1, url=url))
                elif isinstance(it[title], str):
                    ret.append(
                        NavLink(title=title, url=it[title],
                                parent=self, level=level+1))
                elif isinstance(it[title], dict):
                    # Special case: Link as dict rather than str
                    ret.append(
                        NavLink(title=title, url=it[title]['url'],
                                parent=self, level=level+1, attr=it[title]))
        return ret

    def find_item(self, title=None, url=None, normalize=None):
        "Find item in nav by title or url."
        if normalize is None:
            normalize = lambda x: x
        for child in self.children:
            if title and child.title.lower() == title.lower():
                return child
            elif url and normalize(child.url) == normalize(url):
                return child
            elif child.children:
                # Bugfix: propagate `normalize` into the recursive search;
                # previously nested items were compared without normalization.
                found = child.find_item(title=title, url=url,
                                        normalize=normalize)
                if found:
                    return found
        return None
class NavItem(NavBase):
@property
def ancestors(self):
ret = []
parent = self.parent
while parent:
ret.append(parent)
parent = parent.parent
return ret
@property
def siblings(self):
parent = self.parent
return [c for c in self.parent.children if c != self]
@property
def is_local(self):
if 'is_local' in self.attr:
return self.attr['is_local']
if not self.url:
return False
return not self.url.startswith(('https:', 'http:', 'mailto:'))
def is_url(self, url, normalize):
"The given url is the same as self.url after normalization."
if not self.url:
return False
return normalize(url) == normalize(self.url)
def descendant_is_url(self, url, normalize):
"Has a descendant for which is_url() is True."
if not self.children: