Skip to content

Commit

Permalink
Merge pull request #800 from LeXofLeviafan/order-by-netloc
Browse files Browse the repository at this point in the history
order by netloc
  • Loading branch information
jarun authored Dec 8, 2024
2 parents 789c08f + d6ac2d0 commit 0e9cb92
Show file tree
Hide file tree
Showing 13 changed files with 206 additions and 118 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -553,9 +553,9 @@ PROMPT KEYS:

$ buku --random --print

44. Print out 3 **random** bookmarks **ordered** by title (reversed) and url:
44. Print out 3 **random** bookmarks **ordered** by netloc (reversed), title and url:

$ buku --random 3 --order ,-title,+url --print
$ buku --random 3 --order ,-netloc,title,+url --print

45. Print out a single **random** bookmark matching **search** criteria, and **export** into a Markdown file (in DB order):

Expand Down
65 changes: 40 additions & 25 deletions buku
Original file line number Diff line number Diff line change
Expand Up @@ -51,12 +51,12 @@ from typing import Any, Dict, List, Optional, Tuple, NamedTuple
from collections.abc import Sequence, Set, Callable
from warnings import warn
import xml.etree.ElementTree as ET
from urllib.parse import urlparse # urllib3.util.parse_url() encodes netloc

import urllib3
from bs4 import BeautifulSoup
from bs4.dammit import EncodingDetector
from urllib3.exceptions import LocationParseError
from urllib3.util import Retry, make_headers, parse_url
from urllib3.util import Retry, make_headers

try:
from mypy_extensions import TypedDict
Expand Down Expand Up @@ -462,6 +462,10 @@ class BookmarkVar(NamedTuple):
def taglist(self) -> List[str]:
return [x for x in self.tags_raw.split(',') if x]

@property
def netloc(self) -> str:
    """Network location of the bookmark URL, or '' when get_netloc() finds none."""
    location = get_netloc(self.url)
    return location if location else ''

bookmark_vars = lambda xs: ((x if isinstance(x, BookmarkVar) else BookmarkVar(*x)) for x in xs)


Expand Down Expand Up @@ -593,6 +597,7 @@ class BukuDb:
# Create a connection
conn = sqlite3.connect(dbfile, check_same_thread=False)
conn.create_function('REGEXP', 2, regexp)
conn.create_function('NETLOC', 1, get_netloc)
cur = conn.cursor()

# Create table if it doesn't exist
Expand Down Expand Up @@ -629,22 +634,22 @@ class BukuDb:
Fields are listed in priority order, with '+'/'-' prefix signifying ASC/DESC; assuming ASC if not specified.
Other than names from DB, you can pass those from JSON export."""
names = {'index': 'id', 'uri': 'url', 'description': 'desc', **({'title': 'metadata'} if for_db else {'metadata': 'title'})}
valid = list(names) + list(names.values()) + ['tags']
valid = list(names) + list(names.values()) + ['tags', 'netloc']
_fields = [(re.sub(r'^[+-]', '', s), not s.startswith('-')) for s in (fields or [])]
_fields = [(names.get(field, field), direction) for field, direction in _fields if field in valid]
return _fields or [('id', True)]

def _sort(self, records: List[BookmarkVar], fields=['+id'], ignore_case=True) -> List[BookmarkVar]:
text_fields = (set() if not ignore_case else {'url', 'desc', 'title', 'tags'})
text_fields = (set() if not ignore_case else {'url', 'desc', 'title', 'tags', 'netloc'})
get = lambda x, k: (getattr(x, k) if k not in text_fields else str(getattr(x, k) or '').lower())
order = self._ordering(fields, for_db=False)
return sorted(bookmark_vars(records), key=lambda x: [SortKey(get(x, k), ascending=asc) for k, asc in order])

def _order(self, fields=['+id'], ignore_case=True) -> str:
"""Converts field list to SQL 'ORDER BY' parameters. (See also BukuDb._ordering().)"""
text_fields = (set() if not ignore_case else {'url', 'desc', 'metadata', 'tags'})
return ', '.join(f'{field if field not in text_fields else "LOWER("+field+")"} {"ASC" if direction else "DESC"}'
for field, direction in self._ordering(fields))
get = lambda field: ('LOWER(NETLOC(url))' if field == 'netloc' else field if field not in text_fields else f'LOWER({field})')
return ', '.join(f'{get(field)} {"ASC" if direction else "DESC"}' for field, direction in self._ordering(fields))

def get_rec_all(self, *, lock: bool = True, order: List[str] = ['id']):
"""Get all the bookmarks in the database.
Expand Down Expand Up @@ -4053,6 +4058,20 @@ def import_html(html_soup: BeautifulSoup, add_parent_folder_as_tag: bool, newtag
)


def get_netloc(url):
    """Extract the netloc token from a URL.

    Parameters
    ----------
    url : str
        URL to inspect; it may lack a scheme.

    Returns
    -------
    str or None
        The netloc part of the URL, or None if it is empty
        or the URL could not be parsed (the error is logged).
    """

    try:
        parsed = urlparse(url)
        location = parsed.netloc
        if not (location or parsed.scheme):
            # Without a scheme or leading '//' the host is parsed as a path,
            # so retry with '//' prepended
            location = urlparse('//' + url).netloc
    except Exception as e:
        LOGERR('%s, URL: %s', e, url)
        return None

    return location if location else None


def is_bad_url(url):
"""Check if URL is malformed.
Expand All @@ -4069,16 +4088,8 @@ def is_bad_url(url):
True if URL is malformed, False otherwise.
"""

# Get the netloc token
try:
netloc = parse_url(url).netloc
if not netloc:
# Try of prepend '//' and get netloc
netloc = parse_url('//' + url).netloc
if not netloc:
return True
except LocationParseError as e:
LOGERR('%s, URL: %s', e, url)
netloc = get_netloc(url)
if not netloc:
return True

LOGDBG('netloc: %s', netloc)
Expand All @@ -4088,10 +4099,7 @@ def is_bad_url(url):
return True

# netloc should have at least one '.'
if netloc.rfind('.') < 0:
return True

return False
return '.' not in netloc


def is_nongeneric_url(url):
Expand Down Expand Up @@ -4277,6 +4285,14 @@ def get_data_from_page(resp):
return (None, None, None)


def extract_auth(url):
    """Split the credentials off a URL.

    Parameters
    ----------
    url : str
        URL that may carry a 'user[:password]@' part in its netloc.

    Returns
    -------
    tuple
        (auth, url) where auth is 'user' or 'user:password', or None if
        the URL has no userinfo part; the returned URL contains no auth part.
    """

    _url = urlparse(url)
    if _url.username is None:  # no '@' in netloc
        return None, url
    auth = _url.username + ('' if _url.password is None else f':{_url.password}')
    # Strip the first occurrence only (the one in the netloc): the same
    # text may reappear later in the path or query and must be kept intact
    return auth, url.replace(auth + '@', '', 1)

def gen_headers():
"""Generate headers for network connection."""

Expand All @@ -4293,15 +4309,14 @@ def gen_headers():
MYPROXY = os.environ.get('https_proxy')
if MYPROXY:
try:
url = parse_url(MYPROXY)
auth, MYPROXY = extract_auth(MYPROXY)
except Exception as e:
LOGERR(e)
return

# Strip username and password (if present) and update headers
if url.auth:
MYPROXY = MYPROXY.replace(url.auth + '@', '')
auth_headers = make_headers(basic_auth=url.auth)
if auth:
auth_headers = make_headers(basic_auth=auth)
MYHEADERS.update(auth_headers)

LOGDBG('proxy: [%s]', MYPROXY)
Expand Down Expand Up @@ -5179,7 +5194,7 @@ def browse(url):
If True, tries to open links in a GUI based browser.
"""

if not parse_url(url).scheme:
if not urlparse(url).scheme:
# Prefix with 'http://' if no scheme
# Otherwise, opening in browser fails anyway
# We expect http to https redirection
Expand Down
6 changes: 3 additions & 3 deletions buku.1
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,7 @@ Exclude bookmarks matching the specified keywords. Works with --sany, --sall, --
Output random bookmarks out of the selection (1 unless amount is specified).
.TP
.BI \--order " fields [...]"
Order printed/exported records by the given fields (from DB or JSON). You can specify sort direction for each by prepending '+'/'-' (default is '+').
Order printed/exported records by the given fields (from DB or JSON) and/or netloc. You can specify sort direction for each by prepending '+'/'-' (default is '+').
.SH ENCRYPTION OPTIONS
.TP
.BI \-l " " \--lock " [N]"
Expand Down Expand Up @@ -946,11 +946,11 @@ Print out a single \fBrandom\fR bookmark:
.EE
.PP
.IP 44. 4
Print out 3 \fBrandom\fR bookmarks \fBordered\fR by title (reversed) and url:
Print out 3 \fBrandom\fR bookmarks \fBordered\fR by netloc (reversed), title and url:
.PP
.EX
.IP
.B buku --random 3 --order ,-title,+url
.B buku --random 3 --order ,-netloc,title,+url
.EE
.PP
.IP 45. 4
Expand Down
2 changes: 1 addition & 1 deletion bukuserver/filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ def clean(self, value):

class BookmarkOrderFilter(BaseFilter):
DIR_LIST = [('asc', _l('natural')), ('desc', _l('reversed'))]
FIELDS = ['index', 'url', 'title', 'description', 'tags']
FIELDS = ['index', 'url', 'netloc', 'title', 'description', 'tags']

def __init__(self, field, *args, **kwargs):
self.field = field
Expand Down
41 changes: 22 additions & 19 deletions bukuserver/translations/de/LC_MESSAGES/messages.po
Original file line number Diff line number Diff line change
Expand Up @@ -196,73 +196,73 @@ msgstr ""
msgid "Index"
msgstr ""

#: /home/lex/Work/buku/bukuserver/views.py:221
#: /home/lex/Work/buku/bukuserver/views.py:223
#, python-format
msgid "url invalid: %(url)s"
msgstr ""

#: /home/lex/Work/buku/bukuserver/views.py:232
#: /home/lex/Work/buku/bukuserver/views.py:234
msgid "Failed to create record."
msgstr ""

#: /home/lex/Work/buku/bukuserver/views.py:245
#: /home/lex/Work/buku/bukuserver/views.py:552
#: /home/lex/Work/buku/bukuserver/views.py:247
#: /home/lex/Work/buku/bukuserver/views.py:554
msgid "Failed to delete record."
msgstr ""

#: /home/lex/Work/buku/bukuserver/views.py:260
#: /home/lex/Work/buku/bukuserver/views.py:262
msgid "Invalid search mode combination"
msgstr ""

#: /home/lex/Work/buku/bukuserver/views.py:348
#: /home/lex/Work/buku/bukuserver/views.py:350
msgid "netloc match"
msgstr ""

#: /home/lex/Work/buku/bukuserver/views.py:381
#: /home/lex/Work/buku/bukuserver/views.py:383
msgid "contain"
msgstr ""

#: /home/lex/Work/buku/bukuserver/views.py:382
#: /home/lex/Work/buku/bukuserver/views.py:384
msgid "not contain"
msgstr ""

#: /home/lex/Work/buku/bukuserver/views.py:383
#: /home/lex/Work/buku/bukuserver/views.py:385
msgid "number equal"
msgstr ""

#: /home/lex/Work/buku/bukuserver/views.py:384
#: /home/lex/Work/buku/bukuserver/views.py:386
msgid "number not equal"
msgstr ""

#: /home/lex/Work/buku/bukuserver/views.py:385
#: /home/lex/Work/buku/bukuserver/views.py:387
msgid "number greater than"
msgstr ""

#: /home/lex/Work/buku/bukuserver/views.py:386
#: /home/lex/Work/buku/bukuserver/views.py:388
msgid "number smaller than"
msgstr ""

#: /home/lex/Work/buku/bukuserver/views.py:412
#: /home/lex/Work/buku/bukuserver/views.py:570
#: /home/lex/Work/buku/bukuserver/views.py:414
#: /home/lex/Work/buku/bukuserver/views.py:572
msgid "Failed to update record."
msgstr ""

#: /home/lex/Work/buku/bukuserver/views.py:427
#: /home/lex/Work/buku/bukuserver/views.py:429
msgid "<UNTAGGED>"
msgstr ""

#: /home/lex/Work/buku/bukuserver/views.py:433
#: /home/lex/Work/buku/bukuserver/views.py:470
#: /home/lex/Work/buku/bukuserver/views.py:435
#: /home/lex/Work/buku/bukuserver/views.py:472
msgctxt "tag"
msgid "Name"
msgstr ""

#: /home/lex/Work/buku/bukuserver/views.py:433
#: /home/lex/Work/buku/bukuserver/views.py:435
msgctxt "tag"
msgid "Usage Count"
msgstr ""

#: /home/lex/Work/buku/bukuserver/views.py:533
#: /home/lex/Work/buku/bukuserver/views.py:535
msgid "top most common"
msgstr ""

Expand Down Expand Up @@ -539,6 +539,9 @@ msgstr ""
msgid "by url"
msgstr ""

msgid "by netloc"
msgstr ""

msgid "by title"
msgstr ""

Expand Down
Loading

0 comments on commit 0e9cb92

Please sign in to comment.