| Must Know | Classes | Functions | 
|---|---|---|
| Collections | Itertools | Functools | 
|---|---|---|
| String | Int | Set | Tuple | 
|---|---|---|---|
| Conditional | For-Loop | Try-Except | Design | Ipython | 
|---|---|---|---|---|
| Built-ins | 
|---|
| Numpy | Pandas | Matplotlib (Pyplot) | 
|---|---|---|
| Seaborn | 
|---|
# Nested comprehension with a filter: every pair where i > j.
[(i, j) for i in range(3) for j in range(3) if i > j]
# [(1, 0), (2, 0), (2, 1)]

# map() with a single iterable; the star-unpacking materialises the iterator.
li = [1, 2, 3]
li = [*map(lambda x: x * 10, li)]
# li = [10, 20, 30]

# map() with several iterables: the lambda receives one item from each.
num1 = [100, 1, 20]
num2 = [19, 4, 94]
num3 = [40, 6, 30]
[*map(lambda x, y, z: max(x, y, z), num1, num2, num3)]
# [100, 6, 94]

# filter() keeps the items for which the predicate is truthy.
names = ['Liam', 'Olivia', 'Noah', 'Emma', 'Oliver', 'Ava']
choice = filter(lambda x: x.startswith('O'), names)
print(*choice, sep=', ')  # Olivia, Oliver

# zip() pairs items up; zip(*pairs) is its inverse ("unzip").
a = [1, 2, 3]
b = [4, 5, 6]
c = [*zip(a, b)]  # [(1, 4), (2, 5), (3, 6)]
a, b = zip(*c)    # a=(1, 2, 3),  b=(4, 5, 6)


def example(a, *arg, b=0, **kwarg):
    """Print each kind of argument to show how a call is distributed.

    ``a`` takes the first positional value, ``arg`` collects the remaining
    positionals as a tuple, ``b`` is keyword-only (it follows ``*arg``) and
    ``kwarg`` collects every other keyword argument as a dict.
    """
    print(a)     # 1
    print(arg)   # (2, 3)
    print(b)     # 1
    print(kwarg) # {'x': 'a', 'y': [1, 2, 3]}
example(1, 2, 3, b=1, x='a', y=[1, 2, 3])


def func(greet, time, name):
    """Print the three values separated by spaces."""
    print(greet, time, name)
# A list is unpacked into positional arguments, a dict into keyword arguments.
func(*["Good", "Morning"], **{"name": "Jay"})
# Good Morning Jay

# A starred target swallows whatever the other targets do not take.
a, b, *_ = [1, 2, 3, 4, 5]
# 1, 2, [3, 4, 5]

# The starred target may sit in the middle; it always becomes a list.
first, *amid, last = map(lambda x: x**2, range(1, 10000))
first  # 1
last   # 99980001

# Unpacking also works in a for-loop target.
sales = [("Pencil", 0.22, 1500), ("Notebook", 1.30, 550)]
for product, *_ in sales:
    print(product)
    # Pencil, Notebook

from functools import wraps
from itertools import takewhile


def compute(i):
    """Return ``i`` followed by its 2nd, 3rd, 4th and 5th powers as a tuple."""
    return i, i ** 2, i ** 3, i ** 4, i ** 5
# Only the first three results are kept; the rest go to the throwaway `_`.
num, power, cube, *_ = compute(3)
power  # 9
cube   # 27

# Double-star unpacking merges dicts; later keys win on conflict.
number = {"one": 1, "two": 2}
letter = {"a": "A", "b": "B"}
combine = {**number, **letter}
combine  # {'one': 1, 'two': 2, 'a': 'A', 'b': 'B'}


def square_it(value):
    """Lazily yield the squares of 0 .. value - 1, one at a time."""
    for i in range(value):
        yield i**2
li = square_it(10_000_000)
# The squares only grow, so takewhile() can stop at the first value >= 50
# instead of walking all ten million items as a filtering comprehension would.
list(takewhile(lambda i: i < 50, li))  # [0, 1, 4, 9, 16, 25, 36, 49]


def count_decorator(count):  # decorator factory: receives the argument
    """Build a decorator that runs the wrapped function ``count`` times.

    Each call of the decorated function prints the function name and the
    call arguments once, then invokes the original ``count`` times.
    Returns the result of the last invocation (``None`` when ``count`` is 0).
    """
    def decorator(orig_func):
        @wraps(orig_func)  # keep __name__/__doc__ of the decorated function
        def wrapper(*args, **kwargs):
            print(f"func name: {orig_func.__name__}")
            print(f"func args: {args}, {kwargs}")
            result = None
            for _ in range(count):  # use the factory argument
                result = orig_func(*args, **kwargs)
            return result
        return wrapper
    return decorator  # the actual decorator applied to the function
@count_decorator(2)
def greet(msg):
    print(msg)
greet("hello")
# func name: greet
# func args: ('hello',), {}
# hello
# hello

import os
from contextlib import contextmanager


@contextmanager
def enterFolder(folderName):
    """Temporarily change the working directory to ``folderName``.

    The previous working directory is restored when the ``with`` block
    exits, including when the block raises.
    """
    home = os.getcwd()
    os.chdir(folderName)
    try:
        yield
    finally:
        # Without try/finally an exception in the with-body would leave the
        # process stranded inside folderName.
        os.chdir(home)
# Several context managers can share one `with`; 'folder1' must already exist.
with enterFolder('folder1'), open('example1.txt', 'w') as f:
    f.write('file1')


class BinaryInt(str):
    """A string holding the binary representation of an integer."""
    def __new__(cls, val):
        # str is immutable, so the value has to be fixed in __new__.
        # The format spec is "b": a spec of " b" would prepend a space.
        return str.__new__(cls, f"{val:b}")
    def __add__(self, val):
        """Add the integer ``val`` and return the sum as a binary ``str``."""
        total = int(self, 2) + val
        return f"{total:b}"
a = BinaryInt(2)
print(a)      # 10
print(a + 4)  # 110


class Meta(type):
    """Metaclass that refuses to create a class lacking ``must_to_do``.

    The check runs at class-definition time for every class except the one
    named "Base".
    """
    def __new__(mtcls, name, bases, attrs):
        # attrs holds only what the class body itself defines.
        if name != "Base" and "must_to_do" not in attrs:
            raise TypeError("Bad Class: must_to_do() is needed")
        return super().__new__(mtcls, name, bases, attrs)
class Base(metaclass=Meta):
    def server_func(self):
        """Delegate to the ``must_to_do`` hook required of subclasses."""
        return self.must_to_do()
# Defining this subclass raises immediately because must_to_do is missing.
class Derived(Base):
    ...
# TypeError: Bad Class: must_to_do() is needed

import concurrent.futures
# NOTE: `load_url` and `URLS` are placeholders that are not defined in this snippet.
# Thread pool: submit() returns a Future per task; as_completed() yields the
# futures in the order they finish.
with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
    futures = [executor.submit(load_url, url, 60) for url in URLS]
    for future in concurrent.futures.as_completed(futures):
        result = future.result()
        print(len(result))
# Process pool: map() yields results in input order; each iterable supplies
# one positional argument of load_url.
with concurrent.futures.ProcessPoolExecutor() as executor:
    results = executor.map(load_url, URLS, [60] * len(URLS), chunksize=4)
    for result in results:
        print(len(result))


class Person:
    """Minimal class used to show what ``self`` is."""
    def __init__(self, name):
        self.name = name
    def say(self):
        """Return a short self-introduction."""
        return f"I'm {self.name}"
p = Person("Jay")
p.say() == Person.say(p)  # True


class Employee:
    """Shows the difference between class variables and instance variables."""
    num_emp = 0  # Class variable: one value shared through the class
    def __init__(self, pay):
        self.pay = pay  # Instance variable: one value per object
        Employee.num_emp += 1  # count every instance that gets created
e1 = Employee(100)
e2 = Employee(200)
e1.num_emp        # 2
Employee.num_emp  # 2
e1.pay  # 100
Employee.pay  # AttributeError: type object 'Employee' has no attribute 'pay'


class Person:
    """Shows ``@staticmethod`` and ``@classmethod`` (alternate constructor)."""
    def __init__(self, name, age):
        self.name = name
        self.age = age
    @staticmethod
    def splitPersonString(string, split_sign="-"):
        """Split ``string`` on ``split_sign``; needs neither ``self`` nor ``cls``."""
        return string.split(split_sign)
    @classmethod
    def fromString(cls, cls_str):
        """Build an instance from a ``"name, age"`` string.

        The age part is converted to ``int`` so that ``age`` is a number
        rather than the raw substring.
        """
        name, age = cls.splitPersonString(cls_str, ", ")
        return cls(name, int(age))
p1 = Person.fromString("Jay, 99")
p1.name  # Jay
p1.age   # 99


class Dog:
    """Shows the single- and double-underscore naming conventions."""
    _weight = 5  # single underscore: "internal" by convention, still accessible
    def __bark(self):  # double underscore: name-mangled to _Dog__bark
        print("bark")
dog = Dog()
dog._weight       # 5
dog.__bark()      # AttributeError: 'Dog' object has no attribute '__bark'
dog._Dog__bark()  # bark


class User:
    """Shows ``@property``: a computed attribute and a write-only attribute."""
    def __init__(self, first_name, last_name, password):
        self.first_name = first_name
        self.last_name = last_name
        self.password = password  # goes through the setter below
    @property
    def fullname(self):
        """Full name computed from the first and last name on each access."""
        return f"{self.first_name} {self.last_name}"
    @property
    def password(self):
        """Reading the password is refused; only its hash is stored."""
        raise AttributeError("password is not readable.")
    @password.setter
    def password(self, password):
        from hashlib import md5
        # Hash the value that was actually passed in (a bytes literal such as
        # b"{password}" is never interpolated and would hash the same
        # placeholder for every user).
        # NOTE: MD5 is used for illustration only; it is not suitable for
        # storing real passwords.
        self.password_hash = md5(password.encode()).hexdigest()
user = User("Mimi", "Wang", "0000")
user.fullname       # Mimi Wang
user.password_hash  # 32-character hex MD5 digest of "0000"
user.password       # AttributeError: password is not readable.

# Name lookup order: Local -> Enclosing -> Global -> Builtins.
min([1, 2, 31])  # builtins min
min = "global min"  # from here on the global name shadows the builtin
def outer():
    # we can do "global min" here to change global
    min = "enclosing min"

    def inner():
        # we can do "nonlocal min" here to change enclosing
        min = "local min"


from abc import ABC, abstractmethod
class Base(ABC):
    """Abstract base: subclasses must provide ``foo`` and ``do``."""
    @property
    @abstractmethod
    def foo(self):
        ...
    @abstractmethod
    def do(self):
        ...


from dataclasses import InitVar, dataclass, field
from typing import List
@dataclass
class InventoryItem:
    """Dataclass showing ``field`` options and an init-only variable."""
    name: str
    unit_price: float = field(default=0.0)
    quantity_on_hand: int = field(default=0, repr=False)  # left out of repr()
    parts: List[str] = field(default_factory=list)  # fresh list per instance
    parts_number: InitVar[int] = 0  # passed to __post_init__, not stored
    def __post_init__(self, parts_number):
        """Append ``part1`` .. ``part<parts_number>`` to ``parts``."""
        self.parts.extend([f"part{i}" for i in range(1, parts_number + 1)])
item = InventoryItem("product", parts_number=2)
# InventoryItem(name='product', unit_price=0.0, parts=['part1', 'part2'])


def getClass(x):
    """Return a class created inside a loop, or ``None`` when ``x`` is not 1.

    A class statement is ordinary executable code: each iteration rebinds
    ``Example``, so the returned class is the one from the last iteration.
    """
    if x == 1:
        for i in range(11):
            class Example:
                a = i
        return Example
    return None
cls = getClass(1)
cls.b = "123"  # class attributes can be added after creation
print(cls.a, cls.b)  # 10 123


def add_with_b(b):
    """Return a function that adds ``b`` (captured by closure) to its argument."""
    def add(a):
        return a + b
    return add
add4 = add_with_b(4)
add4(3)  # 7
add4(7)  # 11


class Cat:
    """Object whose attributes are attached dynamically with ``setattr``."""
    def __repr__(self):
        return f"({self.name}: {self.age})"
listOfCats = []
attrs = [{"name": "meow1", "age": 5}, {"name": "meow2", "age": 10}]
for attr in attrs:
    cat = Cat()
    for key, val in attr.items():
        setattr(cat, key, val)
    listOfCats.append(cat)
print(listOfCats)
# [(meow1: 5), (meow2: 10)]

# A def statement may appear anywhere, even inside a loop. `say` looks `i` up
# when it is called, so after the loop it prints 99.
for i in range(100):
    def say():
        print(i)
def returnFunc(a):
    """Return ``mul`` when ``a`` < 100, otherwise ``add``.

    Both returned functions take one argument ``b`` and print the result of
    combining it with the captured ``a``.
    """
    if a < 100:
        def mul(b):
            print(a * b)
        return mul
    else:
        def add(b):
            print(a + b)
        return add


from collections import defaultdict
# Missing keys are created on first access with the factory's value (list()).
d = defaultdict(list)
d["a"] = [1, 2, 3]
d["b"].append(4)
d["c"].extend([5, 6])
# defaultdict(<class 'list'>, {'a': [1, 2, 3], 'b': [4], 'c': [5, 6]})

from collections import OrderedDict
location = ["C", "B", "A"]
population = [32, 46, 12]
d = OrderedDict(zip(location, population))
# OrderedDict([('C', 32), ('B', 46), ('A', 12)])
d["D"] = 44
# OrderedDict([('C', 32), ('B', 46), ('A', 12), ('D', 44)])
d.popitem(last=False)  # last=False pops from the front (FIFO)
# OrderedDict([('B', 46), ('A', 12), ('D', 44)])
d.move_to_end("D", last=False)  # last=False moves the key to the front
# OrderedDict([('D', 44), ('B', 46), ('A', 12)])

from collections import Counter
c = Counter(cats=4, dogs=8)
# Counter({'dogs': 8, 'cats': 4})
c.update(birds=10)  # update() adds counts instead of replacing them
# Counter({'birds': 10, 'dogs': 8, 'cats': 4})
c = c - Counter({"birds": 5})
# Counter({'dogs': 8, 'birds': 5, 'cats': 4})
c.most_common(2)
# [('dogs', 8), ('birds', 5)]

from collections import namedtuple
Dog = namedtuple("Dog", "name, age")
d1 = Dog("funny", 4)
features = ["happy", 3]
d2 = Dog._make(features)  # build an instance from any iterable
# Dog(name='happy', age=3)
d2._asdict()
# {'name': 'happy', 'age': 3}  (a plain dict on Python 3.8+, OrderedDict before)

from collections import deque
li = [40, 30, 50, 46, 39, 44]
d = deque(li[:2])
# Let's compute the moving average with range=3
d.appendleft(0)  # pad the window so the first popleft() removes a 0
s = sum(d)
for elem in li[2:]:
    # Slide the window: add the new element, drop the oldest one.
    s += elem - d.popleft()
    d.append(elem)
    print(s / 3)
    # 40, 42, 45, 43

from itertools import islice

from itertools import count
gen = count(2.5, 0.5)
# count() never stops, so the demo takes a bounded slice of it.
for x in islice(gen, 4):
    print(x)
    # 2.5, 3.0, 3.5, 4.0  (a bare `for x in gen` would go on non-stop)

from itertools import cycle
gen = cycle([1, 2, 3])
# cycle() also repeats forever; again only a bounded slice is consumed.
for x in islice(gen, 5):
    print(x)
    # 1, 2, 3, 1, 2  (a bare `for x in gen` would go on non-stop)

from itertools import repeat
class Cat:
    ...

# repeat() yields the very same object each time (note the identical address).
gen = repeat(Cat(), 2)
for cat in gen:
    print(cat)
    # <__main__.Cat object at 0x0000019AC1C5D348>
    # <__main__.Cat object at 0x0000019AC1C5D348>

import operator
from itertools import accumulate
gen = accumulate([1, 2, 3, 4])  # running sum by default
list(gen)  # [1, 3, 6, 10]
gen = accumulate([1, 2, 3, 4], func=operator.mul)  # running product
list(gen)  # [1, 2, 6, 24]

from itertools import chain
gen = chain([1, 2], [3, 4])
list(gen)  # [1, 2, 3, 4]
gen = chain("AB", "CD")
list(gen)  # [A, B, C, D]

from itertools import compress
gen = compress([1, 2, 3], [1, 0, 1])
gen = compress([1, 2, 3], [True, False, True])  # same
list(gen)  # [1, 3]

from itertools import filterfalse
gen = filterfalse(lambda x: x%2 == 0, [1, 2, 3])  # keeps items where the predicate is false
list(gen)  # [1, 3]

from itertools import groupby
# groupby() only groups *consecutive* equal keys, hence the second "A" group.
gen = groupby("AABBCCCAA")  # default func = lambda x: x
for k, g in gen:
    print(k, list(g))
    # A [A, A]
    # B [B, B]
    # C [C, C, C]
    # A [A, A]
gen = groupby([1, 2, 3, 4], lambda x: x // 3)
for k, g in gen:
    print(k, list(g))
    # 0 [1, 2]
    # 1 [3, 4]
gen = groupby([("A", 100), ("B", 200), ("C", 600)], lambda x: x[1] > 500)
for k, g in gen:
    print(k, list(g))
    # False [(A, 100), (B, 200)]
    # True  [(C, 600)]

from itertools import islice
gen = islice([1, 2, 3], 2)  # equals to A[:2]
list(gen)  # [1, 2]
gen = islice("ABCD", 2, 4)  # equals to A[2:4]
list(gen)  # [C, D]
gen = islice("ABCD", 0, None, 2)  # equals to A[::2]
list(gen)  # [A, C]

from itertools import starmap
# with only one argument
gen = starmap(lambda x: x.lower(), "ABCD")
list(gen)  # [a, b, c, d]
# with 2 arguments
gen = starmap(lambda x, y: x + y, [(1, 2), (3, 4)])
list(gen)  # [3, 7]
# with different size of arguments
gen = starmap(lambda *keys: sum(keys) / len(keys), [[3, 8, 3], [4, 2]])
list(gen)  # [4.6666667, 3.0]

from itertools import takewhile
# takewhile() stops at the first item that fails the predicate.
gen = takewhile(lambda x: x < 2, [1, 2, 3, 2, 1])
list(gen)  # [1]
gen = takewhile(lambda x: x.isupper(), "ABCdefgHIJ")
list(gen)  # [A, B, C]

from itertools import dropwhile
# dropwhile() skips items until the predicate first fails, then yields the rest.
gen = dropwhile(lambda x: x < 2, [1, 2, 3, 2, 1])
list(gen)  # [2, 3, 2, 1]
gen = dropwhile(lambda x: x.isupper(), "ABCdefgHIJ")
list(gen)  # [d, e, f, g, H, I, J]

from itertools import zip_longest
gen = zip_longest("ABC", ("X", "Y"))
list(gen)  # [('A', 'X'), ('B', 'Y'), ('C', None)]
gen = zip_longest("ABC", [1, 2], fillvalue=-1)
list(gen)  # [('A', 1), ('B', 2), ('C', -1)]

from itertools import product
gen = product("AB", "CD")
list(gen)  # [AC, AD, BC, BD]
gen = product("AB", repeat=2)
list(gen)  # [AA, AB, BA, BB]
gen = product("AB", "CD", repeat=2)
list(gen)
# [ACAC, ACAD, ACBC, ACBD,
#  ADAC, ADAD, ADBC, ADBD,
#  BCAC, BCAD, BCBC, BCBD,
#  BDAC, BDAD, BDBC, BDBD]

from itertools import permutations
gen = permutations("ABC") # same as r=3
list(gen)  # [ABC, ACB, BAC, BCA, CAB, CBA]
gen = permutations("ABC", r=2)
list(gen)  # [AB, AC, BA, BC, CA, CB]
gen = permutations("ABC", r=1)
list(gen)  # [A, B, C]

from itertools import combinations
gen = combinations("ABC", 1)
list(gen)
# [A, B, C]
gen = combinations("ABC", 2)
list(gen)
# [AB, AC, BC]
gen = combinations("ABC", 3)
list(gen)
# [ABC]

from itertools import combinations_with_replacement
gen = combinations_with_replacement("ABC", 1)
list(gen)
# [A, B, C]
gen = combinations_with_replacement("ABC", 2)
list(gen)
# [AA, AB, AC, 
#  BB, BC, 
#  CC]
gen = combinations_with_replacement("ABC", 3)
list(gen)
# [AAA, AAB, AAC, ABB, ABC, ACC,
#  BBB, BBC, BCC,
#  CCC]

from functools import reduce
# The third argument is the initial accumulator: 100 - 1 - 2 - 3 - 4 - 5.
reduce(lambda x, y: x - y, [1, 2, 3, 4, 5], 100)  # 85

first_name = "Kain"
last_name = "Mccarthy"
print(f"Hi, I'm {first_name} {last_name}.")  # Hi, I'm Kain Mccarthy.
pi = 3.14159265359
print(f"{pi:.2f}")  # 3.14
d = {"name": "Shelly"}
print(f"She is {d['name']}")  # She is Shelly
i = 1000000
print(f"{i:,}")  # 1,000,000
# Ref:
#   * https://youtu.be/nghuHvKLhJA
#   * https://blog.louie.lu/2017/08/08/outdate-python-string-format-and-fstring/

# Underscores in numeric literals are ignored; they only aid readability.
a = 100_000_000
b =  10_000_000
c =  1_0_0
print(f"{a+b+c:,}")  # 110,000,100
# Ref:
#   * https://youtu.be/C-gEQdGVXbk&t=140long_list = [i for i in range(100_000_000)]
long_set = set(long_list)
%%time
100_000_000 in long_list
# False
# Wall time: 1.26 s
%%time
100_000_000 in long_set
# False
# Wall time: 0 ns
# Ref:
#   * https://stackoverflow.com/questions/2831212/python-sets-vs-lists/17945009
#   * https://youtu.be/r3R3h5ly_8g?t=1010

a, b = 1, 2
a  # 1
b  # 2
# Swap without a temporary: the right-hand tuple is built before assignment.
a, b = b, a
a  # 2
b  # 1
# Ref:
#   * https://youtu.be/VBokjWj_cEA?list=LL&t=445

x = 0  # sample value so the snippet can run on its own
if x < 1:
    x += 1
else:
    x -= 1
# equivalent to:
x = (x + 1) if (x < 1) else (x - 1)
# Ref:
#   * https://www.youtube.com/watch?v=C-gEQdGVXbk&t=34s

arr = ["a", "b", "c"]
for index, element in enumerate(arr):
    print(index, element)
    # 0 a
    # 1 b
    # 2 c
# `start` only changes the counter, not which elements are visited.
for index, element in enumerate(arr, start=3):
    print(index, element)
    # 3 a
    # 4 b
    # 5 c
# Ref
#   * https://youtu.be/VBokjWj_cEA?list=LL&t=190

for text in "to be or not to be".split():
    if text.strip().startswith("o"):
        print(f"Found it! `{text}`")
        break
else:
    # Runs only when the loop finished without hitting `break`.
    print("Not found")
# Found it! `or`
# Ref:
#   * https://www.youtube.com/watch?v=Dh-0lAyc3Bc

try:
    print(1/1)
except ZeroDivisionError as e:  # catch only what the division can raise
    print(e)
else:
    print("Safe")  # executed when except didn't happen
finally:
    print("Done")  # Always executed
# 1.0
# Safe
# Done
# Ref:
#   * https://youtu.be/VBokjWj_cEA?list=LL&t=1331


def func(a: str, b: int = 3) -> str:
    """Return ``a`` repeated ``b`` times; annotations are hints, not checks."""
    return a*b
func.__annotations__  # {'a': <class 'str'>, 'b': <class 'int'>, 'return': <class 'str'>}
func("hi")     # hihihi
func("hi", 5)  # hihihihihi


# Annotations may be any expression; they are evaluated and stored, never enforced.
def func(a: "str longer than 5", b: 1+2 = 3) -> "str longer b times":
    """Return ``a`` repeated ``b`` times."""
    return a*b
func.__annotations__  # {'a': 'str longer than 5', 'b': 3, 'return': 'str longer b times'}
func("hi")         # hihihi
func("ohayou", 2)  # ohayouohayou


from typing import Any, Dict, Iterable, List, Union
def func(a: List[int], b: Union[str, int], c: Dict[str, int], d: Iterable, e: Any):
    """Print one line per parameter to illustrate the ``typing`` hints used.

    Reads ``len(a)``, ``c['something']`` and iterates over ``d``.
    """
    print(len(a))

    print(f"{b} can be str or int.")

    print(f"{c['something']} will return int.")

    for i in d:
        print(i)

    print(f"{type(e)} can be any type.")
# Ref:
#   * https://myapollo.com.tw/zh-tw/python-typing-module/

# Three ways to write a function body that does nothing.
# Style 1
def my_abstract_method(self):
    pass
# Style 2
def my_abstract_method(self):
    ...
# Style 3
def my_abstract_method(self):
    """
    This function is ...
    """
# Ref:
#   * https://stackoverflow.com/questions/55274977/when-is-the-usage-of-the-python-ellipsis-to-be-preferred-over-pass
#   * https://stackoverflow.com/questions/772124/what-does-the-ellipsis-object-do

#%%
1+1
# 2
# Ref:
#   * https://code.visualstudio.com/docs/python/jupyter-support-py

%time sleep(0.3)
# Wall time: 310 ms
%timeit sleep(0.3)
# 311 ms ± 2.06 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)

%%time
for i in range(10):
    sleep(0.1)
# Wall time: 1.09 s
%%timeit
for i in range(10):
    sleep(0.1)
# 1.09 s ± 2.07 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)

- https://stackoverflow.com/questions/17579357/time-time-vs-timeit-timeit
- https://blog.csdn.net/shuibuzhaodeshiren/article/details/86650688
!pip install -U memory_profiler
%load_ext memory_profiler

%memit [i for i in range(1000)]
# peak memory: 51.31 MiB, increment: 0.36 MiB

%%memit
l = []
for x in range(10000):
    l.append(x*2)
# peak memory: 52.76 MiB, increment: 0.70 MiB

- https://pypi.org/project/memory-profiler/
- https://ipython-books.github.io/44-profiling-the-memory-usage-of-your-code-with-memory_profiler/
from pathlib import Path

sub_folder = Path("subfolder/subfolder")
# parents=True creates missing parent folders; exist_ok=True tolerates reruns.
sub_folder.mkdir(parents=True, exist_ok=True)
file_ = sub_folder / "test.txt"  # "/" joins path segments
file_.touch()
file_.write_text("Hello")
file_.read_text()
file_.unlink()  # delete the file
Path("subfolder/subfolder").rmdir()

np.array([[1, 2], [3, 4], [5, 6]])  # create from list
np.zeros((3, 3))  # create filled with 0's
np.ones((2, 4, 4))  # create filled with 1's
np.empty((5, 2))  # create without initialising the values (fast)
np.arange(2, 10, 3)  # create array from range (start, end, step_size)
np.linspace(5, 50, 20)  # create a linear space (start, end, num_elements)
# create from random generator
rng = np.random.default_rng(seed=42)
rng.random((2, 4))
rng.normal(3, 2.5, size=(2, 4))  # sample from N(3, 6.25)
rng.integers(low=2, high=10, size=(10, 2))  # random integer matrix

# NOTE: `a` and `b` below stand for existing arrays; they are not defined here.
np.sort(a, axis=None)  # sorted copy of the flattened array
np.sort(a, axis=-1)[::-1]
a.sort()  # sort in place
a[::-1].sort()  # sorting the reversed view in place leaves `a` descending
np.concatenate((a, b), axis=None)  # axis=None flattens before joining
np.concatenate((a, b), axis=2)

a = np.arange(5)           # [0, 1, 2, 3, 4]
b = np.ones(5, dtype=int)  # [1, 1, 1, 1, 1]
a + b       # [1 2 3 4 5]
a - b       # [-1  0  1  2  3]
a ** 2      # [ 0  1  4  9 16]  (`**` is power; `^` would be bitwise XOR)
a * 10      # [ 0 10 20 30 40]
a > 2       # [False False False  True  True]
np.sqrt(a)  # [0. , 1.  , 1.41421356, 1.73205081, 2. ]
a*b  # [0 1 2 3 4]  element-wise product
a@b  # 10           dot product

A = np.random.default_rng(42).random((2, 4))
# [[0.77395605, 0.43887844, 0.85859792, 0.69736803],
#  [0.09417735, 0.97562235, 0.7611397 , 0.78606431]]
A.max()        # 0.97562235
A.max(axis=0)  # [0.77395605, 0.97562235, 0.85859792, 0.78606431]
A.max(axis=1)  # [0.85859792, 0.97562235]
A.mean()        # 0.6732255180088094
A.mean(axis=0)  # [0.4340667 , 0.7072504 , 0.80986881, 0.74171617]
A.mean(axis=1)  # [0.69220011, 0.65425093]

# NOTE: `x` and `y` below stand for existing arrays; they are not defined here.
# Index and slicing arrays
x[1, 3] == x[1][3]
y[1:5:2, ::3]
# Indexing arrays
x[np.array([0, 1, 2, -1, -2])]
y[np.array([1, 2, 3]), 1:4:2]
y[np.array([1, 2]), np.array([-1, -1])]
# Masking arrays
x[x>5]
x[(x%2==0) | (x>7)]
y[[True]*3 + [False] + [True] + [False], 2::2]
# Ellipsis syntax
x[-1, ..., 3]  # same as x[-1, :, 3]
x[:3, ...]  # same as x[0:3, :, :] and x[0:3] and x[:3]
x[::2, ..., np.array([0, 2])]  # same as x[0:5:2, :, np.array([0, 2])]

A = np.array([[[1, 2, 3], [4, 5, 6]], [[4, 6, 8], [2, 1, 6]]])
A.shape  # (2, 2, 3)
A = A.reshape(3, 2, 2)  # (3, 2, 2)
A = A[np.newaxis, ...]  # (1, 3, 2, 2)
A = np.expand_dims(A, axis=4)  # (1, 3, 2, 2, 1)
A = A.flatten()  # (12,)
A = A.reshape(2, -1, 2)  # (2, 3, 2)

# shallow copy: values will change on every variable
a = np.arange(10).reshape(5, 2)
b = a.view()
c = a.reshape(-1)
d = a[:3, :1]
# deep copy: copy and create an entirely new array
a = np.arange(10000000)
b = a[:100].copy()
del a

# scalar broadcasting
a = np.array([1, 2, 3])
a * 3  # [3, 6, 9]
# general broadcasting
a =  np.ones( (8, 1, 6, 1))
b = np.zeros(    (7, 1, 5))
(a*b).shape  # 8, 7, 6, 5
# outer sum (every element of a combined with every element of b)
a = np.arange(4)[:, np.newaxis]  # (4, 1)
b = np.array([1, 2, 3])  # (3,)
a + b  # (4, 3)
# [0]  + [1, 2, 3] =  [1 2 3]
# [1]                 [2 3 4]
# [2]                 [3 4 5]
# [3]                 [4 5 6]

# Create Series
pd.Series([1, 2, 3, 4, 5])
pd.Series(np.arange(1, 6), index=list("abcde"))
pd.Series({"a": 100, "b": 50, "c": 120})
pd.Series("hi", index=list("12345"))
# Create DataFrame
pd.DataFrame({
    "col_1": [1, 2, 3, 4, 5],
    "col_2": np.arange(1, 6),
    "col_3": pd.Series(np.arange(1, 7), index=list("abc123")),
}, index=list("abcde"))
pd.DataFrame(
    [
        {"a": 1, "b": 2},
        {"b": 10, "c": 5},
        {"a": 55, "b": 489, "c": 32, "d": 590},
    ],
    index=["first", "second", "third"],
    columns=list("ab")
)
pd.DataFrame(
    np.arange(10).reshape(2, 5),
    # [[0,1,2,3,4], [5,6,7,8,9]]
    index=pd.date_range("20200101", periods=2),
    columns=list("abcde"))
# Viewing
# NOTE: `df` stands for an existing DataFrame; it is not defined in this snippet.
df.head(2)
df.tail(3)
df.index
df.columns
df.to_numpy()
df.sort_index()
df.sort_values("col_name")

| | Single Column | Multiple Columns | Continuous Columns | All Columns |
|---|---|---|---|---|
| Single Row | df.loc[row, column] or df.at[row, column] | df.loc[row, [column, column]] | df.loc[row, column:column] | df.loc[row] |
| Multiple Rows | df.loc[[row, row], column] | df.loc[[row, row], [column, column]] | df.loc[[row, row], column:column] | df.loc[[row, row]] |
| Continuous Rows | df.loc[row:row, column] | df.loc[row:row, [column, column]] | df.loc[row:row, column:column] | df[row:row] |
| All Rows | df[column] | df[[column, column]] or df.loc[:, [column, column]] | df.loc[:, column:column] | df |
# Label-based selection, with the positional equivalent in the trailing comment
# (assuming "row1".."row5" / "col1".."col5" are the first five labels — TODO confirm).
df["col1"]
df[["col1", "col2"]]
df["row1":"row5"]
df.loc["row1", "col1"]  # df.iloc[0, 0]
df.at["row1", "col1"]  # df.iat[0, 0]
df.loc["row1", ["col1", "col2"]]  # df.iloc[0, [0, 1]]
df.loc["row1", "col1":"col5"]  # df.iloc[0, 0:5]  (label slices include the end)
df.loc[["row1", "row2"]]  # df.iloc[[0, 1]]
df.loc["row1":"row5", "col1"]  # df.iloc[0:5, 0]  (label slices include the end)
# Boolean masks
df[(df["col1"] > 18)]
df[(df > 6) & (df < 25)]
df[df["col1"].isin([10, 15, 0])]

- df.iloc is the same as df.loc, but selects by position.
- df.iat is the same as df.at, but selects by position.
- Details
# NOTE: `df` and `miss_df` stand for existing DataFrames; they are not defined here.
# Modify columns
df["col1"] += 10
df.loc[:, "col1"] = "bar"
df.loc[:, ["col1", "col3"]] = np.arange(12).reshape(6, 2)
# Modify single element
df.loc["row1", "col1"] = 0
df.iloc[0, 0] = 1
# Modify by boolean indexing
df[df < 100] = -df
# Append
df["total"] = df.sum(axis=1).to_numpy()
df["gt"] = df["total"] > 50000
df["foo"] = "bar"
# Insert
df.insert(0, "col0", df["col2"][:2])  # col_index, col_name, values
# Delete column
del df["total"]
df.drop(columns=["foo"], inplace=True)  # same as `df.drop(["foo"], axis=1)`
gt50000 = df.pop("gt")  # pop() needs the existing column name, "gt"
# Delete row
df.drop(["e", "d"], inplace=True)
# Handle NaN
miss_df.dropna(how='any')
miss_df.fillna(value=10000000)

# NOTE: `df`, `df2` and `s` stand for existing objects; they are not defined here.
# Arithmetic
df + df2
df - df.iloc[0]
1 / df
# Numpy
np.sqrt(df)
np.max(df, axis=1)
# Built-in
df.mean()
df.max(axis=1)
# Apply
df.apply(np.cumsum, axis=1)
df.apply(lambda x: x.sum() / x.size)  # x is one column (a Series) at a time
# Series
s.value_counts()
s.str.upper()
s.str.split("-").str.get(0)

# Concat rows
pd.concat([df[:3], df.iloc[7:, :2]])
# Merge two DataFrame
pd.merge(df, df2, on="name", how="right")

# Groupby
df.groupby("col_A").sum()
df.groupby(["col_A", "col_B"]).max()
# Categorical - discrete
df["grade"] = df["grade"].astype("category")
# Assigning to `.cat.categories` was removed in pandas 2; rename instead.
df["grade"] = df["grade"].cat.rename_categories(["Bad", "Good", "Excellent"])
df.sort_values(by="grade")
df.groupby("grade").size()
# Categorical - continuous
df["grade-labels"] = pd.cut(df["score"], bins=range(0, 120, 20), labels=list("EDCBA"))

# Rename Columns
df.columns = ["col_one", "col_two"]
df = df.add_prefix("Xx_")
df = df.add_suffix("_xX")
df.columns = df.columns.str.replace("Xx", "Oo")
df.columns = df.columns.str.replace("xX", "oO")
# Reverse Row or Column Order
df.loc[::-1].reset_index(drop=True) # reverse rows
df.loc[:, ::-1] # reverse columns
# Split DataFrame into 2 random subsets
sub1 = df.sample(frac=0.75, random_state=42)
sub2 = df.drop(sub1.index)
# sort_index() reorders the rows together with their labels; assigning a
# sorted index would relabel the rows while leaving the data in shuffled order.
sub1 = sub1.sort_index()
sub2 = sub2.sort_index()
# Filter by Category (or Largest Category)
df[df.genre.isin(["A", "D"])]
df[~df.genre.isin(["A", "D"])]
df[df.genre.isin(df.genre.value_counts().nlargest(1).index)]
# Split String into Multiple Columns
df[["first", "last"]] = df["name"].str.split(' ', expand=True)
df["city"] = df["location"].str.split(", ", expand=True)[0]
# Change Display Options (Not Change Data)
pd.set_option("display.float_format", "${:.2f}".format)
pd.reset_option("display.float_format")
# Style a DataFrame
style = {"Date": "{:%Y/%m/%d}", "Value": "${:d}", "Volume": "{:,}"}
# Styler.hide_index() was removed in pandas 2; hide(axis="index") replaces it.
df.style.format(style) \
    .hide(axis="index") \
    .highlight_max("Value", color="red") \
    .highlight_min("Value", color="green") \
    .bar("Area", color="orange", align="zero") \
    .background_gradient(subset="Volume", cmap="Greens") \
    .set_caption("Random Chart")

import matplotlib.pyplot as plt
# with this magic function, we can skip `plt.show()`
%matplotlib inline
# Format string "*-b": star markers, solid line, blue.
plt.plot(np.sin(np.linspace(0, 10, 100)), "*-b", lw=2, markersize=5, label="sin(x)")
plt.plot(np.log(np.arange(100)), c="g", ls="--", marker=".", lw=2, markersize=5, label="log(x)")
plt.xlabel("X here")
plt.ylabel("Y here")
plt.title("sin(x) and log(x)")
plt.grid()
plt.legend()
plt.text(x=70, y=-1, s="hahahaha")
# xy is the point being annotated, xytext is where the label is placed.
plt.annotate("wow \nmax", xy=(16, 1), xytext=(40, 0.9), arrowprops={"facecolor": "orange", "shrink": 0.05})
plt.annotate("wow \nmax again", xy=(78, 1), xytext=(95, 0.9), arrowprops={"facecolor": "red", "shrink": 0.05})

# Object-oriented style
fig1, ax = plt.subplots()
ax.plot(...)
fig2, axs = plt.subplots(2, 1)
axs[0].plot(...)
axs[1].plot(...)
# Pyplot style
plt.figure(1)
plt.title("Figure 1")
plt.figure(2)
# subplot(RCN): grid of R rows x C columns, N-th cell (counted row by row).
plt.subplot(311)
plt.title("Figure 2")
plt.subplot(323)
plt.subplot(324)
plt.subplot(337)
plt.subplot(338)
plt.subplot(339)

# The trailing `...` in the lists marks elided sample data.
years = [1.1, 1.3, 1.5, 2.0, 2.2, ...]
salary = [39343.00, 46205.00, 37731.00, 43525.00, 39891.00, ...]
salary_mean = np.mean(salary)
# Line Plots
plt.plot(years, 
         salary,
         marker="o",
         markersize=5,
         lw=2,
         ls="-",
         )
# Filling Areas
plt.fill_between(years, 
                 salary,
                 salary_mean,
                 # compare as an array: a plain list cannot be compared with a float
                 where=(np.asarray(salary) > salary_mean),
                 alpha=.4,
                 color="green", 
                 edgecolor="black",
                 interpolate=True,
                 label="On Average"
                 )

import matplotlib.dates as mdates
dates = np.arange(np.datetime64("2021-01-01"), np.datetime64("2021-01-22"))
prices = np.random.default_rng(42).normal(500, 30, len(dates))
plt.gca().xaxis.set_major_formatter(mdates.DateFormatter("%a, %d %m"))
plt.gca().xaxis.set_major_locator(mdates.DayLocator(interval=7))
plt.gca().xaxis.set_minor_locator(mdates.DayLocator())
# plt.plot handles datetime64 values directly; plot_date is deprecated.
plt.plot(dates, prices, 
         ls="solid",
         c="orange",
         marker="^",
         markersize=10)
plt.grid()
plt.tight_layout()

# The trailing `...` in the lists marks elided sample data.
temperature = [14.2, 16.4, 11.9, 15.2, ...]
ice_cream_sales = [215, 325, 185, 332, ...]
colors = np.array(ice_cream_sales) / np.linalg.norm(ice_cream_sales)
plt.scatter(temperature, ice_cream_sales, 
            s=ice_cream_sales,  # set the size according to the prices of the ice cream
            c=colors,  # set the colors according to the prices of the ice cream
            cmap="Greens",  # preferred color type
            edgecolor="black",  # the edge color of points
            lw=0.5,  # the edge width of points
            alpha=.75,
            )
plt.xlabel("temperature")
plt.ylabel("ice cream price")
plt.yscale("log")   # use log scale on y-axis to handle outliers
cbar = plt.colorbar()
cbar.set_label("Expensive")
plt.tight_layout()

# Bar Charts
# NOTE: `salary_py` and `salary_js` are not defined in this snippet.
ages = [25, 26, 27, 28, 29, ...]
salary_all = [38496, 42000, 46752, 49320, 53200, ...]
index = np.arange(len(ages))
width = 0.25
# Shift each series by one bar width so the three bars sit side by side.
plt.bar(index - width, salary_all, width=width, label="All Devs")
plt.bar(index, salary_py, width=width, label="Python")
plt.bar(index + width, salary_js, width=width, label="JavaScript")
plt.xticks(ticks=index, labels=ages)
plt.title("Median Salary (USD) by Age")
plt.xlabel("Ages")
plt.ylabel("Median Salary (USD)")
plt.legend()
plt.tight_layout()
# Horizontal Bar Charts
language = ['JavaScript', 'HTML/CSS', 'SQL', 'Python', ...]
popularity = [59219, 55466, 47544, 36443, ...]
plt.barh(language, popularity)
plt.title("Most Popular Languages")
plt.xlabel("Number of People Who Use")
plt.tight_layout()

grade = ["A", "B", "C", "D", "E"]
number = [10, 18, 23, 8, 5]
explode = [0.1, 0, 0, 0, 0]  # pull the first slice out of the pie
plt.pie(number, 
        labels=grade,
        shadow=True,
        autopct="%1.1f%%",
        pctdistance=0.6,
        startangle=90,
        explode=explode
        )
plt.title("Test Grade")
plt.tight_layout()

height_stats = np.random.default_rng(42).normal(160, 15, 1000)
interval_bin = [120, 130, 140, 150, 160, 170, 180, 190, 200]
# density=True normalises the histogram so it is comparable with the KDE curve.
plt.hist(height_stats, bins=interval_bin,
         edgecolor="black", lw=1, density=True)
# Plot the probability density curve
import scipy.stats as ss
# gaussian_kde is exposed on scipy.stats itself; the `stats.kde` module path is deprecated.
density = ss.gaussian_kde(height_stats)
index = np.arange(120, 200)
plt.plot(index, 
         density.evaluate(index), 
         color="pink",
         lw=3,
         ls="--",
         label="Probability Density")
# Plot the mean line
plt.axvline(np.mean(height_stats), c="orange", lw=5, label="Height Mean")
plt.legend()
plt.title("Height Stats")
plt.xlabel("Heights")
plt.ylabel("Probability Density")
plt.tight_layout()

years = [1950, 1960, 1970, 1980, 1990, 2000, 2010, 2018]
population_by_continent = {
    'africa': [228, 284, 365, 477, 631, 814, 1044, 1275],
    'americas': [340, 425, 519, 619, 727, 840, 943, 1006],
    'asia': [1394, 1686, 2120, 2625, 3202, 3714, 4169, 4560],
    'europe': [220, 253, 276, 295, 310, 303, 294, 293],
    'oceania': [12, 15, 19, 22, 26, 31, 36, 39],
}
y = population_by_continent.values()
labels = population_by_continent.keys()
colors = ["#96ceb4", "#ffeead", "#ff6f69", "#ffcc5c", "#88d8b0"]
# Matplotlib 3.6 renamed the bundled seaborn styles to "seaborn-v0_8*";
# fall back to the old name on older versions.
plt.style.use("seaborn-v0_8" if "seaborn-v0_8" in plt.style.available else "seaborn")
plt.stackplot(years, y, labels=labels, colors=colors)
plt.legend(loc="upper left")
plt.title("World Population")
plt.xlabel("Year")
plt.ylabel("Population (Millions)")
plt.tight_layout()

import matplotlib.image as mpimg
# NOTE(review): recent Matplotlib releases no longer accept a URL in imread();
# download the file first if this call fails.
img = mpimg.imread("https://www.catster.com/wp-content/uploads/1970/01/Am-ShortHair-breed_getty1140883355-768x513.png")
plt.imshow(img)
# Applying pseudocolor schemes
plt.imshow(img[..., 0], cmap="gray")
plt.colorbar()
# Flipping Photos Vertically or Horizontally
plt.imshow(img[::-1])  # Reverse at the first axis == vertical flip
plt.imshow(img[:, ::-1])  # Reverse at the second axis == horizontal flip

# Switch Style
# Matplotlib 3.6 renamed the bundled seaborn styles to "seaborn-v0_8*".
plt.style.use(
    "seaborn-v0_8-pastel" if "seaborn-v0_8-pastel" in plt.style.available else "seaborn-pastel"
)
# Data
import sklearn.linear_model  # the submodule must be imported explicitly
x = np.random.default_rng(42).integers(0, 100, 100)
y = (2*x+1) * np.random.default_rng(43).normal(5, 1, 100)
regr = sklearn.linear_model.LinearRegression()
# The estimator expects 2-D input, hence the added axis.
regr.fit(x[:, np.newaxis], y[:, np.newaxis])
regr_line = regr.predict(x[:, np.newaxis])
# Plotting with fancy color and colormap
plt.scatter(x, y, c=y, alpha=0.25, cmap="plasma")
plt.plot(x, regr_line, 
         color="darkviolet", 
         alpha=0.5,
         lw=5, ls="-",
         label="regression line")
plt.title("Linear Regression Test")
plt.xlabel("X")
plt.ylabel("y")
plt.legend()
plt.colorbar()

x = np.arange(1, 5)
y = x**2
df = pd.DataFrame(zip(x, y), columns=["col_1", "col_2"])
# Plotting with data parameter
def plot():
    """Draw ``col_2`` against ``col_1`` of ``df`` on the current axes."""
    sns.lineplot(x="col_1", y="col_2", data=df)
# Seaborn Styles
sns.set_style("white")
# Scaling the plots
sns.set_context("paper", font_scale=1.5)
# Changing the figure Size
plt.figure(figsize=(8, 4))  # width, height 
# Using Seaborn with Matplotlib
plt.subplot(211)
plt.title("Square X")
plot()
# Seaborn Styles Context Manager
with sns.axes_style("darkgrid"):
    plt.subplot(212)
    plot()
plt.tight_layout()

# Sequential Palette
palette = sns.color_palette("YlGn")
sns.palplot(palette)
plt.title("YlGn Colormap (Sequential)")
# Diverging Palette
palette = sns.color_palette("coolwarm")
sns.palplot(palette)
plt.title("coolwarm Colormap (Diverging)")
# Qualitative Palette
palette = sns.color_palette("Pastel2")
sns.palplot(palette)
plt.title("Pastel2 Colormap (Qualitative)")

data = sns.load_dataset("iris")
plt.figure(figsize=(11, 3))
plt.subplot(121)
sns.lineplot(x="sepal_length", y="sepal_width", data=data)
plt.subplot(122)
sns.lineplot(x="petal_length", y="petal_width", data=data)

# FacetGrid: one subplot per value of the `col` variable.
grid = sns.FacetGrid(data, col="species")
grid.map(plt.plot, "sepal_width")

# PairGrid: one subplot per (x_var, y_var) combination.
x_vars = ["sepal_length", "sepal_width", "petal_length", "petal_width"]
y_vars = ["species"]
grid = sns.PairGrid(data, x_vars=x_vars, y_vars=y_vars)
grid.map(sns.barplot)





















