hi, I don't know why I cannot run your code in my jupyterlab, please help #117

lin9178618872 · 2023-02-10T04:55:59Z

the code I cannot run is
from typing import List
from collections import Counter

def raw_majority_vote(labels: List[str]) -> str:
votes = Counter(labels)
winner, _ = votes.most_common(1)[0]
return winner

assert raw_majority_vote(['a', 'b', 'c', 'b']) == 'b'

def majority_vote(labels: List[str]) -> str:
"""Assumes that labels are ordered from nearest to farthest."""
vote_counts = Counter(labels)
winner, winner_count = vote_counts.most_common(1)[0]
num_winners = len([count
for count in vote_counts.values()
if count == winner_count])

if num_winners == 1:
    return winner                     # unique winner, so return it
else:
    return majority_vote(labels[:-1]) # try again without the farthest

Tie, so look at first 4, then 'b'

assert majority_vote(['a', 'b', 'c', 'b', 'a']) == 'b'

from typing import NamedTuple
from scratch.linear_algebra import Vector, distance

class LabeledPoint(NamedTuple):
point: Vector
label: str

def knn_classify(k: int,
labeled_points: List[LabeledPoint],
new_point: Vector) -> str:

# Order the labeled points from nearest to farthest.
by_distance = sorted(labeled_points,
                     key=lambda lp: distance(lp.point, new_point))

# Find the labels for the k closest
k_nearest_labels = [lp.label for lp in by_distance[:k]]

# and let them vote.
return majority_vote(k_nearest_labels)

import random

def random_point(dim: int) -> Vector:
return [random.random() for _ in range(dim)]

def random_distances(dim: int, num_pairs: int) -> List[float]:
return [distance(random_point(dim), random_point(dim))
for _ in range(num_pairs)]

def main():
from typing import Dict
import csv
from collections import defaultdict

def parse_iris_row(row: List[str]) -> LabeledPoint:
    """
    sepal_length, sepal_width, petal_length, petal_width, class
    """
    measurements = [float(value) for value in row[:-1]]
    # class is e.g. "Iris-virginica"; we just want "virginica"
    label = row[-1].split("-")[-1]

    return LabeledPoint(measurements, label)

with open('iris.data') as f:
    reader = csv.reader(f)
    iris_data = [parse_iris_row(row) for row in reader]

# We'll also group just the points by species/label so we can plot them.
points_by_species: Dict[str, List[Vector]] = defaultdict(list)
for iris in iris_data:
    points_by_species[iris.label].append(iris.point)

from matplotlib import pyplot as plt
metrics = ['sepal length', 'sepal width', 'petal length', 'petal width']
pairs = [(i, j) for i in range(4) for j in range(4) if i < j]
marks = ['+', '.', 'x']  # we have 3 classes, so 3 markers

fig, ax = plt.subplots(2, 3)

for row in range(2):
    for col in range(3):
        i, j = pairs[3 * row + col]
        ax[row][col].set_title(f"{metrics[i]} vs {metrics[j]}", fontsize=8)
        ax[row][col].set_xticks([])
        ax[row][col].set_yticks([])

        for mark, (species, points) in zip(marks, points_by_species.items()):
            xs = [point[i] for point in points]
            ys = [point[j] for point in points]
            ax[row][col].scatter(xs, ys, marker=mark, label=species)

ax[-1][-1].legend(loc='lower right', prop={'size': 6})
# plt.show()



plt.savefig('im/iris_scatter.png')
plt.gca().clear()

import random
from scratch.machine_learning import split_data

random.seed(12)
iris_train, iris_test = split_data(iris_data, 0.70)
assert len(iris_train) == 0.7 * 150
assert len(iris_test) == 0.3 * 150

from typing import Tuple

# track how many times we see (predicted, actual)
confusion_matrix: Dict[Tuple[str, str], int] = defaultdict(int)
num_correct = 0

for iris in iris_test:
    predicted = knn_classify(5, iris_train, iris.point)
    actual = iris.label

    if predicted == actual:
        num_correct += 1

    confusion_matrix[(predicted, actual)] += 1

pct_correct = num_correct / len(iris_test)
print(pct_correct, confusion_matrix)

import tqdm
dimensions = range(1, 101)

avg_distances = []
min_distances = []

random.seed(0)
for dim in tqdm.tqdm(dimensions, desc="Curse of Dimensionality"):
    distances = random_distances(dim, 10000)      # 10,000 random pairs
    avg_distances.append(sum(distances) / 10000)  # track the average
    min_distances.append(min(distances))          # track the minimum

min_avg_ratio = [min_dist / avg_dist
                 for min_dist, avg_dist in zip(min_distances, avg_distances)]

if name == "main": main()

and it give me error

ModuleNotFoundError Traceback (most recent call last)
~\AppData\Local\Temp\ipykernel_23996\3035052844.py in
26
27 from typing import NamedTuple
---> 28 from scratch.linear_algebra import Vector, distance
29
30 class LabeledPoint(NamedTuple):

ModuleNotFoundError: No module named 'scratch.linear_algebra'

The text was updated successfully, but these errors were encountered:

jamslaugh · 2023-02-12T20:17:28Z

It is an import error. You should install somehow that scratch module. There are different ways of doing so. In this case, follow what is written in the readme in order to export python path. Otherwise you can use sys library inside python.

dallabonagabriel · 2023-02-14T02:53:43Z

Hi bro. I will make the necessary change! Thank's. Em dom., 12 de fev. de 2023 17:18, Giacomo Matrone ***@***.***> escreveu:

…

It is an import error. You should install somehow that scratch module. There are different ways of doing so. In this case, follow what is written in the readme in order to export python path. Otherwise you can use sys library inside python. — Reply to this email directly, view it on GitHub <#117 (comment)>, or unsubscribe <https://github.com/notifications/unsubscribe-auth/ARR5MF4CR75NVDSSQFL7FO3WXFAQ5ANCNFSM6AAAAAAUXLVGZY> . You are receiving this because you are subscribed to this thread.Message ID: ***@***.***>

lin9178618872 · 2023-02-14T03:11:24Z

It is an import error. You should install somehow that scratch module. There are different ways of doing so. In this case, follow what is written in the readme in order to export python path. Otherwise you can use sys library inside python.

hi, I follow ur reply and do it in pycharm, but it gives me a new error:
C:\Users\96057\PycharmProjects\pythonProject\venv\Scripts\python.exe C:\Users\96057\PycharmProjects\pythonProject\main.py
Traceback (most recent call last):
File "C:\Users\96057\PycharmProjects\pythonProject\main.py", line 162, in
if name == "main": main()
File "C:\Users\96057\PycharmProjects\pythonProject\main.py", line 82, in main
iris_data = [parse_iris_row(row) for row in reader]
File "C:\Users\96057\PycharmProjects\pythonProject\main.py", line 82, in
iris_data = [parse_iris_row(row) for row in reader]
File "C:\Users\96057\PycharmProjects\pythonProject\main.py", line 77, in parse_iris_row
label = row[-1].split("-")[-1]
IndexError: list index out of range

Process finished with exit code 1

lin9178618872 · 2023-02-14T03:13:10Z

Hi bro. I will make the necessary change! Thank's. Em dom., 12 de fev. de 2023 17:18, Giacomo Matrone @.> escreveu:
…
It is an import error. You should install somehow that scratch module. There are different ways of doing so. In this case, follow what is written in the readme in order to export python path. Otherwise you can use sys library inside python. — Reply to this email directly, view it on GitHub <#117 (comment)>, or unsubscribe https://github.com/notifications/unsubscribe-auth/ARR5MF4CR75NVDSSQFL7FO3WXFAQ5ANCNFSM6AAAAAAUXLVGZY . You are receiving this because you are subscribed to this thread.Message ID: @.>

hi, I follow ur reply and do it in pycharm, but it gives me a new error:
C:\Users\96057\PycharmProjects\pythonProject\venv\Scripts\python.exe C:\Users\96057\PycharmProjects\pythonProject\main.py
Traceback (most recent call last):
File "C:\Users\96057\PycharmProjects\pythonProject\main.py", line 162, in
if name == "main": main()
File "C:\Users\96057\PycharmProjects\pythonProject\main.py", line 82, in main
iris_data = [parse_iris_row(row) for row in reader]
File "C:\Users\96057\PycharmProjects\pythonProject\main.py", line 82, in
iris_data = [parse_iris_row(row) for row in reader]
File "C:\Users\96057\PycharmProjects\pythonProject\main.py", line 77, in parse_iris_row
label = row[-1].split("-")[-1]
IndexError: list index out of range

Process finished with exit code 1

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

hi, I don't know why I cannot run your code in my jupyterlab, please help #117

hi, I don't know why I cannot run your code in my jupyterlab, please help #117

lin9178618872 commented Feb 10, 2023

jamslaugh commented Feb 12, 2023

dallabonagabriel commented Feb 14, 2023 via email

lin9178618872 commented Feb 14, 2023

lin9178618872 commented Feb 14, 2023

hi, I don't know why I cannot run your code in my jupyterlab, please help #117

hi, I don't know why I cannot run your code in my jupyterlab, please help #117

Comments

lin9178618872 commented Feb 10, 2023

Tie, so look at first 4, then 'b'

and it give me error

jamslaugh commented Feb 12, 2023

dallabonagabriel commented Feb 14, 2023 via email

lin9178618872 commented Feb 14, 2023

lin9178618872 commented Feb 14, 2023