Skip to content

Commit

Permalink
FIX failing sphinx-gallery CI (skrub-data#1145)
Browse files Browse the repository at this point in the history
  • Loading branch information
Vincent-Maladiere authored Nov 20, 2024
1 parent b2c4f82 commit 2b8d68b
Show file tree
Hide file tree
Showing 4 changed files with 177 additions and 159 deletions.
2 changes: 1 addition & 1 deletion .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ jobs:
key: saved-cache
- run:
command: ./build_tools/circle/build_doc.sh
no_output_timeout: 40m
no_output_timeout: 30m
- store_artifacts:
path: doc/_build/html
destination: doc
Expand Down
6 changes: 5 additions & 1 deletion examples/02_text_with_string_encoders.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,8 +209,12 @@ def plot_box_results(named_results):
# column in a dataframe. By default, |TextEncoder| uses the e5-small-v2 model.
from skrub import TextEncoder

text_encoder = TextEncoder(
"sentence-transformers/paraphrase-albert-small-v2",
device="cpu",
)
text_encoder_pipe = clone(gap_pipe).set_params(
**{"tablevectorizer__high_cardinality": TextEncoder()}
**{"tablevectorizer__high_cardinality": text_encoder}
)
text_encoder_results = cross_validate(text_encoder_pipe, X, y, scoring="roc_auc")
results.append(("TextEncoder", text_encoder_results))
Expand Down
14 changes: 8 additions & 6 deletions examples/07_multiple_key_join.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,11 @@

from skrub.datasets import fetch_figshare

seed = 1
flights = fetch_figshare("41771418").X

# Sampling for faster computation.
flights = flights.sample(20_000, random_state=1, ignore_index=True)
flights = flights.sample(5_000, random_state=seed, ignore_index=True)
flights.head()

###############################################################################
Expand Down Expand Up @@ -85,7 +87,7 @@

weather = fetch_figshare("41771457").X
# Sampling for faster computation.
weather = weather.sample(100_000, random_state=1, ignore_index=True)
weather = weather.sample(10_000, random_state=seed, ignore_index=True)
weather.head()

########################################################################
Expand Down Expand Up @@ -164,10 +166,10 @@
###############################################################################
# The results:

from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split

scores = cross_val_score(pipeline_hgb, X, y)
scores.mean()
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=seed)
pipeline_hgb.fit(X_train, y_train).score(X_test, y_test)

###############################################################################
# Conclusion
Expand All @@ -177,4 +179,4 @@
# on imprecise and multiple-key correspondences.
# This is made easy by skrub's |Joiner| transformer.
#
# Our final cross-validated accuracy score is 0.58.
# Our final accuracy score on the held-out test set is 0.55.
Loading

0 comments on commit 2b8d68b

Please sign in to comment.