Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: [sc-25972] Refine NameRank SDK #515

Merged
merged 7 commits into from
Jan 6, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/good-terms-move.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@namehash/namerank": minor
---

feat: [sc-25972] Update NameRank SDK documentation
36 changes: 28 additions & 8 deletions apps/api.namerank.io/namerank/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,24 +18,44 @@ class NLPLabelAnalysis(BaseModel):

inspection: inspector_models.InspectorResult = Field(description='The result of the label inspection')
status: LabelStatus = Field(description='The normalization status of the label')
probability: float = Field(description='The probability of the label')
log_probability: float = Field(description='The natural logarithm of the probability')
word_count: int = Field(description='The number of words in the label')
top_tokenization: Optional[list[str]] = Field(description='The most likely tokenization of the label')
tokenizations: list[dict] = Field(description='All possible tokenizations of the label')
probability: float = Field(
description='The probability that this label represents a meaningful word or phrase in natural language, based on statistical language models. Higher values indicate the label is more likely to be meaningful text rather than random characters.',
ge=0.0,
le=1.0,
)
log_probability: float = Field(
description='The natural logarithm of the probability score. Log probabilities are often more useful for comparing labels since they convert multiplicative relationships to additive ones and better handle very small probabilities.',
le=0.0,
)
# Minimum number of words over the label's gap-free tokenizations; 0 when none exists (ge=0 enforces non-negativity).
word_count: int = Field(
    # Adjacent string literals are concatenated verbatim, so every fragment must end with its own
    # trailing space; otherwise two sentences fuse together in the rendered description.
    description='The minimum number of words across all valid tokenizations of the label that contain no gaps. '
    'Will be 0 if no valid tokenization without gaps is found. For example, labels containing only '
    'numbers or special characters may have a word_count of 0. '
    'Note: this is not the number of words in the top_tokenization, but the minimum number of words across all valid tokenizations without gaps.',
    ge=0,
)
top_tokenization: Optional[list[str]] = Field(
description="The recommended tokenization of the label into words. We give priority to tokenizations that don't have gaps and have fewer words, even if they are less probable. Will be None if: "
'no valid tokenization is found, or the label is not normalized, or the tokenization process was interrupted by recursion limit, or word_count is 0. '
'When present, this contains the tokens from the tokenization with the fewest words (and highest probability among ties) that has no gaps.'
)
tokenizations: list[dict] = Field(
description='Up to 1000 possible tokenizations of the label, ordered by probability from highest to lowest. Each dict '
'contains the tokenization and its probability score. Will be an empty list if no valid '
'tokenizations are found. For very long or complex labels, not all possible tokenizations may be included.'
)


class NameRankReport(BaseModel):
purity_score: float = Field(
title='Purity score of the input',
description='For single labels, returns the score directly. For 2-label names (e.g., "nick.eth"), returns the score for the first label ("nick"). For 3 or more labels, returns 0. If the label is not inspected, this field will be 0.',
description='Score indicating the purity/cleanliness of the name. For single labels, returns the score directly. For 2-label names (e.g., "nick.eth"), returns the score for the first label ("nick"). For 3 or more labels, returns 0. If the label is not inspected, this field will be 0. The score ranges from 0.0 to 1.0 inclusive, where 0.0 indicates lowest purity and 1.0 indicates highest purity.',
ge=0.0,
le=1.0,
)

interesting_score: float = Field(
title='Interesting score of the input',
description='For single labels, returns the score directly. For 2-label names (e.g., "nick.eth"), returns the score for the first label ("nick"). For 3 or more labels, returns 0. If the label is not inspected, this field will be 0.',
description='Score indicating how interesting/memorable the name is. For single labels, returns the score directly. For 2-label names (e.g., "nick.eth"), returns the score for the first label ("nick"). For 3 or more labels, returns 0. If the label is not inspected, this field will be 0. The score ranges from 0.0 to 1.0 inclusive, where 0.0 indicates least interesting and 1.0 indicates most interesting.',
ge=0.0,
le=1.0,
)
Expand Down
107 changes: 102 additions & 5 deletions packages/namerank-sdk/src/index.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import fetch from "cross-fetch";
import { Network, NameGuardReport } from "@namehash/nameguard";
import { InspectorResult } from "./labelinspector";

const ETH_TLD = "eth";

Expand All @@ -9,24 +10,117 @@ export enum LabelStatus {
Unknown = "unknown",
}

/**
* A single candidate tokenization of a label, together with its probability scores.
*
* NOTE(review): the API model in `namerank/models.py` types the `tokenizations` list as
* `list[dict]`, so the exact keys of each entry cannot be verified from here — confirm
* that the API really emits `tokens`, `probability` and `log_probability`.
*/
export interface Tokenization {
/** The tokens that make up this tokenization */
tokens: string[];
/**
* The probability score of this tokenization (as opposed to the label-level
* `NLPLabelAnalysis.probability`). Presumably this is the statistical language model's
* probability that the label, split into exactly these tokens, is meaningful natural-language
* text rather than random characters — TODO confirm against the API. Higher values are more likely.
* Must be between 0.0 and 1.0 inclusive.
*/
probability: number;
/**
* The natural logarithm of `probability`. Log probabilities are often more useful for
* comparing tokenizations since they convert multiplicative relationships to additive ones and
* better handle very small probabilities.
* Must be less than or equal to 0.0.
*/
log_probability: number;
}

/**
 * The NLP analysis of a single label: its inspection result, normalization status,
 * probability scores and tokenizations.
 */
export interface NLPLabelAnalysis {
  /**
   * The result of the label inspection.
   * - If status is Normalized, inspection will be of type InspectorResultNormalized
   * - If status is Unnormalized, inspection will be of type InspectorResultUnnormalized
   */
  inspection: InspectorResult;

  /** The normalization status of the label */
  status: LabelStatus;

  /**
   * The probability that this label represents a meaningful word or phrase in natural language,
   * based on statistical language models. Higher values indicate the label is more likely to be
   * meaningful text rather than random characters.
   * Must be between 0.0 and 1.0 inclusive.
   */
  probability: number;

  /**
   * The natural logarithm of the probability score. Log probabilities are often more useful for
   * comparing labels since they convert multiplicative relationships to additive ones and better
   * handle very small probabilities.
   * Must be less than or equal to 0.0.
   */
  log_probability: number;

  /**
   * The minimum number of words across all valid tokenizations of the label that contain no gaps.
   * Will be 0 if no valid tokenization without gaps is found. For example, labels containing only
   * numbers or special characters may have a word_count of 0.
   * Note: this is not the number of words in the top_tokenization, but the minimum number of words
   * across all valid tokenizations without gaps.
   * Always a non-negative integer.
   */
  word_count: number;

  /**
   * The recommended tokenization of the label into words. We give priority to tokenizations that don't
   * have gaps and have fewer words, even if they are less probable. Will be undefined if:
   * - no valid tokenization is found
   * - the label is not normalized
   * - the tokenization process was interrupted by recursion limit
   * - word_count is 0
   * When present, this contains the tokens from the tokenization with the fewest words (and highest
   * probability among ties) that has no gaps.
   *
   * NOTE(review): the API model declares this field as `Optional[list[str]]`, which may be
   * serialized as `null` rather than omitted — confirm whether callers can also observe `null` here.
   */
  top_tokenization?: string[];

  /**
   * Up to 1000 possible tokenizations of the label, ordered by probability from highest to lowest. Each entry
   * contains the tokenization and its probability score. Will be an empty list if no valid
   * tokenizations are found. For very long or complex labels, not all possible tokenizations may be included.
   */
  tokenizations: Tokenization[];
}

/**
* The NameRank scores for a name, plus the optional NLP analysis of its label.
*/
export interface NameRankReport {
/**
* Score indicating the purity/cleanliness of the name, ranging from 0.0 (lowest purity)
* to 1.0 (highest purity) inclusive.
* - For single labels, returns the score directly.
* - For 2-label names (e.g., "nick.eth"), returns the score for the first label ("nick").
* - For 3 or more labels, returns 0.
* - If the label is not inspected, this field will be 0.
*/
purity_score: number;

/**
* Score indicating how interesting/memorable the name is, ranging from 0.0 (least interesting)
* to 1.0 (most interesting) inclusive.
* - For single labels, returns the score directly.
* - For 2-label names (e.g., "nick.eth"), returns the score for the first label ("nick").
* - For 3 or more labels, returns 0.
* - If the label is not inspected, this field will be 0.
*/
interesting_score: number;

/**
* The result of the NLP analysis on the label. This field will be undefined when the name is uninspected.
* A name is considered uninspected when either:
* 1. The count of unknown labels (distinct or non-distinct) exceeds MAX_INSPECTED_NAME_UNKNOWN_LABELS (5)
* 2. The count of characters in the name (including dots) exceeds MAX_INSPECTED_NAME_CHARACTERS (200)
* When a name is uninspected, both purity_score and interesting_score will be 0, and nameguard.inspected will be false.
*/
analysis?: NLPLabelAnalysis;
}

/**
* The combined result for a name: its NameRank report together with its NameGuard report.
* This is the type documented as the resolved value of `inspectName`.
*/
export interface NameRankResponse {
/** The NameRank analysis report */
namerank: NameRankReport;

/** The NameGuard security analysis report */
nameguard: NameGuardReport;
}

Expand All @@ -53,7 +147,10 @@ export const MAX_INSPECTED_NAME_CHARACTERS = 200;
const MAX_INSPECTED_NAME_UNKNOWN_LABELS = 5;

/**
* Optional settings for NameRank. Every field may be omitted.
* NOTE(review): the defaults applied when a field is omitted are not visible here — confirm where the options are consumed.
*/
export interface NameRankOptions {
/** The endpoint URL for the NameRank API */
namerankEndpoint?: string;

/** The Ethereum network to use for name resolution */
network?: Network;
}

Expand Down Expand Up @@ -81,9 +178,9 @@ export class NameRank {
*
* @param {string} name The name for NameRank to inspect.
* @param {InspectNameOptions} options The options for the inspection.
* @returns {Promise<NameRankReport>} A promise that resolves with the `NameRankReport` of the name.
* @returns {Promise<NameRankResponse>} A promise that resolves with the `NameRankResponse` of the name.
* @example
* const nameRankReport = await namerank.inspectName('vitalik.eth');
* const nameRankResponse = await namerank.inspectName('vitalik.eth');
*/
public inspectName(
name: string,
Expand Down Expand Up @@ -163,6 +260,6 @@ const defaultClient = createClient();
* It can inspect individual names or batch names.
* @example
* import { namerank } from '@namehash/namerank';
* const nameRankReport = await namerank.inspectName('vitalik.eth');
* const nameRankResponse = await namerank.inspectName('vitalik.eth');
*/
export const namerank = defaultClient;
Loading
Loading