20 changes: 17 additions & 3 deletions src/env.ts
@@ -127,12 +127,26 @@ const schema = Type.Object({
METADATA_FETCH_MAX_REDIRECTIONS: Type.Number({ default: 5 }),

/**
* Base URL for a public gateway which will provide access to all IPFS resources. Defaults to
* `https://cloudflare-ipfs.com`.
* Base URL for a public gateway which will provide access to all IPFS resources when metadata
* URLs use the `ipfs:` or `ipns:` protocol schemes. Defaults to `https://cloudflare-ipfs.com`.
*/
PUBLIC_GATEWAY_IPFS: Type.String({ default: 'https://cloudflare-ipfs.com' }),
/**
* Base URL for a public gateway which will provide access to all Arweave resources. Defaults to
* Extra header to add to requests when fetching metadata from the configured IPFS gateway,
* for example if authentication is required. Must be in the form 'Header: Value'.
*/
PUBLIC_GATEWAY_IPFS_EXTRA_HEADER: Type.Optional(Type.String()),
/**
* List of public IPFS gateways that will be replaced with the value of `PUBLIC_GATEWAY_IPFS`
* whenever a metadata URL has these gateways hard coded in `http:` or `https:` URLs.
*/
PUBLIC_GATEWAY_IPFS_REPLACED: Type.String({
default: 'ipfs.io,dweb.link,gateway.pinata.cloud,cloudflare-ipfs.com,infura-ipfs.io',
}),

/**
* Base URL for a public gateway which will provide access to all Arweave resources when metadata
* URLs use the `ar:` protocol scheme. Defaults to
* `https://arweave.net`.
*/
PUBLIC_GATEWAY_ARWEAVE: Type.String({ default: 'https://arweave.net' }),
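
For illustration only (the gateway URL and token below are invented, not part of this change), the new variables could be supplied like this before the schema above is parsed:

// Hypothetical values. PUBLIC_GATEWAY_IPFS_EXTRA_HEADER must use the 'Header: Value' form and
// PUBLIC_GATEWAY_IPFS_REPLACED is a comma-separated list of gateway hostnames to rewrite.
process.env.PUBLIC_GATEWAY_IPFS = 'https://my-gateway.example.com';
process.env.PUBLIC_GATEWAY_IPFS_EXTRA_HEADER = 'Authorization: Bearer example-token';
process.env.PUBLIC_GATEWAY_IPFS_REPLACED = 'ipfs.io,dweb.link,gateway.pinata.cloud,cloudflare-ipfs.com,infura-ipfs.io';
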
38 changes: 16 additions & 22 deletions src/token-processor/images/image-cache.ts
@@ -1,5 +1,5 @@
import { ENV } from '../../env';
import { parseDataUrl, getFetchableDecentralizedStorageUrl } from '../util/metadata-helpers';
import { parseDataUrl, getFetchableMetadataUrl } from '../util/metadata-helpers';
import { logger } from '@hirosystems/api-toolkit';
import { PgStore } from '../../pg/pg-store';
import { Readable } from 'node:stream';
@@ -16,7 +16,6 @@ import {
} from '../util/errors';
import { pipeline } from 'node:stream/promises';
import { Storage } from '@google-cloud/storage';
import { RetryableJobError } from '../queue/errors';

/** Saves an image provided via a `data:` uri string to disk for processing. */
function convertDataImage(uri: string, tmpPath: string): string {
@@ -33,10 +32,15 @@ function convertDataImage(uri: string, tmpPath: string): string {
return filePath;
}

async function downloadImage(imgUrl: string, tmpPath: string): Promise<string> {
async function downloadImage(
imgUrl: string,
tmpPath: string,
headers?: Record<string, string>
): Promise<string> {
return new Promise((resolve, reject) => {
const filePath = `${tmpPath}/image`;
fetch(imgUrl, {
headers,
dispatcher: new Agent({
headersTimeout: ENV.METADATA_FETCH_TIMEOUT_MS,
bodyTimeout: ENV.METADATA_FETCH_TIMEOUT_MS,
@@ -109,22 +113,23 @@ async function transformImage(filePath: string, resize: boolean = false): Promis
* For a list of configuration options, see `env.ts`.
*/
export async function processImageCache(
imgUrl: string,
rawImgUrl: string,
contractPrincipal: string,
tokenNumber: bigint
): Promise<string[]> {
logger.info(`ImageCache processing token ${contractPrincipal} (${tokenNumber}) at ${imgUrl}`);
logger.info(`ImageCache processing token ${contractPrincipal} (${tokenNumber}) at ${rawImgUrl}`);
try {
const gcs = new Storage();
const gcsBucket = ENV.IMAGE_CACHE_GCS_BUCKET_NAME as string;

const tmpPath = `tmp/${contractPrincipal}_${tokenNumber}`;
fs.mkdirSync(tmpPath, { recursive: true });
let original: string;
if (imgUrl.startsWith('data:')) {
original = convertDataImage(imgUrl, tmpPath);
if (rawImgUrl.startsWith('data:')) {
original = convertDataImage(rawImgUrl, tmpPath);
} else {
original = await downloadImage(imgUrl, tmpPath);
const { url: httpUrl, fetchHeaders } = getFetchableMetadataUrl(rawImgUrl);
original = await downloadImage(httpUrl.toString(), tmpPath, fetchHeaders);
}

const image1 = await transformImage(original);
@@ -152,10 +157,10 @@ export async function processImageCache(
typeError.cause instanceof errors.BodyTimeoutError ||
typeError.cause instanceof errors.ConnectTimeoutError
) {
throw new ImageTimeoutError(new URL(imgUrl));
throw new ImageTimeoutError(new URL(rawImgUrl));
}
if (typeError.cause instanceof errors.ResponseExceededMaxSizeError) {
throw new ImageSizeExceededError(`ImageCache image too large: ${imgUrl}`);
throw new ImageSizeExceededError(`ImageCache image too large: ${rawImgUrl}`);
}
if ((typeError.cause as any).toString().includes('ECONNRESET')) {
throw new ImageHttpError(`ImageCache server connection interrupted`, typeError);
@@ -165,17 +170,6 @@
}
}

/**
* Converts a raw image URI from metadata into a fetchable URL.
* @param uri - Original image URI
* @returns Normalized URL string
*/
export function normalizeImageUri(uri: string): string {
if (uri.startsWith('data:')) return uri;
const fetchableUrl = getFetchableDecentralizedStorageUrl(uri);
return fetchableUrl.toString();
}

export async function reprocessTokenImageCache(
db: PgStore,
contractPrincipal: string,
@@ -186,7 +180,7 @@ export async function reprocessTokenImageCache(
for (const token of imageUris) {
try {
const [cached, thumbnail] = await processImageCache(
getFetchableDecentralizedStorageUrl(token.image).toString(),
token.image,
contractPrincipal,
BigInt(token.token_number)
);
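
With this change `processImageCache` receives the raw image URL from the token metadata and performs the gateway normalization itself via `getFetchableMetadataUrl`, forwarding any configured gateway headers to `downloadImage`. A minimal illustrative call, assuming a made-up contract principal and token number (not taken from this PR):

import { processImageCache } from '../images/image-cache'; // path as used by callers inside src/token-processor

// Raw `ipfs:` URL straight from metadata; URL normalization and optional gateway auth headers
// are now handled inside processImageCache, so callers no longer pre-convert the URL.
const [cachedImage, cachedThumbnail] = await processImageCache(
  'ipfs://QmYCnfeseno5cLpC75rmy6LQhsNYQCJabiuwqNUXMaA3Fo/1145.png',
  'SP000000000000000000002Q6VF78.hypothetical-nft', // hypothetical contract principal
  1145n
);
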
4 changes: 2 additions & 2 deletions src/token-processor/queue/job/process-token-job.ts
@@ -12,7 +12,7 @@ import { StacksNodeRpcClient } from '../../stacks-node/stacks-node-rpc-client';
import { SmartContractClarityError, TooManyRequestsHttpError } from '../../util/errors';
import {
fetchAllMetadataLocalesFromBaseUri,
getFetchableDecentralizedStorageUrl,
getFetchableMetadataUrl,
getTokenSpecificUri,
} from '../../util/metadata-helpers';
import { RetryableJobError } from '../errors';
@@ -194,7 +194,7 @@ export class ProcessTokenJob extends Job {
return;
}
// Before we return the uri, check if its fetchable hostname is not already rate limited.
const fetchable = getFetchableDecentralizedStorageUrl(uri);
const fetchable = getFetchableMetadataUrl(uri).url;
const rateLimitedHost = await this.db.getRateLimitedHost({ hostname: fetchable.hostname });
if (rateLimitedHost) {
const retryAfter = Date.parse(rateLimitedHost.retry_after);
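
Because the rate-limit lookup keys on the resolved hostname, metadata URIs that map to the same gateway now share a single limit. A rough sketch under the default gateway settings (illustrative only, not part of the diff):

// Both URIs below resolve to the configured IPFS gateway host, so one rate-limited-host entry
// (for 'cloudflare-ipfs.com' with the defaults) defers fetches for either of them.
const a = getFetchableMetadataUrl('ipfs://QmYCnfeseno5cLpC75rmy6LQhsNYQCJabiuwqNUXMaA3Fo/1145.png');
const b = getFetchableMetadataUrl('https://ipfs.io/ipfs/QmYCnfeseno5cLpC75rmy6LQhsNYQCJabiuwqNUXMaA3Fo/1145.png');
// a.url.hostname === b.url.hostname // both 'cloudflare-ipfs.com' under the default config
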
80 changes: 62 additions & 18 deletions src/token-processor/util/metadata-helpers.ts
@@ -19,7 +19,7 @@ import {
UndiciCauseTypeError,
} from './errors';
import { RetryableJobError } from '../queue/errors';
import { normalizeImageUri, processImageCache } from '../images/image-cache';
import { processImageCache } from '../images/image-cache';
import {
RawMetadataLocale,
RawMetadataLocalizationCType,
@@ -40,6 +40,22 @@ const METADATA_FETCH_HTTP_AGENT = new Agent({
},
});

/**
* A metadata URL that was analyzed and normalized into a fetchable URL. Specifies the URL, the
* gateway type, and any extra headers that may be required to fetch the metadata.
*/
export type FetchableMetadataUrl = {
url: URL;
gateway: 'ipfs' | 'arweave' | null;
fetchHeaders?: Record<string, string>;
};

/**
* List of public IPFS gateways that will be replaced with the value of `ENV.PUBLIC_GATEWAY_IPFS`
* whenever a metadata URL has these gateways hard coded in `http:` or `https:` URLs.
*/
const PUBLIC_GATEWAY_IPFS_REPLACED = ENV.PUBLIC_GATEWAY_IPFS_REPLACED.split(',');

/**
* Fetches all the localized metadata JSONs for a token. First, it downloads the default metadata
* JSON and parses it looking for other localizations. If those are found, each of them is then
@@ -172,9 +188,8 @@ async function parseMetadataForInsertion(
let cachedImage: string | undefined;
let cachedThumbnailImage: string | undefined;
if (image && typeof image === 'string' && ENV.IMAGE_CACHE_PROCESSOR_ENABLED) {
const normalizedUrl = normalizeImageUri(image);
[cachedImage, cachedThumbnailImage] = await processImageCache(
normalizedUrl,
image,
contract.principal,
token.token_number
);
@@ -243,14 +258,16 @@
export async function fetchMetadata(
httpUrl: URL,
contract_principal: string,
token_number: bigint
token_number: bigint,
headers?: Record<string, string>
): Promise<string | undefined> {
const url = httpUrl.toString();
try {
logger.info(`MetadataFetch for ${contract_principal}#${token_number} from ${url}`);
const result = await request(url, {
method: 'GET',
throwOnError: true,
headers,
dispatcher:
// Disable during tests so we can inject a global mock agent.
process.env.NODE_ENV === 'test' ? undefined : METADATA_FETCH_HTTP_AGENT,
@@ -304,10 +321,13 @@ export async function getMetadataFromUri(
return parseJsonMetadata(token_uri, content);
}

// Support HTTP/S URLs otherwise
const httpUrl = getFetchableDecentralizedStorageUrl(token_uri);
// Support HTTP/S URLs otherwise.
// Transform the URL to use a public gateway if necessary.
const { url: httpUrl, fetchHeaders } = getFetchableMetadataUrl(token_uri);
const urlStr = httpUrl.toString();
const content = await fetchMetadata(httpUrl, contract_principal, token_number);

// Fetch the metadata.
const content = await fetchMetadata(httpUrl, contract_principal, token_number, fetchHeaders);
return parseJsonMetadata(urlStr, content);
}

@@ -332,31 +352,55 @@ function parseJsonMetadata(url: string, content?: string): RawMetadata {

/**
* Helper method for creating http/s url for supported protocols.
* * URLs with `http` or `https` protocols are returned as-is.
* * URLs with `http` or `https` protocols are returned as-is. But if they are public IPFS gateways,
* they are replaced with `ENV.PUBLIC_GATEWAY_IPFS`.
* URLs with `ipfs` or `ipns` protocols are returned as an `https` url using a public IPFS
* gateway.
* * URLs with `ar` protocols are returned as `https` using a public Arweave gateway.
* @param uri - URL to convert
* @returns Fetchable URL
*/
export function getFetchableDecentralizedStorageUrl(uri: string): URL {
export function getFetchableMetadataUrl(uri: string): FetchableMetadataUrl {
try {
const parsedUri = new URL(uri);
if (parsedUri.protocol === 'http:' || parsedUri.protocol === 'https:') return parsedUri;
if (parsedUri.protocol === 'ipfs:') {
const result: FetchableMetadataUrl = {
url: parsedUri,
gateway: null,
fetchHeaders: undefined,
};
if (parsedUri.protocol === 'http:' || parsedUri.protocol === 'https:') {
// If this is a known public IPFS gateway, replace it with `ENV.PUBLIC_GATEWAY_IPFS`.
if (PUBLIC_GATEWAY_IPFS_REPLACED.includes(parsedUri.hostname)) {
result.url = new URL(`${ENV.PUBLIC_GATEWAY_IPFS}${parsedUri.pathname}`);
result.gateway = 'ipfs';
} else {
result.url = parsedUri;
}
} else if (parsedUri.protocol === 'ipfs:') {
const host = parsedUri.host === 'ipfs' ? 'ipfs' : `ipfs/${parsedUri.host}`;
return new URL(`${ENV.PUBLIC_GATEWAY_IPFS}/${host}${parsedUri.pathname}`);
}
if (parsedUri.protocol === 'ipns:') {
return new URL(`${ENV.PUBLIC_GATEWAY_IPFS}/${parsedUri.host}${parsedUri.pathname}`);
result.url = new URL(`${ENV.PUBLIC_GATEWAY_IPFS}/${host}${parsedUri.pathname}`);
result.gateway = 'ipfs';
} else if (parsedUri.protocol === 'ipns:') {
result.url = new URL(`${ENV.PUBLIC_GATEWAY_IPFS}/${parsedUri.host}${parsedUri.pathname}`);
result.gateway = 'ipfs';
} else if (parsedUri.protocol === 'ar:') {
result.url = new URL(`${ENV.PUBLIC_GATEWAY_ARWEAVE}/${parsedUri.host}${parsedUri.pathname}`);
result.gateway = 'arweave';
} else {
throw new MetadataParseError(`Unsupported uri protocol: ${uri}`);
}
if (parsedUri.protocol === 'ar:') {
return new URL(`${ENV.PUBLIC_GATEWAY_ARWEAVE}/${parsedUri.host}${parsedUri.pathname}`);

if (result.gateway === 'ipfs' && ENV.PUBLIC_GATEWAY_IPFS_EXTRA_HEADER) {
const [key, value] = ENV.PUBLIC_GATEWAY_IPFS_EXTRA_HEADER.split(':');
result.fetchHeaders = {
[key.trim()]: value.trim(),
};
}

return result;
} catch (error) {
throw new MetadataParseError(`Invalid uri: ${uri}`);
}
throw new MetadataParseError(`Unsupported uri protocol: ${uri}`);
}

export function parseDataUrl(
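
A quick usage sketch of the reworked helper; the gateway URL is the default from env.ts and the extra header value is a made-up example, mirroring the tests below:

import { getFetchableMetadataUrl } from './metadata-helpers'; // path relative to a caller in util/

// With ENV.PUBLIC_GATEWAY_IPFS = 'https://cloudflare-ipfs.com' and a hypothetical
// ENV.PUBLIC_GATEWAY_IPFS_EXTRA_HEADER = 'Authorization: Bearer example-token':
const fetchable = getFetchableMetadataUrl('ipfs://QmYCnfeseno5cLpC75rmy6LQhsNYQCJabiuwqNUXMaA3Fo/1145.png');
// fetchable.url.toString() -> 'https://cloudflare-ipfs.com/ipfs/QmYCnfeseno5cLpC75rmy6LQhsNYQCJabiuwqNUXMaA3Fo/1145.png'
// fetchable.gateway        -> 'ipfs'
// fetchable.fetchHeaders   -> { Authorization: 'Bearer example-token' }
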
35 changes: 31 additions & 4 deletions tests/token-queue/metadata-helpers.test.ts
@@ -2,7 +2,7 @@ import { MockAgent, setGlobalDispatcher } from 'undici';
import { ENV } from '../../src/env';
import { MetadataHttpError, MetadataParseError } from '../../src/token-processor/util/errors';
import {
getFetchableDecentralizedStorageUrl,
getFetchableMetadataUrl,
getMetadataFromUri,
getTokenSpecificUri,
fetchMetadata,
@@ -209,19 +209,46 @@ describe('Metadata Helpers', () => {
test('get fetchable URLs', () => {
ENV.PUBLIC_GATEWAY_IPFS = 'https://cloudflare-ipfs.com';
ENV.PUBLIC_GATEWAY_ARWEAVE = 'https://arweave.net';
ENV.PUBLIC_GATEWAY_IPFS_EXTRA_HEADER = 'Authorization: Bearer 1234567890';

const arweave = 'ar://II4z2ziYyqG7-kWDa98lWGfjxRdYOx9Zdld9P_I_kzE/9731.json';
expect(getFetchableDecentralizedStorageUrl(arweave).toString()).toBe(
const fetch1 = getFetchableMetadataUrl(arweave);
expect(fetch1.url.toString()).toBe(
'https://arweave.net/II4z2ziYyqG7-kWDa98lWGfjxRdYOx9Zdld9P_I_kzE/9731.json'
);
expect(fetch1.gateway).toBe('arweave');
expect(fetch1.fetchHeaders).toBeUndefined();

const ipfs =
'ipfs://ipfs/bafybeifwoqwdhs5djtx6vopvuwfcdrqeuecayp5wzpzjylxycejnhtrhgu/vague_art_paintings/vague_art_paintings_6_metadata.json';
expect(getFetchableDecentralizedStorageUrl(ipfs).toString()).toBe(
const fetch2 = getFetchableMetadataUrl(ipfs);
expect(fetch2.url.toString()).toBe(
'https://cloudflare-ipfs.com/ipfs/bafybeifwoqwdhs5djtx6vopvuwfcdrqeuecayp5wzpzjylxycejnhtrhgu/vague_art_paintings/vague_art_paintings_6_metadata.json'
);
expect(fetch2.gateway).toBe('ipfs');
expect(fetch2.fetchHeaders).toEqual({ Authorization: 'Bearer 1234567890' });

const ipfs2 = 'ipfs://QmYCnfeseno5cLpC75rmy6LQhsNYQCJabiuwqNUXMaA3Fo/1145.png';
expect(getFetchableDecentralizedStorageUrl(ipfs2).toString()).toBe(
const fetch3 = getFetchableMetadataUrl(ipfs2);
expect(fetch3.url.toString()).toBe(
'https://cloudflare-ipfs.com/ipfs/QmYCnfeseno5cLpC75rmy6LQhsNYQCJabiuwqNUXMaA3Fo/1145.png'
);
expect(fetch3.gateway).toBe('ipfs');
expect(fetch3.fetchHeaders).toEqual({ Authorization: 'Bearer 1234567890' });

const ipfs3 = 'https://ipfs.io/ipfs/QmYCnfeseno5cLpC75rmy6LQhsNYQCJabiuwqNUXMaA3Fo/1145.png';
const fetch4 = getFetchableMetadataUrl(ipfs3);
expect(fetch4.url.toString()).toBe(
'https://cloudflare-ipfs.com/ipfs/QmYCnfeseno5cLpC75rmy6LQhsNYQCJabiuwqNUXMaA3Fo/1145.png'
);
expect(fetch4.gateway).toBe('ipfs');
expect(fetch4.fetchHeaders).toEqual({ Authorization: 'Bearer 1234567890' });

const http = 'https://test.io/1.json';
const fetch5 = getFetchableMetadataUrl(http);
expect(fetch5.url.toString()).toBe(http);
expect(fetch5.gateway).toBeNull();
expect(fetch5.fetchHeaders).toBeUndefined();
});

test('replace URI string tokens', () => {
Expand Down