Skip to content

Commit

Permalink
added hash caching strategy
Browse files (browse the repository at this point in the history)
  • Loading branch information
sairaj-mote committed Dec 13, 2023
1 parent 23a6ec8 commit f051c61
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 14 deletions.
38 changes: 25 additions & 13 deletions index.js
Original file line number Diff line number Diff line change
Expand Up @@ -70,15 +70,13 @@ async function fetchAndHashContent(url, visitedUrls = new Set()) {
}

visitedUrls.add(url);

const response = await axios.get(url, { responseType: 'arraybuffer', timeout: 10000 });
const content = response.data.toString('utf-8');

// Parse HTML content to identify linked resources
const root = parseHtml(content);
const linkedResources = root.querySelectorAll('link[rel="stylesheet"], script[src]');
// Fetch and hash linked resources
const linkedResourceHashes = await Promise.all(linkedResources.map(async (resource) => {
const linkedResource = await Promise.all(linkedResources.map(async (resource) => {
const resourceUrl = parseUrl(resource.getAttribute('href') || resource.getAttribute('src'), true);
let absoluteResourceUrl = resourceUrl.href;
if (!resourceUrl.hostname) {
Expand All @@ -91,10 +89,10 @@ async function fetchAndHashContent(url, visitedUrls = new Set()) {
}));

// Combine the content and hashes of linked resources
return `${content}_${linkedResourceHashes.join('_')}`;
return `${content}_${linkedResource.join('_')}`;
}


const hashCache = new Map();
// API endpoint to start the recursive download and hashing
app.post('/hash', async (req, res) => {
try {
Expand All @@ -107,17 +105,31 @@ app.post('/hash', async (req, res) => {

const promises = urls.map(async (url) => {
const urlWithoutHashAndQuery = parseUrlWithoutHashAndQuery(url);
const hashedContent = await fetchAndHashContent(urlWithoutHashAndQuery);
const fileHash = await hashContent(Buffer.from(hashedContent, 'utf-8'));
return { url, fileHash };
let hash;
// regex to identify owner and repo name from https://owner.github.io/repo-name
const githubRepoRegex = /https?:\/\/([\w-]+)\.github\.io\/([\w-]+)/;
if (githubRepoRegex.test(urlWithoutHashAndQuery)) {
const [, owner, repo] = githubRepoRegex.exec(urlWithoutHashAndQuery) || [null, null, null,];
const { data } = await axios.get(`https://api.github.com/repos/${owner}/${repo}`);
const lastUpdated = new Date(data.pushed_at);

const cached = hashCache.get(urlWithoutHashAndQuery);
if (cached && cached.lastUpdated >= lastUpdated) {
hash = cached.hash;
} else {
const hashedContent = await fetchAndHashContent(urlWithoutHashAndQuery);
hash = await hashContent(Buffer.from(hashedContent, 'utf-8'));
hashCache.set(urlWithoutHashAndQuery, { hash, lastUpdated });
}
} else {
const hashedContent = await fetchAndHashContent(urlWithoutHashAndQuery);
hash = await hashContent(Buffer.from(hashedContent, 'utf-8'));
}

return { url, hash };
});

let results = await Promise.all(promises);
results = results.reduce((acc, { url, fileHash }) => {
acc[url] = fileHash;
return acc;
}, {});

res.json(results);
} catch (error) {
res.status(500).json({ error: error.message });
Expand Down
2 changes: 1 addition & 1 deletion index.min.js

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit f051c61

Please sign in to comment.