Skip to content

Commit

Permalink
Fix professors for sections not being scraped (#244)
Browse files Browse the repository at this point in the history
Updated the scraper to account for the API changes in Banner. Upgraded Yarn to the Berry version through Corepack. Updated the Dockerfile build chain to use the upgraded Yarn and to reduce the image size.

Co-authored-by: mehallhm <[email protected]>
  • Loading branch information
mehallhm and mehallhm authored Jan 21, 2025
1 parent e05fc90 commit addbf55
Show file tree
Hide file tree
Showing 15 changed files with 11,702 additions and 8,100 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/docker.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ jobs:
TWILIO_AUTH_TOKEN: 123
elasticURL: "http://localhost:9200"
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v4

- name: Build the Docker container
run: docker build --no-cache -t cca-test:latest .
Expand Down Expand Up @@ -57,7 +57,7 @@ jobs:
run: curl localhost:4000/.well-known/apollo/server-health

- name: Run a custom scrape (shouldn't take more than ~5 minutes)
run: docker exec test sh -c "LOG_LEVEL=VERBOSE CUSTOM_SCRAPE=true TERMS_TO_SCRAPE=202140 yarn prod:scrape"
run: docker exec test sh -c "LOG_LEVEL=VERBOSE CUSTOM_SCRAPE=true TERMS_TO_SCRAPE=202140 node scrapers/main.js"

#### If these start failing, it might not be our fault
## Since this test uses live data, there is a chance it could change
Expand Down Expand Up @@ -100,7 +100,7 @@ jobs:
"SELECT name FROM courses WHERE subject = 'CS' and class_id = '3500';" | grep "Object"
- name: Run the updater ONLY ONCE, so that it scrapes missing classes
run: docker exec test sh -c "LOG_LEVEL=VERBOSE CUSTOM_SCRAPE=true UPDATE_ONLY_ONCE=true TERMS_TO_SCRAPE=202140 yarn prod:updater"
run: docker exec test sh -c "LOG_LEVEL=VERBOSE CUSTOM_SCRAPE=true UPDATE_ONLY_ONCE=true TERMS_TO_SCRAPE=202140 node services/updater.js"
# This should take 5 minutes MAX.
# In the scrape step, we used `CUSTOM_SCRAPE=true`. That limits what courses are being scraped - see `scrapers/filter` for more details.
# We need to use that here as well.
Expand Down
44 changes: 31 additions & 13 deletions .github/workflows/lint.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,38 +11,56 @@ jobs:
name: prettier
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v4
with:
ref: ${{ github.head_ref }}

- name: Prettify code
uses: creyD/prettier_action@v3.0
uses: creyD/prettier_action@v4.3
with:
prettier_options: --write **/*.{js,ts}
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

lint:
name: Lint & Type checks
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: install node
uses: actions/setup-node@v2
- uses: actions/checkout@v4

- name: Install Node
uses: actions/setup-node@v4
with:
node-version: "22"
- uses: bahmutov/npm-install@v1
- name: Linting

- name: Enable Corepack
run: corepack enable

- name: Install deps
run: yarn install

- name: Check linter
run: yarn lint
- name: Checks Typescript types

- name: Check types
run: yarn tsc

dependency_checks:
name: Dependency Checks
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: install node
uses: actions/setup-node@v2
- uses: actions/checkout@v4

- name: Install Node
uses: actions/setup-node@v4
with:
node-version: "22"
- uses: bahmutov/npm-install@v1

- name: Enable Corepack
run: corepack enable

- name: Install deps
run: yarn install

- name: Checks for duplicate definitions in the yarn lockfile
run: yarn yarn-deduplicate --fail
run: yarn dedupe --check
55 changes: 38 additions & 17 deletions .github/workflows/test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,22 @@ jobs:
elasticURL: "http://localhost:9200"
NODE_COVERALLS_DEBUG: 1
steps:
- uses: actions/checkout@v2
- name: install node
uses: actions/setup-node@v2
- uses: actions/checkout@v4

- name: Install Node
uses: actions/setup-node@v4
with:
node-version: "22"
- uses: bahmutov/npm-install@v1

- run: yarn unittest --coverage
- name: Enable Corepack
run: corepack enable

- name: Install deps
run: yarn install

- name: Run unit tests
run: yarn unittest --coverage

- name: Coveralls
uses: coverallsapp/github-action@master
with:
Expand Down Expand Up @@ -55,26 +63,35 @@ jobs:
elasticURL: "http://localhost:9200"
NODE_COVERALLS_DEBUG: 1
steps:
- uses: actions/checkout@v2
- name: install node
uses: actions/setup-node@v2
- uses: actions/checkout@v4

- name: Install Node
uses: actions/setup-node@v4
with:
node-version: "22"
- uses: bahmutov/npm-install@v1

- run: yarn db:migrate
- name: Enable Corepack
run: corepack enable

- run: yarn db:refresh
- name: Install deps
run: yarn install

- run: yarn test --coverage --detectOpenHandles
- name: Setup database
run: |
yarn db:migrate
yarn db:refresh
- name: Run the mixed tests
run: yarn test --coverage --detectOpenHandles
- name: Coveralls
uses: coverallsapp/github-action@master
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
flag-name: General tests
parallel: true

- run: yarn dbtest --coverage
- name: Run the integration tests
run: yarn dbtest --coverage
- name: Coveralls
uses: coverallsapp/github-action@master
with:
Expand Down Expand Up @@ -107,14 +124,18 @@ jobs:
NODE_ENV: dev
DEV: true
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v4

- name: install node
uses: actions/setup-node@v2
- name: Install Node
uses: actions/setup-node@v4
with:
node-version: "22"

- uses: bahmutov/npm-install@v1
- name: Enable Corepack
run: corepack enable

- name: Install deps
run: yarn install

- run: mv ./data/cache_2022_summer ./cache

Expand Down
9 changes: 9 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,12 @@ cache.zip
logs/
coverage/
.idea/

# Yarn
.pnp.*
.yarn/*
!.yarn/patches
!.yarn/plugins
!.yarn/releases
!.yarn/sdks
!.yarn/versions
1 change: 1 addition & 0 deletions .yarnrc.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
nodeLinker: node-modules
31 changes: 26 additions & 5 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,10 +1,15 @@
# build environment
FROM node:22-alpine as build
FROM node:22-alpine AS build
WORKDIR /app

# Install deps
COPY package.json /app/package.json
COPY yarn.lock /app/yarn.lock
COPY .yarnrc.yml /app/.yarnrc.yml

RUN corepack enable
RUN yarn install --frozen-lockfile

# Copy source
COPY graphql /app/graphql
COPY prisma /app/prisma
Expand All @@ -19,16 +24,32 @@ COPY infrastructure/prod /app
COPY babel.config.json /app

RUN yarn build
RUN rm -rf node_modules

FROM node:22-alpine AS dist
WORKDIR /dist
RUN corepack enable

COPY --from=build /app/dist .

# TODO: This should be `yarn workspaces focus --production`, but
# the dev and non-dev deps are a tangled mess right now
RUN yarn workspaces focus

# Manually install openssl for Prisma. The Alpine image should already
# have it, but Prisma fails to detect it for some reason
RUN set -ex; \
apk update; \
apk add --no-cache \
openssl

# Get RDS Certificate
RUN apk update && apk add wget && rm -rf /var/cache/apk/* \
&& wget "https://s3.amazonaws.com/rds-downloads/rds-ca-2019-root.pem"
ENV dbCertPath /app/rds-ca-2019-root.pem
ENV dbCertPath=/app/rds-ca-2019-root.pem

ENV NODE_ENV=prod

ENTRYPOINT ["/app/entrypoint.sh"]
ENTRYPOINT ["/dist/entrypoint.sh"]

EXPOSE 4000 8080
CMD ["yarn", "prod"]
CMD ["node", "graphql/index.js"]
2 changes: 0 additions & 2 deletions infrastructure/dev/compose.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
version: "2"

services:
postgresql:
image: postgres:11.19-bullseye
Expand Down
12 changes: 6 additions & 6 deletions infrastructure/dev/docker-compose-server.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
version: "2"

services:
postgresql:
image: postgres:11.19-bullseye
Expand All @@ -9,8 +7,9 @@ services:
- ./docker-postgresql-multiple-databases:/docker-entrypoint-initdb.d
- pg:/var/lib/postgresql/data
environment:
- POSTGRES_MULTIPLE_DATABASES=searchneu_dev,searchneu_test
- POSTGRES_USER=postgres
POSTGRES_MULTIPLE_DATABASES: searchneu_dev,searchneu_test
POSTGRES_USER: postgres
POSTGRES_PASSWORD: default_password
es:
image: docker.elastic.co/elasticsearch/elasticsearch-oss:7.10.2
ports:
Expand All @@ -22,12 +21,12 @@ services:
depends_on:
- es
- postgresql
command: yarn prod
ports:
- 4000:4000
- 8080:8080
environment:
DATABASE_URL: postgresql://postgres@postgresql:5432/searchneu_dev
DATABASE_URL: postgresql://postgres:default_password@postgresql:5432/searchneu_dev
POSTGRES_PASSWORD: default_password
elasticURL: http://es:9200
TWILIO_PHONE_NUMBER:
TWILIO_ACCOUNT_SID:
Expand All @@ -36,5 +35,6 @@ services:
CLIENT_ORIGIN: http://localhost:5000
JWT_SECRET:
SLACK_WEBHOOK_URL:

volumes:
pg:
6 changes: 2 additions & 4 deletions infrastructure/prod/entrypoint.sh
Original file line number Diff line number Diff line change
@@ -1,9 +1,7 @@
#!/bin/sh

cd dist
yarn install --production
yarn prod:db:migrate
# Run a production prisma migration
yarn prisma migrate deploy --preview-feature
yarn db:refresh
cd ..

exec "$@"
14 changes: 6 additions & 8 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,8 @@
"dbtest": "jest -i --projects tests/database --verbose",
"about:unittest": "//// Runs unit tests. Does not need db, elasticsearch, spun up. Does not need the Docker containers to be running.",
"unittest": "jest -i --projects tests/unit --verbose",
"about:build_backend": "//// Compiles this project",
"build_backend": "rm -rf dist && mkdir -p dist && babel --extensions '.js,.ts' . -d dist/ --copy-files --ignore node_modules --ignore .git --include-dotfiles && rm -rf dist/.git",
"about:build": "//// Compiles this project, suppressing output",
"build": "yarn -s build_backend",
"about:build": "//// Compiles this project",
"build": "rm -rf dist && mkdir -p dist && babel --extensions '.js,.ts' . -d dist/ --copy-files --ignore node_modules --ignore .git --include-dotfiles && rm -rf dist/.git",
"about:prod:scrape": "//// Runs the scrapers (same as `yarn scrape`, but on the compiled project)",
"prod:scrape": "node dist/scrapers/main.js",
"about:prod": "//// Runs the API (same as `yarn dev`, but on the compiled project)",
Expand All @@ -67,7 +65,7 @@
"@babel/node": "^7.0.0",
"@babel/register": "^7.0.0",
"@elastic/elasticsearch": "7.17.0",
"@prisma/client": "^5.0.0",
"@prisma/client": "5.22.0",
"@typescript-eslint/typescript-estree": "^8.10.0",
"amplitude": "^6.0.0",
"apollo-server": "^3.13.0",
Expand Down Expand Up @@ -140,14 +138,14 @@
"prettier": "^3.0.0",
"pretty-quick": "4",
"prisma": "^5.0.0",
"typescript": "^5.6.0",
"yarn-deduplicate": "^6.0.2"
"typescript": "^5.6.0"
},
"about:engines": [
"This helps us ensure that only specific versions of Node are used to run this project",
"Older versions may lead to unexpected errors due to dependency issues, etc."
],
"engines": {
"node": ">=20"
}
},
"packageManager": "[email protected]"
}
27 changes: 24 additions & 3 deletions scrapers/classes/parsersxe/termParser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -203,10 +203,31 @@ class TermParser {
);

const bodyObj = JSON.parse(req.body);
if (bodyObj.success) {
return { items: bodyObj.data, totalCount: bodyObj.totalCount };

if (!bodyObj.success) {
return false;
}
return false;

bodyObj.data = await Promise.all(
bodyObj.data.map(async (sr: SectionSR) => {
const resp = await request.get(
"https://nubanner.neu.edu/StudentRegistrationSsb/ssb/searchResults/getFacultyMeetingTimes",
{
searchParams: {
term: termId,
courseReferenceNumber: sr.courseReferenceNumber,
},
},
);

const body = await JSON.parse(resp.body);
sr.faculty = body.fmt[0]?.faculty ?? [];

return sr;
}),
);

return { items: bodyObj.data, totalCount: bodyObj.totalCount };
})) as SectionSR[];
} catch (error) {
macros.error(`Could not get section data for ${termId}`);
Expand Down
Loading

0 comments on commit addbf55

Please sign in to comment.