diff --git a/.github/workflows/push.yaml b/.github/workflows/push.yaml
new file mode 100644
index 0000000..ff5e77b
--- /dev/null
+++ b/.github/workflows/push.yaml
@@ -0,0 +1,16 @@
+name: CI
+
+on:
+  workflow_dispatch:
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v3
+
+      - name: Run a one-line script
+        env:
+          BOT_TOKEN: ${{ secrets.BOT_TOKEN }}
+        run: bash crawl_news.sh
diff --git a/crawl_news.sh b/crawl_news.sh
new file mode 100644
--- /dev/null
+++ b/crawl_news.sh
@@ -0,0 +1,100 @@
+#!/bin/bash
+#
+# Fetch the Atom feed of crawl/crawl commits and forward the entries to a
+# Telegram channel through the Bot API sendMessage endpoint.
+#
+# Environment:
+#   BOT_TOKEN   Telegram bot token (empty when unset).
+#   CHANNEL_ID  Target chat id; defaults to @dcss_news.
+#
+# Requires wget, curl and xmlstarlet (binary named "xml" or "xmlstarlet").
+
+# NOTE(review): nothing in this script writes OLD_FEED, so unless something
+# else creates it, every run takes the "no previous feed" branch below.
+OLD_FEED=/tmp/test.txt
+FEED_XML=/tmp/master.atom
+CURRENT_FEED=$(mktemp -p /tmp crawl_feed.XXX) || exit 1
+SHORT_COMMITS=$(mktemp -p /tmp crawl.XXXX) || exit 1
+# Remove the scratch files on every exit path.
+trap 'rm -f -- "$CURRENT_FEED" "$SHORT_COMMITS"' EXIT
+
+BOT_TOKEN=${BOT_TOKEN:-}
+CHANNEL_ID=${CHANNEL_ID:-@dcss_news}
+URL="https://api.telegram.org/bot${BOT_TOKEN}/sendMessage"
+
+# The xmlstarlet binary is installed as "xml" or "xmlstarlet" depending on
+# the packaging; accept either.
+XML=$(command -v xml || command -v xmlstarlet) || {
+  echo "crawl_news: xmlstarlet is not installed" >&2
+  exit 1
+}
+
+wget https://github.com/crawl/crawl/commits/master.atom -O "$FEED_XML" || exit 1
+
+# One record per entry: content, link, then a "----" separator line.
+# The sed stage decodes the HTML entities left in the extracted text (named
+# or numeric form); blank lines and leading whitespace are dropped afterwards.
+"$XML" sel -N x=http://www.w3.org/2005/Atom -t -m "//x:entry" \
+  -v "x:content" -v "x:link/@href" -n -o "----" -n "$FEED_XML" \
+  | sed -e 's/&\(amp\|#38\);/\&/g' \
+    -e 's/&\(lt\|#60\);/</g' \
+    -e 's/&\(gt\|#62\);/>/g' \
+    -e 's/&\(quot\|#34\);/"/g' \
+    -e "s/&\(apos\|#39\);/'/g" \
+    -e 's/ | / /g' \
+  | grep -v '^$' \
+  | sed 's/^[ \t]*//' > "$CURRENT_FEED"
+
+#######################################
+# Forward one feed entry.
+# An entry with exactly two non-blank lines is not sent right away: its first
+# line is appended to SHORT_COMMITS and posted by the loop at the end of the
+# script. Any other non-empty entry is posted immediately with its full text.
+# Globals:   URL, CHANNEL_ID, SHORT_COMMITS (read)
+# Arguments: $1 - text of one entry
+#######################################
+push_news() {
+  local data header
+  data=$(printf '%s\n' "$1" | grep -v "^\s*$")
+  # Records made only of whitespace (e.g. after the last separator) carry
+  # nothing to send.
+  [[ -n "$data" ]] || return 0
+  IFS=$'\n' read -rd '' header _ <<<"$data"
+
+  if [[ $(printf '%s\n' "$data" | wc -l) == 2 ]]; then
+    # TODO: normalize the string(converting html entities)
+    printf '%s\n' "$header" >> "$SHORT_COMMITS"
+  else
+    # --data-urlencode keeps "&", "+" and newlines in the text intact.
+    curl -s -X POST "$URL" \
+      --data-urlencode "chat_id=$CHANNEL_ID" \
+      --data-urlencode "text=$1"
+  fi
+}
+
+# push_news runs in a child bash started by xargs, so the function and every
+# variable it reads must be exported.
+export -f push_news
+export SHORT_COMMITS URL CHANNEL_ID
+
+# 's/^[[:space:]]*$/\x0/'
+if [[ -f "$OLD_FEED" ]]; then
+  # Keep only the lines that are new or changed in the current feed, reverse
+  # the order of the records, then hand each NUL-terminated record to push_news.
+  diff --changed-group-format="%>" --unchanged-group-format="" \
+    "$OLD_FEED" "$CURRENT_FEED" \
+    | tac -s $'\n----' \
+    | sed -e 's/----/\x0/' -e 's/<[^>]\+>//g' \
+    | xargs -0 -I{} -- bash -c 'push_news "$@"' _ {}
+else
+  sed -e 's/----/\x0/' -e 's/<[^>]\+>//g' "$CURRENT_FEED" \
+    | xargs -0 -I{} -- bash -c 'push_news "$@"' _ {}
+fi
+
+
+while read -r COMMIT; do
+  curl -s -X POST "$URL" \
+    --data-urlencode "chat_id=$CHANNEL_ID" \
+    -d parse_mode="HTML" \
+    --data-urlencode "text=$COMMIT"
+done < "$SHORT_COMMITS"