File tree Expand file tree Collapse file tree 3 files changed +13
-6
lines changed Expand file tree Collapse file tree 3 files changed +13
-6
lines changed Original file line number Diff line number Diff line change @@ -19,16 +19,17 @@ jobs:
1919 os : [ubuntu-latest]
2020 EXTRA : [false] # used to force includes to get included
2121 include :
22- - python-version : ' 3.7'
23- os : ubuntu-20.04 # oldest version on github actions
22+ - python-version : ' 3.8'
23+ # see https://github.com/duckdb/duckdb/blob/main/.github/workflows/Python.yml for duckdb python versions
24+ os : ubuntu-22.04 # oldest version on github actions
2425 EXTRA : true
2526 - python-version : ' 3.11'
2627 os : macos-latest
2728 EXTRA : true
2829 - python-version : ' 3.13'
2930 os : macos-latest
3031 EXTRA : true
31- - python-version : ' 3.7 '
32+ - python-version : ' 3.8 '
3233 os : windows-latest
3334 EXTRA : true
3435 - python-version : ' 3.13'
7273 run : make duck_cloudfront
7374
7475 - name : make wreck_the_warc
76+ shell : bash # or windows will throw an error
7577 run : make wreck_the_warc
Original file line number Diff line number Diff line change @@ -78,7 +78,7 @@ wreck_the_warc:
7878 @echo we will break and then fix this warc
7979 cp whirlwind.warc.gz testing.warc.gz
8080 rm -f testing.warc
81- gunzip testing.warc.gz
81+ gzip -d testing.warc.gz # gunzip does not work on windows
8282 @echo
8383 @echo iterate over this uncompressed warc: works
8484 python ./warcio-iterator.py testing.warc
@@ -90,7 +90,7 @@ wreck_the_warc:
9090 python ./warcio-iterator.py testing.warc.gz || /usr/bin/true
9191 @echo
9292 @echo " now let's do it the right way"
93- gunzip testing.warc.gz
93+ gzip -d testing.warc.gz
9494 warcio recompress testing.warc testing.warc.gz
9595 @echo
9696 @echo and now iterating works
Original file line number Diff line number Diff line change 22import glob
33import json
44import os .path
5- import subprocess
65import sys
76import gzip
7+ import platform
8+ import io
89
910import duckdb
1011
@@ -82,6 +83,10 @@ def get_files(algo, crawl):
8283
8384
8485def main (algo , crawl ):
86+ windows = True if platform .system () == 'Windows' else False
87+ if windows :
88+ # windows stdout is often cp1252
89+ sys .stdout = io .TextIOWrapper (sys .stdout .buffer , encoding = 'utf-8' )
8590 files = get_files (algo , crawl )
8691 retries_left = 100
8792
You can’t perform that action at this time.
0 commit comments