Hello World
" - async for result in parser.ingest(data): - assert result.strip() == "Hello World" - - -@pytest.mark.asyncio -@patch("pypdf.PdfReader") -async def test_pdf_parser(mock_pdf_reader): - with contextlib.suppress(asyncio.CancelledError): - parser = PDFParser() - mock_pdf_reader.return_value.pages = [ - MagicMock(extract_text=lambda: "Page text") - ] - data = b"fake PDF data" - async for result in parser.ingest(data): - assert result == "Page text" - - -@pytest.mark.asyncio -@patch("pptx.Presentation") -async def test_ppt_parser(mock_presentation): - with contextlib.suppress(asyncio.CancelledError): - mock_slide = MagicMock() - mock_shape = MagicMock(text="Slide text") - mock_slide.shapes = [mock_shape] - mock_presentation.return_value.slides = [mock_slide] - parser = PPTParser() - data = b"fake PPT data" - async for result in parser.ingest(data): - assert result == "Slide text" - - -@pytest.mark.asyncio -@patch("docx.Document") -async def test_docx_parser(mock_document): - with contextlib.suppress(asyncio.CancelledError): - mock_paragraph = MagicMock(text="Paragraph text") - mock_document.return_value.paragraphs = [mock_paragraph] - parser = DOCXParser() - data = b"fake DOCX data" - async for result in parser.ingest(data): - assert result == "Paragraph text" - - -@pytest.mark.asyncio -async def test_csv_parser(): - with contextlib.suppress(asyncio.CancelledError): - parser = CSVParser() - data = "col1,col2\nvalue1,value2" - async for result in parser.ingest(data): - assert result == "col1, col2" - break - - -@pytest.mark.asyncio -@patch("openpyxl.load_workbook") -async def test_xlsx_parser(mock_load_workbook): - with contextlib.suppress(asyncio.CancelledError): - mock_sheet = MagicMock() - mock_sheet.iter_rows.return_value = [(1, 2), (3, 4)] - mock_workbook = MagicMock(worksheets=[mock_sheet]) - mock_load_workbook.return_value = mock_workbook - parser = XLSXParser() - data = b"fake XLSX data" - async for result in parser.ingest(data): - assert result == "1, 2" - break - - -@pytest.mark.asyncio -async def test_markdown_parser(): - with contextlib.suppress(asyncio.CancelledError): - parser = MDParser() - data = "# Header\nContent" - async for result in parser.ingest(data): - assert result.strip() == "Header\nContent" diff --git a/py/tests/core/providers/database/relational/test_document_db.py b/py/tests/core/providers/database/relational/test_document_db.py index 940cd6370..add3aa7f4 100644 --- a/py/tests/core/providers/database/relational/test_document_db.py +++ b/py/tests/core/providers/database/relational/test_document_db.py @@ -7,7 +7,8 @@ DocumentInfo, DocumentType, IngestionStatus, - RestructureStatus, + KGCreationStatus, + KGEnrichmentStatus, ) @@ -30,7 +31,7 @@ async def test_upsert_documents_overview(temporary_postgres_db_provider): version="1.0", size_in_bytes=1024, ingestion_status=IngestionStatus.PENDING, - restructuring_status=RestructureStatus.PENDING, + kg_extraction_status=KGCreationStatus.PENDING, ) await temporary_postgres_db_provider.relational.upsert_documents_overview( document_info @@ -54,8 +55,8 @@ async def test_upsert_documents_overview(temporary_postgres_db_provider): assert inserted_document.size_in_bytes == document_info.size_in_bytes assert inserted_document.ingestion_status == document_info.ingestion_status assert ( - inserted_document.restructuring_status - == document_info.restructuring_status + inserted_document.kg_extraction_status + == document_info.kg_extraction_status ) # Update the document and verify the changes @@ -88,7 +89,7 @@ async def test_delete_from_documents_overview(temporary_postgres_db_provider): version="1.0", size_in_bytes=1024, ingestion_status=IngestionStatus.PENDING, - restructuring_status=RestructureStatus.PENDING, + kg_extraction_status=KGCreationStatus.PENDING, ) await temporary_postgres_db_provider.relational.upsert_documents_overview( document_info @@ -119,7 +120,7 @@ async def test_get_documents_overview(temporary_postgres_db_provider): version="1.0", size_in_bytes=1024, ingestion_status=IngestionStatus.PENDING, - restructuring_status=RestructureStatus.PENDING, + kg_extraction_status=KGCreationStatus.PENDING, ) document_info2 = DocumentInfo( id=UUID("00000000-0000-0000-0000-000000000004"), @@ -131,7 +132,7 @@ async def test_get_documents_overview(temporary_postgres_db_provider): version="1.0", size_in_bytes=2048, ingestion_status=IngestionStatus.SUCCESS, - restructuring_status=RestructureStatus.PENDING, + kg_extraction_status=KGCreationStatus.PENDING, ) await temporary_postgres_db_provider.relational.upsert_documents_overview( [document_info1, document_info2] diff --git a/py/tests/core/providers/kg/test_kg.py b/py/tests/core/providers/kg/test_kg.py deleted file mode 100644 index d82fad50f..000000000 --- a/py/tests/core/providers/kg/test_kg.py +++ /dev/null @@ -1,240 +0,0 @@ -from unittest.mock import AsyncMock, MagicMock, Mock -from uuid import uuid4 - -import pytest - -from core.base import ( - Community, - DocumentFragment, - Entity, - GenerationConfig, - KGExtraction, - Triple, -) -from core.pipes.kg.clustering import KGClusteringPipe -from core.pipes.kg.extraction import AsyncPipe, KGTriplesExtractionPipe - - -@pytest.fixture -def kg_extraction_pipe(): - return KGTriplesExtractionPipe( - config=MagicMock(), - kg_provider=MagicMock(), - database_provider=MagicMock(), - llm_provider=MagicMock(), - prompt_provider=MagicMock(), - chunking_provider=MagicMock(), - ) - - -@pytest.fixture -def document_fragment(): - return DocumentFragment( - id=uuid4(), - collection_ids=[uuid4()], - extraction_id=uuid4(), - document_id=uuid4(), - user_id=uuid4(), - data="Test data", - metadata={}, - ) - - -@pytest.fixture -def kg_extraction_input(): - return DocumentFragment( - document_id=uuid4(), - generation_config=GenerationConfig(), - ) - - -@pytest.mark.asyncio -async def test_extract_kg_success(kg_extraction_pipe, document_fragment): - kg_extraction_pipe.llm_provider.aget_completion = AsyncMock( - return_value=MagicMock( - choices=[ - MagicMock( - message=MagicMock( - content=( - '("entity"$$$$Entity1$$$$Category1$$$$Description1)' - '("relationship"$$$$Entity1$$$$Entity2$$$$Predicate$$$$Description$$$$0.8)' - ) - ) - ) - ] - ) - ) - result = await kg_extraction_pipe.extract_kg( - [document_fragment], GenerationConfig(), 10, [], [] - ) - - assert isinstance(result, KGExtraction) - assert len(result.entities) == 1 - assert len(result.triples) == 1 - assert result.entities["Entity1"].name == "Entity1" - assert result.triples[0].subject == "Entity1" - assert result.triples[0].object == "Entity2" - - -# TODO - Revive extraction testing after recent refactor -# @pytest.mark.asyncio -# async def test_run_logic(kg_extraction_pipe, kg_extraction_input): -# def mock_input_generator(): -# return kg_extraction_input.dict() # AsyncPipe.Input(message=document_fragment.dict()) - -# input_mock = MagicMock() -# input_mock.message = mock_input_generator() - -# kg_extraction_pipe.extract_kg = Mock( -# return_value=KGExtraction( -# fragment_id=document_fragment.id, -# document_id=document_fragment.document_id, -# entities={ -# "TestEntity": Entity( -# name="TestEntity", -# category="TestCategory", -# description="TestDescription", -# ) -# }, -# triples=[ -# Triple( -# subject="TestSubject", -# predicate="TestPredicate", -# object="TestObject", -# ) -# ], -# ) -# ) - -# results = [ -# result -# async for result in kg_extraction_pipe._run_logic( -# input_mock, MagicMock(), "run_id" -# ) -# ] - -# # test failing due to issues with mock -# # assert len(results) == 2 -# # for result in results: -# # assert isinstance(result, KGExtraction) -# # assert len(result.entities) == 1 -# # assert len(result.triples) == 1 - - -@pytest.fixture -def mock_kg_provider(mocker): - provider = mocker.Mock() - provider.get_all_entities.return_value = [ - Entity( - name=f"Entity{i}", - category=f"Category{i%2+1}", - description=f"Description{i}", - ) - for i in range(1, 4) - ] - - provider.get_entities.return_value = [ - Entity( - name=f"Entity{i}", - category=f"Category{i%2+1}", - description=f"Description{i}", - ) - for i in range(1, 4) - ] - - provider.get_triples.return_value = [ - Triple( - subject=f"Entity{i}", - predicate=f"Predicate{i%2+1}", - object=f"Entity{i+1}", - ) - for i in range(1, 4) - ] - - provider.get_communities.return_value = [ - Community( - id=f"Community{i}", - level=f"Level{i%2+1}", - short_id=f"Short{i}", - title=f"Title{i}", - entity_ids=[f"Entity{i}"], - relationship_ids=[f"Relationship{i}"], - ) - for i in range(1, 4) - ] - - return provider - - -@pytest.fixture -def mock_embedding_provider(mocker): - provider = mocker.Mock() - provider.get_embeddings.return_value = [ - [0.1 * i, 0.2 * i, 0.3 * i] for i in range(1, 4) - ] - provider.async_get_embedding = AsyncMock(return_value=[0.1, 0.2, 0.3]) - return provider - - -@pytest.fixture -def mock_completion_provider(mocker): - provider = mocker.Mock() - - async def mock_aget_completion(*args, **kwargs): - return mocker.Mock( - choices=[ - mocker.Mock(message=mocker.Mock(content="Cluster summary")) - ] - ) - - provider.aget_completion = mock_aget_completion - return provider - - -@pytest.fixture -def mock_prompt_provider(mocker): - provider = mocker.Mock() - provider.get_message_payload.return_value = mocker.Mock( - task_prompt_name="graphrag_community_reports", - task_inputs={"input_text": "Test input text"}, - ) - provider._get_message_payload.return_value = { - "task_prompt_name": "graphrag_community_reports", - "task_inputs": {"input_text": "Test input text"}, - } - return provider - - -@pytest.fixture -def kg_clustering_pipe( - mocker, - mock_kg_provider, - mock_embedding_provider, - mock_completion_provider, - mock_prompt_provider, -): - return KGClusteringPipe( - kg_provider=mock_kg_provider, - embedding_provider=mock_embedding_provider, - llm_provider=mock_completion_provider, - prompt_provider=mock_prompt_provider, - n_clusters=2, - ) - - -# Test is failing due to a dependency of graspologic failing to install: /hyppo/kgof/fssd.py:4: ModuleNotFoundError -# @pytest.mark.asyncio -# async def test_cluster_kg(kg_clustering_pipe): -# triples = [ -# Triple(subject="Entity1", predicate="relatedTo", object="Entity2"), -# Triple(subject="Entity2", predicate="relatedTo", object="Entity3"), -# Triple(subject="Entity3", predicate="relatedTo", object="Entity1"), -# ] - -# result = [] -# async for community in kg_clustering_pipe.cluster_kg(triples): -# result.append(community) - -# assert len(result) == 1 -# assert result[0]["id"] == "0_0" -# assert result[0]["title"] == "_" diff --git a/py/tests/core/providers/parsing/conftest.py b/py/tests/core/providers/parsing/conftest.py deleted file mode 100644 index 0a654bba1..000000000 --- a/py/tests/core/providers/parsing/conftest.py +++ /dev/null @@ -1,16 +0,0 @@ -import pytest - -from core.base import ParsingConfig -from core.providers.parsing.unstructured_parsing import ( - UnstructuredParsingProvider, -) - - -@pytest.fixture -def parsing_config(): - return ParsingConfig() - - -@pytest.fixture -def unstructured_parsing_provider(parsing_config): - return UnstructuredParsingProvider(use_api=False, config=parsing_config) diff --git a/py/tests/core/providers/parsing/test_unstructured_parsing.py b/py/tests/core/providers/parsing/test_unstructured_parsing.py deleted file mode 100644 index c6dd36d14..000000000 --- a/py/tests/core/providers/parsing/test_unstructured_parsing.py +++ /dev/null @@ -1,73 +0,0 @@ -from io import BytesIO -from unittest.mock import AsyncMock, MagicMock, patch - -import httpx -import pytest - -from core import generate_id_from_label -from core.base import Document, DocumentExtraction, DocumentType -from core.providers.parsing.unstructured_parsing import FallbackElement - - -@pytest.mark.asyncio -async def test_parse_fallback(unstructured_parsing_provider): - document = Document( - id=generate_id_from_label("test_parse_fallback"), - collection_ids=[], - user_id=generate_id_from_label("test_user"), - type=DocumentType.PNG, - metadata={}, - ) - chunk_size = 128 - - async def mock_ingest(file_content, chunk_size): - for text in ["text1", "text2"]: - yield text - - mock_parser = AsyncMock() - mock_parser.ingest = mock_ingest - unstructured_parsing_provider.parsers[DocumentType.PNG] = mock_parser - - elements = [ - element - async for element in unstructured_parsing_provider.parse_fallback( - b"test_data", document, chunk_size - ) - ] - - assert len(elements) == 2 - assert isinstance(elements[0], FallbackElement) - assert elements[0].text == "text1" - assert elements[0].metadata == {"chunk_id": 0} - assert isinstance(elements[1], FallbackElement) - assert elements[1].text == "text2" - assert elements[1].metadata == {"chunk_id": 1} - - -@pytest.mark.asyncio -async def test_parse_with_local_unstructured(unstructured_parsing_provider): - document = Document( - id=generate_id_from_label("test_parse_with_local_unstructured"), - collection_ids=[], - user_id=generate_id_from_label("test_user"), - type=DocumentType.DOCX, - metadata={"title": "test_title"}, - ) - - async def mock_post(url, json, **kwargs): - return MagicMock( - json=MagicMock(return_value={"elements": [{"text": "test_text"}]}) - ) - - with patch.object(httpx.AsyncClient, "post", side_effect=mock_post): - extractions = [ - extraction - async for extraction in unstructured_parsing_provider.parse( - BytesIO(b"test_data"), document - ) - ] - - assert len(extractions) == 1 - assert isinstance(extractions[0], DocumentExtraction) - assert extractions[0].data == "test_text" - assert extractions[0].metadata["partitioned_by_unstructured"] is True diff --git a/py/tests/regression/expected_outputs/test_document_management.json b/py/tests/regression/expected_outputs/test_document_management.json index 7114b58d2..eee3aec01 100644 --- a/py/tests/regression/expected_outputs/test_document_management.json +++ b/py/tests/regression/expected_outputs/test_document_management.json @@ -64,7 +64,7 @@ { "message": "Ingestion task queued successfully.", "task_id": "2fe08ebb-7157-452c-811c-7ede2b05e750", - "document_id": "db02076e-989a-59cd-98d5-e24e15a0bd27" + "document_id": "9fbe403b-c11c-5aae-8ade-ef22980c3ad1" }, { "message": "Ingestion task queued successfully.", @@ -148,7 +148,7 @@ { "message": "Ingestion task queued successfully.", "task_id": "07acc8f2-48d6-4e1c-8932-3f0c594105bf", - "document_id": "db02076e-989a-59cd-98d5-e24e15a0bd27" + "document_id": "9fbe403b-c11c-5aae-8ade-ef22980c3ad1" }, { "message": "Ingestion task queued successfully.", @@ -238,7 +238,7 @@ "ingestion_attempt_number": null }, { - "id": "db02076e-989a-59cd-98d5-e24e15a0bd27", + "id": "9fbe403b-c11c-5aae-8ade-ef22980c3ad1", "collection_ids": [], "user_id": "2acb499e-8428-543b-bd85-0d9098718220", "type": "txt", @@ -474,7 +474,7 @@ "message": "Update task queued successfully.", "task_id": "1828ac12-0804-47cf-9623-8110324a52b1", "document_ids": [ - "db02076e-989a-59cd-98d5-e24e15a0bd27" + "9fbe403b-c11c-5aae-8ade-ef22980c3ad1" ] } }, @@ -549,7 +549,7 @@ "ingestion_attempt_number": null }, { - "id": "db02076e-989a-59cd-98d5-e24e15a0bd27", + "id": "9fbe403b-c11c-5aae-8ade-ef22980c3ad1", "collection_ids": [], "user_id": "2acb499e-8428-543b-bd85-0d9098718220", "type": "txt", @@ -830,7 +830,7 @@ "ingestion_attempt_number": null }, { - "id": "db02076e-989a-59cd-98d5-e24e15a0bd27", + "id": "9fbe403b-c11c-5aae-8ade-ef22980c3ad1", "collection_ids": [], "user_id": "2acb499e-8428-543b-bd85-0d9098718220", "type": "txt", diff --git a/py/tests/regression/expected_outputs/test_observability.json b/py/tests/regression/expected_outputs/test_observability.json index 9caa47047..cef9c1e32 100644 --- a/py/tests/regression/expected_outputs/test_observability.json +++ b/py/tests/regression/expected_outputs/test_observability.json @@ -17,7 +17,7 @@ "2f576170-c4f9-5141-a910-a0924f341de4", "30f950f0-c692-57c5-b6ec-ff78ccf5ccdc", "57eec3df-cf68-5559-a80d-ae3fb55b9af1", - "db02076e-989a-59cd-98d5-e24e15a0bd27", + "9fbe403b-c11c-5aae-8ade-ef22980c3ad1", "d421207a-d799-5806-8d67-46b2005b15d4", "451adbbd-e24e-5c9b-80a8-f424b6c7accb", "5b1bd54f-4d70-56b9-a017-a618bc75f94c" diff --git a/py/tests/regression/expected_outputs/test_user_management.json b/py/tests/regression/expected_outputs/test_user_management.json index 8ce54e668..174311e69 100644 --- a/py/tests/regression/expected_outputs/test_user_management.json +++ b/py/tests/regression/expected_outputs/test_user_management.json @@ -104,7 +104,7 @@ "7b0f40c5-2ace-5781-ae35-ead99ddee8c5", "c3291abf-8a4e-5d9d-80fd-232ef6fd8526", "d421207a-d799-5806-8d67-46b2005b15d4", - "db02076e-989a-59cd-98d5-e24e15a0bd27", + "9fbe403b-c11c-5aae-8ade-ef22980c3ad1", "716fea3a-826b-5b27-8e59-ffbd1a35455a", "2f576170-c4f9-5141-a910-a0924f341de4", "3e157b3a-8469-51db-90d9-52e7d896b49b" diff --git a/py/tests/regression/observed_outputs/test_document_management.json b/py/tests/regression/observed_outputs/test_document_management.json index a916b77ca..a78253d1b 100644 --- a/py/tests/regression/observed_outputs/test_document_management.json +++ b/py/tests/regression/observed_outputs/test_document_management.json @@ -2,479 +2,1098 @@ "ingest_sample_files": { "results": [ { - "message": "Ingestion task queued successfully.", - "task_id": "234d5f77-3a09-4ee4-873b-dc97db81afe8", + "message": "Ingestion task completed successfully.", "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa" }, { - "message": "Ingestion task queued successfully.", - "task_id": "e361567a-cc8f-47be-914a-0e07286fa3bb", + "message": "Ingestion task completed successfully.", "document_id": "716fea3a-826b-5b27-8e59-ffbd1a35455a" }, { - "message": "Ingestion task queued successfully.", - "task_id": "327685d0-ef5f-4405-a522-bdf8cfc79789", + "message": "Ingestion task completed successfully.", "document_id": "52e12576-090f-59db-91f4-6d4b2e29ae6c" }, { - "message": "Ingestion task queued successfully.", - "task_id": "965b2b3f-74aa-4a39-9c36-ab1aad27fb25", + "message": "Ingestion task completed successfully.", "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b" }, { - "message": "Ingestion task queued successfully.", - "task_id": "1a467abd-31bf-4c21-b05d-dcb36822b050", + "message": "Ingestion task completed successfully.", "document_id": "01d514a4-5cb1-5c86-be8c-0c5c312a02c9" }, { - "message": "Ingestion task queued successfully.", - "task_id": "0ae9a48e-bc5c-4eea-bf86-54bf00b09f8f", + "message": "Ingestion task completed successfully.", "document_id": "c3291abf-8a4e-5d9d-80fd-232ef6fd8526" }, { - "message": "Ingestion task queued successfully.", - "task_id": "4915b7ed-e255-459b-8af0-3a423e09c881", + "message": "Ingestion task completed successfully.", "document_id": "e797da22-8c5d-54e5-bed5-a55954cf6bf9" }, { - "message": "Ingestion task queued successfully.", - "task_id": "96248159-021b-442e-9d14-783a1e80b39f", + "message": "Ingestion task completed successfully.", "document_id": "7b0f40c5-2ace-5781-ae35-ead99ddee8c5" }, { - "message": "Ingestion task queued successfully.", - "task_id": "e886cf3f-362a-4b41-ab98-303a03a2292a", + "message": "Ingestion task completed successfully.", "document_id": "7c1105fc-8f62-5a8f-ac81-fe88f3ec9e4c" }, { - "message": "Ingestion task queued successfully.", - "task_id": "0ae3812e-071b-4b12-ba2d-a425c9d2bacc", + "message": "Ingestion task completed successfully.", "document_id": "2f576170-c4f9-5141-a910-a0924f341de4" }, { - "message": "Ingestion task queued successfully.", - "task_id": "41c6357d-0657-44a1-a0e4-12e99d552b82", + "message": "Ingestion task completed successfully.", "document_id": "30f950f0-c692-57c5-b6ec-ff78ccf5ccdc" }, { - "message": "Ingestion task queued successfully.", - "task_id": "1985e9a6-9e2c-4f0f-9d3c-99f8be6db02b", + "message": "Ingestion task completed successfully.", "document_id": "57eec3df-cf68-5559-a80d-ae3fb55b9af1" }, { - "message": "Ingestion task queued successfully.", - "task_id": "937ea186-fa97-4a08-a13c-240de51f9e9a", - "document_id": "db02076e-989a-59cd-98d5-e24e15a0bd27" + "message": "Ingestion task completed successfully.", + "document_id": "9fbe403b-c11c-5aae-8ade-ef22980c3ad1" }, { - "message": "Ingestion task queued successfully.", - "task_id": "ceb2f270-595d-4bd6-bfd1-bd2925dc5b96", + "message": "Ingestion task completed successfully.", "document_id": "d421207a-d799-5806-8d67-46b2005b15d4" }, { - "message": "Ingestion task queued successfully.", - "task_id": "9c90d5e4-1513-48c3-a598-beaccd8fcf09", + "message": "Ingestion task completed successfully.", "document_id": "451adbbd-e24e-5c9b-80a8-f424b6c7accb" }, { - "message": "Ingestion task queued successfully.", - "task_id": "d88a4951-22ec-4778-b65c-298710b8e937", + "message": "Ingestion task completed successfully.", "document_id": "5b1bd54f-4d70-56b9-a017-a618bc75f94c" } ] }, "reingest_sample_file": { + "results": "{\"detail\":{\"message\":\"Error during ingestion: Document b4ac4dd6-5f27-596e-a55b-7cf242ca30aa was already ingested and is not in a failed state.\",\"error_type\":\"R2RException\"}}" + }, + "documents_overview": { "results": [ { - "message": "Ingestion task queued successfully.", - "task_id": "f0d71ddb-7516-4331-ad7e-c7f0205f6f66", - "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa" - }, - { - "message": "Ingestion task queued successfully.", - "task_id": "d2c55f19-ceb4-4aad-995d-37569419b40e", - "document_id": "716fea3a-826b-5b27-8e59-ffbd1a35455a" - }, - { - "message": "Ingestion task queued successfully.", - "task_id": "9e15204c-4f3a-41cc-9f34-0deff009df46", - "document_id": "52e12576-090f-59db-91f4-6d4b2e29ae6c" + "id": "5b1bd54f-4d70-56b9-a017-a618bc75f94c", + "collection_ids": [], + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "type": "html", + "metadata": { + "version": "v0" + }, + "title": "pg_essay_3.html", + "version": "v0", + "size_in_bytes": 166556, + "ingestion_status": "success", + "restructuring_status": "pending", + "created_at": "2024-09-27T17:01:31.874002Z", + "updated_at": "2024-09-27T17:01:31.874010Z", + "ingestion_attempt_number": null }, { - "message": "Ingestion task queued successfully.", - "task_id": "dadbbaf6-559d-48d4-8db4-881f33760af5", - "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b" + "id": "451adbbd-e24e-5c9b-80a8-f424b6c7accb", + "collection_ids": [], + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "type": "png", + "metadata": { + "version": "v0" + }, + "title": "screen_shot.png", + "version": "v0", + "size_in_bytes": 1055688, + "ingestion_status": "success", + "restructuring_status": "pending", + "created_at": "2024-09-27T17:01:17.143733Z", + "updated_at": "2024-09-27T17:01:17.143741Z", + "ingestion_attempt_number": null }, { - "message": "Ingestion task queued successfully.", - "task_id": "0892bd5e-57a5-47f5-9a0a-8c558db7b4c1", - "document_id": "01d514a4-5cb1-5c86-be8c-0c5c312a02c9" + "id": "d421207a-d799-5806-8d67-46b2005b15d4", + "collection_ids": [], + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "type": "txt", + "metadata": { + "version": "v0" + }, + "title": "yc_companies.txt", + "version": "v0", + "size_in_bytes": 62948, + "ingestion_status": "success", + "restructuring_status": "pending", + "created_at": "2024-09-27T17:01:15.832273Z", + "updated_at": "2024-09-27T17:01:15.832281Z", + "ingestion_attempt_number": null }, { - "message": "Ingestion task queued successfully.", - "task_id": "04b91d55-2b46-40a7-9ddb-da362286f6bc", - "document_id": "c3291abf-8a4e-5d9d-80fd-232ef6fd8526" + "id": "9fbe403b-c11c-5aae-8ade-ef22980c3ad1", + "collection_ids": [], + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "type": "txt", + "metadata": { + "version": "v0" + }, + "title": "aristotle.txt", + "version": "v0", + "size_in_bytes": 97804, + "ingestion_status": "success", + "restructuring_status": "pending", + "created_at": "2024-09-27T17:01:13.749890Z", + "updated_at": "2024-09-27T17:01:13.749901Z", + "ingestion_attempt_number": null }, { - "message": "Ingestion task queued successfully.", - "task_id": "142a0871-1ab2-4f9b-a9c5-b77ff10141ea", - "document_id": "e797da22-8c5d-54e5-bed5-a55954cf6bf9" + "id": "57eec3df-cf68-5559-a80d-ae3fb55b9af1", + "collection_ids": [], + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "type": "html", + "metadata": { + "version": "v0" + }, + "title": "pg_essay_2.html", + "version": "v0", + "size_in_bytes": 166816, + "ingestion_status": "success", + "restructuring_status": "pending", + "created_at": "2024-09-27T17:01:13.111406Z", + "updated_at": "2024-09-27T17:01:13.111416Z", + "ingestion_attempt_number": null }, { - "message": "Ingestion task queued successfully.", - "task_id": "c60fa8db-33c4-4d66-8317-f4615568835d", - "document_id": "7b0f40c5-2ace-5781-ae35-ead99ddee8c5" + "id": "30f950f0-c692-57c5-b6ec-ff78ccf5ccdc", + "collection_ids": [], + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "type": "txt", + "metadata": { + "version": "v0" + }, + "title": "test.txt", + "version": "v0", + "size_in_bytes": 28, + "ingestion_status": "success", + "restructuring_status": "pending", + "created_at": "2024-09-27T17:01:12.788282Z", + "updated_at": "2024-09-27T17:01:12.788287Z", + "ingestion_attempt_number": null }, { - "message": "Ingestion task queued successfully.", - "task_id": "154593d2-8498-4b92-b765-005dfa9b0de0", - "document_id": "7c1105fc-8f62-5a8f-ac81-fe88f3ec9e4c" + "id": "2f576170-c4f9-5141-a910-a0924f341de4", + "collection_ids": [], + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "type": "pdf", + "metadata": { + "version": "v0" + }, + "title": "lyft_2021.pdf", + "version": "v0", + "size_in_bytes": 1920404, + "ingestion_status": "success", + "restructuring_status": "pending", + "created_at": "2024-09-27T17:00:49.981071Z", + "updated_at": "2024-09-27T17:00:49.981086Z", + "ingestion_attempt_number": null }, { - "message": "Ingestion task queued successfully.", - "task_id": "e076c320-2e29-4787-92a6-f60afd2db533", - "document_id": "2f576170-c4f9-5141-a910-a0924f341de4" + "id": "7c1105fc-8f62-5a8f-ac81-fe88f3ec9e4c", + "collection_ids": [], + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "type": "html", + "metadata": { + "version": "v0" + }, + "title": "pg_essay_5.html", + "version": "v0", + "size_in_bytes": 165040, + "ingestion_status": "success", + "restructuring_status": "pending", + "created_at": "2024-09-27T17:00:49.315963Z", + "updated_at": "2024-09-27T17:00:49.315980Z", + "ingestion_attempt_number": null }, { - "message": "Ingestion task queued successfully.", - "task_id": "c7bfee83-259d-4663-9dcc-05d8def34632", - "document_id": "30f950f0-c692-57c5-b6ec-ff78ccf5ccdc" + "id": "7b0f40c5-2ace-5781-ae35-ead99ddee8c5", + "collection_ids": [], + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "type": "pdf", + "metadata": { + "version": "v0" + }, + "title": "graphrag.pdf", + "version": "v0", + "size_in_bytes": 2287544, + "ingestion_status": "success", + "restructuring_status": "pending", + "created_at": "2024-09-27T17:00:48.676977Z", + "updated_at": "2024-09-27T17:00:48.676985Z", + "ingestion_attempt_number": null }, { - "message": "Ingestion task queued successfully.", - "task_id": "96875e20-aa60-4ba9-a0c7-013bfd883e68", - "document_id": "57eec3df-cf68-5559-a80d-ae3fb55b9af1" + "id": "e797da22-8c5d-54e5-bed5-a55954cf6bf9", + "collection_ids": [], + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "type": "html", + "metadata": { + "version": "v0" + }, + "title": "pg_essay_4.html", + "version": "v0", + "size_in_bytes": 157484, + "ingestion_status": "success", + "restructuring_status": "pending", + "created_at": "2024-09-27T17:00:48.182144Z", + "updated_at": "2024-09-27T17:00:48.182154Z", + "ingestion_attempt_number": null }, { - "message": "Ingestion task queued successfully.", - "task_id": "706ea773-4345-48cb-a5c8-b4296d7cf906", - "document_id": "db02076e-989a-59cd-98d5-e24e15a0bd27" + "id": "c3291abf-8a4e-5d9d-80fd-232ef6fd8526", + "collection_ids": [], + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "type": "txt", + "metadata": { + "version": "v0" + }, + "title": "aristotle_v2.txt", + "version": "v0", + "size_in_bytes": 3380, + "ingestion_status": "success", + "restructuring_status": "pending", + "created_at": "2024-09-27T17:00:47.523137Z", + "updated_at": "2024-09-27T17:00:47.523153Z", + "ingestion_attempt_number": null }, { - "message": "Ingestion task queued successfully.", - "task_id": "55182343-2d74-43e7-873b-50ec76f4c6cd", - "document_id": "d421207a-d799-5806-8d67-46b2005b15d4" + "id": "01d514a4-5cb1-5c86-be8c-0c5c312a02c9", + "collection_ids": [], + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "type": "mp3", + "metadata": { + "version": "v0" + }, + "title": "sample2.mp3", + "version": "v0", + "size_in_bytes": 96608, + "ingestion_status": "success", + "restructuring_status": "pending", + "created_at": "2024-09-27T17:00:46.224925Z", + "updated_at": "2024-09-27T17:00:46.224929Z", + "ingestion_attempt_number": null }, { - "message": "Ingestion task queued successfully.", - "task_id": "b41719da-cb6f-4667-919e-23fd0b7bf094", - "document_id": "451adbbd-e24e-5c9b-80a8-f424b6c7accb" + "id": "3e157b3a-8469-51db-90d9-52e7d896b49b", + "collection_ids": [], + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "type": "pdf", + "metadata": { + "version": "v0" + }, + "title": "uber_2021.pdf", + "version": "v0", + "size_in_bytes": 2507312, + "ingestion_status": "success", + "restructuring_status": "pending", + "created_at": "2024-09-27T17:00:10.909182Z", + "updated_at": "2024-09-27T17:00:10.909191Z", + "ingestion_attempt_number": null }, { - "message": "Ingestion task queued successfully.", - "task_id": "228230e4-4f54-413e-9655-2c9ae06c6754", - "document_id": "5b1bd54f-4d70-56b9-a017-a618bc75f94c" - } - ] - }, - "documents_overview": { - "results": [ - { - "id": "5b1bd54f-4d70-56b9-a017-a618bc75f94c", + "id": "52e12576-090f-59db-91f4-6d4b2e29ae6c", "collection_ids": [], "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "type": "html", + "type": "mp3", "metadata": { "version": "v0" }, - "title": "pg_essay_3.html", + "title": "sample.mp3", "version": "v0", - "size_in_bytes": 166556, + "size_in_bytes": 162228, "ingestion_status": "success", "restructuring_status": "pending", - "created_at": "2024-09-20T22:50:10.456776Z", - "updated_at": "2024-09-20T22:50:10.456787Z", + "created_at": "2024-09-27T17:00:08.852276Z", + "updated_at": "2024-09-27T17:00:08.852281Z", "ingestion_attempt_number": null }, { - "id": "451adbbd-e24e-5c9b-80a8-f424b6c7accb", + "id": "716fea3a-826b-5b27-8e59-ffbd1a35455a", "collection_ids": [], "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "type": "png", + "type": "txt", "metadata": { "version": "v0" }, - "title": "screen_shot.png", + "title": "got.txt", "version": "v0", - "size_in_bytes": 1055688, + "size_in_bytes": 12656, "ingestion_status": "success", "restructuring_status": "pending", - "created_at": "2024-09-20T22:50:10.445592Z", - "updated_at": "2024-09-20T22:50:10.445596Z", + "created_at": "2024-09-27T17:00:08.177884Z", + "updated_at": "2024-09-27T17:00:08.177887Z", "ingestion_attempt_number": null }, { - "id": "57eec3df-cf68-5559-a80d-ae3fb55b9af1", + "id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", "collection_ids": [], "user_id": "2acb499e-8428-543b-bd85-0d9098718220", "type": "html", "metadata": { "version": "v0" }, - "title": "pg_essay_2.html", + "title": "pg_essay_1.html", "version": "v0", - "size_in_bytes": 166816, + "size_in_bytes": 175340, "ingestion_status": "success", "restructuring_status": "pending", - "created_at": "2024-09-20T22:50:09.506868Z", - "updated_at": "2024-09-20T22:50:09.506871Z", + "created_at": "2024-09-27T17:00:07.272659Z", + "updated_at": "2024-09-27T17:00:07.272681Z", "ingestion_attempt_number": null + } + ], + "total_entries": 16 + }, + "document_chunks_test": { + "results": [ + { + "extraction_id": "36474213-c9fa-5fb7-b99e-c3720743da53", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "A Project of One's Own", + "metadata": { + "version": "v0", + "document_type": "html" + } + }, + { + "extraction_id": "286b3218-517c-50bf-b8ea-1262e8ec6b42", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "June 2021A few days ago, on the way home from school, my nine year old son\ntold me he couldn't wait to get home to write more of the story he\nwas working on. This made me as happy as anything I've heard him", + "metadata": { + "version": "v0", + "document_type": "html" + } + }, + { + "extraction_id": "94aacd72-c343-5467-a57f-e35069b26d05", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "say \u2014 not just because he was excited about his story, but because\nhe'd discovered this way of working. Working on a project of your\nown is as different from ordinary work as skating is from walking.", + "metadata": { + "version": "v0", + "document_type": "html" + } + }, + { + "extraction_id": "db93e935-83e2-5930-84f1-00e60acb2b67", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "It's more fun, but also much more productive.What proportion of great work has been done by people who were\nskating in this sense? If not all of it, certainly a lot.There is something special about working on a project of your own.", + "metadata": { + "version": "v0", + "document_type": "html" + } + }, + { + "extraction_id": "4aa0529e-f7f3-5a0b-b70c-3d5957be6eea", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "I wouldn't say exactly that you're happier. A better word would be\nexcited, or engaged. You're happy when things are going well, but\noften they aren't. When I'm writing an essay, most of the time I'm", + "metadata": { + "version": "v0", + "document_type": "html" + } + }, + { + "extraction_id": "6b216db3-962d-593b-9d8a-ad096df71900", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "worried and puzzled: worried that the essay will turn out badly,\nand puzzled because I'm groping for some idea that I can't see\nclearly enough. Will I be able to pin it down with words? In the", + "metadata": { + "version": "v0", + "document_type": "html" + } + }, + { + "extraction_id": "6c192997-1f46-5646-9060-0c5c4fdb5b0c", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "end I usually can, if I take long enough, but I'm never sure; the\nfirst few attempts often fail.You have moments of happiness when things work out, but they don't\nlast long, because then you're on to the next problem. So why do", + "metadata": { + "version": "v0", + "document_type": "html" + } + }, + { + "extraction_id": "bebcc3b0-1911-58e9-b34d-9844e6c7cebb", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "it at all? Because to the kind of people who like working this way,\nnothing else feels as right. You feel as if you're an animal in its\nnatural habitat, doing what you were meant to do \u2014 not always", + "metadata": { + "version": "v0", + "document_type": "html" + } + }, + { + "extraction_id": "dd2b7c32-50ae-541e-82d3-c63eb38dbd63", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "happy, maybe, but awake and alive.Many kids experience the excitement of working on projects of their\nown. The hard part is making this converge with the work you do as\nan adult. And our customs make it harder. We treat \"playing\" and", + "metadata": { + "version": "v0", + "document_type": "html" + } + }, + { + "extraction_id": "3b446288-a384-527b-9ce0-5ae3ff4a6127", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "\"hobbies\" as qualitatively different from \"work\". It's not clear\nto a kid building a treehouse that there's a direct (though long)\nroute from that to architecture or engineering. And instead of", + "metadata": { + "version": "v0", + "document_type": "html" + } + }, + { + "extraction_id": "1f24eedd-a498-57df-98f2-35ff3f545136", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "pointing out the route, we conceal it, by implicitly treating the\nstuff kids do as different from real work.\n[1]Instead of telling kids that their treehouses could be on the path\nto the work they do as adults, we tell them the path goes through", + "metadata": { + "version": "v0", + "document_type": "html" + } + }, + { + "extraction_id": "7f5e1067-018e-5a6d-88f8-69a89977bfee", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "school. And unfortunately schoolwork tends to be very different from\nworking on projects of one's own. It's usually neither a project,\nnor one's own. So as school gets more serious, working on projects", + "metadata": { + "version": "v0", + "document_type": "html" + } + }, + { + "extraction_id": "d94e3ccb-f524-524b-acd1-5c3676c25901", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "of one's own is something that survives, if at all, as a thin thread\noff to the side.It's a bit sad to think of all the high school kids turning their\nbacks on building treehouses and sitting in class dutifully learning", + "metadata": { + "version": "v0", + "document_type": "html" + } + }, + { + "extraction_id": "41159be9-49fa-5dd6-b8ba-2f17fa152514", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "collaboration. And interestingly, it shades into collaboration in\ntwo different ways. One way to collaborate is to share a single\nproject. For example, when two mathematicians collaborate on a proof", + "metadata": { + "version": "v0", + "document_type": "html" + } + }, + { + "extraction_id": "ec8bf70e-7cfd-5404-9963-05de778a7027", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "about Darwin or Newton to pass some exam, when the work that made\nDarwin and Newton famous was actually closer in spirit to building\ntreehouses than studying for exams.If I had to choose between my kids getting good grades and", + "metadata": { + "version": "v0", + "document_type": "html" + } + }, + { + "extraction_id": "91a8d83a-d47b-5a74-a99f-af4843371342", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "working on ambitious projects of their own, I'd pick\nthe projects. And not because I'm an indulgent parent, but because\nI've been on the other end and I know which has more predictive\nvalue. When I was picking startups for Y Combinator, I didn't care", + "metadata": { + "version": "v0", + "document_type": "html" + } + }, + { + "extraction_id": "2cfc596a-5770-5448-b1b1-b2ac0dd411f6", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "about applicants' grades. But if they'd worked on projects of their\nown, I wanted to hear all about those.\n[2]It may be inevitable that school is the way it is. I'm not saying\nwe have to redesign it (though I'm not saying we don't), just that", + "metadata": { + "version": "v0", + "document_type": "html" + } + }, + { + "extraction_id": "a77ed9c4-5964-5bad-b6dc-37eed79476ce", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "we should understand what it does to our attitudes to work \u2014 that\nit steers us toward the dutiful plodding kind of work, often using\ncompetition as bait, and away from skating.There are occasionally times when schoolwork becomes a project of", + "metadata": { + "version": "v0", + "document_type": "html" + } + }, + { + "extraction_id": "5294e1b5-4060-52fb-9c64-849ac9f64e1e", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "one's own. Whenever I had to write a paper, that would become a\nproject of my own \u2014 except in English classes, ironically, because\nthe things one has to write in English classes are so\nbogus. And", + "metadata": { + "version": "v0", + "document_type": "html" + } + }, + { + "extraction_id": "208dff84-1585-5b7d-8884-22b9e7866552", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "bogus. And\nwhen I got to college and started taking CS classes, the programs\nI had to write became projects of my own. Whenever I was writing\nor programming, I was usually skating, and that has been true ever", + "metadata": { + "version": "v0", + "document_type": "html" + } + }, + { + "extraction_id": "38d53fb3-23a8-5acd-a380-880597165f89", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "since.So where exactly is the edge of projects of one's own? That's an\ninteresting question, partly because the answer is so complicated,\nand partly because there's so much at stake. There turn out to be", + "metadata": { + "version": "v0", + "document_type": "html" + } + }, + { + "extraction_id": "4d486be0-9a77-5ea7-9dca-d9e89b2ebca7", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "two senses in which work can be one's own: 1) that you're doing it\nvoluntarily, rather than merely because someone told you to, and\n2) that you're doing it by yourself.The edge of the former is quite sharp. People who care a lot about", + "metadata": { + "version": "v0", + "document_type": "html" + } + }, + { + "extraction_id": "09c22efb-c142-534a-9447-17494f84c9a3", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "their work are usually very sensitive to the difference between\npulling, and being pushed, and work tends to fall into one category\nor the other. But the test isn't simply whether you're told to do", + "metadata": { + "version": "v0", + "document_type": "html" + } + }, + { + "extraction_id": "e5d1b3d3-139f-5c23-801d-20558174d518", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "something. You can choose to do something you're told to do. Indeed,\nyou can own it far more thoroughly than the person who told you to\ndo it.For example, math homework is for most people something they're", + "metadata": { + "version": "v0", + "document_type": "html" + } + }, + { + "extraction_id": "af6ba0d0-c99c-5222-89ac-c5d8d6b28e7c", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "told to do. But for my father, who was a mathematician, it wasn't.\nMost of us think of the problems in a math book as a way to test\nor develop our knowledge of the material explained in each section.", + "metadata": { + "version": "v0", + "document_type": "html" + } + }, + { + "extraction_id": "55a6064d-e548-5026-9304-380b8f666adb", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "But to my father the problems were the part that mattered, and the\ntext was merely a sort of annotation. Whenever he got a new math\nbook it was to him like being given a puzzle: here was a new set", + "metadata": { + "version": "v0", + "document_type": "html" + } + }, + { + "extraction_id": "fbcce017-8d52-5f53-8285-a11289f21a36", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "of problems to solve, and he'd immediately set about solving all\nof them.The other sense of a project being one's own \u2014 working on it by\noneself \u2014 has a much softer edge. It shades gradually into", + "metadata": { + "version": "v0", + "document_type": "html" + } + }, + { + "extraction_id": "e8a5204a-a1a6-5497-a2ad-af07eb4bace7", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "that takes shape in the course of a conversation between them. The\nother way is when multiple people work on separate projects of their\nown that fit together like a jigsaw puzzle. For example, when one", + "metadata": { + "version": "v0", + "document_type": "html" + } + }, + { + "extraction_id": "bfa628c5-9800-54a4-a63e-07a08f2fefc2", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "person writes the text of a book and another does the graphic design.\n[3]These two paths into collaboration can of course be combined. But\nunder the right conditions, the excitement of working on a project", + "metadata": { + "version": "v0", + "document_type": "html" + } + }, + { + "extraction_id": "44b6b2da-5060-5cff-b49d-c2c07e4ab53b", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "of one's own can be preserved for quite a while before disintegrating\ninto the turbulent flow of work in a large organization. Indeed,\nthe history of successful organizations is partly the history of\ntechniques for preserving that excitement.", + "metadata": { + "version": "v0", + "document_type": "html" + } + }, + { + "extraction_id": "cdc83d2a-2dfb-5562-88bd-6854cfcfa560", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "techniques for preserving that excitement.\n[4]The team that made the original Macintosh were a great example of\nthis phenomenon. People like Burrell Smith and Andy Hertzfeld and\nBill Atkinson and Susan Kare were not just following orders. They", + "metadata": { + "version": "v0", + "document_type": "html" + } + }, + { + "extraction_id": "8393b1e4-dc5c-58a4-96e0-b11311bc5ef4", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "were not tennis balls hit by Steve Jobs, but rockets let loose by\nSteve Jobs. There was a lot of collaboration between them, but\nthey all seem to have individually felt the excitement of", + "metadata": { + "version": "v0", + "document_type": "html" + } + }, + { + "extraction_id": "d0a3ea65-fd9f-598e-91cb-ad60ca8cb225", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "working on a project of one's own.In Andy Hertzfeld's book on the Macintosh, he describes how they'd\ncome back into the office after dinner and work late into the night.\nPeople who've never experienced the thrill of working on a project", + "metadata": { + "version": "v0", + "document_type": "html" + } + }, + { + "extraction_id": "90eaa50f-e7f1-5dbe-a5b3-406f524cab69", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "they're excited about can't distinguish this kind of working long\nhours from the kind that happens in sweatshops and boiler rooms,\nbut they're at opposite ends of the spectrum. That's why it's a", + "metadata": { + "version": "v0", + "document_type": "html" + } + }, + { + "extraction_id": "f258c9a9-6d19-5a0f-a6a4-fe90a34e3886", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "mistake to insist dogmatically on \"work/life balance.\" Indeed, the\nmere expression \"work/life\" embodies a mistake: it assumes work and\nlife are distinct. For those to whom the word \"work\" automatically", + "metadata": { + "version": "v0", + "document_type": "html" + } + }, + { + "extraction_id": "0a3b3f7e-adec-5c9b-95b3-6439ecffb0bd", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "implies the dutiful plodding kind, they are. But for the skaters,\nthe relationship between work and life would be better represented\nby a dash than a slash. I wouldn't want to work on anything that I didn't", + "metadata": { + "version": "v0", + "document_type": "html" + } + }, + { + "extraction_id": "028bbbdb-ac34-570d-b06d-8d942d9c655d", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "want to take over my life.Of course, it's easier to achieve this level of motivation when\nyou're making something like the Macintosh. It's easy for something\nnew to feel like a project of your own. That's one of the reasons", + "metadata": { + "version": "v0", + "document_type": "html" + } + }, + { + "extraction_id": "420e6cc0-cd53-52a8-9603-b6341df16ca3", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "for the tendency programmers have to rewrite things that don't need\nrewriting, and to write their own versions of things that already\nexist. This sometimes alarms managers, and measured by total number", + "metadata": { + "version": "v0", + "document_type": "html" + } + }, + { + "extraction_id": "ca29fb7e-3f3b-5214-b1ff-8bbfe75ef8e1", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "of characters typed, it's rarely the optimal solution. But it's not\nalways driven simply by arrogance or cluelessness.\nWriting code from scratch is also much more rewarding \u2014 so much\nmore rewarding that a good programmer can end up net ahead, despite", + "metadata": { + "version": "v0", + "document_type": "html" + } + }, + { + "extraction_id": "7fa17848-6703-50d0-87bc-957ea785bf09", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "the shocking waste of characters. Indeed, it may be one of the\nadvantages of capitalism that it encourages such rewriting. A company\nthat needs software to do something can't use the software already", + "metadata": { + "version": "v0", + "document_type": "html" + } + }, + { + "extraction_id": "6806b7f4-8531-530f-a148-d4018961a842", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "written to do it at another company, and thus has to write their\nown, which often turns out better.\n[5]The natural alignment between skating and solving new problems is\none of the reasons the payoffs from startups are so high. Not only", + "metadata": { + "version": "v0", + "document_type": "html" + } + }, + { + "extraction_id": "d07324d2-645c-57c3-bafb-3abfde5607f6", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "is the market price of unsolved problems higher, you also get a\ndiscount on productivity when you work on them. In fact, you get a\ndouble increase in productivity: when you're doing a clean-sheet", + "metadata": { + "version": "v0", + "document_type": "html" + } + }, + { + "extraction_id": "7bee3ec0-8bc2-5809-9885-ad77e36c619b", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "design, it's easier to recruit skaters, and they get to spend all\ntheir time skating.Steve Jobs knew a thing or two about skaters from having watched\nSteve Wozniak. If you can find the right people, you only have to", + "metadata": { + "version": "v0", + "document_type": "html" + } + }, + { + "extraction_id": "36f55631-f972-5444-b7d9-df38f149c1ce", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "tell them what to do at the highest level. They'll handle the\ndetails. Indeed, they insist on it. For a project to feel like your\nown, you must have sufficient autonomy. You can't be working to\norder, or slowed down", + "metadata": { + "version": "v0", + "document_type": "html" + } + }, + { + "extraction_id": "eef34619-2797-5024-8873-42a011f9d1b9", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "order, or slowed down\nby bureaucracy.One way to ensure autonomy is not to have a boss at all. There are\ntwo ways to do that: to be the boss yourself, and to work on projects\noutside of work. Though they're at opposite ends of the scale", + "metadata": { + "version": "v0", + "document_type": "html" + } + }, + { + "extraction_id": "512b2629-40f6-5611-b39a-ded0925bba83", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "financially, startups and open source projects have a lot in common,\nincluding the fact that they're often run by skaters. And indeed,\nthere's a wormhole from one end of the scale to the other: one of\nthe best ways to discover", + "metadata": { + "version": "v0", + "document_type": "html" + } + }, + { + "extraction_id": "ed505044-a207-5d2e-b658-eb4356e81901", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "the best ways to discover\nstartup ideas is to work on a project\njust for fun.If your projects are the kind that make money, it's easy to work\non them. It's harder when they're not. And the hardest part, usually,", + "metadata": { + "version": "v0", + "document_type": "html" + } + }, + { + "extraction_id": "8cb9f991-78db-5796-9774-d3fd28e544e6", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "is morale. That's where adults have it harder than kids. Kids just\nplunge in and build their treehouse without worrying about whether\nthey're wasting their time, or how it compares to other treehouses.", + "metadata": { + "version": "v0", + "document_type": "html" + } + }, + { + "extraction_id": "c70d905f-608f-5d7d-bcbe-5a20f591a5e8", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "And frankly we could learn a lot from kids here. The high standards\nmost grownups have for \"real\" work do not always serve us well.The most important phase in a project of one's own is at the", + "metadata": { + "version": "v0", + "document_type": "html" + } + }, + { + "extraction_id": "a7a7fc21-f1da-595a-9571-7112607e1e27", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "beginning: when you go from thinking it might be cool to do x to\nactually doing x. And at that point high standards are not merely\nuseless but positively harmful. There are a few people who start", + "metadata": { + "version": "v0", + "document_type": "html" + } + }, + { + "extraction_id": "c1cba552-2381-5b9b-88c9-96bd58029425", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "too many new projects, but far more, I suspect, who are deterred\nby fear of failure from starting projects that would have succeeded\nif they had.But if we couldn't benefit as kids from the knowledge that our", + "metadata": { + "version": "v0", + "document_type": "html" + } + }, + { + "extraction_id": "d66492c5-ce35-5b44-b6ad-1783115ffc74", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "treehouses were on the path to grownup projects, we can at least\nbenefit as grownups from knowing that our projects are on a path\nthat stretches back to treehouses. Remember that careless confidence", + "metadata": { + "version": "v0", + "document_type": "html" + } + }, + { + "extraction_id": "90bde670-7caa-5f09-93ba-ab2b82b20ac3", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "you had as a kid when starting something new? That would be a\npowerful thing to recapture.If it's harder as adults to retain that kind of confidence, we at\nleast tend to be more aware of what we're doing. Kids bounce, or", + "metadata": { + "version": "v0", + "document_type": "html" + } }, { - "id": "db02076e-989a-59cd-98d5-e24e15a0bd27", - "collection_ids": [], + "extraction_id": "08e0d5a1-fee3-5074-972d-b2cc05f05780", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "type": "txt", + "collection_ids": [], + "text": "are herded, from one kind of work to the next, barely realizing\nwhat's happening to them. Whereas we know more about different types\nof work and have more control over which we do. Ideally we can have", "metadata": { - "version": "v0" - }, - "title": "aristotle.txt", - "version": "v0", - "size_in_bytes": 97804, - "ingestion_status": "success", - "restructuring_status": "pending", - "created_at": "2024-09-20T22:50:09.476022Z", - "updated_at": "2024-09-20T22:50:09.476024Z", - "ingestion_attempt_number": null + "version": "v0", + "document_type": "html" + } }, { - "id": "d421207a-d799-5806-8d67-46b2005b15d4", - "collection_ids": [], + "extraction_id": "8c4ab727-e5b9-5301-ae57-b21812cfd5d1", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "type": "txt", + "collection_ids": [], + "text": "the best of both worlds: to be deliberate in choosing to work on\nprojects of our own, and carelessly confident in starting new ones.\nNotes[1]\n\"Hobby\" is a curious word. Now it means work that isn't real", "metadata": { - "version": "v0" - }, - "title": "yc_companies.txt", - "version": "v0", - "size_in_bytes": 62948, - "ingestion_status": "success", - "restructuring_status": "pending", - "created_at": "2024-09-20T22:50:09.471883Z", - "updated_at": "2024-09-20T22:50:09.471886Z", - "ingestion_attempt_number": null + "version": "v0", + "document_type": "html" + } }, { - "id": "30f950f0-c692-57c5-b6ec-ff78ccf5ccdc", - "collection_ids": [], + "extraction_id": "2ba7e1e5-59e3-5376-b9dc-0f215279624f", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "type": "txt", + "collection_ids": [], + "text": "work \u2014 work that one is not to be judged by \u2014 but originally it just\nmeant an obsession in a fairly general sense (even a political\nopinion, for example) that one metaphorically rode as a child rides", "metadata": { - "version": "v0" - }, - "title": "test.txt", - "version": "v0", - "size_in_bytes": 28, - "ingestion_status": "success", - "restructuring_status": "pending", - "created_at": "2024-09-20T22:50:09.470416Z", - "updated_at": "2024-09-20T22:50:09.470418Z", - "ingestion_attempt_number": null + "version": "v0", + "document_type": "html" + } }, { - "id": "2f576170-c4f9-5141-a910-a0924f341de4", - "collection_ids": [], + "extraction_id": "37babbc6-03b8-596b-85c5-7cb11461c4f3", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "type": "pdf", + "collection_ids": [], + "text": "a hobby-horse. It's hard to say if its recent, narrower meaning is\na change for the better or the worse. For sure there are lots of\nfalse positives \u2014 lots of projects that end up being important but", "metadata": { - "version": "v0" - }, - "title": "lyft_2021.pdf", - "version": "v0", - "size_in_bytes": 1920404, - "ingestion_status": "success", - "restructuring_status": "pending", - "created_at": "2024-09-20T22:50:09.469252Z", - "updated_at": "2024-09-20T22:50:09.469256Z", - "ingestion_attempt_number": null + "version": "v0", + "document_type": "html" + } }, { - "id": "7c1105fc-8f62-5a8f-ac81-fe88f3ec9e4c", - "collection_ids": [], + "extraction_id": "adc03e1d-89db-5c36-bb16-318eac3cb755", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "type": "html", + "collection_ids": [], + "text": "are dismissed initially as mere hobbies. But on the other hand, the\nconcept provides valuable cover for projects in the early, ugly\nduckling phase.[2]\nTiger parents, as parents so often do, are fighting the last", "metadata": { - "version": "v0" - }, - "title": "pg_essay_5.html", - "version": "v0", - "size_in_bytes": 165040, - "ingestion_status": "parsing", - "restructuring_status": "pending", - "created_at": "2024-09-20T22:50:09.465810Z", - "updated_at": "2024-09-20T22:55:12.531882Z", - "ingestion_attempt_number": null + "version": "v0", + "document_type": "html" + } }, { - "id": "7b0f40c5-2ace-5781-ae35-ead99ddee8c5", - "collection_ids": [], + "extraction_id": "32ff9dd1-fce0-5766-8c3e-0bb8a9068e0e", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "type": "pdf", + "collection_ids": [], + "text": "war. Grades mattered more in the old days when the route to success\nwas to acquire\ncredentials\nwhile ascending some predefined ladder.\nBut it's just as well that their tactics are focused on grades. How", "metadata": { - "version": "v0" - }, - "title": "graphrag.pdf", - "version": "v0", - "size_in_bytes": 2287544, - "ingestion_status": "parsing", - "restructuring_status": "pending", - "created_at": "2024-09-20T22:50:09.455913Z", - "updated_at": "2024-09-20T22:55:12.517199Z", - "ingestion_attempt_number": null + "version": "v0", + "document_type": "html" + } }, { - "id": "e797da22-8c5d-54e5-bed5-a55954cf6bf9", - "collection_ids": [], + "extraction_id": "9222c916-8bc4-58ef-8c58-924ed71a35d0", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "type": "html", + "collection_ids": [], + "text": "awful it would be if they invaded the territory of projects, and\nthereby gave their kids a distaste for this kind of work by forcing\nthem to do it. Grades are already a grim, fake world, and aren't", "metadata": { - "version": "v0" - }, - "title": "pg_essay_4.html", - "version": "v0", - "size_in_bytes": 157484, - "ingestion_status": "parsing", - "restructuring_status": "pending", - "created_at": "2024-09-20T22:50:07.036336Z", - "updated_at": "2024-09-20T22:55:12.484462Z", - "ingestion_attempt_number": null + "version": "v0", + "document_type": "html" + } }, { - "id": "c3291abf-8a4e-5d9d-80fd-232ef6fd8526", - "collection_ids": [], + "extraction_id": "00505280-32be-5925-b394-eeddb336ab26", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "type": "txt", + "collection_ids": [], + "text": "harmed much by parental interference, but working on one's own\nprojects is a more delicate, private thing that could be damaged\nvery easily.[3]\nThe complicated, gradual edge between working on one's own", "metadata": { - "version": "v0" - }, - "title": "aristotle_v2.txt", - "version": "v0", - "size_in_bytes": 3380, - "ingestion_status": "parsing", - "restructuring_status": "pending", - "created_at": "2024-09-20T22:50:05.565359Z", - "updated_at": "2024-09-20T22:55:12.516690Z", - "ingestion_attempt_number": null + "version": "v0", + "document_type": "html" + } }, { - "id": "01d514a4-5cb1-5c86-be8c-0c5c312a02c9", - "collection_ids": [], + "extraction_id": "bac4a7ed-732e-5034-8932-1cffa3fb6b23", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "type": "mp3", + "collection_ids": [], + "text": "projects and collaborating with others is one reason there is so\nmuch disagreement about the idea of the \"lone genius.\" In practice\npeople collaborate (or not) in all kinds of different ways, but the", "metadata": { - "version": "v0" - }, - "title": "sample2.mp3", - "version": "v0", - "size_in_bytes": 96608, - "ingestion_status": "chunking", - "restructuring_status": "pending", - "created_at": "2024-09-20T22:50:04.622762Z", - "updated_at": "2024-09-20T22:55:12.486232Z", - "ingestion_attempt_number": null + "version": "v0", + "document_type": "html" + } }, { - "id": "3e157b3a-8469-51db-90d9-52e7d896b49b", - "collection_ids": [], + "extraction_id": "13168f68-b395-58ea-8015-32a52e1b577f", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "type": "pdf", + "collection_ids": [], + "text": "idea of the lone genius is definitely not a myth. There's a core\nof truth to it that goes with a certain way of working.[4]\nCollaboration is powerful too. The optimal organization would\ncombine collaboration and ownership in such a way as to do the least", "metadata": { - "version": "v0" - }, - "title": "uber_2021.pdf", - "version": "v0", - "size_in_bytes": 2507312, - "ingestion_status": "parsing", - "restructuring_status": "pending", - "created_at": "2024-09-20T22:50:04.211252Z", - "updated_at": "2024-09-20T22:55:12.509328Z", - "ingestion_attempt_number": null + "version": "v0", + "document_type": "html" + } }, { - "id": "52e12576-090f-59db-91f4-6d4b2e29ae6c", - "collection_ids": [], + "extraction_id": "e879b314-b886-50fd-8503-a3ff208f1796", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "type": "mp3", + "collection_ids": [], + "text": "damage to each. Interestingly, companies and university departments\napproach this ideal from opposite directions: companies insist on\ncollaboration, and occasionally also manage both to recruit skaters", "metadata": { - "version": "v0" - }, - "title": "sample.mp3", - "version": "v0", - "size_in_bytes": 162228, - "ingestion_status": "storing", - "restructuring_status": "pending", - "created_at": "2024-09-20T22:50:02.699362Z", - "updated_at": "2024-09-20T22:55:10.516462Z", - "ingestion_attempt_number": null + "version": "v0", + "document_type": "html" + } }, { - "id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", - "collection_ids": [], + "extraction_id": "8d705d89-4e26-5d28-ba87-68662c801efc", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "type": "html", + "collection_ids": [], + "text": "and allow them to skate, and university departments insist on the\nability to do independent research (which is by custom treated as\nskating, whether it is or not), and the people they hire collaborate\nas much as they choose.[5]", "metadata": { - "version": "v0" - }, - "title": "pg_essay_1.html", - "version": "v0", - "size_in_bytes": 175340, - "ingestion_status": "storing", - "restructuring_status": "pending", - "created_at": "2024-09-20T22:50:02.517969Z", - "updated_at": "2024-09-20T22:55:10.518388Z", - "ingestion_attempt_number": null + "version": "v0", + "document_type": "html" + } }, { - "id": "716fea3a-826b-5b27-8e59-ffbd1a35455a", - "collection_ids": [], + "extraction_id": "b11d3e48-3aa9-5bfd-8912-e7151435778a", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "type": "txt", + "collection_ids": [], + "text": "as much as they choose.[5]\nIf a company could design its software in such a way that the\nbest newly arrived programmers always got a clean sheet, it could\nhave a kind of eternal youth. That might not be impossible. If you", "metadata": { - "version": "v0" - }, - "title": "got.txt", - "version": "v0", - "size_in_bytes": 12656, - "ingestion_status": "chunking", - "restructuring_status": "pending", - "created_at": "2024-09-20T22:50:02.516278Z", - "updated_at": "2024-09-20T22:55:10.519709Z", - "ingestion_attempt_number": null - } - ], - "total_entries": 16 - }, - "document_chunks_test": { - "results": [ + "version": "v0", + "document_type": "html" + } + }, { - "fragment_id": "67e7ab57-eaa0-57d8-9276-da273abcdabd", - "extraction_id": "286b3218-517c-50bf-b8ea-1262e8ec6b42", + "extraction_id": "2951ae53-3cd1-5c5e-8898-09b81fe23b64", "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", "collection_ids": [], - "text": "June 2021 A few days ago, on the way home from school, my nine year old son", + "text": "had a software backbone defining a game with sufficiently clear\nrules, individual programmers could write their own players.\nThanks to Trevor Blackwell, Paul Buchheit, Andy Hertzfeld, Jessica\nLivingston, and Peter Norvig for reading drafts of this.", "metadata": { "version": "v0", - "chunk_order": 0, - "document_type": "html", - "unstructured_filetype": "text/html", - "unstructured_languages": [ - "eng" - ], - "unstructured_text_as_html": "June 2021 A few days ago, on the way home from school, my nine year old son |