Enhance API documentation #237

Closed
24 changes: 23 additions & 1 deletion API/auth/__init__.py
@@ -10,28 +10,39 @@


class UserRole(Enum):
    """User roles assigned as integers."""

    ADMIN = 1
    STAFF = 2
    GUEST = 3


class AuthUser(BaseModel):
    """Authenticated user model; fields are defined as attributes."""

    id: int
    username: str
    img_url: Union[str, None]
    role: UserRole = Field(default=UserRole.GUEST.value)


osm_auth = Auth(*get_oauth_credentials())


def get_user_from_db(osm_id: int):
    """Get a user's information from the database by osm_id."""
    auth = Users()
    user = auth.read_user(osm_id)
    return user


def get_osm_auth_user(access_token):
    """Get the authenticated user from the access token."""
    try:
        user = AuthUser(**osm_auth.deserialize_access_token(access_token))
    except Exception as ex:
@@ -40,29 +51,39 @@ def get_osm_auth_user(access_token):
        )
    db_user = get_user_from_db(user.id)
    user.role = db_user["role"]
    return user


def login_required(access_token: str = Header(...)):
    """Get user's login details"""
    return get_osm_auth_user(access_token)


def get_optional_user(access_token: str = Header(default=None)) -> AuthUser:
    """Get the user from the access token, if provided; otherwise return a guest user."""
    if access_token:
        return get_osm_auth_user(access_token)
    else:
        # If no token provided, return a user with limited options or guest user
        return AuthUser(id=0, username="guest", img_url=None)


def admin_required(user: AuthUser = Depends(login_required)):
    """Allow access only to logged-in admin users."""
    db_user = get_user_from_db(user.id)
    if not db_user["role"] is UserRole.ADMIN.value:
        raise HTTPException(status_code=403, detail="User is not an admin")
    return user


def staff_required(user: AuthUser = Depends(login_required)):
    """Allow access only to logged-in staff users (admins count as staff)."""
    db_user = get_user_from_db(user.id)

    # admin is staff too
@@ -71,4 +92,5 @@ def staff_required(user: AuthUser = Depends(login_required)):
        or db_user["role"] is UserRole.ADMIN.value
    ):
        raise HTTPException(status_code=403, detail="User is not a staff")
    return user
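
A minimal sketch of how the dependencies above are typically wired into FastAPI routes and how the `access-token` header is consumed; the app setup and the endpoint paths below are illustrative assumptions, not part of this module.

```python
# Illustrative only: wiring the auth dependencies above into routes.
# The paths "/whoami" and "/admin/cache" are hypothetical examples.
from fastapi import Depends, FastAPI

from API.auth import AuthUser, admin_required, login_required

app = FastAPI()


@app.get("/whoami")
def whoami(user: AuthUser = Depends(login_required)):
    # login_required reads the `access-token` header and returns an AuthUser
    return {"id": user.id, "username": user.username, "role": user.role}


@app.delete("/admin/cache")
def clear_cache(user: AuthUser = Depends(admin_required)):
    # admin_required raises HTTP 403 unless the user's DB role is ADMIN
    return {"cleared_by": user.username}
```
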
16 changes: 8 additions & 8 deletions API/auth/routers.py
@@ -10,7 +10,7 @@
router = APIRouter(prefix="/auth", tags=["Auth"])


@router.get("/login/")
@router.get("/login")
def login_url(request: Request):
"""Generate Login URL for authentication using OAuth2 Application registered with OpenStreetMap.
Click on the download url returned to get access_token.
@@ -25,7 +25,7 @@ def login_url(request: Request):
return login_url


@router.get("/callback/")
@router.get("/callback")
def callback(request: Request):
"""Performs token exchange between OpenStreetMap and Raw Data API

@@ -42,7 +42,7 @@ def callback(request: Request):
return access_token


@router.get("/me/", response_model=AuthUser)
@router.get("/me", response_model=AuthUser)
def my_data(user_data: AuthUser = Depends(login_required)):
"""Read the access token and provide user details from OSM user's API endpoint,
also integrated with underpass .
@@ -64,7 +64,7 @@ class User(BaseModel):


# Create user
@router.post("/users/", response_model=dict)
@router.post("/users", response_model=dict)
async def create_user(params: User, user_data: AuthUser = Depends(admin_required)):
"""
Creates a new user and returns the user's information.
@@ -87,7 +87,7 @@ async def create_user(params: User, user_data: AuthUser = Depends(admin_required


# Read user by osm_id
@router.get("/users/{osm_id}", response_model=dict)
@router.get("/users{osm_id}", response_model=dict)
async def read_user(osm_id: int, user_data: AuthUser = Depends(staff_required)):
"""
Retrieves user information based on the given osm_id.
@@ -111,7 +111,7 @@ async def read_user(osm_id: int, user_data: AuthUser = Depends(staff_required)):


# Update user by osm_id
@router.put("/users/{osm_id}", response_model=dict)
@router.put("/users{osm_id}", response_model=dict)
async def update_user(
osm_id: int, update_data: User, user_data: AuthUser = Depends(admin_required)
):
@@ -136,7 +136,7 @@ async def update_user(


# Delete user by osm_id
@router.delete("/users/{osm_id}", response_model=dict)
@router.delete("/users{osm_id}", response_model=dict)
async def delete_user(osm_id: int, user_data: AuthUser = Depends(admin_required)):
"""
Deletes a user based on the given osm_id.
@@ -155,7 +155,7 @@ async def delete_user(osm_id: int, user_data: AuthUser = Depends(admin_required)


# Get all users
@router.get("/users/", response_model=list)
@router.get("/users", response_model=list)
async def read_users(
skip: int = 0, limit: int = 10, user_data: AuthUser = Depends(staff_required)
):
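
A hypothetical client-side walk-through of the user-management routes above, using the `access-token` header expected by `login_required`; the base URL, token value, and request-body fields are placeholders (the `User` model's fields are collapsed in this diff).

```python
# Hypothetical calls against the /auth/users routes shown above.
import requests

BASE = "http://127.0.0.1:8000"
headers = {"access-token": "<serialized-osm-oauth2-token>"}

# Create a user (requires an admin token because of admin_required).
resp = requests.post(
    f"{BASE}/auth/users",
    json={"osm_id": 123, "role": 2},  # assumed fields; see the User model
    headers=headers,
)
print(resp.status_code, resp.json())

# List users (a staff token is enough because of staff_required).
users = requests.get(
    f"{BASE}/auth/users", params={"skip": 0, "limit": 10}, headers=headers
)
print(users.json())
```
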
4 changes: 2 additions & 2 deletions docs/src/api/API/main.md
@@ -40,7 +40,7 @@ def add_process_time_header(
call_next
)
```
Times request and knows response time and pass it to header in every request
Times the request, computes the response time, and passes it to a header on every request

Args:
request (_type_): _description_
@@ -67,7 +67,7 @@ def on_startup(

)
```
Fires up 3 idle conenction with threaded connection pooling before starting the API
Fires up 3 idle connections with threaded connection pooling before starting the API

Raises:
e: if connection is rejected to database
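
A rough sketch of the pooling idea described above, not the project's actual startup code; the DSN and pool sizes are placeholder values.

```python
# Open a small threaded connection pool up front so requests reuse connections.
from psycopg2.pool import ThreadedConnectionPool

pool = ThreadedConnectionPool(minconn=3, maxconn=10, dsn="dbname=raw user=postgres")

conn = pool.getconn()
try:
    with conn.cursor() as cur:
        cur.execute("SELECT 1")  # connection is already warm
finally:
    pool.putconn(conn)
```
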
6 changes: 3 additions & 3 deletions docs/src/api/API/raw_data.md
@@ -58,7 +58,7 @@ Generates the current raw OpenStreetMap data available on database based on the

Steps to Run Snapshot :

1. Post the your request here and your request will be on queue, endpoint will return as following :
1. Post your request here and it will be placed on the queue; the endpoint will return the following (see the sketch after this block):
{
"task_id": "your task_id",
"track_link": "/tasks/task_id/"
@@ -73,7 +73,7 @@ def remove_file(
path: str
) -> None
```
Used for removing temp file dir and its all content after zip file is delivered to user
Used for removing the temp file dir and all its content after the zip file is delivered to the user


#### watch_s3_upload
@@ -84,7 +84,7 @@ def watch_s3_upload(
path: str
) -> None
```
Watches upload of s3 either it is completed or not and removes the temp file after completion
Watches the s3 upload until it completes (or fails) and removes the temp file after completion

Args:
url (_type_): url generated by the script where data will be available
2 changes: 1 addition & 1 deletion docs/src/api/API/tasks.md
@@ -19,7 +19,7 @@ def get_task_status(
task_id
)
```
Tracks the request from the task id provided by Raw Data API for the request
Tracks the request status from the task id provided by Raw Data API

Args:

16 changes: 8 additions & 8 deletions docs/src/api/src/app.md
@@ -162,7 +162,7 @@ class RawData(
)
```

Class responsible for the Rawdata Extraction from available sources ,
Class responsible for the Rawdata extraction from available sources;
currently works for the Underpass source current snapshot
Returns:
Geojson Zip file
@@ -214,7 +214,7 @@ def ogr_export(
params
)
```
Function written to support ogr type extractions as well , In this way we will be able to support all file formats supported by Ogr , Currently it is slow when dataset gets bigger as compared to our own conversion method but rich in feature and data types even though it is slow
Function written to support ogr type extractions as well; this way we can support all file formats supported by ogr. It is currently slower than our own conversion method as the dataset gets bigger, but richer in features and data types.


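A loose sketch of an "ogr type" extraction as described above: shelling out to ogr2ogr so any OGR-supported format can be produced. The output path, PG connection string, and SQL are placeholders.

```python
# Export a query result to GeoPackage via ogr2ogr (any OGR driver name works).
import subprocess

subprocess.run(
    [
        "ogr2ogr",
        "-f", "GPKG",
        "/tmp/export.gpkg",  # destination file
        "PG:host=localhost dbname=raw user=postgres",  # source datasource
        "-sql", "SELECT osm_id, tags, geom FROM ways_poly LIMIT 100",
    ],
    check=True,
)
```
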
#### ogr_export_shp
Expand All @@ -228,7 +228,7 @@ def ogr_export_shp(
file_name
)
```
Function written to support ogr type extractions as well , In this way we will be able to support all file formats supported by Ogr , Currently it is slow when dataset gets bigger as compared to our own conversion method but rich in feature and data types even though it is slow
Function written to support ogr type extractions as well; this way we can support all file formats supported by ogr. It is currently slower than our own conversion method as the dataset gets bigger, but richer in features and data types.


#### query2geojson
@@ -240,7 +240,7 @@ def query2geojson(
dump_temp_file_path
)
```
Function written from scratch without being dependent on any library, Provides better performance for geojson binding
Function written from scratch without depending on any library; provides better performance for geojson binding


#### to_geojson_raw
@@ -262,7 +262,7 @@ def check_status(
self
)
```
Gives status about DB update, Substracts with current time and last db update time
Gives status of the DB update by subtracting the last DB update time from the current time


#### extract_current_data
@@ -273,7 +273,7 @@ def extract_current_data(
exportname
)
```
Responsible for Extracting rawdata current snapshot, Initially it creates a geojson file , Generates query , run it with 1000 chunk size and writes it directly to the geojson file and closes the file after dump
Responsible for extracting the current rawdata snapshot. It first creates a geojson file, generates the query, runs it with a chunk size of 1000, writes the results directly to the geojson file, and closes the file after the dump
Args:
exportname: takes filename as argument to create geojson file passed from routers

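A simplified illustration of the chunked dump described above, not the project's real implementation; the connection string, query, and table name are placeholders.

```python
# Stream query results from the server in 1000-row chunks straight into a GeoJSON file.
import json

import psycopg2

conn = psycopg2.connect("dbname=raw user=postgres")
with conn.cursor(name="raw_stream") as cur, open("export.geojson", "w") as f:
    cur.itersize = 1000  # fetch 1000 rows at a time from the server-side cursor
    cur.execute("SELECT ST_AsGeoJSON(geom), tags FROM ways_poly LIMIT 5000")
    f.write('{"type": "FeatureCollection", "features": [\n')
    first = True
    for geom, tags in cur:
        feature = {"type": "Feature", "geometry": json.loads(geom), "properties": tags}
        if not first:
            f.write(",\n")
        f.write(json.dumps(feature))
        first = False
    f.write("\n]}\n")
```
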
@@ -289,7 +289,7 @@ def extract_plain_geojson(
self
)
```
Gets geojson for small area : Performs direct query with/without geometry
Gets geojson for small area: Performs direct query with/without geometry

### S3FileTransfer

@@ -335,7 +335,7 @@ def upload(
file_suffix='zip'
)
```
Used for transferring file to s3 after reading path from the user , It will wait for the upload to complete
Used for transferring a file to s3 after reading the path from the user; it will wait for the upload to complete
Parameters :file_path --- your local file path to upload ,
file_prefix -- prefix for the filename which is stored
sample function call :
16 changes: 8 additions & 8 deletions docs/src/installation/configurations.md
@@ -1,9 +1,9 @@

# Configuring the API service

Raw Data API can be setup using two configuration options. You can choose based on your convienience
Raw Data API can be setup using two configuration options. You can choose based on your convenience
- *config.txt* : You can follow `config.txt.sample` in dir and documentation below to set your configurations
- *.env* : Another option is from OS Environment variable , You can export all your env variables ( They are same as you put in config without blocks ) and pass it to API , API will pick it up automatically.
- *.env* : Another option is OS environment variables; you can export all your env variables (they are the same as in the config file, without the section blocks) and pass them to the API, which will pick them up automatically.

## What you need to start?

@@ -47,7 +47,7 @@ The following are the different configuration options that are accepted.
| `PGPASSWORD` | `PGPASSWORD` | `[DB]` | _none_ | PostgreSQL user/role password | REQUIRED |
| `PGDATABASE` | `PGDATABASE` | `[DB]` | _none_ | PostgreSQL database name | REQUIRED |
| `OSM_CLIENT_ID` | `OSM_CLIENT_ID` | `[OAUTH]` | _none_ | Client ID of OSM OAuth2 application | REQUIRED |
| `OSM_CLIENT_SECRET` | `OSM_CLIENT_SECRET` | `[OAUTH]` | _none_ | Client Secret of OSM OAuth2 application | REQIRED |
| `OSM_CLIENT_SECRET` | `OSM_CLIENT_SECRET` | `[OAUTH]` | _none_ | Client Secret of OSM OAuth2 application | REQUIRED |
| `OSM_PERMISSION_SCOPE` | `OSM_PERMISSION_SCOPE` | `[OAUTH]` | `read_prefs` | OSM access permission for OAuth2 application | OPTIONAL |
| `LOGIN_REDIRECT_URI` | `LOGIN_REDIRECT_URI` | `[OAUTH]` | _none_ | Redirect URL set in the OAuth2 application | REQUIRED |
| `APP_SECRET_KEY` | `APP_SECRET_KEY` | `[OAUTH]` | _none_ | High-entropy string generated for the application | REQUIRED |
@@ -73,7 +73,7 @@ The following are the different configuration options that are accepted.
| `DEFAULT_HARD_TASK_LIMIT` | `DEFAULT_HARD_TASK_LIMIT` | `[API_CONFIG]` | `10800` | Hard task time limit signal for celery workers in seconds. It will immediately kill the celery task.Defaults to 3 Hour| OPTIONAL |
| `USE_DUCK_DB_FOR_CUSTOM_EXPORTS` | `USE_DUCK_DB_FOR_CUSTOM_EXPORTS` | `[API_CONFIG]` | `False` | Enable this setting to use duckdb , By default duck db is disabled and postgres is used| OPTIONAL |
| `CELERY_BROKER_URL` | `CELERY_BROKER_URL` | `[CELERY]` | `redis://localhost:6379/0` | Redis connection string for the broker | OPTIONAL |
| `CELERY_RESULT_BACKEND` | `CELERY_RESULT_BACKEND` | `[CELERY]` | `redis://localhost:6379/0` | Redis/psotgresql connection string for the the result backend, eg : db+postgresql://username:password@localhost:5432/db_name | OPTIONAL |
| `CELERY_RESULT_BACKEND` | `CELERY_RESULT_BACKEND` | `[CELERY]` | `redis://localhost:6379/0` | Redis/PostgreSQL connection string for the result backend, eg : db+postgresql://username:password@localhost:5432/db_name | OPTIONAL |
| `FILE_UPLOAD_METHOD` | `FILE_UPLOAD_METHOD` | `[EXPORT_UPLOAD]` | `disk` | File upload method; Allowed values - disk, s3 | OPTIONAL |
| `BUCKET_NAME` | `BUCKET_NAME` | `[EXPORT_UPLOAD]` | _none_ | AWS S3 Bucket name | CONDITIONAL |
| `AWS_ACCESS_KEY_ID` | `AWS_ACCESS_KEY_ID` | `[EXPORT_UPLOAD]` | _none_ | AWS Access Key ID for S3 access | CONDITIONAL |
@@ -89,13 +89,13 @@ The following are the different configuration options that are accepted.
| `DUCK_DB_THREAD_LIMIT` | `DUCK_DB_THREAD_LIMIT` | `[API_CONFIG]` | None | Duck DB max threads limit ,n of your cores eg : 2 | CONDITIONAL |
| `HDX_SOFT_TASK_LIMIT` | `HDX_SOFT_TASK_LIMIT` | `[HDX]` | `18000` | Soft task time limit signal for celery workers in seconds.It will gently remind celery to finish up the task and terminate, Defaults to 5 Hour| OPTIONAL |
| `HDX_HARD_TASK_LIMIT` | `HDX_HARD_TASK_LIMIT` | `[HDX]` | `21600` | Hard task time limit signal for celery workers in seconds. It will immediately kill the celery task.Defaults to 6 Hour| OPTIONAL |
| `PROCESS_SINGLE_CATEGORY_IN_POSTGRES` | `PROCESS_SINGLE_CATEGORY_IN_POSTGRES` | `[HDX]` | False | Recommended for workers with low memery or CPU usage , This will process single category request like buildings only , Roads only in postgres itself and avoid extraction from duckdb| OPTIONAL |
| `PARALLEL_PROCESSING_CATEGORIES` | `PARALLEL_PROCESSING_CATEGORIES` | `[HDX]` | True | Enable parallel processing for mulitple categories and export formats , Disable this if you have single cpu and limited RAM , Enabled by default| OPTIONAL |
| `PROCESS_SINGLE_CATEGORY_IN_POSTGRES` | `PROCESS_SINGLE_CATEGORY_IN_POSTGRES` | `[HDX]` | False | Recommended for workers with low memory or CPU; this will process single-category requests (e.g. buildings only, roads only) in postgres itself and avoid extraction from duckdb| OPTIONAL |
| `PARALLEL_PROCESSING_CATEGORIES` | `PARALLEL_PROCESSING_CATEGORIES` | `[HDX]` | True | Enable parallel processing for multiple categories and export formats; disable this if you have a single CPU and limited RAM. Enabled by default| OPTIONAL |

**Note :** HDX_API_KEY

In order to generate HDX_API_KEY, you need to be logged in to https://data.humdata.org/.
Follow following navigation to generate tokens :
Follow this navigation to generate tokens:
- Your profile section > User settings > API Tokens

API Tokens have an expiry date; it is `important to update API Tokens manually each year` for a hosted api service!
@@ -213,7 +213,7 @@ APP_SECRET_KEY=your generated secret key

### Configure celery and redis

API uses [Celery 5](https://docs.celeryq.dev/en/stable/getting-started/first-steps-with-celery.html) and [Redis 6](https://redis.io/download/#redis-stack-downloads) for task queue management , Currently implemented for Rawdata endpoint. 6379 is the default port . if you are running redis on same machine your broker could be `redis://localhost:6379/`. You can change the port according to your configuration for the current docker compose use following
API uses [Celery 5](https://docs.celeryq.dev/en/stable/getting-started/first-steps-with-celery.html) and [Redis 6](https://redis.io/download/#redis-stack-downloads) for task queue management, currently implemented for the Rawdata endpoint. 6379 is the default port; if you are running redis on the same machine your broker could be `redis://localhost:6379/`. You can change the port according to your configuration; for the current docker compose setup, use the following:

```
[CELERY]
```
8 changes: 4 additions & 4 deletions docs/src/installation/docker.md
@@ -24,7 +24,7 @@ echo >> config.txt #Windows without WSL
if you prefer configurations as env variables you can put them in `.env` and pass it to dockerfile or export them

- Database configuration:
- To use the default database(with sample data) , Run docker compsoe and update the `docker-compose-config.txt`
- To use the default database (with sample data), run docker compose and update the `docker-compose-config.txt`

- To use a local postgres (with postgis enabled) database, you can follow the instructions on how to set it up with raw data [here](./configurations.md), or export them as system env variables

@@ -85,13 +85,13 @@ docker run --name rawdata-flower -p 5555:5555 \
**Development instruction:**
If you are running the Dockerfile only for the API, and have postgresql and redis installed directly on your machine, then you should change the following:

- Change --broker Host address in flower command (You can use redis if it is docker compsoe container or use `redis://host.docker.internal:6379/0` if you want API container to connect to your localhsot , Follow #troubleshoot section for more)
- Change the --broker host address in the flower command (you can use redis if it is a docker compose container, or use `redis://host.docker.internal:6379/0` if you want the API container to connect to your localhost; follow the #troubleshoot section for more)
- Change DB Host & Celery broker url accordingly with the same logic


**Note:**

In above example we have attached our working dir to containers along with config.txt for efficiency in development environment only . It is recommended to use proper docker copy as stated in dockerfile and system environement variables instead of config.txt in Production
In the above example, we have attached our working dir to the containers along with config.txt for efficiency in the development environment only. It is recommended to use a proper docker copy as stated in the dockerfile and system environment variables instead of config.txt in production

## Check the servers

Expand All @@ -111,7 +111,7 @@ API docs will be displayed like this upon successfull server startup

- Flower dashboard

Vist the route below to access the Flower dashboard
Visit the route below to access the Flower dashboard

```
http://127.0.0.1:5555/
```
8 changes: 4 additions & 4 deletions docs/src/installation/local.md
@@ -58,9 +58,9 @@ uvicorn API.main:app --reload

### Queues

Currently there are two type of queue implemented :
- "raw_daemon" : Queue for default exports which will create each unique id for exports , This queue is attached to 24/7 available workers
- "raw_ondemand" : Queue for recurring exports which will replace the previous exports if present on the system , can be enabled through uuid:false API Param . This queue will be attached to worker which will only spin up upon request.
Currently, there are two types of queue implemented:
- "raw_daemon" : Queue for default exports, which creates a unique id for each export; this queue is attached to 24/7 available workers
- "raw_ondemand" : Queue for recurring exports, which replace the previous exports if present on the system; can be enabled through the uuid:false API param (see the sketch after this list). This queue is attached to a worker which only spins up upon request.

### Start Celery Worker

Expand All @@ -84,7 +84,7 @@ pip install SQLAlchemy==2.0.25
```
### Start flower for monitoring queue [OPTIONAL]

Raw Data API uses flower for monitoring the Celery distributed queue. Run this command on a different shell , if you are running redis on same machine your broker could be `redis://localhost:6379//`.
Raw Data API uses flower for monitoring the Celery distributed queue. Run this command in a different shell; if you are running redis on the same machine your broker could be `redis://localhost:6379//`.

```
celery --broker=redis://redis:6379// --app API.api_worker flower --port=5000 --queues="raw_daemon,raw_ondemand"
```