# llava_bird_detector_automated.py
# 'tcli' file created based on https://github.com/haotian-liu/LLaVA/issues/540
# It should be placed inside the 'LLaVA/llava/serve' folder
# If you get errors, increase time.sleep(35) to 40
# Dependencies:
# git clone https://github.com/haotian-liu/LLaVA.git
# cd LLaVA
# conda create -n llava python=3.10 -y
# conda activate llava
# pip install --upgrade pip  # enable PEP 660 support
# pip install -e .
# You might also need to install the following:
# pip install protobuf
# pip install --upgrade transformers
# If using llava-v1.6-vicuna-7b, update the cloned repository and reinstall:
# git pull
# pip install -e .
# You might also need to reinstall psutil to get the correct version:
# pip uninstall psutil
# pip install psutil
# GPT4 Prompt:
# Given a path with images (/home/soyrl/spyscape_test/), sort them and create a list of them.
# Run the python command (python -m llava.serve.tcli --model-path liuhaotian/llava-v1.6-vicuna-7b --load-4bit) in a terminal.
# That command should be run first. After that, some messages will be printed in the terminal from each execution.
# Show them to the user. At some point the terminal will show 'Image path:' and wait for user input.
# Use the path of the first image in the list as input. Then the terminal will show 'USER:' and wait for user input.
# Use 'Extract the text in the image' as input there. After that, you should wait for 30 seconds.
# Then the terminal will show 'ASSISTANT:' with some text after it that you should show to the user.
# Wait 2 more seconds and then 'Image path:' will be shown again. Repeat the above process for all the images.
# Keep in mind that the python command should only be executed once at the beginning.
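# The script below automates exactly that interaction: it starts the tcli REPL once, then for
# every image sends the image path, asks a yes/no bird question, waits for the reply, and logs
# the result to output_llava.txt.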
import subprocess
import time
import os
import select
start = time.time()
# Get list of images
image_dir = '/home/soyrl/pdf_llava/'
image_list = sorted([os.path.join(image_dir, img) for img in os.listdir(image_dir) if img.endswith('.jpg') or img.endswith('.png')])
# Start the python command
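# The tcli process stays alive for the whole run; its stdin/stdout are piped so this script can
# type the image path and the question, and read back the model's replies.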
process = subprocess.Popen(['python', '-m', 'llava.serve.tcli', '--model-path', 'liuhaotian/llava-v1.6-vicuna-7b', '--load-4bit'],  # or llava-v1.6-mistral-7b
                           stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
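# Polling read helper: select.select() waits up to `timeout` seconds for the subprocess stdout to
# become readable, so each poll returns after at most `timeout` seconds even while the model is
# still loading or generating.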
def read_output(timeout=1):
    ready, _, _ = select.select([process.stdout], [], [], timeout)
    if ready:
        return os.read(process.stdout.fileno(), 4096).decode('utf-8')  # Read the available output
    return None
for ind, image in enumerate(image_list):
    # Print progress in the terminal and append it to the txt file
    print(str(ind)+'/'+str(len(image_list)), '('+str(round(ind/len(image_list)*100, 2))+'%)')
    with open("output_llava.txt", "a") as file:
        file.write(str(ind)+'/'+str(len(image_list))+' ('+str(round(ind/len(image_list)*100, 2))+'%)'+'\n')

    # Wait for the 'Image path:' prompt and send the image path
    while True:
        if ind == 0:
            output = read_output()
        # For images after the first, `output` still holds the previous 'ASSISTANT:' reply
        # (the new prompt may not have been read in full yet), so match on that instead
        if (output and 'Image path:' in output) or (output and 'ASSISTANT' in output and ind != 0):
            # Print the image path and record it in the txt file
            print('Image path:', image)
            with open("output_llava.txt", "a") as file:
                file.write('Image path:'+image+'\n')
            process.stdin.write((image + '\n').encode())
            process.stdin.flush()
            break

    # Wait for the 'USER:' prompt and send the question
    while True:
        output = read_output()
        if output and 'USER:' in output:
            print('USER:', 'Are there any birds in the image? Respond with just a yes or no')
            process.stdin.write(("Are there any birds in the image? Respond with just a yes or no \n").encode())
            process.stdin.flush()
            break

    # Give the model time to answer
    time.sleep(35)

    # Read the 'ASSISTANT:' reply, print it and log Yes/No in the txt file
    while True:
        output = read_output()
        if output and output.startswith('ASSISTANT:'):
            print('ASSISTANT:', output)
            with open("output_llava.txt", "a") as file:
                if 'yes' in output.lower():
                    file.write('Yes \n')
                elif 'no' in output.lower():
                    file.write('No \n')
                file.write('\n')
            break

    # Short pause before the next 'Image path:' prompt appears
    time.sleep(2)
# Close the process
process.stdin.close()
process.terminate()
process.wait(timeout=0.2)
print('Time:', time.time()-start)
with open("output_llava.txt", "a") as file:
file.write('Time:'+ str(time.time()-start)+'\n')
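# Example usage (a sketch; the conda environment and paths are the ones assumed above):
#   conda activate llava
#   python llava_bird_detector_automated.py
# output_llava.txt then contains, for each image, a progress line, the 'Image path:' that was sent,
# and 'Yes'/'No' from the model's answer, followed by the total runtime at the end.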