Spaces:
Runtime error
Runtime error
File size: 1,450 Bytes
f745baf 26197e0 f745baf 26197e0 f745baf 26197e0 f745baf 26197e0 f745baf 26197e0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 |
import concurrent.futures
from extractors.model import LlamaParseModel, UnstructuredModel, GPTModel, ClaudeModel, AnyParserModel
# Default time budget, in seconds, used by run_extract_parallel when the
# caller does not pass an explicit ``timeout``.
DEFAULT_TIMEOUT = 30

# One instance of every extractor backend, created once at import time.
ap_rt = AnyParserModel()
lp = LlamaParseModel()
un = UnstructuredModel()
gpt = GPTModel()
claude = ClaudeModel()

# Maps each model's display name to the bound ``run`` method that is called
# with a file path to perform the extraction.
model_function_map = {
    "AnyParser": ap_rt.run,
    "LlamaParse": lp.run,
    "Unstructured": un.run,
    "GPT-4o-mini": gpt.run,
    "Claude-3.5-Sonnet": claude.run,
}

# Display names of all registered models, in registration order.
models = list(model_function_map)
def run_extract(model, file_path):
    """Run the extractor registered under ``model`` on ``file_path``.

    Args:
        model: A key of ``model_function_map`` selecting the extractor.
        file_path: Passed unchanged to the selected extractor callable.

    Returns:
        Whatever the selected extractor returns (presumably the extracted
        markdown text -- confirm against the extractor implementations).

    Raises:
        KeyError: If ``model`` is not a key of ``model_function_map``.
    """
    print('Running extract: model', model, 'file_path', file_path)
    return model_function_map[model](file_path)
def run_extract_parallel(model_a, model_b, pdf, timeout=DEFAULT_TIMEOUT):
    """Run two extractors on the same file concurrently, bounded by ``timeout``.

    Both extractions are submitted to a thread pool and share a single time
    budget: the call returns after at most ``timeout`` seconds even when an
    extractor is still running.

    Args:
        model_a: Model name for the first extraction (see ``run_extract``).
        model_b: Model name for the second extraction.
        pdf: File path handed to both extractors.
        timeout: Maximum number of seconds to wait for both results.
            ``None`` waits indefinitely.

    Returns:
        A ``(result_a, result_b)`` tuple. Each element is the extractor's
        return value, or the string ``"Error: Timeout after {timeout} seconds"``
        if that extractor did not finish in time.

    Raises:
        Any exception raised by an extractor that finished within the
        timeout is re-raised (model A's first), as before.
    """
    # The executor is managed by hand rather than with ``with``: the context
    # manager calls shutdown(wait=True) on exit, which would block until both
    # extractors finish and make the timeout ineffective.
    executor = concurrent.futures.ThreadPoolExecutor(max_workers=2)
    try:
        future_a = executor.submit(run_extract, model_a, pdf)
        future_b = executor.submit(run_extract, model_b, pdf)

        # A single wait gives both futures the same deadline; waiting on them
        # one after the other would let model B run for up to 2 * timeout.
        _, pending = concurrent.futures.wait(
            (future_a, future_b), timeout=timeout
        )

        results = []
        for future in (future_a, future_b):
            if future in pending:
                # Best effort: only prevents a task that has not started yet;
                # an already running extractor keeps going in the background.
                future.cancel()
                results.append(f"Error: Timeout after {timeout} seconds")
            else:
                results.append(future.result())
        return results[0], results[1]
    finally:
        # Do not wait for stragglers; the worker threads exit on their own
        # once their current task completes.
        executor.shutdown(wait=False)