import os
import argparse
import warnings

from TStar.interface_llm import TStarUniversalGrounder
from TStar.TStarFramework import TStarFramework, initialize_TStar_Scorer, run_tstar


def _build_parser():
    """Build the argument parser for the TStarSearcher command line."""
    parser = argparse.ArgumentParser(
        description="TStarSearcher: Simplified Video Frame Search and QA Tool"
    )

    # Required inputs describing the QA task.
    parser.add_argument('--video_path', type=str, required=True,
                        help='Path to the input video file.')
    parser.add_argument('--question', type=str, required=True,
                        help='Question for video content QA.')
    parser.add_argument('--options', type=str, required=True,
                        help='Multiple-choice options for the question.')

    # search tools
    # NOTE: these two options previously reused the --output_dir help text
    # ('Directory to save outputs.'), which misdescribed them in --help.
    parser.add_argument('--grounder', type=str, default='gpt-4o',
                        help='Name of the grounder model (e.g., "gpt-4o").')
    parser.add_argument('--heuristic', type=str, default='owl-vit',
                        help='Name of the heuristic model used during search '
                             '(e.g., "owl-vit").')

    # Search configuration; every value is forwarded unchanged to run_tstar.
    parser.add_argument('--device', type=str, default="cuda:0",
                        help='Device for model inference (e.g., "cuda:0" or "cpu").')
    parser.add_argument('--search_nframes', type=int, default=8,
                        help='Number of top frames to return.')
    parser.add_argument('--grid_rows', type=int, default=4,
                        help='Number of rows in the image grid.')
    parser.add_argument('--grid_cols', type=int, default=4,
                        help='Number of columns in the image grid.')
    parser.add_argument('--confidence_threshold', type=float, default=0.6,
                        help='YOLO detection confidence threshold.')
    parser.add_argument('--search_budget', type=float, default=0.5,
                        help='Maximum ratio of frames to process during search.')
    parser.add_argument('--output_dir', type=str, default='./output',
                        help='Directory to save outputs.')
    return parser


def main():
    r"""
    TStarSearcher: Simplified Video Frame Search Tool

    Parses the command-line arguments, passes them to ``run_tstar`` as keyword
    arguments, and prints the 'Grounding Objects', 'Frame Timestamps' and
    'Answer' entries of the returned results.

    Example usage:
        python searcher.py --video_path path/to/video.mp4 --question "Your question here" --options "A) Option1\nB) Option2\nC) Option3\nD) Option4"
    """
    args = _build_parser().parse_args()

    # Run the TStar search process
    results = run_tstar(
        video_path=args.video_path,
        question=args.question,
        options=args.options,
        grounder=args.grounder,
        heuristic=args.heuristic,
        device=args.device,
        search_nframes=args.search_nframes,
        grid_rows=args.grid_rows,
        grid_cols=args.grid_cols,
        confidence_threshold=args.confidence_threshold,
        search_budget=args.search_budget,
        output_dir=args.output_dir,
    )

    # Display the results
    print("Final Results:")
    print(f"Grounding Objects: {results['Grounding Objects']}")
    print(f"Frame Timestamps: {results['Frame Timestamps']}")
    print(f"Answer: {results['Answer']}")


if __name__ == "__main__":
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")  # Ignore all warnings
        main()