Algorithm,Dataset,Eval Date,LLM,Score,Pass rate,X-shot,Parameters,Samples,Total input tokens,Average input tokens,Total output tokens,Average output tokens,All tokens,Cost($),Note,,,,,,,,,,,,,,,,,,,,,,,,,,, IO,gsm8k,2025/1/7,gpt-3.5-turbo,37.83,99.92,8,,"1,319","546,990",415,"39,563",30,"586,553",0.3328,,,,,,,,,,,,,,,,,,,,,,,,,,,, IO,gsm8k,2025/1/7,Doubao-lite-32k,72.02,99.92,8,,"1,319","617,377",468,"123,106",93,"740,483",0.0354,,,,,,,,,,,,,,,,,,,,,,,,,,,, IO,gsm8k,2025/1/22,gpt-4o,88.40,100.00,8,,"1,319","542,416",411,"199,030",151,"741,446",3.3463,,,,,,,,,,,,,,,,,,,,,,,,,,,, IO,gsm8k,2025/1/22,Qwen2.5-72B-Instruct,86.58,100.00,8,,"1,319","555,340",421,"313,720",238,"869,060",0.4899,,,,,,,,,,,,,,,,,,,,,,,,,,,, IO,gsm8k,2025/1/22,Llama-3.3-70B-Instruct,92.27,100.00,8,,"1,319","583,916",443,"251,359",191,"835,275",0.4709,,,,,,,,,,,,,,,,,,,,,,,,,,,, IO,gsm8k,2025/1/22,Qwen2.5-7B-Instruct,57.24,100.00,8,,"1,319","596,229",452,"291,684",221,"887,913",0.0000,,,,,,,,,,,,,,,,,,,,,,,,,,,, IO,gsm8k,2025/1/22,Llama-3.1-8B-Instruct,57.16,99.55,8,,"1,319","550,941",418,"1,194,488",906,"1,745,429",0.0000,,,,,,,,,,,,,,,,,,,,,,,,,,,, IO,gsm8k,2025/1/22,Internllm2_5-7B,11.60,97.95,8,,"1,319","679,302",515,"434,426",329,"1,113,728",0.0000,,,,,,,,,,,,,,,,,,,,,,,,,,,, IO,gsm8k,2025/1/22,Qwen2-1.5B-Instruct,16.68,100.00,8,,"1,319","568,530",431,"168,466",128,"736,996",0.0000,,,,,,,,,,,,,,,,,,,,,,,,,,,, IO,gsm8k,2025/1/22,Qwen2-0.5B-Instruct,14.71,100.00,8,,"1,319","568,116",431,"266,781",202,"834,897",0.0000,,,,,,,,,,,,,,,,,,,,,,,,,,,, IO,gsm8k,2025/1/22,deepseek-r1:1.5b,64.14,99.62,8,,"1,319","561,935",426,"921,116",698,"1,483,051",0.0000,,,,,,,,,,,,,,,,,,,,,,,,,,,, ReAct-Pro*,gsm8k,2025/1/7,gpt-3.5-turbo,74.91,99.39,8,max_steps=10,"1,319","6,506,164","4,933","140,122",106,"6,646,286",3.4633,"think-action 单独返回,prompt v1",,,,,,,,,,,,,,,,,,,,,,,,,,, ReAct-Pro*,gsm8k,2025/1/7,Doubao-lite-32k,85.60,99.62,8,max_steps=10,"1,319","5,862,016","4,444","136,623",104,"5,998,639",0.2512,"think-action 单独返回,prompt v1",,,,,,,,,,,,,,,,,,,,,,,,,,, ReAct-Pro*,gsm8k,2025/1/22,gpt-4o,63.31,99.55,8,max_steps=10,"1,319","14,411,173","10,926","304,714",231,"14,715,887",39.0751,"think-action 单独返回,prompt v1",,,,,,,,,,,,,,,,,,,,,,,,,,, ReAct-Pro*,gsm8k,2025/1/22,Qwen2.5-72B-Instruct,87.26,100.00,8,max_steps=10,"1,319","18,160,983","13,769","549,454",417,"18,710,437",10.5479,"think-action 单独返回,prompt v1",,,,,,,,,,,,,,,,,,,,,,,,,,, ReAct-Pro*,gsm8k,2025/1/22,Llama-3.3-70B-Instruct,87.64,99.92,8,max_steps=10,"1,319","17,038,928","12,918","898,936",682,"17,937,864",10.1124,"think-action 单独返回,prompt v1",,,,,,,,,,,,,,,,,,,,,,,,,,, ReAct-Pro*,gsm8k,2025/1/22,Qwen2.5-7B-Instruct,82.87,100.00,8,max_steps=10,"1,319","14,355,752","10,884","495,162",375,"14,850,914",0.0000,"think-action 单独返回,prompt v1",,,,,,,,,,,,,,,,,,,,,,,,,,, ReAct-Pro*,gsm8k,2025/1/22,Llama-3.1-8B-Instruct,67.78,98.56,8,max_steps=10,"1,319","21,044,978","15,955","1,790,789","1,358","22,835,767",0.0000,"think-action 单独返回,prompt v1",,,,,,,,,,,,,,,,,,,,,,,,,,, ReAct-Pro*,gsm8k,2025/1/22,Internllm2_5-7B,33.51,97.95,8,max_steps=10,"1,319","30,120,070","22,836","5,549,919","4,208","35,669,989",0.0000,"think-action 单独返回,prompt v1",,,,,,,,,,,,,,,,,,,,,,,,,,, ReAct-Pro*,gsm8k,2025/1/22,Qwen2-1.5B-Instruct,24.87,80.21,8,max_steps=10,"1,319","9,133,603","6,925","694,398",526,"9,828,001",0.0000,"think-action 单独返回,prompt v1",,,,,,,,,,,,,,,,,,,,,,,,,,, ReAct-Pro*,gsm8k,2025/1/22,Qwen2-0.5B-Instruct,7.66,95.22,8,max_steps=10,"1,319","52,431,343","39,751","2,961,268","2,245","55,392,611",0.0000,"think-action 单独返回,prompt v1",,,,,,,,,,,,,,,,,,,,,,,,,,, ReAct-Pro*,gsm8k,2025/2/10,deepseek-r1:1.5b,35.94,99.62,8,max_steps=10,"1,319","19,299,381","14,632","4,919,696","3,730","24,219,077",0.0000,"think-action 单独返回,prompt v1",,,,,,,,,,,,,,,,,,,,,,,,,,, PoT,gsm8k,2025/1/7,gpt-3.5-turbo,76.88,99.24,8,,"1,319","1,090,418",827,"96,662",73,"1,187,080",0.6902,,,,,,,,,,,,,,,,,,,,,,,,,,,, PoT,gsm8k,2025/1/7,Doubao-lite-32k,79.61,92.57,8,,"1,319","1,170,038",887,"118,017",89,"1,288,055",0.0576,,,,,,,,,,,,,,,,,,,,,,,,,,,, PoT,gsm8k,2025/1/22,gpt-4o,93.10,99.77,8,,"1,319","1,101,672",835,"146,240",111,"1,247,912",4.2166,,,,,,,,,,,,,,,,,,,,,,,,,,,, PoT,gsm8k,2025/1/22,Qwen2.5-72B-Instruct,92.34,99.39,8,,"1,319","1,106,682",839,"144,528",110,"1,251,210",0.7054,,,,,,,,,,,,,,,,,,,,,,,,,,,, PoT,gsm8k,2025/1/22,Llama-3.3-70B-Instruct,73.09,79.61,8,,"1,319","1,126,025",854,"601,019",456,"1,727,044",0.9736,,,,,,,,,,,,,,,,,,,,,,,,,,,, PoT,gsm8k,2025/1/22,Qwen2.5-7B-Instruct,58.83,70.51,8,,"1,319","1,145,390",868,"217,432",165,"1,362,822",0.0000,,,,,,,,,,,,,,,,,,,,,,,,,,,, PoT,gsm8k,2025/1/22,Llama-3.1-8B-Instruct,38.67,55.42,8,,"1,319","1,147,538",870,"243,573",185,"1,391,111",0.0000,,,,,,,,,,,,,,,,,,,,,,,,,,,, PoT,gsm8k,2025/1/22,Internllm2_5-7B,38.21,48.90,8,,"1,319","1,136,843",862,"188,106",143,"1,324,949",0.0000,,,,,,,,,,,,,,,,,,,,,,,,,,,, PoT,gsm8k,2025/1/22,Qwen2-1.5B-Instruct,18.50,31.01,8,,"1,319","1,151,528",873,"175,994",133,"1,327,522",0.0000,,,,,,,,,,,,,,,,,,,,,,,,,,,, PoT,gsm8k,2025/1/22,Qwen2-0.5B-Instruct,9.63,16.91,8,,"1,319","1,151,528",873,"237,607",180,"1,389,135",0.0000,,,,,,,,,,,,,,,,,,,,,,,,,,,, PoT,gsm8k,2025/2/10,deepseek-r1:1.5b,11.90,17.44,8,,"1,319","1,138,872",863,"815,637",618,"1,954,509",0.0000,,,,,,,,,,,,,,,,,,,,,,,,,,,, CoT,gsm8k,2025/1/7,gpt-3.5-turbo,78.70,100.00,8,,"1,319","953,242",723,"134,799",102,"1,088,041",0.6788,,,,,,,,,,,,,,,,,,,,,,,,,,,, CoT,gsm8k,2025/1/7,Doubao-lite-32k,89.31,100.00,8,,"1,319","1,042,095",790,"159,725",121,"1,201,820",0.0558,,,,,,,,,,,,,,,,,,,,,,,,,,,, CoT,gsm8k,2025/1/22,gpt-4o,94.09,100.00,8,,"1,319","948,668",719,"216,498",164,"1,165,166",4.5367,,,,,,,,,,,,,,,,,,,,,,,,,,,, CoT,gsm8k,2025/1/22,Qwen2.5-72B-Instruct,92.87,100.00,8,,"1,319","1,005,119",762,"271,133",206,"1,276,252",0.7195,,,,,,,,,,,,,,,,,,,,,,,,,,,, CoT,gsm8k,2025/1/22,Llama-3.3-70B-Instruct,93.93,100.00,8,,"1,319","990,168",751,"228,497",173,"1,218,665",0.6870,,,,,,,,,,,,,,,,,,,,,,,,,,,, CoT,gsm8k,2025/1/22,Qwen2.5-7B-Instruct,85.67,100.00,8,,"1,319","1,046,008",793,"244,797",186,"1,290,805",0.0000,,,,,,,,,,,,,,,,,,,,,,,,,,,, CoT,gsm8k,2025/1/22,Llama-3.1-8B-Instruct,75.44,99.92,8,,"1,319","990,168",751,"258,161",196,"1,248,329",0.0000,,,,,,,,,,,,,,,,,,,,,,,,,,,, CoT,gsm8k,2025/1/22,Internllm2_5-7B,77.71,99.70,8,,"1,319","968,163",734,"234,000",177,"1,202,163",0.0000,,,,,,,,,,,,,,,,,,,,,,,,,,,, CoT,gsm8k,2025/1/22,Qwen2-1.5B-Instruct,55.50,100.00,8,,"1,319","1,032,818",783,"185,707",141,"1,218,525",0.0000,,,,,,,,,,,,,,,,,,,,,,,,,,,, CoT,gsm8k,2025/1/22,Qwen2-0.5B-Instruct,35.94,99.92,8,,"1,319","1,032,818",783,"190,641",145,"1,223,459",0.0000,,,,,,,,,,,,,,,,,,,,,,,,,,,, CoT,gsm8k,2025/1/23,deepseek-r1:1.5b,70.66,99.77,8,,"1,319","1,011,714",767,"1,078,911",818,"2,090,625",0.0000,,,,,,,,,,,,,,,,,,,,,,,,,,,, SC-CoT,gsm8k,2025/1/7,gpt-3.5-turbo,69.29,98.79,8,"temperature=1, path_num=5","1,319","895,571",679,"1,381,678","1,048","2,277,249",2.5203,,,,,,,,,,,,,,,,,,,,,,,,,,,, SC-CoT,gsm8k,2025/1/7,Doubao-lite-32k,91.58,99.92,8,"temperature=1, path_num=5","1,319","942,182",714,"893,709",678,"1,835,891",0.1118,,,,,,,,,,,,,,,,,,,,,,,,,,,, SC-CoT,gsm8k,2025/1/22,gpt-4o,94.77,100.00,8,"temperature=1, path_num=5","1,319","894,889",678,"1,596,716","1,211","2,491,605",18.2044,,,,,,,,,,,,,,,,,,,,,,,,,,,, SC-CoT,gsm8k,2025/1/22,Qwen2.5-72B-Instruct,94.77,100.00,8,"temperature=1, path_num=5","1,319","5,370,360","4,072","1,804,898","1,368","7,175,258",4.0450,,,,,,,,,,,,,,,,,,,,,,,,,,,, SC-CoT,gsm8k,2025/1/22,Llama-3.3-70B-Instruct,95.22,100.00,8,"temperature=1, path_num=5","1,319","5,295,585","4,015","1,426,429","1,081","6,722,014",3.7895,,,,,,,,,,,,,,,,,,,,,,,,,,,, SC-CoT,gsm8k,2025/1/22,Qwen2.5-7B-Instruct,90.98,100.00,8,"temperature=1, path_num=5","1,319","5,580,524","4,231","1,679,419","1,273","7,259,943",0.0000,,,,,,,,,,,,,,,,,,,,,,,,,,,, SC-CoT,gsm8k,2025/1/22,Llama-3.1-8B-Instruct,54.36,99.85,8,"temperature=1, path_num=5","1,319","5,136,762","3,894","5,819,672","4,412","10,956,434",0.0000,,,,,,,,,,,,,,,,,,,,,,,,,,,, SC-CoT,gsm8k,2025/1/22,Internllm2_5-7B,44.66,91.81,8,"temperature=1, path_num=5","1,319","5,847,761","4,433","2,314,738","1,755","8,162,499",0.0000,,,,,,,,,,,,,,,,,,,,,,,,,,,, SC-CoT,gsm8k,2025/1/22,Qwen2-1.5B-Instruct,8.19,68.76,8,"temperature=1, path_num=5","1,319","5,439,568","4,124","1,946,885","1,476","7,386,453",0.0000,,,,,,,,,,,,,,,,,,,,,,,,,,,, SC-CoT,gsm8k,2025/1/22,Qwen2-0.5B-Instruct,4.17,94.47,8,"temperature=1, path_num=5","1,319","5,441,962","4,126","2,036,805","1,544","7,478,767",0.0000,,,,,,,,,,,,,,,,,,,,,,,,,,,, SC-CoT,gsm8k,2025/2/10,deepseek-r1:1.5b,69.07,98.79,8,"temperature=1, path_num=5","1,319","5,407,357","4,100","4,622,327","3,504","10,029,684",0.0000,,,,,,,,,,,,,,,,,,,,,,,,,,,, ToT,gsm8k,2025/1/7,gpt-3.5-turbo,67.93,99.70,8,"search_type=bfs, b=1, max_depth=6, max_steps=6, generation_n=1, evaluation_n=3, evaluation_type=vote, use_llm_completion=true","1,319","15,920,037","12,070","807,138",612,"16,727,175",9.1707,,,,,,,,,,,,,,,,,,,,,,,,,,,, ToT,gsm8k,2025/1/7,Doubao-lite-32k,37.83,87.34,8,"search_type=bfs, b=1, max_depth=6, max_steps=6, generation_n=1, evaluation_n=3, evaluation_type=vote, use_llm_completion=true","1,319","19,208,597","14,563","1,065,752",808,"20,274,349",0.8739,,,,,,,,,,,,,,,,,,,,,,,,,,,, ToT,gsm8k,2025/1/22,gpt-4o,91.13,100.00,8,"search_type=bfs, b=1, max_depth=6, max_steps=6, generation_n=1, evaluation_n=3, evaluation_type=vote, use_llm_completion=true","1,319","29,445,237","22,324","1,324,498","1,004","30,769,735",86.8581,,,,,,,,,,,,,,,,,,,,,,,,,,,, ToT,gsm8k,2025/1/22,Qwen2.5-72B-Instruct,88.88,100.00,8,"search_type=bfs, b=1, max_depth=6, max_steps=6, generation_n=1, evaluation_n=3, evaluation_type=vote, use_llm_completion=true","1,319","40,435,361","30,656","1,411,787","1,070","41,847,148",23.5911,,,,,,,,,,,,,,,,,,,,,,,,,,,, ToT,gsm8k,2025/1/22,Llama-3.3-70B-Instruct,91.89,100.00,8,"search_type=bfs, b=1, max_depth=6, max_steps=6, generation_n=1, evaluation_n=3, evaluation_type=vote, use_llm_completion=true","1,319","35,096,810","26,609","1,932,877","1,465","37,029,687",20.8753,,,,,,,,,,,,,,,,,,,,,,,,,,,, ToT,gsm8k,2025/1/22,Qwen2.5-7B-Instruct,72.21,99.01,8,"search_type=bfs, b=1, max_depth=6, max_steps=6, generation_n=1, evaluation_n=3, evaluation_type=vote, use_llm_completion=true","1,319","20,196,528","15,312","11,460,791","8,689","31,657,319",0.0000,,,,,,,,,,,,,,,,,,,,,,,,,,,, ToT,gsm8k,2025/1/22,Llama-3.1-8B-Instruct,65.05,91.96,8,"search_type=bfs, b=1, max_depth=6, max_steps=6, generation_n=1, evaluation_n=3, evaluation_type=vote, use_llm_completion=true","1,319","15,554,967","11,793","877,135",665,"16,432,102",0.0000,,,,,,,,,,,,,,,,,,,,,,,,,,,, ToT,gsm8k,2025/1/22,Internllm2_5-7B,20.85,70.13,8,"search_type=bfs, b=1, max_depth=6, max_steps=6, generation_n=1, evaluation_n=3, evaluation_type=vote, use_llm_completion=true","1,319","11,768,118","8,922","1,410,011","1,069","13,178,129",0.0000,,,,,,,,,,,,,,,,,,,,,,,,,,,, ToT,gsm8k,2025/1/22,Qwen2-1.5B-Instruct,19.64,77.26,8,"search_type=bfs, b=1, max_depth=6, max_steps=6, generation_n=1, evaluation_n=3, evaluation_type=vote, use_llm_completion=true","1,319","12,124,248","9,192","634,439",481,"12,758,687",0.0000,,,,,,,,,,,,,,,,,,,,,,,,,,,, ToT,gsm8k,2025/1/22,Qwen2-0.5B-Instruct,-,-,8,"search_type=bfs, b=1, max_depth=6, max_steps=6, generation_n=1, evaluation_n=3, evaluation_type=vote, use_llm_completion=true","1,319",-,-,-,-,-,-,,,,,,,,,,,,,,,,,,,,,,,,,,,, ToT,gsm8k,2025/2/10,deepseek-r1:1.5b,23.12,72.48,8,"search_type=bfs, b=1, max_depth=6, max_steps=6, generation_n=1, evaluation_n=3, evaluation_type=vote, use_llm_completion=true","1,319","2,738,244","2,076","683,242",518,"3,421,486",0.0000,,,,,,,,,,,,,,,,,,,,,,,,,,,, IO,AQuA,2025/1/7,gpt-3.5-turbo,38.98,100.00,0,,254,"25,701",101,"16,770",66,"42,471",0.0380,,,,,,,,,,,,,,,,,,,,,,,,,,,, IO,AQuA,2025/1/7,Doubao-lite-32k,79.13,100.00,0,,254,"33,058",130,"54,684",215,"87,742",0.0058,,,,,,,,,,,,,,,,,,,,,,,,,,,, IO,AQuA,2025/1/22,gpt-4o,75.59,97.24,0,,254,"25,631",101,"108,121",426,"133,752",1.1453,,,,,,,,,,,,,,,,,,,,,,,,,,,, IO,AQuA,2025/1/22,Qwen2.5-72B-Instruct,84.25,99.61,0,,254,"25,397",100,"106,207",418,"131,604",0.0742,,,,,,,,,,,,,,,,,,,,,,,,,,,, IO,AQuA,2025/1/22,Llama-3.3-70B-Instruct,82.68,99.21,0,,254,"32,809",129,"108,758",428,"141,567",0.0798,,,,,,,,,,,,,,,,,,,,,,,,,,,, IO,AQuA,2025/1/22,Qwen2.5-7B-Instruct,78.74,98.43,0,,254,"33,271",131,"104,500",411,"137,771",0.0000,,,,,,,,,,,,,,,,,,,,,,,,,,,, IO,AQuA,2025/1/22,Llama-3.1-8B-Instruct,51.18,98.82,0,,254,"26,459",104,"106,647",420,"133,106",0.0000,,,,,,,,,,,,,,,,,,,,,,,,,,,, IO,AQuA,2025/1/22,Internllm2_5-7B,47.64,90.94,0,,254,"50,232",198,"134,809",531,"185,041",0.0000,,,,,,,,,,,,,,,,,,,,,,,,,,,, IO,AQuA,2025/1/22,Qwen2-1.5B-Instruct,29.13,97.64,0,,254,"27,937",110,"43,110",170,"71,047",0.0000,,,,,,,,,,,,,,,,,,,,,,,,,,,, IO,AQuA,2025/1/22,Qwen2-0.5B-Instruct,27.17,98.82,0,,254,"27,937",110,"82,478",325,"110,415",0.0000,,,,,,,,,,,,,,,,,,,,,,,,,,,, IO,AQuA,2025/1/22,deepseek-r1:1.5b,68.90,94.88,0,,254,"26,667",105,"325,100","1,280","351,767",0.0000,,,,,,,,,,,,,,,,,,,,,,,,,,,, CoT,AQuA,2025/1/7,gpt-3.5-turbo,61.02,93.70,0,,254,"25,447",100,"55,346",218,"80,793",0.0957,,,,,,,,,,,,,,,,,,,,,,,,,,,, CoT,AQuA,2025/1/7,Doubao-lite-32k,82.68,97.24,0,,254,"27,978",110,"66,599",262,"94,577",0.0066,,,,,,,,,,,,,,,,,,,,,,,,,,,, CoT,AQuA,2025/1/22,gpt-4o,82.68,98.03,0,,254,"25,123",99,"97,894",385,"123,017",1.0417,,,,,,,,,,,,,,,,,,,,,,,,,,,, CoT,AQuA,2025/1/22,Qwen2.5-72B-Instruct,86.22,99.21,0,,254,"25,143",99,"118,146",465,"143,289",0.0808,,,,,,,,,,,,,,,,,,,,,,,,,,,, CoT,AQuA,2025/1/22,Llama-3.3-70B-Instruct,83.46,98.43,0,,254,"32,555",128,"131,834",519,"164,389",0.0927,,,,,,,,,,,,,,,,,,,,,,,,,,,, CoT,AQuA,2025/1/22,Qwen2.5-7B-Instruct,80.71,99.61,0,,254,"33,017",130,"116,719",460,"149,736",0.0000,,,,,,,,,,,,,,,,,,,,,,,,,,,, CoT,AQuA,2025/1/22,Llama-3.1-8B-Instruct,60.63,100.00,0,,254,"32,555",128,"111,880",440,"144,435",0.0000,,,,,,,,,,,,,,,,,,,,,,,,,,,, CoT,AQuA,2025/1/22,Internllm2_5-7B,52.76,89.37,0,,254,"26,610",105,"100,910",397,"127,520",0.0000,,,,,,,,,,,,,,,,,,,,,,,,,,,, CoT,AQuA,2025/1/22,Qwen2-1.5B-Instruct,40.55,98.82,0,,254,"30,477",120,"79,563",313,"110,040",0.0000,,,,,,,,,,,,,,,,,,,,,,,,,,,, CoT,AQuA,2025/1/22,Qwen2-0.5B-Instruct,33.07,98.82,0,,254,"30,477",120,"86,862",342,"117,339",0.0000,,,,,,,,,,,,,,,,,,,,,,,,,,,, CoT,AQuA,2025/1/23,deepseek-r1:1.5b,71.65,96.85,0,,254,"26,413",104,"306,659","1,207","333,072",0.0000,,,,,,,,,,,,,,,,,,,,,,,,,,,, PoT,AQuA,2025/1/7,gpt-3.5-turbo,59.45,100.00,0,,254,"225,162",886,"41,492",163,"266,654",0.1748,,,,,,,,,,,,,,,,,,,,,,,,,,,, PoT,AQuA,2025/1/7,Doubao-lite-32k,71.65,96.85,0,,254,"259,863","1,023","49,573",195,"309,436",0.0147,,,,,,,,,,,,,,,,,,,,,,,,,,,, PoT,AQuA,2025/1/22,gpt-4o,75.20,100.00,0,,254,"222,717",877,"105,191",414,"327,908",1.6087,,,,,,,,,,,,,,,,,,,,,,,,,,,, PoT,AQuA,2025/1/22,Qwen2.5-72B-Instruct,75.20,100.00,0,,254,"249,215",981,"42,549",168,"291,764",0.1645,,,,,,,,,,,,,,,,,,,,,,,,,,,, PoT,AQuA,2025/1/22,Llama-3.3-70B-Instruct,79.53,99.21,0,,254,"240,735",948,"69,064",272,"309,799",0.1746,,,,,,,,,,,,,,,,,,,,,,,,,,,, PoT,AQuA,2025/1/22,Qwen2.5-7B-Instruct,68.11,100.00,0,,254,"264,517","1,041","49,211",194,"313,728",0.0000,,,,,,,,,,,,,,,,,,,,,,,,,,,, PoT,AQuA,2025/1/22,Llama-3.1-8B-Instruct,36.61,96.85,0,,254,"240,613",947,"50,301",198,"290,914",0.0000,,,,,,,,,,,,,,,,,,,,,,,,,,,, PoT,AQuA,2025/1/22,Internllm2_5-7B,36.61,98.82,0,,254,"233,505",919,"68,457",270,"301,962",0.0000,,,,,,,,,,,,,,,,,,,,,,,,,,,, PoT,AQuA,2025/1/22,Qwen2-1.5B-Instruct,30.71,96.46,0,,254,"246,560",971,"51,915",204,"298,475",0.0000,,,,,,,,,,,,,,,,,,,,,,,,,,,, PoT,AQuA,2025/1/22,Qwen2-0.5B-Instruct,17.32,92.13,0,,254,"258,867","1,019","63,414",250,"322,281",0.0000,,,,,,,,,,,,,,,,,,,,,,,,,,,, PoT,AQuA,2025/2/10,deepseek-r1:1.5b,54.72,97.24,0,,254,"250,690",987,"765,957","3,016","1,016,647",0.0000,,,,,,,,,,,,,,,,,,,,,,,,,,,, SC-CoT,AQuA,2025/1/22,gpt-3.5-turbo,58.66,92.52,0,"temperature=1, path_num=5",254,"27,906",110,"209,160",823,"237,066",0.3277,,,,,,,,,,,,,,,,,,,,,,,,,,,, SC-CoT,AQuA,2025/1/22,Doubao-lite-32k,76.37,91.73,0,"temperature=1, path_num=5",254,"31,703",125,"325,136","1,280","356,839",0.0279,,,,,,,,,,,,,,,,,,,,,,,,,,,, SC-CoT,AQuA,2025/1/22,gpt-4o,85.83,99.21,0,"temperature=1, path_num=5",254,"27,829",110,"517,602","2,038","545,431",5.2456,,,,,,,,,,,,,,,,,,,,,,,,,,,, SC-CoT,AQuA,2025/1/22,Qwen2.5-72B-Instruct,85.43,96.85,0,"temperature=1, path_num=5",254,"137,990",543,"604,562","2,380","742,552",0.4186,,,,,,,,,,,,,,,,,,,,,,,,,,,, SC-CoT,AQuA,2025/1/22,Llama-3.3-70B-Instruct,84.65,99.61,0,"temperature=1, path_num=5",254,"175,050",689,"612,262","2,410","787,312",0.4438,,,,,,,,,,,,,,,,,,,,,,,,,,,, SC-CoT,AQuA,2025/1/22,Qwen2.5-7B-Instruct,79.53,100.00,0,"temperature=1, path_num=5",254,"177,972",701,"567,438","2,234","745,410",0.0000,,,,,,,,,,,,,,,,,,,,,,,,,,,, SC-CoT,AQuA,2025/1/22,Llama-3.1-8B-Instruct,59.45,95.67,0,"temperature=1, path_num=5",254,"145,108",571,"544,969","2,146","690,077",0.0000,,,,,,,,,,,,,,,,,,,,,,,,,,,, SC-CoT,AQuA,2025/1/22,Internllm2_5-7B,38.58,97.24,0,"temperature=1, path_num=5",254,"264,557","1,042","615,114","2,422","879,671",0.0000,,,,,,,,,,,,,,,,,,,,,,,,,,,, SC-CoT,AQuA,2025/1/22,Qwen2-1.5B-Instruct,10.63,51.57,0,"temperature=1, path_num=5",254,"151,410",596,"550,570","2,168","701,980",0.0000,,,,,,,,,,,,,,,,,,,,,,,,,,,, SC-CoT,AQuA,2025/1/22,Qwen2-0.5B-Instruct,17.32,82.28,0,"temperature=1, path_num=5",254,"150,787",594,"603,126","2,375","753,913",0.0000,,,,,,,,,,,,,,,,,,,,,,,,,,,, SC-CoT,AQuA,2025/2/10,deepseek-r1:1.5b,57.87,74.02,0,"temperature=1, path_num=5",254,"144,710",570,"1,987,401","7,824","2,132,111",0.0000,,,,,,,,,,,,,,,,,,,,,,,,,,,, ReAct-Pro*,AQuA,2025/1/7,gpt-3.5-turbo,64.57,98.03,0,max_steps=10,254,"862,614","3,396","40,973",161,"903,587",0.4928,"think-action 单独返回,prompt v1",,,,,,,,,,,,,,,,,,,,,,,,,,, ReAct-Pro*,AQuA,2025/1/7,Doubao-lite-32k,77.56,96.06,0,max_steps=10,254,"977,890","3,850","54,951",216,"1,032,841",0.0445,"think-action 单独返回,prompt v1",,,,,,,,,,,,,,,,,,,,,,,,,,, ReAct-Pro*,AQuA,2025/1/22,gpt-4o,57.48,97.24,0,max_steps=10,254,"615,589","2,424","76,507",301,"692,096",2.3040,"think-action 单独返回,prompt v1",,,,,,,,,,,,,,,,,,,,,,,,,,, ReAct-Pro*,AQuA,2025/1/22,Qwen2.5-72B-Instruct,73.23,100.00,0,max_steps=10,254,"441,765","1,739","121,838",480,"563,603",0.3177,"think-action 单独返回,prompt v1",,,,,,,,,,,,,,,,,,,,,,,,,,, ReAct-Pro*,AQuA,2025/1/22,Llama-3.3-70B-Instruct,79.13,99.61,0,max_steps=10,254,"1,119,143","4,406","243,236",958,"1,362,379",0.7680,"think-action 单独返回,prompt v1",,,,,,,,,,,,,,,,,,,,,,,,,,, ReAct-Pro*,AQuA,2025/1/22,Qwen2.5-7B-Instruct,74.41,99.21,0,max_steps=10,254,"564,165","2,221","131,679",518,"695,844",0.0000,"think-action 单独返回,prompt v1",,,,,,,,,,,,,,,,,,,,,,,,,,, ReAct-Pro*,AQuA,2025/1/22,Llama-3.1-8B-Instruct,55.51,96.85,0,max_steps=10,254,"3,764,723","14,822","576,098","2,268","4,340,821",0.0000,"think-action 单独返回,prompt v1",,,,,,,,,,,,,,,,,,,,,,,,,,, ReAct-Pro*,AQuA,2025/1/22,Internllm2_5-7B,40.94,96.85,0,max_steps=10,254,"3,592,039","14,142","836,762","3,294","4,428,801",0.0000,"think-action 单独返回,prompt v1",,,,,,,,,,,,,,,,,,,,,,,,,,, ReAct-Pro*,AQuA,2025/1/22,Qwen2-1.5B-Instruct,25.59,96.06,0,max_steps=10,254,"4,555,858","17,936","516,146","2,032","5,072,004",0.0000,"think-action 单独返回,prompt v1",,,,,,,,,,,,,,,,,,,,,,,,,,, ReAct-Pro*,AQuA,2025/1/22,Qwen2-0.5B-Instruct,24.02,96.85,0,max_steps=10,254,"6,344,167","24,977","825,920","3,252","7,170,087",0.0000,"think-action 单独返回,prompt v1",,,,,,,,,,,,,,,,,,,,,,,,,,, ReAct-Pro*,AQuA,2025/2/10,deepseek-r1:1.5b,54.33,96.46,0,max_steps=10,254,"10,578,715","41,648","3,866,326","15,222","14,445,041",0.0000,"think-action 单独返回,prompt v1",,,,,,,,,,,,,,,,,,,,,,,,,,, ToT,AQuA,2025/1/7,gpt-3.5-turbo,57.09,99.61,0,"search_type=bfs, b=1, max_depth=6, max_steps=6, generation_n=1, evaluation_n=3, evaluation_type=vote, use_llm_completion=true",254,"1,850,767","7,286","150,629",593,"2,001,396",1.1513,,,,,,,,,,,,,,,,,,,,,,,,,,,, ToT,AQuA,2025/1/7,Doubao-lite-32k,45.28,74.02,0,"search_type=bfs, b=1, max_depth=6, max_steps=6, generation_n=1, evaluation_n=3, evaluation_type=vote, use_llm_completion=true",254,"1,850,249","7,284","150,301",592,"2,000,550",0.0881,,,,,,,,,,,,,,,,,,,,,,,,,,,, ToT,AQuA,2025/1/22,gpt-4o,81.50,99.21,0,"search_type=bfs, b=1, max_depth=6, max_steps=6, generation_n=1, evaluation_n=3, evaluation_type=vote, use_llm_completion=true",254,"2,347,538","9,242","266,069","1,048","2,613,607",8.5295,,,,,,,,,,,,,,,,,,,,,,,,,,,, ToT,AQuA,2025/1/22,Qwen2.5-72B-Instruct,81.10,99.21,0,"search_type=bfs, b=1, max_depth=6, max_steps=6, generation_n=1, evaluation_n=3, evaluation_type=vote, use_llm_completion=true",254,"6,371,642","25,085","260,613","1,026","6,632,255",3.7389,,,,,,,,,,,,,,,,,,,,,,,,,,,, ToT,AQuA,2025/1/22,Llama-3.3-70B-Instruct,83.07,100.00,0,"search_type=bfs, b=1, max_depth=6, max_steps=6, generation_n=1, evaluation_n=3, evaluation_type=vote, use_llm_completion=true",254,"4,735,188","18,642","480,660","1,892","5,215,848",2.9404,,,,,,,,,,,,,,,,,,,,,,,,,,,, ToT,AQuA,2025/1/22,Qwen2.5-7B-Instruct,53.94,100.00,0,"search_type=bfs, b=1, max_depth=6, max_steps=6, generation_n=1, evaluation_n=3, evaluation_type=vote, use_llm_completion=true",254,"8,224,468","32,380","378,214","1,489","8,602,682",0.0000,,,,,,,,,,,,,,,,,,,,,,,,,,,, ToT,AQuA,2025/1/22,Llama-3.1-8B-Instruct,59.06,100.00,0,"search_type=bfs, b=1, max_depth=6, max_steps=6, generation_n=1, evaluation_n=3, evaluation_type=vote, use_llm_completion=true",254,"4,896,222","19,276","843,462","3,321","5,739,684",0.0000,,,,,,,,,,,,,,,,,,,,,,,,,,,, ToT,AQuA,2025/1/22,Internllm2_5-7B,35.83,99.61,0,"search_type=bfs, b=1, max_depth=6, max_steps=6, generation_n=1, evaluation_n=3, evaluation_type=vote, use_llm_completion=true",254,"4,263,136","16,784","471,424","1,856","4,734,560",0.0000,,,,,,,,,,,,,,,,,,,,,,,,,,,, ToT,AQuA,2025/1/22,Qwen2-1.5B-Instruct,31.50,98.82,0,"search_type=bfs, b=1, max_depth=6, max_steps=6, generation_n=1, evaluation_n=3, evaluation_type=vote, use_llm_completion=true",254,"6,058,022","23,850","192,680",759,"6,250,702",0.0000,,,,,,,,,,,,,,,,,,,,,,,,,,,, ToT,AQuA,2025/1/22,Qwen2-0.5B-Instruct,29.92,100.00,0,"search_type=bfs, b=1, max_depth=6, max_steps=6, generation_n=1, evaluation_n=3, evaluation_type=vote, use_llm_completion=true",254,"8,100,085","31,890","600,196","2,363","8,700,281",0.0000,,,,,,,,,,,,,,,,,,,,,,,,,,,, ToT,AQuA,2025/2/10,deepseek-r1:1.5b,24.80,55.51,0,"search_type=bfs, b=1, max_depth=6, max_steps=6, generation_n=1, evaluation_n=3, evaluation_type=vote, use_llm_completion=true",254,"605,028","2,382","189,484",746,"794,512",0.0000,,,,,,,,,,,,,,,,,,,,,,,,,,,, IO,MATH-500,2025/1/24,gpt-3.5-turbo,17.20,100.00,4,,500,"154,881",310,"110,744",221,"265,625",0.2436,,,,,,,,,,,,,,,,,,,,,,,,,,,, IO,MATH-500,2025/1/24,Doubao-lite-32k,37.40,100.00,4,,500,"166,870",334,"144,860",290,"311,730",0.0187,,,,,,,,,,,,,,,,,,,,,,,,,,,, IO,MATH-500,2025/1/22,gpt-4o,41.80,100.00,4,,500,"153,832",308,"240,615",481,"394,447",2.7907,,,,,,,,,,,,,,,,,,,,,,,,,,,, IO,MATH-500,2025/1/24,Qwen2.5-72B-Instruct,70.20,100.00,4,,500,"169,549",339,"275,042",550,"444,591",0.2506,,,,,,,,,,,,,,,,,,,,,,,,,,,, IO,MATH-500,2025/1/24,Llama-3.3-70B-Instruct,69.40,100.00,4,,500,"155,879",312,"267,337",535,"423,216",0.2386,,,,,,,,,,,,,,,,,,,,,,,,,,,, IO,MATH-500,2025/1/24,Qwen2.5-7B-Instruct,59.40,100.00,4,,500,"169,549",339,"241,813",484,"411,362",0.0000,,,,,,,,,,,,,,,,,,,,,,,,,,,, IO,MATH-500,2025/1/24,Llama-3.1-8B-Instruct,38.60,100.00,4,,500,"155,563",311,"348,371",697,"503,934",0.0000,,,,,,,,,,,,,,,,,,,,,,,,,,,, IO,MATH-500,2025/1/24,Internllm2_5-7B,22.80,100.00,4,,500,"201,883",404,"266,005",532,"467,888",0.0000,,,,,,,,,,,,,,,,,,,,,,,,,,,, IO,MATH-500,2025/1/24,Qwen2-1.5B-Instruct,7.00,100.00,4,,500,"158,777",318,"255,101",510,"413,878",0.0000,,,,,,,,,,,,,,,,,,,,,,,,,,,, IO,MATH-500,2025/1/24,Qwen2-0.5B-Instruct,2.60,100.00,4,,500,"159,049",318,"270,281",541,"429,330",0.0000,,,,,,,,,,,,,,,,,,,,,,,,,,,, IO,MATH-500,2025/1/24,deepseek-r1:1.5b,43.80,100.00,4,,500,"157,049",314,"865,499","1,731","1,022,548",0.0000,,,,,,,,,,,,,,,,,,,,,,,,,,,, CoT,MATH-500,2025/1/24,gpt-3.5-turbo,39.80,100.00,4,,500,"329,381",659,"102,815",206,"432,196",0.3189,,,,,,,,,,,,,,,,,,,,,,,,,,,, CoT,MATH-500,2025/1/22,Doubao-lite-32k,59.00,100.00,4,,500,"336,370",673,"143,571",287,"479,941",0.0255,,,,,,,,,,,,,,,,,,,,,,,,,,,, CoT,MATH-500,2025/1/24,gpt-4o,68.00,100.00,4,,500,"329,332",659,"223,356",447,"552,688",3.0569,,,,,,,,,,,,,,,,,,,,,,,,,,,, CoT,MATH-500,2025/1/22,Qwen2.5-72B-Instruct,80.20,100.00,4,,500,"338,549",677,"280,466",561,"619,015",0.3490,,,,,,,,,,,,,,,,,,,,,,,,,,,, CoT,MATH-500,2025/1/24,Llama-3.3-70B-Instruct,71.20,100.00,4,,500,"342,879",686,"271,342",543,"614,221",0.3463,,,,,,,,,,,,,,,,,,,,,,,,,,,, CoT,MATH-500,2025/1/24,Qwen2.5-7B-Instruct,69.80,100.00,4,,500,"354,049",708,"263,155",526,"617,204",0.0000,,,,,,,,,,,,,,,,,,,,,,,,,,,, CoT,MATH-500,2025/1/24,Llama-3.1-8B-Instruct,25.80,100.00,4,,500,"342,879",686,"282,689",565,"625,568",0.0000,,,,,,,,,,,,,,,,,,,,,,,,,,,, CoT,MATH-500,2025/1/24,Internllm2_5-7B,46.60,100.00,4,,500,"332,883",666,"213,891",428,"546,774",0.0000,,,,,,,,,,,,,,,,,,,,,,,,,,,, CoT,MATH-500,2025/1/24,Qwen2-1.5B-Instruct,15.20,100.00,4,,500,"349,049",698,"187,328",375,"536,377",0.0000,,,,,,,,,,,,,,,,,,,,,,,,,,,, CoT,MATH-500,2025/1/24,Qwen2-0.5B-Instruct,6.20,100.00,4,,500,"349,049",698,"200,139",400,"549,188",0.0000,,,,,,,,,,,,,,,,,,,,,,,,,,,, CoT,MATH-500,2025/1/24,deepseek-r1:1.5b,49.40,100.00,4,,500,"341,549",683,"857,580","1,715","1,199,129",0.0000,,,,,,,,,,,,,,,,,,,,,,,,,,,, PoT,MATH-500,2025/2/10,gpt-3.5-turbo,28.80,83.80,4,,500,"239,902",480,"32,014",64,"271,916",0.1680,,,,,,,,,,,,,,,,,,,,,,,,,,,, PoT,MATH-500,2025/2/10,Doubao-lite-32k,32.60,68.00,4,,500,"254,377",509,"48,771",98,"303,148",0.0144,,,,,,,,,,,,,,,,,,,,,,,,,,,, PoT,MATH-500,2025/2/10,gpt-4o,46.20,86.40,4,,500,"241,357",483,"99,603",199,"340,960",1.5994,,,,,,,,,,,,,,,,,,,,,,,,,,,, PoT,MATH-500,2025/2/10,Qwen2.5-72B-Instruct,47.20,82.20,4,,500,"242,549",485,"170,823",342,"413,372",0.2330,,,,,,,,,,,,,,,,,,,,,,,,,,,, PoT,MATH-500,2025/2/10,Llama-3.3-70B-Instruct,42.60,80.20,4,,500,"253,879",508,"249,717",499,"503,596",0.2839,,,,,,,,,,,,,,,,,,,,,,,,,,,, PoT,MATH-500,2025/2/10,Qwen2.5-7B-Instruct,39.60,74.40,4,,500,"258,549",517,"150,263",301,"408,812",0.0000,,,,,,,,,,,,,,,,,,,,,,,,,,,, PoT,MATH-500,2025/2/10,Llama-3.1-8B-Instruct,25.40,68.40,4,,500,"253,879",508,"208,392",417,"462,271",0.0000,,,,,,,,,,,,,,,,,,,,,,,,,,,, PoT,MATH-500,2025/2/10,Internllm2_5-7B,15.00,32.40,4,,500,"247,883",496,"120,826",242,"368,709",0.0000,,,,,,,,,,,,,,,,,,,,,,,,,,,, PoT,MATH-500,2025/2/10,Qwen2-1.5B-Instruct,0.80,2.20,4,,500,"248,509",497,"538,361","1,077","786,870",0.0000,,,,,,,,,,,,,,,,,,,,,,,,,,,, PoT,MATH-500,2025/2/10,Qwen2-0.5B-Instruct,0.00,0.00,4,,500,"253,549",507,"183,653",367,"437,202",0.0000,,,,,,,,,,,,,,,,,,,,,,,,,,,, PoT,MATH-500,2025/2/10,deepseek-r1:1.5b,1.00,1.60,4,,500,"245,549",491,"785,518","1,571","1,031,067",0.0000,,,,,,,,,,,,,,,,,,,,,,,,,,,, SC-CoT,MATH-500,2025/2/10,gpt-3.5-turbo,40.80,100.00,4,"temperature=1, path_num=5",500,"345,411",691,"705,408","1,411","1,050,819",1.2308,,,,,,,,,,,,,,,,,,,,,,,,,,,, SC-CoT,MATH-500,2025/2/10,Doubao-lite-32k,65.80,99.80,4,"temperature=1, path_num=5",500,"362,390",725,"715,613","1,431","1,078,003",0.0734,,,,,,,,,,,,,,,,,,,,,,,,,,,, SC-CoT,MATH-500,2025/2/10,gpt-4o,74.60,100.00,4,"temperature=1, path_num=5",500,"345,347",691,"1,149,778","2,300","1,495,125",12.3611,,,,,,,,,,,,,,,,,,,,,,,,,,,, SC-CoT,MATH-500,2025/2/10,Qwen2.5-72B-Instruct,79.80,100.00,4,"temperature=1, path_num=5",500,"1,775,395","3,551","1,506,954","3,014","3,282,349",1.8504,,,,,,,,,,,,,,,,,,,,,,,,,,,, SC-CoT,MATH-500,2025/2/10,Llama-3.3-70B-Instruct,72.40,100.00,4,"temperature=1, path_num=5",500,"1,797,045","3,594","1,368,466","2,737","3,165,511",1.7845,,,,,,,,,,,,,,,,,,,,,,,,,,,, SC-CoT,MATH-500,2025/2/10,Qwen2.5-7B-Instruct,71.20,100.00,4,"temperature=1, path_num=5",500,"1,855,922","3,712","1,299,553","2,599","3,155,475",0.0000,,,,,,,,,,,,,,,,,,,,,,,,,,,, SC-CoT,MATH-500,2025/2/10,Llama-3.1-8B-Instruct,19.80,99.80,4,"temperature=1, path_num=5",500,"1,734,545","3,469","1,756,289","3,513","3,490,834",0.0000,,,,,,,,,,,,,,,,,,,,,,,,,,,, SC-CoT,MATH-500,2025/2/10,Internllm2_5-7B,9.20,97.40,4,"temperature=1, path_num=5",500,"1,994,983","3,990","1,254,893","2,510","3,249,876",0.0000,,,,,,,,,,,,,,,,,,,,,,,,,,,, SC-CoT,MATH-500,2025/2/10,Qwen2-1.5B-Instruct,2.00,89.40,4,"temperature=1, path_num=5",500,"1,805,170","3,610","1,333,854","2,668","3,139,024",0.0000,,,,,,,,,,,,,,,,,,,,,,,,,,,, SC-CoT,MATH-500,2025/2/10,Qwen2-0.5B-Instruct,2.20,98.80,4,"temperature=1, path_num=5",500,"1,808,691","3,617","988,991","1,978","2,797,682",0.0000,,,,,,,,,,,,,,,,,,,,,,,,,,,, SC-CoT,MATH-500,2025/2/10,deepseek-r1:1.5b,46.80,99.20,4,"temperature=1, path_num=5",500,"1,858,874","3,718","12,109,294","24,219","13,968,168",0.0000,,,,,,,,,,,,,,,,,,,,,,,,,,,, ReAct-Pro*,MATH-500,2025/2/10,gpt-3.5-turbo,23.80,100.00,4,max_steps=10,500,"3,708,461","7,417","124,253",249,"3,832,714",2.0406,,,,,,,,,,,,,,,,,,,,,,,,,,,, ReAct-Pro*,MATH-500,2025/2/10,Doubao-lite-32k,47.20,100.00,4,max_steps=10,500,"4,234,620","8,469","154,046",308,"4,388,666",0.1860,,,,,,,,,,,,,,,,,,,,,,,,,,,, ReAct-Pro*,MATH-500,2025/2/10,gpt-4o,54.00,100.00,4,max_steps=10,500,"5,834,537","11,669","318,718",637,"6,153,255",17.7735,,,,,,,,,,,,,,,,,,,,,,,,,,,, ReAct-Pro*,MATH-500,2025/2/10,Qwen2.5-72B-Instruct,62.80,100.00,4,max_steps=10,500,"5,747,268","11,495","379,849",760,"6,127,117",3.4541,,,,,,,,,,,,,,,,,,,,,,,,,,,, ReAct-Pro*,MATH-500,2025/2/10,Llama-3.3-70B-Instruct,64.60,100.00,4,max_steps=10,500,"5,223,611","10,447","418,268",837,"5,641,879",3.1806,,,,,,,,,,,,,,,,,,,,,,,,,,,, ReAct-Pro*,MATH-500,2025/2/10,Qwen2.5-7B-Instruct,48.80,100.00,4,max_steps=10,500,"4,646,708","9,293","343,532",687,"4,990,240",0.0000,,,,,,,,,,,,,,,,,,,,,,,,,,,, ReAct-Pro*,MATH-500,2025/2/10,Llama-3.1-8B-Instruct,28.80,100.00,4,max_steps=10,500,"7,486,706","14,973","1,276,923","2,554","8,763,629",0.0000,,,,,,,,,,,,,,,,,,,,,,,,,,,, ReAct-Pro*,MATH-500,2025/2/10,Internllm2_5-7B,14.80,100.00,4,max_steps=10,500,"11,831,496","23,663","2,354,609","4,709","14,186,105",0.0000,,,,,,,,,,,,,,,,,,,,,,,,,,,, ReAct-Pro*,MATH-500,2025/2/10,Qwen2-1.5B-Instruct,8.20,100.00,4,max_steps=10,500,"8,430,774","16,862","556,287","1,113","8,987,061",0.0000,,,,,,,,,,,,,,,,,,,,,,,,,,,, ReAct-Pro*,MATH-500,2025/2/10,Qwen2-0.5B-Instruct,0.60,100.00,4,max_steps=10,500,"18,137,392","36,275","1,305,048","2,610","19,442,440",0.0000,,,,,,,,,,,,,,,,,,,,,,,,,,,, ReAct-Pro*,MATH-500,2025/2/10,deepseek-r1:1.5b,24.40,100.00,4,max_steps=10,500,"20,729,970","41,460","9,447,378","18,895","30,177,348",0.0000,,,,,,,,,,,,,,,,,,,,,,,,,,,, ToT,MATH-500,2025/2/10,gpt-3.5-turbo,9.80,100.00,4,"search_type=bfs, b=1, max_depth=6, max_steps=6, generation_n=1, evaluation_n=3, evaluation_type=vote, use_llm_completion=true",500,"9,711,244","19,422","290,523",581,"10,001,767",5.2914,,,,,,,,,,,,,,,,,,,,,,,,,,,, ToT,MATH-500,2025/2/10,Doubao-lite-32k,1.20,94.20,4,"search_type=bfs, b=1, max_depth=6, max_steps=6, generation_n=1, evaluation_n=3, evaluation_type=vote, use_llm_completion=true",500,"5,338,500","10,677","226,000",452,"5,564,500",0.2371,,,,,,,,,,,,,,,,,,,,,,,,,,,, ToT,MATH-500,2025/2/10,gpt-4o,3.20,100.00,4,"search_type=bfs, b=1, max_depth=6, max_steps=6, generation_n=1, evaluation_n=3, evaluation_type=vote, use_llm_completion=true",500,"14,881,985","29,764","360,447",721,"15,242,432",40.8094,,,,,,,,,,,,,,,,,,,,,,,,,,,, ToT,MATH-500,2025/2/10,Qwen2.5-72B-Instruct,10.80,100.00,4,"search_type=bfs, b=1, max_depth=6, max_steps=6, generation_n=1, evaluation_n=3, evaluation_type=vote, use_llm_completion=true",500,"15,657,730","31,315","381,631",763,"16,039,361",9.0421,,,,,,,,,,,,,,,,,,,,,,,,,,,, ToT,MATH-500,2025/2/10,Llama-3.3-70B-Instruct,1.40,69.80,4,"search_type=bfs, b=1, max_depth=6, max_steps=6, generation_n=1, evaluation_n=3, evaluation_type=vote, use_llm_completion=true",500,"14,099,500","28,199","570,000","1,140","14,669,500",8.2699,,,,,,,,,,,,,,,,,,,,,,,,,,,, ToT,MATH-500,2025/2/10,Qwen2.5-7B-Instruct,1.40,91.60,4,"search_type=bfs, b=1, max_depth=6, max_steps=6, generation_n=1, evaluation_n=3, evaluation_type=vote, use_llm_completion=true",500,"9,749,000","19,498","418,500",837,"10,167,500",0.0000,,,,,,,,,,,,,,,,,,,,,,,,,,,, ToT,MATH-500,2025/2/10,Llama-3.1-8B-Instruct,1.80,90.80,4,"search_type=bfs, b=1, max_depth=6, max_steps=6, generation_n=1, evaluation_n=3, evaluation_type=vote, use_llm_completion=true",500,"7,729,000","15,458","1,306,000","2,612","9,035,000",0.0000,,,,,,,,,,,,,,,,,,,,,,,,,,,, ToT,MATH-500,2025/2/10,Internllm2_5-7B,0.20,99.00,4,"search_type=bfs, b=1, max_depth=6, max_steps=6, generation_n=1, evaluation_n=3, evaluation_type=vote, use_llm_completion=true",500,"7,515,000","15,030","835,500","1,671","8,350,500",0.0000,,,,,,,,,,,,,,,,,,,,,,,,,,,, ToT,MATH-500,2025/2/10,Qwen2-1.5B-Instruct,0.80,97.20,4,"search_type=bfs, b=1, max_depth=6, max_steps=6, generation_n=1, evaluation_n=3, evaluation_type=vote, use_llm_completion=true",500,"4,408,000","8,816","127,000",254,"4,535,000",0.0000,,,,,,,,,,,,,,,,,,,,,,,,,,,, ToT,MATH-500,2025/2/10,Qwen2-0.5B-Instruct,0.00,96.20,4,"search_type=bfs, b=1, max_depth=6, max_steps=6, generation_n=1, evaluation_n=3, evaluation_type=vote, use_llm_completion=true",500,"5,590,500","11,181","406,000",812,"5,996,500",0.0000,,,,,,,,,,,,,,,,,,,,,,,,,,,, ToT,MATH-500,2025/2/10,deepseek-r1:1.5b,0.40,71.60,4,"search_type=bfs, b=1, max_depth=6, max_steps=6, generation_n=1, evaluation_n=3, evaluation_type=vote, use_llm_completion=true",500,"1,831,000","3,662","110,500",221,"1,941,500",0.0000,,,,,,,,,,,,,,,,,,,,,,,,,,,,