Spaces:

aikobay
/

traning

Sleeping

App Files Files Community

traning / searchWorkerBackup.py

aikobay

Create searchWorkerBackup.py

9b959aa verified about 1 month ago

raw

history blame contribute delete

42.5 kB

	import os
	import torch
	import pandas as pd
	import logging
	import faiss
	import numpy as np
	import time
	import gensim
	import random
	import multiprocessing
	from fastapi import FastAPI, HTTPException, BackgroundTasks
	from pydantic import BaseModel
	from datasets import load_dataset
	from huggingface_hub import login, hf_hub_download, HfApi, create_repo
	from keybert import KeyBERT
	from sentence_transformers import SentenceTransformer
	from joblib import Parallel, delayed
	from tqdm import tqdm
	import tempfile
	import re
	import sys
	import asyncio
	import gc
	from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor

	# ✅ Logging setup
	logging.basicConfig(level=logging.INFO)
	logger = logging.getLogger(__name__)

	# ✅ Shared thread pool for blocking work, capped at 32 workers.
	# os.cpu_count() returns None when the CPU count cannot be determined, so
	# fall back to one CPU instead of raising TypeError at import time.
	thread_pool = ThreadPoolExecutor(max_workers=min(32, (os.cpu_count() or 1) * 2))

	# ✅ Module-level memory-management state
	last_gc_time = time.time()  # timestamp of the most recent cleanup sweep
	request_count = 0  # search requests handled by this process so far
	CLEANUP_INTERVAL = 100  # trigger a memory cleanup every 100 requests

	# ✅ Create the FastAPI application instance
	app = FastAPI(title="🚀 KeyBERT + Word2Vec 기반 FAISS 검색 API", version="1.2")

	# ✅ Pick the compute device: CUDA when torch reports it available, else CPU
	device = "cuda" if torch.cuda.is_available() else "cpu"
	logger.info(f"🚀 실행 디바이스: {device.upper()}")

	# ✅ Hugging Face login (token is read from the HF_API_TOKEN env variable)
	HF_API_TOKEN = os.getenv("HF_API_TOKEN")

	# Only the main process logs in; a missing or malformed token is reported
	# with a warning instead of attempting the login.
	if multiprocessing.current_process().name == "MainProcess":
	    if not HF_API_TOKEN or not HF_API_TOKEN.startswith("hf_"):
	        logger.warning("⚠️ HF_API_TOKEN이 없거나 잘못된 형식입니다.")
	    else:
	        logger.info("🔑 Hugging Face API 로그인 중 (MainProcess)...")
	        login(token=HF_API_TOKEN)


	# ✅ Model globals are only declared here; load_models() fills them in later
	word2vec_model = None
	kw_model = None
	embedding_model = None
	original_embedding_model = None

	# ✅ Lazy loading: model loader
	def load_models():
	    """Load the Word2Vec, KeyBERT and sentence-embedding models on first use.

	    Fills the module-level ``word2vec_model``, ``kw_model``,
	    ``embedding_model`` and ``original_embedding_model`` globals. Calling it
	    again after a successful load returns immediately.

	    Returns:
	        bool: True when the models are available, False when loading raised
	        (the error is logged, not propagated).
	    """
	    global word2vec_model, kw_model, embedding_model, original_embedding_model

	    # Already loaded? kw_model is part of the check so that it agrees with
	    # validate_models(), which requires all three models.
	    if (
	        word2vec_model is not None
	        and kw_model is not None
	        and embedding_model is not None
	    ):
	        return True

	    worker_id = os.getenv("WORKER_ID", multiprocessing.current_process().name)
	    logger.info(f"🔄 워커 {worker_id}: 모델 로드 시작...")

	    try:
	        # 1. Word2Vec vectors, downloaded from the Hub dataset repo
	        if word2vec_model is None:
	            model_repo = "aikobay/item-model"
	            model_path = hf_hub_download(
	                repo_id=model_repo,
	                filename="item_vectors.bin",
	                repo_type="dataset",
	                token=HF_API_TOKEN,
	            )
	            word2vec_model = gensim.models.KeyedVectors.load_word2vec_format(model_path, binary=True)
	            logger.info(f"✅ 워커 {worker_id}: Word2Vec 모델 로드 완료! 단어 수: {len(word2vec_model.key_to_index)}")

	        # 2. KeyBERT. Hand it the SentenceTransformer instance that was just
	        # created instead of the model name, so the same weights are not
	        # loaded into memory twice.
	        if kw_model is None:
	            original_embedding_model = SentenceTransformer("paraphrase-multilingual-MiniLM-L12-v2")
	            kw_model = KeyBERT(model=original_embedding_model)
	            logger.info(f"✅ 워커 {worker_id}: KeyBERT 모델 로드 완료!")

	        # 3. Korean-specific embedding model, falling back to the
	        # multilingual model when it cannot be loaded
	        if embedding_model is None:
	            try:
	                embedding_model = SentenceTransformer("jhgan/ko-sroberta-multitask")
	                logger.info(f"✅ 워커 {worker_id}: 한국어 특화 임베딩 모델 로드 완료!")
	            except Exception as e:
	                logger.warning(f"⚠️ 워커 {worker_id}: 한국어 특화 모델 로드 실패, 기존 모델 유지: {e}")
	                embedding_model = original_embedding_model

	        # GPU placement: only the main process and workers whose id ends in
	        # 0, 1 or 2 move the embedding model to the GPU; the rest stay on CPU.
	        if device == "cuda":
	            try:
	                if worker_id == "MainProcess" or worker_id.endswith(("0", "1", "2")):
	                    embedding_model.to(device)
	                    embedding_model.eval()  # inference mode
	                    logger.info(f"✅ 워커 {worker_id}: GPU에 임베딩 모델 로드 완료!")
	                else:
	                    logger.info(f"⚠️ 워커 {worker_id}: 메모리 효율화를 위해 CPU 모드 사용")
	            except Exception as e:
	                logger.error(f"❌ 워커 {worker_id}: GPU 모델 초기화 오류: {e}")

	        # Release temporaries created while loading
	        gc.collect()
	        if device == "cuda":
	            torch.cuda.empty_cache()

	        return True

	    except Exception as e:
	        logger.error(f"❌ 워커 {worker_id}: 모델 로드 중 오류 발생: {e}")
	        return False

	# ✅ Load the active auction item data
	async def load_huggingface_jsonl(dataset_name, split="train"):
	    """Fetch ``aikobay/<dataset_name>`` from the Hugging Face Hub as a DataFrame.

	    The download runs in the shared thread pool. Rows containing missing
	    values are dropped. Any failure is logged and an empty DataFrame is
	    returned instead of raising.
	    """

	    def _fetch_frame():
	        # Blocking Hub download + conversion, executed off the event loop
	        dataset = load_dataset(f"aikobay/{dataset_name}", split=split)
	        frame = dataset.to_pandas()
	        return frame.dropna()

	    try:
	        event_loop = asyncio.get_event_loop()
	        return await event_loop.run_in_executor(thread_pool, _fetch_frame)
	    except Exception as e:
	        logger.error(f"❌ 데이터 로드 중 오류 발생: {e}")
	        return pd.DataFrame()

	# Declared only; the actual data is loaded in the startup handler
	active_sale_items = None

	# ✅ FAISS index state (the index object and the item names it was built from)
	faiss_index = None
	indexed_items = []

	# ✅ Throttled memory cleanup
	async def cleanup_memory():
	    """Run a garbage-collection sweep unless one ran within the last 15 seconds.

	    Returns:
	        bool: True when a sweep was performed, False when it was skipped.
	    """
	    global last_gc_time

	    now = time.time()

	    # Sweeping too often hurts throughput, so skip when the last one is recent
	    if now - last_gc_time <= 15:
	        return False

	    gc.collect()

	    # Also release cached GPU memory when CUDA is present
	    if torch.cuda.is_available():
	        torch.cuda.empty_cache()

	    last_gc_time = now
	    logger.debug("🧹 메모리 정리 완료")
	    return True

	# ✅ Text vectorisation helper
	async def encode_texts_parallel(texts, batch_size=1024):
	    """Encode *texts* into normalised float32 embeddings.

	    The encoding itself runs in the shared thread pool under
	    ``torch.no_grad()``. An empty float32 array is returned when the models
	    cannot be loaded, when *texts* is empty, or when encoding fails.
	    """
	    # Make sure the embedding model exists before doing anything else.
	    # NOTE(review): load_models() is synchronous and is called directly from
	    # this coroutine, so it blocks the event loop while it runs.
	    if embedding_model is None:
	        worker_id = multiprocessing.current_process().name
	        logger.warning(f"⚠️ 워커 {worker_id}: 벡터화 전 모델 로드 필요")
	        models_ready = load_models()
	        if not models_ready:
	            logger.error(f"❌ 워커 {worker_id}: 모델 로드 실패, 벡터화 불가")
	            return np.array([]).astype("float32")

	    if not texts:
	        return np.array([]).astype("float32")

	    try:
	        # Cap the batch size: 1024 for more than ten texts, 2048 otherwise
	        batch_cap = 1024 if len(texts) > 10 else 2048
	        batch_size = min(batch_cap, batch_size)

	        def _run_encoder():
	            # Gradients are never needed for inference
	            with torch.no_grad():
	                return embedding_model.encode(
	                    texts,
	                    batch_size=batch_size,
	                    convert_to_numpy=True,
	                    show_progress_bar=False,
	                    device=device,
	                    normalize_embeddings=True
	                )

	        event_loop = asyncio.get_event_loop()
	        vectors = await event_loop.run_in_executor(thread_pool, _run_encoder)
	        return vectors.astype("float32")

	    except Exception as e:
	        logger.error(f"❌ 벡터화 오류: {str(e)}")
	        return np.array([]).astype("float32")

	    finally:
	        # Release cached GPU memory on every 25th request only
	        if device == "cuda" and request_count % 25 == 0:
	            torch.cuda.empty_cache()

	# ✅ Persist the FAISS index (Hugging Face Hub, with a local fallback)
	async def save_faiss_index():
	    """Upload the current FAISS index and item list to the Hugging Face Hub.

	    The target dataset repo comes from ``HF_INDEX_REPO`` (default
	    ``aikobay/saleitem_faiss_index``) and is created as a private repo when
	    it does not exist. If the upload raises, the index and item list are
	    written to the current working directory instead.

	    Returns:
	        bool: True when either the upload or the local backup succeeded,
	        False when there is nothing to save or both attempts failed.
	    """
	    global faiss_index, indexed_items

	    nothing_to_save = faiss_index is None or not indexed_items
	    if nothing_to_save:
	        logger.error("❌ 저장할 FAISS 인덱스가 없습니다.")
	        return False

	    def _upload_to_hub(repo_id):
	        api = HfApi()

	        # Reuse the repo when it exists, otherwise create it
	        try:
	            api.repo_info(repo_id=repo_id, repo_type="dataset")
	            logger.info(f"✅ 기존 레포지토리 사용: {repo_id}")
	        except Exception:
	            logger.info(f"🔄 레포지토리가 존재하지 않아 새로 생성합니다: {repo_id}")
	            create_repo(
	                repo_id=repo_id,
	                repo_type="dataset",
	                private=True,
	                exist_ok=True
	            )
	            logger.info(f"✅ 레포지토리 생성 완료: {repo_id}")

	        # Stage everything in a temporary directory before uploading
	        with tempfile.TemporaryDirectory() as staging_dir:
	            index_path = os.path.join(staging_dir, "faiss_index.bin")
	            items_path = os.path.join(staging_dir, "indexed_items.txt")

	            faiss.write_index(faiss_index, index_path)

	            # One item name per line
	            with open(items_path, "w", encoding="utf-8") as handle:
	                handle.write("\n".join(indexed_items))

	            readme_path = os.path.join(staging_dir, "README.md")
	            with open(readme_path, "w", encoding="utf-8") as handle:
	                handle.write(f"""# FAISS 인덱스 저장소
	이 저장소는 상품 검색을 위한 FAISS 인덱스와 관련 데이터를 포함하고 있습니다.
	- 최종 업데이트: {pd.Timestamp.now()}
	- 인덱스 항목 수: {len(indexed_items)}
	- 모델: KeyBERT + Word2Vec
	이 저장소는 'aikobay/initial_saleitem_dataset'의 상품 데이터를 기반으로 생성된 벡터 인덱스를 저장하기 위해 자동 생성되었습니다.
	""")

	            # Upload order: index, item list, README
	            artifacts = {
	                "faiss_index.bin": index_path,
	                "indexed_items.txt": items_path,
	                "README.md": readme_path,
	            }
	            for name_in_repo, local_file in artifacts.items():
	                api.upload_file(
	                    path_or_fileobj=local_file,
	                    path_in_repo=name_in_repo,
	                    repo_id=repo_id,
	                    repo_type="dataset"
	                )

	        logger.info(f"✅ FAISS 인덱스가 Hugging Face Hub에 저장되었습니다. 레포: {repo_id}")
	        return True

	    def _write_local_copy():
	        local_path = os.path.join(os.getcwd(), "faiss_index.bin")
	        faiss.write_index(faiss_index, local_path)
	        with open("indexed_items.txt", "w", encoding="utf-8") as handle:
	            handle.write("\n".join(indexed_items))
	        logger.info(f"✅ FAISS 인덱스가 로컬에 백업 저장되었습니다: {local_path}")
	        return True

	    try:
	        target_repo = os.getenv("HF_INDEX_REPO", "aikobay/saleitem_faiss_index")
	        event_loop = asyncio.get_event_loop()
	        # Blocking Hub calls run in the shared thread pool
	        return await event_loop.run_in_executor(thread_pool, _upload_to_hub, target_repo)

	    except Exception as e:
	        logger.error(f"❌ FAISS 인덱스 Hub 저장 중 오류 발생: {e}")

	        # Fall back to a local backup
	        try:
	            event_loop = asyncio.get_event_loop()
	            return await event_loop.run_in_executor(thread_pool, _write_local_copy)
	        except Exception as local_err:
	            logger.error(f"❌ 로컬 백업 저장도 실패: {local_err}")
	            return False

	# ✅ Load the FAISS index from the Hugging Face Hub
	async def load_faiss_index_safe():
	    """Download and load the saved FAISS index, retrying with backoff.

	    Up to five attempts are made, waiting 1, 2, 4 and 8 seconds between
	    them. On success the module-level ``faiss_index`` and ``indexed_items``
	    are replaced together.

	    Returns:
	        bool: True when the index was loaded, False after the last failed
	        attempt.
	    """
	    global faiss_index, indexed_items

	    max_retries = 5
	    retry_delay = 1  # initial delay in seconds
	    worker_id = os.getenv("WORKER_ID", multiprocessing.current_process().name)
	    repo_id = os.getenv("HF_INDEX_REPO", "aikobay/saleitem_faiss_index")
	    loop = asyncio.get_running_loop()

	    def _download_and_read():
	        # The index repo is created as private by save_faiss_index(), so pass
	        # the token explicitly, as load_models() does. None falls back to
	        # whatever credentials huggingface_hub already has.
	        token = HF_API_TOKEN or None
	        index_path = hf_hub_download(
	            repo_id=repo_id,
	            filename="faiss_index.bin",
	            repo_type="dataset",
	            token=token,
	        )
	        items_path = hf_hub_download(
	            repo_id=repo_id,
	            filename="indexed_items.txt",
	            repo_type="dataset",
	            token=token,
	        )

	        loaded_index = faiss.read_index(index_path)
	        with open(items_path, "r", encoding="utf-8") as f:
	            loaded_items = f.read().splitlines()
	        return loaded_index, loaded_items

	    for attempt in range(max_retries):
	        try:
	            # Network and disk I/O run in the thread pool so the event loop
	            # keeps serving other coroutines meanwhile.
	            loaded_index, loaded_items = await loop.run_in_executor(thread_pool, _download_and_read)
	        except Exception as e:
	            logger.warning(f"⚠️ 워커 {worker_id}: 인덱스 로드 실패 (시도 {attempt+1}/{max_retries}): {e}")

	            # Exponential backoff, but no pointless wait after the last attempt
	            if attempt < max_retries - 1:
	                await asyncio.sleep(retry_delay * (2 ** attempt))
	            continue

	        # Publish both globals together so they always describe the same index
	        faiss_index = loaded_index
	        indexed_items = loaded_items

	        logger.info(f"✅ 워커 {worker_id}: FAISS 인덱스 로드 완료. 총 {len(indexed_items)}개 항목")
	        return True

	    logger.error(f"❌ 워커 {worker_id}: 인덱스 로드 최대 재시도 횟수 초과")
	    return False

	# ✅ Rebuild the FAISS index (IVF based)
	async def rebuild_faiss_index():
	    """Rebuild the FAISS index from the latest item dataset and save it.

	    The item names of ``initial_saleitem_dataset`` are embedded and put into
	    an inner-product IVF index (IVF+PQ above 10,000 items, IVF-Flat
	    otherwise). ``faiss_index``, ``indexed_items`` and ``active_sale_items``
	    are replaced together only after the new index has been built, so
	    concurrent searches never see item names that belong to a different index.

	    Returns:
	        bool: True when the index was rebuilt.

	    Raises:
	        RuntimeError: when the dataset cannot be loaded or embedding fails.
	    """
	    global faiss_index, indexed_items, active_sale_items

	    logger.info("🔄 FAISS 인덱스를 고속 IVF 기반으로 재구축 중...")

	    # Load the latest item data into a local first; globals are swapped at the end
	    sale_items = await load_huggingface_jsonl("initial_saleitem_dataset")
	    if sale_items.empty:
	        logger.error("❌ 상품 데이터를 로드할 수 없습니다.")
	        raise RuntimeError("상품 데이터 로드 실패")

	    item_names = sale_items["ITEMNAME"].tolist()
	    total_items = len(item_names)
	    logger.info(f"🔹 총 {total_items}개 상품 고속 벡터화 시작...")

	    item_vectors = await encode_texts_parallel(item_names, batch_size=2048)

	    # encode_texts_parallel() signals failure with an empty 1-D array, which
	    # would otherwise crash below on shape[1].
	    if item_vectors.ndim != 2 or item_vectors.shape[0] != total_items:
	        logger.error("❌ 상품 벡터화에 실패하여 인덱스를 구축할 수 없습니다.")
	        raise RuntimeError("상품 벡터화 실패")

	    if device == "cuda":
	        torch.cuda.empty_cache()

	    loop = asyncio.get_running_loop()

	    def _build_ivf_index():
	        dimension = item_vectors.shape[1]

	        # Cluster count: 4 * sqrt(n), kept within [32, 1024] ...
	        nlist = int(np.sqrt(total_items) * 4)
	        nlist = max(32, min(nlist, 1024))
	        # ... but never more clusters than vectors, or training fails.
	        nlist = max(1, min(nlist, total_items))

	        # The embeddings are L2-normalised and callers rank scores in
	        # descending order, so the index must use inner product (cosine
	        # similarity) rather than FAISS's default L2 distance.
	        metric = faiss.METRIC_INNER_PRODUCT
	        quantizer = faiss.IndexFlatIP(dimension)

	        if total_items > 10000:
	            # IVF + product quantisation for large collections. The number
	            # of sub-vectors must divide the dimension exactly.
	            M = max(1, min(64, dimension // 2))
	            while M > 1 and dimension % M != 0:
	                M -= 1
	            nbits = 8
	            index = faiss.IndexIVFPQ(quantizer, dimension, nlist, M, nbits, metric)
	        else:
	            index = faiss.IndexIVFFlat(quantizer, dimension, nlist, metric)

	        index.train(item_vectors)
	        index.add(item_vectors)

	        # Probe about a quarter of the clusters (at most 32, at least 1)
	        index.nprobe = max(1, min(32, nlist // 4))

	        logger.info(f"✅ IVF 인덱스 구축 완료: clusters={nlist}, nprobe={index.nprobe}")
	        return index

	    new_index = await loop.run_in_executor(thread_pool, _build_ivf_index)

	    # Swap index, item names and item table together (no await in between)
	    faiss_index = new_index
	    indexed_items = item_names
	    active_sale_items = sale_items

	    logger.info(f"✅ 고속 FAISS 인덱스 구축 완료! 총 {len(indexed_items)}개 항목")

	    if device == "cuda":
	        torch.cuda.empty_cache()
	    gc.collect()

	    # Persist the freshly built index
	    await save_faiss_index()
	    return True


	# ✅ Ensure a FAISS index is available, building one only when necessary
	async def check_faiss_index():
	    """Make sure ``faiss_index`` is populated.

	    When no index is in memory, loading from the Hub is tried first and a
	    full rebuild is the fallback.

	    Raises:
	        RuntimeError: when the index is still missing afterwards.
	    """
	    global faiss_index

	    index_missing = faiss_index is None
	    if index_missing:
	        loaded = await load_faiss_index_safe()
	        if not loaded:
	            # Nothing usable on the Hub: build from scratch
	            logger.warning("⚠️ 저장된 인덱스가 없어 새로 구축합니다.")
	            await rebuild_faiss_index()

	    # Still nothing after all of the above means initialisation failed
	    if faiss_index is None:
	        raise RuntimeError("FAISS 인덱스 초기화에 실패했습니다.")

	# ✅ Keyword extraction
	async def extract_keywords(query: str, top_n: int = 2):
	    """Extract up to *top_n* single-word keywords from *query* with KeyBERT.

	    Queries of three characters or fewer are returned unchanged as a
	    one-element list. Keywords scoring 0.2 or below are discarded. When
	    extraction raises, the first two whitespace-separated tokens of the
	    query are returned instead.
	    """
	    # Very short queries are not worth the model call
	    if len(query) <= 3:
	        return [query]

	    event_loop = asyncio.get_event_loop()
	    korean_stop_words = ["이", "그", "저", "을", "를", "에", "에서", "은", "는"]

	    def _run_keybert():
	        return kw_model.extract_keywords(
	            query,
	            keyphrase_ngram_range=(1, 1),  # single words only
	            stop_words=korean_stop_words,
	            use_mmr=True,
	            diversity=0.5,
	            top_n=top_n
	        )

	    try:
	        scored_keywords = await event_loop.run_in_executor(thread_pool, _run_keybert)
	        # Keep only keywords whose score clears the threshold
	        return [keyword for keyword, score in scored_keywords if score > 0.2]
	    except Exception as e:
	        logger.error(f"❌ 키워드 추출 오류: {str(e)}")
	        # Fall back to plain whitespace tokenisation
	        return query.split()[:2]


	# ✅ Keyword expansion with Word2Vec
	async def expand_keywords_with_word2vec(keywords: list, max_new=2):
	    """Extend *keywords* with close Word2Vec neighbours.

	    For each keyword known to the model, up to *max_new* neighbours with a
	    similarity above 0.7 are appended. The result keeps the original
	    keywords first and in their original order, contains no duplicates and
	    holds at most five entries.

	    The input is returned unchanged when the model is not loaded, when
	    *keywords* is empty or already has four or more entries, or when the
	    expansion raises.
	    """
	    if word2vec_model is None or not keywords:
	        return keywords

	    # Enough keywords already: expansion is unnecessary
	    if len(keywords) >= 4:
	        return keywords

	    max_total = 5
	    loop = asyncio.get_running_loop()

	    def _expand_keywords():
	        # An ordered list plus a membership set is used instead of a bare
	        # set: set iteration order is arbitrary, which made the previous
	        # slicing return duplicates or drop expansions unpredictably.
	        expanded = list(dict.fromkeys(keywords))
	        seen = set(expanded)

	        for keyword in keywords:
	            if len(expanded) >= max_total:
	                break

	            # Only words present in the model's vocabulary can be expanded
	            if keyword not in word2vec_model:
	                continue

	            for word, score in word2vec_model.most_similar(keyword, topn=max_new):
	                if len(expanded) >= max_total:
	                    break
	                if score > 0.7 and word not in seen:
	                    seen.add(word)
	                    expanded.append(word)

	        return expanded[:max_total]

	    try:
	        return await loop.run_in_executor(thread_pool, _expand_keywords)
	    except Exception as e:
	        logger.error(f"❌ Word2Vec 확장 오류: {str(e)}")
	        return keywords  # fall back to the original keywords


	# ✅ Batched FAISS search
	async def unified_search(vectors, top_k=5):
	    """Search the FAISS index for all *vectors* in one batched call.

	    Args:
	        vectors: 2-D float32 array with one query vector per row.
	        top_k: number of neighbours requested per query vector.

	    Returns:
	        list: one list per query vector holding ``(index, score)`` tuples.
	        An empty list is returned for empty input or when the search raises.
	    """
	    if vectors.size == 0:
	        return []

	    # Every 100th request, cap nprobe at 8 to favour speed. getattr() keeps
	    # this safe for index types that have no nprobe attribute.
	    if request_count % 100 == 0 and getattr(faiss_index, "nprobe", 0) > 8:
	        faiss_index.nprobe = 8

	    loop = asyncio.get_running_loop()

	    def _batch_search():
	        return faiss_index.search(vectors, top_k)

	    try:
	        distances, indices = await loop.run_in_executor(thread_pool, _batch_search)

	        # FAISS pads missing neighbours with index -1. A plain
	        # "idx < len(...)" test lets -1 through, and Python would then map
	        # it to the LAST item, so the lower bound is checked as well.
	        item_count = len(indexed_items)
	        return [
	            [
	                (int(idx), float(dist))
	                for idx, dist in zip(row_indices, row_distances)
	                if 0 <= idx < item_count
	            ]
	            for row_indices, row_distances in zip(indices, distances)
	        ]
	    except Exception as e:
	        logger.error(f"❌ 검색 오류: {str(e)}")
	        return []


	# ✅ Keyword-assisted FAISS search
	def _item_seq_lookup(item_names, keep_first=False):
	    """Return a ``{ITEMNAME: ITEMSEQ}`` dict for *item_names*.

	    Reads the module-level ``active_sale_items`` table. An empty dict is
	    returned when the table is missing, empty or lacks the needed columns.
	    When a name occurs in several rows the last row wins, unless
	    *keep_first* is set.
	    """
	    if active_sale_items is None or active_sale_items.empty or not item_names:
	        return {}

	    try:
	        rows = active_sale_items[active_sale_items["ITEMNAME"].isin(item_names)]
	        pairs = zip(rows["ITEMNAME"], rows["ITEMSEQ"])
	    except KeyError:
	        return {}

	    lookup = {}
	    for name, seq in pairs:
	        if keep_first and name in lookup:
	            continue
	        lookup[name] = seq
	    return lookup


	async def search_faiss_with_keywords(query: str, top_k: int = 5, keywords=None):
	    """Search the FAISS index with the query and its extracted keywords.

	    Queries of one or two characters take a fast path that searches with
	    the query vector only. Longer queries are searched together with their
	    keywords; query hits are weighted x3.0, keyword hits x0.5, and an item
	    found several times keeps its highest weighted score.

	    Args:
	        query: the search text.
	        top_k: maximum number of results.
	        keywords: optional precomputed keywords; extracted when None.

	    Returns:
	        list: dicts with ``ITEMSEQ``, ``ITEMNAME`` and ``score``; an empty
	        list when vectorisation or the search fails.
	    """
	    global request_count

	    # Make sure an index is available (may raise RuntimeError)
	    if faiss_index is None:
	        await check_faiss_index()

	    start_time = time.time()
	    request_count += 1

	    def _in_range(idx):
	        # FAISS reports missing neighbours as -1; a negative index would
	        # silently address items from the end of the list.
	        return 0 <= idx < len(indexed_items)

	    # Fast path for very short queries
	    if len(query) <= 2:
	        quick_results = []
	        vector = await encode_texts_parallel([query])

	        if vector.size == 0:
	            logger.warning(f"⚠️ 벡터화 실패: {query}")
	        else:
	            # Goes through unified_search() so the FAISS call runs in the
	            # thread pool instead of blocking the event loop.
	            hits = await unified_search(vector, top_k=top_k)
	            ranked = [(idx, score) for idx, score in (hits[0] if hits else []) if _in_range(idx)]
	            # Iterating the pandas columns is expected to yield plain Python
	            # scalars (unlike the previous `.values[0]`, which returned a
	            # numpy scalar) - TODO confirm against the ITEMSEQ dtype.
	            seq_by_name = _item_seq_lookup([indexed_items[idx] for idx, _ in ranked], keep_first=True)

	            for idx, score in ranked:
	                item_name = indexed_items[idx]
	                if item_name in seq_by_name:
	                    quick_results.append({
	                        "ITEMSEQ": seq_by_name[item_name],
	                        "ITEMNAME": item_name,
	                        "score": float(score)
	                    })

	        if request_count % CLEANUP_INTERVAL == 0:
	            await cleanup_memory()

	        return quick_results

	    # 1. Keyword extraction
	    if keywords is None:
	        keywords = await extract_keywords(query)

	    # 2. Encode the query and all keywords in one batch
	    search_texts = [query, *keywords]

	    try:
	        all_vectors = await encode_texts_parallel(search_texts)

	        if all_vectors.size == 0:
	            logger.warning(f"⚠️ 벡터화 실패: {query}")
	            return []

	        # 3. One batched search for every vector
	        search_results = await unified_search(all_vectors, top_k=top_k)

	        if not search_results:
	            return []

	        # 4. Merge results; the first row belongs to the query itself
	        all_results = {}
	        for idx, score in search_results[0]:
	            if _in_range(idx):
	                all_results[idx] = score * 3.0

	        keyword_weight = 0.5
	        for keyword_results in search_results[1:]:
	            for idx, score in keyword_results:
	                if not _in_range(idx):
	                    continue
	                weighted = score * keyword_weight
	                # Keep the best score seen for this item
	                all_results[idx] = max(all_results.get(idx, weighted), weighted)

	        # 5. Rank and resolve ITEMSEQ values with a single table lookup
	        top_items = sorted(all_results.items(), key=lambda x: x[1], reverse=True)[:top_k]
	        seq_by_name = _item_seq_lookup([indexed_items[idx] for idx, _ in top_items])

	        recommendations = []
	        for idx, score in top_items:
	            item_name = indexed_items[idx]
	            if item_name in seq_by_name:
	                recommendations.append({
	                    "ITEMSEQ": seq_by_name[item_name],
	                    "ITEMNAME": item_name,
	                    "score": float(score)
	                })

	        if request_count % CLEANUP_INTERVAL == 0:
	            await cleanup_memory()

	        # Only slow searches (over one second) are logged
	        elapsed = time.time() - start_time
	        if elapsed > 1.0:
	            logger.info(f"🔍 검색 완료 | 소요시간: {elapsed:.2f}초 | 결과: {len(recommendations)}개")

	        return recommendations[:top_k]

	    except Exception as e:
	        logger.error(f"❌ 검색 프로세스 오류: {str(e)}")
	        return []


	# ✅ Direct (substring) text matching
	async def find_direct_matches(query, limit=5, existing_names=None):
	    """Find items whose name contains *query*, ignoring case.

	    Args:
	        query: text to look for inside the indexed item names.
	        limit: maximum number of matches to return.
	        existing_names: item names to skip (for example ones already found).

	    Returns:
	        list: dicts with ``ITEMSEQ``, ``ITEMNAME`` and a fixed ``score`` of
	        1.0; empty when the item table is not loaded.
	    """
	    loop = asyncio.get_running_loop()

	    def _find_matches():
	        if limit <= 0 or active_sale_items is None or active_sale_items.empty:
	            return []

	        query_lower = query.lower()
	        existing = set(existing_names or [])

	        # Collect candidate names, stopping once enough were found. The
	        # earlier version tested the still-empty result list here, so the
	        # limit never applied and every item was scanned.
	        candidates = []
	        seen = set()
	        for item_name in indexed_items:
	            if len(candidates) >= limit:
	                break
	            if item_name in existing or item_name in seen:
	                continue
	            if isinstance(item_name, str) and query_lower in item_name.lower():
	                seen.add(item_name)
	                candidates.append(item_name)

	        if not candidates:
	            return []

	        # Resolve all candidates with a single DataFrame filter
	        mask = active_sale_items["ITEMNAME"].isin(candidates)
	        matches = []
	        for _, row in active_sale_items[mask].iterrows():
	            if len(matches) >= limit:
	                break
	            matches.append({
	                "ITEMSEQ": row["ITEMSEQ"],
	                "ITEMNAME": row["ITEMNAME"],
	                "score": 1.0
	            })
	        return matches

	    # Run the scan in the thread pool
	    return await loop.run_in_executor(thread_pool, _find_matches)

	# ✅ API request model
	class RecommendRequest(BaseModel):
	    """Request body accepted by ``POST /api/recommend``."""

	    # Free-text search query
	    search_query: str
	    # Requested number of recommendations
	    top_k: int = 5
	    # Whether keyword expansion should be used
	    use_expansion: bool = True

	# Model readiness check
	def validate_models():
	    """Return True only when the Word2Vec, KeyBERT and embedding models are all loaded."""
	    required_models = (word2vec_model, kw_model, embedding_model)
	    return all(model is not None for model in required_models)

	# ✅ Recommendation API endpoint
	@app.post("/api/recommend")
	async def recommend(request: RecommendRequest, background_tasks: BackgroundTasks):
	    """Return item recommendations for a search query.

	    Responses:
	        200: ``{"query": ..., "recommendations": [...]}``
	        400: the query is empty after stripping whitespace.
	        503: the models are not loaded, even after one more load attempt.
	        500: any unexpected failure while searching.
	    """
	    if not validate_models():
	        # Models missing: make one last attempt before giving up
	        load_models()
	        if not validate_models():
	            raise HTTPException(status_code=503, detail="서비스가 준비되지 않았습니다. 잠시 후 다시 시도해주세요.")

	    try:
	        start_time = time.time()

	        search_query = request.search_query.strip()
	        if not search_query:
	            raise HTTPException(status_code=400, detail="검색어를 입력해주세요")

	        top_k = min(max(1, request.top_k), 20)  # clamp to 1..20

	        recommendations = await search_faiss_with_keywords(
	            search_query,
	            top_k
	        )

	        result = {
	            "query": search_query,
	            "recommendations": recommendations
	        }

	        # Only slow responses (over one second) are logged
	        elapsed = time.time() - start_time
	        if elapsed > 1.0:
	            logger.info(f"⏱️ API 응답 시간: {elapsed:.2f}초 | 쿼리: '{search_query}'")

	        return result

	    except HTTPException:
	        # Deliberate HTTP errors (such as the 400 above) must reach the
	        # client unchanged instead of being rewritten as a 500.
	        raise
	    except Exception as e:
	        logger.error(f"❌ 추천 처리 오류: {str(e)}")
	        raise HTTPException(status_code=500, detail="추천 처리 중 오류가 발생했습니다") from e

	# Index health check (intended for background tasks)
	async def check_index_health():
	    """Verify that the FAISS index is loaded, loading or building it when it is not.

	    Errors are logged and never propagated.
	    """
	    try:
	        index_missing = faiss_index is None
	        if index_missing:
	            logger.warning("⚠️ 백그라운드 체크: FAISS 인덱스가 로드되지 않았습니다.")
	            await check_faiss_index()

	        # Further health checks can be added here
	        logger.debug("✅ 인덱스 상태 확인 완료")
	    except Exception as e:
	        logger.error(f"❌ 백그라운드 인덱스 체크 중 오류: {str(e)}")

	# ✅ Similar-word lookup API
	@app.post("/api/similar_words")
	async def similar_words(word: str, top_k: int = 10):
	    """Return the *top_k* Word2Vec neighbours of *word*.

	    Responds with an ``error`` payload when the Word2Vec model is not
	    loaded, and with an empty list plus a message when the word is not in
	    the model. Unexpected failures become HTTP 500.
	    """
	    try:
	        if word2vec_model is None:
	            return {"error": "Word2Vec 모델이 로드되지 않았습니다."}

	        def _lookup_neighbours():
	            # Words outside the vocabulary have no neighbours
	            if word in word2vec_model:
	                neighbours = word2vec_model.most_similar(word, topn=top_k)
	                return [{"word": w, "similarity": float(s)} for w, s in neighbours]
	            return []

	        event_loop = asyncio.get_event_loop()
	        found = await event_loop.run_in_executor(thread_pool, _lookup_neighbours)

	        if found:
	            return {"word": word, "similar_words": found}

	        return {"word": word, "similar_words": [], "message": "단어가 모델에 없습니다."}
	    except Exception as e:
	        logger.error(f"❌ 유사 단어 검색 중 오류: {str(e)}")
	        raise HTTPException(status_code=500, detail=f"유사 단어 검색 오류: {str(e)}")

	# ✅ FAISS index refresh API (runs only on explicit request)
	@app.post("/api/update_index")
	async def update_index(background_tasks: BackgroundTasks):
	    """Schedule a FAISS index rebuild as a background task and return at once."""
	    try:
	        # The rebuild itself runs after the response has been sent
	        background_tasks.add_task(rebuild_and_log_index)
	    except Exception as e:
	        logger.exception("❌ [API] 인덱스 업데이트 처리 중 예외 발생")
	        raise HTTPException(status_code=500, detail=f"인덱스 업데이트 실패: {str(e)}")
	    return {"message": "✅ FAISS 인덱스 업데이트가 백그라운드에서 시작되었습니다."}

	# Index rebuild wrapper for background execution
	async def rebuild_and_log_index():
	    """Rebuild the FAISS index, log how long it took, then clean up memory.

	    Failures are logged and never propagated.
	    """
	    try:
	        logger.info("🔄 백그라운드에서 인덱스 재구축 시작")
	        started_at = time.time()
	        await rebuild_faiss_index()
	        elapsed = time.time() - started_at
	        logger.info(f"✅ 백그라운드 인덱스 재구축 완료! 소요 시간: {elapsed:.2f}초")
	    except Exception as e:
	        logger.error(f"❌ 백그라운드 인덱스 재구축 중 오류: {str(e)}")

	    # Memory cleanup runs whether or not the rebuild succeeded
	    await cleanup_memory()

	# ✅ Memory usage inspection and cleanup API
	@app.get("/api/memory_status")
	async def memory_status():
	    """Report GPU memory usage before and after a cleanup pass.

	    In CPU mode only a short informational payload is returned. Unexpected
	    failures become HTTP 500.
	    """

	    def _to_gib(num_bytes):
	        return num_bytes / (1024**3)

	    try:
	        if device != "cuda":
	            # CPU mode: no detailed statistics are collected
	            return {
	                "device": "CPU",
	                "message": "CPU 모드에서 실행 중입니다. 메모리 정보가 제한적입니다.",
	                "request_count": request_count
	            }

	        # Figures before cleanup (the cache is emptied first)
	        torch.cuda.empty_cache()
	        allocated_before = _to_gib(torch.cuda.memory_allocated())
	        reserved_before = _to_gib(torch.cuda.memory_reserved())

	        # Force a garbage collection, empty the cache again, then re-measure
	        gc.collect()
	        torch.cuda.empty_cache()
	        allocated_after = _to_gib(torch.cuda.memory_allocated())
	        reserved_after = _to_gib(torch.cuda.memory_reserved())

	        return {
	            "device": "GPU",
	            "memory_stats": {
	                "allocated_gb": round(allocated_before, 3),
	                "reserved_gb": round(reserved_before, 3),
	                "after_cleanup_allocated_gb": round(allocated_after, 3),
	                "after_cleanup_reserved_gb": round(reserved_after, 3)
	            },
	            "request_count": request_count
	        }
	    except Exception as e:
	        logger.error(f"❌ 메모리 상태 확인 중 오류: {str(e)}")
	        raise HTTPException(status_code=500, detail=f"메모리 상태 확인 오류: {str(e)}")


	# Periodic memory monitor
	async def periodic_memory_monitor():
	    """Clean up and log memory usage every five minutes, indefinitely.

	    A failure in one cycle is logged and the monitor keeps running. System
	    memory is logged only when ``psutil`` can be imported.
	    """
	    worker_id = multiprocessing.current_process().name
	    logger.info(f"🔄 워커 {worker_id}: 주기적 메모리 모니터링 시작")

	    # psutil is optional: import it once up front instead of on every cycle,
	    # and do not let its absence stop the monitor.
	    try:
	        import psutil
	    except ImportError:
	        psutil = None

	    while True:
	        await asyncio.sleep(300)  # five minutes between cycles

	        # The try block lives inside the loop so that a single failed cycle
	        # cannot end the monitor for the rest of the process lifetime.
	        try:
	            gc.collect()
	            if device == "cuda":
	                torch.cuda.empty_cache()

	                allocated = torch.cuda.memory_allocated() / (1024**3)
	                reserved = torch.cuda.memory_reserved() / (1024**3)
	                logger.info(f"📊 워커 {worker_id}: GPU 메모리 - 할당: {allocated:.2f}GB, 예약: {reserved:.2f}GB")

	            if psutil is not None:
	                memory_info = psutil.Process().memory_info()
	                logger.info(f"📊 워커 {worker_id}: 시스템 메모리 - RSS: {memory_info.rss/(1024**3):.2f}GB")
	        except Exception as e:
	            logger.error(f"❌ 워커 {worker_id}: 메모리 모니터링 중 오류: {e}")

	# FastAPI startup handler
	@app.on_event("startup")
	async def startup_event():
	    """Prepare a worker: load models, item data and the FAISS index.

	    Every stage is best-effort: a failure is logged and startup continues,
	    so the worker comes up even when only partially initialised. The
	    periodic memory monitor is started at the end.
	    """
	    global active_sale_items

	    worker_id = multiprocessing.current_process().name
	    try:
	        logger.warning(f"🟡 워커 {worker_id} STARTUP 시작")

	        # Log GPU resource usage
	        if device == "cuda":
	            logger.info(f"🔄 워커 {worker_id}: GPU 메모리 상태:")
	            logger.info(f" - 총 메모리: {torch.cuda.get_device_properties(0).total_memory/(1024**3):.2f}GB")
	            logger.info(f" - 현재 할당: {torch.cuda.memory_allocated()/(1024**3):.2f}GB")
	            logger.info(f" - 예약: {torch.cuda.memory_reserved()/(1024**3):.2f}GB")

	        # Random delay so that workers do not all hit the Hub at once
	        delay = random.uniform(1.0, 5.0)
	        logger.info(f"⏱️ 워커 {worker_id}: {delay:.1f}초 지연 중...")
	        await asyncio.sleep(delay)

	        # Models (failures are logged; startup continues)
	        try:
	            if not load_models():
	                logger.error(f"❌ 워커 {worker_id} 모델 로드 실패")
	        except Exception as model_err:
	            logger.error(f"💥 워커 {worker_id} 모델 로드 중 심각한 오류: {model_err}")

	        # Item data (falls back to an empty DataFrame on error)
	        try:
	            active_sale_items = await load_huggingface_jsonl("initial_saleitem_dataset")
	            if active_sale_items is not None and not active_sale_items.empty:
	                logger.info(f"✅ 워커 {worker_id} 데이터 로드 완료: {len(active_sale_items)}개 항목")
	            else:
	                logger.error(f"❌ 워커 {worker_id} 데이터 로드 실패 - 빈 데이터셋")
	        except Exception as data_err:
	            logger.error(f"💥 워커 {worker_id} 데이터 로드 중 오류: {data_err}")
	            active_sale_items = pd.DataFrame()

	        # FAISS index (failures are logged; startup continues)
	        faiss_loaded = False
	        try:
	            if await load_faiss_index_safe():
	                logger.info(f"✅ 워커 {worker_id} FAISS 인덱스 로드 성공")
	                faiss_loaded = True
	            else:
	                logger.warning(f"⚠️ 워커 {worker_id} FAISS 인덱스 로드 실패")
	        except Exception as faiss_err:
	            logger.error(f"💥 워커 {worker_id} FAISS 인덱스 로드 중 오류: {faiss_err}")

	        # Start the periodic memory monitor. The event loop keeps only a weak
	        # reference to tasks, so a strong reference is stored on the app;
	        # otherwise the task may be garbage-collected while it is running.
	        try:
	            app.state.memory_monitor_task = asyncio.create_task(periodic_memory_monitor())
	            logger.info(f"✅ 워커 {worker_id} 메모리 모니터링 시작")
	        except Exception as monitor_err:
	            logger.error(f"⚠️ 워커 {worker_id} 메모리 모니터링 시작 실패: {monitor_err}")

	        # Final worker status
	        status = "🟢 정상" if faiss_loaded else "🟠 부분적 (인덱스 없음)"
	        logger.info(f"🏁 워커 {worker_id} STARTUP 완료: {status}")
	    except Exception as e:
	        logger.exception(f"🔥 워커 {worker_id} STARTUP 실패: {e}")
	        import traceback
	        logger.error(f"스택 추적: {traceback.format_exc()}")
	        # The worker is deliberately kept alive after a startup failure


	# ✅ Script entry point: run the API under Uvicorn
	if __name__ == "__main__":
	    import uvicorn

	    # Worker count comes from the WORKERS env variable (default 3). The
	    # count is kept small because of memory limits; a much higher value is
	    # only worth trying on a high-end GPU.
	    workers = int(os.getenv("WORKERS", 3))

	    # Limit this process to 28% of the GPU memory.
	    # NOTE(review): this call sits under the __main__ guard, so it runs in
	    # the launching process only - confirm it also takes effect in the
	    # spawned Uvicorn workers.
	    if device == "cuda":
	        torch.cuda.set_per_process_memory_fraction(0.28)

	    server_options = dict(
	        host="0.0.0.0",
	        port=7860,
	        workers=workers,
	        log_level="info",
	        timeout_keep_alive=65,  # longer keep-alive
	        limit_concurrency=100,  # cap on concurrent connections
	        timeout_graceful_shutdown=30,  # seconds to wait on shutdown
	    )
	    uvicorn.run("searchWorker:app", **server_options)