|
|
|
|
|
|
|
|
# ---------------------------- |
|
|
# ---------------------------- |
|
|
# Load base model |
|
|
# Load base model |
|
|
# ---------------------------- |
|
|
# ---------------------------- |
|
|
print("[1/4] Loading base model...") |
|
|
|
|
|
|
|
|
print(f"{80 * '_'}\n[1/4] Loading base model...") |
|
|
base_model = AutoModelForCausalLM.from_pretrained( |
|
|
base_model = AutoModelForCausalLM.from_pretrained( |
|
|
BASE_MODEL, |
|
|
BASE_MODEL, |
|
|
torch_dtype=DTYPE, |
|
|
torch_dtype=DTYPE, |
|
|
|
|
|
|
|
|
# ---------------------------- |
|
|
# ---------------------------- |
|
|
# Load tokenizer |
|
|
# Load tokenizer |
|
|
# ---------------------------- |
|
|
# ---------------------------- |
|
|
print("[2/4] Loading tokenizer...") |
|
|
|
|
|
|
|
|
print(f"{80 * '_'}\n[2/4] Loading tokenizer...") |
|
|
tokenizer = AutoTokenizer.from_pretrained( |
|
|
tokenizer = AutoTokenizer.from_pretrained( |
|
|
BASE_MODEL, |
|
|
BASE_MODEL, |
|
|
trust_remote_code=True |
|
|
trust_remote_code=True |
|
|
|
|
|
|
|
|
# ---------------------------- |
|
|
# ---------------------------- |
|
|
# Load LoRA adapter |
|
|
# Load LoRA adapter |
|
|
# ---------------------------- |
|
|
# ---------------------------- |
|
|
print("[3/4] Loading LoRA adapter...") |
|
|
|
|
|
|
|
|
print(f"{80 * '_'}\n[3/4] Loading LoRA adapter...") |
|
|
model = PeftModel.from_pretrained( |
|
|
model = PeftModel.from_pretrained( |
|
|
base_model, |
|
|
base_model, |
|
|
LORA_DIR, |
|
|
LORA_DIR, |
|
|
|
|
|
|
|
|
# ---------------------------- |
|
|
# ---------------------------- |
|
|
# Merge LoRA into base model |
|
|
# Merge LoRA into base model |
|
|
# ---------------------------- |
|
|
# ---------------------------- |
|
|
print("[4/4] Merging LoRA into base model...") |
|
|
|
|
|
|
|
|
print(f"{80 * '_'}\n[4/4] Merging LoRA into base model...") |
|
|
model = model.merge_and_unload() |
|
|
model = model.merge_and_unload() |
|
|
print("LoRA successfully merged.") |
|
|
print("LoRA successfully merged.") |
|
|
|
|
|
|