This model was trained using my abliterated variant of Qwen2.5-VL-7B (the text encoder behind Qwen Image 2512) and Musubi Tuner. It was trained for 12,600 steps in two phases:
Phase 1 - 10,000 Steps - Trained with detailed captions, but I found the model lacking in two areas: size reference, and the spatial relation of the top of the shaft to the bottom of the shaft, including which features should exist in each region and not the other. This resulted in a lot of upside-down dicks, with the frenulum on top.
Phase 2 - 2,600 Steps - Added frenulum captions, attaching the concept to the circumcision scar and the glans, both of which the model got a strong grasp on during Phase 1 training.
Trigger Word: qpenis
Common Caption Vocabulary: glans, scrotum, urethra, shaft (underside, top side, base, tip), circumcision scar, frenulum, prominent vein.
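For example, a prompt in this captioning style might look like the following (a hypothetical illustration, not a caption from the actual dataset):
qpenis, erect penis viewed from the side, glans at the tip, frenulum and circumcision scar on the underside of the shaft, prominent vein along the top side, scrotum below the base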
Recommended ComfyUI Setup:
uv run python main.py --cuda-malloc --use-flash-attention --fp8_e4m3fn-text-enc
Nodes: Load Diffusion Model, Load VAE, Load Clip Model (choose the full-size text encoder; it will get scaled to FP8 on the fly due to the --fp8_e4m3fn-text-enc flag. I tried quantizing to FP8 ahead of time and the results were not fantastic.)
-OR-
Rent yourself a Vast.ai instance with 96GB of VRAM (RTX PRO 6000) for around $1.00/hr, and use this script to generate with diffusers and transformers (courtesy of Claude Code):
#!/usr/bin/env python3
"""Generate images from a list of prompts using Qwen-Image-2512."""
import argparse
import random
import torch
from datetime import datetime
from pathlib import Path
from diffusers import DiffusionPipeline, schedulers as _schedulers
from huggingface_hub import login

LORA_DIR = Path("/workspace/lora_output/v2_model")

# Flow-matching compatible schedulers only (Qwen-Image-2512 is a flow-matching model)
SCHEDULERS = {
    "euler": _schedulers.FlowMatchEulerDiscreteScheduler,
    "heun": _schedulers.FlowMatchHeunDiscreteScheduler,
}
def parse_prompt_file(path):
    """Parse a prompt file with #id and #seed headers.

    #seed: 42     -> fixed seed starting at 42
    #seed: random -> random seed per image
    """
    prompt_id = None
    seed = 42
    random_seed = False
    prompts = []
    with open(path) as f:
        for line in f:
            line = line.strip()
            if line.startswith("#id:"):
                prompt_id = line.split(":", 1)[1].strip()
                continue
            if line.startswith("#seed:"):
                seed_val = line.split(":", 1)[1].strip()
                if seed_val.lower() == "random":
                    random_seed = True
                else:
                    seed = int(seed_val)
                continue
            if line.startswith("#") or not line:
                continue
            if " --n " in line:
                prompt, neg = line.split(" --n ", 1)
                prompts.append((prompt.strip(), neg.strip()))
            else:
                prompts.append((line, ""))
    if not prompt_id:
        # Fallback to filename without extension
        prompt_id = Path(path).stem
    return prompt_id, seed, random_seed, prompts
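# Example (hypothetical) prompt file accepted by parse_prompt_file above;
# the " --n " separator splits a prompt from its negative prompt:
#
#   #id: qpenis_test
#   #seed: random
#   photo of a nude man, qpenis, frenulum on the underside of the shaft --n blurry, low quality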
def parse_lora_args(lora_args):
    """Parse --lora args into a list of LoRA groups.

    Bare names are individual loop iterations.
    Names inside [] are stacked together as one group.

    Examples:
        --lora luca_200 luca_400  -> [[luca_200], [luca_400]] (loop)
        --lora "[luca_200,penis]" -> [[luca_200, penis]] (stacked)
        --lora luca_200 "[luca_400,penis]" luca_600
            -> [[luca_200], [luca_400, penis], [luca_600]]
    """
    groups = []
    joined = " ".join(lora_args)
    i = 0
    while i < len(joined):
        if joined[i] == "[":
            end = joined.index("]", i)
            inner = joined[i + 1:end]
            group = [name.strip() for name in inner.split(",") if name.strip()]
            groups.append(group)
            i = end + 1
        elif joined[i].isspace():
            i += 1
        else:
            end = joined.find(" ", i)
            if end == -1:
                end = len(joined)
            token = joined[i:end].strip()
            if token:
                groups.append([token])
            i = end
    return groups
def resolve_lora_path(name, lora_dir):
    """Resolve a short LoRA name to a full path.

    Checks: exact path, name as-is in lora_dir, name + .safetensors in lora_dir.
    """
    p = Path(name)
    if p.is_file():
        return p
    p = lora_dir / name
    if p.is_file():
        return p
    p = lora_dir / f"{name}.safetensors"
    if p.is_file():
        return p
    raise FileNotFoundError(f"LoRA not found: {name} (searched {lora_dir})")

def lora_label(name):
    """Get a clean label from a LoRA name for use in filenames."""
    p = Path(name)
    return p.stem if p.suffix else name
def load_loras(pipe, group, scales, lora_dir):
    """Load a group of LoRAs into the pipeline at their requested scales."""
    adapter_names = []
    for idx, name in enumerate(group):
        lora_path = resolve_lora_path(name, lora_dir)
        print(f"  Loading LoRA: {lora_path.name} (scale={scales[idx]})")
        adapter_name = f"lora_{idx}"
        pipe.load_lora_weights(str(lora_path.parent), weight_name=lora_path.name, adapter_name=adapter_name)
        adapter_names.append(adapter_name)
    # Activate all adapters in the group at their requested scales; fusing and
    # then immediately unfusing would leave every adapter active at scale 1.0.
    pipe.set_adapters(adapter_names, adapter_weights=scales)

def unload_loras(pipe):
    """Unload all LoRA adapters from the pipeline."""
    pipe.unload_lora_weights()
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", type=str, default="Qwen/Qwen-Image-2512")
    parser.add_argument("--te-path", type=str, default="./abliterated-Qwen-Image-2512/text_encoder", help="Path to abliterated text_encoder")
    parser.add_argument("--prompts", type=str, required=True, help="Text file with one prompt per line (with #id and #seed headers)")
    parser.add_argument("--output-dir", type=str, default="./output")
    parser.add_argument("--width", type=int, default=1328)
    parser.add_argument("--height", type=int, default=1328)
    parser.add_argument("--steps", type=int, default=50)
    parser.add_argument("--cfg", type=float, default=4.0)
    parser.add_argument("--lora", type=str, nargs="+", default=[], help="LoRA name(s). Bare names loop; [a,b] stacks.")
    parser.add_argument("--lora-scale", type=float, nargs="+", default=[], help="LoRA scale(s), one per group or one for all")
    parser.add_argument("--lora-dir", type=str, default=str(LORA_DIR), help="Directory to find LoRA files")
    parser.add_argument("--scheduler", type=str, default=None, choices=list(SCHEDULERS.keys()),
                        help="Override the default scheduler/sampler")
    parser.add_argument("--device", type=str, default="cuda:0")
    args = parser.parse_args()

    lora_dir = Path(args.lora_dir)
    te_path = Path(args.te_path)
    if not te_path.is_dir():
        parser.error(f"Abliterated text encoder not found at: {te_path}\n"
                     "Download it with: hf download sci4ai/Qwen-Image-2512-Abliterated-TE-For-Musubi-Lora-Training")

    # Authenticate with HuggingFace for gated model access
    login(token="YOUR_HF_TOKEN_HERE")

    out = Path(args.output_dir)
    out.mkdir(parents=True, exist_ok=True)

    prompt_id, seed, random_seed, prompts = parse_prompt_file(args.prompts)
    seed_desc = "random" if random_seed else str(seed)
    print(f"Prompt file: id={prompt_id}, seed={seed_desc}, {len(prompts)} prompts")

    # Parse LoRA groups
    lora_groups = parse_lora_args(args.lora) if args.lora else [[]]

    try:
        import flash_attn
        attn_impl = "flash_attention_2"
        print("Using Flash Attention 2")
    except ImportError:
        attn_impl = None
        print("Flash Attention not available, using default")

    print(f"Loading pipeline from {args.model}...")
    pipe_kwargs = dict(torch_dtype=torch.bfloat16, trust_remote_code=True)
    if attn_impl:
        pipe_kwargs["attn_implementation"] = attn_impl
    pipe = DiffusionPipeline.from_pretrained(args.model, **pipe_kwargs).to(args.device)

    if args.scheduler:
        # All entries in SCHEDULERS are scheduler classes
        pipe.scheduler = SCHEDULERS[args.scheduler].from_config(pipe.scheduler.config)
        print(f"Scheduler: {args.scheduler}")
    else:
        print(f"Scheduler: {type(pipe.scheduler).__name__} (default)")

    print(f"Loading abliterated text encoder from {te_path}...")
    from transformers import AutoModel
    te_kwargs = dict(dtype=torch.bfloat16, trust_remote_code=True)
    if attn_impl:
        te_kwargs["attn_implementation"] = attn_impl
    te = AutoModel.from_pretrained(str(te_path), **te_kwargs).to(args.device)
    pipe.text_encoder = te
    print("Text encoder swapped.")

    # Generate for each LoRA group
    for group_idx, group in enumerate(lora_groups):
        # Build LoRA label for filenames
        if group and group != [""]:
            labels = [lora_label(name) for name in group]
            lora_tag = "_".join(labels)
            # Resolve scales for this group
            if args.lora_scale:
                if len(args.lora_scale) == 1:
                    scales = args.lora_scale * len(group)
                else:
                    # Consume scales for this group
                    offset = sum(len(g) for g in lora_groups[:group_idx])
                    scales = args.lora_scale[offset:offset + len(group)]
                    if len(scales) < len(group):
                        scales.extend([1.0] * (len(group) - len(scales)))
            else:
                scales = [1.0] * len(group)
            print(f"\n--- LoRA group {group_idx + 1}/{len(lora_groups)}: {', '.join(group)} ---")
            load_loras(pipe, group, scales, lora_dir)
        else:
            lora_tag = "nolora"
            print("\n--- No LoRA ---")

        print(f"Generating {len(prompts)} images...\n")
        for i, (prompt, neg_prompt) in enumerate(prompts):
            img_seed = random.randint(0, 2**32 - 1) if random_seed else seed + i
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            filename = out / f"{prompt_id}_{i:03d}_{img_seed}_{lora_tag}_{timestamp}.png"
            print(f"[{i+1}/{len(prompts)}] {prompt}")
            if neg_prompt:
                print(f"  neg: {neg_prompt}")
            image = pipe(
                prompt=prompt,
                negative_prompt=neg_prompt if neg_prompt else None,
                width=args.width,
                height=args.height,
                num_inference_steps=args.steps,
                true_cfg_scale=args.cfg,
                generator=torch.Generator(device=args.device).manual_seed(img_seed),
            ).images[0]
            image.save(filename)
            print(f"  -> {filename}")

        # Unload LoRAs before next group
        if group and group != [""]:
            unload_loras(pipe)

    print(f"\nDone. Images saved to {out}/")

if __name__ == "__main__":
    main()
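Assuming you save the script as generate.py (the filename is up to you), a run that loops over two LoRA checkpoints, one pass through the prompt file per group, might look like:

python generate.py --prompts prompts.txt --lora luca_200 luca_400 --lora-scale 1.0 --scheduler euler

Each group's label ends up in the output filenames, so you can compare checkpoints side by side.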
One of my goals with this was to better teach the model that holding a penis happens AT the penis, and not at the mouth. Many times when asked to render an image of a man holding his penis, it became an accessory or a flesh Popsicle. Unfortunately, I still often see dicks held like accessories, but the model now has a much better spatial understanding of the genital region as a whole.
Results


In Conclusion:
Overall, I think the model may have memorized the training set a bit, but I'm really happy with the results either way. The model had absolutely no idea what a penis was prior to the training. See the prolapsed asshole with a stapled-on flaming hot Cheeto below for reference.

2026, The Worm-Hole, Qwen Image