This video shows how to install the HunyuanVideo AI model locally for long text-to-video generation.
Code:
git clone https://github.com/tencent/HunyuanVideo && cd HunyuanVideo
conda env create -f environment.yml
conda activate HunyuanVideo
conda install gcc_linux-64 gxx_linux-64 -y
conda install cuda -c nvidia -y
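# Optional sanity check (not part of the original steps): confirm the conda CUDA toolkit is on PATH
nvcc --version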
python -m pip install -r requirements.txt
pip install packaging
pip uninstall -y ninja && pip install ninja
python -m pip install git+https://github.com/Dao-AILab/flash-attention.git@v2.5.9.post1
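# Optional sanity check (not part of the original steps): flash-attention compiles from source, so verify it imports
python -c "import flash_attn; print(flash_attn.__version__)"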
huggingface-cli login  # get a Read-access token from huggingface.co
huggingface-cli download tencent/HunyuanVideo --local-dir ./ckpts
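# Optional: confirm the weights landed in ./ckpts before moving on
ls ./ckpts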
cd ckpts  # we are already inside the HunyuanVideo repo from the clone step
huggingface-cli download xtuner/llava-llama-3-8b-v1_1-transformers --local-dir ./llava-llama-3-8b-v1_1-transformers
cd ..
python hyvideo/utils/preprocess_text_encoder_tokenizer_utils.py --input_dir ckpts/llava-llama-3-8b-v1_1-transformers --output_dir ckpts/text_encoder
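# Optional: the preprocess step above should have written the text encoder into ckpts/text_encoder
ls ckpts/text_encoder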
cd ckpts
huggingface-cli download openai/clip-vit-large-patch14 --local-dir ./text_encoder_2
cd ..  # back to the HunyuanVideo repo root
python3 sample_video.py \
    --video-size 720 1280 \
    --video-length 129 \
    --infer-steps 30 \
    --prompt "a cat is running, realistic." \
    --flow-reverse \
    --seed 0 \
    --use-cpu-offload \
    --save-path ./results
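# The generated video is written under ./results (set by --save-path above)
ls -lh ./results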