Commit 1aba099: FixLogo

NaruseMioShirakana committed Jun 1, 2024
1 parent cb23193 commit 1aba099
Showing 11 changed files with 2,426 additions and 21 deletions.
1 change: 0 additions & 1 deletion CSharpDemo/Program.cs
@@ -1,6 +1,5 @@
using LibSvcApi;


LibSvc.LibSvcHparams Config = new();
Config.TensorExtractor = "DiffusionSvc";
Config.SamplingRate = 44100;
747 changes: 747 additions & 0 deletions CSharpDemo/README.md

Large diffs are not rendered by default.

704 changes: 704 additions & 0 deletions CSharpDemo/README_en.md

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions README.md
@@ -1,5 +1,6 @@
<div align="center">

![image](logo/logo256(AIGen).png)
# DragonianVoice
[中文](README.md) | [English](README_en.md)

25 changes: 24 additions & 1 deletion fish-speech.cpp/include/llama.h
@@ -67,15 +67,38 @@ class Attention : public Module
bool _Inplace = false
);

bool training = false;

private:
ggml_tensor* apply_rotary_emb(
ggml_tensor* x,
ggml_tensor* freqs_cis,
ggml_context* _Ctx,
bool _Inplace = false
);

private:
static ggml_tensor* scaled_dot_product_attention(
ggml_tensor* query,
ggml_tensor* key,
ggml_tensor* value,
ggml_context* _Ctx,
ggml_tensor* attn_mask = nullptr,
float dropout_p = 0.0,
bool _Inplace = false
);

static ggml_tensor* eq_scaled_dot_product_attention(
ggml_tensor* query,
ggml_tensor* key,
ggml_tensor* value,
ggml_context* _Ctx,
ggml_tensor* attn_mask = nullptr,
float dropout_p = 0.0,
bool _Inplace = false
);

void DumpCurrentLayerInfo(std::wstring& _Tmp) override;

int total_head_dim;
Linear wqkv, wo;
float dropout;
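Note (not part of this commit): apart from the ggml_context and _Inplace parameters, the two new static declarations follow the argument order of PyTorch's torch.nn.functional.scaled_dot_product_attention (query, key, value, attn_mask, dropout_p); in the .cpp below both currently route to ggml_flash_attn. Purely as a reference for those semantics, a single-head sketch over plain float buffers (hypothetical helper, no dropout) might look like:

#include <algorithm>
#include <cmath>
#include <vector>

// Reference semantics of scaled dot-product attention for one head:
// out = softmax(Q * K^T / sqrt(d) + mask) * V, with Q:[n_q,d], K/V:[n_kv,d].
// Illustrative only; the commit itself dispatches to ggml_flash_attn.
std::vector<float> sdpa_reference(
    const std::vector<float>& Q, const std::vector<float>& K,
    const std::vector<float>& V, int n_q, int n_kv, int d,
    const std::vector<float>* mask = nullptr)
{
    std::vector<float> out(static_cast<size_t>(n_q) * d, 0.f);
    const float scale = 1.f / std::sqrt(static_cast<float>(d));
    for (int i = 0; i < n_q; ++i)
    {
        std::vector<float> score(n_kv);
        float maxv = -1e30f;
        for (int j = 0; j < n_kv; ++j)
        {
            float s = 0.f;
            for (int k = 0; k < d; ++k)
                s += Q[i * d + k] * K[j * d + k];
            s *= scale;
            if (mask) s += (*mask)[i * n_kv + j];
            score[j] = s;
            maxv = std::max(maxv, s);
        }
        // Numerically stable softmax over the key axis.
        float denom = 0.f;
        for (int j = 0; j < n_kv; ++j)
        {
            score[j] = std::exp(score[j] - maxv);
            denom += score[j];
        }
        // Weighted sum of the value rows.
        for (int j = 0; j < n_kv; ++j)
            for (int k = 0; k < d; ++k)
                out[i * d + k] += score[j] / denom * V[j * d + k];
    }
    return out;
}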
70 changes: 65 additions & 5 deletions fish-speech.cpp/src/llama.cpp
@@ -1,6 +1,7 @@
#include "llama.h"

LibTTSBegin
inline void UnUsedPtr(void*) {}

RMSNorm::RMSNorm(Module* _Parent, const std::wstring& _Name, SizeType dim, float eps) :
Module(_Parent, _Name),
@@ -136,21 +137,80 @@ ggml_tensor* Attention::operator()(
int Scale = n_head / n_local_heads;
if(Scale > 1)
{
K = ggml_repeat(_Ctx, K, ggml_new_tensor_4d(_Ctx, K->type, K->ne[0], K->ne[1], K->ne[2] * Scale, K->ne[3]));
V = ggml_repeat(_Ctx, V, ggml_new_tensor_4d(_Ctx, V->type, V->ne[0], V->ne[1], V->ne[2] * Scale, V->ne[3]));
K = ggml_repeat(_Ctx, K, ggml_new_tensor_4d(_Ctx, K->type, K->ne[0], K->ne[1] * Scale, K->ne[2], K->ne[3]));
V = ggml_repeat(_Ctx, V, ggml_new_tensor_4d(_Ctx, V->type, V->ne[0], V->ne[1] * Scale, V->ne[2], V->ne[3]));
}

//TODO
ggml_tensor* y;
if(use_sdpa)
y = scaled_dot_product_attention(
Q,
K,
V,
_Ctx,
mask,
training ? dropout : 0.f
);
else
y = eq_scaled_dot_product_attention(
Q,
K,
V,
_Ctx,
mask,
training ? dropout : 0.f
);

return nullptr;
y = ggml_reshape_3d(_Ctx, ggml_cont(_Ctx, ggml_permute(_Ctx, y, 0, 2, 1, 3)), dim, seqlen, bsz);

return wo(y, _Ctx, _Inplace);
}

ggml_tensor* Attention::apply_rotary_emb(ggml_tensor* x, ggml_tensor* freqs_cis, ggml_context* _Ctx, bool _Inplace)
{
//TODO
int ndim = int(x->ne[0]) / 2;
int64_t n_tokens = x->ne[0] / 2;
for (size_t i = 1; i < 4; ++i)
n_tokens *= x->ne[i];
auto xshape = ggml_reshape_2d(_Ctx, x, 2, n_tokens);
auto xshape0 = ggml_view_4d(_Ctx, xshape, ndim, x->ne[1], x->ne[2], x->ne[3], x->nb[1], x->nb[2], x->nb[3], 0);
auto xshape1 = ggml_view_4d(_Ctx, xshape, ndim, x->ne[1], x->ne[2], x->ne[3], x->nb[1], x->nb[2], x->nb[3], sizeof(float));

return nullptr;
}

ggml_tensor* Attention::scaled_dot_product_attention(
ggml_tensor* query,
ggml_tensor* key,
ggml_tensor* value,
ggml_context* _Ctx,
ggml_tensor* attn_mask,
float dropout_p,
bool _Inplace
)
{
UnUsedPtr(attn_mask);
UNUSED(dropout_p);
UNUSED(_Inplace);
return ggml_flash_attn(_Ctx, query, key, value, false);
}

ggml_tensor* Attention::eq_scaled_dot_product_attention(
ggml_tensor* query,
ggml_tensor* key,
ggml_tensor* value,
ggml_context* _Ctx,
ggml_tensor* attn_mask,
float dropout_p,
bool _Inplace
)
{
UnUsedPtr(attn_mask);
UNUSED(dropout_p);
UNUSED(_Inplace);
return ggml_flash_attn(_Ctx, query, key, value, false);
}

void Attention::DumpCurrentLayerInfo(std::wstring& _Tmp)
{
_Tmp += std::format(
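Note (not part of this commit): Scale = n_head / n_local_heads, and the ggml_repeat calls above tile K and V so their head count matches the query heads (grouped-query attention); the fix appears to move the Scale factor from ne[2] to ne[1]. As a layout-level illustration only, assuming a hypothetical contiguous [heads][tokens][head_dim] buffer, the intended repetition is:

#include <cstring>
#include <vector>

// Tile each of the n_local_heads KV heads `scale` times so the KV head count
// matches n_head; illustrative only, assumed [heads][tokens][head_dim] layout.
std::vector<float> repeat_kv_heads(
    const std::vector<float>& kv, int n_local_heads, int scale,
    int tokens, int head_dim)
{
    const size_t head_size = static_cast<size_t>(tokens) * head_dim;
    std::vector<float> out(head_size * n_local_heads * scale);
    for (int h = 0; h < n_local_heads; ++h)
        for (int r = 0; r < scale; ++r)
            std::memcpy(out.data() + (h * scale + r) * head_size,
                        kv.data() + h * head_size,
                        head_size * sizeof(float));
    return out;
}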
20 changes: 6 additions & 14 deletions fish-speech.cpp/test.py
@@ -1,15 +1,7 @@
import torch

a = torch.nn.ConvTranspose2d(114, 514, 3)
def precompute_freqs_cis(seq_len: int, n_elem: int, base: int = 20000):
freqs = 1.0 / (
base ** (torch.arange(0, n_elem, 2)[: (n_elem // 2)].float() / n_elem)
)
t = torch.arange(seq_len, device=freqs.device)
freqs = torch.outer(t, freqs)
freqs_cis = torch.polar(torch.ones_like(freqs), freqs)
cache = torch.stack([freqs_cis.real, freqs_cis.imag], dim=-1)
return cache.to(dtype=torch.bfloat16)

b = precompute_freqs_cis(2000, 4999)
print(b.size())
import time
for i in range(20):
a = torch.ones(size=(1, 768, 100000))
beg = time.time()
a.fill_(i)
print(time.time() - beg)
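Note (not part of this commit): the precompute_freqs_cis helper removed from test.py built the rotary cos/sin cache that the freqs_cis argument of apply_rotary_emb presumably expects. A direct C++ translation of the removed Python (illustrative only, returning float rather than bfloat16) could be:

#include <cmath>
#include <vector>

// Rotary frequency cache: cache[s][j][0] = cos(s * freq_j),
// cache[s][j][1] = sin(s * freq_j), with freq_j = base^(-2j / n_elem).
// Mirrors the removed Python precompute_freqs_cis (base = 20000 by default).
std::vector<float> precompute_freqs_cis(int seq_len, int n_elem, float base = 20000.f)
{
    const int half = n_elem / 2;
    std::vector<float> cache(static_cast<size_t>(seq_len) * half * 2);
    for (int j = 0; j < half; ++j)
    {
        const float freq = 1.f / std::pow(base, 2.f * j / n_elem);
        for (int s = 0; s < seq_len; ++s)
        {
            cache[(static_cast<size_t>(s) * half + j) * 2 + 0] = std::cos(s * freq);
            cache[(static_cast<size_t>(s) * half + j) * 2 + 1] = std::sin(s * freq);
        }
    }
    return cache;
}

With the old test's arguments, precompute_freqs_cis(2000, 4999) yields a 2000 x 2499 x 2 cache, matching the size the deleted print(b.size()) reported.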