xiaomusic/xiaomusic/utils.py
2024-04-30 12:47:57 +00:00

68 lines
1.9 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
from __future__ import annotations
import os
import re
import socket
from http.cookies import SimpleCookie
from typing import AsyncIterator
from urllib.parse import urlparse
import difflib
from requests.utils import cookiejar_from_dict
### HELPER FUNCTIONS ###
def parse_cookie_string(cookie_string):
    """Turn a raw cookie string into a cookie jar.

    The string is parsed with ``http.cookies.SimpleCookie``; the name and
    value of every resulting morsel are collected into a plain dict, which
    is then passed to ``requests.utils.cookiejar_from_dict`` to build a new
    jar.
    """
    parsed = SimpleCookie()
    parsed.load(cookie_string)

    # Keep only each morsel's value; other cookie attributes are dropped.
    name_to_value = {}
    for name, morsel in parsed.items():
        name_to_value[name] = morsel.value

    return cookiejar_from_dict(name_to_value, cookiejar=None, overwrite=True)
# Characters excluded from the length used for the elapse estimate: quotes and
# brackets, plus any hyphen that is not adjacent to another hyphen.
_no_elapse_chars = re.compile(r"([「」『』《》“”'\"()]|(?<!-)-(?!-))", re.UNICODE)


def calculate_tts_elapse(text: str, *, speed: float = 4.5) -> float:
    """Estimate how long text-to-speech playback of *text* takes.

    The estimate is the number of characters left after removing the
    characters matched by ``_no_elapse_chars``, divided by *speed*.

    Args:
        text: The text that will be spoken.
        speed: Characters spoken per unit of time (presumably seconds; confirm
            against the caller). The default of 4.5 was picked by trial and
            error.

    Returns:
        The estimated duration; ``0.0`` for empty text.

    Raises:
        ValueError: If *speed* is not strictly positive.
    """
    if speed <= 0:
        raise ValueError("speed must be positive")
    # Quotes and brackets do not affect the total elapsed time.
    counted = _no_elapse_chars.sub("", text)
    return len(counted) / speed
# Sentence terminators: the fullwidth CJK forms followed by their ASCII
# counterparts. The fullwidth characters are written as escapes so they stay
# intact regardless of editor or encoding. An empty string must never appear
# here: every string ends with "", which would make the endswith() check
# below always succeed.
_ending_punctuations = (
    "\u3002",  # ideographic full stop
    "\uff1f",  # fullwidth question mark
    "\uff01",  # fullwidth exclamation mark
    "\uff1b",  # fullwidth semicolon
    ".",
    "?",
    "!",
    ";",
)


async def split_sentences(text_stream: AsyncIterator[str]) -> AsyncIterator[str]:
    """Regroup a stream of text chunks into sentence-sized pieces.

    Incoming chunks are appended to a buffer. Whenever the buffer ends with
    one of ``_ending_punctuations`` it is yielded and reset. Only the end of
    the buffer is inspected, so a terminator in the middle of a chunk does
    not cause a split.

    Args:
        text_stream: Asynchronous iterator producing text fragments.

    Yields:
        The buffered text each time it ends with a terminator, then any
        non-empty remainder once the stream is exhausted.
    """
    pending = ""
    async for chunk in text_stream:
        pending += chunk
        if pending.endswith(_ending_punctuations):
            yield pending
            pending = ""
    # Flush trailing text that never reached a terminator.
    if pending:
        yield pending
### for edge-tts utils ###
def find_key_by_partial_string(
    dictionary: dict[str, str], partial_key: str
) -> str | None:
    """Return the value of the first key that occurs inside *partial_key*.

    Despite the parameter name, the test is ``key in partial_key``: a
    dictionary key matches when it is a substring of *partial_key*, not the
    other way round. Keys are tried in the dictionary's iteration order and
    the first match wins.

    Args:
        dictionary: Mapping whose keys are searched for within *partial_key*.
        partial_key: The string that is scanned for any of the keys.

    Returns:
        The value stored under the first matching key, or ``None`` when no
        key is contained in *partial_key*.
    """
    return next(
        (value for key, value in dictionary.items() if key in partial_key),
        None,
    )
def validate_proxy(proxy_str: str) -> bool:
    """Run a basic sanity check on an HTTP(S) proxy URL.

    Args:
        proxy_str: The proxy URL to check.

    Returns:
        ``True`` when the URL passes both checks; the function never
        returns ``False``.

    Raises:
        ValueError: If the scheme is neither ``http`` nor ``https``, or if
            the hostname or the port is missing.
    """
    parts = urlparse(proxy_str)
    scheme = parts.scheme
    if scheme != "http" and scheme != "https":
        raise ValueError("Proxy scheme must be http or https")
    # The port is only looked at when a hostname is present.
    if not parts.hostname or not parts.port:
        raise ValueError("Proxy hostname and port must be set")
    return True
# Fuzzy search
def fuzzyfinder(user_input, collection):
    """Return the entries of *collection* that loosely resemble *user_input*.

    Matching is delegated to ``difflib.get_close_matches`` with at most 10
    results and a similarity cutoff of 0.1.
    """
    matches = difflib.get_close_matches(user_input, collection, n=10, cutoff=0.1)
    return matches