if btn_translate:
    if English:
        # Local path to the saved Marian EN->IT model weights (torch format,
        # originally Helsinki-NLP/opus-mt-en-it).
        Model_IT = './model_it/'
        with st.spinner('Initializing pipelines...'):
            st.success(' AI Translation started', icon="🆗")
            # Lazy imports: pay the heavy load cost only when the user
            # actually requests a translation.
            from langchain.text_splitter import CharacterTextSplitter
            from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

            # Split the input into paragraph-sized chunks small enough for
            # the seq2seq model's input window.
            text_splitter = CharacterTextSplitter(
                separator="\n\n",
                chunk_size=300,
                chunk_overlap=0,
                length_function=len,
            )
            st.success(' Chunking text...', icon="🆗")
            texts = text_splitter.create_documents([English])

            # Initialize tokenizer + model and wrap them in a translation pipeline.
            # NOTE(review): `pipeline` is assumed to be imported elsewhere in this file.
            tokenizer_tt0it = AutoTokenizer.from_pretrained(Model_IT)
            st.success(' Initializing AI Model & pipeline...', icon="🆗")
            model_tt0it = AutoModelForSeq2SeqLM.from_pretrained(Model_IT)
            TToIT = pipeline("translation", model=model_tt0it, tokenizer=tokenizer_tt0it)

            # Translate each chunk and join the results with newlines.
            start = datetime.datetime.now()
            print('Translation in progress...')
            # ''.join avoids the quadratic cost of repeated string concatenation.
            finaltext = ''.join(
                TToIT(item.page_content)[0]['translation_text'] + '\n'
                for item in texts
            )
            stop = datetime.datetime.now()
            elapsed = stop - start
            st.success(f'Translation completed in {elapsed}', icon="🆗")
            print(f'Translation generated in {elapsed}...')
            st.text_area(label="Translated text in Italian:", value=finaltext, height=350)
            st.markdown(f'Translation completed in **{elapsed}**')
            # split() with no argument collapses runs of whitespace, so multiple
            # spaces or newlines are not miscounted as extra words
            # (split(' ') produced empty strings and inflated the count).
            st.markdown(f"Translated number **{len(English.split())}** of words")
    else:
        st.warning("You need some text to be translated!", icon="⚠️")