From 41c8d970948a8bde87b05af91e04284c2b6fd7d9 Mon Sep 17 00:00:00 2001 From: Arity-T Date: Fri, 13 Feb 2026 11:19:25 +0300 Subject: [PATCH] =?UTF-8?q?=D0=9F=D0=BE=D1=81=D1=82=D0=B0=D0=BD=D0=BE?= =?UTF-8?q?=D0=B2=D0=BA=D0=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- report/.gitignore | 7 + report/refs.bib | 634 ++++++++++++++++++++++++++++++++++++++++++++++ report/report.tex | 287 +++++++++++++++++++++ 3 files changed, 928 insertions(+) create mode 100755 report/.gitignore create mode 100755 report/refs.bib create mode 100755 report/report.tex diff --git a/report/.gitignore b/report/.gitignore new file mode 100755 index 0000000..a389b6c --- /dev/null +++ b/report/.gitignore @@ -0,0 +1,7 @@ +* + +!**/ +!.gitignore +!report.tex +!img/*.png +!refs.bib \ No newline at end of file diff --git a/report/refs.bib b/report/refs.bib new file mode 100755 index 0000000..13cc33d --- /dev/null +++ b/report/refs.bib @@ -0,0 +1,634 @@ +@book{skobtsov-evolution, + author = {Скобцов, Ю. А. and Сперанский, Д. В.}, + title = {Эволюционные вычисления: Учебное пособие}, + year = {2012}, + publisher = {Национальный Открытый Университет «ИНТУИТ»}, + address = {М.}, + pagetotal = {331}, + note = {ил. — Серия «Основы информационных технологий»} +} + + + +% Miscellaneous +@online{yandex-research-calendar, + author = {Осиков, Александр and Уласович, Кристина and Лохов, Пётр and + Андрианова, Надежда and Сергиенко, Ярослав and Матющенко, Максим}, + title = {Сколько длятся рабочие встречи}, + year = {2025}, + url = {https://yandex.ru/company/researches/2025/calendar}, + urldate = {2026-01-03}, + media = {eresource}, + organization = {Яндекс} +} +@online{speech-rate, + author = {Светозарова, Н.
Д.}, + title = {Темп речи}, + year = {2017}, + url = {https://old.bigenc.ru/linguistics/text/4186876}, + urldate = {2026-01-03}, + media = {eresource}, + organization = {Большая российская энциклопедия}, + note = {Электронная версия} +} +@misc{nemo-toolkit, + title = {NeMo: a toolkit for building AI applications using Neural Modules}, + author = {Oleksii Kuchaiev and Jason Li and Huyen Nguyen and Oleksii Hrinchuk and Ryan Leary and Boris Ginsburg and Samuel Kriman and Stanislav Beliaev and Vitaly Lavrukhin and Jack Cook and Patrice Castonguay and Mariya Popova and Jocelyn Huang and Jonathan M. Cohen}, + year = {2019}, + eprint = {1909.09577}, + archiveprefix = {arXiv}, + primaryclass = {cs.LG}, + url = {https://arxiv.org/abs/1909.09577}, + urldate = {2026-01-10} +} + + +% End-to-end research papers +@misc{auto-meet, + title = {AutoMeet: a proof-of-concept study of genAI to automate meetings in automotive engineering}, + author = {Simon Baeuerle and Max Radyschevski and Ulrike Pado}, + year = {2025}, + eprint = {2507.16054}, + archiveprefix = {arXiv}, + primaryclass = {cs.CL}, + url = {https://arxiv.org/abs/2507.16054}, + urldate = {2026-01-10} +} +@misc{building-real-world-meeting-summarization, + title = {Building Real-World Meeting Summarization Systems using Large Language Models: A Practical Perspective}, + author = {Md Tahmid Rahman Laskar and Xue-Yong Fu and Cheng Chen and Shashi Bhushan TN}, + year = {2023}, + eprint = {2310.19233}, + archiveprefix = {arXiv}, + primaryclass = {cs.CL}, + url = {https://arxiv.org/abs/2310.19233}, + urldate = {2026-01-10} +} +@misc{end-to-end-speech-summarization, + title = {An End-to-End Speech Summarization Using Large Language Model}, + author = {Hengchao Shang and Zongyao Li and Jiaxin Guo and Shaojun Li and Zhiqiang Rao and Yuanchang Luo and Daimeng Wei and Hao Yang}, + year = {2024}, + eprint = {2407.02005}, + archiveprefix = {arXiv}, + primaryclass = {cs.CL}, + url = {https://arxiv.org/abs/2407.02005}, + urldate = {2026-01-10}
+} +@inproceedings{meetalk, + title = {Meetalk: Retrieval-Augmented and Adaptively Personalized Meeting Summarization with Knowledge Learning from User Corrections}, + author = {Chen, Zheng and + Futian, Jiang and + Deng, Yue and + He, Changyang and + Li, Bo}, + editor = {Zhang, Yuji and + Chen, Canyu and + Li, Sha and + Geva, Mor and + Han, Chi and + Wang, Xiaozhi and + Feng, Shangbin and + Gao, Silin and + Augenstein, Isabelle and + Bansal, Mohit and + Li, Manling and + Ji, Heng}, + booktitle = {Proceedings of the 3rd Workshop on Towards Knowledgeable Foundation Models (KnowFM)}, + year = {2025}, + address = {Vienna, Austria}, + publisher = {Association for Computational Linguistics}, + url = {https://aclanthology.org/2025.knowllm-1.9/}, + urldate = {2026-01-10}, + doi = {10.18653/v1/2025.knowllm-1.9}, + pages = {94--110}, + isbn = {979-8-89176-283-1} +} + +% DIY articles +@online{yoomoney-transcribe-calls, + author = {izzyleet}, + title = {Как мы транскрибируем аудио с внутренних созвонов в текст}, + year = {2025}, + url = {https://habr.com/ru/companies/yoomoney/articles/896096/}, + urldate = {2026-01-03}, + media = {eresource}, + organization = {ЮMoney} +} +@online{alfabank-local-transcriber, + author = {Луняка, Николай}, + title = {На входе аудио, на выходе — саммари. Собираем локальный транскрибатор из бесплатного софта}, + year = {2025}, + url = {https://habr.com/ru/companies/alfa/articles/909498/}, + urldate = {2026-01-03}, + media = {eresource}, + organization = {Альфа-Банк} +} +@online{bitrix-copilot-videocalls, + author = {Соколов, Дмитрий}, + title = {Будьте добры, помедленнее!
Я записываю… или Как мы приручили нейросеть для видеозвонков}, + year = {2025}, + url = {https://habr.com/ru/companies/bitrix/articles/904916/}, + urldate = {2026-01-03}, + media = {eresource}, + organization = {Битрикс24} +} +@online{habr-autoprotocoling-start, + author = {virus3908}, + title = {Как я начал писать своё автопротоколирование}, + year = {2025}, + url = {https://habr.com/ru/articles/910616/}, + urldate = {2026-01-03}, + media = {eresource}, + organization = {Habr} +} + + + +% Transcription + diarization +@online{whisperlivekit, + author = {{QuentinFuxa}}, + title = {{QuentinFuxa/WhisperLiveKit}: Ultra-low latency, self-hosted real-time speech-to-text with speaker identification}, + urldate = {2025-12-25}, + url = {https://github.com/QuentinFuxa/WhisperLiveKit}, + media = {eresource}, + organization = {GitHub} +} +@misc{whisperx, + title = {WhisperX: Time-Accurate Speech Transcription of Long-Form Audio}, + author = {Max Bain and Jaesung Huh and Tengda Han and Andrew Zisserman}, + year = {2023}, + eprint = {2303.00747}, + archiveprefix = {arXiv}, + primaryclass = {cs.SD}, + url = {https://arxiv.org/abs/2303.00747}, + urldate = {2026-01-10} +} + + +% Transcription +@misc{e2e_asr_survey, + title = {End-to-End Speech Recognition: A Survey}, + author = {Rohit Prabhavalkar and Takaaki Hori and Tara N.
Sainath and Ralf Schlüter and Shinji Watanabe}, + year = {2023}, + eprint = {2303.03329}, + archiveprefix = {arXiv}, + primaryclass = {eess.AS}, + url = {https://arxiv.org/abs/2303.03329}, + urldate = {2026-01-10} +} +@article{asr-deep-learning-survey, + title = {Automatic Speech Recognition: A survey of deep learning techniques and approaches}, + journal = {International Journal of Cognitive Computing in Engineering}, + volume = {6}, + pages = {201--237}, + year = {2025}, + issn = {2666-3074}, + doi = {10.1016/j.ijcce.2024.12.007}, + url = {https://www.sciencedirect.com/science/article/pii/S2666307424000573}, + urldate = {2026-01-10}, + author = {Harsh Ahlawat and Naveen Aggarwal and Deepti Gupta}, + keywords = {Automatic Speech Recognition, Deep Neural Networks, Conformer, Transformer, Datasets, Multilingual, Deep learning} +} +@online{russian-asr-leaderboard, + author = {Шмырев, Николай}, + title = {Открытые модели для распознавания русской речи 2025}, + year = {2025}, + url = {https://alphacephei.com/nsh/2025/04/18/russian-models.html}, + urldate = {2026-01-05}, + media = {eresource}, + organization = {Alpha Cephei} +} +@misc{open-asr-leaderboard, + title = {Open ASR Leaderboard: Towards Reproducible and Transparent Multilingual and Long-Form Speech Recognition Evaluation}, + author = {Vaibhav Srivastav and Steven Zheng and Eric Bezzam and Eustache Le Bihan and Nithin Koluguri and Piotr Żelasko and Somshubra Majumdar and Adel Moumen and Sanchit Gandhi}, + year = {2025}, + eprint = {2510.06961}, + archiveprefix = {arXiv}, + primaryclass = {cs.CL}, + url = {https://arxiv.org/abs/2510.06961}, + urldate = {2026-01-10} +} +% Whisper +@misc{whisper, + title = {Robust Speech Recognition via Large-Scale Weak Supervision}, + author = {Alec Radford and Jong Wook Kim and Tao Xu and + Greg Brockman and Christine McLeavey and Ilya Sutskever}, + year = {2022}, + eprint = {2212.04356}, + archiveprefix = {arXiv}, + primaryclass = {eess.AS}, + url =
{https://arxiv.org/abs/2212.04356}, + urldate = {2026-01-10} +} +@misc{distil-whisper, + title = {Distil-Whisper: Robust Knowledge Distillation via Large-Scale Pseudo Labelling}, + author = {Sanchit Gandhi and Patrick von Platen and Alexander M. Rush}, + year = {2023}, + eprint = {2311.00430}, + archiveprefix = {arXiv}, + primaryclass = {cs.CL}, + url = {https://arxiv.org/abs/2311.00430}, + urldate = {2026-01-10} +} +@online{ctranslate2, + author = {{OpenNMT}}, + title = {{OpenNMT/CTranslate2}: Fast inference engine for Transformer models}, + urldate = {2025-12-25}, + url = {https://github.com/OpenNMT/CTranslate2}, + media = {eresource}, + organization = {GitHub} +} +@online{faster-whisper, + author = {{SYSTRAN}}, + title = {{SYSTRAN/faster-whisper}: Faster Whisper transcription with CTranslate2}, + urldate = {2025-12-25}, + url = {https://github.com/SYSTRAN/faster-whisper}, + media = {eresource}, + organization = {GitHub} +} +@inproceedings{whisper-streaming, + title = {Turning Whisper into Real-Time Transcription System}, + author = {Mach{\'a}{\v{c}}ek, Dominik and + Dabre, Raj and + Bojar, Ond{\v{r}}ej}, + editor = {Saha, Sriparna and + Sujaini, Herry}, + booktitle = {Proceedings of the 13th International Joint Conference on Natural Language Processing and the 3rd Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics: System Demonstrations}, + month = nov, + year = {2023}, + address = {Bali, Indonesia}, + publisher = {Association for Computational Linguistics}, + url = {https://aclanthology.org/2023.ijcnlp-demo.3}, + urldate = {2026-01-10}, + pages = {17--24} +} +@inproceedings{simul-whisper, + series = {Interspeech 2024}, + title = {Simul-Whisper: Attention-Guided Streaming Whisper with Truncation Detection}, + url = {https://doi.org/10.21437/Interspeech.2024-1814}, + urldate = {2026-01-10}, + doi = {10.21437/interspeech.2024-1814}, + booktitle = {Interspeech 2024}, + publisher = {ISCA}, + author = {Wang, Haoyu and Hu,
Guoqiang and Lin, Guodong and Zhang, Wei-Qiang and Li, Jian}, + year = {2024}, + month = sep, + pages = {4483--4487}, + collection = {Interspeech 2024} +} +@misc{simulstreaming, + title = {Simultaneous Translation with Offline Speech and LLM Models in CUNI Submission to IWSLT 2025}, + author = {Dominik Macháček and Peter Polák}, + year = {2025}, + eprint = {2506.17077}, + archiveprefix = {arXiv}, + primaryclass = {cs.CL}, + url = {https://arxiv.org/abs/2506.17077}, + urldate = {2026-01-10} +} +% Sber +@misc{giga-am, + title = {GigaAM: Efficient Self-Supervised Learner for Speech Recognition}, + author = {Aleksandr Kutsakov and Alexandr Maximenko and Georgii Gospodinov and Pavel Bogomolov and Fyodor Minkin}, + year = {2025}, + eprint = {2506.01192}, + archiveprefix = {arXiv}, + primaryclass = {eess.AS}, + url = {https://arxiv.org/abs/2506.01192}, + urldate = {2026-01-10} +} +@online{giga-am-v3, + author = {Куцаков, Александр}, + title = {GigaAM-v3: открытая SOTA-модель распознавания речи на русском}, + year = {2025}, + url = {https://habr.com/ru/companies/sberdevices/articles/973160/}, + urldate = {2026-01-05}, + media = {eresource}, + organization = {SberDevices} +} +% Other +@misc{canary-1b-v2-parakeet-tdt-06b-v3, + title = {Canary-1B-v2 \& Parakeet-TDT-0.6B-v3: Efficient and High-Performance Models for Multilingual ASR and AST}, + author = {Monica Sekoyan and Nithin Rao Koluguri and Nune Tadevosyan and Piotr Zelasko and Travis Bartley and Nikolay Karpov and Jagadeesh Balam and Boris Ginsburg}, + year = {2025}, + eprint = {2509.14128}, + archiveprefix = {arXiv}, + primaryclass = {cs.CL}, + url = {https://arxiv.org/abs/2509.14128}, + urldate = {2026-01-10} +} +@online{vosk, + title = {Vosk Speech Recognition Toolkit}, + year = {2023}, + url = {https://alphacephei.com/vosk/}, + urldate = {2026-01-05}, + media = {eresource}, + organization = {Alpha Cephei} +} + + +% Diarization +@article{speaker-diarization-review, + author = {O’Shaughnessy, Douglas}, + title =
{Speaker Diarization: A Review of Objectives and Methods}, + journal = {Applied Sciences}, + volume = {15}, + year = {2025}, + number = {4}, + eid = {2002}, + url = {https://www.mdpi.com/2076-3417/15/4/2002}, + urldate = {2026-01-10}, + issn = {2076-3417}, + doi = {10.3390/app15042002} +} +@misc{benchmarking-diarization-models, + title = {Benchmarking Diarization Models}, + author = {Luca A. Lanzendörfer and Florian Grötschla and Cesare Blaser and Roger Wattenhofer}, + year = {2025}, + eprint = {2509.26177}, + archiveprefix = {arXiv}, + primaryclass = {cs.SD}, + url = {https://arxiv.org/abs/2509.26177}, + urldate = {2026-01-10} +} +@misc{sdbench, + title = {SDBench: A Comprehensive Benchmark Suite for Speaker Diarization}, + author = {Eduardo Pacheco and Atila Orhon and Berkin Durmus and Blaise Munyampirwa and Andrey Leonov}, + year = {2025}, + eprint = {2507.16136}, + archiveprefix = {arXiv}, + primaryclass = {cs.SD}, + url = {https://arxiv.org/abs/2507.16136}, + urldate = {2026-01-10} +} +@misc{pyannote-audio, + title = {pyannote.audio: neural building blocks for speaker diarization}, + author = {Hervé Bredin and Ruiqing Yin and Juan Manuel Coria and Gregory Gelly and Pavel Korshunov and Marvin Lavechin and Diego Fustes and Hadrien Titeux and Wassim Bouaziz and Marie-Philippe Gill}, + year = {2019}, + eprint = {1911.01255}, + archiveprefix = {arXiv}, + primaryclass = {eess.AS}, + url = {https://arxiv.org/abs/1911.01255}, + urldate = {2026-01-10} +} +@online{pyannote-community-1, + author = {{pyannote.ai}}, + title = {Community-1: Unleashing open-source diarization}, + year = {2025}, + url = {https://www.pyannote.ai/blog/community-1}, + urldate = {2026-01-05}, + organization = {pyannote.ai}, + media = {eresource}, + note = {Блог о релизе модели Community-1 для open-source speaker diarization} +} +@misc{sortformer, + title = {Sortformer: A Novel Approach for Permutation-Resolved Speaker Supervision in Speech-to-Text Systems}, + author = {Taejin Park and
Ivan Medennikov and Kunal Dhawan and Weiqing Wang and He Huang and Nithin Rao Koluguri and Krishna C. Puvvada and Jagadeesh Balam and Boris Ginsburg}, + year = {2025}, + eprint = {2409.06656}, + archiveprefix = {arXiv}, + primaryclass = {eess.AS}, + url = {https://arxiv.org/abs/2409.06656}, + urldate = {2026-01-10} +} +@inproceedings{diarizen, + title = {Leveraging self-supervised learning for speaker diarization}, + author = {Han, Jiangyu and Landini, Federico and Rohdin, Johan and Silnova, Anna and Diez, Mireia and Burget, Luk{\'a}{\v{s}}}, + booktitle = {Proceedings of the IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)}, + year = {2025} +} +@misc{streaming-sortformer, + title = {Streaming Sortformer: Speaker Cache-Based Online Speaker Diarization with Arrival-Time Ordering}, + author = {Ivan Medennikov and Taejin Park and Weiqing Wang and He Huang and Kunal Dhawan and Jinhan Wang and Jagadeesh Balam and Boris Ginsburg}, + year = {2025}, + eprint = {2507.18446}, + archiveprefix = {arXiv}, + primaryclass = {eess.AS}, + url = {https://arxiv.org/abs/2507.18446}, + urldate = {2026-01-10} +} + +% VAD +@misc{silero-vad, + author = {{Silero Team}}, + title = {Silero VAD: pre-trained enterprise-grade Voice Activity Detector (VAD), Number Detector and Language Classifier}, + year = {2024}, + urldate = {2025-12-25}, + url = {https://github.com/snakers4/silero-vad}, + organization = {GitHub}, + media = {eresource}, + email = {hello@silero.ai} +} + +% Summarization +@article{text-summarization-survey, + title = {A systematic survey of text summarization: From statistical methods to large language models}, + author = {Zhang, Haopeng and Yu, Philip S. and Zhang, Jiawei}, + journal = {ACM Computing Surveys}, + volume = {57}, + number = {11}, + pages = {1--41}, + year = {2025}, + publisher = {ACM New York, NY} +} + +% LLM +% Tiny Titans: Can Smaller Large Language Models Punch Above Their +% Weight in the Real World for Meeting Summarization?
+@misc{vikhr, + title = {Vikhr: The Family of Open-Source Instruction-Tuned Large Language Models for Russian}, + author = {Aleksandr Nikolich and Konstantin Korolev and Sergei Bratchikov and Igor Kiselev and Artem Shelmanov}, + year = {2025}, + eprint = {2405.13929}, + archiveprefix = {arXiv}, + primaryclass = {cs.CL}, + url = {https://arxiv.org/abs/2405.13929}, + urldate = {2026-01-10} +} +@misc{qwen, + title = {Qwen2.5 Technical Report}, + author = {{Qwen} and An Yang and Baosong Yang and Beichen Zhang and Binyuan Hui and Bo Zheng and Bowen Yu and Chengyuan Li and Dayiheng Liu and Fei Huang and Haoran Wei and Huan Lin and Jian Yang and Jianhong Tu and Jianwei Zhang and Jianxin Yang and Jiaxi Yang and Jingren Zhou and Junyang Lin and Kai Dang and Keming Lu and Keqin Bao and Kexin Yang and Le Yu and Mei Li and Mingfeng Xue and Pei Zhang and Qin Zhu and Rui Men and Runji Lin and Tianhao Li and Tianyi Tang and Tingyu Xia and Xingzhang Ren and Xuancheng Ren and Yang Fan and Yang Su and Yichang Zhang and Yu Wan and Yuqiong Liu and Zeyu Cui and Zhenru Zhang and Zihan Qiu}, + year = {2025}, + eprint = {2412.15115}, + archiveprefix = {arXiv}, + primaryclass = {cs.CL}, + url = {https://arxiv.org/abs/2412.15115}, + urldate = {2026-01-10} +} +@misc{llama, + title = {The Llama 3 Herd of Models}, + author = {Aaron Grattafiori and Abhimanyu Dubey and Abhinav Jauhri and Abhinav Pandey and Abhishek Kadian and Ahmad Al-Dahle and Aiesha Letman and Akhil Mathur and Alan Schelten and Alex Vaughan and Amy Yang and Angela Fan and Anirudh Goyal and Anthony Hartshorn and Aobo Yang and Archi Mitra and Archie Sravankumar and Artem Korenev and Arthur Hinsvark and Arun Rao and Aston Zhang and Aurelien Rodriguez and Austen Gregerson and Ava Spataru and Baptiste Roziere and Bethany Biron and Binh Tang and Bobbie Chern and Charlotte Caucheteux and Chaya Nayak and Chloe Bi and Chris Marra and Chris McConnell and Christian Keller and Christophe Touret and Chunyang Wu and Corinne Wong and
Cristian Canton Ferrer and Cyrus Nikolaidis and Damien Allonsius and Daniel Song and Danielle Pintz and Danny Livshits and Danny Wyatt and David Esiobu and Dhruv Choudhary and Dhruv Mahajan and Diego Garcia-Olano and Diego Perino and Dieuwke Hupkes and Egor Lakomkin and Ehab AlBadawy and Elina Lobanova and Emily Dinan and Eric Michael Smith and Filip Radenovic and Francisco Guzmán and Frank Zhang and Gabriel Synnaeve and Gabrielle Lee and Georgia Lewis Anderson and Govind Thattai and Graeme Nail and Gregoire Mialon and Guan Pang and Guillem Cucurell and Hailey Nguyen and Hannah Korevaar and Hu Xu and Hugo Touvron and Iliyan Zarov and Imanol Arrieta Ibarra and Isabel Kloumann and Ishan Misra and Ivan Evtimov and Jack Zhang and Jade Copet and Jaewon Lee and Jan Geffert and Jana Vranes and Jason Park and Jay Mahadeokar and Jeet Shah and Jelmer van der Linde and Jennifer Billock and Jenny Hong and Jenya Lee and Jeremy Fu and Jianfeng Chi and Jianyu Huang and Jiawen Liu and Jie Wang and Jiecao Yu and Joanna Bitton and Joe Spisak and Jongsoo Park and Joseph Rocca and Joshua Johnstun and Joshua Saxe and Junteng Jia and Kalyan Vasuden Alwala and Karthik Prasad and Kartikeya Upasani and Kate Plawiak and Ke Li and Kenneth Heafield and Kevin Stone and Khalid El-Arini and Krithika Iyer and Kshitiz Malik and Kuenley Chiu and Kunal Bhalla and Kushal Lakhotia and Lauren Rantala-Yeary and Laurens van der Maaten and Lawrence Chen and Liang Tan and Liz Jenkins and Louis Martin and Lovish Madaan and Lubo Malo and Lukas Blecher and Lukas Landzaat and Luke de Oliveira and Madeline Muzzi and Mahesh Pasupuleti and Mannat Singh and Manohar Paluri and Marcin Kardas and Maria Tsimpoukelli and Mathew Oldham and Mathieu Rita and Maya Pavlova and Melanie Kambadur and Mike Lewis and Min Si and Mitesh Kumar Singh and Mona Hassan and Naman Goyal and Narjes Torabi and Nikolay Bashlykov and Nikolay Bogoychev and Niladri Chatterji and Ning Zhang and Olivier Duchenne and Onur Çelebi and Patrick 
Alrassy and Pengchuan Zhang and Pengwei Li and Petar Vasic and Peter Weng and Prajjwal Bhargava and Pratik Dubal and Praveen Krishnan and Punit Singh Koura and Puxin Xu and Qing He and Qingxiao Dong and Ragavan Srinivasan and Raj Ganapathy and Ramon Calderer and Ricardo Silveira Cabral and Robert Stojnic and Roberta Raileanu and Rohan Maheswari and Rohit Girdhar and Rohit Patel and Romain Sauvestre and Ronnie Polidoro and Roshan Sumbaly and Ross Taylor and Ruan Silva and Rui Hou and Rui Wang and Saghar Hosseini and Sahana Chennabasappa and Sanjay Singh and Sean Bell and Seohyun Sonia Kim and Sergey Edunov and Shaoliang Nie and Sharan Narang and Sharath Raparthy and Sheng Shen and Shengye Wan and Shruti Bhosale and Shun Zhang and Simon Vandenhende and Soumya Batra and Spencer Whitman and Sten Sootla and Stephane Collot and Suchin Gururangan and Sydney Borodinsky and Tamar Herman and Tara Fowler and Tarek Sheasha and Thomas Georgiou and Thomas Scialom and Tobias Speckbacher and Todor Mihaylov and Tong Xiao and Ujjwal Karn and Vedanuj Goswami and Vibhor Gupta and Vignesh Ramanathan and Viktor Kerkez and Vincent Gonguet and Virginie Do and Vish Vogeti and Vítor Albiero and Vladan Petrovic and Weiwei Chu and Wenhan Xiong and Wenyin Fu and Whitney Meers and Xavier Martinet and Xiaodong Wang and Xiaofang Wang and Xiaoqing Ellen Tan and Xide Xia and Xinfeng Xie and Xuchao Jia and Xuewei Wang and Yaelle Goldschlag and Yashesh Gaur and Yasmine Babaei and Yi Wen and Yiwen Song and Yuchen Zhang and Yue Li and Yuning Mao and Zacharie Delpierre Coudert and Zheng Yan and Zhengxing Chen and Zoe Papakipos and Aaditya Singh and Aayushi Srivastava and Abha Jain and Adam Kelsey and Adam Shajnfeld and Adithya Gangidi and Adolfo Victoria and Ahuva Goldstand and Ajay Menon and Ajay Sharma and Alex Boesenberg and Alexei Baevski and Allie Feinstein and Amanda Kallet and Amit Sangani and Amos Teo and Anam Yunus and Andrei Lupu and Andres Alvarado and Andrew Caples and Andrew Gu and Andrew 
Ho and Andrew Poulton and Andrew Ryan and Ankit Ramchandani and Annie Dong and Annie Franco and Anuj Goyal and Aparajita Saraf and Arkabandhu Chowdhury and Ashley Gabriel and Ashwin Bharambe and Assaf Eisenman and Azadeh Yazdan and Beau James and Ben Maurer and Benjamin Leonhardi and Bernie Huang and Beth Loyd and Beto De Paola and Bhargavi Paranjape and Bing Liu and Bo Wu and Boyu Ni and Braden Hancock and Bram Wasti and Brandon Spence and Brani Stojkovic and Brian Gamido and Britt Montalvo and Carl Parker and Carly Burton and Catalina Mejia and Ce Liu and Changhan Wang and Changkyu Kim and Chao Zhou and Chester Hu and Ching-Hsiang Chu and Chris Cai and Chris Tindal and Christoph Feichtenhofer and Cynthia Gao and Damon Civin and Dana Beaty and Daniel Kreymer and Daniel Li and David Adkins and David Xu and Davide Testuggine and Delia David and Devi Parikh and Diana Liskovich and Didem Foss and Dingkang Wang and Duc Le and Dustin Holland and Edward Dowling and Eissa Jamil and Elaine Montgomery and Eleonora Presani and Emily Hahn and Emily Wood and Eric-Tuan Le and Erik Brinkman and Esteban Arcaute and Evan Dunbar and Evan Smothers and Fei Sun and Felix Kreuk and Feng Tian and Filippos Kokkinos and Firat Ozgenel and Francesco Caggioni and Frank Kanayet and Frank Seide and Gabriela Medina Florez and Gabriella Schwarz and Gada Badeer and Georgia Swee and Gil Halpern and Grant Herman and Grigory Sizov and Guangyi Zhang and Guna Lakshminarayanan and Hakan Inan and Hamid Shojanazeri and Han Zou and Hannah Wang and Hanwen Zha and Haroun Habeeb and Harrison Rudolph and Helen Suk and Henry Aspegren and Hunter Goldman and Hongyuan Zhan and Ibrahim Damlaj and Igor Molybog and Igor Tufanov and Ilias Leontiadis and Irina-Elena Veliche and Itai Gat and Jake Weissman and James Geboski and James Kohli and Janice Lam and Japhet Asher and Jean-Baptiste Gaya and Jeff Marcus and Jeff Tang and Jennifer Chan and Jenny Zhen and Jeremy Reizenstein and Jeremy Teboul and Jessica Zhong
and Jian Jin and Jingyi Yang and Joe Cummings and Jon Carvill and Jon Shepard and Jonathan McPhie and Jonathan Torres and Josh Ginsburg and Junjie Wang and Kai Wu and Kam Hou U and Karan Saxena and Kartikay Khandelwal and Katayoun Zand and Kathy Matosich and Kaushik Veeraraghavan and Kelly Michelena and Keqian Li and Kiran Jagadeesh and Kun Huang and Kunal Chawla and Kyle Huang and Lailin Chen and Lakshya Garg and Lavender A and Leandro Silva and Lee Bell and Lei Zhang and Liangpeng Guo and Licheng Yu and Liron Moshkovich and Luca Wehrstedt and Madian Khabsa and Manav Avalani and Manish Bhatt and Martynas Mankus and Matan Hasson and Matthew Lennie and Matthias Reso and Maxim Groshev and Maxim Naumov and Maya Lathi and Meghan Keneally and Miao Liu and Michael L. Seltzer and Michal Valko and Michelle Restrepo and Mihir Patel and Mik Vyatskov and Mikayel Samvelyan and Mike Clark and Mike Macey and Mike Wang and Miquel Jubert Hermoso and Mo Metanat and Mohammad Rastegari and Munish Bansal and Nandhini Santhanam and Natascha Parks and Natasha White and Navyata Bawa and Nayan Singhal and Nick Egebo and Nicolas Usunier and Nikhil Mehta and Nikolay Pavlovich Laptev and Ning Dong and Norman Cheng and Oleg Chernoguz and Olivia Hart and Omkar Salpekar and Ozlem Kalinli and Parkin Kent and Parth Parekh and Paul Saab and Pavan Balaji and Pedro Rittner and Philip Bontrager and Pierre Roux and Piotr Dollar and Polina Zvyagina and Prashant Ratanchandani and Pritish Yuvraj and Qian Liang and Rachad Alao and Rachel Rodriguez and Rafi Ayub and Raghotham Murthy and Raghu Nayani and Rahul Mitra and Rangaprabhu Parthasarathy and Raymond Li and Rebekkah Hogan and Robin Battey and Rocky Wang and Russ Howes and Ruty Rinott and Sachin Mehta and Sachin Siby and Sai Jayesh Bondu and Samyak Datta and Sara Chugh and Sara Hunt and Sargun Dhillon and Sasha Sidorov and Satadru Pan and Saurabh Mahajan and Saurabh Verma and Seiji Yamamoto and Sharadh Ramaswamy and Shaun Lindsay and
Sheng Feng and Shenghao Lin and Shengxin Cindy Zha and Shishir Patil and Shiva Shankar and Shuqiang Zhang and Sinong Wang and Sneha Agarwal and Soji Sajuyigbe and Soumith Chintala and Stephanie Max and Stephen Chen and Steve Kehoe and Steve Satterfield and Sudarshan Govindaprasad and Sumit Gupta and Summer Deng and Sungmin Cho and Sunny Virk and Suraj Subramanian and Sy Choudhury and Sydney Goldman and Tal Remez and Tamar Glaser and Tamara Best and Thilo Koehler and Thomas Robinson and Tianhe Li and Tianjun Zhang and Tim Matthews and Timothy Chou and Tzook Shaked and Varun Vontimitta and Victoria Ajayi and Victoria Montanez and Vijai Mohan and Vinay Satish Kumar and Vishal Mangla and Vlad Ionescu and Vlad Poenaru and Vlad Tiberiu Mihailescu and Vladimir Ivanov and Wei Li and Wenchen Wang and Wenwen Jiang and Wes Bouaziz and Will Constable and Xiaocheng Tang and Xiaojian Wu and Xiaolan Wang and Xilun Wu and Xinbo Gao and Yaniv Kleinman and Yanjun Chen and Ye Hu and Ye Jia and Ye Qi and Yenda Li and Yilin Zhang and Ying Zhang and Yossi Adi and Youngjin Nam and Yu Wang and Yu Zhao and Yuchen Hao and Yundi Qian and Yunlu Li and Yuzi He and Zach Rait and Zachary DeVito and Zef Rosnbrick and Zhaoduo Wen and Zhenyu Yang and Zhiwei Zhao and Zhiyu Ma}, + year = {2024}, + eprint = {2407.21783}, + archiveprefix = {arXiv}, + primaryclass = {cs.AI}, + url = {https://arxiv.org/abs/2407.21783}, + urldate = {2026-01-10} +} +@misc{gemma, + title = {Gemma 2: Improving Open Language Models at a Practical Size}, + author = {{Gemma Team} and Morgane Riviere and Shreya Pathak and Pier Giuseppe Sessa and Cassidy Hardin and Surya Bhupatiraju and Léonard Hussenot and Thomas Mesnard and Bobak Shahriari and Alexandre Ramé and Johan Ferret and Peter Liu and Pouya Tafti and Abe Friesen and Michelle Casbon and Sabela Ramos and Ravin Kumar and Charline Le Lan and Sammy Jerome and Anton Tsitsulin and Nino Vieillard and Piotr Stanczyk and Sertan Girgin and Nikola Momchev and
Matt Hoffman and Shantanu Thakoor and Jean-Bastien Grill and Behnam Neyshabur and Olivier Bachem and Alanna Walton and Aliaksei Severyn and Alicia Parrish and Aliya Ahmad and Allen Hutchison and Alvin Abdagic and Amanda Carl and Amy Shen and Andy Brock and Andy Coenen and Anthony Laforge and Antonia Paterson and Ben Bastian and Bilal Piot and Bo Wu and Brandon Royal and Charlie Chen and Chintu Kumar and Chris Perry and Chris Welty and Christopher A. Choquette-Choo and Danila Sinopalnikov and David Weinberger and Dimple Vijaykumar and Dominika Rogozińska and Dustin Herbison and Elisa Bandy and Emma Wang and Eric Noland and Erica Moreira and Evan Senter and Evgenii Eltyshev and Francesco Visin and Gabriel Rasskin and Gary Wei and Glenn Cameron and Gus Martins and Hadi Hashemi and Hanna Klimczak-Plucińska and Harleen Batra and Harsh Dhand and Ivan Nardini and Jacinda Mein and Jack Zhou and James Svensson and Jeff Stanway and Jetha Chan and Jin Peng Zhou and Joana Carrasqueira and Joana Iljazi and Jocelyn Becker and Joe Fernandez and Joost van Amersfoort and Josh Gordon and Josh Lipschultz and Josh Newlan and Ju-yeong Ji and Kareem Mohamed and Kartikeya Badola and Kat Black and Katie Millican and Keelin McDonell and Kelvin Nguyen and Kiranbir Sodhia and Kish Greene and Lars Lowe Sjoesund and Lauren Usui and Laurent Sifre and Lena Heuermann and Leticia Lago and Lilly McNealus and Livio Baldini Soares and Logan Kilpatrick and Lucas Dixon and Luciano Martins and Machel Reid and Manvinder Singh and Mark Iverson and Martin Görner and Mat Velloso and Mateo Wirth and Matt Davidow and Matt Miller and Matthew Rahtz and Matthew Watson and Meg Risdal and Mehran Kazemi and Michael Moynihan and Ming Zhang and Minsuk Kahng and Minwoo Park and Mofi Rahman and Mohit Khatwani and Natalie Dao and Nenshad Bardoliwalla and Nesh Devanathan and Neta Dumai and Nilay Chauhan and Oscar Wahltinez and Pankil Botarda and Parker Barnes and Paul Barham and Paul Michel and Pengchong Jin and Petko 
Georgiev and Phil Culliton and Pradeep Kuppala and Ramona Comanescu and Ramona Merhej and Reena Jana and Reza Ardeshir Rokni and Rishabh Agarwal and Ryan Mullins and Samaneh Saadat and Sara Mc Carthy and Sarah Cogan and Sarah Perrin and Sébastien M. R. Arnold and Sebastian Krause and Shengyang Dai and Shruti Garg and Shruti Sheth and Sue Ronstrom and Susan Chan and Timothy Jordan and Ting Yu and Tom Eccles and Tom Hennigan and Tomas Kocisky and Tulsee Doshi and Vihan Jain and Vikas Yadav and Vilobh Meshram and Vishal Dharmadhikari and Warren Barkley and Wei Wei and Wenming Ye and Woohyun Han and Woosuk Kwon and Xiang Xu and Zhe Shen and Zhitao Gong and Zichuan Wei and Victor Cotruta and Phoebe Kirk and Anand Rao and Minh Giang and Ludovic Peran and Tris Warkentin and Eli Collins and Joelle Barral and Zoubin Ghahramani and Raia Hadsell and D. Sculley and Jeanine Banks and Anca Dragan and Slav Petrov and Oriol Vinyals and Jeff Dean and Demis Hassabis and Koray Kavukcuoglu and Clement Farabet and Elena Buchatskaya and Sebastian Borgeaud and Noah Fiedel and Armand Joulin and Kathleen Kenealy and Robert Dadashi and Alek Andreev}, + year = {2024}, + eprint = {2408.00118}, + archiveprefix = {arXiv}, + primaryclass = {cs.CL}, + url = {https://arxiv.org/abs/2408.00118}, + urldate = {2026-01-10} +} +@misc{phi-3, + title = {Phi-3 Technical Report: A Highly Capable Language Model Locally on Your Phone}, + author = {Marah Abdin and Jyoti Aneja and Hany Awadalla and Ahmed Awadallah and Ammar Ahmad Awan and Nguyen Bach and Amit Bahree and Arash Bakhtiari and Jianmin Bao and Harkirat Behl and Alon Benhaim and Misha Bilenko and Johan Bjorck and Sébastien Bubeck and Martin Cai and Qin Cai and Vishrav Chaudhary and Dong Chen and Dongdong Chen and Weizhu Chen and Yen-Chun Chen and Yi-Ling Chen and Hao Cheng and Parul Chopra and Xiyang Dai and Matthew Dixon and Ronen Eldan and Victor Fragoso and Jianfeng Gao and Mei Gao and Min Gao and Amit Garg and Allie Del Giorno and Abhishek 
Goswami and Suriya Gunasekar and Emman Haider and Junheng Hao and Russell J. Hewett and Wenxiang Hu and Jamie Huynh and Dan Iter and Sam Ade Jacobs and Mojan Javaheripi and Xin Jin and Nikos Karampatziakis and Piero Kauffmann and Mahoud Khademi and Dongwoo Kim and Young Jin Kim and Lev Kurilenko and James R. Lee and Yin Tat Lee and Yuanzhi Li and Yunsheng Li and Chen Liang and Lars Liden and Xihui Lin and Zeqi Lin and Ce Liu and Liyuan Liu and Mengchen Liu and Weishung Liu and Xiaodong Liu and Chong Luo and Piyush Madan and Ali Mahmoudzadeh and David Majercak and Matt Mazzola and Caio César Teodoro Mendes and Arindam Mitra and Hardik Modi and Anh Nguyen and Brandon Norick and Barun Patra and Daniel Perez-Becker and Thomas Portet and Reid Pryzant and Heyang Qin and Marko Radmilac and Liliang Ren and Gustavo de Rosa and Corby Rosset and Sambudha Roy and Olatunji Ruwase and Olli Saarikivi and Amin Saied and Adil Salim and Michael Santacroce and Shital Shah and Ning Shang and Hiteshi Sharma and Yelong Shen and Swadheen Shukla and Xia Song and Masahiro Tanaka and Andrea Tupini and Praneetha Vaddamanu and Chunyu Wang and Guanhua Wang and Lijuan Wang and Shuohang Wang and Xin Wang and Yu Wang and Rachel Ward and Wen Wen and Philipp Witte and Haiping Wu and Xiaoxia Wu and Michael Wyatt and Bin Xiao and Can Xu and Jiahang Xu and Weijian Xu and Jilong Xue and Sonali Yadav and Fan Yang and Jianwei Yang and Yifan Yang and Ziyi Yang and Donghan Yu and Lu Yuan and Chenruidong Zhang and Cyril Zhang and Jianwen Zhang and Li Lyna Zhang and Yi Zhang and Yue Zhang and Yunan Zhang and Xiren Zhou}, + year = {2024}, + eprint = {2404.14219}, + archiveprefix = {arXiv}, + primaryclass = {cs.CL}, + url = {https://arxiv.org/abs/2404.14219}, + urldate = {2026-01-10} +} + + +% Метрики +@inproceedings{morris-asr-metrics, + title = {From WER and RIL to MER and WIL: improved evaluation measures for connected speech recognition}, + author = {Andrew Cameron Morris and Viktoria Maier and Phil 
Green}, + year = {2004}, + booktitle = {Interspeech 2004}, + pages = {2765--2768}, + doi = {10.21437/Interspeech.2004-668}, + issn = {2958-1796} +} +@inproceedings{rouge, + title = {{ROUGE}: A Package for Automatic Evaluation of Summaries}, + author = {Lin, Chin-Yew}, + booktitle = {Text Summarization Branches Out}, + pages = {74--81}, + year = {2004} +} +@inproceedings{bert-score, + title = {BERTScore: Evaluating Text Generation with BERT}, + author = {Tianyi Zhang and Varsha Kishore and Felix Wu and Kilian Q. Weinberger and Yoav Artzi}, + booktitle = {International Conference on Learning Representations}, + year = {2020}, + url = {https://openreview.net/forum?id=SkeHuCVFDr}, + urldate = {2026-01-10} +} +@article{qaeval, + title = {Towards question-answering as an automatic metric for evaluating the content quality of a summary}, + author = {Deutsch, Daniel and Bedrax-Weiss, Tania and Roth, Dan}, + journal = {Transactions of the Association for Computational Linguistics}, + volume = {9}, + pages = {774--789}, + year = {2021}, + publisher = {MIT Press} +} +@inproceedings{questeval, + title = {QuestEval: Summarization asks for fact-based evaluation}, + author = {Scialom, Thomas and Dray, Paul-Alexis and Lamprier, Sylvain and Piwowarski, Benjamin and Staiano, Jacopo and Wang, Alex and Gallinari, Patrick}, + booktitle = {Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing}, + pages = {6594--6604}, + year = {2021} +} +@misc{answers-unite, + title = {Answers Unite!
Unsupervised Metrics for Reinforced Summarization Models}, + author = {Thomas Scialom and Sylvain Lamprier and Benjamin Piwowarski and Jacopo Staiano}, + year = {2019}, + eprint = {1909.01610}, + archiveprefix = {arXiv}, + primaryclass = {cs.CL}, + url = {https://arxiv.org/abs/1909.01610}, + urldate = {2026-01-10} +} +@inbook{qa-legal-summaries, + title = {A Question-Answering Approach to Evaluating Legal Summaries}, + isbn = {9781643684734}, + issn = {1879-8314}, + url = {https://doi.org/10.3233/FAIA230977}, + urldate = {2026-01-10}, + doi = {10.3233/FAIA230977}, + booktitle = {Legal Knowledge and Information Systems}, + publisher = {IOS Press}, + author = {Xu, Huihui and Ashley, Kevin}, + year = {2023}, + month = dec +} +@inproceedings{summequal, + title={SummEQuAL: Summarization evaluation via question answering using large language models}, + author={Liu, Junyuan and Shi, Zhengyan and Lipani, Aldo}, + booktitle={Proceedings of the 2nd Workshop on Natural Language Reasoning and Structured Explanations (@ ACL 2024)}, + pages={46--55}, + year={2024} +} + + +% Datasets (audio -> text -> summary) +@inproceedings{ami, + title = {The AMI meeting corpus}, + author = {Kraaij, Wessel and Hain, Thomas and Lincoln, Mike and Post, Wilfried}, + booktitle = {Proc. International Conference on Methods and Techniques in Behavioral Research}, + pages = {1--4}, + year = {2005} +} +@inproceedings{icsi, + title = {The ICSI meeting corpus}, + author = {Janin, Adam and Baron, Don and Edwards, Jane and Ellis, Dan and Gelbart, David and Morgan, Nelson and Peskin, Barbara and Pfau, Thilo and Shriberg, Elizabeth and Stolcke, Andreas and others}, + booktitle = {2003 IEEE International Conference on Acoustics, Speech, and Signal Processing
(ICASSP '03)}, + volume = {1}, + pages = {I--I}, + year = {2003}, + organization = {IEEE} +} +@inproceedings{meetingbank, + title = {MeetingBank: A Benchmark Dataset for Meeting Summarization}, + author = {Yebowen Hu and Tim Ganter and Hanieh Deilamsalehy and Franck Dernoncourt and Hassan Foroosh and Fei Liu}, + booktitle = {Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (ACL)}, + month = jul, + year = {2023}, + address = {Toronto, Canada}, + publisher = {Association for Computational Linguistics} +} + +% Datasets (text -> summary) +@inproceedings{elitr-minuting-corpus, + author = {Anna Nedoluzhko + and Muskaan Singh + and Marie Hled{\'{\i}}kov{\'{a}} + and Tirthankar Ghosal + and Ond{\v{r}}ej Bojar}, + title = {{ELITR} {Minuting} {Corpus}: + {A} Novel Dataset for Automatic Minuting from Multi-Party Meetings in {English} and {Czech}}, + booktitle = {Proceedings of the 13th International Conference + on Language Resources and Evaluation (LREC-2022)}, + year = 2022, + address = {Marseille, France}, + publisher = {European Language Resources Association (ELRA)}, + note = {In print.} +} + +% Russian-language datasets +@misc{golos, + title = {Golos: Russian Dataset for Speech Research}, + author = {Nikolay Karpov and Alexander Denisenko and Fedor Minkin}, + year = {2021}, + eprint = {2106.10161}, + archiveprefix = {arXiv}, + primaryclass = {eess.AS}, + url = {https://arxiv.org/abs/2106.10161}, + urldate = {2026-01-10} +} +@online{open_stt, + author = {{snakers4}}, + title = {{snakers4/open\_stt}: Russian Open Speech To Text (STT/ASR) Dataset}, + urldate = {2026-01-14}, + url = {https://github.com/snakers4/open_stt}, + media = {eresource}, + organization = {GitHub} +} +@online{synthetic-speech-diarization-ru, + author = {ivkond}, + title = {synthetic-speech-diarization-ru}, + year = {2025}, + url = {https://huggingface.co/datasets/ivkond/synthetic-speech-diarization-ru}, + urldate = {2026-01-03}, + media =
{eresource}, + organization = {Hugging Face} +} diff --git a/report/report.tex b/report/report.tex new file mode 100755 index 0000000..4cb772b --- /dev/null +++ b/report/report.tex @@ -0,0 +1,287 @@ +\documentclass[a4paper, final]{article} +%\usepackage{literat} % Нормальные шрифты +\usepackage[14pt]{extsizes} % для того чтобы задать нестандартный 14-ый размер шрифта +\usepackage{tabularx} +\usepackage{booktabs} +\usepackage[T2A]{fontenc} +\usepackage[utf8]{inputenc} +\usepackage[russian]{babel} +\usepackage{amsmath} +\usepackage[left=25mm, top=20mm, right=20mm, bottom=20mm, footskip=10mm]{geometry} +\usepackage{ragged2e} %для растягивания по ширине +\usepackage{setspace} %для межстрочного интервала +\usepackage{moreverb} %для работы с листингами +\usepackage{indentfirst} % для абзацного отступа +\usepackage{moreverb} %для печати в листинге исходного кода программ +\usepackage{pdfpages} %для вставки других pdf файлов +\usepackage{tikz} +\usepackage{graphicx} +\usepackage{afterpage} +\usepackage{longtable} +\usepackage{float} + +% Рекомендуется для biblatex (кавычки/локализация цитат и т.п.) 
+\usepackage{csquotes} + +% ГОСТ-стили для biblatex +\usepackage[ + backend=biber, + bibstyle=gost-numeric, % ссылки вида: [1] + citestyle=gost-numeric, + sorting=none % порядок в списке = по первому цитированию +]{biblatex} + +% Все источники хранятся в отдельном файле +\addbibresource{refs.bib} + +\renewcommand*{\bibfont}{\small} + +% \usepackage[paper=A4,DIV=12]{typearea} +\usepackage{pdflscape} +% \usepackage{lscape} + +\usepackage{array} +\usepackage{multirow} + +\renewcommand\verbatimtabsize{4\relax} +\renewcommand\listingoffset{0.2em} %отступ от номеров строк в листинге +\renewcommand{\arraystretch}{1.4} % изменяю высоту строки в таблице +\usepackage[font=small, singlelinecheck=false, justification=centering, format=plain, labelsep=period]{caption} %для настройки заголовка таблицы +\usepackage{listings} %листинги +\usepackage{xcolor} % цвета +% \usepackage{hyperref}% для гиперссылок +\usepackage{enumitem} %для перечислений + +\newcommand{\specialcell}[2][l]{\begin{tabular}[#1]{@{}l@{}}#2\end{tabular}} + + +\setlist[enumerate,itemize]{leftmargin=1.2cm} %отступ в перечислениях + +% \hypersetup{colorlinks, + % allcolors=[RGB]{010 090 200}} %красивые гиперссылки (не красные) + +% подгружаемые языки — подробнее в документации listings (это всё для листингов) +\lstloadlanguages{ SQL} +% включаем кириллицу и добавляем кое−какие опции +\lstset{tabsize=2, + breaklines, + basicstyle=\footnotesize, + columns=fullflexible, + flexiblecolumns, + numbers=left, + numberstyle={\footnotesize}, + keywordstyle=\color{blue}, + inputencoding=cp1251, + extendedchars=true +} +\lstdefinelanguage{MyC}{ + language=SQL, +% ndkeywordstyle=\color{darkgray}\bfseries, +% identifierstyle=\color{black}, +% morecomment=[n]{/**}{*/}, +% commentstyle=\color{blue}\ttfamily, +% stringstyle=\color{red}\ttfamily, +% morestring=[b]", +% showstringspaces=false, +% morecomment=[l][\color{gray}]{//}, + keepspaces=true, + escapechar=\%, + texcl=true +} + +\textheight=24cm % высота текста 
+\textwidth=16cm % text width +\oddsidemargin=0pt % left margin offset +\topmargin=-1.5cm % top margin offset +\parindent=24pt % paragraph indent +\parskip=5pt % spacing between paragraphs +\tolerance=2000 % tolerance for loose lines +\flushbottom % make all pages the same height + + +% Listings setup +\lstset{ + language=python, + extendedchars=true, % must be the literal word: listings looks at the first letter, so \true would be read as false + inputencoding=utf8, + keepspaces=true, + % captionpos=b, % listing captions at the bottom +} + +% Table of contents setup +\usepackage{tocloft} +\usepackage[hidelinks]{hyperref} + +% section entries in the TOC are NOT bold +\renewcommand{\cftsecfont}{\normalfont} +\renewcommand{\cftsecpagefont}{\normalfont} + +% remove the indent of subsection entries +\setlength{\cftsubsecindent}{0pt} + +% subsubsection headings in italics +\usepackage{titlesec} + +\titleformat{\subsubsection} + {\normalfont\large\itshape} % style: normal font + italics + {\thesubsubsection} % number (remove if not needed) + {1em} + {} + + +\begin{document} +% TITLE PAGE +\begin{center} + \hfill \break + \hfill \break + \normalsize{МИНИСТЕРСТВО НАУКИ И ВЫСШЕГО ОБРАЗОВАНИЯ РОССИЙСКОЙ ФЕДЕРАЦИИ\\ + федеральное государственное автономное образовательное учреждение высшего образования «Санкт-Петербургский политехнический университет Петра Великого»\\[10pt]} + \normalsize{Институт компьютерных наук и кибербезопасности}\\[10pt] + \normalsize{Высшая школа технологий искусственного интеллекта}\\[10pt] + \normalsize{Направление: 02.03.01 <<Математика и компьютерные науки>>}\\ + + \hfill \break + \hfill \break + \hfill \break + \large{Курсовой проект}\\ + \large{<<Подбор оптимальной конфигурации модульной системы автоматического протоколирования совещаний с помощью генетических алгоритмов>>}\\ + \large{по дисциплине <<Генетические алгоритмы>>}\\ + + \hfill \break + \hfill \break +\end{center} + +\small{ + \begin{tabular}{lrrl} + \!\!\!Студент, & \hspace{2cm} & & \\ + \!\!\!группы 5130201/20101 & \hspace{2cm} & \underline{\hspace{3cm}} & Тищенко А. А.
\\\\ + \!\!\!Руководитель, & \hspace{2cm} & & \\ + \!\!\!профессор, д.т.н. & \hspace{2cm} & \underline{\hspace{3cm}} & Большаков А. А. \\\\ + &&\hspace{4cm} + \end{tabular} + \begin{flushright} + <<\underline{\hspace{1cm}}>>\underline{\hspace{2.5cm}} 2026г. + \end{flushright} +} + +\hfill \break +\begin{center} \small{Санкт-Петербург, 2026} \end{center} +\thispagestyle{empty} % выключаем отображение номера для этой страницы + + +\newpage +\section*{Задание} + +Задание по выполнению курсового проекта студенту Тищенко Артёму Андреевичу, гр. 5130201/20101: + +\begin{enumerate} + \item Тема работы: <<Подбор оптимальной конфигурации модульной системы автоматического протоколирования совещаний с помощью генетических алгоритмов>>. + \begin{itemize} + \item Скобцов, Ю. А. Эволюционные вычисления: Учебное пособие / Ю. А. Скобцов, Д. В. Сперанский. – М.: Национальный Открытый Университет «ИНТУИТ», 2012. – 331с., ил. – (Серия «Основы информационных технологий»). + \item Building Real-World Meeting Summarization Systems using Large Language Models: A Practical Perspective / M. T. R. Laskar [и~др.]. — 2023. — arXiv: 2310.19233 [cs.CL]. — Режим доступа: https://arxiv.org/abs/2310.19233 (дата обр. 10.01.2026). + \item The AMI meeting corpus / W. Kraaij [и~др.] // Proc. International Conference on Methods and Techniques in Behavioral Research. — 2005. — С. 1–4. 
+ \end{itemize} + \item Содержание работы (перечень подлежащих разработке вопросов): + \begin{enumerate}[label=\arabic{enumi}.\arabic*, ref=\arabic{enumi}.\arabic*] + \item Краткое описание задачи автоматического протоколирования совещаний; + \item Постановка задачи подбора оптимальной конфигурации системы автоматического протоколирования; + \item Системный анализ архитектуры системы автоматического протоколирования и факторов, влияющих на качество её работы; + \item Разработка генетического алгоритма для подбора оптимальной конфигурации модульного пайплайна автоматического протоколирования; + \item Анализ результатов апробации разработанного алгоритма; + \item Заключение по работе. + \end{enumerate} + \item Дата выдачи задания «13» февраля 2026 г. +\end{enumerate} + +\noindent +\begin{tabular}{@{}p{4cm} >{\centering\arraybackslash}p{7cm} >{\raggedleft\arraybackslash}p{4cm}@{}} +Преподаватель +& \rule{7cm}{0.4pt} +& Большаков А.А. \\[-0.2cm] +& (подпись) +& (инициалы, фамилия) +\end{tabular} + +\vspace{0.1cm} + +\noindent +Задание принял к исполнению «13» февраля 2026 г. + +\noindent +\begin{tabular}{@{}p{4cm} >{\centering\arraybackslash}p{7cm} >{\raggedleft\arraybackslash}p{4cm}@{}} +Студент +& \rule{7cm}{0.4pt} +& Тищенко А.~А. \\[-0.2cm] +& (подпись) +& (инициалы, фамилия) +\end{tabular} + + +% СОДЕРЖАНИЕ +\newpage + +\tableofcontents + +\newpage +\section*{Введение} +\addcontentsline{toc}{section}{Введение} + +Совещания являются основным способом передачи информации в крупных организациях, однако информация часто теряется из-за забывчивости участников и отсутствия на встречах части коллег. Протоколирование позволяет зафиксировать ключевые решения и распространить их среди всех заинтересованных сторон. + +С технической точки зрения задача автоматического протоколирования представляет собой последовательность этапов: транскрибация (преобразование аудио в текст), диаризация (определение говорящих) и суммаризация (формирование протокола). 
Актуальность задачи возросла с распространением удалённой работы~\cite{yandex-research-calendar}, а развитие глубокого обучения и больших языковых моделей~\cite{whisper} сделало создание качественных систем возможным~\cite{auto-meet, building-real-world-meeting-summarization, end-to-end-speech-summarization, meetalk}. + +Однако большинство исследований ориентированы на английский язык, а для русского языка отсутствуют полноценные датасеты и целостные решения. В рамках данной работы предлагается разработать модульную систему автоматического протоколирования для русского языка, основанную на последовательном выполнении транскрибации, диаризации и суммаризации. Каждый модуль системы может быть реализован различными алгоритмами и моделями, что приводит к большому числу возможных конфигураций пайплайна. В данном курсовом проекте предлагается применить генетический алгоритм для подбора оптимальной конфигурации такого пайплайна под заданные ограничения по качеству и вычислительным ресурсам~\cite{skobtsov-evolution}. + +\newpage +\section{Актуальность темы} + +Актуальность автоматического протоколирования особенно возросла из-за распространения удалённой и гибридной работы: количество записываемых встреч растёт, а ручная обработка и анализ их содержания становятся практически невозможными. При этом качественный протокол требует не только точной транскрибации, но и восстановления структуры диалога и причинно-следственных связей, чтобы документ отражал ход обсуждения и его результат. + +На практике наиболее распространённый сценарий — обработка моноканальных записей без разделения дорожек по говорящим. В типичных совещаниях число участников может достигать 10–15 человек, присутствуют перебивания и быстрые смены говорящего, а ошибки диаризации дают каскадный эффект и напрямую ухудшают качество итогового протокола, особенно при фиксации поручений и ответственных.
Дополнительные ограничения связаны с необходимостью локального развёртывания без внешних API и с лимитами вычислительных ресурсов (например, одна видеокарта до 16~ГБ или CPU), а также с большой длиной стенограмм. + +В таких условиях выбор <<лучшей>> комбинации моделей и параметров становится нетривиальной многокритериальной задачей: нужно одновременно учитывать качество транскрибации, диаризации и суммаризации и ограничения по ресурсам. Генетические алгоритмы являются естественным инструментом для поиска близких к оптимальным решений в больших пространствах конфигураций при наличии ограничений и нескольких критериев качества. + + +\newpage +\section{Постановка задачи} + +В данной работе необходимо: + +\begin{enumerate} + \item Описать модульную схему системы автоматического протоколирования совещаний как последовательность этапов (транскрибация, диаризация, суммаризация) и определить набор альтернативных компонентов/параметров (пространство конфигураций). + + \item Сформулировать целевую функцию (фитнес) для оценки конфигурации на основе метрик качества: WER для транскрибации, DER для диаризации, метрик суммаризации/протокола (ROUGE/BERTScore и QA-метрики), а также учесть ограничения на ресурсы и требования локального запуска. + + \item Разработать и реализовать генетический алгоритм для поиска оптимальной (или близкой к оптимальной) конфигурации пайплайна: кодирование хромосомы, операторы селекции/кроссовера/мутации, критерии остановки.
+ + \item Провести экспериментальную апробацию на тестовом наборе совещаний (или собранном датасете), сравнить результаты ГА с базовыми стратегиями подбора и проанализировать полученные конфигурации с точки зрения качества и вычислительных затрат. +\end{enumerate} + + +\newpage +\section{Моделирование процесса автоматического протоколирования совещаний} + +\newpage +\section{Разработка методики оценки качества протоколирования} + +\newpage +\section{Обзор современных методов и технологий автоматического протоколирования совещаний} + +\newpage +\section{Описание генетического алгоритма для подбора оптимальной конфигурации пайплайна} + +\newpage +\section{Реализация генетического алгоритма и экспериментального стенда} + +\newpage +\section{Демонстрация применения алгоритма и анализ результатов} + +\newpage +\section*{Заключение} +\addcontentsline{toc}{section}{Заключение} + +\newpage +\printbibliography[heading=bibintoc] + + +\end{document} \ No newline at end of file