From 93bb0f247c18f76486c862c3ef641c3179196ef5 Mon Sep 17 00:00:00 2001 From: Arity-T Date: Mon, 9 Dec 2024 16:48:54 +0300 Subject: [PATCH] =?UTF-8?q?N-=D0=B3=D1=80=D0=B0=D0=BC=D0=BC=D1=8B=20=D0=B2?= =?UTF-8?q?=20=D0=B4=D0=B5=D0=B9=D1=81=D1=82=D0=B2=D0=B8=D0=B8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- coursework/part2/app/Main.hs | 5 ++++- coursework/part2/package.yaml | 1 + coursework/part2/part2.cabal | 3 +++ coursework/part2/src/Lib.hs | 21 ++++++++++++++++++++- 4 files changed, 28 insertions(+), 2 deletions(-) diff --git a/coursework/part2/app/Main.hs b/coursework/part2/app/Main.hs index b4d2753..a6b84a8 100644 --- a/coursework/part2/app/Main.hs +++ b/coursework/part2/app/Main.hs @@ -8,4 +8,7 @@ main = putStrLn "Введите имя файла:" >> getLine >>= \fileName -> readFile fileName >>= \content -> - uprint $ splitText content + let sentences = splitText content in + uprint (take 10 sentences) >> + let dict = buildDictionary sentences in + saveDictionary "dict.txt" dict diff --git a/coursework/part2/package.yaml b/coursework/part2/package.yaml index 6d3b318..33849a8 100644 --- a/coursework/part2/package.yaml +++ b/coursework/part2/package.yaml @@ -21,6 +21,7 @@ description: Please see the README on GitHub at = 4.7 && < 5 +- containers - unescaping-print ghc-options: diff --git a/coursework/part2/part2.cabal b/coursework/part2/part2.cabal index 741652a..eb5a011 100644 --- a/coursework/part2/part2.cabal +++ b/coursework/part2/part2.cabal @@ -35,6 +35,7 @@ library ghc-options: -Wall -Wcompat -Widentities -Wincomplete-record-updates -Wincomplete-uni-patterns -Wmissing-export-lists -Wmissing-home-modules -Wpartial-fields -Wredundant-constraints build-depends: base >=4.7 && <5 + , containers , unescaping-print default-language: Haskell2010 @@ -49,6 +50,7 @@ executable part2-exe ghc-options: -Wall -Wcompat -Widentities -Wincomplete-record-updates -Wincomplete-uni-patterns -Wmissing-export-lists -Wmissing-home-modules -Wpartial-fields -Wredundant-constraints -threaded -rtsopts -with-rtsopts=-N build-depends: base >=4.7 && <5 + , containers , part2 , unescaping-print default-language: Haskell2010 @@ -65,6 +67,7 @@ test-suite part2-test ghc-options: -Wall -Wcompat -Widentities -Wincomplete-record-updates -Wincomplete-uni-patterns -Wmissing-export-lists -Wmissing-home-modules -Wpartial-fields -Wredundant-constraints -threaded -rtsopts -with-rtsopts=-N build-depends: base >=4.7 && <5 + , containers , part2 , unescaping-print default-language: Haskell2010 diff --git a/coursework/part2/src/Lib.hs b/coursework/part2/src/Lib.hs index 9399da5..5966753 100644 --- a/coursework/part2/src/Lib.hs +++ b/coursework/part2/src/Lib.hs @@ -1,6 +1,11 @@ module Lib where import Data.Char (isLetter, toLower) +import Data.Map (Map) +import qualified Data.Map as Map +import Data.List (nub, tails) +import System.IO +import UnescapingPrint (ushow) splitText :: String -> [[String]] @@ -22,4 +27,18 @@ splitText text = filter (not . null) $ map (processSentence . words) (splitSente processSentence = filter (not . null) . map cleanWord cleanWord :: String -> String - cleanWord = map toLower . filter isLetter \ No newline at end of file + cleanWord = map toLower . filter isLetter + +buildDictionary :: [[String]] -> Map String [String] +buildDictionary sentences = + let bigrams = [ (w1, w2) | s <- sentences, (w1:w2:_) <- tails s ] + trigrams = [ (w1, w2, w3) | s <- sentences, (w1:w2:w3:_) <- tails s ] + singleKeys = foldr (\(w1, w2) acc -> Map.insertWith (++) w1 [w2] acc) Map.empty bigrams + singleKeys' = foldr (\(w1, w2, w3) acc -> Map.insertWith (++) w1 [w2 ++ " " ++ w3] acc) singleKeys trigrams + doubleKeys = foldr (\(w1, w2, w3) acc -> Map.insertWith (++) (w1 ++ " " ++ w2) [w3] acc) Map.empty trigrams + combined = Map.unionWith (++) singleKeys' doubleKeys + in Map.map nub combined + +saveDictionary :: FilePath -> Map String [String] -> IO () +saveDictionary filePath dict = withFile filePath WriteMode $ \h -> + mapM_ (\(k,v) -> hPutStrLn h $ ushow k ++ ": " ++ ushow v) (Map.toList dict) \ No newline at end of file