diff --git a/.refactor.exs b/.refactor.exs new file mode 100644 index 00000000..46cf6e76 --- /dev/null +++ b/.refactor.exs @@ -0,0 +1,36 @@ +%{ + inputs: ["lib/**/*.ex", "test/**/*.exs"], + configured_modules: [], + skipped_modules: [ + # Re-renames identifiers without scope-awareness: collapses two distinct + # variables in the same scope (e.g. base_val + head_val) into a single + # shadowed name, silently breaking semantics. Also fights manual rename + # decisions on each rerun (avg_us → microseconds → microseconds collision). + # Track upstream issue before re-enabling. + Number42.Refactors.Ex.ExpandShortFormBindings, + + # Loses guard conditions in transit. Example: rewrites + # if next == nil do + # if total > 0 and ratio > 0.6, do: vote, else: nothing + # else + # nothing + # end + # to a cond block whose first arm is `compute_total.() -> vote` + # (i.e. just "any positive total casts a vote") — the original + # guard `ratio > 0.6` and the outer `next == nil` precondition + # are simply dropped. Produced 2 test failures in + # data_signal_test.exs on first try. + Number42.Refactors.Ex.IfElseToCond, + + # Sorts def/defp groups alphabetically across the entire module. + # On this codebase that touches 121 files at once, deletes + # `# --- GenServer callbacks ---` / `# --- Private helpers ---` + # section comments, mixes public-API/impl-callbacks/private helpers + # into one alphabetical block, and splits some same-name clauses + # apart (compile warning: "clauses ... should be grouped together"). + # Tests still pass but the resulting diff is unreviewable and the + # imposed order disagrees with the project's prior structural + # organisation (public first, then impl callbacks, then private). + Number42.Refactors.Ex.SortFunctions + ] +} diff --git a/lib/code_qa.ex b/lib/code_qa.ex index 8df34812..3ca4dee8 100644 --- a/lib/code_qa.ex +++ b/lib/code_qa.ex @@ -12,7 +12,5 @@ defmodule CodeQA do :world """ - def hello do - :world - end + def hello, do: :world end diff --git a/lib/codeqa/analysis/behavior_config_server.ex b/lib/codeqa/analysis/behavior_config_server.ex index 04cc9bab..e9d666b7 100644 --- a/lib/codeqa/analysis/behavior_config_server.ex +++ b/lib/codeqa/analysis/behavior_config_server.ex @@ -18,9 +18,7 @@ defmodule CodeQA.Analysis.BehaviorConfigServer do # --- Public API --- @spec start_link(keyword()) :: GenServer.on_start() - def start_link(opts \\ []) do - GenServer.start_link(__MODULE__, opts) - end + def start_link(opts \\ []), do: __MODULE__ |> GenServer.start_link(opts) @doc "Returns the ETS table id. Callers may read directly from it." @spec get_tid(pid()) :: :ets.tid() @@ -74,23 +72,11 @@ defmodule CodeQA.Analysis.BehaviorConfigServer do end @impl true - def handle_call(:get_tid, _from, state) do - {:reply, state.tid, state} - end + def handle_call(:get_tid, _from, state), do: {:reply, state.tid, state} # --- Private helpers --- - defp load_configs(tid) do - case File.ls(@yaml_dir) do - {:ok, files} -> - files - |> Enum.filter(&String.ends_with?(&1, ".yml")) - |> Enum.each(&load_yml_file(&1, tid)) - - {:error, _} -> - :ok - end - end + defp load_configs(tid), do: File.ls(@yaml_dir) |> handle_load_configs_ls(tid) defp load_yml_file(yml_file, tid) do category = String.trim_trailing(yml_file, ".yml") @@ -109,11 +95,21 @@ defmodule CodeQA.Analysis.BehaviorConfigServer do behavior_data |> Enum.flat_map(fn {group, keys} when is_map(keys) -> - Enum.map(keys, fn {key, scalar} -> {{group, key}, scalar / 1.0} end) + keys |> Enum.map(fn {key, scalar} -> {{group, key}, scalar / 1.0} end) _ -> [] end) |> Map.new() end + + # FIXME: extracted automatically by ExtractCaseToHelper — review + # the parameter list and consider a better name. + defp handle_load_configs_ls({:ok, files}, tid), + do: + files + |> Enum.filter(&String.ends_with?(&1, ".yml")) + |> Enum.each(&load_yml_file(&1, tid)) + + defp handle_load_configs_ls({:error, _}, _tid), do: :ok end diff --git a/lib/codeqa/analysis/file_context_server.ex b/lib/codeqa/analysis/file_context_server.ex index 987595f9..c264f425 100644 --- a/lib/codeqa/analysis/file_context_server.ex +++ b/lib/codeqa/analysis/file_context_server.ex @@ -16,16 +16,15 @@ defmodule CodeQA.Analysis.FileContextServer do use GenServer - alias CodeQA.Engine.{FileContext, Pipeline} + alias CodeQA.Engine.FileContext + alias CodeQA.Engine.Pipeline alias CodeQA.Language alias CodeQA.Languages.Unknown # --- Public API --- @spec start_link(keyword()) :: GenServer.on_start() - def start_link(opts \\ []) do - GenServer.start_link(__MODULE__, opts) - end + def start_link(opts \\ []), do: __MODULE__ |> GenServer.start_link(opts) @doc "Returns the ETS table id. Callers may read directly from it." @spec get_tid(pid()) :: :ets.tid() @@ -44,13 +43,13 @@ defmodule CodeQA.Analysis.FileContextServer do key = {md5(content), language_name} case :ets.lookup(tid, key) do - [{_, ctx}] -> - ctx + [{_, context}] -> + context [] -> - ctx = Pipeline.build_file_context(content, opts) - :ets.insert(tid, {key, ctx}) - ctx + context = Pipeline.build_file_context(content, opts) + :ets.insert(tid, {key, context}) + context end end @@ -63,9 +62,7 @@ defmodule CodeQA.Analysis.FileContextServer do end @impl true - def handle_call(:get_tid, _from, state) do - {:reply, state.tid, state} - end + def handle_call(:get_tid, _from, state), do: {:reply, state.tid, state} # --- Private helpers --- diff --git a/lib/codeqa/analysis/file_metrics_server.ex b/lib/codeqa/analysis/file_metrics_server.ex index 579a63d9..3e1b16c5 100644 --- a/lib/codeqa/analysis/file_metrics_server.ex +++ b/lib/codeqa/analysis/file_metrics_server.ex @@ -18,9 +18,7 @@ defmodule CodeQA.Analysis.FileMetricsServer do # --- Public API --- @spec start_link(keyword()) :: GenServer.on_start() - def start_link(opts \\ []) do - GenServer.start_link(__MODULE__, opts) - end + def start_link(opts \\ []), do: __MODULE__ |> GenServer.start_link(opts) @doc "Returns the ETS table id. Callers may read directly from it." @spec get_tid(pid()) :: :ets.tid() @@ -37,12 +35,14 @@ defmodule CodeQA.Analysis.FileMetricsServer do tid = get_tid(pid) files_data = Map.get(pipeline_result, "files", %{}) - Enum.each(files_data, fn {path, file_data} -> + files_data + |> Enum.each(fn {path, file_data} -> metrics = Map.get(file_data, "metrics", %{}) :ets.insert(tid, {{:path, path}, metrics}) end) - Enum.each(files_map, fn {path, content} -> + files_map + |> Enum.each(fn {path, content} -> hash = md5(content) case :ets.lookup(tid, {:path, path}) do @@ -81,8 +81,8 @@ defmodule CodeQA.Analysis.FileMetricsServer do metrics [] -> - ctx = Pipeline.build_file_context(content, opts) - metrics = Registry.run_file_metrics(registry, ctx) + context = Pipeline.build_file_context(content, opts) + metrics = Registry.run_file_metrics(registry, context) :ets.insert(tid, {{:hash, hash}, metrics}) metrics end @@ -97,9 +97,7 @@ defmodule CodeQA.Analysis.FileMetricsServer do end @impl true - def handle_call(:get_tid, _from, state) do - {:reply, state.tid, state} - end + def handle_call(:get_tid, _from, state), do: {:reply, state.tid, state} # --- Private helpers --- diff --git a/lib/codeqa/analysis/run_supervisor.ex b/lib/codeqa/analysis/run_supervisor.ex index ab6bb10f..3a7244b6 100644 --- a/lib/codeqa/analysis/run_supervisor.ex +++ b/lib/codeqa/analysis/run_supervisor.ex @@ -11,12 +11,12 @@ defmodule CodeQA.Analysis.RunSupervisor do use Supervisor - alias CodeQA.Analysis.{BehaviorConfigServer, FileContextServer, RunContext} + alias CodeQA.Analysis.BehaviorConfigServer + alias CodeQA.Analysis.FileContextServer + alias CodeQA.Analysis.RunContext @spec start_link(keyword()) :: Supervisor.on_start() - def start_link(opts \\ []) do - Supervisor.start_link(__MODULE__, opts) - end + def start_link(opts \\ []), do: __MODULE__ |> Supervisor.start_link(opts) @doc """ Queries child PIDs from `sup` and returns a `RunContext` struct. @@ -45,7 +45,7 @@ defmodule CodeQA.Analysis.RunSupervisor do defp find_pid(children, module) do {_id, pid, _type, _modules} = - Enum.find(children, fn {id, _pid, _type, _modules} -> id == module end) + children |> Enum.find(fn {id, _pid, _type, _modules} -> id == module end) pid end diff --git a/lib/codeqa/ast/classification/node_classifier.ex b/lib/codeqa/ast/classification/node_classifier.ex index 7a71e584..e14b9e26 100644 --- a/lib/codeqa/ast/classification/node_classifier.ex +++ b/lib/codeqa/ast/classification/node_classifier.ex @@ -36,26 +36,22 @@ defmodule CodeQA.AST.Classification.NodeClassifier do alias CodeQA.AST.Enrichment.Node - alias CodeQA.AST.Nodes.{ - AttributeNode, - CodeNode, - DocNode, - FunctionNode, - ImportNode, - ModuleNode, - TestNode - } + alias CodeQA.AST.Nodes.AttributeNode + alias CodeQA.AST.Nodes.CodeNode + alias CodeQA.AST.Nodes.DocNode + alias CodeQA.AST.Nodes.FunctionNode + alias CodeQA.AST.Nodes.ImportNode + alias CodeQA.AST.Nodes.ModuleNode + alias CodeQA.AST.Nodes.TestNode alias CodeQA.AST.Parsing.SignalStream - alias CodeQA.AST.Signals.Classification.{ - AttributeSignal, - DocSignal, - FunctionSignal, - ImportSignal, - ModuleSignal, - TestSignal - } + alias CodeQA.AST.Signals.Classification.AttributeSignal + alias CodeQA.AST.Signals.Classification.DocSignal + alias CodeQA.AST.Signals.Classification.FunctionSignal + alias CodeQA.AST.Signals.Classification.ImportSignal + alias CodeQA.AST.Signals.Classification.ModuleSignal + alias CodeQA.AST.Signals.Classification.TestSignal @classification_signals [ %DocSignal{}, @@ -67,13 +63,13 @@ defmodule CodeQA.AST.Classification.NodeClassifier do ] @type_modules %{ - doc: DocNode, attribute: AttributeNode, + code: CodeNode, + doc: DocNode, function: FunctionNode, - module: ModuleNode, import: ImportNode, - test: TestNode, - code: CodeNode + module: ModuleNode, + test: TestNode } @doc """ @@ -106,12 +102,12 @@ defmodule CodeQA.AST.Classification.NodeClassifier do defp prepend_context(tokens, []), do: tokens defp prepend_context(tokens, ctx) when is_list(ctx), do: ctx ++ tokens - defp vote(tokens, lang_mod) do - tokens - |> run_signals(lang_mod) - |> tally() - |> winner() - end + defp vote(tokens, lang_mod), + do: + tokens + |> run_signals(lang_mod) + |> tally() + |> winner() defp run_signals(tokens, lang_mod) do SignalStream.run(tokens, @classification_signals, lang_mod) @@ -120,7 +116,8 @@ defmodule CodeQA.AST.Classification.NodeClassifier do end defp tally(emissions) do - Enum.reduce(emissions, %{}, fn {_src, _grp, name, weight}, acc -> + emissions + |> Enum.reduce(%{}, fn {_src, _grp, name, weight}, acc -> Map.update(acc, name, weight, &(&1 + weight)) end) end @@ -128,7 +125,7 @@ defmodule CodeQA.AST.Classification.NodeClassifier do defp winner(votes) when map_size(votes) == 0, do: :code defp winner(votes) do - {vote_name, _weight} = Enum.max_by(votes, fn {_, w} -> w end) + {vote_name, _weight} = votes |> Enum.max_by(fn {_, w} -> w end) vote_to_type(vote_name) end diff --git a/lib/codeqa/ast/classification/node_type_detector.ex b/lib/codeqa/ast/classification/node_type_detector.ex index 50383713..e8f161da 100644 --- a/lib/codeqa/ast/classification/node_type_detector.ex +++ b/lib/codeqa/ast/classification/node_type_detector.ex @@ -14,7 +14,6 @@ defmodule CodeQA.AST.Classification.NodeTypeDetector do Classify each node in the list into the most specific typed struct. """ @spec detect_types([Node.t()], module()) :: [term()] - def detect_types(blocks, lang_mod) do - Enum.map(blocks, &NodeClassifier.classify(&1, lang_mod)) - end + def detect_types(blocks, lang_mod), + do: blocks |> Enum.map(&NodeClassifier.classify(&1, lang_mod)) end diff --git a/lib/codeqa/ast/classification/typed_node_kind.ex b/lib/codeqa/ast/classification/typed_node_kind.ex index 4993ee1d..1843c710 100644 --- a/lib/codeqa/ast/classification/typed_node_kind.ex +++ b/lib/codeqa/ast/classification/typed_node_kind.ex @@ -1,15 +1,13 @@ defmodule CodeQA.AST.Classification.TypedNodeKind do @moduledoc "Maps a typed node struct from `NodeClassifier` to its kind atom." - alias CodeQA.AST.Nodes.{ - AttributeNode, - CodeNode, - DocNode, - FunctionNode, - ImportNode, - ModuleNode, - TestNode - } + alias CodeQA.AST.Nodes.AttributeNode + alias CodeQA.AST.Nodes.CodeNode + alias CodeQA.AST.Nodes.DocNode + alias CodeQA.AST.Nodes.FunctionNode + alias CodeQA.AST.Nodes.ImportNode + alias CodeQA.AST.Nodes.ModuleNode + alias CodeQA.AST.Nodes.TestNode @type kind :: :doc | :attribute | :function | :module | :import | :test | :code diff --git a/lib/codeqa/ast/enrichment/compound_node.ex b/lib/codeqa/ast/enrichment/compound_node.ex index 88a594c2..32e3c8bd 100644 --- a/lib/codeqa/ast/enrichment/compound_node.ex +++ b/lib/codeqa/ast/enrichment/compound_node.ex @@ -21,21 +21,21 @@ defmodule CodeQA.AST.Enrichment.CompoundNode do alias CodeQA.AST.Enrichment.Node alias CodeQA.AST.Nodes.AttributeNode - defstruct docs: [], - typespecs: [], - code: [], - start_line: nil, - start_col: nil, + defstruct code: [], + docs: [], + end_col: nil, end_line: nil, - end_col: nil + start_col: nil, + start_line: nil, + typespecs: [] @type t :: %__MODULE__{ - docs: [Node.t()], - typespecs: [AttributeNode.t()], code: [Node.t()], - start_line: non_neg_integer() | nil, - start_col: non_neg_integer() | nil, + docs: [Node.t()], + end_col: non_neg_integer() | nil, end_line: non_neg_integer() | nil, - end_col: non_neg_integer() | nil + start_col: non_neg_integer() | nil, + start_line: non_neg_integer() | nil, + typespecs: [AttributeNode.t()] } end diff --git a/lib/codeqa/ast/enrichment/compound_node_builder.ex b/lib/codeqa/ast/enrichment/compound_node_builder.ex index 27c61659..0f8f3f0b 100644 --- a/lib/codeqa/ast/enrichment/compound_node_builder.ex +++ b/lib/codeqa/ast/enrichment/compound_node_builder.ex @@ -16,8 +16,10 @@ defmodule CodeQA.AST.Enrichment.CompoundNodeBuilder do alias CodeQA.AST.Classification.NodeProtocol alias CodeQA.AST.Enrichment.CompoundNode alias CodeQA.AST.Enrichment.Node - alias CodeQA.AST.Lexing.{NewlineToken, WhitespaceToken} - alias CodeQA.AST.Nodes.{AttributeNode, DocNode} + alias CodeQA.AST.Lexing.NewlineToken + alias CodeQA.AST.Lexing.WhitespaceToken + alias CodeQA.AST.Nodes.AttributeNode + alias CodeQA.AST.Nodes.DocNode @doc """ Groups a list of typed nodes into CompoundNode structs. @@ -32,8 +34,8 @@ defmodule CodeQA.AST.Enrichment.CompoundNodeBuilder do # trailing whitespace — BlankLineRule places blank-line tokens at the # END of the node that precedes the split, not at the start of the new one. {current, _, compounds} = - Enum.reduce(blocks, {empty_compound(), [], []}, fn block, - {current, prev_trailing_ws, acc} -> + blocks + |> Enum.reduce({empty_compound(), [], []}, fn block, {current, prev_trailing_ws, acc} -> {content_tokens, trailing_ws} = split_trailing_whitespace(block.tokens) clean_block = %{block | tokens: content_tokens} # Check the PREVIOUS node's trailing whitespace for blank-line boundary @@ -63,7 +65,7 @@ defmodule CodeQA.AST.Enrichment.CompoundNodeBuilder do defp empty_compound, do: %CompoundNode{} - defp empty_compound?(%CompoundNode{docs: [], typespecs: [], code: []}), do: true + defp empty_compound?(%CompoundNode{code: [], docs: [], typespecs: []}), do: true defp empty_compound?(_), do: false defp add_block(%CompoundNode{} = compound, block) when is_struct(block, DocNode) do @@ -86,13 +88,12 @@ defmodule CodeQA.AST.Enrichment.CompoundNodeBuilder do } end - defp start_compound(new_block) do - add_block(empty_compound(), new_block) - end + defp start_compound(new_block), do: empty_compound() |> add_block(new_block) # Separates children by type — :doc/:typespec go up to the compound level. defp promote_sub_blocks(children) do - Enum.reduce(children, {[], [], []}, fn sub, {docs, specs, code} -> + children + |> Enum.reduce({[], [], []}, fn sub, {docs, specs, code} -> case sub.type do :doc -> {docs ++ [sub], specs, code} :typespec -> {docs, specs ++ [sub], code} @@ -118,28 +119,25 @@ defmodule CodeQA.AST.Enrichment.CompoundNodeBuilder do rev_idx -> content_len = length(tokens) - rev_idx - {Enum.slice(tokens, 0, content_len), Enum.slice(tokens, content_len..-1//1)} + {tokens |> Enum.slice(0, content_len), tokens |> Enum.slice(content_len..-1//1)} end end # A blank-line boundary exists when the trailing whitespace contains 3+ tokens # (i.e. 2+ blank lines). A single blank line (2 NLs: end-of-line + blank line) is # common within a compound (e.g. between function clauses) and does not split. - defp blank_line_boundary?(trailing_ws) do - Enum.count(trailing_ws, &(&1.kind == NewlineToken.kind())) >= 3 - end + defp blank_line_boundary?(trailing_ws), + do: Enum.count(trailing_ws, &(&1.kind == NewlineToken.kind())) >= 3 # Computes boundaries from all constituent nodes in source order: # docs → typespecs → code. Reads col directly from Token structs. defp finalize(%CompoundNode{} = compound) do all_blocks = compound.docs ++ compound.typespecs ++ compound.code - all_tokens = Enum.flat_map(all_blocks, &NodeProtocol.flat_tokens/1) + all_tokens = all_blocks |> Enum.flat_map(&NodeProtocol.flat_tokens/1) first_token = - Enum.find( - all_tokens, - &(is_map(&1) and &1.kind not in [WhitespaceToken.kind(), NewlineToken.kind()]) - ) + all_tokens + |> Enum.find(&(is_map(&1) and &1.kind not in [WhitespaceToken.kind(), NewlineToken.kind()])) last_token = all_tokens diff --git a/lib/codeqa/ast/enrichment/node.ex b/lib/codeqa/ast/enrichment/node.ex index 65e4b23d..2037e194 100644 --- a/lib/codeqa/ast/enrichment/node.ex +++ b/lib/codeqa/ast/enrichment/node.ex @@ -34,12 +34,12 @@ defmodule CodeQA.AST.Enrichment.Node do ] @type t :: %__MODULE__{ - tokens: [CodeQA.AST.Lexing.Token.t()], - line_count: non_neg_integer(), children: [term()], + end_line: non_neg_integer() | nil, label: term() | nil, + line_count: non_neg_integer(), start_line: non_neg_integer() | nil, - end_line: non_neg_integer() | nil, + tokens: [CodeQA.AST.Lexing.Token.t()], type: :code | :doc | :typespec } @@ -63,8 +63,8 @@ defimpl CodeQA.AST.Classification.NodeProtocol, for: CodeQA.AST.Enrichment.Node def label(n), do: n.label def flat_tokens(n) do - if Enum.empty?(n.children), + if n.children |> Enum.empty?(), do: n.tokens, - else: Enum.flat_map(n.children, &CodeQA.AST.Classification.NodeProtocol.flat_tokens/1) + else: n.children |> Enum.flat_map(&CodeQA.AST.Classification.NodeProtocol.flat_tokens/1) end end diff --git a/lib/codeqa/ast/enrichment/node_analyzer.ex b/lib/codeqa/ast/enrichment/node_analyzer.ex index 2f6221cc..36cd485b 100644 --- a/lib/codeqa/ast/enrichment/node_analyzer.ex +++ b/lib/codeqa/ast/enrichment/node_analyzer.ex @@ -21,19 +21,15 @@ defmodule CodeQA.AST.Enrichment.NodeAnalyzer do Function parameters are NOT extracted here (see `param_variables/1`). """ @spec bound_variables([Token.t()]) :: MapSet.t(String.t()) - def bound_variables(tokens) do - MapSet.union( - assignment_bindings(tokens), - arrow_bindings(tokens) - ) - end + def bound_variables(tokens), + do: assignment_bindings(tokens) |> MapSet.union(arrow_bindings(tokens)) # Collect `` immediately before `=` defp assignment_bindings(tokens) do tokens |> Enum.chunk_every(2, 1, :discard) |> Enum.flat_map(fn - [%Token{kind: "", content: name}, %Token{kind: "="}] -> + [%Token{content: name, kind: ""}, %Token{kind: "="}] -> [String.downcase(name)] _ -> @@ -54,7 +50,7 @@ defmodule CodeQA.AST.Enrichment.NodeAnalyzer do %NewlineToken{}, {_, acc} -> {[], acc} - %Token{kind: "", content: name}, {lhs_ids, acc} -> + %Token{content: name, kind: ""}, {lhs_ids, acc} -> {[name | lhs_ids], acc} _, {lhs_ids, acc} -> diff --git a/lib/codeqa/ast/lexing/newline_token.ex b/lib/codeqa/ast/lexing/newline_token.ex index 2ccb7129..b8f69ec7 100644 --- a/lib/codeqa/ast/lexing/newline_token.ex +++ b/lib/codeqa/ast/lexing/newline_token.ex @@ -21,9 +21,9 @@ defmodule CodeQA.AST.Lexing.NewlineToken do def kind, do: @kind @type t :: %__MODULE__{ - kind: String.t(), + col: non_neg_integer() | nil, content: String.t(), - line: non_neg_integer() | nil, - col: non_neg_integer() | nil + kind: String.t(), + line: non_neg_integer() | nil } end diff --git a/lib/codeqa/ast/lexing/string_token.ex b/lib/codeqa/ast/lexing/string_token.ex index 04fc2963..7fb58785 100644 --- a/lib/codeqa/ast/lexing/string_token.ex +++ b/lib/codeqa/ast/lexing/string_token.ex @@ -38,11 +38,11 @@ defmodule CodeQA.AST.Lexing.StringToken do @type quotes :: :double | :single | :backtick @type t :: %__MODULE__{ - content: String.t(), - line: non_neg_integer() | nil, col: non_neg_integer() | nil, - kind: String.t(), + content: String.t(), interpolations: [String.t()] | nil, + kind: String.t(), + line: non_neg_integer() | nil, multiline: boolean(), quotes: quotes() } diff --git a/lib/codeqa/ast/lexing/token.ex b/lib/codeqa/ast/lexing/token.ex index f705f7e6..89d47b98 100644 --- a/lib/codeqa/ast/lexing/token.ex +++ b/lib/codeqa/ast/lexing/token.ex @@ -34,12 +34,12 @@ defmodule CodeQA.AST.Lexing.Token do line/col. Consumers that need location data should guard for nil. """ - defstruct [:kind, :content, :line, :col] + defstruct [:col, :content, :kind, :line] @type t :: %__MODULE__{ - kind: String.t(), + col: non_neg_integer() | nil, content: String.t(), - line: non_neg_integer() | nil, - col: non_neg_integer() | nil + kind: String.t(), + line: non_neg_integer() | nil } end diff --git a/lib/codeqa/ast/lexing/token_normalizer.ex b/lib/codeqa/ast/lexing/token_normalizer.ex index 5cabba3a..b4b7360f 100644 --- a/lib/codeqa/ast/lexing/token_normalizer.ex +++ b/lib/codeqa/ast/lexing/token_normalizer.ex @@ -21,7 +21,7 @@ defmodule CodeQA.AST.Lexing.TokenNormalizer do """ @spec normalize_structural(String.t()) :: [Token.t()] def normalize_structural(code) do - code = String.replace(code, ~r/[^\x00-\x7F]/, " ") + code = code |> String.replace(~r/[^\x00-\x7F]/, " ") lines = String.split(code, "\n") last_idx = length(lines) - 1 @@ -44,7 +44,7 @@ defmodule CodeQA.AST.Lexing.TokenNormalizer do t -> t.col + String.length(t.content) end - tokens ++ [%NewlineToken{content: "\n", line: line_num, col: nl_col}] + tokens ++ [%NewlineToken{col: nl_col, content: "\n", line: line_num}] else tokens end @@ -71,7 +71,7 @@ defmodule CodeQA.AST.Lexing.TokenNormalizer do ws_tokens = for i <- 1..indent_units//1 do - %WhitespaceToken{content: " ", line: line_num, col: (i - 1) * 2} + %WhitespaceToken{col: (i - 1) * 2, content: " ", line: line_num} end content = String.slice(line, indent_col_width..-1//1) @@ -139,17 +139,13 @@ defmodule CodeQA.AST.Lexing.TokenNormalizer do # so callers get O(1) access to the final token without List.last/1. defp scan_content(text, line_num, col_offset) do {reversed, last} = do_scan(text, line_num, col_offset, [], nil) - {Enum.reverse(reversed), last} + {reversed |> Enum.reverse(), last} end defp do_scan("", _line, _col, acc, last), do: {acc, last} - defp do_scan(<> = text, line, col, acc, last) do - case next_token(first, text, line, col) do - {:skip, rest, advance} -> do_scan(rest, line, col + advance, acc, last) - {token, rest, advance} -> do_scan(rest, line, col + advance, [token | acc], token) - end - end + defp do_scan(<> = text, line, col, acc, last), + do: next_token(first, text, line, col) |> handle_next_token(acc, col, last, line) # next_token/4: dispatches on the first byte to select only candidate rules, # avoiding regex attempts for rules whose first-char pattern can't possibly match. @@ -157,7 +153,8 @@ defmodule CodeQA.AST.Lexing.TokenNormalizer do rules = dispatch_rules(first) result = - Enum.find_value(rules, fn {type, regex} -> + rules + |> Enum.find_value(fn {type, regex} -> case Regex.run(regex, text) do [m | _] -> {type, m} nil -> nil @@ -171,17 +168,17 @@ defmodule CodeQA.AST.Lexing.TokenNormalizer do {:literal, m} -> len = String.length(m) - {%Token{kind: m, content: m, line: line, col: col}, String.slice(text, len..-1//1), len} + {%Token{col: col, content: m, kind: m, line: line}, String.slice(text, len..-1//1), len} {value, m} -> len = String.length(m) - token = postprocess(value, %Token{kind: value, content: m, line: line, col: col}) + token = postprocess(value, %Token{col: col, content: m, kind: value, line: line}) {token, String.slice(text, len..-1//1), len} nil -> # No rule matched — emit the first character as a literal single-char token. char = String.first(text) - {%Token{kind: char, content: char, line: line, col: col}, String.slice(text, 1..-1//1), 1} + {%Token{col: col, content: char, kind: char, line: line}, String.slice(text, 1..-1//1), 1} end end @@ -204,30 +201,30 @@ defmodule CodeQA.AST.Lexing.TokenNormalizer do defp postprocess("", %Token{content: ~s(""")} = token), do: %StringToken{ - kind: StringToken.doc_kind(), + col: token.col, content: token.content, + kind: StringToken.doc_kind(), line: token.line, - col: token.col, multiline: true, quotes: :double } defp postprocess("", token), do: %StringToken{ - kind: StringToken.doc_kind(), + col: token.col, content: token.content, + kind: StringToken.doc_kind(), line: token.line, - col: token.col, multiline: true, quotes: :single } defp postprocess("", token), do: %StringToken{ - kind: StringToken.kind(), + col: token.col, content: token.content, + kind: StringToken.kind(), line: token.line, - col: token.col, quotes: :backtick } @@ -235,10 +232,10 @@ defmodule CodeQA.AST.Lexing.TokenNormalizer do quotes = if String.starts_with?(token.content, "\""), do: :double, else: :single %StringToken{ - kind: StringToken.kind(), + col: token.col, content: token.content, + kind: StringToken.kind(), line: token.line, - col: token.col, quotes: quotes } end @@ -253,11 +250,19 @@ defmodule CodeQA.AST.Lexing.TokenNormalizer do |> Enum.map(fn [expr] -> String.trim(expr) end) %StringToken{ - content: String.replace(token.content, strip_regex, ""), - line: token.line, col: token.col, + content: String.replace(token.content, strip_regex, ""), interpolations: interpolations, + line: token.line, quotes: quotes } end + + # FIXME: extracted automatically by ExtractCaseToHelper — review + # the parameter list and consider a better name. + defp handle_next_token({:skip, rest, advance}, acc, col, last, line), + do: rest |> do_scan(line, col + advance, acc, last) + + defp handle_next_token({token, rest, advance}, acc, col, _last, line), + do: rest |> do_scan(line, col + advance, [token | acc], token) end diff --git a/lib/codeqa/ast/lexing/whitespace_token.ex b/lib/codeqa/ast/lexing/whitespace_token.ex index cb230827..a289ca3d 100644 --- a/lib/codeqa/ast/lexing/whitespace_token.ex +++ b/lib/codeqa/ast/lexing/whitespace_token.ex @@ -21,9 +21,9 @@ defmodule CodeQA.AST.Lexing.WhitespaceToken do def kind, do: @kind @type t :: %__MODULE__{ - kind: String.t(), + col: non_neg_integer() | nil, content: String.t(), - line: non_neg_integer() | nil, - col: non_neg_integer() | nil + kind: String.t(), + line: non_neg_integer() | nil } end diff --git a/lib/codeqa/ast/nodes/attribute_node.ex b/lib/codeqa/ast/nodes/attribute_node.ex index 7dd106cb..973d17d9 100644 --- a/lib/codeqa/ast/nodes/attribute_node.ex +++ b/lib/codeqa/ast/nodes/attribute_node.ex @@ -5,42 +5,42 @@ defmodule CodeQA.AST.Nodes.AttributeNode do """ alias CodeQA.AST.Enrichment.Node - alias CodeQA.AST.Lexing.{NewlineToken, WhitespaceToken} + alias CodeQA.AST.Lexing.NewlineToken + alias CodeQA.AST.Lexing.WhitespaceToken - defstruct [:tokens, :line_count, :children, :start_line, :end_line, :label, :name, :kind] + defstruct [:children, :end_line, :kind, :label, :line_count, :name, :start_line, :tokens] @type t :: %__MODULE__{ - tokens: [term()], - line_count: non_neg_integer(), children: [term()], - start_line: non_neg_integer() | nil, end_line: non_neg_integer() | nil, + kind: :field | :constant | :decorator | :annotation | :typespec | nil, label: term() | nil, + line_count: non_neg_integer(), name: String.t() | nil, - kind: :field | :constant | :decorator | :annotation | :typespec | nil + start_line: non_neg_integer() | nil, + tokens: [term()] } @typespec_attrs MapSet.new(~w[spec type typep opaque callback macrocallback]) @doc "Build an AttributeNode from a raw %Node{}, detecting :typespec kind from tokens." @spec cast(Node.t()) :: t() - def cast(%Node{} = node) do - %__MODULE__{ - tokens: node.tokens, - line_count: node.line_count, + def cast(%Node{} = node), + do: %__MODULE__{ children: node.children, - start_line: node.start_line, end_line: node.end_line, + kind: detect_kind(node.tokens), label: node.label, - kind: detect_kind(node.tokens) + line_count: node.line_count, + start_line: node.start_line, + tokens: node.tokens } - end defp detect_kind(tokens) do tokens |> Enum.drop_while(&(&1.kind in [WhitespaceToken.kind(), NewlineToken.kind()])) |> case do - [%{kind: "@"}, %{kind: "", content: name} | _] -> + [%{kind: "@"}, %{content: name, kind: ""} | _] -> if MapSet.member?(@typespec_attrs, name), do: :typespec, else: nil _ -> @@ -59,9 +59,9 @@ defmodule CodeQA.AST.Nodes.AttributeNode do def label(n), do: n.label def flat_tokens(n) do - if Enum.empty?(n.children), + if n.children |> Enum.empty?(), do: n.tokens, - else: Enum.flat_map(n.children, &NodeProtocol.flat_tokens/1) + else: n.children |> Enum.flat_map(&NodeProtocol.flat_tokens/1) end end end diff --git a/lib/codeqa/ast/nodes/code_node.ex b/lib/codeqa/ast/nodes/code_node.ex index b7dfd9db..13e45240 100644 --- a/lib/codeqa/ast/nodes/code_node.ex +++ b/lib/codeqa/ast/nodes/code_node.ex @@ -2,30 +2,22 @@ defmodule CodeQA.AST.Nodes.CodeNode do @moduledoc "Catch-all AST node for unclassified code blocks." alias CodeQA.AST.Enrichment.Node + import CodeQA.AST.Nodes.Shared, only: [cast_shared: 2] - defstruct [:tokens, :line_count, :children, :start_line, :end_line, :label] + defstruct [:children, :end_line, :label, :line_count, :start_line, :tokens] @type t :: %__MODULE__{ - tokens: [term()], - line_count: non_neg_integer(), children: [term()], - start_line: non_neg_integer() | nil, end_line: non_neg_integer() | nil, - label: term() | nil + label: term() | nil, + line_count: non_neg_integer(), + start_line: non_neg_integer() | nil, + tokens: [term()] } @doc "Build a CodeNode from a raw %Node{}, copying all base fields." @spec cast(Node.t()) :: t() - def cast(%Node{} = node) do - %__MODULE__{ - tokens: node.tokens, - line_count: node.line_count, - children: node.children, - start_line: node.start_line, - end_line: node.end_line, - label: node.label - } - end + def cast(%Node{} = node), do: cast_shared(__MODULE__, node) defimpl CodeQA.AST.Classification.NodeProtocol do alias CodeQA.AST.Classification.NodeProtocol @@ -38,9 +30,9 @@ defmodule CodeQA.AST.Nodes.CodeNode do def label(n), do: n.label def flat_tokens(n) do - if Enum.empty?(n.children), + if n.children |> Enum.empty?(), do: n.tokens, - else: Enum.flat_map(n.children, &NodeProtocol.flat_tokens/1) + else: n.children |> Enum.flat_map(&NodeProtocol.flat_tokens/1) end end end diff --git a/lib/codeqa/ast/nodes/doc_node.ex b/lib/codeqa/ast/nodes/doc_node.ex index 5e011ca3..387866ad 100644 --- a/lib/codeqa/ast/nodes/doc_node.ex +++ b/lib/codeqa/ast/nodes/doc_node.ex @@ -2,30 +2,22 @@ defmodule CodeQA.AST.Nodes.DocNode do @moduledoc "AST node for documentation strings and comment blocks." alias CodeQA.AST.Enrichment.Node + import CodeQA.AST.Nodes.Shared, only: [cast_shared: 2] - defstruct [:tokens, :line_count, :children, :start_line, :end_line, :label] + defstruct [:children, :end_line, :label, :line_count, :start_line, :tokens] @type t :: %__MODULE__{ - tokens: [term()], - line_count: non_neg_integer(), children: [term()], - start_line: non_neg_integer() | nil, end_line: non_neg_integer() | nil, - label: term() | nil + label: term() | nil, + line_count: non_neg_integer(), + start_line: non_neg_integer() | nil, + tokens: [term()] } @doc "Build a DocNode from a raw %Node{}, copying all base fields." @spec cast(Node.t()) :: t() - def cast(%Node{} = node) do - %__MODULE__{ - tokens: node.tokens, - line_count: node.line_count, - children: node.children, - start_line: node.start_line, - end_line: node.end_line, - label: node.label - } - end + def cast(%Node{} = node), do: cast_shared(__MODULE__, node) defimpl CodeQA.AST.Classification.NodeProtocol do alias CodeQA.AST.Classification.NodeProtocol @@ -38,9 +30,9 @@ defmodule CodeQA.AST.Nodes.DocNode do def label(n), do: n.label def flat_tokens(n) do - if Enum.empty?(n.children), + if n.children |> Enum.empty?(), do: n.tokens, - else: Enum.flat_map(n.children, &NodeProtocol.flat_tokens/1) + else: n.children |> Enum.flat_map(&NodeProtocol.flat_tokens/1) end end end diff --git a/lib/codeqa/ast/nodes/function_node.ex b/lib/codeqa/ast/nodes/function_node.ex index 48c6a5d4..6a879c40 100644 --- a/lib/codeqa/ast/nodes/function_node.ex +++ b/lib/codeqa/ast/nodes/function_node.ex @@ -2,43 +2,35 @@ defmodule CodeQA.AST.Nodes.FunctionNode do @moduledoc "AST node for function, method, or callable definitions." alias CodeQA.AST.Enrichment.Node + import CodeQA.AST.Nodes.Shared, only: [cast_shared: 2] defstruct [ - :tokens, - :line_count, + :arity, :children, - :start_line, :end_line, :label, + :line_count, :name, - :arity, + :start_line, + :tokens, :visibility ] @type t :: %__MODULE__{ - tokens: [term()], - line_count: non_neg_integer(), + arity: non_neg_integer() | nil, children: [term()], - start_line: non_neg_integer() | nil, end_line: non_neg_integer() | nil, label: term() | nil, + line_count: non_neg_integer(), name: String.t() | nil, - arity: non_neg_integer() | nil, + start_line: non_neg_integer() | nil, + tokens: [term()], visibility: :public | :private | nil } @doc "Build a FunctionNode from a raw %Node{}, copying all base fields. Type-specific fields default to nil." @spec cast(Node.t()) :: t() - def cast(%Node{} = node) do - %__MODULE__{ - tokens: node.tokens, - line_count: node.line_count, - children: node.children, - start_line: node.start_line, - end_line: node.end_line, - label: node.label - } - end + def cast(%Node{} = node), do: cast_shared(__MODULE__, node) defimpl CodeQA.AST.Classification.NodeProtocol do alias CodeQA.AST.Classification.NodeProtocol @@ -51,9 +43,9 @@ defmodule CodeQA.AST.Nodes.FunctionNode do def label(n), do: n.label def flat_tokens(n) do - if Enum.empty?(n.children), + if n.children |> Enum.empty?(), do: n.tokens, - else: Enum.flat_map(n.children, &NodeProtocol.flat_tokens/1) + else: n.children |> Enum.flat_map(&NodeProtocol.flat_tokens/1) end end end diff --git a/lib/codeqa/ast/nodes/import_node.ex b/lib/codeqa/ast/nodes/import_node.ex index 3730370a..edf52b57 100644 --- a/lib/codeqa/ast/nodes/import_node.ex +++ b/lib/codeqa/ast/nodes/import_node.ex @@ -2,31 +2,23 @@ defmodule CodeQA.AST.Nodes.ImportNode do @moduledoc "AST node for import, require, use, alias, or include statements." alias CodeQA.AST.Enrichment.Node + import CodeQA.AST.Nodes.Shared, only: [cast_shared: 2] - defstruct [:tokens, :line_count, :children, :start_line, :end_line, :label, :target] + defstruct [:children, :end_line, :label, :line_count, :start_line, :target, :tokens] @type t :: %__MODULE__{ - tokens: [term()], - line_count: non_neg_integer(), children: [term()], - start_line: non_neg_integer() | nil, end_line: non_neg_integer() | nil, label: term() | nil, - target: String.t() | nil + line_count: non_neg_integer(), + start_line: non_neg_integer() | nil, + target: String.t() | nil, + tokens: [term()] } @doc "Build an ImportNode from a raw %Node{}, copying all base fields. Type-specific fields default to nil." @spec cast(Node.t()) :: t() - def cast(%Node{} = node) do - %__MODULE__{ - tokens: node.tokens, - line_count: node.line_count, - children: node.children, - start_line: node.start_line, - end_line: node.end_line, - label: node.label - } - end + def cast(%Node{} = node), do: cast_shared(__MODULE__, node) defimpl CodeQA.AST.Classification.NodeProtocol do alias CodeQA.AST.Classification.NodeProtocol @@ -39,9 +31,9 @@ defmodule CodeQA.AST.Nodes.ImportNode do def label(n), do: n.label def flat_tokens(n) do - if Enum.empty?(n.children), + if n.children |> Enum.empty?(), do: n.tokens, - else: Enum.flat_map(n.children, &NodeProtocol.flat_tokens/1) + else: n.children |> Enum.flat_map(&NodeProtocol.flat_tokens/1) end end end diff --git a/lib/codeqa/ast/nodes/module_node.ex b/lib/codeqa/ast/nodes/module_node.ex index c8d50723..91aa40a9 100644 --- a/lib/codeqa/ast/nodes/module_node.ex +++ b/lib/codeqa/ast/nodes/module_node.ex @@ -2,32 +2,24 @@ defmodule CodeQA.AST.Nodes.ModuleNode do @moduledoc "AST node for module, class, namespace, or struct definitions." alias CodeQA.AST.Enrichment.Node + import CodeQA.AST.Nodes.Shared, only: [cast_shared: 2] - defstruct [:tokens, :line_count, :children, :start_line, :end_line, :label, :name, :kind] + defstruct [:children, :end_line, :kind, :label, :line_count, :name, :start_line, :tokens] @type t :: %__MODULE__{ - tokens: [term()], - line_count: non_neg_integer(), children: [term()], - start_line: non_neg_integer() | nil, end_line: non_neg_integer() | nil, + kind: :class | :module | :namespace | :struct | nil, label: term() | nil, + line_count: non_neg_integer(), name: String.t() | nil, - kind: :class | :module | :namespace | :struct | nil + start_line: non_neg_integer() | nil, + tokens: [term()] } @doc "Build a ModuleNode from a raw %Node{}, copying all base fields. Type-specific fields default to nil." @spec cast(Node.t()) :: t() - def cast(%Node{} = node) do - %__MODULE__{ - tokens: node.tokens, - line_count: node.line_count, - children: node.children, - start_line: node.start_line, - end_line: node.end_line, - label: node.label - } - end + def cast(%Node{} = node), do: cast_shared(__MODULE__, node) defimpl CodeQA.AST.Classification.NodeProtocol do alias CodeQA.AST.Classification.NodeProtocol @@ -40,9 +32,9 @@ defmodule CodeQA.AST.Nodes.ModuleNode do def label(n), do: n.label def flat_tokens(n) do - if Enum.empty?(n.children), + if n.children |> Enum.empty?(), do: n.tokens, - else: Enum.flat_map(n.children, &NodeProtocol.flat_tokens/1) + else: n.children |> Enum.flat_map(&NodeProtocol.flat_tokens/1) end end end diff --git a/lib/codeqa/ast/nodes/shared.ex b/lib/codeqa/ast/nodes/shared.ex new file mode 100644 index 00000000..b1fbbf2a --- /dev/null +++ b/lib/codeqa/ast/nodes/shared.ex @@ -0,0 +1,27 @@ +defmodule CodeQA.AST.Nodes.Shared do + @moduledoc """ + Shared helpers for AST node modules. + + Extracted by `mix refactor --only ExtractParametricClone` because every + node module (`CodeNode`, `DocNode`, `FunctionNode`, `ImportNode`, + `ModuleNode`, `TestNode`) implemented an identical `cast/1` that copies + the same six fields from a generic `Node` into its own struct. The + helper takes the target struct module as a parameter so a single + implementation can populate any of them. + """ + + alias CodeQA.AST.Enrichment.Node + + @spec cast_shared(module(), Node.t()) :: struct() + def cast_shared(target_struct, %Node{} = node), + do: + target_struct + |> struct( + tokens: node.tokens, + line_count: node.line_count, + children: node.children, + start_line: node.start_line, + end_line: node.end_line, + label: node.label + ) +end diff --git a/lib/codeqa/ast/nodes/test_node.ex b/lib/codeqa/ast/nodes/test_node.ex index b3460cf4..12d2c7b2 100644 --- a/lib/codeqa/ast/nodes/test_node.ex +++ b/lib/codeqa/ast/nodes/test_node.ex @@ -2,31 +2,23 @@ defmodule CodeQA.AST.Nodes.TestNode do @moduledoc "AST node for test cases, describe blocks, and it blocks." alias CodeQA.AST.Enrichment.Node + import CodeQA.AST.Nodes.Shared, only: [cast_shared: 2] - defstruct [:tokens, :line_count, :children, :start_line, :end_line, :label, :description] + defstruct [:children, :description, :end_line, :label, :line_count, :start_line, :tokens] @type t :: %__MODULE__{ - tokens: [term()], - line_count: non_neg_integer(), children: [term()], - start_line: non_neg_integer() | nil, + description: String.t() | nil, end_line: non_neg_integer() | nil, label: term() | nil, - description: String.t() | nil + line_count: non_neg_integer(), + start_line: non_neg_integer() | nil, + tokens: [term()] } @doc "Build a TestNode from a raw %Node{}, copying all base fields. Type-specific fields default to nil." @spec cast(Node.t()) :: t() - def cast(%Node{} = node) do - %__MODULE__{ - tokens: node.tokens, - line_count: node.line_count, - children: node.children, - start_line: node.start_line, - end_line: node.end_line, - label: node.label - } - end + def cast(%Node{} = node), do: cast_shared(__MODULE__, node) defimpl CodeQA.AST.Classification.NodeProtocol do alias CodeQA.AST.Classification.NodeProtocol @@ -39,9 +31,9 @@ defmodule CodeQA.AST.Nodes.TestNode do def label(n), do: n.label def flat_tokens(n) do - if Enum.empty?(n.children), + if n.children |> Enum.empty?(), do: n.tokens, - else: Enum.flat_map(n.children, &NodeProtocol.flat_tokens/1) + else: n.children |> Enum.flat_map(&NodeProtocol.flat_tokens/1) end end end diff --git a/lib/codeqa/ast/parsing/parser.ex b/lib/codeqa/ast/parsing/parser.ex index 2615bb9a..ccd2465d 100644 --- a/lib/codeqa/ast/parsing/parser.ex +++ b/lib/codeqa/ast/parsing/parser.ex @@ -34,16 +34,15 @@ defmodule CodeQA.AST.Parsing.Parser do """ alias CodeQA.AST.Enrichment.Node - alias CodeQA.AST.Lexing.{NewlineToken, WhitespaceToken} + alias CodeQA.AST.Lexing.NewlineToken + alias CodeQA.AST.Lexing.WhitespaceToken alias CodeQA.AST.Parsing.SignalStream - alias CodeQA.AST.Signals.Structural.{ - BlankLineSignal, - BracketSignal, - ColonIndentSignal, - KeywordSignal, - TripleQuoteSignal - } + alias CodeQA.AST.Signals.Structural.BlankLineSignal + alias CodeQA.AST.Signals.Structural.BracketSignal + alias CodeQA.AST.Signals.Structural.ColonIndentSignal + alias CodeQA.AST.Signals.Structural.KeywordSignal + alias CodeQA.AST.Signals.Structural.TripleQuoteSignal alias CodeQA.Language @@ -98,21 +97,14 @@ defmodule CodeQA.AST.Parsing.Parser do line_count = if start_line && end_line, do: end_line - start_line + 1, else: 1 block = %Node{ - tokens: tokens, - line_count: line_count, children: [], + end_line: end_line, + line_count: line_count, start_line: start_line, - end_line: end_line + tokens: tokens } - case find_sub_candidates(tokens, lang_mod) do - [] -> - block - - candidates -> - children = Enum.map(candidates, &parse_block(&1, lang_mod)) - %{block | children: children} - end + find_sub_candidates(tokens, lang_mod) |> handle_find_sub_candidates(block, lang_mod) end # Collect enclosure regions from rules. @@ -143,7 +135,7 @@ defmodule CodeQA.AST.Parsing.Parser do |> Enum.uniq() |> Enum.sort() |> Enum.reject(fn {s, e} -> s == 0 and e == n - 1 end) - |> Enum.map(fn {s, e} -> Enum.slice(search_tokens, s..e) end) + |> Enum.map(fn {s, e} -> search_tokens |> Enum.slice(s..e) end) |> Enum.reject(&whitespace_only?/1) end @@ -157,7 +149,7 @@ defmodule CodeQA.AST.Parsing.Parser do if last && MapSet.member?(@open_brackets, first.kind) && Map.get(@matching_close, first.kind) == last.kind do - {Enum.drop(rest, -1), 1} + {rest |> Enum.drop(-1), 1} else {tokens, 0} end @@ -174,7 +166,7 @@ defmodule CodeQA.AST.Parsing.Parser do end defp inside_protected?(idx, ranges) do - Enum.any?(ranges, fn {lo, hi} -> idx >= lo and idx <= hi end) + ranges |> Enum.any?(fn {lo, hi} -> idx >= lo and idx <= hi end) end # When TripleQuoteSignal splits `@doc """` mid-line, the tokens before the @@ -210,12 +202,11 @@ defmodule CodeQA.AST.Parsing.Parser do boundaries |> Enum.chunk_every(2, 1, :discard) - |> Enum.map(fn [start, stop] -> Enum.slice(tokens, start..(stop - 1)//1) end) + |> Enum.map(fn [start, stop] -> tokens |> Enum.slice(start..(stop - 1)//1) end) end - defp whitespace_only?(tokens) do - Enum.all?(tokens, &(&1.kind in [WhitespaceToken.kind(), NewlineToken.kind()])) - end + defp whitespace_only?(tokens), + do: tokens |> Enum.all?(&(&1.kind in [WhitespaceToken.kind(), NewlineToken.kind()])) defp block_start_line([%{line: line} | _]), do: line defp block_start_line([]), do: nil @@ -231,4 +222,13 @@ defmodule CodeQA.AST.Parsing.Parser do token -> token.line end end + + # FIXME: extracted automatically by ExtractCaseToHelper — review + # the parameter list and consider a better name. + defp handle_find_sub_candidates([], block, _lang_mod), do: block + + defp handle_find_sub_candidates(candidates, block, lang_mod) do + children = candidates |> Enum.map(&parse_block(&1, lang_mod)) + %{block | children: children} + end end diff --git a/lib/codeqa/ast/parsing/signal_registry.ex b/lib/codeqa/ast/parsing/signal_registry.ex index 0f4a7521..e6325f60 100644 --- a/lib/codeqa/ast/parsing/signal_registry.ex +++ b/lib/codeqa/ast/parsing/signal_registry.ex @@ -7,40 +7,36 @@ defmodule CodeQA.AST.Parsing.SignalRegistry do language-specific or analysis-specific configurations. """ - alias CodeQA.AST.Signals.Structural.{ - AccessModifierSignal, - AssignmentFunctionSignal, - BlankLineSignal, - BracketSignal, - BranchSplitSignal, - ColonIndentSignal, - CommentDividerSignal, - DecoratorSignal, - DedentToZeroSignal, - DocCommentLeadSignal, - KeywordSignal, - SQLBlockSignal, - TripleQuoteSignal - } + alias CodeQA.AST.Signals.Structural.AccessModifierSignal + alias CodeQA.AST.Signals.Structural.AssignmentFunctionSignal + alias CodeQA.AST.Signals.Structural.BlankLineSignal + alias CodeQA.AST.Signals.Structural.BracketSignal + alias CodeQA.AST.Signals.Structural.BranchSplitSignal + alias CodeQA.AST.Signals.Structural.ColonIndentSignal + alias CodeQA.AST.Signals.Structural.CommentDividerSignal + alias CodeQA.AST.Signals.Structural.DecoratorSignal + alias CodeQA.AST.Signals.Structural.DedentToZeroSignal + alias CodeQA.AST.Signals.Structural.DocCommentLeadSignal + alias CodeQA.AST.Signals.Structural.KeywordSignal + alias CodeQA.AST.Signals.Structural.SQLBlockSignal + alias CodeQA.AST.Signals.Structural.TripleQuoteSignal - alias CodeQA.AST.Signals.Classification.{ - AttributeSignal, - CommentDensitySignal, - ConfigSignal, - DataSignal, - DocSignal, - FunctionSignal, - ImportSignal, - ModuleSignal, - TestSignal, - TypeSignal - } + alias CodeQA.AST.Signals.Classification.AttributeSignal + alias CodeQA.AST.Signals.Classification.CommentDensitySignal + alias CodeQA.AST.Signals.Classification.ConfigSignal + alias CodeQA.AST.Signals.Classification.DataSignal + alias CodeQA.AST.Signals.Classification.DocSignal + alias CodeQA.AST.Signals.Classification.FunctionSignal + alias CodeQA.AST.Signals.Classification.ImportSignal + alias CodeQA.AST.Signals.Classification.ModuleSignal + alias CodeQA.AST.Signals.Classification.TestSignal + alias CodeQA.AST.Signals.Classification.TypeSignal - defstruct structural: [], classification: [] + defstruct classification: [], structural: [] @type t :: %__MODULE__{ - structural: [term()], - classification: [term()] + classification: [term()], + structural: [term()] } @spec new() :: t() @@ -55,30 +51,30 @@ defmodule CodeQA.AST.Parsing.SignalRegistry do do: %{r | classification: r.classification ++ [signal]} @spec default() :: t() - def default do - new() - |> register_structural(%TripleQuoteSignal{}) - |> register_structural(%BlankLineSignal{}) - |> register_structural(%KeywordSignal{}) - |> register_structural(%AccessModifierSignal{}) - |> register_structural(%DecoratorSignal{}) - |> register_structural(%CommentDividerSignal{}) - |> register_structural(%DocCommentLeadSignal{}) - |> register_structural(%AssignmentFunctionSignal{}) - |> register_structural(%DedentToZeroSignal{}) - |> register_structural(%BranchSplitSignal{}) - |> register_structural(%BracketSignal{}) - |> register_classification(%DocSignal{}) - |> register_classification(%TestSignal{}) - |> register_classification(%FunctionSignal{}) - |> register_classification(%ModuleSignal{}) - |> register_classification(%ImportSignal{}) - |> register_classification(%AttributeSignal{}) - |> register_classification(%TypeSignal{}) - |> register_classification(%ConfigSignal{}) - |> register_classification(%DataSignal{}) - |> register_classification(%CommentDensitySignal{}) - end + def default, + do: + new() + |> register_structural(%TripleQuoteSignal{}) + |> register_structural(%BlankLineSignal{}) + |> register_structural(%KeywordSignal{}) + |> register_structural(%AccessModifierSignal{}) + |> register_structural(%DecoratorSignal{}) + |> register_structural(%CommentDividerSignal{}) + |> register_structural(%DocCommentLeadSignal{}) + |> register_structural(%AssignmentFunctionSignal{}) + |> register_structural(%DedentToZeroSignal{}) + |> register_structural(%BranchSplitSignal{}) + |> register_structural(%BracketSignal{}) + |> register_classification(%DocSignal{}) + |> register_classification(%TestSignal{}) + |> register_classification(%FunctionSignal{}) + |> register_classification(%ModuleSignal{}) + |> register_classification(%ImportSignal{}) + |> register_classification(%AttributeSignal{}) + |> register_classification(%TypeSignal{}) + |> register_classification(%ConfigSignal{}) + |> register_classification(%DataSignal{}) + |> register_classification(%CommentDensitySignal{}) @spec python() :: t() def python do diff --git a/lib/codeqa/ast/parsing/signal_stream.ex b/lib/codeqa/ast/parsing/signal_stream.ex index 8b6f4519..485b77ca 100644 --- a/lib/codeqa/ast/parsing/signal_stream.ex +++ b/lib/codeqa/ast/parsing/signal_stream.ex @@ -25,19 +25,21 @@ defmodule CodeQA.AST.Parsing.SignalStream do def run(tokens, signals, lang_mod) do prevs = [nil | tokens] nexts = Enum.drop(tokens, 1) ++ [nil] - triples = Enum.zip_with([prevs, tokens, nexts], fn [p, c, n] -> {p, c, n} end) + triples = [prevs, tokens, nexts] |> Enum.zip_with(fn [p, c, n] -> {p, c, n} end) - Enum.map(signals, fn signal -> + signals + |> Enum.map(fn signal -> init_state = Signal.init(signal, lang_mod) source = Signal.source(signal) group = Signal.group(signal) {_final_state, emissions} = - Enum.reduce_while(triples, {init_state, []}, fn triple, {state, acc} -> + triples + |> Enum.reduce_while({init_state, []}, fn triple, {state, acc} -> emit_step(signal, triple, state, acc, source, group) end) - Enum.reverse(emissions) + emissions |> Enum.reverse() end) end diff --git a/lib/codeqa/ast/signals/classification/attribute_signal.ex b/lib/codeqa/ast/signals/classification/attribute_signal.ex index aaaa6403..4d276275 100644 --- a/lib/codeqa/ast/signals/classification/attribute_signal.ex +++ b/lib/codeqa/ast/signals/classification/attribute_signal.ex @@ -1,4 +1,7 @@ defmodule CodeQA.AST.Signals.Classification.AttributeSignal do + alias CodeQA.AST.Lexing.NewlineToken + alias CodeQA.AST.Lexing.WhitespaceToken + @moduledoc """ Classification signal — votes `:attribute` when an `@identifier` pattern appears at indent 0. @@ -14,8 +17,8 @@ defmodule CodeQA.AST.Signals.Classification.AttributeSignal do defstruct [] defimpl CodeQA.AST.Parsing.Signal do - @nl CodeQA.AST.Lexing.NewlineToken.kind() - @ws CodeQA.AST.Lexing.WhitespaceToken.kind() + @nl NewlineToken.kind() + @ws WhitespaceToken.kind() @typespec_attrs MapSet.new(~w[spec type typep opaque callback macrocallback]) @skip_attrs MapSet.new(~w[doc moduledoc]) diff --git a/lib/codeqa/ast/signals/classification/comment_density_signal.ex b/lib/codeqa/ast/signals/classification/comment_density_signal.ex index ceb4c23a..8a856b64 100644 --- a/lib/codeqa/ast/signals/classification/comment_density_signal.ex +++ b/lib/codeqa/ast/signals/classification/comment_density_signal.ex @@ -1,4 +1,7 @@ defmodule CodeQA.AST.Signals.Classification.CommentDensitySignal do + alias CodeQA.AST.Lexing.NewlineToken + alias CodeQA.AST.Lexing.WhitespaceToken + @moduledoc """ Classification signal — votes `:comment` when more than 60% of non-blank lines begin with a comment prefix. @@ -12,18 +15,18 @@ defmodule CodeQA.AST.Signals.Classification.CommentDensitySignal do defstruct [] defimpl CodeQA.AST.Parsing.Signal do - @nl CodeQA.AST.Lexing.NewlineToken.kind() - @ws CodeQA.AST.Lexing.WhitespaceToken.kind() + @nl NewlineToken.kind() + @ws WhitespaceToken.kind() def source(_), do: CodeQA.AST.Signals.Classification.CommentDensitySignal def group(_), do: :classification def init(_, lang_mod) do prefixes = MapSet.new(lang_mod.comment_prefixes()) - %{prefixes: prefixes, at_line_start: true, comment_lines: 0, total_lines: 0} + %{at_line_start: true, comment_lines: 0, prefixes: prefixes, total_lines: 0} end def emit(_, {_prev, token, next}, state) do - %{prefixes: prefixes, at_line_start: als} = state + %{at_line_start: als, prefixes: prefixes} = state state = case token.kind do diff --git a/lib/codeqa/ast/signals/classification/config_signal.ex b/lib/codeqa/ast/signals/classification/config_signal.ex index 43b58728..7563a8dc 100644 --- a/lib/codeqa/ast/signals/classification/config_signal.ex +++ b/lib/codeqa/ast/signals/classification/config_signal.ex @@ -1,4 +1,7 @@ defmodule CodeQA.AST.Signals.Classification.ConfigSignal do + alias CodeQA.AST.Lexing.NewlineToken + alias CodeQA.AST.Lexing.WhitespaceToken + @moduledoc """ Classification signal — votes `:config` when a configuration keyword appears at indent 0 and bracket depth 0. @@ -10,17 +13,17 @@ defmodule CodeQA.AST.Signals.Classification.ConfigSignal do defstruct [] defimpl CodeQA.AST.Parsing.Signal do - @nl CodeQA.AST.Lexing.NewlineToken.kind() - @ws CodeQA.AST.Lexing.WhitespaceToken.kind() + @nl NewlineToken.kind() + @ws WhitespaceToken.kind() @config_keywords MapSet.new(["config", "configure", "settings", "options", "defaults"]) def source(_), do: CodeQA.AST.Signals.Classification.ConfigSignal def group(_), do: :classification def init(_, _lang_mod), - do: %{at_line_start: true, indent: 0, bracket_depth: 0, is_first: true} + do: %{at_line_start: true, bracket_depth: 0, indent: 0, is_first: true} def emit(_, {_prev, token, _next}, state) do - %{at_line_start: als, indent: ind, bracket_depth: bd, is_first: first} = state + %{at_line_start: als, bracket_depth: bd, indent: ind, is_first: first} = state case token.kind do @nl -> diff --git a/lib/codeqa/ast/signals/classification/data_signal.ex b/lib/codeqa/ast/signals/classification/data_signal.ex index 1d6aa773..dc4e898a 100644 --- a/lib/codeqa/ast/signals/classification/data_signal.ex +++ b/lib/codeqa/ast/signals/classification/data_signal.ex @@ -1,4 +1,6 @@ defmodule CodeQA.AST.Signals.Classification.DataSignal do + alias CodeQA.AST.Lexing.StringToken + @moduledoc """ Classification signal — votes `:data` when a token stream consists primarily of literal values (``, ``) with no control-flow keywords. @@ -10,7 +12,7 @@ defmodule CodeQA.AST.Signals.Classification.DataSignal do defstruct [] defimpl CodeQA.AST.Parsing.Signal do - @str CodeQA.AST.Lexing.StringToken.kind() + @str StringToken.kind() @control_flow MapSet.new([ "if", "else", @@ -31,7 +33,7 @@ defmodule CodeQA.AST.Signals.Classification.DataSignal do def group(_), do: :classification def init(_, _lang_mod), - do: %{literal_count: 0, id_count: 0, has_control_flow: false} + do: %{has_control_flow: false, id_count: 0, literal_count: 0} def emit(_, {_prev, token, next}, state) do state = diff --git a/lib/codeqa/ast/signals/classification/doc_signal.ex b/lib/codeqa/ast/signals/classification/doc_signal.ex index 615cf55c..a4780b87 100644 --- a/lib/codeqa/ast/signals/classification/doc_signal.ex +++ b/lib/codeqa/ast/signals/classification/doc_signal.ex @@ -1,4 +1,6 @@ defmodule CodeQA.AST.Signals.Classification.DocSignal do + alias CodeQA.AST.Lexing.StringToken + @moduledoc """ Classification signal — votes `:doc` when a `` (triple-quoted string) token is found anywhere in the node's token stream. @@ -10,7 +12,7 @@ defmodule CodeQA.AST.Signals.Classification.DocSignal do defstruct [] defimpl CodeQA.AST.Parsing.Signal do - @doc_kind CodeQA.AST.Lexing.StringToken.doc_kind() + @doc_kind StringToken.doc_kind() def source(_), do: CodeQA.AST.Signals.Classification.DocSignal def group(_), do: :classification diff --git a/lib/codeqa/ast/signals/classification/function_signal.ex b/lib/codeqa/ast/signals/classification/function_signal.ex index 62d3f487..4133e852 100644 --- a/lib/codeqa/ast/signals/classification/function_signal.ex +++ b/lib/codeqa/ast/signals/classification/function_signal.ex @@ -1,4 +1,8 @@ defmodule CodeQA.AST.Signals.Classification.FunctionSignal do + alias CodeQA.AST.Lexing.NewlineToken + alias CodeQA.AST.Lexing.WhitespaceToken + alias CodeQA.Language + @moduledoc """ Classification signal — votes `:function` when a function definition keyword appears at indent 0 and bracket depth 0. @@ -15,28 +19,27 @@ defmodule CodeQA.AST.Signals.Classification.FunctionSignal do defstruct [] defimpl CodeQA.AST.Parsing.Signal do - @nl CodeQA.AST.Lexing.NewlineToken.kind() - @ws CodeQA.AST.Lexing.WhitespaceToken.kind() + @nl NewlineToken.kind() + @ws WhitespaceToken.kind() def source(_), do: CodeQA.AST.Signals.Classification.FunctionSignal def group(_), do: :classification - def init(_, lang_mod) do - %{ + def init(_, lang_mod), + do: %{ at_line_start: true, - indent: 0, bracket_depth: 0, + indent: 0, is_first: true, - voted: false, - keywords: CodeQA.Language.function_keywords(lang_mod) + keywords: Language.function_keywords(lang_mod), + voted: false } - end def emit(_, _, %{voted: true} = state), do: {MapSet.new(), state} def emit( _, {_prev, token, _next}, - %{at_line_start: als, indent: ind, bracket_depth: bd, is_first: first} = state + %{at_line_start: als, bracket_depth: bd, indent: ind, is_first: first} = state ) do case token.kind do @nl -> diff --git a/lib/codeqa/ast/signals/classification/import_signal.ex b/lib/codeqa/ast/signals/classification/import_signal.ex index e27ed8a8..eacd697e 100644 --- a/lib/codeqa/ast/signals/classification/import_signal.ex +++ b/lib/codeqa/ast/signals/classification/import_signal.ex @@ -1,4 +1,8 @@ defmodule CodeQA.AST.Signals.Classification.ImportSignal do + alias CodeQA.AST.Lexing.NewlineToken + alias CodeQA.AST.Lexing.WhitespaceToken + alias CodeQA.Language + @moduledoc """ Classification signal — votes `:import` when an import/require/use/alias keyword appears at indent 0. @@ -15,20 +19,19 @@ defmodule CodeQA.AST.Signals.Classification.ImportSignal do defstruct [] defimpl CodeQA.AST.Parsing.Signal do - @nl CodeQA.AST.Lexing.NewlineToken.kind() - @ws CodeQA.AST.Lexing.WhitespaceToken.kind() + @nl NewlineToken.kind() + @ws WhitespaceToken.kind() def source(_), do: CodeQA.AST.Signals.Classification.ImportSignal def group(_), do: :classification - def init(_, lang_mod) do - %{ + def init(_, lang_mod), + do: %{ at_line_start: true, indent: 0, is_first: true, - voted: false, - keywords: CodeQA.Language.import_keywords(lang_mod) + keywords: Language.import_keywords(lang_mod), + voted: false } - end def emit(_, _, %{voted: true} = state), do: {MapSet.new(), state} diff --git a/lib/codeqa/ast/signals/classification/module_signal.ex b/lib/codeqa/ast/signals/classification/module_signal.ex index 4e9ca98e..30f18113 100644 --- a/lib/codeqa/ast/signals/classification/module_signal.ex +++ b/lib/codeqa/ast/signals/classification/module_signal.ex @@ -1,4 +1,8 @@ defmodule CodeQA.AST.Signals.Classification.ModuleSignal do + alias CodeQA.AST.Lexing.NewlineToken + alias CodeQA.AST.Lexing.WhitespaceToken + alias CodeQA.Language + @moduledoc """ Classification signal — votes `:module` when a module/class/namespace definition keyword appears at indent 0 and bracket depth 0. @@ -14,28 +18,27 @@ defmodule CodeQA.AST.Signals.Classification.ModuleSignal do defstruct [] defimpl CodeQA.AST.Parsing.Signal do - @nl CodeQA.AST.Lexing.NewlineToken.kind() - @ws CodeQA.AST.Lexing.WhitespaceToken.kind() + @nl NewlineToken.kind() + @ws WhitespaceToken.kind() def source(_), do: CodeQA.AST.Signals.Classification.ModuleSignal def group(_), do: :classification - def init(_, lang_mod) do - %{ + def init(_, lang_mod), + do: %{ at_line_start: true, - indent: 0, bracket_depth: 0, + indent: 0, is_first: true, - voted: false, - keywords: CodeQA.Language.module_keywords(lang_mod) + keywords: Language.module_keywords(lang_mod), + voted: false } - end def emit(_, _, %{voted: true} = state), do: {MapSet.new(), state} def emit( _, {_prev, token, _next}, - %{at_line_start: als, indent: ind, bracket_depth: bd, is_first: first} = state + %{at_line_start: als, bracket_depth: bd, indent: ind, is_first: first} = state ) do case token.kind do @nl -> diff --git a/lib/codeqa/ast/signals/classification/test_signal.ex b/lib/codeqa/ast/signals/classification/test_signal.ex index de6abe50..eb1ae9e2 100644 --- a/lib/codeqa/ast/signals/classification/test_signal.ex +++ b/lib/codeqa/ast/signals/classification/test_signal.ex @@ -1,4 +1,8 @@ defmodule CodeQA.AST.Signals.Classification.TestSignal do + alias CodeQA.AST.Lexing.NewlineToken + alias CodeQA.AST.Lexing.WhitespaceToken + alias CodeQA.Language + @moduledoc """ Classification signal — votes `:test` when a test block keyword appears at indent 0. @@ -16,20 +20,19 @@ defmodule CodeQA.AST.Signals.Classification.TestSignal do defstruct [] defimpl CodeQA.AST.Parsing.Signal do - @nl CodeQA.AST.Lexing.NewlineToken.kind() - @ws CodeQA.AST.Lexing.WhitespaceToken.kind() + @nl NewlineToken.kind() + @ws WhitespaceToken.kind() def source(_), do: CodeQA.AST.Signals.Classification.TestSignal def group(_), do: :classification - def init(_, lang_mod) do - %{ + def init(_, lang_mod), + do: %{ at_line_start: true, indent: 0, is_first: true, - voted: false, - keywords: CodeQA.Language.test_keywords(lang_mod) + keywords: Language.test_keywords(lang_mod), + voted: false } - end def emit(_, _, %{voted: true} = state), do: {MapSet.new(), state} diff --git a/lib/codeqa/ast/signals/classification/type_signal.ex b/lib/codeqa/ast/signals/classification/type_signal.ex index fc4440f5..661a7d99 100644 --- a/lib/codeqa/ast/signals/classification/type_signal.ex +++ b/lib/codeqa/ast/signals/classification/type_signal.ex @@ -1,4 +1,7 @@ defmodule CodeQA.AST.Signals.Classification.TypeSignal do + alias CodeQA.AST.Lexing.NewlineToken + alias CodeQA.AST.Lexing.WhitespaceToken + @moduledoc """ Classification signal — votes `:type` when an Elixir type definition attribute (`@type`, `@typep`, `@opaque`) appears at indent 0. @@ -10,14 +13,14 @@ defmodule CodeQA.AST.Signals.Classification.TypeSignal do defstruct [] defimpl CodeQA.AST.Parsing.Signal do - @nl CodeQA.AST.Lexing.NewlineToken.kind() - @ws CodeQA.AST.Lexing.WhitespaceToken.kind() + @nl NewlineToken.kind() + @ws WhitespaceToken.kind() @type_attrs MapSet.new(["type", "typep", "opaque"]) def source(_), do: CodeQA.AST.Signals.Classification.TypeSignal def group(_), do: :classification def init(_, _lang_mod), - do: %{at_line_start: true, indent: 0, saw_at: false, is_first: true} + do: %{at_line_start: true, indent: 0, is_first: true, saw_at: false} def emit(_, {_prev, token, _next}, state) do case token.kind do diff --git a/lib/codeqa/ast/signals/structural/access_modifier_signal.ex b/lib/codeqa/ast/signals/structural/access_modifier_signal.ex index 43ed0687..0e32f92a 100644 --- a/lib/codeqa/ast/signals/structural/access_modifier_signal.ex +++ b/lib/codeqa/ast/signals/structural/access_modifier_signal.ex @@ -1,6 +1,7 @@ defmodule CodeQA.AST.Signals.Structural.AccessModifierSignal do alias CodeQA.AST.Lexing.NewlineToken alias CodeQA.AST.Lexing.WhitespaceToken + alias CodeQA.Language @moduledoc """ Emits `:access_modifier_split` when an access modifier keyword appears at line @@ -21,20 +22,20 @@ defmodule CodeQA.AST.Signals.Structural.AccessModifierSignal do def group(_), do: :split def init(_, lang_mod) do - modifiers = CodeQA.Language.access_modifiers(lang_mod) - %{idx: 0, bracket_depth: 0, at_line_start: true, seen_content: false, modifiers: modifiers} + modifiers = Language.access_modifiers(lang_mod) + %{at_line_start: true, bracket_depth: 0, idx: 0, modifiers: modifiers, seen_content: false} end def emit(_, {_, %NewlineToken{}, _}, %{idx: idx} = state), do: {MapSet.new(), %{state | idx: idx + 1, at_line_start: true}} - def emit(_, {_, %WhitespaceToken{}, _}, %{idx: idx, at_line_start: true} = state), + def emit(_, {_, %WhitespaceToken{}, _}, %{at_line_start: true, idx: idx} = state), do: {MapSet.new(), %{state | idx: idx + 1, at_line_start: true}} def emit(_, {_, %WhitespaceToken{}, _}, %{idx: idx} = state), do: {MapSet.new(), %{state | idx: idx + 1}} - def emit(_, {_, %{kind: k}, _}, %{idx: idx, bracket_depth: bd} = state) + def emit(_, {_, %{kind: k}, _}, %{bracket_depth: bd, idx: idx} = state) when k in ["(", "[", "{"], do: {MapSet.new(), @@ -46,7 +47,7 @@ defmodule CodeQA.AST.Signals.Structural.AccessModifierSignal do at_line_start: false }} - def emit(_, {_, %{kind: k}, _}, %{idx: idx, bracket_depth: bd} = state) + def emit(_, {_, %{kind: k}, _}, %{bracket_depth: bd, idx: idx} = state) when k in [")", "]", "}"], do: {MapSet.new(), @@ -70,7 +71,7 @@ defmodule CodeQA.AST.Signals.Structural.AccessModifierSignal do end defp modifier_split?( - %{seen_content: true, bracket_depth: 0, at_line_start: true, modifiers: m}, + %{at_line_start: true, bracket_depth: 0, modifiers: m, seen_content: true}, %{content: c} ), do: MapSet.member?(m, c) diff --git a/lib/codeqa/ast/signals/structural/assignment_function_signal.ex b/lib/codeqa/ast/signals/structural/assignment_function_signal.ex index a778d55b..f59bae8b 100644 --- a/lib/codeqa/ast/signals/structural/assignment_function_signal.ex +++ b/lib/codeqa/ast/signals/structural/assignment_function_signal.ex @@ -18,27 +18,26 @@ defmodule CodeQA.AST.Signals.Structural.AssignmentFunctionSignal do def source(_), do: CodeQA.AST.Signals.Structural.AssignmentFunctionSignal def group(_), do: :split - def init(_, _lang_mod) do - %{ + def init(_, _lang_mod), + do: %{ + at_line_start: true, + bracket_depth: 0, idx: 0, indent: 0, - bracket_depth: 0, - at_line_start: true, - seen_content: false, - phase: :idle + phase: :idle, + seen_content: false } - end def emit(_, {_, %NewlineToken{}, _}, %{idx: idx} = state), do: {MapSet.new(), %{state | idx: idx + 1, indent: 0, at_line_start: true, phase: :idle}} - def emit(_, {_, %WhitespaceToken{}, _}, %{idx: idx, indent: i, at_line_start: true} = state), + def emit(_, {_, %WhitespaceToken{}, _}, %{at_line_start: true, idx: idx, indent: i} = state), do: {MapSet.new(), %{state | idx: idx + 1, indent: i + 1, at_line_start: true}} def emit(_, {_, %WhitespaceToken{}, _}, %{idx: idx} = state), do: {MapSet.new(), %{state | idx: idx + 1}} - def emit(_, {_, %{kind: k}, _}, %{idx: idx, bracket_depth: bd, phase: phase} = state) + def emit(_, {_, %{kind: k}, _}, %{bracket_depth: bd, idx: idx, phase: phase} = state) when k in ["(", "[", "{"] do new_bd = bd + 1 new_phase = advance_phase_open(phase, k) @@ -54,7 +53,7 @@ defmodule CodeQA.AST.Signals.Structural.AssignmentFunctionSignal do }} end - def emit(_, {_, %{kind: k}, _}, %{idx: idx, bracket_depth: bd, phase: phase} = state) + def emit(_, {_, %{kind: k}, _}, %{bracket_depth: bd, idx: idx, phase: phase} = state) when k in [")", "]", "}"] do new_bd = max(0, bd - 1) new_phase = advance_phase_close(phase, k) @@ -74,12 +73,12 @@ defmodule CodeQA.AST.Signals.Structural.AssignmentFunctionSignal do _, {_, token, _}, %{ + at_line_start: als, + bracket_depth: bd, idx: idx, - seen_content: sc, indent: i, - bracket_depth: bd, - at_line_start: als, - phase: phase + phase: phase, + seen_content: sc } = state ) do {emissions, new_phase} = advance_phase(phase, token, idx, sc, i, bd, als) @@ -117,10 +116,10 @@ defmodule CodeQA.AST.Signals.Structural.AssignmentFunctionSignal do defp advance_phase({:saw_id, _}, _, _, _, _, _, _), do: {MapSet.new(), :idle} - defp advance_phase({:saw_eq, id_idx}, %{kind: "", content: "function"}, _, _, _, _, _), + defp advance_phase({:saw_eq, id_idx}, %{content: "function", kind: ""}, _, _, _, _, _), do: {MapSet.new([{:assignment_function_split, id_idx}]), :idle} - defp advance_phase({:saw_eq, id_idx}, %{kind: "", content: "async"}, _, _, _, _, _), + defp advance_phase({:saw_eq, id_idx}, %{content: "async", kind: ""}, _, _, _, _, _), do: {MapSet.new(), {:saw_eq, id_idx}} defp advance_phase({:saw_eq, _}, _, _, _, _, _, _), do: {MapSet.new(), :idle} diff --git a/lib/codeqa/ast/signals/structural/blank_line_signal.ex b/lib/codeqa/ast/signals/structural/blank_line_signal.ex index c484e1a1..eb4239c4 100644 --- a/lib/codeqa/ast/signals/structural/blank_line_signal.ex +++ b/lib/codeqa/ast/signals/structural/blank_line_signal.ex @@ -1,6 +1,7 @@ defmodule CodeQA.AST.Signals.Structural.BlankLineSignal do alias CodeQA.AST.Lexing.NewlineToken alias CodeQA.AST.Lexing.WhitespaceToken + alias CodeQA.Language @moduledoc """ Emits `:blank_split` at the first substantive token after 2+ consecutive @@ -17,8 +18,8 @@ defmodule CodeQA.AST.Signals.Structural.BlankLineSignal do def group(_), do: :split def init(_, lang_mod) do - tokens = CodeQA.Language.block_end_tokens(lang_mod) - %{idx: 0, nl_run: 0, seen_content: false, last_content: nil, block_end_tokens: tokens} + tokens = Language.block_end_tokens(lang_mod) + %{block_end_tokens: tokens, idx: 0, last_content: nil, nl_run: 0, seen_content: false} end def emit(_, {_, %NewlineToken{}, _}, %{idx: idx, nl_run: nl} = state), @@ -36,7 +37,7 @@ defmodule CodeQA.AST.Signals.Structural.BlankLineSignal do {emissions, base} end - defp blank_split?(%{seen_content: true, nl_run: nl, block_end_tokens: t, last_content: lc}) + defp blank_split?(%{block_end_tokens: t, last_content: lc, nl_run: nl, seen_content: true}) when nl >= 2, do: MapSet.member?(t, lc) diff --git a/lib/codeqa/ast/signals/structural/bracket_signal.ex b/lib/codeqa/ast/signals/structural/bracket_signal.ex index 201f66e9..3d7a993f 100644 --- a/lib/codeqa/ast/signals/structural/bracket_signal.ex +++ b/lib/codeqa/ast/signals/structural/bracket_signal.ex @@ -15,17 +15,17 @@ defmodule CodeQA.AST.Signals.Structural.BracketSignal do def source(_), do: CodeQA.AST.Signals.Structural.BracketSignal def group(_), do: :enclosure - def init(_, _lang_mod), do: %{idx: 0, depth: 0, start_idx: nil, stack: []} + def init(_, _lang_mod), do: %{depth: 0, idx: 0, stack: [], start_idx: nil} - def emit(_, {_, %{kind: k}, _}, %{idx: idx, depth: 0, stack: stack} = state) + def emit(_, {_, %{kind: k}, _}, %{depth: 0, idx: idx, stack: stack} = state) when k in ["(", "[", "{"], do: {MapSet.new(), %{state | idx: idx + 1, depth: 1, start_idx: idx, stack: [k | stack]}} - def emit(_, {_, %{kind: k}, _}, %{idx: idx, depth: d, stack: stack} = state) + def emit(_, {_, %{kind: k}, _}, %{depth: d, idx: idx, stack: stack} = state) when k in ["(", "[", "{"], do: {MapSet.new(), %{state | idx: idx + 1, depth: d + 1, stack: [k | stack]}} - def emit(_, {_, %{kind: k}, _}, %{idx: idx, depth: d, stack: [top | rest]} = state) + def emit(_, {_, %{kind: k}, _}, %{depth: d, idx: idx, stack: [top | rest]} = state) when k in [")", "]", "}"] do base = %{state | idx: idx + 1} diff --git a/lib/codeqa/ast/signals/structural/branch_split_signal.ex b/lib/codeqa/ast/signals/structural/branch_split_signal.ex index 1d6d2644..d59c45b9 100644 --- a/lib/codeqa/ast/signals/structural/branch_split_signal.ex +++ b/lib/codeqa/ast/signals/structural/branch_split_signal.ex @@ -1,6 +1,7 @@ defmodule CodeQA.AST.Signals.Structural.BranchSplitSignal do alias CodeQA.AST.Lexing.NewlineToken alias CodeQA.AST.Lexing.WhitespaceToken + alias CodeQA.Language @moduledoc """ Emits `:branch_split` when a branch keyword appears at bracket depth 0 @@ -21,8 +22,8 @@ defmodule CodeQA.AST.Signals.Structural.BranchSplitSignal do def group(_), do: :branch_split def init(_, lang_mod) do - keywords = CodeQA.Language.branch_keywords(lang_mod) - %{idx: 0, bracket_depth: 0, seen_content: false, keywords: keywords} + keywords = Language.branch_keywords(lang_mod) + %{bracket_depth: 0, idx: 0, keywords: keywords, seen_content: false} end def emit(_, {_, %NewlineToken{}, _}, %{idx: idx} = state), @@ -31,11 +32,11 @@ defmodule CodeQA.AST.Signals.Structural.BranchSplitSignal do def emit(_, {_, %WhitespaceToken{}, _}, %{idx: idx} = state), do: {MapSet.new(), %{state | idx: idx + 1}} - def emit(_, {_, %{kind: k}, _}, %{idx: idx, bracket_depth: bd} = state) + def emit(_, {_, %{kind: k}, _}, %{bracket_depth: bd, idx: idx} = state) when k in ["(", "[", "{"], do: {MapSet.new(), %{state | idx: idx + 1, bracket_depth: bd + 1, seen_content: true}} - def emit(_, {_, %{kind: k}, _}, %{idx: idx, bracket_depth: bd} = state) + def emit(_, {_, %{kind: k}, _}, %{bracket_depth: bd, idx: idx} = state) when k in [")", "]", "}"], do: {MapSet.new(), @@ -50,7 +51,7 @@ defmodule CodeQA.AST.Signals.Structural.BranchSplitSignal do {emissions, base} end - defp branch_split?(%{seen_content: true, bracket_depth: 0, keywords: kw}, %{content: c}), + defp branch_split?(%{bracket_depth: 0, keywords: kw, seen_content: true}, %{content: c}), do: MapSet.member?(kw, c) defp branch_split?(_, _), do: false diff --git a/lib/codeqa/ast/signals/structural/colon_indent_signal.ex b/lib/codeqa/ast/signals/structural/colon_indent_signal.ex index 9189b795..8ac91f96 100644 --- a/lib/codeqa/ast/signals/structural/colon_indent_signal.ex +++ b/lib/codeqa/ast/signals/structural/colon_indent_signal.ex @@ -22,15 +22,14 @@ defmodule CodeQA.AST.Signals.Structural.ColonIndentSignal do def source(_), do: CodeQA.AST.Signals.Structural.ColonIndentSignal def group(_), do: :enclosure - def init(_, lang_mod) do - %{ + def init(_, lang_mod), + do: %{ + ci: 0, enabled: lang_mod.uses_colon_indent?(), idx: 0, - ci: 0, last_colon_indent: nil, stack: [] } - end def emit(_, _, %{enabled: false} = state), do: {MapSet.new(), %{state | idx: state.idx + 1}} @@ -40,13 +39,13 @@ defmodule CodeQA.AST.Signals.Structural.ColonIndentSignal do {emissions, %{state | idx: idx + 1, ci: 0, stack: []}} end - def emit(_, {_, %WhitespaceToken{}, _}, %{idx: idx, ci: ci} = state), + def emit(_, {_, %WhitespaceToken{}, _}, %{ci: ci, idx: idx} = state), do: {MapSet.new(), %{state | idx: idx + 1, ci: ci + 1}} - def emit(_, {_, %{kind: ":"}, _}, %{idx: idx, ci: ci} = state), + def emit(_, {_, %{kind: ":"}, _}, %{ci: ci, idx: idx} = state), do: {MapSet.new(), %{state | idx: idx + 1, last_colon_indent: ci}} - def emit(_, {_, _, _}, %{idx: idx, ci: ci} = state) do + def emit(_, {_, _, _}, %{ci: ci, idx: idx} = state) do {dedent_emissions, remaining} = close_dedented(state.stack, ci) new_stack = maybe_open_block(remaining, state.last_colon_indent, ci, idx) @@ -55,7 +54,7 @@ defmodule CodeQA.AST.Signals.Structural.ColonIndentSignal do end defp close_dedented(stack, ci) do - {to_close, keep} = Enum.split_while(stack, fn e -> ci <= e.colon_indent end) + {to_close, keep} = stack |> Enum.split_while(fn e -> ci <= e.colon_indent end) {build_emissions(to_close), keep} end @@ -63,13 +62,14 @@ defmodule CodeQA.AST.Signals.Structural.ColonIndentSignal do defp maybe_open_block(stack, colon_indent, ci, idx) when colon_indent != nil and ci > colon_indent, - do: [%{colon_indent: colon_indent, sub_start: idx, last_content_idx: idx} | stack] + do: [%{colon_indent: colon_indent, last_content_idx: idx, sub_start: idx} | stack] defp maybe_open_block(stack, _, _, _), do: stack defp build_emissions(entries) do - Enum.reduce(entries, MapSet.new(), fn - %{sub_start: s, last_content_idx: e}, acc when e != nil -> + entries + |> Enum.reduce(MapSet.new(), fn + %{last_content_idx: e, sub_start: s}, acc when e != nil -> MapSet.put(acc, {:colon_indent_enclosure, {s, e}}) _entry, acc -> diff --git a/lib/codeqa/ast/signals/structural/comment_divider_signal.ex b/lib/codeqa/ast/signals/structural/comment_divider_signal.ex index d01e5e83..edb1ca5b 100644 --- a/lib/codeqa/ast/signals/structural/comment_divider_signal.ex +++ b/lib/codeqa/ast/signals/structural/comment_divider_signal.ex @@ -1,6 +1,7 @@ defmodule CodeQA.AST.Signals.Structural.CommentDividerSignal do alias CodeQA.AST.Lexing.NewlineToken alias CodeQA.AST.Lexing.WhitespaceToken + alias CodeQA.Language @moduledoc """ Emits `:comment_divider_split` when a line is a "visual divider" comment — @@ -22,15 +23,15 @@ defmodule CodeQA.AST.Signals.Structural.CommentDividerSignal do def init(_, lang_mod) do comment_prefixes = MapSet.new(lang_mod.comment_prefixes()) - divider_indicators = CodeQA.Language.divider_indicators(lang_mod) + divider_indicators = Language.divider_indicators(lang_mod) %{ - idx: 0, at_line_start: true, - seen_content: false, - indent: 0, comment_prefixes: comment_prefixes, - divider_indicators: divider_indicators + divider_indicators: divider_indicators, + idx: 0, + indent: 0, + seen_content: false } end @@ -40,7 +41,7 @@ defmodule CodeQA.AST.Signals.Structural.CommentDividerSignal do def emit( _, {_, %WhitespaceToken{}, _}, - %{idx: idx, at_line_start: true, indent: indent} = state + %{at_line_start: true, idx: idx, indent: indent} = state ), do: {MapSet.new(), %{state | idx: idx + 1, at_line_start: true, indent: indent + 1}} @@ -60,11 +61,11 @@ defmodule CodeQA.AST.Signals.Structural.CommentDividerSignal do defp divider_split?( %{ - seen_content: true, at_line_start: true, - indent: 0, comment_prefixes: cp, - divider_indicators: di + divider_indicators: di, + indent: 0, + seen_content: true }, %{kind: k}, next diff --git a/lib/codeqa/ast/signals/structural/decorator_signal.ex b/lib/codeqa/ast/signals/structural/decorator_signal.ex index 0dc1f5be..170ecebc 100644 --- a/lib/codeqa/ast/signals/structural/decorator_signal.ex +++ b/lib/codeqa/ast/signals/structural/decorator_signal.ex @@ -18,18 +18,18 @@ defmodule CodeQA.AST.Signals.Structural.DecoratorSignal do def group(_), do: :split def init(_, _lang_mod), - do: %{idx: 0, bracket_depth: 0, at_line_start: true, seen_content: false} + do: %{at_line_start: true, bracket_depth: 0, idx: 0, seen_content: false} def emit(_, {_, %NewlineToken{}, _}, %{idx: idx} = state), do: {MapSet.new(), %{state | idx: idx + 1, at_line_start: true}} - def emit(_, {_, %WhitespaceToken{}, _}, %{idx: idx, at_line_start: true} = state), + def emit(_, {_, %WhitespaceToken{}, _}, %{at_line_start: true, idx: idx} = state), do: {MapSet.new(), %{state | idx: idx + 1, at_line_start: true}} def emit(_, {_, %WhitespaceToken{}, _}, %{idx: idx} = state), do: {MapSet.new(), %{state | idx: idx + 1}} - def emit(_, {_, %{kind: k}, _}, %{idx: idx, bracket_depth: bd} = state) + def emit(_, {_, %{kind: k}, _}, %{bracket_depth: bd, idx: idx} = state) when k in ["(", "[", "{"], do: {MapSet.new(), @@ -41,7 +41,7 @@ defmodule CodeQA.AST.Signals.Structural.DecoratorSignal do at_line_start: false }} - def emit(_, {_, %{kind: k}, _}, %{idx: idx, bracket_depth: bd} = state) + def emit(_, {_, %{kind: k}, _}, %{bracket_depth: bd, idx: idx} = state) when k in [")", "]", "}"], do: {MapSet.new(), @@ -56,7 +56,7 @@ defmodule CodeQA.AST.Signals.Structural.DecoratorSignal do def emit( _, {_, %{kind: "@"}, _}, - %{idx: idx, seen_content: true, bracket_depth: 0, at_line_start: true} = state + %{at_line_start: true, bracket_depth: 0, idx: idx, seen_content: true} = state ), do: {MapSet.new([{:decorator_split, idx}]), @@ -65,7 +65,7 @@ defmodule CodeQA.AST.Signals.Structural.DecoratorSignal do def emit( _, {_, %{kind: "#"}, next}, - %{idx: idx, seen_content: true, bracket_depth: 0, at_line_start: true} = state + %{at_line_start: true, bracket_depth: 0, idx: idx, seen_content: true} = state ) do emissions = if next != nil and next.kind == "[", diff --git a/lib/codeqa/ast/signals/structural/dedent_to_zero_signal.ex b/lib/codeqa/ast/signals/structural/dedent_to_zero_signal.ex index d644dad4..c19cdd39 100644 --- a/lib/codeqa/ast/signals/structural/dedent_to_zero_signal.ex +++ b/lib/codeqa/ast/signals/structural/dedent_to_zero_signal.ex @@ -17,24 +17,23 @@ defmodule CodeQA.AST.Signals.Structural.DedentToZeroSignal do def source(_), do: CodeQA.AST.Signals.Structural.DedentToZeroSignal def group(_), do: :split - def init(_, _lang_mod) do - %{ - idx: 0, + def init(_, _lang_mod), + do: %{ at_line_start: true, - seen_content: false, - current_line_has_indent: false, current_line_has_content: false, - prev_line_had_indent: false + current_line_has_indent: false, + idx: 0, + prev_line_had_indent: false, + seen_content: false } - end def emit( _, {_, %NewlineToken{}, _}, %{ - idx: idx, current_line_has_content: clhc, current_line_has_indent: clhi, + idx: idx, prev_line_had_indent: plhi } = state ) do @@ -51,7 +50,7 @@ defmodule CodeQA.AST.Signals.Structural.DedentToZeroSignal do }} end - def emit(_, {_, %WhitespaceToken{}, _}, %{idx: idx, at_line_start: true} = state), + def emit(_, {_, %WhitespaceToken{}, _}, %{at_line_start: true, idx: idx} = state), do: {MapSet.new(), %{state | idx: idx + 1, current_line_has_indent: true, at_line_start: true}} diff --git a/lib/codeqa/ast/signals/structural/doc_comment_lead_signal.ex b/lib/codeqa/ast/signals/structural/doc_comment_lead_signal.ex index c5e5c4e3..5bbc4395 100644 --- a/lib/codeqa/ast/signals/structural/doc_comment_lead_signal.ex +++ b/lib/codeqa/ast/signals/structural/doc_comment_lead_signal.ex @@ -18,12 +18,12 @@ defmodule CodeQA.AST.Signals.Structural.DocCommentLeadSignal do def source(_), do: CodeQA.AST.Signals.Structural.DocCommentLeadSignal def group(_), do: :split - def init(_, _lang_mod), do: %{idx: 0, at_line_start: true, seen_content: false} + def init(_, _lang_mod), do: %{at_line_start: true, idx: 0, seen_content: false} def emit(_, {_, %NewlineToken{}, _}, %{idx: idx} = state), do: {MapSet.new(), %{state | idx: idx + 1, at_line_start: true}} - def emit(_, {_, %WhitespaceToken{}, _}, %{idx: idx, at_line_start: true} = state), + def emit(_, {_, %WhitespaceToken{}, _}, %{at_line_start: true, idx: idx} = state), do: {MapSet.new(), %{state | idx: idx + 1, at_line_start: true}} def emit(_, {_, %WhitespaceToken{}, _}, %{idx: idx} = state), @@ -32,7 +32,7 @@ defmodule CodeQA.AST.Signals.Structural.DocCommentLeadSignal do def emit( _, {_, %{kind: "//"}, next}, - %{idx: idx, at_line_start: true, seen_content: true} = state + %{at_line_start: true, idx: idx, seen_content: true} = state ) do base = %{state | idx: idx + 1, at_line_start: false} @@ -47,7 +47,7 @@ defmodule CodeQA.AST.Signals.Structural.DocCommentLeadSignal do def emit( _, {_, %{kind: "/"}, next}, - %{idx: idx, at_line_start: true, seen_content: true} = state + %{at_line_start: true, idx: idx, seen_content: true} = state ) do base = %{state | idx: idx + 1, at_line_start: false} diff --git a/lib/codeqa/ast/signals/structural/keyword_signal.ex b/lib/codeqa/ast/signals/structural/keyword_signal.ex index c13d3cf9..63911715 100644 --- a/lib/codeqa/ast/signals/structural/keyword_signal.ex +++ b/lib/codeqa/ast/signals/structural/keyword_signal.ex @@ -1,6 +1,7 @@ defmodule CodeQA.AST.Signals.Structural.KeywordSignal do alias CodeQA.AST.Lexing.NewlineToken alias CodeQA.AST.Lexing.WhitespaceToken + alias CodeQA.Language @moduledoc """ Emits `:keyword_split` when a declaration keyword appears at bracket depth 0 @@ -17,28 +18,28 @@ defmodule CodeQA.AST.Signals.Structural.KeywordSignal do def group(_), do: :split def init(_, lang_mod) do - keywords = CodeQA.Language.declaration_keywords(lang_mod) + keywords = Language.declaration_keywords(lang_mod) %{ - idx: 0, + at_line_start: true, bracket_depth: 0, + idx: 0, indent: 0, - at_line_start: true, - seen_content: false, - keywords: keywords + keywords: keywords, + seen_content: false } end def emit(_, {_, %NewlineToken{}, _}, %{idx: idx} = state), do: {MapSet.new(), %{state | idx: idx + 1, indent: 0, at_line_start: true}} - def emit(_, {_, %WhitespaceToken{}, _}, %{idx: idx, indent: i, at_line_start: true} = state), + def emit(_, {_, %WhitespaceToken{}, _}, %{at_line_start: true, idx: idx, indent: i} = state), do: {MapSet.new(), %{state | idx: idx + 1, indent: i + 1, at_line_start: true}} def emit(_, {_, %WhitespaceToken{}, _}, %{idx: idx} = state), do: {MapSet.new(), %{state | idx: idx + 1}} - def emit(_, {_, %{kind: k}, _}, %{idx: idx, bracket_depth: bd} = state) + def emit(_, {_, %{kind: k}, _}, %{bracket_depth: bd, idx: idx} = state) when k in ["(", "[", "{"], do: {MapSet.new(), @@ -50,7 +51,7 @@ defmodule CodeQA.AST.Signals.Structural.KeywordSignal do at_line_start: false }} - def emit(_, {_, %{kind: k}, _}, %{idx: idx, bracket_depth: bd} = state) + def emit(_, {_, %{kind: k}, _}, %{bracket_depth: bd, idx: idx} = state) when k in [")", "]", "}"], do: {MapSet.new(), @@ -73,7 +74,7 @@ defmodule CodeQA.AST.Signals.Structural.KeywordSignal do {emissions, base} end - defp keyword_split?(%{seen_content: true, bracket_depth: 0, indent: 0, keywords: kw}, %{ + defp keyword_split?(%{bracket_depth: 0, indent: 0, keywords: kw, seen_content: true}, %{ content: c }), do: MapSet.member?(kw, c) diff --git a/lib/codeqa/ast/signals/structural/sql_block_signal.ex b/lib/codeqa/ast/signals/structural/sql_block_signal.ex index 1e376f59..ef648ba6 100644 --- a/lib/codeqa/ast/signals/structural/sql_block_signal.ex +++ b/lib/codeqa/ast/signals/structural/sql_block_signal.ex @@ -1,6 +1,7 @@ defmodule CodeQA.AST.Signals.Structural.SQLBlockSignal do alias CodeQA.AST.Lexing.NewlineToken alias CodeQA.AST.Lexing.WhitespaceToken + alias CodeQA.Language @moduledoc """ Emits `:sql_block_split` when a SQL DDL or DML statement keyword appears @@ -22,14 +23,14 @@ defmodule CodeQA.AST.Signals.Structural.SQLBlockSignal do def group(_), do: :split def init(_, lang_mod) do - keywords = CodeQA.Language.statement_keywords(lang_mod) - %{idx: 0, at_line_start: true, seen_content: false, keywords: keywords} + keywords = Language.statement_keywords(lang_mod) + %{at_line_start: true, idx: 0, keywords: keywords, seen_content: false} end def emit(_, {_, %NewlineToken{}, _}, %{idx: idx} = state), do: {MapSet.new(), %{state | idx: idx + 1, at_line_start: true}} - def emit(_, {_, %WhitespaceToken{}, _}, %{idx: idx, at_line_start: true} = state), + def emit(_, {_, %WhitespaceToken{}, _}, %{at_line_start: true, idx: idx} = state), do: {MapSet.new(), %{state | idx: idx + 1, at_line_start: true}} def emit(_, {_, %WhitespaceToken{}, _}, %{idx: idx} = state), @@ -47,7 +48,7 @@ defmodule CodeQA.AST.Signals.Structural.SQLBlockSignal do def emit(_, {_, _, _}, %{idx: idx} = state), do: {MapSet.new(), %{state | idx: idx + 1, at_line_start: false, seen_content: true}} - defp sql_split?(%{seen_content: true, at_line_start: true, keywords: kw}, %{content: c}), + defp sql_split?(%{at_line_start: true, keywords: kw, seen_content: true}, %{content: c}), do: MapSet.member?(kw, String.downcase(c)) defp sql_split?(_, _), do: false diff --git a/lib/codeqa/ast/signals/structural/triple_quote_signal.ex b/lib/codeqa/ast/signals/structural/triple_quote_signal.ex index ac5808db..e1fbcd39 100644 --- a/lib/codeqa/ast/signals/structural/triple_quote_signal.ex +++ b/lib/codeqa/ast/signals/structural/triple_quote_signal.ex @@ -1,4 +1,6 @@ defmodule CodeQA.AST.Signals.Structural.TripleQuoteSignal do + alias CodeQA.AST.Lexing.StringToken + @moduledoc """ Emits `:triple_split` at each `` token boundary. @@ -13,7 +15,7 @@ defmodule CodeQA.AST.Signals.Structural.TripleQuoteSignal do defstruct [] defimpl CodeQA.AST.Parsing.Signal do - @doc_kind CodeQA.AST.Lexing.StringToken.doc_kind() + @doc_kind StringToken.doc_kind() def source(_), do: CodeQA.AST.Signals.Structural.TripleQuoteSignal def group(_), do: :split diff --git a/lib/codeqa/block_impact/file_impact.ex b/lib/codeqa/block_impact/file_impact.ex index 10bd1f9f..1e381f61 100644 --- a/lib/codeqa/block_impact/file_impact.ex +++ b/lib/codeqa/block_impact/file_impact.ex @@ -26,21 +26,21 @@ defmodule CodeQA.BlockImpact.FileImpact do end @spec reconstruct_without([CodeQA.AST.Lexing.Token.t()], Node.t()) :: String.t() - def reconstruct_without(root_tokens, %Node{tokens: []}) do - Enum.map_join(root_tokens, "", & &1.content) - end + def reconstruct_without(root_tokens, %Node{tokens: []}), + do: root_tokens |> Enum.map_join("", & &1.content) def reconstruct_without(root_tokens, node) do first = List.first(node.tokens) - case Enum.find_index(root_tokens, fn t -> t.line == first.line and t.col == first.col end) do + case root_tokens + |> Enum.find_index(fn t -> t.line == first.line and t.col == first.col end) do nil -> - Enum.map_join(root_tokens, "", & &1.content) + root_tokens |> Enum.map_join("", & &1.content) start_idx -> end_idx = start_idx + length(node.tokens) remaining = Enum.take(root_tokens, start_idx) ++ Enum.drop(root_tokens, end_idx) - Enum.map_join(remaining, "", & &1.content) + remaining |> Enum.map_join("", & &1.content) end end end diff --git a/lib/codeqa/block_impact/refactoring_potentials.ex b/lib/codeqa/block_impact/refactoring_potentials.ex index 4dcceb77..2d942f84 100644 --- a/lib/codeqa/block_impact/refactoring_potentials.ex +++ b/lib/codeqa/block_impact/refactoring_potentials.ex @@ -57,7 +57,7 @@ defmodule CodeQA.BlockImpact.RefactoringPotentials do behavior_map ) - all_keys = Enum.uniq(Map.keys(file_delta) ++ Map.keys(codebase_delta)) + all_keys = (Map.keys(file_delta) ++ Map.keys(codebase_delta)) |> Enum.uniq() all_keys |> Enum.reject(fn {category, behavior} -> @@ -106,9 +106,12 @@ defmodule CodeQA.BlockImpact.RefactoringPotentials do defp cosines_to_delta(baseline_cosines, without_cosines) do without_map = - Map.new(without_cosines, fn %{category: c, behavior: b, cosine: cos} -> {{c, b}, cos} end) + for %{behavior: b, category: c, cosine: cos} <- without_cosines do + {{c, b}, cos} + end + |> Map.new() - Map.new(baseline_cosines, fn %{category: c, behavior: b, cosine: cos} -> + Map.new(baseline_cosines, fn %{behavior: b, category: c, cosine: cos} -> without_cos = Map.get(without_map, {c, b}, 0.0) {{c, b}, without_cos - cos} end) @@ -116,9 +119,8 @@ defmodule CodeQA.BlockImpact.RefactoringPotentials do defp excluded?(_category, _behavior, nil, _behavior_map), do: false - defp excluded?(category, behavior, block_type, behavior_map) do - Atom.to_string(block_type) in excludes_for(category, behavior, behavior_map) - end + defp excluded?(category, behavior, block_type, behavior_map), + do: Atom.to_string(block_type) in excludes_for(category, behavior, behavior_map) defp excludes_for(category, behavior, behavior_map) when is_map(behavior_map) do with [_ | _] = behaviors <- Map.get(behavior_map, category, []), diff --git a/lib/codeqa/block_impact_analyzer.ex b/lib/codeqa/block_impact_analyzer.ex index 69da2fe7..469e1647 100644 --- a/lib/codeqa/block_impact_analyzer.ex +++ b/lib/codeqa/block_impact_analyzer.ex @@ -1,4 +1,8 @@ defmodule CodeQA.BlockImpactAnalyzer do + alias CodeQA.AST.Lexing.NewlineToken + alias CodeQA.AST.Lexing.WhitespaceToken + alias CodeQA.Language + @moduledoc """ Orchestrates block impact analysis across all files in a pipeline result. @@ -31,14 +35,18 @@ defmodule CodeQA.BlockImpactAnalyzer do """ alias CodeQA.Analysis.BehaviorConfigServer - alias CodeQA.AST.Classification.{NodeClassifier, TypedNodeKind} + alias CodeQA.AST.Classification.NodeClassifier + alias CodeQA.AST.Classification.TypedNodeKind alias CodeQA.AST.Enrichment.Node alias CodeQA.AST.Lexing.TokenNormalizer alias CodeQA.AST.Parsing.Parser - alias CodeQA.BlockImpact.{FileImpact, RefactoringPotentials} - alias CodeQA.CombinedMetrics.{FileScorer, SampleRunner} + alias CodeQA.BlockImpact.FileImpact + alias CodeQA.BlockImpact.RefactoringPotentials + alias CodeQA.CombinedMetrics.FileScorer + alias CodeQA.CombinedMetrics.SampleRunner alias CodeQA.Engine.Analyzer alias CodeQA.Languages.Unknown + import CodeQA.Shared, only: [project_languages_shared: 1] @min_tokens 10 @@ -132,7 +140,7 @@ defmodule CodeQA.BlockImpactAnalyzer do ordered: false, timeout: :infinity ) - |> Enum.reduce(%{}, fn {:ok, {path, data}}, acc -> Map.put(acc, path, data) end) + |> Map.new(fn {:ok, {path, data}} -> {path, data} end) :telemetry.execute( [:codeqa, :block_impact, :analyze], @@ -143,6 +151,17 @@ defmodule CodeQA.BlockImpactAnalyzer do Map.put(pipeline_result, "files", updated_files) end + defp compute_nodes_timed( + _path, + "", + _baseline_file_metrics, + _file_results, + _baseline_codebase_cosines, + _nodes_top, + _cached_behaviors + ), + do: {[], %{duration: 0, file_cosines_us: 0, node_count: 0, parse_us: 0, tokenize_us: 0}} + defp compute_nodes_timed( path, content, @@ -152,68 +171,64 @@ defmodule CodeQA.BlockImpactAnalyzer do nodes_top, cached_behaviors ) do - if content == "" do - {[], %{duration: 0, tokenize_us: 0, parse_us: 0, file_cosines_us: 0, node_count: 0}} - else - t0 = now() - - {root_tokens, tokenize_us} = timed(fn -> TokenNormalizer.normalize_structural(content) end) - {top_level_nodes, parse_us} = timed(fn -> Parser.detect_blocks(root_tokens, Unknown) end) - - baseline_file_agg = FileScorer.file_to_aggregate(baseline_file_metrics) - lang_mod = CodeQA.Language.detect(path) - language = lang_mod.name() - - {baseline_file_cosines, file_cosines_us} = - timed(fn -> - SampleRunner.diagnose_aggregate(baseline_file_agg, - top: 99_999, - language: language, - behavior_map: cached_behaviors - ) - end) + t0 = now() - inc_agg = build_incremental_agg(file_results) - old_file_triples = file_metrics_to_triples(baseline_file_metrics) - project_langs = project_languages(file_results) - - node_ctx = %{ - inc_agg: inc_agg, - old_file_triples: old_file_triples, - project_langs: project_langs, - cached_behaviors: cached_behaviors, - lang_mod: lang_mod, - baseline_file_metrics: baseline_file_metrics - } - - nodes = - top_level_nodes - |> Enum.map(fn node -> - serialize_node( - node, - path, - root_tokens, - baseline_file_cosines, - baseline_codebase_cosines, - nodes_top, - language, - node_ctx - ) - end) - |> Enum.sort_by(fn n -> {n["start_line"], n["column_start"]} end) + {root_tokens, tokenize_us} = timed(fn -> TokenNormalizer.normalize_structural(content) end) + {top_level_nodes, parse_us} = timed(fn -> Parser.detect_blocks(root_tokens, Unknown) end) - measurements = %{ - duration: now() - t0, - tokenize_us: tokenize_us, - parse_us: parse_us, - file_cosines_us: file_cosines_us, - node_count: length(top_level_nodes), - token_count: length(root_tokens), - bytes: byte_size(content) - } - - {nodes, measurements} - end + baseline_file_agg = FileScorer.file_to_aggregate(baseline_file_metrics) + lang_mod = Language.detect(path) + language = lang_mod.name() + + {baseline_file_cosines, file_cosines_us} = + timed(fn -> + SampleRunner.diagnose_aggregate(baseline_file_agg, + top: 99_999, + language: language, + behavior_map: cached_behaviors + ) + end) + + inc_agg = build_incremental_agg(file_results) + old_file_triples = file_metrics_to_triples(baseline_file_metrics) + project_langs = project_languages(file_results) + + node_ctx = %{ + baseline_file_metrics: baseline_file_metrics, + cached_behaviors: cached_behaviors, + inc_agg: inc_agg, + lang_mod: lang_mod, + old_file_triples: old_file_triples, + project_langs: project_langs + } + + nodes = + top_level_nodes + |> Enum.map( + &serialize_node( + &1, + path, + root_tokens, + baseline_file_cosines, + baseline_codebase_cosines, + nodes_top, + language, + node_ctx + ) + ) + |> Enum.sort_by(fn n -> {n["start_line"], n["column_start"]} end) + + measurements = %{ + bytes: byte_size(content), + duration: now() - t0, + file_cosines_us: file_cosines_us, + node_count: length(top_level_nodes), + parse_us: parse_us, + token_count: length(root_tokens), + tokenize_us: tokenize_us + } + + {nodes, measurements} end defp serialize_node( @@ -269,7 +284,7 @@ defmodule CodeQA.BlockImpactAnalyzer do |> Enum.sort_by(fn n -> {n["start_line"], n["column_start"]} end) first_token = List.first(node.tokens) - char_length = Enum.reduce(node.tokens, 0, fn t, acc -> acc + byte_size(t.content) end) + char_length = node.tokens |> Enum.sum_by(fn t -> byte_size(t.content) end) %{ "start_line" => node.start_line, @@ -288,23 +303,8 @@ defmodule CodeQA.BlockImpactAnalyzer do # leading whitespace stripped so the classification signals see the keyword at # indent 0. Lets NodeClassifier see the keyword that drove the bracket-split # (`alias`, `@name`, etc.) when classifying a sub-block. - defp parent_context_for(parent_tokens, child) do - case List.first(child.tokens) do - nil -> - [] - - child_first -> - nl_kind = CodeQA.AST.Lexing.NewlineToken.kind() - ws_kind = CodeQA.AST.Lexing.WhitespaceToken.kind() - - parent_tokens - |> Enum.take_while(fn t -> t != child_first end) - |> Enum.reverse() - |> Enum.take_while(fn t -> t.kind != nl_kind end) - |> Enum.reverse() - |> Enum.drop_while(fn t -> t.kind == ws_kind end) - end - end + defp parent_context_for(parent_tokens, child), + do: List.first(child.tokens) |> handle_parent_context_for_first(parent_tokens) defp compute_potentials_timed( %Node{} = node, @@ -322,7 +322,7 @@ defmodule CodeQA.BlockImpactAnalyzer do {reconstructed, reconstruct_us} = timed(fn -> FileImpact.reconstruct_without(root_tokens, node) end) - block_content = Enum.map_join(node.tokens, "", & &1.content) + block_content = node.tokens |> Enum.map_join("", & &1.content) {without_file_metrics, analyze_file_us} = timed(fn -> @@ -361,10 +361,10 @@ defmodule CodeQA.BlockImpactAnalyzer do :telemetry.execute( [:codeqa, :block_impact, :node], %{ + aggregate_us: aggregate_us, + analyze_file_us: analyze_file_us, duration: now() - t0, reconstruct_us: reconstruct_us, - analyze_file_us: analyze_file_us, - aggregate_us: aggregate_us, refactoring_us: refactoring_us }, %{path: path, token_count: length(node.tokens)} @@ -395,20 +395,31 @@ defmodule CodeQA.BlockImpactAnalyzer do |> Enum.group_by(fn {metric, key, _val} -> {metric, key} end, fn {_, _, val} -> val end) |> Map.new(fn {{metric, key}, values} -> n = length(values) - sum = Enum.sum(values) - sum_sq = Enum.reduce(values, 0.0, fn v, acc -> acc + v * v end) + sum = values |> Enum.sum() + sum_sq = values |> Enum.reduce(0.0, fn v, acc -> acc + v * v end) {{metric, key}, - %{sum: sum, sum_sq: sum_sq, min: Enum.min(values), max: Enum.max(values), count: n}} + %{count: n, max: values |> Enum.max(), min: values |> Enum.min(), sum: sum, sum_sq: sum_sq}} end) end defp swap_file_in_agg(inc_agg, old_triples, new_triples) do - old_map = Map.new(old_triples, fn {metric, key, val} -> {{metric, key}, val} end) - new_map = Map.new(new_triples, fn {metric, key, val} -> {{metric, key}, val} end) - all_keys = Enum.uniq(Map.keys(old_map) ++ Map.keys(new_map)) + old_map = + for {metric, key, val} <- old_triples do + {{metric, key}, val} + end + |> Map.new() - Enum.reduce(all_keys, inc_agg, fn mk, acc -> + new_map = + for {metric, key, val} <- new_triples do + {{metric, key}, val} + end + |> Map.new() + + all_keys = (Map.keys(old_map) ++ Map.keys(new_map)) |> Enum.uniq() + + all_keys + |> Enum.reduce(inc_agg, fn mk, acc -> case Map.get(acc, mk) do nil -> acc @@ -418,18 +429,19 @@ defmodule CodeQA.BlockImpactAnalyzer do new_val = Map.get(new_map, mk, 0.0) Map.put(acc, mk, %{ - sum: state.sum - old_val + new_val, - sum_sq: state.sum_sq - old_val * old_val + new_val * new_val, - min: min(state.min, new_val), + count: state.count, max: max(state.max, new_val), - count: state.count + min: min(state.min, new_val), + sum: state.sum - old_val + new_val, + sum_sq: state.sum_sq - old_val * old_val + new_val * new_val }) end end) end defp incremental_agg_to_aggregate(inc_agg) do - Enum.reduce(inc_agg, %{}, fn {{metric, key}, state}, acc -> + inc_agg + |> Enum.reduce(%{}, fn {{metric, key}, state}, acc -> n = state.count mean = if n > 0, do: state.sum / n, else: 0.0 variance = if n > 0, do: max(state.sum_sq / n - mean * mean, 0.0), else: 0.0 @@ -452,7 +464,8 @@ defmodule CodeQA.BlockImpactAnalyzer do defp filter_behaviors_by_languages(behaviors_map, project_langs) do Map.new(behaviors_map, fn {category, behaviors} -> filtered = - Enum.filter(behaviors, fn {_behavior, behavior_data} -> + behaviors + |> Enum.filter(fn {_behavior, behavior_data} -> behavior_langs = Map.get(behavior_data, "_languages", []) behavior_langs == [] or Enum.any?(behavior_langs, &(&1 in project_langs)) end) @@ -461,13 +474,7 @@ defmodule CodeQA.BlockImpactAnalyzer do end) end - defp project_languages(path_keyed_map) do - path_keyed_map - |> Map.keys() - |> Enum.map(&CodeQA.Language.detect(&1).name()) - |> Enum.reject(&(&1 == "unknown")) - |> Enum.uniq() - end + defp project_languages(path_keyed_map), do: project_languages_shared(path_keyed_map) defp timed(fun) do t = now() @@ -476,4 +483,20 @@ defmodule CodeQA.BlockImpactAnalyzer do end defp now, do: System.monotonic_time(:microsecond) + + # FIXME: extracted automatically by ExtractCaseToHelper — review + # the parameter list and consider a better name. + defp handle_parent_context_for_first(nil, _parent_tokens), do: [] + + defp handle_parent_context_for_first(child_first, parent_tokens) do + nl_kind = NewlineToken.kind() + ws_kind = WhitespaceToken.kind() + + parent_tokens + |> Enum.take_while(&(&1 != child_first)) + |> Enum.reverse() + |> Enum.take_while(&(&1.kind != nl_kind)) + |> Enum.reverse() + |> Enum.drop_while(&(&1.kind == ws_kind)) + end end diff --git a/lib/codeqa/cli/analyze.ex b/lib/codeqa/cli/analyze.ex index 9c1f8402..48ee155b 100644 --- a/lib/codeqa/cli/analyze.ex +++ b/lib/codeqa/cli/analyze.ex @@ -65,7 +65,7 @@ defmodule CodeQA.CLI.Analyze do IO.puts(:stderr, "Analysis completed in #{end_time - start_time}ms") total_bytes = results["files"] |> Map.values() |> Enum.map(& &1["bytes"]) |> Enum.sum() - results = filter_files_for_output(results, opts, "json") + results = results |> filter_files_for_output(opts, "json") report = %{ @@ -92,15 +92,13 @@ defmodule CodeQA.CLI.Analyze do end end - defp print_progress(opts, files) do - if opts[:progress] do - step_prefix = if opts[:show_ncd], do: "1/5 ", else: "1/1 " - IO.puts(:stderr, " #{step_prefix}Analyzing #{map_size(files)} files...") - else - IO.puts(:stderr, "Analyzing #{map_size(files)} files...") - end + defp print_progress(%{progress: progress} = opts, files) when progress not in [nil, false] do + step_prefix = if opts[:show_ncd], do: "1/5 ", else: "1/1 " + IO.puts(:stderr, " #{step_prefix}Analyzing #{map_size(files)} files...") end + defp print_progress(_opts, files), do: IO.puts(:stderr, "Analyzing #{map_size(files)} files...") + defp filter_files_for_output(results, opts, _format) do cond do opts[:show_files] -> diff --git a/lib/codeqa/cli/correlate.ex b/lib/codeqa/cli/correlate.ex index c38a2481..b7ec98da 100644 --- a/lib/codeqa/cli/correlate.ex +++ b/lib/codeqa/cli/correlate.ex @@ -1,4 +1,5 @@ defmodule CodeQA.CLI.Correlate do + alias CodeQA.Math @moduledoc false @behaviour CodeQA.CLI.Command @@ -80,8 +81,8 @@ defmodule CodeQA.CLI.Correlate do IO.puts(:stderr, " Total time: #{total_end - total_start}ms") top_n = opts[:top] || 20 - sorted = Enum.sort_by(correlations, &abs(&1["correlation"]), :desc) - top = Enum.take(sorted, top_n) + sorted = correlations |> Enum.sort_by(&abs(&1["correlation"]), :desc) + top = sorted |> Enum.take(top_n) Jason.encode!(top, pretty: true) end @@ -90,7 +91,8 @@ defmodule CodeQA.CLI.Correlate do t0 = System.monotonic_time(:millisecond) extracted = - Enum.map(files, fn file -> + files + |> Enum.map(fn file -> Path.join(path, file) |> File.read!() |> Jason.decode!() @@ -117,7 +119,7 @@ defmodule CodeQA.CLI.Correlate do series = keys |> Enum.map(fn key -> - values = Enum.map(extracted, &Map.get(&1, key, 0.0)) + values = extracted |> Enum.map(&Map.get(&1, key, 0.0)) {key, values, Enum.min(values) != Enum.max(values)} end) |> Enum.filter(fn {_, _, has_variance} -> has_variance end) @@ -146,8 +148,9 @@ defmodule CodeQA.CLI.Correlate do end defp flatten_aggregate_metrics(aggregate) do - Enum.flat_map(aggregate, fn {category, metrics} -> - Enum.map(metrics, fn {name, val} -> {"#{category}.#{name}", val} end) + aggregate + |> Enum.flat_map(fn {category, metrics} -> + metrics |> Enum.map(fn {name, val} -> {"#{category}.#{name}", val} end) end) end @@ -160,7 +163,7 @@ defmodule CodeQA.CLI.Correlate do else: all_pairs_stream(active_keys) pairs_stream = - if max_steps > 0, do: Stream.take(pairs_to_process, max_steps), else: pairs_to_process + if max_steps > 0, do: pairs_to_process |> Stream.take(max_steps), else: pairs_to_process total_pairs = cond do @@ -168,8 +171,8 @@ defmodule CodeQA.CLI.Correlate do max_steps opts[:combined_only] -> - normal_count = Enum.count(active_keys, &(not String.contains?(&1, ","))) - combined_count = Enum.count(active_keys, &String.contains?(&1, ",")) + normal_count = active_keys |> Enum.count(&(not String.contains?(&1, ","))) + combined_count = active_keys |> Enum.count(&String.contains?(&1, ",")) normal_count * combined_count true -> @@ -180,21 +183,23 @@ defmodule CodeQA.CLI.Correlate do end defp combined_pairs_stream(active_keys) do - normal = Enum.reject(active_keys, &String.contains?(&1, ",")) - combined = Enum.filter(active_keys, &String.contains?(&1, ",")) + normal = active_keys |> Enum.reject(&String.contains?(&1, ",")) + combined = active_keys |> Enum.filter(&String.contains?(&1, ",")) - Stream.flat_map(normal, fn k1 -> - Stream.map(combined, fn k2 -> {k1, k2} end) + normal + |> Stream.flat_map(fn k1 -> + combined |> Stream.map(&{k1, &1}) end) end defp all_pairs_stream(active_keys) do - Stream.unfold(active_keys, fn + active_keys + |> Stream.unfold(fn [] -> nil [_h | t] = list -> {list, t} end) |> Stream.flat_map(fn - [k1 | rest] -> Stream.map(rest, fn k2 -> {k1, k2} end) + [k1 | rest] -> rest |> Stream.map(&{k1, &1}) [] -> [] end) end @@ -241,7 +246,7 @@ defmodule CodeQA.CLI.Correlate do MapSet.disjoint?(Map.fetch!(category_map, k1), Map.fetch!(category_map, k2)) if cross_valid do - corr = CodeQA.Math.pearson_correlation_list(Map.fetch!(series, k1), Map.fetch!(series, k2)) + corr = Math.pearson_correlation_list(Map.fetch!(series, k1), Map.fetch!(series, k2)) maybe_correlation_result(k1, k2, corr, opts) end end diff --git a/lib/codeqa/cli/diagnose.ex b/lib/codeqa/cli/diagnose.ex index 93c2e8d0..9b7a0795 100644 --- a/lib/codeqa/cli/diagnose.ex +++ b/lib/codeqa/cli/diagnose.ex @@ -1,4 +1,5 @@ defmodule CodeQA.CLI.Diagnose do + alias CodeQA.Diagnostics @moduledoc false @behaviour CodeQA.CLI.Command @@ -60,7 +61,7 @@ defmodule CodeQA.CLI.Diagnose do _ -> :plain end - CodeQA.Diagnostics.run( + Diagnostics.run( path: path, mode: mode, top: opts[:top] || 15, diff --git a/lib/codeqa/cli/health_report.ex b/lib/codeqa/cli/health_report.ex index 5dc8e6b6..73a6dee1 100644 --- a/lib/codeqa/cli/health_report.ex +++ b/lib/codeqa/cli/health_report.ex @@ -93,7 +93,8 @@ defmodule CodeQA.CLI.HealthReport do total_bytes = results["files"] |> Map.values() |> Enum.map(& &1["bytes"]) |> Enum.sum() results = - Map.put(results, "metadata", %{ + results + |> Map.put("metadata", %{ "path" => Path.expand(path), "timestamp" => DateTime.utc_now() |> DateTime.to_iso8601(), "total_files" => map_size(files), @@ -178,7 +179,8 @@ defmodule CodeQA.CLI.HealthReport do parts = HealthReport.Formatter.render_parts(report, detail: detail) # Write each part to a numbered file - Enum.with_index(parts, 1) + parts + |> Enum.with_index(1) |> Enum.each(fn {content, n} -> path = Path.join(tmpdir, "codeqa-part-#{n}.md") File.write!(path, content) @@ -233,17 +235,17 @@ defmodule CodeQA.CLI.HealthReport do {:ok, pid} = Agent.start_link(fn -> %{ - nodes: [], - files: [], codebase_cosines_us: 0, - stages: %{}, - file_metrics: %{}, codebase_metrics: %{}, - phases: %{}, + cosine_breakdown: %{}, + cosine_breakdown_calls: 0, + file_metrics: %{}, + files: [], loo_breakdown: %{}, loo_breakdown_calls: 0, - cosine_breakdown: %{}, - cosine_breakdown_calls: 0 + nodes: [], + phases: %{}, + stages: %{} } end) @@ -277,9 +279,8 @@ defmodule CodeQA.CLI.HealthReport do measurements, _metadata, pid - ) do - Agent.update(pid, &Map.put(&1, :codebase_cosines_us, measurements.duration)) - end + ), + do: pid |> Agent.update(&Map.put(&1, :codebase_cosines_us, measurements.duration)) defp handle_event([:codeqa, :block_impact, :file], measurements, metadata, pid) do Agent.update(pid, fn state -> @@ -320,7 +321,8 @@ defmodule CodeQA.CLI.HealthReport do defp handle_event([:codeqa, :loo_breakdown], measurements, _metadata, pid) do Agent.update(pid, fn state -> merged = - Enum.reduce(measurements, state.loo_breakdown, fn {k, v}, acc -> + measurements + |> Enum.reduce(state.loo_breakdown, fn {k, v}, acc -> Map.update(acc, k, v, &(&1 + v)) end) @@ -333,7 +335,8 @@ defmodule CodeQA.CLI.HealthReport do defp handle_event([:codeqa, :cosine_breakdown], measurements, _metadata, pid) do Agent.update(pid, fn state -> merged = - Enum.reduce(measurements, state.cosine_breakdown, fn {k, v}, acc -> + measurements + |> Enum.reduce(state.cosine_breakdown, fn {k, v}, acc -> Map.update(acc, k, v, &(&1 + v)) end) @@ -354,8 +357,8 @@ defmodule CodeQA.CLI.HealthReport do total_nodes = length(nodes) total_files = length(files) - node_totals = Enum.map(nodes, fn {_, m} -> m end) - file_totals = Enum.map(files, fn {_, m} -> m end) + node_totals = nodes |> Enum.map(fn {_, m} -> m end) + file_totals = files |> Enum.map(fn {_, m} -> m end) IO.puts(:stderr, """ @@ -414,28 +417,28 @@ defmodule CodeQA.CLI.HealthReport do |> Enum.sort_by(fn {_, v} -> -v end) |> Enum.take(25) |> Enum.map_join("\n", fn {key, total_us} -> - avg = div(total_us, calls) + avg_us = div(total_us, calls) pct = total_us * 100 / Enum.sum(Map.values(breakdown)) - " #{String.pad_trailing(to_string(key), 32)} total #{us(total_us)} avg/call #{us(avg)} (#{Float.round(pct, 1)}%)" + " #{String.pad_trailing(to_string(key), 32)} total #{us(total_us)} avg/call #{us(avg_us)} (#{Float.round(pct, 1)}%)" end) end defp format_scaling(files, nodes) do - nodes_by_path = Enum.group_by(nodes, fn {p, _} -> p end, fn {_, m} -> m end) + nodes_by_path = nodes |> Enum.group_by(fn {p, _} -> p end, fn {_, m} -> m end) rows = files |> Enum.map(fn {path, fm} -> node_durations = nodes_by_path |> Map.get(path, []) |> Enum.map(& &1.duration) - total_node_us = Enum.sum(node_durations) + total_node_us = node_durations |> Enum.sum() %{ - path: path, bytes: Map.get(fm, :bytes, 0), - tokens: Map.get(fm, :token_count, 0), - nodes: Map.get(fm, :node_count, 0), file_us: fm.duration, + nodes: Map.get(fm, :node_count, 0), + path: path, + tokens: Map.get(fm, :token_count, 0), total_node_us: total_node_us } end) @@ -450,23 +453,23 @@ defmodule CodeQA.CLI.HealthReport do bin_rows = bins |> Enum.map(fn {label, pred} -> - bucket = Enum.filter(rows, pred) + bucket = rows |> Enum.filter(pred) n = length(bucket) if n == 0 do " #{label} (none)" else - avg_bytes = div(Enum.sum(Enum.map(bucket, & &1.bytes)), n) - avg_tokens = div(Enum.sum(Enum.map(bucket, & &1.tokens)), n) - avg_nodes = div(Enum.sum(Enum.map(bucket, & &1.nodes)), n) - avg_node_us = div(Enum.sum(Enum.map(bucket, & &1.total_node_us)), n) - tokens_per_node_us = if avg_nodes > 0, do: div(avg_node_us, avg_nodes), else: 0 + avg_bytes = div(bucket |> Enum.map(& &1.bytes) |> Enum.sum(), n) + avg_tokens = div(bucket |> Enum.map(& &1.tokens) |> Enum.sum(), n) + avg_nodes = div(bucket |> Enum.map(& &1.nodes) |> Enum.sum(), n) + avg_node_us = div(bucket |> Enum.map(& &1.total_node_us) |> Enum.sum(), n) + per_node_us = if avg_nodes > 0, do: div(avg_node_us, avg_nodes), else: 0 - " #{label} files=#{n} avg bytes=#{avg_bytes} tokens=#{avg_tokens} nodes=#{avg_nodes} total_node=#{us(avg_node_us)} per_node=#{us(tokens_per_node_us)}" + " #{label} files=#{n} avg bytes=#{avg_bytes} tokens=#{avg_tokens} nodes=#{avg_nodes} total_node=#{us(avg_node_us)} per_node=#{us(per_node_us)}" end end) - Enum.join(bin_rows, "\n") + bin_rows |> Enum.join("\n") end defp format_phases(phases) when map_size(phases) == 0, do: " (no phases recorded)" @@ -506,12 +509,12 @@ defmodule CodeQA.CLI.HealthReport do m |> Enum.map(fn {name, {n, sum}} -> - avg = if n > 0, do: div(sum, n), else: 0 - {name, sum, avg, n} + avg_us = if n > 0, do: div(sum, n), else: 0 + {name, sum, avg_us, n} end) |> Enum.sort_by(fn {_, sum, _, _} -> -sum end) - |> Enum.map_join("\n", fn {name, sum, avg, n} -> - " #{String.pad_trailing(to_string(name), 32)} total #{us(sum)} avg/file #{us(div(sum, fc))} (#{n} calls, avg/call #{us(avg)})" + |> Enum.map_join("\n", fn {name, sum, avg_us, n} -> + " #{String.pad_trailing(to_string(name), 32)} total #{us(sum)} avg/file #{us(div(sum, fc))} (#{n} calls, avg/call #{us(avg_us)})" end) end @@ -519,7 +522,7 @@ defmodule CodeQA.CLI.HealthReport do node_time_by_file = nodes |> Enum.group_by(fn {path, _} -> path end, fn {_, m} -> m.duration end) - |> Map.new(fn {path, durations} -> {path, Enum.sum(durations)} end) + |> Map.new(fn {path, durations} -> {path, durations |> Enum.sum()} end) files |> Enum.map(fn {path, fm} -> @@ -536,7 +539,7 @@ defmodule CodeQA.CLI.HealthReport do defp avg_us([], _key), do: "n/a" defp avg_us(measurements, key) do - total = Enum.sum(Enum.map(measurements, &Map.get(&1, key, 0))) + total = measurements |> Enum.map(&Map.get(&1, key, 0)) |> Enum.sum() us(div(total, length(measurements))) end diff --git a/lib/codeqa/cli/history.ex b/lib/codeqa/cli/history.ex index ca40669c..7552466e 100644 --- a/lib/codeqa/cli/history.ex +++ b/lib/codeqa/cli/history.ex @@ -108,7 +108,7 @@ defmodule CodeQA.CLI.History do else: analyze_opts files = Git.collect_files_at_ref(path, commit) - files = Collector.reject_ignored_map(files, ignore_patterns) + files = files |> Collector.reject_ignored_map(ignore_patterns) if map_size(files) == 0 do IO.puts(:stderr, "Warning: no source files found at commit #{commit}") diff --git a/lib/codeqa/cli/options.ex b/lib/codeqa/cli/options.ex index 199a95df..98c02b3e 100644 --- a/lib/codeqa/cli/options.ex +++ b/lib/codeqa/cli/options.ex @@ -28,12 +28,13 @@ defmodule CodeQA.CLI.Options do def common_aliases, do: @common_aliases @spec parse(list(String.t()), keyword(), keyword()) :: {keyword(), list(String.t()), list()} - def parse(args, extra_strict \\ [], extra_aliases \\ []) do - OptionParser.parse(args, - strict: Keyword.merge(@common_strict, extra_strict), - aliases: Keyword.merge(@common_aliases, extra_aliases) - ) - end + def parse(args, extra_strict \\ [], extra_aliases \\ []), + do: + args + |> OptionParser.parse( + strict: Keyword.merge(@common_strict, extra_strict), + aliases: Keyword.merge(@common_aliases, extra_aliases) + ) @spec validate_dir!(String.t()) :: :ok def validate_dir!(path) do @@ -48,11 +49,11 @@ defmodule CodeQA.CLI.Options do @spec parse_ignore_paths(String.t() | nil) :: list(String.t()) def parse_ignore_paths(nil), do: [] - def parse_ignore_paths(paths_string) do - paths_string - |> String.split(",", trim: true) - |> Enum.map(&String.trim/1) - end + def parse_ignore_paths(paths_string), + do: + paths_string + |> String.split(",", trim: true) + |> Enum.map(&String.trim/1) @spec build_analyze_opts(keyword()) :: keyword() def build_analyze_opts(opts) do @@ -78,13 +79,14 @@ defmodule CodeQA.CLI.Options do {:ncd_paths, opts[:ncd_paths] && String.split(opts[:ncd_paths], ",")} ) - Enum.reduce(passthrough_keys, base, fn key, acc -> + passthrough_keys + |> Enum.reduce(base, fn key, acc -> if opts[key], do: [{key, opts[key]} | acc], else: acc end) end @spec maybe_add(keyword(), any(), {atom(), any()}) :: keyword() - def maybe_add(opts, val, item) do - if val, do: [item | opts], else: opts - end + def maybe_add(opts, val, item) when val not in [nil, false], do: [item | opts] + + def maybe_add(opts, _val, _item), do: opts end diff --git a/lib/codeqa/combined_metrics/category.ex b/lib/codeqa/combined_metrics/category.ex index def09ad1..57c383e3 100644 --- a/lib/codeqa/combined_metrics/category.ex +++ b/lib/codeqa/combined_metrics/category.ex @@ -32,9 +32,8 @@ defmodule CodeQA.CombinedMetrics.Category do Delegates to `CodeQA.CombinedMetrics.Scorer.compute_score/3`. """ @spec compute_score(String.t(), map()) :: float() - def compute_score(metric_name, metrics) do - Scorer.compute_score(unquote(yaml_path), metric_name, metrics) - end + def compute_score(metric_name, metrics), + do: unquote(yaml_path) |> Scorer.compute_score(metric_name, metrics) end end end diff --git a/lib/codeqa/combined_metrics/cosine_vector.ex b/lib/codeqa/combined_metrics/cosine_vector.ex index 36bbe23f..638b8390 100644 --- a/lib/codeqa/combined_metrics/cosine_vector.ex +++ b/lib/codeqa/combined_metrics/cosine_vector.ex @@ -39,8 +39,8 @@ defmodule CodeQA.CombinedMetrics.CosineVector do log_metrics = Keyword.get(opts, :log_metrics) {dot, norm_s_sq, norm_v_sq, contributions} = - Enum.reduce(scalars, {0.0, 0.0, 0.0, []}, fn {{group, key}, scalar}, - {d, ns, nv, contribs} -> + scalars + |> Enum.reduce({0.0, 0.0, 0.0, []}, fn {{group, key}, scalar}, {d, ns, nv, contribs} -> log_m = lookup_log_metric(log_metrics, aggregate, group, key) contrib = scalar * log_m @@ -61,13 +61,13 @@ defmodule CodeQA.CombinedMetrics.CosineVector do |> Enum.sort_by(fn {_, c} -> c end) |> Enum.take(5) |> Enum.map(fn {metric, contribution} -> - %{metric: to_string(metric), contribution: Float.round(contribution, 4)} + %{contribution: Float.round(contribution, 4), metric: to_string(metric)} end) [ %{ - category: category, behavior: behavior, + category: category, cosine: Float.round(cos_sim, 4), score: Float.round(calibrated, 4), top_metrics: top_metrics @@ -81,10 +81,13 @@ defmodule CodeQA.CombinedMetrics.CosineVector do defp lookup_log_metric(nil, aggregate, group, key), do: :math.log(max(Scorer.get(aggregate, group, key) / 1.0, 1.0e-300)) - defp lookup_log_metric(log_metrics, aggregate, group, key) do - case get_in(log_metrics, [group, key]) do - nil -> :math.log(max(Scorer.get(aggregate, group, key) / 1.0, 1.0e-300)) - log_val -> log_val - end - end + defp lookup_log_metric(log_metrics, aggregate, group, key), + do: get_in(log_metrics, [group, key]) |> handle_get_in(aggregate, group, key) + + # FIXME: extracted automatically by ExtractCaseToHelper — review + # the parameter list and consider a better name. + defp handle_get_in(nil, aggregate, group, key), + do: max(Scorer.get(aggregate, group, key) / 1.0, 1.0e-300) |> :math.log() + + defp handle_get_in(log_val, _aggregate, _group, _key), do: log_val end diff --git a/lib/codeqa/combined_metrics/file_scorer.ex b/lib/codeqa/combined_metrics/file_scorer.ex index e7479b08..bb76c6b5 100644 --- a/lib/codeqa/combined_metrics/file_scorer.ex +++ b/lib/codeqa/combined_metrics/file_scorer.ex @@ -25,7 +25,12 @@ defmodule CodeQA.CombinedMetrics.FileScorer do @spec file_to_aggregate(map()) :: map() def file_to_aggregate(metrics) do Map.new(metrics, fn {group, keys} -> - prefixed_keys = Map.new(keys, fn {key, value} -> {"mean_" <> key, value} end) + prefixed_keys = + for {key, value} <- keys do + {"mean_" <> key, value} + end + |> Map.new() + {group, prefixed_keys} end) end @@ -55,7 +60,7 @@ defmodule CodeQA.CombinedMetrics.FileScorer do @spec worst_files_per_behavior(map(), keyword()) :: %{ String.t() => [ - %{file: String.t(), cosine: float(), top_metrics: list(), top_nodes: list()} + %{cosine: float(), file: String.t(), top_metrics: list(), top_nodes: list()} ] } def worst_files_per_behavior(files_map, opts \\ []) do @@ -95,14 +100,14 @@ defmodule CodeQA.CombinedMetrics.FileScorer do |> file_to_aggregate() |> SampleRunner.diagnose_aggregate(top: 99_999, language: language) |> Enum.reduce(acc, fn %{ - category: category, behavior: behavior, + category: category, cosine: cosine, top_metrics: top_metrics }, inner_acc -> key = "#{category}.#{behavior}" - entry = %{file: path, cosine: cosine, top_metrics: top_metrics, top_nodes: top_nodes} + entry = %{cosine: cosine, file: path, top_metrics: top_metrics, top_nodes: top_nodes} Map.update(inner_acc, key, [entry], &[entry | &1]) end) end diff --git a/lib/codeqa/combined_metrics/sample_runner.ex b/lib/codeqa/combined_metrics/sample_runner.ex index 318d007a..07c78b81 100644 --- a/lib/codeqa/combined_metrics/sample_runner.ex +++ b/lib/codeqa/combined_metrics/sample_runner.ex @@ -6,8 +6,12 @@ defmodule CodeQA.CombinedMetrics.SampleRunner do manual scalar tuning of combined metric formulas. """ - alias CodeQA.CombinedMetrics.{CosineVector, ScalarApplier, Scorer} - alias CodeQA.Engine.{Analyzer, Collector} + alias CodeQA.CombinedMetrics.CosineVector + alias CodeQA.CombinedMetrics.ScalarApplier + alias CodeQA.CombinedMetrics.Scorer + alias CodeQA.Engine.Analyzer + alias CodeQA.Engine.Collector + import CodeQA.Shared, only: [humanize_category_shared: 1] @samples_root "priv/combined_metrics/samples" @@ -138,7 +142,7 @@ defmodule CodeQA.CombinedMetrics.SampleRunner do end) |> Enum.sort_by(& &1.score) - %{category: category, name: humanize(category), behaviors: behaviors} + %{behaviors: behaviors, category: category, name: humanize(category)} end) end @@ -245,9 +249,7 @@ defmodule CodeQA.CombinedMetrics.SampleRunner do Returns a list of `%{category: String.t(), behaviors_with_languages: non_neg_integer()}`. """ @spec apply_languages(keyword()) :: [map()] - def apply_languages(opts \\ []) do - ScalarApplier.apply_languages(opts) - end + def apply_languages(opts \\ []), do: opts |> ScalarApplier.apply_languages() # --------------------------------------------------------------------------- # Sample discovery @@ -263,21 +265,20 @@ defmodule CodeQA.CombinedMetrics.SampleRunner do end) end - defp has_both_dirs?(category, behavior) do - File.dir?(sample_path(category, behavior, "bad")) and - File.dir?(sample_path(category, behavior, "good")) - end + defp has_both_dirs?(category, behavior), + do: + File.dir?(sample_path(category, behavior, "bad")) and + File.dir?(sample_path(category, behavior, "good")) - defp sample_path(category, behavior, kind) do - Path.join([@samples_root, category, behavior, kind]) - end + defp sample_path(category, behavior, kind), + do: [@samples_root, category, behavior, kind] |> Path.join() - defp analyze(dir) do - dir - |> Collector.collect_files() - |> Analyzer.analyze_codebase() - |> get_in(["codebase", "aggregate"]) - end + defp analyze(dir), + do: + dir + |> Collector.collect_files() + |> Analyzer.analyze_codebase() + |> get_in(["codebase", "aggregate"]) # --------------------------------------------------------------------------- # Sample scoring @@ -293,12 +294,12 @@ defmodule CodeQA.CombinedMetrics.SampleRunner do ratio = if bad_score > 0, do: good_score / bad_score, else: 0.0 base = %{ - category: category, - behavior: behavior, bad_score: bad_score, + behavior: behavior, + category: category, + direction_ok: good_score >= bad_score, good_score: good_score, - ratio: Float.round(ratio, 2), - direction_ok: good_score >= bad_score + ratio: Float.round(ratio, 2) } if opts[:verbose] do @@ -314,7 +315,7 @@ defmodule CodeQA.CombinedMetrics.SampleRunner do bad_val = Scorer.get(bad_agg, group, key) good_val = Scorer.get(good_agg, group, key) ratio = if bad_val > 0, do: Float.round(good_val / bad_val, 2), else: 0.0 - %{group: group, key: key, scalar: scalar, bad: bad_val, good: good_val, ratio: ratio} + %{bad: bad_val, good: good_val, group: group, key: key, ratio: ratio, scalar: scalar} end) |> Enum.sort_by(&abs(&1.ratio - 1.0), :desc) end @@ -396,7 +397,8 @@ defmodule CodeQA.CombinedMetrics.SampleRunner do ) do yaml_path = "priv/combined_metrics/#{category}.yml" - Enum.flat_map(behaviors, fn {behavior, behavior_data} -> + behaviors + |> Enum.flat_map(fn {behavior, behavior_data} -> maybe_diagnose_behavior( yaml_path, behavior, @@ -461,16 +463,8 @@ defmodule CodeQA.CombinedMetrics.SampleRunner do end end - defp track_behavior_us(behavior, us) do - case Process.get(:codeqa_cosine_breakdown) do - nil -> - :ok - - breakdown -> - cur = Map.get(breakdown, behavior, 0) - Process.put(:codeqa_cosine_breakdown, Map.put(breakdown, behavior, cur + us)) - end - end + defp track_behavior_us(behavior, us), + do: Process.get(:codeqa_cosine_breakdown) |> handle_track_behavior_us_get(behavior, us) # --------------------------------------------------------------------------- # Language filtering @@ -490,7 +484,7 @@ defmodule CodeQA.CombinedMetrics.SampleRunner do do: language in behavior_langs defp behavior_language_applies?(behavior_langs, nil, languages) when is_list(languages), - do: Enum.any?(behavior_langs, &(&1 in languages)) + do: behavior_langs |> Enum.any?(&(&1 in languages)) defp behavior_language_applies?(behavior_langs, language, languages) when is_binary(language) and is_list(languages), @@ -500,9 +494,14 @@ defmodule CodeQA.CombinedMetrics.SampleRunner do # Misc # --------------------------------------------------------------------------- - defp humanize(slug) do - slug - |> String.split("_") - |> Enum.map_join(" ", &String.capitalize/1) + defp humanize(slug), do: humanize_category_shared(slug) + + # FIXME: extracted automatically by ExtractCaseToHelper — review + # the parameter list and consider a better name. + defp handle_track_behavior_us_get(nil, _behavior, _us), do: :ok + + defp handle_track_behavior_us_get(breakdown, behavior, us) do + cur = Map.get(breakdown, behavior, 0) + Process.put(:codeqa_cosine_breakdown, Map.put(breakdown, behavior, cur + us)) end end diff --git a/lib/codeqa/combined_metrics/scalar_applier.ex b/lib/codeqa/combined_metrics/scalar_applier.ex index 1c8ec4b7..e123fc6b 100644 --- a/lib/codeqa/combined_metrics/scalar_applier.ex +++ b/lib/codeqa/combined_metrics/scalar_applier.ex @@ -1,4 +1,6 @@ defmodule CodeQA.CombinedMetrics.ScalarApplier do + alias CodeQA.Language + @moduledoc """ Writes suggested scalars and language metadata back to the combined-metrics YAML config files under `priv/combined_metrics/`. @@ -78,9 +80,9 @@ defmodule CodeQA.CombinedMetrics.ScalarApplier do File.write!(yaml_path, YamlFormatter.format(updated)) behaviors_with_languages = - Enum.count(updated, fn {_b, groups} -> Map.has_key?(groups, "_languages") end) + updated |> Enum.count(fn {_b, groups} -> Map.has_key?(groups, "_languages") end) - %{category: category, behaviors_with_languages: behaviors_with_languages} + %{behaviors_with_languages: behaviors_with_languages, category: category} end) end @@ -91,7 +93,7 @@ defmodule CodeQA.CombinedMetrics.ScalarApplier do defp apply_to_category(existing, category, report) do existing |> Enum.filter(fn {_k, v} -> is_map(v) end) - |> Enum.reduce({%{}, %{updated: 0, deadzoned: 0, skipped: 0}}, fn + |> Enum.reduce({%{}, %{deadzoned: 0, skipped: 0, updated: 0}}, fn {behavior, current_groups}, {acc_yaml, stats} -> report_key = "#{category}.#{behavior}" doc = read_behavior_doc(category, behavior) @@ -126,8 +128,9 @@ defmodule CodeQA.CombinedMetrics.ScalarApplier do end defp groups_from_report(metrics) do - Enum.reduce(metrics, {%{}, 0.0, 0, 0}, fn {metric_key, data}, - {groups, log_baseline, n_updated, n_deadzoned} -> + metrics + |> Enum.reduce({%{}, 0.0, 0, 0}, fn {metric_key, data}, + {groups, log_baseline, n_updated, n_deadzoned} -> [group, key] = String.split(metric_key, ".", parts: 2) if deadzone?(data.ratio) do @@ -157,16 +160,7 @@ defmodule CodeQA.CombinedMetrics.ScalarApplier do defp read_behavior_doc(category, behavior) do config_path = Path.join([@samples_root, category, behavior, "config.yml"]) - case File.read(config_path) do - {:ok, content} -> - case YamlElixir.read_from_string(content) do - {:ok, %{"doc" => doc}} when is_binary(doc) -> doc - _ -> nil - end - - _ -> - nil - end + File.read(config_path) |> handle_read_behavior_doc_read() end defp maybe_put_doc(groups, nil), do: groups @@ -176,18 +170,7 @@ defmodule CodeQA.CombinedMetrics.ScalarApplier do # Language detection helpers # --------------------------------------------------------------------------- - defp dir_languages(dir) do - case File.ls(dir) do - {:ok, files} -> - files - |> Enum.map(&CodeQA.Language.detect/1) - |> Enum.map(& &1.name()) - |> MapSet.new() - - _ -> - MapSet.new() - end - end + defp dir_languages(dir), do: File.ls(dir) |> handle_dir_languages_ls() defp languages_for_behavior(category, behavior) do bad_langs = dir_languages(sample_path(category, behavior, "bad")) @@ -203,7 +186,28 @@ defmodule CodeQA.CombinedMetrics.ScalarApplier do defp maybe_put_languages(groups, []), do: groups defp maybe_put_languages(groups, langs), do: Map.put(groups, "_languages", langs) - defp sample_path(category, behavior, kind) do - Path.join([@samples_root, category, behavior, kind]) + defp sample_path(category, behavior, kind), + do: [@samples_root, category, behavior, kind] |> Path.join() + + # FIXME: extracted automatically by ExtractCaseToHelper — review + # the parameter list and consider a better name. + defp handle_read_behavior_doc_read({:ok, content}) do + case YamlElixir.read_from_string(content) do + {:ok, %{"doc" => doc}} when is_binary(doc) -> doc + _ -> nil + end end + + defp handle_read_behavior_doc_read(_), do: nil + + # FIXME: extracted automatically by ExtractCaseToHelper — review + # the parameter list and consider a better name. + defp handle_dir_languages_ls({:ok, files}), + do: + files + |> Enum.map(&Language.detect/1) + |> Enum.map(& &1.name()) + |> MapSet.new() + + defp handle_dir_languages_ls(_), do: MapSet.new() end diff --git a/lib/codeqa/combined_metrics/scorer.ex b/lib/codeqa/combined_metrics/scorer.ex index b1220aa9..9e84f44d 100644 --- a/lib/codeqa/combined_metrics/scorer.ex +++ b/lib/codeqa/combined_metrics/scorer.ex @@ -36,7 +36,7 @@ defmodule CodeQA.CombinedMetrics.Scorer do |> Map.get(metric_name, %{}) |> Enum.flat_map(fn {group, keys} when is_map(keys) -> - Enum.map(keys, fn {key, scalar} -> {{group, key}, scalar / 1.0} end) + keys |> Enum.map(fn {key, scalar} -> {{group, key}, scalar / 1.0} end) _ -> [] @@ -49,7 +49,7 @@ defmodule CodeQA.CombinedMetrics.Scorer do def default_scalars do Analyzer.build_registry().file_metrics |> Enum.flat_map(fn mod -> - Enum.map(mod.keys(), fn key -> {{mod.name(), "mean_" <> key}, 0.0} end) + mod.keys() |> Enum.map(&{{mod.name(), "mean_" <> &1}, 0.0}) end) |> Map.new() end diff --git a/lib/codeqa/combined_metrics/yaml_formatter.ex b/lib/codeqa/combined_metrics/yaml_formatter.ex index 8c76a668..c1aafaf1 100644 --- a/lib/codeqa/combined_metrics/yaml_formatter.ex +++ b/lib/codeqa/combined_metrics/yaml_formatter.ex @@ -47,20 +47,20 @@ defmodule CodeQA.CombinedMetrics.YamlFormatter do defp doc_line(doc), do: [" _doc: #{inspect(doc)}"] defp baseline_line(nil), do: [] - defp baseline_line(val), do: [" _log_baseline: #{fmt_scalar(val)}"] + defp baseline_line(val), do: [" _log_baseline: #{format_scalar(val)}"] defp fix_hint_line(nil), do: [] defp fix_hint_line(hint), do: [" _fix_hint: #{inspect(hint)}"] defp languages_line(nil), do: [] defp languages_line([]), do: [] - defp languages_line(langs), do: [" _languages: [#{Enum.join(langs, ", ")}]"] + defp languages_line(langs), do: [" _languages: [#{langs |> Enum.join(", ")}]"] defp excludes_block_types_line(nil), do: [] defp excludes_block_types_line([]), do: [] defp excludes_block_types_line(types), - do: [" _excludes_block_types: [#{Enum.join(types, ", ")}]"] + do: [" _excludes_block_types: [#{types |> Enum.join(", ")}]"] defp group_lines(groups) do groups @@ -73,12 +73,12 @@ defmodule CodeQA.CombinedMetrics.YamlFormatter do key_lines = keys |> Enum.sort_by(fn {key, _} -> key end) - |> Enum.map(fn {key, scalar} -> " #{key}: #{fmt_scalar(scalar)}" end) + |> Enum.map(fn {key, scalar} -> " #{key}: #{format_scalar(scalar)}" end) [" #{group}:" | key_lines] end) end - defp fmt_scalar(f) when is_float(f), do: :erlang.float_to_binary(f, decimals: 4) - defp fmt_scalar(n) when is_integer(n), do: "#{n}.0" + defp format_scalar(f) when is_float(f), do: :erlang.float_to_binary(f, decimals: 4) + defp format_scalar(n) when is_integer(n), do: "#{n}.0" end diff --git a/lib/codeqa/config.ex b/lib/codeqa/config.ex index 5171eacb..0df070be 100644 --- a/lib/codeqa/config.ex +++ b/lib/codeqa/config.ex @@ -15,10 +15,10 @@ defmodule CodeQA.Config do "testing" => 1 } - defstruct ignore_paths: [], - impact_map: @default_impact, - combined_top: 2, + defstruct combined_top: 2, cosine_significance_threshold: 0.15, + ignore_paths: [], + impact_map: @default_impact, near_duplicate_blocks: [] @spec load(String.t()) :: :ok @@ -52,40 +52,33 @@ defmodule CodeQA.Config do @spec near_duplicate_blocks_opts() :: keyword() def near_duplicate_blocks_opts, do: fetch().near_duplicate_blocks - defp fetch do - :persistent_term.get(@key, %__MODULE__{}) - end + defp fetch, do: @key |> :persistent_term.get(%__MODULE__{}) defp parse(path) do config_file = Path.join(path, ".codeqa.yml") - case File.read(config_file) do - {:ok, contents} -> - case YamlElixir.read_from_string(contents) do - {:ok, yaml} -> from_yaml(yaml) - _ -> %__MODULE__{} - end - - {:error, _} -> - %__MODULE__{} - end + File.read(config_file) |> handle_parse_read() end - defp from_yaml(yaml) do - %__MODULE__{ - ignore_paths: parse_ignore_paths(yaml), - impact_map: parse_impact(yaml), + defp from_yaml(yaml), + do: %__MODULE__{ combined_top: Map.get(yaml, "combined_top", 2), cosine_significance_threshold: Map.get(yaml, "cosine_significance_threshold", 0.15), + ignore_paths: parse_ignore_paths(yaml), + impact_map: parse_impact(yaml), near_duplicate_blocks: parse_near_duplicate_blocks(yaml) } - end defp parse_ignore_paths(%{"ignore_paths" => patterns}) when is_list(patterns), do: patterns defp parse_ignore_paths(_), do: [] defp parse_impact(%{"impact" => overrides}) when is_map(overrides) do - string_overrides = Map.new(overrides, fn {k, v} -> {to_string(k), v} end) + string_overrides = + for {k, v} <- overrides do + {to_string(k), v} + end + |> Map.new() + Map.merge(@default_impact, string_overrides) end @@ -96,4 +89,17 @@ defmodule CodeQA.Config do do: [max_pairs_per_bucket: n] defp parse_near_duplicate_blocks(_), do: [] + + # FIXME: extracted automatically by ExtractCaseToHelper — review + # the parameter list and consider a better name. + defp handle_parse_read({:ok, contents}), + do: YamlElixir.read_from_string(contents) |> handle_read_from_string() + + defp handle_parse_read({:error, _}), do: %__MODULE__{} + + # FIXME: extracted automatically by ExtractCaseToHelper — review + # the parameter list and consider a better name. + defp handle_read_from_string({:ok, yaml}), do: yaml |> from_yaml() + + defp handle_read_from_string(_), do: %__MODULE__{} end diff --git a/lib/codeqa/diagnostics.ex b/lib/codeqa/diagnostics.ex index f2479e09..7c657f87 100644 --- a/lib/codeqa/diagnostics.ex +++ b/lib/codeqa/diagnostics.ex @@ -1,4 +1,6 @@ defmodule CodeQA.Diagnostics do + alias CodeQA.Language + @moduledoc """ Diagnoses a codebase by identifying likely code quality issues using cosine similarity against combined metric behavior profiles. @@ -9,6 +11,7 @@ defmodule CodeQA.Diagnostics do alias CodeQA.Engine.Analyzer alias CodeQA.Engine.Collector alias CodeQA.HealthReport.Grader + import CodeQA.Shared, only: [project_languages_shared: 1] @doc """ Runs diagnostics on the given path and returns results as a string. @@ -55,7 +58,7 @@ defmodule CodeQA.Diagnostics do case format do :json -> - Jason.encode!(%{issues: issues, categories: categories}, pretty: true) + Jason.encode!(%{categories: categories, issues: issues}, pretty: true) _ -> "## Diagnose: aggregate\n\n" <> @@ -72,7 +75,7 @@ defmodule CodeQA.Diagnostics do Map.new(files, fn {file_path, file_data} -> metrics = Map.get(file_data, "metrics", %{}) file_agg = FileScorer.file_to_aggregate(metrics) - language = CodeQA.Language.detect(file_path).name() + language = Language.detect(file_path).name() diagnoses = SampleRunner.diagnose_aggregate(file_agg, top: top, language: language) {file_path, diagnoses} end) @@ -80,15 +83,17 @@ defmodule CodeQA.Diagnostics do case format do :json -> files_json = - Enum.map(file_diagnoses, fn {file_path, diagnoses} -> - %{file: file_path, behaviors: Enum.map(diagnoses, &diagnosis_to_map/1)} + file_diagnoses + |> Enum.map(fn {file_path, diagnoses} -> + %{behaviors: diagnoses |> Enum.map(&diagnosis_to_map/1), file: file_path} end) Jason.encode!(%{files: files_json}, pretty: true) _ -> file_rows = - Enum.flat_map(file_diagnoses, fn {file_path, diagnoses} -> + file_diagnoses + |> Enum.flat_map(fn {file_path, diagnoses} -> diagnoses_to_rows(file_path, diagnoses) end) @@ -96,54 +101,47 @@ defmodule CodeQA.Diagnostics do end end - defp diagnosis_to_map(d) do - %{ + defp diagnosis_to_map(d), + do: %{ behavior: "#{d.category}.#{d.behavior}", cosine: d.cosine, score: Grader.score_cosine(d.cosine) } - end defp diagnoses_to_rows(file_path, diagnoses) do - Enum.map(diagnoses, fn %{category: cat, behavior: beh, cosine: cosine, score: score} -> + diagnoses + |> Enum.map(fn %{behavior: beh, category: cat, cosine: cosine, score: score} -> {file_path, "#{cat}.#{beh}", cosine, score} end) end - defp project_languages(files_map) do - files_map - |> Map.keys() - |> Enum.map(&CodeQA.Language.detect(&1).name()) - |> Enum.reject(&(&1 == "unknown")) - |> Enum.uniq() - end + defp project_languages(files_map), do: project_languages_shared(files_map) defp issues_table(issues) do rows = - Enum.map(issues, fn %{category: cat, behavior: beh, cosine: cosine, score: score} -> + issues + |> Enum.map(fn %{behavior: beh, category: cat, cosine: cosine, score: score} -> cosine_str = :erlang.float_to_binary(cosine / 1.0, decimals: 2) score_str = :erlang.float_to_binary(score / 1.0, decimals: 2) "| #{cat}.#{beh} | #{cosine_str} | #{score_str} |" end) - Enum.join( - ["| Behavior | Cosine | Score |", "|----------|--------|-------|"] ++ rows ++ [""], - "\n" - ) + (["| Behavior | Cosine | Score |", "|----------|--------|-------|"] ++ rows ++ [""]) + |> Enum.join("\n") end defp categories_text(categories) do - Enum.map_join(categories, "\n", fn %{name: name, behaviors: behaviors} -> + categories + |> Enum.map_join("\n", fn %{behaviors: behaviors, name: name} -> rows = - Enum.map(behaviors, fn %{behavior: beh, score: score} -> + behaviors + |> Enum.map(fn %{behavior: beh, score: score} -> score_str = :erlang.float_to_binary(score / 1.0, decimals: 2) "| #{beh} | #{score_str} |" end) - Enum.join( - ["### #{name}", "| Behavior | Score |", "|----------|-------|"] ++ rows ++ [""], - "\n" - ) + (["### #{name}", "| Behavior | Score |", "|----------|-------|"] ++ rows ++ [""]) + |> Enum.join("\n") end) end @@ -162,10 +160,8 @@ defmodule CodeQA.Diagnostics do "| #{file_path} | #{behavior_key} | #{cosine_str} | #{cosine_score} |" end) - Enum.join( - ["| File | Behavior | Cosine | Score |", "|------|----------|--------|-------|"] ++ - data_rows, - "\n" - ) + (["| File | Behavior | Cosine | Score |", "|------|----------|--------|-------|"] ++ + data_rows) + |> Enum.join("\n") end end diff --git a/lib/codeqa/engine/analyzer.ex b/lib/codeqa/engine/analyzer.ex index 2436581c..23088d61 100644 --- a/lib/codeqa/engine/analyzer.ex +++ b/lib/codeqa/engine/analyzer.ex @@ -1,4 +1,5 @@ defmodule CodeQA.Engine.Analyzer do + alias CodeQA.CombinedMetrics.Scorer @moduledoc "Orchestrates metric computation across files." alias CodeQA.Analysis.RunSupervisor @@ -39,14 +40,14 @@ defmodule CodeQA.Engine.Analyzer do @spec analyze_file(String.t(), String.t()) :: map() def analyze_file(_path, content) do - ctx = Pipeline.build_file_context(content) - Registry.run_file_metrics(@registry, ctx, []) + context = Pipeline.build_file_context(content) + Registry.run_file_metrics(@registry, context, []) end @spec analyze_file_for_loo(String.t(), String.t()) :: map() def analyze_file_for_loo(_path, content) do - ctx = Pipeline.build_file_context(content, skip_structural: true) - Registry.run_file_metrics(@registry, ctx, []) + context = Pipeline.build_file_context(content, skip_structural: true) + Registry.run_file_metrics(@registry, context, []) end @doc """ @@ -58,14 +59,14 @@ defmodule CodeQA.Engine.Analyzer do """ @spec analyze_file_for_loo_partial(String.t(), String.t(), map(), String.t()) :: map() def analyze_file_for_loo_partial(_path, content, baseline_metrics, block_content \\ "") do - referenced = CodeQA.CombinedMetrics.Scorer.referenced_file_metric_names() + referenced = Scorer.referenced_file_metric_names() {ctx_us, ctx} = :timer.tc(fn -> Pipeline.build_file_context(content, skip_structural: true) end) {result, breakdown} = - Enum.reduce(baseline_metrics, {[], %{ctx: ctx_us}}, fn {name, baseline_value}, - {acc, breakdown} -> + baseline_metrics + |> Enum.reduce({[], %{ctx: ctx_us}}, fn {name, baseline_value}, {acc, breakdown} -> if MapSet.member?(referenced, name) do mod = registered_module_for(name) @@ -83,13 +84,13 @@ defmodule CodeQA.Engine.Analyzer do end) :telemetry.execute([:codeqa, :loo_breakdown], breakdown, %{}) - Map.new(result) + result |> Map.new() end - defp registered_module_for(name) do - Enum.find(@registry.file_metrics, fn mod -> mod.name() == name end) || - raise "no registered file metric module for name #{inspect(name)}" - end + defp registered_module_for(name), + do: + Enum.find(@registry.file_metrics, &(&1.name() == name)) || + raise("no registered file metric module for name #{inspect(name)}") @spec analyze_codebase_aggregate(map(), keyword()) :: map() def analyze_codebase_aggregate(files_map, opts \\ []) do @@ -99,15 +100,14 @@ defmodule CodeQA.Engine.Analyzer do end) end - def analyze_codebase(files, opts \\ []) do - with_run_context(opts, &do_analyze_codebase(files, &1)) - end + def analyze_codebase(files, opts \\ []), + do: opts |> with_run_context(&do_analyze_codebase(files, &1)) defp with_run_context(opts, fun) do {:ok, sup} = RunSupervisor.start_link() run_ctx = RunSupervisor.run_context(sup) - opts = Keyword.put(opts, :file_context_pid, run_ctx.file_context_pid) - opts = Keyword.put(opts, :behavior_config_pid, run_ctx.behavior_config_pid) + opts = opts |> Keyword.put(:file_context_pid, run_ctx.file_context_pid) + opts = opts |> Keyword.put(:behavior_config_pid, run_ctx.behavior_config_pid) try do fun.(opts) @@ -202,20 +202,20 @@ defmodule CodeQA.Engine.Analyzer do end) end - defp compute_stats([]), do: %{mean: 0.0, std: 0.0, min: 0.0, max: 0.0} + defp compute_stats([]), do: %{max: 0.0, mean: 0.0, min: 0.0, std: 0.0} defp compute_stats(values) do n = length(values) mean = Enum.sum(values) / n - sum_squares = Enum.reduce(values, 0.0, fn v, acc -> acc + (v - mean) ** 2 end) + sum_squares = values |> Enum.reduce(0.0, fn v, acc -> acc + (v - mean) ** 2 end) variance = sum_squares / n std = :math.sqrt(variance) %{ + max: Float.round(Enum.max(values) * 1.0, 4), mean: Float.round(mean * 1.0, 4), - std: Float.round(std * 1.0, 4), min: Float.round(Enum.min(values) * 1.0, 4), - max: Float.round(Enum.max(values) * 1.0, 4) + std: Float.round(std * 1.0, 4) } end end diff --git a/lib/codeqa/engine/collector.ex b/lib/codeqa/engine/collector.ex index 3d1b8b41..7414b0b2 100644 --- a/lib/codeqa/engine/collector.ex +++ b/lib/codeqa/engine/collector.ex @@ -1,4 +1,7 @@ defmodule CodeQA.Engine.Collector do + alias CodeQA.Config + alias CodeQA.Git + alias CodeQA.Language @moduledoc false @skip_dirs MapSet.new(~w[ @@ -10,17 +13,17 @@ defmodule CodeQA.Engine.Collector do @default_ignore_patterns ~w[**/*.md **/*.mdx] @spec source_extensions() :: MapSet.t() - def source_extensions do - CodeQA.Language.all() - |> Enum.flat_map(& &1.extensions()) - |> Enum.map(&".#{&1}") - |> MapSet.new() - end + def source_extensions, + do: + Language.all() + |> Enum.flat_map(& &1.extensions()) + |> Enum.map(&".#{&1}") + |> MapSet.new() @spec collect_files(String.t(), [String.t()]) :: %{String.t() => String.t()} def collect_files(root, extra_ignore_patterns \\ []) do root_path = Path.expand(root) - CodeQA.Config.load(root_path) + Config.load(root_path) patterns = all_ignore_patterns(extra_ignore_patterns) extensions = source_extensions() @@ -37,30 +40,28 @@ defmodule CodeQA.Engine.Collector do end) |> do_reject_ignored_map(patterns) - gitignored = CodeQA.Git.gitignored_files(root_path, Map.keys(files_map)) + gitignored = Git.gitignored_files(root_path, Map.keys(files_map)) Map.reject(files_map, fn {path, _} -> MapSet.member?(gitignored, path) end) end @doc false - def ignored?(path, patterns) do - Enum.any?(patterns, fn pattern -> - match_pattern?(path, pattern) - end) - end + def ignored?(path, patterns), + do: + patterns + |> Enum.any?(&match_pattern?(path, &1)) @doc false - def reject_ignored_map(files_map, extra_patterns \\ []) do - do_reject_ignored_map(files_map, all_ignore_patterns(extra_patterns)) - end + def reject_ignored_map(files_map, extra_patterns \\ []), + do: files_map |> do_reject_ignored_map(all_ignore_patterns(extra_patterns)) @doc false def reject_ignored(list, key_fn, extra_patterns \\ []) do patterns = all_ignore_patterns(extra_patterns) - Enum.reject(list, fn item -> ignored?(key_fn.(item), patterns) end) + list |> Enum.reject(&ignored?(key_fn.(&1), patterns)) end defp all_ignore_patterns(extra), - do: extra ++ @default_ignore_patterns ++ CodeQA.Config.ignore_paths() + do: extra ++ @default_ignore_patterns ++ Config.ignore_paths() defp do_reject_ignored_map(files_map, patterns) do Map.reject(files_map, fn {path, _} -> ignored?(path, patterns) end) @@ -88,15 +89,15 @@ defmodule CodeQA.Engine.Collector do defp walk_directory(dir, extensions) do dir |> File.ls!() - |> Enum.flat_map(fn entry -> - full_path = Path.join(dir, entry) + |> Enum.flat_map(fn name -> + full_path = Path.join(dir, name) cond do - File.dir?(full_path) and not skip_dir?(entry) -> + File.dir?(full_path) and not skip_dir?(name) -> walk_directory(full_path, extensions) - File.regular?(full_path) and source_file?(entry, extensions) and - not String.starts_with?(entry, ".") -> + File.regular?(full_path) and source_file?(name, extensions) and + not String.starts_with?(name, ".") -> [full_path] true -> diff --git a/lib/codeqa/engine/file_context.ex b/lib/codeqa/engine/file_context.ex index 6e1da6ba..812b2b09 100644 --- a/lib/codeqa/engine/file_context.ex +++ b/lib/codeqa/engine/file_context.ex @@ -14,16 +14,16 @@ defmodule CodeQA.Engine.FileContext do defstruct @enforce_keys ++ [:path, :blocks] @type t :: %__MODULE__{ + blocks: [CodeQA.AST.Enrichment.Node.t()] | nil, + byte_count: non_neg_integer(), content: String.t(), - tokens: [CodeQA.Engine.Pipeline.Token.t()], - token_counts: map(), - words: list(), - identifiers: list(), - lines: list(), encoded: String.t(), - byte_count: non_neg_integer(), + identifiers: list(), line_count: non_neg_integer(), + lines: list(), path: String.t() | nil, - blocks: [CodeQA.AST.Enrichment.Node.t()] | nil + token_counts: map(), + tokens: [CodeQA.Engine.Pipeline.Token.t()], + words: list() } end diff --git a/lib/codeqa/engine/parallel.ex b/lib/codeqa/engine/parallel.ex index f5a8da15..5800cf4b 100644 --- a/lib/codeqa/engine/parallel.ex +++ b/lib/codeqa/engine/parallel.ex @@ -39,7 +39,7 @@ defmodule CodeQA.Engine.Parallel do {path, result} end) - |> Enum.into(%{}) + |> Map.new() end defp maybe_cached_analyze(path, content, nil, opts), @@ -49,23 +49,7 @@ defmodule CodeQA.Engine.Parallel do hash = :crypto.hash(:sha256, content) |> Base.encode16(case: :lower) cache_file = Path.join(cache_dir, hash <> ".json") - case File.read(cache_file) do - {:ok, cached} -> - case Jason.decode(cached) do - {:ok, data} -> - data - - _ -> - data = analyze_single_file(path, content, opts) - File.write!(cache_file, Jason.encode!(data)) - data - end - - _ -> - data = analyze_single_file(path, content, opts) - File.write!(cache_file, Jason.encode!(data)) - data - end + File.read(cache_file) |> handle_maybe_cached_analyze_read(cache_file, content, opts, path) end defp analyze_single_file(path, content, opts) do @@ -82,4 +66,28 @@ defmodule CodeQA.Engine.Parallel do "metrics" => metrics } end + + # FIXME: extracted automatically by ExtractCaseToHelper — review + # the parameter list and consider a better name. + defp handle_maybe_cached_analyze_read({:ok, cached}, cache_file, content, opts, path), + do: + Jason.decode(cached) + |> handle_maybe_cached_analyze_read_decode(cache_file, content, opts, path) + + defp handle_maybe_cached_analyze_read(_, cache_file, content, opts, path) do + data = analyze_single_file(path, content, opts) + File.write!(cache_file, Jason.encode!(data)) + data + end + + # FIXME: extracted automatically by ExtractCaseToHelper — review + # the parameter list and consider a better name. + defp handle_maybe_cached_analyze_read_decode({:ok, data}, _cache_file, _content, _opts, _path), + do: data + + defp handle_maybe_cached_analyze_read_decode(_, cache_file, content, opts, path) do + data = analyze_single_file(path, content, opts) + File.write!(cache_file, Jason.encode!(data)) + data + end end diff --git a/lib/codeqa/engine/pipeline.ex b/lib/codeqa/engine/pipeline.ex index 53e25b4f..ae397045 100644 --- a/lib/codeqa/engine/pipeline.ex +++ b/lib/codeqa/engine/pipeline.ex @@ -30,7 +30,7 @@ defmodule CodeQA.Engine.Pipeline do Regex.scan(@word_re, content) |> List.flatten() - identifiers = Enum.reject(words, &MapSet.member?(keywords, &1)) + identifiers = words |> Enum.reject(&MapSet.member?(keywords, &1)) lines = content |> String.split("\n") |> trim_trailing_empty() encoded = content @@ -51,17 +51,17 @@ defmodule CodeQA.Engine.Pipeline do end %FileContext{ + blocks: blocks, + byte_count: byte_size(content), content: content, - tokens: tokens, - token_counts: token_counts, - words: words, - identifiers: identifiers, - lines: lines, encoded: encoded, - byte_count: byte_size(content), + identifiers: identifiers, line_count: length(lines), + lines: lines, path: path, - blocks: blocks + token_counts: token_counts, + tokens: tokens, + words: words } end diff --git a/lib/codeqa/engine/registry.ex b/lib/codeqa/engine/registry.ex index 135385ac..a670f558 100644 --- a/lib/codeqa/engine/registry.ex +++ b/lib/codeqa/engine/registry.ex @@ -1,17 +1,15 @@ defmodule CodeQA.Engine.Registry do @moduledoc "Metric registration and execution." - defstruct file_metrics: [], codebase_metrics: [] + defstruct codebase_metrics: [], file_metrics: [] def new, do: %__MODULE__{} - def register_file_metric(%__MODULE__{} = reg, metric_module) do - %{reg | file_metrics: reg.file_metrics ++ [metric_module]} - end + def register_file_metric(%__MODULE__{} = reg, metric_module), + do: %{reg | file_metrics: reg.file_metrics ++ [metric_module]} - def register_codebase_metric(%__MODULE__{} = reg, metric_module) do - %{reg | codebase_metrics: reg.codebase_metrics ++ [metric_module]} - end + def register_codebase_metric(%__MODULE__{} = reg, metric_module), + do: %{reg | codebase_metrics: reg.codebase_metrics ++ [metric_module]} def run_file_metrics(%__MODULE__{} = reg, ctx, opts \\ []) do base_metrics = @@ -31,7 +29,7 @@ defmodule CodeQA.Engine.Registry do if Keyword.get(opts, :combinations, false) do combinations = generate_combinations(flat_numeric_metrics(base_metrics), []) - Map.merge(base_metrics, Map.new(combinations)) + Map.merge(base_metrics, combinations |> Map.new()) else base_metrics end @@ -49,7 +47,8 @@ defmodule CodeQA.Engine.Registry do defp generate_combinations([{k1, v1} | rest], acc) do # Generate all pairs for the head with the rest of the list new_acc = - Enum.reduce(rest, acc, fn {k2, v2}, current_acc -> + rest + |> Enum.reduce(acc, fn {k2, v2}, current_acc -> combined = %{ "keys" => [k1, k2], "add" => v1 + v2, diff --git a/lib/codeqa/git.ex b/lib/codeqa/git.ex index 44892058..ede58361 100644 --- a/lib/codeqa/git.ex +++ b/lib/codeqa/git.ex @@ -51,20 +51,15 @@ defmodule CodeQA.Git do """ @spec diff_line_ranges(String.t(), String.t(), String.t()) :: {:ok, %{String.t() => [{pos_integer(), pos_integer()}]}} | {:error, term()} - def diff_line_ranges(repo_path, base_ref, head_ref) do - case System.cmd( - "git", - ["diff", "-U0", "#{base_ref}..#{head_ref}"], - cd: repo_path, - stderr_to_stdout: false - ) do - {output, 0} -> - {:ok, parse_diff_hunks(output)} - - {_output, code} -> - {:error, "git diff exited with code #{code}"} - end - end + def diff_line_ranges(repo_path, base_ref, head_ref), + do: + System.cmd( + "git", + ["diff", "-U0", "#{base_ref}..#{head_ref}"], + cd: repo_path, + stderr_to_stdout: false + ) + |> handle_diff_line_ranges_cmd() @typep parse_state :: {String.t() | nil, %{String.t() => [{pos_integer(), pos_integer()}]}} @@ -74,7 +69,7 @@ defmodule CodeQA.Git do |> String.split("\n") |> Enum.reduce({nil, %{}}, &parse_diff_line/2) |> elem(1) - |> Map.new(fn {path, ranges} -> {path, Enum.reverse(ranges)} end) + |> Map.new(fn {path, ranges} -> {path, ranges |> Enum.reverse()} end) end @spec parse_diff_line(String.t(), parse_state()) :: parse_state() @@ -88,29 +83,7 @@ defmodule CodeQA.Git do defp parse_diff_line("@@ " <> rest, {current_file, acc}) when is_binary(current_file) do # Parse hunk header: @@ -old_start,old_count +new_start,new_count @@ - case Regex.run(~r/\+(\d+)(?:,(\d+))?/, rest) do - [_, start_str] -> - # Single line change (no count means 1 line) - start = String.to_integer(start_str) - updated = Map.update(acc, current_file, [{start, start}], &[{start, start} | &1]) - {current_file, updated} - - [_, start_str, count_str] -> - start = String.to_integer(start_str) - count = String.to_integer(count_str) - - if count == 0 do - # Deletion only, no new lines - {current_file, acc} - else - end_line = start + count - 1 - updated = Map.update(acc, current_file, [{start, end_line}], &[{start, end_line} | &1]) - {current_file, updated} - end - - nil -> - {current_file, acc} - end + Regex.run(~r/\+(\d+)(?:,(\d+))?/, rest) |> handle_parse_diff_line_run(acc, current_file) end defp parse_diff_line(_line, state), do: state @@ -134,16 +107,8 @@ defmodule CodeQA.Git do end) end - defp parse_change_line(line) do - case String.split(line, "\t", parts: 2) do - [status_code, path] when byte_size(status_code) > 0 -> - status = Map.get(@status_map, String.first(status_code), "modified") - if source_file?(path), do: [%ChangedFile{path: path, status: status}], else: [] - - _ -> - [] - end - end + defp parse_change_line(line), + do: String.split(line, "\t", parts: 2) |> handle_parse_change_line_split() defp list_source_files_at_ref(repo_path, ref) do {output, 0} = System.cmd("git", ["ls-tree", "-r", "--name-only", ref], cd: repo_path) @@ -158,4 +123,44 @@ defmodule CodeQA.Git do ext = path |> Path.extname() |> String.downcase() MapSet.member?(Collector.source_extensions(), ext) end + + # FIXME: extracted automatically by ExtractCaseToHelper — review + # the parameter list and consider a better name. + defp handle_diff_line_ranges_cmd({output, 0}), do: {:ok, parse_diff_hunks(output)} + + defp handle_diff_line_ranges_cmd({_output, code}), + do: {:error, "git diff exited with code #{code}"} + + # FIXME: extracted automatically by ExtractCaseToHelper — review + # the parameter list and consider a better name. + defp handle_parse_diff_line_run([_, start_str], acc, current_file) do + start = String.to_integer(start_str) + updated = Map.update(acc, current_file, [{start, start}], &[{start, start} | &1]) + {current_file, updated} + end + + defp handle_parse_diff_line_run([_, start_str, count_str], acc, current_file) do + start = String.to_integer(start_str) + count = String.to_integer(count_str) + + if count == 0 do + # Deletion only, no new lines + {current_file, acc} + else + end_line = start + count - 1 + updated = Map.update(acc, current_file, [{start, end_line}], &[{start, end_line} | &1]) + {current_file, updated} + end + end + + defp handle_parse_diff_line_run(nil, acc, current_file), do: {current_file, acc} + + # FIXME: extracted automatically by ExtractCaseToHelper — review + # the parameter list and consider a better name. + defp handle_parse_change_line_split([status_code, path]) when byte_size(status_code) > 0 do + status = Map.get(@status_map, String.first(status_code), "modified") + if source_file?(path), do: [%ChangedFile{path: path, status: status}], else: [] + end + + defp handle_parse_change_line_split(_), do: [] end diff --git a/lib/codeqa/health_report.ex b/lib/codeqa/health_report.ex index 183b737a..e0334cfb 100644 --- a/lib/codeqa/health_report.ex +++ b/lib/codeqa/health_report.ex @@ -1,8 +1,14 @@ defmodule CodeQA.HealthReport do @moduledoc "Orchestrates health report generation from analysis results." - alias CodeQA.CombinedMetrics.{FileScorer, SampleRunner} - alias CodeQA.HealthReport.{Config, Delta, Formatter, Grader, TopBlocks} + alias CodeQA.CombinedMetrics.FileScorer + alias CodeQA.CombinedMetrics.SampleRunner + alias CodeQA.HealthReport.Config + alias CodeQA.HealthReport.Delta + alias CodeQA.HealthReport.Formatter + alias CodeQA.HealthReport.Grader + alias CodeQA.HealthReport.TopBlocks + import CodeQA.Shared, only: [project_languages_shared: 1] @spec generate(map(), keyword()) :: map() def generate(analysis_results, opts \\ []) do @@ -12,12 +18,12 @@ defmodule CodeQA.HealthReport do diff_line_ranges = Keyword.get(opts, :diff_line_ranges, %{}) %{ + block_max_lines: block_max_lines, + block_min_lines: block_min_lines, categories: categories, - grade_scale: grade_scale, - impact_map: impact_map, combined_top: combined_top, - block_min_lines: block_min_lines, - block_max_lines: block_max_lines + grade_scale: grade_scale, + impact_map: impact_map } = Config.load(config_path) @@ -42,25 +48,26 @@ defmodule CodeQA.HealthReport do all_cosines = SampleRunner.diagnose_aggregate(aggregate, top: 99_999, languages: project_langs) - cosines_by_category = Enum.group_by(all_cosines, & &1.category) + cosines_by_category = all_cosines |> Enum.group_by(& &1.category) cosine_grades = Grader.grade_cosine_categories(cosines_by_category, worst_files_map, grade_scale) all_categories = (threshold_grades ++ cosine_grades) - |> Enum.map(fn cat -> - Map.put(cat, :impact, Map.get(impact_map, to_string(cat.key), 1)) - end) + |> Enum.map(&Map.put(&1, :impact, Map.get(impact_map, to_string(&1.key), 1))) {overall_score, overall_grade} = Grader.overall_score(all_categories, grade_scale, impact_map) metadata = build_metadata(analysis_results) - top_issues = Enum.take(all_cosines, 10) + top_issues = all_cosines |> Enum.take(10) codebase_cosine_lookup = - Map.new(all_cosines, fn i -> {{i.category, i.behavior}, i.cosine} end) + for i <- all_cosines do + {{i.category, i.behavior}, i.cosine} + end + |> Map.new() block_opts = [ block_min_lines: block_min_lines, @@ -81,9 +88,9 @@ defmodule CodeQA.HealthReport do grading_cfg = %{ category_defs: categories, + combined_top: combined_top, grade_scale: grade_scale, - impact_map: impact_map, - combined_top: combined_top + impact_map: impact_map } {codebase_delta, pr_summary} = @@ -102,22 +109,21 @@ defmodule CodeQA.HealthReport do end %{ + categories: all_categories, + codebase_delta: codebase_delta, metadata: metadata, - pr_summary: pr_summary, - overall_score: overall_score, overall_grade: overall_grade, - codebase_delta: codebase_delta, - categories: all_categories, - top_issues: top_issues, + overall_score: overall_score, + pr_summary: pr_summary, top_blocks: top_blocks, + top_issues: top_issues, worst_blocks_by_category: worst_blocks_by_category } end @spec to_markdown(map(), atom(), atom()) :: String.t() - def to_markdown(report, detail \\ :default, format \\ :plain) do - Formatter.format_markdown(report, detail, format) - end + def to_markdown(report, detail \\ :default, format \\ :plain), + do: report |> Formatter.format_markdown(detail, format) defp build_delta_and_summary( base_results, @@ -126,9 +132,9 @@ defmodule CodeQA.HealthReport do head_grade, %{ category_defs: category_defs, + combined_top: combined_top, grade_scale: grade_scale, - impact_map: impact_map, - combined_top: combined_top + impact_map: impact_map }, changed_files, top_blocks @@ -165,26 +171,24 @@ defmodule CodeQA.HealthReport do base_all_categories = (base_threshold_grades ++ base_cosine_grades) - |> Enum.map(fn cat -> - Map.put(cat, :impact, Map.get(impact_map, to_string(cat.key), 1)) - end) + |> Enum.map(&Map.put(&1, :impact, Map.get(impact_map, to_string(&1.key), 1))) {base_score, base_grade} = Grader.overall_score(base_all_categories, grade_scale, impact_map) blocks_flagged = length(top_blocks) - files_added = Enum.count(changed_files, &(&1.status == "added")) - files_modified = Enum.count(changed_files, &(&1.status == "modified")) + files_added = changed_files |> Enum.count(&(&1.status == "added")) + files_modified = changed_files |> Enum.count(&(&1.status == "modified")) summary = %{ - base_score: base_score, - head_score: head_score, - score_delta: head_score - base_score, base_grade: base_grade, - head_grade: head_grade, + base_score: base_score, blocks_flagged: blocks_flagged, - files_changed: length(changed_files), files_added: files_added, - files_modified: files_modified + files_changed: length(changed_files), + files_modified: files_modified, + head_grade: head_grade, + head_score: head_score, + score_delta: head_score - base_score } {delta, summary} @@ -200,20 +204,14 @@ defmodule CodeQA.HealthReport do } end - defp project_languages(files_map) do - files_map - |> Map.keys() - |> Enum.map(&CodeQA.Language.detect(&1).name()) - |> Enum.reject(&(&1 == "unknown")) - |> Enum.uniq() - end + defp project_languages(files_map), do: project_languages_shared(files_map) defp build_category_summary(%{type: :cosine}), do: "" defp build_category_summary(graded) do low_scorers = graded.metric_scores - |> Enum.filter(fn m -> m.score < 60 end) + |> Enum.filter(&(&1.score < 60)) |> length() cond do diff --git a/lib/codeqa/health_report/behavior_labels.ex b/lib/codeqa/health_report/behavior_labels.ex index 3cd4f94b..09f958dc 100644 --- a/lib/codeqa/health_report/behavior_labels.ex +++ b/lib/codeqa/health_report/behavior_labels.ex @@ -68,10 +68,10 @@ defmodule CodeQA.HealthReport.BehaviorLabels do end) || "Review this code block" end - defp humanize(behavior) do - behavior - |> String.replace("_", " ") - |> String.split() - |> Enum.map_join(" ", &String.capitalize/1) - end + defp humanize(behavior), + do: + behavior + |> String.replace("_", " ") + |> String.split() + |> Enum.map_join(" ", &String.capitalize/1) end diff --git a/lib/codeqa/health_report/categories.ex b/lib/codeqa/health_report/categories.ex index 98b2e972..7146cf53 100644 --- a/lib/codeqa/health_report/categories.ex +++ b/lib/codeqa/health_report/categories.ex @@ -3,8 +3,8 @@ defmodule CodeQA.HealthReport.Categories do @doc "Returns the default grade scale as `[{min_score, letter}, ...]` sorted descending." @spec default_grade_scale() :: [{number(), String.t()}] - def default_grade_scale do - [ + def default_grade_scale, + do: [ {93, "A"}, {85, "A-"}, {78, "B+"}, @@ -21,257 +21,256 @@ defmodule CodeQA.HealthReport.Categories do {6, "E-"}, {0, "F"} ] - end @doc "Returns the built-in category definitions." @spec defaults() :: [map()] - def defaults do - [ + def defaults, + do: [ %{ key: :readability, - name: "Readability", metrics: [ %{ + fix_hint: + "Low readability score — simplify sentences, prefer short identifiers, avoid deeply nested expressions", + good: :high, name: "flesch_adapted", source: "readability", - weight: 0.4, - good: :high, thresholds: %{a: 70, b: 50, c: 35, d: 20}, - fix_hint: - "Low readability score — simplify sentences, prefer short identifiers, avoid deeply nested expressions" + weight: 0.4 }, %{ + fix_hint: + "High fog index — reduce complex multi-word identifiers and long compound expressions", + good: :low, name: "fog_adapted", source: "readability", - weight: 0.3, - good: :low, thresholds: %{a: 6, b: 10, c: 15, d: 22}, - fix_hint: - "High fog index — reduce complex multi-word identifiers and long compound expressions" + weight: 0.3 }, %{ + fix_hint: + "Too many tokens per line — break long lines into multiple shorter statements", + good: :low, name: "avg_tokens_per_line", source: "readability", - weight: 0.2, - good: :low, thresholds: %{a: 6, b: 10, c: 14, d: 20}, - fix_hint: - "Too many tokens per line — break long lines into multiple shorter statements" + weight: 0.2 }, %{ + fix_hint: + "Lines too long — wrap at 80–120 characters and extract intermediate variables", + good: :low, name: "avg_line_length", source: "readability", - weight: 0.1, - good: :low, thresholds: %{a: 40, b: 60, c: 80, d: 100}, - fix_hint: - "Lines too long — wrap at 80–120 characters and extract intermediate variables" + weight: 0.1 } - ] + ], + name: "Readability" }, %{ key: :complexity, - name: "Complexity", metrics: [ %{ + fix_hint: + "High operator/operand ratio — extract repeated sub-expressions into named variables", + good: :low, name: "difficulty", source: "halstead", - weight: 0.35, - good: :low, thresholds: %{a: 10, b: 20, c: 35, d: 50}, - fix_hint: - "High operator/operand ratio — extract repeated sub-expressions into named variables" + weight: 0.35 }, %{ + fix_hint: + "High implementation effort — simplify logic by extracting helpers and reducing branching", + good: :low, name: "effort", source: "halstead", - weight: 0.30, - good: :low, thresholds: %{a: 5000, b: 20_000, c: 50_000, d: 100_000}, - fix_hint: - "High implementation effort — simplify logic by extracting helpers and reducing branching" + weight: 0.3 }, %{ + fix_hint: + "High token volume — extract helper functions to reduce the total operation count", + good: :low, name: "volume", source: "halstead", - weight: 0.20, - good: :low, thresholds: %{a: 300, b: 1000, c: 3000, d: 8000}, - fix_hint: - "High token volume — extract helper functions to reduce the total operation count" + weight: 0.2 }, %{ + fix_hint: "High defect estimate — reduce complexity; simpler code has fewer bugs", + good: :low, name: "estimated_bugs", source: "halstead", - weight: 0.15, - good: :low, thresholds: %{a: 0.1, b: 0.5, c: 1.0, d: 3.0}, - fix_hint: "High defect estimate — reduce complexity; simpler code has fewer bugs" + weight: 0.15 } - ] + ], + name: "Complexity" }, %{ key: :structure, - name: "Structure", metrics: [ %{ + fix_hint: + "Too many branches per line — flatten conditionals using guard clauses or early returns", + good: :low, name: "branching_density", source: "branching", - weight: 0.25, - good: :low, thresholds: %{a: 0.08, b: 0.17, c: 0.30, d: 0.45}, - fix_hint: - "Too many branches per line — flatten conditionals using guard clauses or early returns" + weight: 0.25 }, %{ + fix_hint: "High average nesting — extract inner blocks into helper functions", + good: :low, name: "mean_depth", source: "indentation", - weight: 0.2, - good: :low, thresholds: %{a: 3.5, b: 7, c: 10, d: 15}, - fix_hint: "High average nesting — extract inner blocks into helper functions" + weight: 0.2 }, %{ + fix_hint: + "Functions too long on average — split into smaller single-purpose functions", + good: :low, name: "avg_function_lines", source: "function_metrics", - weight: 0.2, - good: :low, thresholds: %{a: 8, b: 15, c: 30, d: 65}, - fix_hint: - "Functions too long on average — split into smaller single-purpose functions" + weight: 0.2 }, %{ + fix_hint: "Deep nesting — restructure using early returns or extract nested logic", + good: :low, name: "max_depth", source: "indentation", - weight: 0.1, - good: :low, thresholds: %{a: 8, b: 16, c: 25, d: 35}, - fix_hint: "Deep nesting — restructure using early returns or extract nested logic" + weight: 0.1 }, %{ + fix_hint: + "Largest function too long — decompose the longest function into focused helpers", + good: :low, name: "max_function_lines", source: "function_metrics", - weight: 0.1, - good: :low, thresholds: %{a: 20, b: 50, c: 100, d: 200}, - fix_hint: - "Largest function too long — decompose the longest function into focused helpers" + weight: 0.1 }, %{ + fix_hint: + "Inconsistent indentation depth — standardize nesting by flattening or restructuring", + good: :low, name: "variance", source: "indentation", - weight: 0.1, - good: :low, thresholds: %{a: 7, b: 20, c: 40, d: 65}, - fix_hint: - "Inconsistent indentation depth — standardize nesting by flattening or restructuring" + weight: 0.1 }, %{ + fix_hint: + "Too many parameters on average — group related params into a struct or map", + good: :low, name: "avg_param_count", source: "function_metrics", - weight: 0.03, - good: :low, thresholds: %{a: 2, b: 3, c: 5, d: 7}, - fix_hint: "Too many parameters on average — group related params into a struct or map" + weight: 0.03 }, %{ + fix_hint: + "Function has too many parameters — introduce a parameter object or options map", + good: :low, name: "max_param_count", source: "function_metrics", - weight: 0.02, - good: :low, thresholds: %{a: 3, b: 5, c: 7, d: 10}, - fix_hint: - "Function has too many parameters — introduce a parameter object or options map" + weight: 0.02 } - ] + ], + name: "Structure" }, %{ key: :duplication, - name: "Duplication", metrics: [ %{ + fix_hint: + "High redundancy — extract repeated patterns into shared helpers or abstractions", + good: :low, name: "redundancy", source: "compression", - weight: 0.5, - good: :low, thresholds: %{a: 0.3, b: 0.5, c: 0.65, d: 0.8}, - fix_hint: - "High redundancy — extract repeated patterns into shared helpers or abstractions" + weight: 0.5 }, %{ + fix_hint: + "Repeated two-token sequences — consolidate duplicated patterns into named functions", + good: :low, name: "bigram_repetition_rate", source: "ngram", - weight: 0.3, - good: :low, thresholds: %{a: 0.15, b: 0.30, c: 0.45, d: 0.60}, - fix_hint: - "Repeated two-token sequences — consolidate duplicated patterns into named functions" + weight: 0.3 }, %{ + fix_hint: + "Repeated three-token sequences — extract duplicated logic into reusable abstractions", + good: :low, name: "trigram_repetition_rate", source: "ngram", - weight: 0.2, - good: :low, thresholds: %{a: 0.05, b: 0.15, c: 0.30, d: 0.45}, - fix_hint: - "Repeated three-token sequences — extract duplicated logic into reusable abstractions" + weight: 0.2 } - ] + ], + name: "Duplication" }, %{ key: :naming, - name: "Naming", metrics: [ %{ + fix_hint: + "Mixed casing styles — use a single consistent casing convention throughout the file", + good: :low, name: "entropy", source: "casing_entropy", - weight: 0.3, - good: :low, thresholds: %{a: 1.0, b: 1.5, c: 2.0, d: 2.3}, - fix_hint: - "Mixed casing styles — use a single consistent casing convention throughout the file" + weight: 0.3 }, %{ + fix_hint: "Identifiers too long on average — prefer concise, intent-revealing names", + good: :low, name: "mean", source: "identifier_length_variance", - weight: 0.25, - good: :low, thresholds: %{a: 12, b: 18, c: 25, d: 35}, - fix_hint: "Identifiers too long on average — prefer concise, intent-revealing names" + weight: 0.25 }, %{ + fix_hint: "High identifier length variance — standardize name length conventions", + good: :low, name: "variance", source: "identifier_length_variance", - weight: 0.25, - good: :low, thresholds: %{a: 15, b: 30, c: 50, d: 80}, - fix_hint: "High identifier length variance — standardize name length conventions" + weight: 0.25 }, %{ + fix_hint: + "Identifiers have too many sub-words — simplify to 2–3 word names where possible", + good: :low, name: "avg_sub_words_per_id", source: "readability", - weight: 0.2, - good: :low, thresholds: %{a: 3, b: 4, c: 5, d: 7}, - fix_hint: - "Identifiers have too many sub-words — simplify to 2–3 word names where possible" + weight: 0.2 } - ] + ], + name: "Naming" }, %{ key: :magic_numbers, - name: "Magic Numbers", metrics: [ %{ + fix_hint: "Too many magic numbers — replace literal values with named constants", + good: :low, name: "density", source: "magic_number_density", - weight: 1.0, - good: :low, thresholds: %{a: 0.02, b: 0.05, c: 0.10, d: 0.20}, - fix_hint: "Too many magic numbers — replace literal values with named constants" + weight: 1.0 } - ] + ], + name: "Magic Numbers" } ] - end end diff --git a/lib/codeqa/health_report/config.ex b/lib/codeqa/health_report/config.ex index 7c457b29..2c8946c3 100644 --- a/lib/codeqa/health_report/config.ex +++ b/lib/codeqa/health_report/config.ex @@ -1,26 +1,26 @@ defmodule CodeQA.HealthReport.Config do + alias CodeQA.Config @moduledoc "Loads and merges health report configuration from YAML." alias CodeQA.HealthReport.Categories @spec load(String.t() | nil) :: %{ + block_max_lines: pos_integer(), + block_min_lines: pos_integer(), categories: [map()], - grade_scale: [{number(), String.t()}], - impact_map: %{String.t() => pos_integer()}, combined_top: pos_integer(), - block_min_lines: pos_integer(), - block_max_lines: pos_integer() + grade_scale: [{number(), String.t()}], + impact_map: %{String.t() => pos_integer()} } - def load(nil) do - %{ + def load(nil), + do: %{ + block_max_lines: 20, + block_min_lines: 3, categories: Categories.defaults(), + combined_top: Config.combined_top(), grade_scale: Categories.default_grade_scale(), - impact_map: CodeQA.Config.impact_map(), - combined_top: CodeQA.Config.combined_top(), - block_min_lines: 3, - block_max_lines: 20 + impact_map: Config.impact_map() } - end def load(path) do yaml = YamlElixir.read_from_file!(path) @@ -51,20 +51,25 @@ defmodule CodeQA.HealthReport.Config do block_max_lines = Map.get(yaml, "block_max_lines", 20) %{ + block_max_lines: block_max_lines, + block_min_lines: block_min_lines, categories: categories, - grade_scale: grade_scale, - impact_map: impact_map, combined_top: combined_top, - block_min_lines: block_min_lines, - block_max_lines: block_max_lines + grade_scale: grade_scale, + impact_map: impact_map } end - defp parse_impact(nil), do: CodeQA.Config.impact_map() + defp parse_impact(nil), do: Config.impact_map() defp parse_impact(overrides) when is_map(overrides) do - string_overrides = Map.new(overrides, fn {k, v} -> {to_string(k), v} end) - Map.merge(CodeQA.Config.impact_map(), string_overrides) + string_overrides = + for {k, v} <- overrides do + {to_string(k), v} + end + |> Map.new() + + Map.merge(Config.impact_map(), string_overrides) end defp parse_grade_scale(nil), do: Categories.default_grade_scale() @@ -77,15 +82,15 @@ defmodule CodeQA.HealthReport.Config do |> Enum.sort_by(&elem(&1, 0), :desc) end - defp merge_category(key, nil, override) do - # New category from YAML only - %{ - key: String.to_atom(key), - name: Map.get(override, "name", key), - metrics: Enum.map(Map.get(override, "metrics", []), &parse_metric/1) - } - |> maybe_put_top(override) - end + # New category from YAML only + defp merge_category(key, nil, override), + do: + %{ + key: String.to_atom(key), + metrics: Map.get(override, "metrics", []) |> Enum.map(&parse_metric/1), + name: Map.get(override, "name", key) + } + |> maybe_put_top(override) defp merge_category(_key, default, nil), do: default @@ -106,7 +111,8 @@ defmodule CodeQA.HealthReport.Config do default_names = MapSet.new(defaults, & &1.name) merged_defaults = - Enum.map(defaults, fn default_metric -> + defaults + |> Enum.map(fn default_metric -> case Map.get(overrides_by_name, default_metric.name) do nil -> default_metric override -> merge_metric(default_metric, override) @@ -116,7 +122,7 @@ defmodule CodeQA.HealthReport.Config do # Append new metrics from YAML that aren't in defaults new_metrics = overrides - |> Enum.reject(fn o -> MapSet.member?(default_names, o["name"]) end) + |> Enum.reject(&MapSet.member?(default_names, &1["name"])) |> Enum.map(&parse_metric/1) merged_defaults ++ new_metrics @@ -135,23 +141,22 @@ defmodule CodeQA.HealthReport.Config do else: default.good %{ + good: good, name: default.name, source: Map.get(override, "source", default.source), - weight: Map.get(override, "weight", default.weight), - good: good, - thresholds: thresholds + thresholds: thresholds, + weight: Map.get(override, "weight", default.weight) } end - defp parse_metric(m) do - %{ + defp parse_metric(m), + do: %{ + good: parse_good(m["good"]), name: m["name"], source: m["source"], - weight: m["weight"], - good: parse_good(m["good"]), - thresholds: atomize_thresholds(Map.get(m, "thresholds", %{})) + thresholds: atomize_thresholds(Map.get(m, "thresholds", %{})), + weight: m["weight"] } - end defp parse_good(nil), do: :low defp parse_good("high"), do: :high diff --git a/lib/codeqa/health_report/delta.ex b/lib/codeqa/health_report/delta.ex index 52b0085e..8b3e672b 100644 --- a/lib/codeqa/health_report/delta.ex +++ b/lib/codeqa/health_report/delta.ex @@ -3,8 +3,8 @@ defmodule CodeQA.HealthReport.Delta do @spec compute(map(), map()) :: %{ base: %{aggregate: map()}, - head: %{aggregate: map()}, - delta: %{aggregate: map()} + delta: %{aggregate: map()}, + head: %{aggregate: map()} } def compute(base_results, head_results) do base_agg = get_in(base_results, ["codebase", "aggregate"]) || %{} @@ -12,17 +12,17 @@ defmodule CodeQA.HealthReport.Delta do %{ base: %{aggregate: base_agg}, - head: %{aggregate: head_agg}, - delta: %{aggregate: compute_aggregate_delta(base_agg, head_agg)} + delta: %{aggregate: compute_aggregate_delta(base_agg, head_agg)}, + head: %{aggregate: head_agg} } end defp compute_aggregate_delta(base_agg, head_agg) do MapSet.new(Map.keys(base_agg) ++ Map.keys(head_agg)) |> Enum.reduce(%{}, fn metric_name, acc -> - base_m = Map.get(base_agg, metric_name, %{}) - head_m = Map.get(head_agg, metric_name, %{}) - delta = compute_numeric_delta(base_m, head_m) + base = Map.get(base_agg, metric_name, %{}) + head = Map.get(head_agg, metric_name, %{}) + delta = compute_numeric_delta(base, head) if delta == %{}, do: acc, else: Map.put(acc, metric_name, delta) end) end diff --git a/lib/codeqa/health_report/formatter.ex b/lib/codeqa/health_report/formatter.ex index d166f145..a7380948 100644 --- a/lib/codeqa/health_report/formatter.ex +++ b/lib/codeqa/health_report/formatter.ex @@ -1,7 +1,8 @@ defmodule CodeQA.HealthReport.Formatter do @moduledoc "Renders health report as markdown in plain or github format." - alias CodeQA.HealthReport.Formatter.{Github, Plain} + alias CodeQA.HealthReport.Formatter.Github + alias CodeQA.HealthReport.Formatter.Plain @spec format_markdown(map(), atom(), atom(), keyword()) :: String.t() def format_markdown(report, detail, format \\ :plain, opts \\ []) diff --git a/lib/codeqa/health_report/formatter/github.ex b/lib/codeqa/health_report/formatter/github.ex index 5bf9f7f2..d65f85ab 100644 --- a/lib/codeqa/health_report/formatter/github.ex +++ b/lib/codeqa/health_report/formatter/github.ex @@ -1,6 +1,11 @@ defmodule CodeQA.HealthReport.Formatter.Github do @moduledoc "Renders health report as rich GitHub-flavored markdown." + alias CodeQA.HealthReport.BehaviorLabels + + import CodeQA.HealthReport.Formatter.Shared, + only: [count_severities_shared: 1, pr_summary_section: 1, worst_severity_shared: 1] + @bar_width 20 @filled "█" @empty "░" @@ -8,7 +13,7 @@ defmodule CodeQA.HealthReport.Formatter.Github do @spec render(map(), atom(), keyword()) :: String.t() def render(report, detail, opts \\ []) do chart? = Keyword.get(opts, :chart, true) - display_categories = merge_cosine_categories(report.categories) + categories = merge_cosine_categories(report.categories) worst_blocks = Map.get(report, :worst_blocks_by_category, %{}) [ @@ -16,11 +21,11 @@ defmodule CodeQA.HealthReport.Formatter.Github do header(report), cosine_legend(), delta_section(Map.get(report, :codebase_delta)), - if(chart?, do: mermaid_chart(display_categories), else: []), - progress_bars(display_categories), + if(chart?, do: mermaid_chart(categories), else: []), + progress_bars(categories), top_issues_section(Map.get(report, :top_issues, []), detail), blocks_section(Map.get(report, :top_blocks, [])), - category_sections(display_categories, detail, worst_blocks), + category_sections(categories, detail, worst_blocks), footer() ] |> List.flatten() @@ -34,15 +39,15 @@ defmodule CodeQA.HealthReport.Formatter.Github do @spec render_part_1(map(), keyword()) :: String.t() def render_part_1(report, opts \\ []) do chart? = Keyword.get(opts, :chart, true) - display_categories = merge_cosine_categories(report.categories) + categories = merge_cosine_categories(report.categories) [ pr_summary_section(Map.get(report, :pr_summary)), header(report), cosine_legend(), delta_section(Map.get(report, :codebase_delta)), - if(chart?, do: mermaid_chart(display_categories), else: []), - progress_bars(display_categories), + if(chart?, do: mermaid_chart(categories), else: []), + progress_bars(categories), sentinel(1) ] |> List.flatten() @@ -83,25 +88,25 @@ defmodule CodeQA.HealthReport.Formatter.Github do defp sentinel_str(n), do: "" defp merge_cosine_categories(categories) do - {cosine, threshold} = Enum.split_with(categories, &(&1.type == :cosine)) + {cosine, threshold} = categories |> Enum.split_with(&(&1.type == :cosine)) case cosine do [] -> threshold _ -> - total_impact = Enum.sum(Enum.map(cosine, & &1.impact)) + total_impact = cosine |> Enum.map(& &1.impact) |> Enum.sum() combined_score = - round(Enum.sum(Enum.map(cosine, &(&1.score * &1.impact))) / max(total_impact, 1)) + round(Enum.sum(cosine |> Enum.map(&(&1.score * &1.impact))) / max(total_impact, 1)) combined = %{ - type: :cosine_group, + categories: cosine, + grade: grade_letter_from_score(combined_score), key: "combined_metrics", name: "Combined Metrics", score: combined_score, - grade: grade_letter_from_score(combined_score), - categories: cosine + type: :cosine_group } threshold ++ [combined] @@ -133,16 +138,15 @@ defmodule CodeQA.HealthReport.Formatter.Github do ] end - defp cosine_legend do - [ + defp cosine_legend, + do: [ "> *Combined metric scores use cosine similarity: +1 = metric profile perfectly matches healthy pattern for this behavior, 0 = no signal, −1 = anti-pattern detected. Mapped to 0–100 using breakpoints (approx: ≥0.5→A, ≥0.2→B, ≥0.0→C, ≥−0.3→D, <−0.3→F); actual letter grades use the full 15-step scale.*", "" ] - end defp mermaid_chart(categories) do - names = Enum.map_join(categories, ", ", fn c -> ~s("#{c.name}") end) - scores = Enum.map_join(categories, ", ", fn c -> to_string(c.score) end) + names = categories |> Enum.map_join(", ", fn c -> ~s("#{c.name}") end) + scores = categories |> Enum.map_join(", ", fn c -> to_string(c.score) end) [ "```mermaid", @@ -159,12 +163,14 @@ defmodule CodeQA.HealthReport.Formatter.Github do defp progress_bars(categories) do max_name_len = - Enum.reduce(categories, 0, fn cat, acc -> + categories + |> Enum.reduce(0, fn cat, acc -> max(acc, String.length(cat.name)) end) rows = - Enum.map(categories, fn cat -> + categories + |> Enum.map(fn cat -> name = String.pad_trailing(cat.name, max_name_len) bar = build_bar(cat.score) score_str = cat.score |> to_string() |> String.pad_leading(3) @@ -185,9 +191,8 @@ defmodule CodeQA.HealthReport.Formatter.Github do defp category_sections(_categories, :summary, _worst_blocks), do: [] - defp category_sections(categories, detail, worst_blocks) do - Enum.flat_map(categories, &render_category(&1, detail, worst_blocks)) - end + defp category_sections(categories, detail, worst_blocks), + do: categories |> Enum.flat_map(&render_category(&1, detail, worst_blocks)) defp render_category(%{type: :cosine_group} = group, detail, worst_blocks) do emoji = grade_emoji(group.grade) @@ -251,7 +256,8 @@ defmodule CodeQA.HealthReport.Formatter.Github do defp cosine_group_content(group, detail, worst_blocks) do rows = - Enum.map(group.categories, fn cat -> + group.categories + |> Enum.map(fn cat -> emoji = grade_emoji(cat.grade) "| #{cat.name} | #{cat.score} | #{emoji} #{cat.grade} |" end) @@ -263,7 +269,8 @@ defmodule CodeQA.HealthReport.Formatter.Github do ] sub_sections = - Enum.flat_map(group.categories, fn cat -> + group.categories + |> Enum.flat_map(fn cat -> emoji = grade_emoji(cat.grade) inner = @@ -290,9 +297,8 @@ defmodule CodeQA.HealthReport.Formatter.Github do category_key = to_string(cat.key) behaviors_rows = - Enum.map(cat.behaviors, fn b -> - "| #{b.behavior} | #{format_num(b.cosine)} | #{b.score} | #{b.grade} |" - end) + cat.behaviors + |> Enum.map(&"| #{&1.behavior} | #{format_num(&1.cosine)} | #{&1.score} | #{&1.grade} |") behaviors_table = [ "> Cosine similarity scores for #{n} behaviors.", @@ -321,7 +327,7 @@ defmodule CodeQA.HealthReport.Formatter.Github do [ "> **Worst offender** (`#{location}`):", "> ```#{lang}", - block.source |> String.split("\n") |> Enum.map(&"> #{&1}") |> Enum.join("\n"), + block.source |> String.split("\n") |> Enum.map_join("\n", &"> #{&1}"), "> ```", "" ] @@ -335,14 +341,13 @@ defmodule CodeQA.HealthReport.Formatter.Github do defp section_content(cat, _detail) do metric_summary = - Enum.map_join(cat.metric_scores, ", ", fn m -> "#{m.name}=#{format_num(m.value)}" end) + cat.metric_scores |> Enum.map_join(", ", fn m -> "#{m.name}=#{format_num(m.value)}" end) metrics_table = if cat.metric_scores != [] do rows = - Enum.map(cat.metric_scores, fn m -> - "| #{m.source}.#{m.name} | #{format_num(m.value)} | #{m.score} |" - end) + cat.metric_scores + |> Enum.map(&"| #{&1.source}.#{&1.name} | #{format_num(&1.value)} | #{&1.score} |") [ "| Metric | Value | Score |", @@ -365,7 +370,8 @@ defmodule CodeQA.HealthReport.Formatter.Github do defp top_issues_section(issues, _detail) do rows = - Enum.map_join(issues, "\n", fn i -> + issues + |> Enum.map_join("\n", fn i -> "| `#{i.category}.#{i.behavior}` | #{format_num(i.cosine)} | #{format_num(i.score)} |" end) @@ -384,10 +390,7 @@ defmodule CodeQA.HealthReport.Formatter.Github do ] end - defp footer do - # Legacy footer for single-part render/3 (used by --output file mode) - ["", ""] - end + defp footer, do: ["", ""] @doc false def grade_emoji(grade) do @@ -412,22 +415,6 @@ defmodule CodeQA.HealthReport.Formatter.Github do defp format_date(timestamp) when is_binary(timestamp), do: String.slice(timestamp, 0, 10) defp format_date(_), do: "unknown" - defp pr_summary_section(nil), do: [] - - defp pr_summary_section(summary) do - delta_str = - if summary.score_delta >= 0, - do: "+#{summary.score_delta}", - else: "#{summary.score_delta}" - - status_str = "#{summary.files_modified} modified, #{summary.files_added} added" - - [ - "> **Score:** #{summary.base_grade} → #{summary.head_grade} | **Δ** #{delta_str} pts | **#{summary.blocks_flagged}** blocks flagged across #{summary.files_changed} files | #{status_str}", - "" - ] - end - defp delta_section(nil), do: [] defp delta_section(delta) do @@ -441,7 +428,7 @@ defmodule CodeQA.HealthReport.Formatter.Github do {"Structure", "branching", "mean_branch_count"} ] - rows = Enum.flat_map(metrics, &format_metric_row(&1, base_agg, head_agg)) + rows = metrics |> Enum.flat_map(&format_metric_row(&1, base_agg, head_agg)) if rows == [] do [] @@ -457,31 +444,28 @@ defmodule CodeQA.HealthReport.Formatter.Github do end defp format_metric_row({label, group, key}, base_agg, head_agg) do - base_val = get_in(base_agg, [group, key]) - head_val = get_in(head_agg, [group, key]) + base_value = get_in(base_agg, [group, key]) + head_value = get_in(head_agg, [group, key]) - if is_number(base_val) and is_number(head_val) do - diff = Float.round(head_val - base_val, 2) + if is_number(base_value) and is_number(head_value) do + diff = Float.round(head_value - base_value, 2) diff_str = if diff >= 0, do: "+#{format_num(diff)}", else: "#{format_num(diff)}" - ["| #{label} | #{format_num(base_val)} | #{format_num(head_val)} | #{diff_str} |"] + ["| #{label} | #{format_num(base_value)} | #{format_num(head_value)} | #{diff_str} |"] else [] end end - defp blocks_section([]) do - ["> 🟢 **No block-level issues detected**", ""] - end + defp blocks_section([]), do: ["> 🟢 **No block-level issues detected**", ""] defp blocks_section(top_blocks) do - alias CodeQA.HealthReport.BehaviorLabels - severity_counts = count_severities(top_blocks) worst = worst_severity(severity_counts) {icon, verdict} = verdict_text(worst, severity_counts) {actionable, medium_blocks} = - Enum.split_with(top_blocks, fn b -> + top_blocks + |> Enum.split_with(fn b -> top = List.first(b.potentials) top && top.severity in [:critical, :high] end) @@ -495,7 +479,8 @@ defmodule CodeQA.HealthReport.Formatter.Github do action_table = if actionable != [] do rows = - Enum.map(actionable, fn block -> + actionable + |> Enum.map(fn block -> top = List.first(block.potentials) sev_icon = severity_icon(top.severity) label = BehaviorLabels.label(top.category, top.behavior) @@ -513,13 +498,13 @@ defmodule CodeQA.HealthReport.Formatter.Github do [] end - actionable_details = Enum.flat_map(actionable, &format_block_card/1) + actionable_details = actionable |> Enum.flat_map(&format_block_card/1) medium_section = if medium_blocks != [] do n = length(medium_blocks) word = if n == 1, do: "block", else: "blocks" - inner = Enum.flat_map(medium_blocks, &format_block_card/1) |> Enum.join("\n") + inner = medium_blocks |> Enum.flat_map(&format_block_card/1) |> Enum.join("\n") [ "
", @@ -537,20 +522,9 @@ defmodule CodeQA.HealthReport.Formatter.Github do verdict_box ++ action_table ++ actionable_details ++ medium_section end - defp count_severities(blocks) do - blocks - |> Enum.map(fn b -> (List.first(b.potentials) || %{severity: :medium}).severity end) - |> Enum.frequencies() - end + defp count_severities(blocks), do: count_severities_shared(blocks) - defp worst_severity(counts) do - cond do - Map.get(counts, :critical, 0) > 0 -> :critical - Map.get(counts, :high, 0) > 0 -> :high - Map.get(counts, :medium, 0) > 0 -> :medium - true -> :none - end - end + defp worst_severity(counts), do: worst_severity_shared(counts) defp verdict_text(:critical, counts) do n = Map.get(counts, :critical, 0) @@ -580,8 +554,6 @@ defmodule CodeQA.HealthReport.Formatter.Github do end defp format_block_card(block) do - alias CodeQA.HealthReport.BehaviorLabels - end_line = block.end_line || block.start_line top_potential = List.first(block.potentials) icon = severity_icon(top_potential.severity) @@ -603,7 +575,8 @@ defmodule CodeQA.HealthReport.Formatter.Github do end defp format_block_issues(potentials) do - Enum.flat_map(potentials, fn p -> + potentials + |> Enum.flat_map(fn p -> icon = severity_icon(p.severity) label = String.upcase(to_string(p.severity)) delta_str = format_num(p.cosine_delta) @@ -615,7 +588,7 @@ defmodule CodeQA.HealthReport.Formatter.Github do defp format_code_block(%{source: nil}), do: "_Source code not available_" - defp format_code_block(%{source: source, language: lang, start_line: start_line}) do + defp format_code_block(%{language: lang, source: source, start_line: start_line}) do lang_hint = code_fence_lang(lang) # Add line number comments for context lines = String.split(source, "\n") @@ -623,8 +596,9 @@ defmodule CodeQA.HealthReport.Formatter.Github do numbered_lines = lines |> Enum.with_index(start_line) - |> Enum.map(fn {line, num} -> "#{String.pad_leading(to_string(num), 4)} │ #{line}" end) - |> Enum.join("\n") + |> Enum.map_join("\n", fn {line, num} -> + "#{String.pad_leading(to_string(num), 4)} │ #{line}" + end) "```#{lang_hint}\n#{numbered_lines}\n```" end diff --git a/lib/codeqa/health_report/formatter/plain.ex b/lib/codeqa/health_report/formatter/plain.ex index 517fc5f8..2faadb96 100644 --- a/lib/codeqa/health_report/formatter/plain.ex +++ b/lib/codeqa/health_report/formatter/plain.ex @@ -1,24 +1,29 @@ defmodule CodeQA.HealthReport.Formatter.Plain do @moduledoc "Renders health report as plain markdown." - @spec render(map(), atom()) :: String.t() - def render(report, detail) do - [ - pr_summary_section(Map.get(report, :pr_summary)), - header(report), - cosine_legend(), - delta_section(Map.get(report, :codebase_delta)), - overall_table(report), - top_issues_section(Map.get(report, :top_issues, []), detail), - blocks_section(Map.get(report, :top_blocks, [])), - category_sections(report.categories, detail) - ] - |> List.flatten() - |> Enum.join("\n") - end + alias CodeQA.HealthReport.BehaviorLabels - defp header(report) do - [ + import CodeQA.HealthReport.Formatter.Shared, + only: [count_severities_shared: 1, pr_summary_section: 1, worst_severity_shared: 1] + + @spec render(map(), atom()) :: String.t() + def render(report, detail), + do: + [ + pr_summary_section(Map.get(report, :pr_summary)), + header(report), + cosine_legend(), + delta_section(Map.get(report, :codebase_delta)), + overall_table(report), + top_issues_section(Map.get(report, :top_issues, []), detail), + blocks_section(Map.get(report, :top_blocks, [])), + category_sections(report.categories, detail) + ] + |> List.flatten() + |> Enum.join("\n") + + defp header(report), + do: [ "# Code Health Report", "", "> #{report.metadata.path} — #{format_date(report.metadata.timestamp)} — #{report.metadata.total_files} files analyzed", @@ -26,18 +31,17 @@ defmodule CodeQA.HealthReport.Formatter.Plain do "## Overall: #{report.overall_grade}", "" ] - end - defp cosine_legend do - [ + defp cosine_legend, + do: [ "> *Combined metric scores use cosine similarity: +1 = metric profile perfectly matches healthy pattern for this behavior, 0 = no signal, −1 = anti-pattern detected. Mapped to 0–100 using breakpoints (approx: ≥0.5→A, ≥0.2→B, ≥0.0→C, ≥−0.3→D, <−0.3→F); actual letter grades use the full 15-step scale.*", "" ] - end defp overall_table(report) do rows = - Enum.map(report.categories, fn cat -> + report.categories + |> Enum.map(fn cat -> summary = Map.get(cat, :summary, "") impact = Map.get(cat, :impact, "") "| #{cat.name} | #{cat.grade} | #{cat.score} | #{impact} | #{summary} |" @@ -52,19 +56,20 @@ defmodule CodeQA.HealthReport.Formatter.Plain do defp category_sections(_categories, :summary), do: [] - defp category_sections(categories, detail) do - Enum.flat_map(categories, fn cat -> - render_category(cat, detail) - end) - end + defp category_sections(categories, detail), + do: + categories + |> Enum.flat_map( + &render_category( + &1, + detail + ) + ) - defp render_category(%{type: :cosine} = cat, _detail) do - cosine_section_header(cat) ++ cosine_behaviors_table(cat) - end + defp render_category(%{type: :cosine} = cat, _detail), + do: cosine_section_header(cat) ++ cosine_behaviors_table(cat) - defp render_category(cat, _detail) do - section_header(cat) ++ metric_detail(cat) - end + defp render_category(cat, _detail), do: section_header(cat) ++ metric_detail(cat) defp cosine_section_header(cat) do n = length(cat.behaviors) @@ -79,9 +84,8 @@ defmodule CodeQA.HealthReport.Formatter.Plain do defp cosine_behaviors_table(cat) do rows = - Enum.map(cat.behaviors, fn b -> - "| #{b.behavior} | #{format_num(b.cosine)} | #{b.score} | #{b.grade} |" - end) + cat.behaviors + |> Enum.map(&"| #{&1.behavior} | #{format_num(&1.cosine)} | #{&1.score} | #{&1.grade} |") [ "| Behavior | Cosine | Score | Grade |", @@ -92,7 +96,7 @@ defmodule CodeQA.HealthReport.Formatter.Plain do defp section_header(cat) do metric_summary = - Enum.map_join(cat.metric_scores, ", ", fn m -> "#{m.name}=#{format_num(m.value)}" end) + cat.metric_scores |> Enum.map_join(", ", fn m -> "#{m.name}=#{format_num(m.value)}" end) [ "## #{cat.name} — #{cat.grade}", @@ -104,9 +108,8 @@ defmodule CodeQA.HealthReport.Formatter.Plain do defp metric_detail(cat) do rows = - Enum.map(cat.metric_scores, fn m -> - "| #{m.source}.#{m.name} | #{format_num(m.value)} | #{m.score} |" - end) + cat.metric_scores + |> Enum.map(&"| #{&1.source}.#{&1.name} | #{format_num(&1.value)} | #{&1.score} |") if rows == [] do [] @@ -134,9 +137,10 @@ defmodule CodeQA.HealthReport.Formatter.Plain do defp top_issues_section(issues, _detail) do rows = - Enum.map(issues, fn i -> - "| #{i.category}.#{i.behavior} | #{format_num(i.cosine)} | #{format_num(i.score)} |" - end) + issues + |> Enum.map( + &"| #{&1.category}.#{&1.behavior} | #{format_num(&1.cosine)} | #{format_num(&1.score)} |" + ) [ "## Top Likely Issues", @@ -149,22 +153,6 @@ defmodule CodeQA.HealthReport.Formatter.Plain do ] ++ [""] end - defp pr_summary_section(nil), do: [] - - defp pr_summary_section(summary) do - delta_str = - if summary.score_delta >= 0, - do: "+#{summary.score_delta}", - else: "#{summary.score_delta}" - - status_str = "#{summary.files_modified} modified, #{summary.files_added} added" - - [ - "> **Score:** #{summary.base_grade} → #{summary.head_grade} | **Δ** #{delta_str} pts | **#{summary.blocks_flagged}** blocks flagged across #{summary.files_changed} files | #{status_str}", - "" - ] - end - defp delta_section(nil), do: [] defp delta_section(delta) do @@ -178,7 +166,7 @@ defmodule CodeQA.HealthReport.Formatter.Plain do {"Structure", "branching", "mean_branch_count"} ] - rows = Enum.flat_map(metrics, &format_metric_row(&1, base_agg, head_agg)) + rows = metrics |> Enum.flat_map(&format_metric_row(&1, base_agg, head_agg)) if rows == [] do [] @@ -194,13 +182,13 @@ defmodule CodeQA.HealthReport.Formatter.Plain do end defp format_metric_row({label, group, key}, base_agg, head_agg) do - base_val = get_in(base_agg, [group, key]) - head_val = get_in(head_agg, [group, key]) + base_value = get_in(base_agg, [group, key]) + head_value = get_in(head_agg, [group, key]) - if is_number(base_val) and is_number(head_val) do - diff = Float.round(head_val - base_val, 2) + if is_number(base_value) and is_number(head_value) do + diff = Float.round(head_value - base_value, 2) diff_str = if diff >= 0, do: "+#{format_num(diff)}", else: "#{format_num(diff)}" - ["| #{label} | #{format_num(base_val)} | #{format_num(head_val)} | #{diff_str} |"] + ["| #{label} | #{format_num(base_value)} | #{format_num(head_value)} | #{diff_str} |"] else [] end @@ -209,14 +197,13 @@ defmodule CodeQA.HealthReport.Formatter.Plain do defp blocks_section([]), do: ["## Code Blocks: 🟢 No block-level issues detected", ""] defp blocks_section(top_blocks) do - alias CodeQA.HealthReport.BehaviorLabels - severity_counts = count_severities(top_blocks) worst = worst_severity(severity_counts) {icon, verdict} = verdict_text(worst, severity_counts) {actionable, medium_blocks} = - Enum.split_with(top_blocks, fn b -> + top_blocks + |> Enum.split_with(fn b -> top = List.first(b.potentials) top && top.severity in [:critical, :high] end) @@ -226,7 +213,8 @@ defmodule CodeQA.HealthReport.Formatter.Plain do action_table = if actionable != [] do rows = - Enum.map(actionable, fn block -> + actionable + |> Enum.map(fn block -> top = List.first(block.potentials) label = BehaviorLabels.label(top.category, top.behavior) location = "#{block.path}:#{block.start_line}-#{block.end_line || block.start_line}" @@ -243,45 +231,34 @@ defmodule CodeQA.HealthReport.Formatter.Plain do [] end - block_details = Enum.flat_map(actionable ++ medium_blocks, &format_block/1) + block_details = (actionable ++ medium_blocks) |> Enum.flat_map(&format_block/1) header ++ action_table ++ block_details end - defp count_severities(blocks) do - blocks - |> Enum.map(fn b -> (List.first(b.potentials) || %{severity: :medium}).severity end) - |> Enum.frequencies() - end + defp count_severities(blocks), do: count_severities_shared(blocks) - defp worst_severity(counts) do - cond do - Map.get(counts, :critical, 0) > 0 -> :critical - Map.get(counts, :high, 0) > 0 -> :high - Map.get(counts, :medium, 0) > 0 -> :medium - true -> :none - end - end + defp worst_severity(counts), do: worst_severity_shared(counts) defp verdict_text(:critical, counts) do n = Map.get(counts, :critical, 0) - {"🔴", "#{n} critical #{pl(n, "block")} — review required before merge"} + {"🔴", "#{n} critical #{pluralize(n, "block")} — review required before merge"} end defp verdict_text(:high, counts) do n = Map.get(counts, :high, 0) + Map.get(counts, :critical, 0) - {"🟠", "#{n} #{pl(n, "block")} need attention before merge"} + {"🟠", "#{n} #{pluralize(n, "block")} need attention before merge"} end defp verdict_text(:medium, counts) do n = Map.get(counts, :medium, 0) - {"🟡", "#{n} #{pl(n, "block")} with minor issues (safe to merge)"} + {"🟡", "#{n} #{pluralize(n, "block")} with minor issues (safe to merge)"} end defp verdict_text(:none, _), do: {"🟢", "No block-level issues detected"} - defp pl(1, word), do: word - defp pl(_, word), do: word <> "s" + defp pluralize(1, word), do: word + defp pluralize(_, word), do: word <> "s" defp format_block(block) do end_line = block.end_line || block.start_line @@ -293,7 +270,7 @@ defmodule CodeQA.HealthReport.Formatter.Plain do subheader = "#{block.type} · #{block.token_count} tokens" - potential_lines = Enum.flat_map(block.potentials, &format_potential/1) + potential_lines = block.potentials |> Enum.flat_map(&format_potential/1) code_lines = format_code_block(block) [header, subheader, "" | potential_lines] ++ ["" | code_lines] ++ [""] end diff --git a/lib/codeqa/health_report/formatter/shared.ex b/lib/codeqa/health_report/formatter/shared.ex new file mode 100644 index 00000000..9643f105 --- /dev/null +++ b/lib/codeqa/health_report/formatter/shared.ex @@ -0,0 +1,49 @@ +defmodule CodeQA.HealthReport.Formatter.Shared do + @moduledoc """ + Shared formatter helpers. + + Extracted by `mix refactor --only ExtractParametricClone`. Both + `Formatter.Github` and `Formatter.Plain` carried identical + `count_severities/1` and `worst_severity/1` implementations. + """ + + @spec worst_severity_shared(map()) :: :critical | :high | :medium | :none + def worst_severity_shared(counts) do + cond do + Map.get(counts, :critical, 0) > 0 -> :critical + Map.get(counts, :high, 0) > 0 -> :high + Map.get(counts, :medium, 0) > 0 -> :medium + true -> :none + end + end + + @spec count_severities_shared([map()]) :: %{atom() => non_neg_integer()} + def count_severities_shared(blocks), + do: + blocks + |> Enum.map(&(List.first(&1.potentials) || %{severity: :medium}).severity) + |> Enum.frequencies() + + @doc """ + PR-summary table row, shared by both formatters. + + Returns an empty list when called with `nil` so callers can splice the + result into a flat list without branching. + """ + @spec pr_summary_section(map() | nil) :: [String.t()] + def pr_summary_section(nil), do: [] + + def pr_summary_section(summary) do + delta_str = + if summary.score_delta >= 0, + do: "+#{summary.score_delta}", + else: "#{summary.score_delta}" + + status_str = "#{summary.files_modified} modified, #{summary.files_added} added" + + [ + "> **Score:** #{summary.base_grade} → #{summary.head_grade} | **Δ** #{delta_str} pts | **#{summary.blocks_flagged}** blocks flagged across #{summary.files_changed} files | #{status_str}", + "" + ] + end +end diff --git a/lib/codeqa/health_report/grader.ex b/lib/codeqa/health_report/grader.ex index d671a0bf..4216d608 100644 --- a/lib/codeqa/health_report/grader.ex +++ b/lib/codeqa/health_report/grader.ex @@ -3,6 +3,7 @@ defmodule CodeQA.HealthReport.Grader do alias CodeQA.Config alias CodeQA.HealthReport.Categories + import CodeQA.Shared, only: [humanize_category_shared: 1] @doc """ Score a single metric value (0-100) based on thresholds and direction. @@ -11,13 +12,11 @@ defmodule CodeQA.HealthReport.Grader do For `good: :high`, higher values are better (above A threshold = 100). """ @spec score_metric(map(), number()) :: integer() - def score_metric(%{good: :high, thresholds: t}, value) do - score_by_direction(:high, value, t) |> clamp(0, 100) - end + def score_metric(%{good: :high, thresholds: t}, value), + do: score_by_direction(:high, value, t) |> clamp(0, 100) - def score_metric(%{good: _, thresholds: t}, value) do - score_by_direction(:low, value, t) |> clamp(0, 100) - end + def score_metric(%{good: _, thresholds: t}, value), + do: score_by_direction(:low, value, t) |> clamp(0, 100) @doc """ Maps cosine similarity [-1, +1] to a score [0, 100] with linear interpolation @@ -32,12 +31,12 @@ defmodule CodeQA.HealthReport.Grader do | [-1.0, -0.3) | [0, 30) | """ @spec score_cosine(float()) :: integer() - def score_cosine(cosine) do - cosine - |> cosine_to_score() - |> clamp(0, 100) - |> round() - end + def score_cosine(cosine), + do: + cosine + |> cosine_to_score() + |> clamp(0, 100) + |> round() defp cosine_to_score(c) when c >= 0.5, do: interpolate_between(c, 0.5, 90, 1.0, 100) defp cosine_to_score(c) when c >= 0.2, do: interpolate_between(c, 0.2, 70, 0.5, 90) @@ -89,14 +88,13 @@ defmodule CodeQA.HealthReport.Grader do round(Kernel.max(0, score_at_d - deviation * score_at_d)) end - defp clamp(val, min_val, max_val) do - val |> Kernel.max(min_val) |> Kernel.min(max_val) - end + defp clamp(val, min_val, max_val), do: val |> Kernel.max(min_val) |> Kernel.min(max_val) @doc "Convert a numeric score (0-100) to a letter grade using the given scale." @spec grade_letter(number(), [{number(), String.t()}]) :: String.t() def grade_letter(score, scale \\ Categories.default_grade_scale()) do - Enum.find_value(scale, "F", fn {min, letter} -> + scale + |> Enum.find_value("F", fn {min, letter} -> if score >= min, do: letter end) end @@ -119,11 +117,11 @@ defmodule CodeQA.HealthReport.Grader do score = weighted_category_score(scored) %{ + grade: grade_letter(score, scale), key: category.key, + metric_scores: scored, name: category.name, - score: score, - grade: grade_letter(score, scale), - metric_scores: scored + score: score } end @@ -132,12 +130,12 @@ defmodule CodeQA.HealthReport.Grader do if value do %{ + good: metric_def.good, name: metric_def.name, + score: score_metric(metric_def, value), source: metric_def.source, - weight: metric_def.weight, - good: metric_def.good, value: value, - score: score_metric(metric_def, value) + weight: metric_def.weight } end end @@ -145,10 +143,10 @@ defmodule CodeQA.HealthReport.Grader do defp weighted_category_score([]), do: 0 defp weighted_category_score(scored) do - total_weight = Enum.reduce(scored, 0.0, fn s, acc -> acc + s.weight end) + total_weight = scored |> Enum.reduce(0.0, fn s, acc -> acc + s.weight end) if total_weight > 0 do - weighted = Enum.reduce(scored, 0.0, fn s, acc -> acc + s.score * s.weight end) + weighted = scored |> Enum.reduce(0.0, fn s, acc -> acc + s.score * s.weight end) round(weighted / total_weight) else 0 @@ -164,9 +162,8 @@ defmodule CodeQA.HealthReport.Grader do categories, file_metrics, scale \\ Categories.default_grade_scale() - ) do - Enum.map(categories, &grade_category(&1, file_metrics, scale)) - end + ), + do: categories |> Enum.map(&grade_category(&1, file_metrics, scale)) @doc """ Grade codebase aggregate metrics. Uses mean_ values from aggregate. @@ -188,7 +185,7 @@ defmodule CodeQA.HealthReport.Grader do {source, values} end) - Enum.map(categories, &grade_category(&1, file_like, scale)) + categories |> Enum.map(&grade_category(&1, file_like, scale)) end @doc """ @@ -209,19 +206,20 @@ defmodule CodeQA.HealthReport.Grader do category_grades, scale \\ Categories.default_grade_scale(), impact_map \\ %{} - ) do - if category_grades == [] do - {0, "F"} - else - {weighted_sum, total_impact} = - Enum.reduce(category_grades, {0, 0}, fn g, {ws, ti} -> - impact = Map.get(impact_map, to_string(g.key), 1) - {ws + g.score * impact, ti + impact} - end) - - avg = round(weighted_sum / total_impact) - {avg, grade_letter(avg, scale)} - end + ) + + def overall_score([], _scale, _impact_map), do: {0, "F"} + + def overall_score(category_grades, scale, impact_map) do + {weighted_sum, total_impact} = + category_grades + |> Enum.reduce({0, 0}, fn g, {ws, ti} -> + impact = Map.get(impact_map, to_string(g.key), 1) + {ws + g.score * impact, ti + impact} + end) + + score = round(weighted_sum / total_impact) + {score, grade_letter(score, scale)} end @doc """ @@ -258,11 +256,11 @@ defmodule CodeQA.HealthReport.Grader do end) end - defp score_behavior_entries(behaviors, threshold, worst_files, scale, category) do - behaviors - |> Enum.reject(fn b -> abs(b.cosine) < threshold end) - |> Enum.map(&score_behavior_entry(&1, worst_files, scale, category)) - end + defp score_behavior_entries(behaviors, threshold, worst_files, scale, category), + do: + behaviors + |> Enum.reject(&(abs(&1.cosine) < threshold)) + |> Enum.map(&score_behavior_entry(&1, worst_files, scale, category)) defp score_behavior_entry(b, worst_files, scale, category) do cosine_score = score_cosine(b.cosine) @@ -270,34 +268,28 @@ defmodule CodeQA.HealthReport.Grader do %{ behavior: b.behavior, cosine: b.cosine, - score: cosine_score, grade: grade_letter(cosine_score, scale), + score: cosine_score, worst_offenders: Map.get(worst_files, "#{category}.#{b.behavior}", []) } end defp average_behavior_score([]), do: 50 - defp average_behavior_score(entries) do - round(Enum.sum(Enum.map(entries, & &1.score)) / length(entries)) - end + defp average_behavior_score(entries), + do: (Enum.sum(entries |> Enum.map(& &1.score)) / length(entries)) |> round() - defp build_cosine_category(category, category_score, behavior_entries, scale) do - %{ - type: :cosine, + defp build_cosine_category(category, category_score, behavior_entries, scale), + do: %{ + behaviors: behavior_entries, + grade: grade_letter(category_score, scale), key: category, name: humanize_category(category), score: category_score, - grade: grade_letter(category_score, scale), - behaviors: behavior_entries + type: :cosine } - end - defp humanize_category(slug) do - slug - |> String.split("_") - |> Enum.map_join(" ", &String.capitalize/1) - end + defp humanize_category(slug), do: humanize_category_shared(slug) @doc """ Find worst offender files for a category. Returns top N files sorted by worst score. @@ -319,16 +311,16 @@ defmodule CodeQA.HealthReport.Grader do graded = grade_category(category, metrics, scale) %{ - path: path, - score: graded.score, + bytes: file_data["bytes"], grade: graded.grade, - metric_scores: graded.metric_scores, lines: file_data["lines"], - bytes: file_data["bytes"], + metric_scores: graded.metric_scores, + path: path, + score: graded.score, top_nodes: top_3_nodes(Map.get(file_data, "nodes")) } end) - |> Enum.filter(fn f -> f.metric_scores != [] end) + |> Enum.filter(&(&1.metric_scores != [])) |> Enum.sort_by(& &1.score, :asc) |> Enum.take(top_n) end @@ -351,7 +343,7 @@ defmodule CodeQA.HealthReport.Grader do defp node_impact_score(%{"refactoring_potentials" => potentials}) when is_list(potentials) and potentials != [] do - Enum.sum(Enum.map(potentials, & &1["cosine_delta"])) + potentials |> Enum.map(& &1["cosine_delta"]) |> Enum.sum() end defp node_impact_score(_), do: 0.0 diff --git a/lib/codeqa/health_report/top_blocks.ex b/lib/codeqa/health_report/top_blocks.ex index 5ceddf52..543adf73 100644 --- a/lib/codeqa/health_report/top_blocks.ex +++ b/lib/codeqa/health_report/top_blocks.ex @@ -1,4 +1,5 @@ defmodule CodeQA.HealthReport.TopBlocks do + alias CodeQA.Language @moduledoc "Assembles the top_blocks report section from analysis node data." alias CodeQA.CombinedMetrics.Scorer @@ -16,16 +17,13 @@ defmodule CodeQA.HealthReport.TopBlocks do Scorer.all_yamls() |> Enum.flat_map(fn {yaml_path, data} -> category = yaml_path |> Path.basename() |> String.trim_trailing(".yml") - Enum.flat_map(data, &hints_for_behavior(category, &1)) + data |> Enum.flat_map(&hints_for_behavior(category, &1)) end) |> Map.new() end defp hints_for_behavior(category, {behavior, behavior_data}) when is_map(behavior_data) do - case Map.get(behavior_data, "_fix_hint") do - nil -> [] - hint -> [{{category, behavior}, hint}] - end + Map.get(behavior_data, "_fix_hint") |> handle_hints_for_behavior_get(behavior, category) end defp hints_for_behavior(_category, _entry), do: [] @@ -66,7 +64,7 @@ defmodule CodeQA.HealthReport.TopBlocks do |> Enum.group_by(&elem(&1, 0), fn {_cat, block, delta} -> {block, delta} end) |> Enum.map(fn {category, block_deltas} -> # Find the block with highest cosine_delta for this category - {worst_block, _delta} = Enum.max_by(block_deltas, fn {_block, delta} -> delta end) + {worst_block, _delta} = block_deltas |> Enum.max_by(fn {_block, delta} -> delta end) {category, add_source_code(worst_block, base_path)} end) |> Map.new() @@ -83,7 +81,7 @@ defmodule CodeQA.HealthReport.TopBlocks do file_entries = if changed_files == [] do - Enum.map(files, fn {path, data} -> {path, nil, data} end) + files |> Enum.map(fn {path, data} -> {path, nil, data} end) else changed_index = Map.new(changed_files, &{&1.path, &1.status}) @@ -124,9 +122,8 @@ defmodule CodeQA.HealthReport.TopBlocks do do: blocks # When diff_line_ranges provided, filter blocks by overlap - defp filter_by_diff_overlap(blocks, path_ranges, _diff_line_ranges) do - Enum.filter(blocks, &block_overlaps_diff?(&1, path_ranges)) - end + defp filter_by_diff_overlap(blocks, path_ranges, _diff_line_ranges), + do: blocks |> Enum.filter(&block_overlaps_diff?(&1, path_ranges)) @spec block_overlaps_diff?(map(), [{pos_integer(), pos_integer()}]) :: boolean() defp block_overlaps_diff?(_node, []), do: false @@ -135,15 +132,14 @@ defmodule CodeQA.HealthReport.TopBlocks do block_start = node["start_line"] || 1 block_end = node["end_line"] || block_start - Enum.any?(path_ranges, fn {diff_start, diff_end} -> + path_ranges + |> Enum.any?(fn {diff_start, diff_end} -> ranges_overlap?(block_start, block_end, diff_start, diff_end) end) end @spec ranges_overlap?(pos_integer(), pos_integer(), pos_integer(), pos_integer()) :: boolean() - defp ranges_overlap?(start1, end1, start2, end2) do - start1 <= end2 and start2 <= end1 - end + defp ranges_overlap?(start1, end1, start2, end2), do: start1 <= end2 and start2 <= end1 defp collect_nodes(node) do children = node |> Map.get("children", []) |> Enum.flat_map(&collect_nodes/1) @@ -159,11 +155,11 @@ defmodule CodeQA.HealthReport.TopBlocks do |> Enum.sort_by(& &1.cosine_delta, :desc) %{ - start_line: node["start_line"], end_line: node["end_line"], - type: node["type"], + potentials: potentials, + start_line: node["start_line"], token_count: node["token_count"], - potentials: potentials + type: node["type"] } end @@ -180,11 +176,11 @@ defmodule CodeQA.HealthReport.TopBlocks do nil else %{ - category: category, behavior: behavior, + category: category, cosine_delta: cosine_delta, - severity: severity, - fix_hint: Map.get(fix_hints, {category, behavior}) + fix_hint: Map.get(fix_hints, {category, behavior}), + severity: severity } end end @@ -216,7 +212,13 @@ defmodule CodeQA.HealthReport.TopBlocks do nil end - lang = CodeQA.Language.detect(block.path).name() - Map.merge(block, %{source: source, language: lang}) + lang = Language.detect(block.path).name() + Map.merge(block, %{language: lang, source: source}) end + + # FIXME: extracted automatically by ExtractCaseToHelper — review + # the parameter list and consider a better name. + defp handle_hints_for_behavior_get(nil, _behavior, _category), do: [] + + defp handle_hints_for_behavior_get(hint, behavior, category), do: [{{category, behavior}, hint}] end diff --git a/lib/codeqa/languages/language.ex b/lib/codeqa/languages/language.ex index 3ccd1728..de1adce4 100644 --- a/lib/codeqa/languages/language.ex +++ b/lib/codeqa/languages/language.ex @@ -67,15 +67,15 @@ defmodule CodeQA.Language do @spec all() :: [module()] def all do {:ok, modules} = :application.get_key(:codeqa, :modules) - Enum.filter(modules, &implements?/1) + modules |> Enum.filter(&implements?/1) end @spec all_keywords() :: [String.t()] - def all_keywords do - all() - |> Enum.flat_map(& &1.keywords()) - |> Enum.uniq() - end + def all_keywords, + do: + all() + |> Enum.flat_map(& &1.keywords()) + |> Enum.uniq() @spec keywords(atom() | String.t()) :: MapSet.t() def keywords(language) do @@ -134,7 +134,7 @@ defmodule CodeQA.Language do @spec find(atom() | String.t()) :: module() def find(language) do name = to_string(language) - Enum.find(all(), fn mod -> mod.name() == name end) || CodeQA.Languages.Unknown + Enum.find(all(), &(&1.name() == name)) || CodeQA.Languages.Unknown end @spec detect(String.t()) :: module() @@ -148,16 +148,17 @@ defmodule CodeQA.Language do end @spec strip_comments(String.t(), module()) :: String.t() - def strip_comments(content, language_mod) do - content - |> strip_block_comments(language_mod.block_comments()) - |> strip_line_comments(language_mod.comment_prefixes()) - end + def strip_comments(content, language_mod), + do: + content + |> strip_block_comments(language_mod.block_comments()) + |> strip_line_comments(language_mod.comment_prefixes()) defp strip_block_comments(content, []), do: content defp strip_block_comments(content, pairs) do - Enum.reduce(pairs, content, fn {open, close}, acc -> + pairs + |> Enum.reduce(content, fn {open, close}, acc -> regex = Regex.compile!(Regex.escape(open) <> ".*?" <> Regex.escape(close), [:dotall]) Regex.replace(regex, acc, fn match -> @@ -169,7 +170,7 @@ defmodule CodeQA.Language do defp strip_line_comments(content, []), do: content defp strip_line_comments(content, prefixes) do - pattern = Enum.map_join(prefixes, "|", &Regex.escape/1) + pattern = prefixes |> Enum.map_join("|", &Regex.escape/1) Regex.replace(Regex.compile!("(#{pattern}).*$", [:multiline]), content, "") end diff --git a/lib/codeqa/math.ex b/lib/codeqa/math.ex index 394edea9..9dd0ac38 100644 --- a/lib/codeqa/math.ex +++ b/lib/codeqa/math.ex @@ -34,7 +34,8 @@ defmodule CodeQA.Math do n = length(x) {sum_x, sum_y, sum_xy, sum_x2, sum_y2} = - Enum.zip_reduce(x, y, {0.0, 0.0, 0.0, 0.0, 0.0}, fn vx, vy, {sx, sy, sxy, sx2, sy2} -> + x + |> Enum.zip_reduce(y, {0.0, 0.0, 0.0, 0.0, 0.0}, fn vx, vy, {sx, sy, sxy, sx2, sy2} -> vx_f = vx * 1.0 vy_f = vy * 1.0 {sx + vx_f, sy + vy_f, sxy + vx_f * vy_f, sx2 + vx_f * vx_f, sy2 + vy_f * vy_f} @@ -55,8 +56,8 @@ defmodule CodeQA.Math do def pearson_correlation(x, y) do n = Nx.size(x) |> Nx.tensor(type: :f64) - x = Nx.as_type(x, :f64) - y = Nx.as_type(y, :f64) + x = x |> Nx.as_type(:f64) + y = y |> Nx.as_type(:f64) sum_x = Nx.sum(x) sum_y = Nx.sum(y) diff --git a/lib/codeqa/metrics/codebase/near_duplicate_blocks_codebase.ex b/lib/codeqa/metrics/codebase/near_duplicate_blocks_codebase.ex index 2e821e98..caf447e7 100644 --- a/lib/codeqa/metrics/codebase/near_duplicate_blocks_codebase.ex +++ b/lib/codeqa/metrics/codebase/near_duplicate_blocks_codebase.ex @@ -21,7 +21,7 @@ defmodule CodeQA.Metrics.Codebase.NearDuplicateBlocksCodebase do @impl true def analyze(files, opts \\ []) do ndb_opts = Keyword.get(opts, :near_duplicate_blocks, []) - max_pairs = Keyword.get(ndb_opts, :max_pairs_per_bucket, nil) + max_pairs = Keyword.get(ndb_opts, :max_pairs_per_bucket) workers = Keyword.get(opts, :workers, System.schedulers_online()) ndb_opts = @@ -31,7 +31,8 @@ defmodule CodeQA.Metrics.Codebase.NearDuplicateBlocksCodebase do pid = Keyword.fetch!(opts, :file_context_pid) all_blocks = - Enum.flat_map(files, fn {path, content} -> + files + |> Enum.flat_map(fn {path, content} -> ctx = FileContextServer.get(pid, content, path: path) NearDuplicateBlocks.label_blocks(ctx.blocks, path) end) diff --git a/lib/codeqa/metrics/codebase/similarity.ex b/lib/codeqa/metrics/codebase/similarity.ex index e20e556f..8b2f1f89 100644 --- a/lib/codeqa/metrics/codebase/similarity.ex +++ b/lib/codeqa/metrics/codebase/similarity.ex @@ -83,15 +83,15 @@ defmodule CodeQA.Metrics.Codebase.Similarity do |> Enum.with_index() |> Task.async_stream( fn {content, i} -> - fp = compute_fingerprints(content, opts) - {i, fp} + fingerprint = compute_fingerprints(content, opts) + {i, fingerprint} end, max_concurrency: workers, timeout: :infinity ) - |> Enum.map(fn {:ok, {i, fp}} -> + |> Enum.map(fn {:ok, {i, fingerprint}} -> maybe_print_fingerprint_progress(has_progress, i, length(contents)) - {i, fp} + {i, fingerprint} end) |> Map.new() @@ -125,8 +125,9 @@ defmodule CodeQA.Metrics.Codebase.Similarity do end defp index_fingerprint_set(set, doc_id, acc) do - Enum.reduce(set, acc, fn fp, idx_acc -> - Map.update(idx_acc, fp, [doc_id], &[doc_id | &1]) + set + |> Enum.reduce(acc, fn fingerprint, idx_acc -> + Map.update(idx_acc, fingerprint, [doc_id], &[doc_id | &1]) end) end @@ -179,8 +180,9 @@ defmodule CodeQA.Metrics.Codebase.Similarity do if has_progress, do: IO.puts(:stderr, "") - Enum.map(candidates, fn {{i, j}, jaccard} -> - {Enum.at(names, i), i, Enum.at(names, j), j, jaccard} + candidates + |> Enum.map(fn {{i, j}, jaccard} -> + {names |> Enum.at(i), i, names |> Enum.at(j), j, jaccard} end) end @@ -196,12 +198,14 @@ defmodule CodeQA.Metrics.Codebase.Similarity do collisions = count_collisions(set, inverted_index, i) size_a = MapSet.size(set) - name_a = Enum.at(names, i) + name_a = names |> Enum.at(i) is_target_a = MapSet.member?(target_set, name_a) collisions - |> Enum.filter(fn {j, _} -> is_target_a or MapSet.member?(target_set, Enum.at(names, j)) end) + |> Enum.filter(fn {j, _} -> + is_target_a or MapSet.member?(target_set, names |> Enum.at(j)) + end) |> Enum.reduce([], fn {j, intersection}, acc_pairs -> jaccard = compute_jaccard(size_a, MapSet.size(Map.get(fingerprints_by_id, j)), intersection) if jaccard >= threshold, do: [{{i, j}, jaccard} | acc_pairs], else: acc_pairs @@ -214,19 +218,22 @@ defmodule CodeQA.Metrics.Codebase.Similarity do end defp count_collisions(set, inverted_index, i) do - Enum.reduce(set, %{}, fn fp, coll_acc -> + set + |> Enum.reduce(%{}, fn fp, coll_acc -> inverted_index |> Map.get(fp, []) |> count_forward_docs(i, coll_acc) end) end defp count_forward_docs(docs, i, acc) do - Enum.reduce(docs, acc, fn doc_id, c_acc -> + docs + |> Enum.reduce(acc, fn doc_id, c_acc -> if doc_id > i, do: Map.update(c_acc, doc_id, 1, &(&1 + 1)), else: c_acc end) end defp merge_valid_pairs(valid_pairs, acc) do - Enum.reduce(valid_pairs, acc, fn {pair, jaccard}, inner_acc -> + valid_pairs + |> Enum.reduce(acc, fn {pair, jaccard}, inner_acc -> Map.put(inner_acc, pair, jaccard) end) end @@ -301,19 +308,19 @@ defmodule CodeQA.Metrics.Codebase.Similarity do defp build_results_map(computed_ncd, target_paths, target_set, top_n) do results = - Enum.reduce(computed_ncd, %{}, fn {name_a, name_b, ncd}, acc -> - acc = maybe_add_similarity(acc, name_a, name_b, ncd, target_set) + computed_ncd + |> Enum.reduce(%{}, fn {name_a, name_b, ncd}, acc -> + acc = acc |> maybe_add_similarity(name_a, name_b, ncd, target_set) maybe_add_similarity(acc, name_b, name_a, ncd, target_set) end) target_paths - |> Enum.map(fn path -> + |> Map.new(fn path -> similarities = Map.get(results, path, []) - sorted = Enum.sort_by(similarities, & &1["score"]) - sorted = if top_n, do: Enum.take(sorted, top_n), else: sorted + sorted = similarities |> Enum.sort_by(& &1["score"]) + sorted = if top_n, do: sorted |> Enum.take(top_n), else: sorted {path, sorted} end) - |> Enum.into(%{}) end defp maybe_add_similarity(acc, path, other_path, ncd, target_set) do @@ -329,21 +336,21 @@ defmodule CodeQA.Metrics.Codebase.Similarity do end end - defp compute_fingerprints(content, _opts) do - content - |> TokenNormalizer.normalize_structural() - |> Enum.map(& &1.kind) - |> Winnowing.kgrams(5) - |> MapSet.new() - end + defp compute_fingerprints(content, _opts), + do: + content + |> TokenNormalizer.normalize_structural() + |> Enum.map(& &1.kind) + |> Winnowing.kgrams(5) + |> MapSet.new() defp cross_file_density(contents) do individual_sum = contents - |> Enum.map(fn c -> byte_size(:zlib.compress(c)) end) + |> Enum.map(&byte_size(:zlib.compress(&1))) |> Enum.sum() - joined = Enum.intersperse(contents, "\n") + joined = contents |> Enum.intersperse("\n") combined = byte_size(:zlib.compress(joined)) Float.round(individual_sum / max(1, combined), 4) diff --git a/lib/codeqa/metrics/file/bradford.ex b/lib/codeqa/metrics/file/bradford.ex index 22b7bcee..9e06efea 100644 --- a/lib/codeqa/metrics/file/bradford.ex +++ b/lib/codeqa/metrics/file/bradford.ex @@ -32,9 +32,7 @@ defmodule CodeQA.Metrics.File.Bradford do @spec analyze(map()) :: map() @impl true - def analyze(%{tokens: []}) do - %{"k1" => 0.0, "k2" => 0.0, "k_ratio" => 0.0} - end + def analyze(%{tokens: []}), do: %{"k1" => 0.0, "k2" => 0.0, "k_ratio" => 0.0} def analyze(%{tokens: tokens}) do # Count tokens per line using the .line field, then rank densest-first — @@ -45,7 +43,7 @@ defmodule CodeQA.Metrics.File.Bradford do |> Enum.map(fn {_line, toks} -> length(toks) end) |> Enum.sort(:desc) - total = Enum.sum(counts) + total = counts |> Enum.sum() # Need at least 3 lines and 3 tokens to form meaningful zones. if total < 3 or length(counts) < 3 do @@ -53,13 +51,13 @@ defmodule CodeQA.Metrics.File.Bradford do else # Each zone should contain one third of all tokens. # We find zone boundaries by walking the ranked list until each third is filled. - third = total / 3 + target = total / 3 # n1: lines in zone 1 (the dense core — fewest lines, highest token density) # n2: lines in zone 2 (middle tier) # n3: all remaining lines (the sparse tail) - {n1, rest} = count_until(counts, third) - {n2, _} = count_until(rest, third) + {n1, rest} = count_until(counts, target) + {n2, _} = count_until(rest, target) n3 = length(counts) - n1 - n2 # k1 > 1 always: the middle zone always needs more lines than the core. diff --git a/lib/codeqa/metrics/file/branching.ex b/lib/codeqa/metrics/file/branching.ex index ce5e20a0..8b188ac6 100644 --- a/lib/codeqa/metrics/file/branching.ex +++ b/lib/codeqa/metrics/file/branching.ex @@ -48,9 +48,9 @@ defmodule CodeQA.Metrics.File.Branching do @spec analyze(CodeQA.Engine.FileContext.t()) :: map() @impl true - def analyze(%{lines: lines, tokens: tokens, content: content}) do - non_blank_count = Enum.count(lines, &(String.trim(&1) != "")) - branch_count = Enum.count(tokens, &MapSet.member?(@branching_keywords, &1.content)) + def analyze(%{content: content, lines: lines, tokens: tokens}) do + non_blank_count = lines |> Enum.count(&(String.trim(&1) != "")) + branch_count = tokens |> Enum.count(&MapSet.member?(@branching_keywords, &1.content)) density = if non_blank_count > 0, diff --git a/lib/codeqa/metrics/file/brevity.ex b/lib/codeqa/metrics/file/brevity.ex index bc0d9a62..e3e31394 100644 --- a/lib/codeqa/metrics/file/brevity.ex +++ b/lib/codeqa/metrics/file/brevity.ex @@ -1,4 +1,6 @@ defmodule CodeQA.Metrics.File.Brevity do + alias CodeQA.Math + @moduledoc """ Measures how well Brevity law holds in the token distribution. @@ -25,12 +27,12 @@ defmodule CodeQA.Metrics.File.Brevity do end def analyze(%{token_counts: token_counts}) do - pairs = Enum.map(token_counts, fn {token, freq} -> {String.length(token), freq} end) - lengths = Enum.map(pairs, &elem(&1, 0)) - freqs = Enum.map(pairs, &elem(&1, 1)) + pairs = token_counts |> Enum.map(fn {token, freq} -> {String.length(token), freq} end) + lengths = pairs |> Enum.map(&elem(&1, 0)) + freqs = pairs |> Enum.map(&elem(&1, 1)) %{ - "correlation" => CodeQA.Math.pearson_correlation_list(lengths, freqs), + "correlation" => Math.pearson_correlation_list(lengths, freqs), "slope" => log_log_slope(lengths, freqs), "sample_size" => map_size(token_counts) } @@ -40,7 +42,7 @@ defmodule CodeQA.Metrics.File.Brevity do log_lengths = lengths |> Enum.map(&:math.log(max(&1, 1))) |> Nx.tensor(type: :f64) log_freqs = freqs |> Enum.map(&:math.log(max(&1, 1))) |> Nx.tensor(type: :f64) - {slope, _intercept, _r_squared} = CodeQA.Math.linear_regression(log_lengths, log_freqs) + {slope, _intercept, _r_squared} = Math.linear_regression(log_lengths, log_freqs) case Nx.to_number(slope) do val when is_float(val) -> Float.round(val, 4) diff --git a/lib/codeqa/metrics/file/casing_entropy.ex b/lib/codeqa/metrics/file/casing_entropy.ex index 4256e0e6..698e6c08 100644 --- a/lib/codeqa/metrics/file/casing_entropy.ex +++ b/lib/codeqa/metrics/file/casing_entropy.ex @@ -40,9 +40,7 @@ defmodule CodeQA.Metrics.File.CasingEntropy do @spec analyze(map()) :: map() @impl true - def analyze(%{identifiers: []}) do - %{"entropy" => 0.0, "screaming_snake_density" => 0.0} - end + def analyze(%{identifiers: []}), do: %{"entropy" => 0.0, "screaming_snake_density" => 0.0} def analyze(%{identifiers: identifiers}) do counts = @@ -68,6 +66,9 @@ defmodule CodeQA.Metrics.File.CasingEntropy do end defp counts_to_output(counts) do - Map.new(counts, fn {k, v} -> {"#{k}_count", v} end) + for {k, v} <- counts do + {"#{k}_count", v} + end + |> Map.new() end end diff --git a/lib/codeqa/metrics/file/comment_structure.ex b/lib/codeqa/metrics/file/comment_structure.ex index 65bc0e0a..f9e89b92 100644 --- a/lib/codeqa/metrics/file/comment_structure.ex +++ b/lib/codeqa/metrics/file/comment_structure.ex @@ -27,10 +27,10 @@ defmodule CodeQA.Metrics.File.CommentStructure do @spec analyze(map()) :: map() @impl true def analyze(%{content: content, lines: lines}) do - non_blank = Enum.reject(lines, &(String.trim(&1) == "")) + non_blank = lines |> Enum.reject(&(String.trim(&1) == "")) non_blank_count = length(non_blank) - comment_count = Enum.count(non_blank, &Regex.match?(@comment_line, &1)) + comment_count = non_blank |> Enum.count(&Regex.match?(@comment_line, &1)) todo_count = @todo_marker |> Regex.scan(content) |> length() comment_ratio = diff --git a/lib/codeqa/metrics/file/compression.ex b/lib/codeqa/metrics/file/compression.ex index 9f0981b9..a9e6e496 100644 --- a/lib/codeqa/metrics/file/compression.ex +++ b/lib/codeqa/metrics/file/compression.ex @@ -20,15 +20,14 @@ defmodule CodeQA.Metrics.File.Compression do @spec analyze(map()) :: map() @impl true - def analyze(%{content: "", byte_count: 0}) do - %{ + def analyze(%{byte_count: 0, content: ""}), + do: %{ "raw_bytes" => 0, "zlib_bytes" => 0, "zlib_ratio" => 0.0, "redundancy" => 0.0, "unique_line_ratio" => 0.0 } - end def analyze(ctx) do raw_size = ctx.byte_count @@ -40,7 +39,7 @@ defmodule CodeQA.Metrics.File.Compression do unique_line_ratio = case length(non_blank) do 0 -> 0.0 - n -> Float.round(length(Enum.uniq(non_blank)) / n, 4) + n -> Float.round(length(non_blank |> Enum.uniq()) / n, 4) end %{ diff --git a/lib/codeqa/metrics/file/entropy.ex b/lib/codeqa/metrics/file/entropy.ex index 6533a21a..a0ca943c 100644 --- a/lib/codeqa/metrics/file/entropy.ex +++ b/lib/codeqa/metrics/file/entropy.ex @@ -31,9 +31,7 @@ defmodule CodeQA.Metrics.File.Entropy do @spec analyze(map()) :: map() @impl true - def analyze(ctx) do - Map.merge(char_entropy(ctx.content), token_entropy(ctx)) - end + def analyze(ctx), do: char_entropy(ctx.content) |> Map.merge(token_entropy(ctx)) defp char_entropy(""), do: zero_entropy_map("char") @@ -43,11 +41,10 @@ defmodule CodeQA.Metrics.File.Entropy do compute_entropy(counts, total, "char") end - defp token_entropy(%{tokens: [], token_counts: _token_counts}) do - Map.merge(zero_entropy_map("token"), %{"vocab_size" => 0, "total_tokens" => 0}) - end + defp token_entropy(%{token_counts: _token_counts, tokens: []}), + do: zero_entropy_map("token") |> Map.merge(%{"vocab_size" => 0, "total_tokens" => 0}) - defp token_entropy(%{tokens: tokens, token_counts: token_counts}) do + defp token_entropy(%{token_counts: token_counts, tokens: tokens}) do total = length(tokens) vocab_size = map_size(token_counts) @@ -55,20 +52,20 @@ defmodule CodeQA.Metrics.File.Entropy do Map.merge(entropy_map, %{"vocab_size" => vocab_size, "total_tokens" => total}) end - defp zero_entropy_map(prefix) do - %{ + defp zero_entropy_map(prefix), + do: %{ "#{prefix}_entropy" => 0.0, "#{prefix}_max_entropy" => 0.0, "#{prefix}_normalized" => 0.0 } - end defp compute_entropy(counts, total, prefix) do alphabet_size = map_size(counts) max_entropy = if alphabet_size > 1, do: :math.log2(alphabet_size), else: 0.0 entropy = - Enum.reduce(counts, 0.0, fn {_k, c}, acc -> + counts + |> Enum.reduce(0.0, fn {_k, c}, acc -> p = c / total acc - p * :math.log2(p) end) diff --git a/lib/codeqa/metrics/file/function_metrics.ex b/lib/codeqa/metrics/file/function_metrics.ex index 6a9bb0c6..58037480 100644 --- a/lib/codeqa/metrics/file/function_metrics.ex +++ b/lib/codeqa/metrics/file/function_metrics.ex @@ -79,11 +79,11 @@ defmodule CodeQA.Metrics.File.FunctionMetrics do n = length(lengths) avg_len = Float.round(Enum.sum(lengths) / n, 4) - max_len = Enum.max(lengths) + max_len = lengths |> Enum.max() n_p = length(param_counts) avg_params = Float.round(Enum.sum(param_counts) / n_p, 4) - max_params = Enum.max(param_counts) + max_params = param_counts |> Enum.max() %{ "function_count" => n, @@ -95,16 +95,7 @@ defmodule CodeQA.Metrics.File.FunctionMetrics do end end - defp count_params(line) do - case Regex.run(~r/\(([^)]*)\)/, line) do - [_, args] -> - args = String.trim(args) - if args == "", do: 0, else: count_top_level_commas(args) + 1 - - _ -> - 0 - end - end + defp count_params(line), do: Regex.run(~r/\(([^)]*)\)/, line) |> handle_count_params_run() defp count_top_level_commas(args) do args @@ -119,4 +110,13 @@ defmodule CodeQA.Metrics.File.FunctionMetrics do end) |> elem(1) end + + # FIXME: extracted automatically by ExtractCaseToHelper — review + # the parameter list and consider a better name. + defp handle_count_params_run([_, args]) do + args = String.trim(args) + if args == "", do: 0, else: count_top_level_commas(args) + 1 + end + + defp handle_count_params_run(_), do: 0 end diff --git a/lib/codeqa/metrics/file/halstead.ex b/lib/codeqa/metrics/file/halstead.ex index 157f67b5..7ad3137d 100644 --- a/lib/codeqa/metrics/file/halstead.ex +++ b/lib/codeqa/metrics/file/halstead.ex @@ -105,12 +105,11 @@ defmodule CodeQA.Metrics.File.Halstead do end end - defp scan_frequencies(regex, content) do - regex |> Regex.scan(content) |> List.flatten() |> Enum.frequencies() - end + defp scan_frequencies(regex, content), + do: regex |> Regex.scan(content) |> List.flatten() |> Enum.frequencies() - defp zero_result do - %{ + defp zero_result, + do: %{ "n1_unique_operators" => 0, "n2_unique_operands" => 0, "N1_total_operators" => 0, @@ -123,10 +122,9 @@ defmodule CodeQA.Metrics.File.Halstead do "estimated_bugs" => 0.0, "time_to_implement_seconds" => 0.0 } - end - defp base_result(n1, n2, big_n1, big_n2, vocabulary, length) do - %{ + defp base_result(n1, n2, big_n1, big_n2, vocabulary, length), + do: %{ "n1_unique_operators" => n1, "n2_unique_operands" => n2, "N1_total_operators" => big_n1, @@ -138,5 +136,4 @@ defmodule CodeQA.Metrics.File.Halstead do "effort" => 0.0, "estimated_bugs" => 0.0 } - end end diff --git a/lib/codeqa/metrics/file/heaps.ex b/lib/codeqa/metrics/file/heaps.ex index b7cae9c3..2ddf066c 100644 --- a/lib/codeqa/metrics/file/heaps.ex +++ b/lib/codeqa/metrics/file/heaps.ex @@ -1,4 +1,6 @@ defmodule CodeQA.Metrics.File.Heaps do + alias CodeQA.Math + @moduledoc """ Fits Heaps' law to vocabulary growth in a file. @@ -21,9 +23,7 @@ defmodule CodeQA.Metrics.File.Heaps do @spec analyze(map()) :: map() @impl true - def analyze(%{tokens: []}) do - %{"k" => 0.0, "beta" => 0.0, "r_squared" => 0.0} - end + def analyze(%{tokens: []}), do: %{"k" => 0.0, "beta" => 0.0, "r_squared" => 0.0} def analyze(%{tokens: tokens}) do total = length(tokens) @@ -42,7 +42,7 @@ defmodule CodeQA.Metrics.File.Heaps do tokens |> Enum.with_index(1) |> Enum.reduce({MapSet.new(), []}, fn {token, i}, {seen, points} -> - seen = MapSet.put(seen, token.content) + seen = seen |> MapSet.put(token.content) if rem(i, interval) == 0 do {seen, [{i, MapSet.size(seen)} | points]} @@ -56,13 +56,13 @@ defmodule CodeQA.Metrics.File.Heaps do defp fit_heaps(data_points) do # log(V) = log(k) + β * log(n) → linear regression in log-space - ns = Enum.map(data_points, &elem(&1, 0)) - vs = Enum.map(data_points, &elem(&1, 1)) + ns = data_points |> Enum.map(&elem(&1, 0)) + vs = data_points |> Enum.map(&elem(&1, 1)) log_ns = Nx.tensor(ns, type: :f64) |> Nx.log() log_vs = Nx.tensor(vs, type: :f64) |> Nx.log() - {slope, intercept, r_squared} = CodeQA.Math.linear_regression(log_ns, log_vs) + {slope, intercept, r_squared} = Math.linear_regression(log_ns, log_vs) k = :math.exp(Nx.to_number(intercept)) beta = Nx.to_number(slope) diff --git a/lib/codeqa/metrics/file/identifier_length_variance.ex b/lib/codeqa/metrics/file/identifier_length_variance.ex index 424b95b5..d165b31b 100644 --- a/lib/codeqa/metrics/file/identifier_length_variance.ex +++ b/lib/codeqa/metrics/file/identifier_length_variance.ex @@ -21,12 +21,11 @@ defmodule CodeQA.Metrics.File.IdentifierLengthVariance do @spec analyze(map()) :: map() @impl true - def analyze(%{identifiers: []}) do - %{"mean" => 0.0, "variance" => 0.0, "std_dev" => 0.0, "max" => 0} - end + def analyze(%{identifiers: []}), + do: %{"mean" => 0.0, "variance" => 0.0, "std_dev" => 0.0, "max" => 0} def analyze(%{identifiers: identifiers}) do - lengths = Enum.map(identifiers, &String.length/1) + lengths = identifiers |> Enum.map(&String.length/1) n = length(lengths) mean = Enum.sum(lengths) / n @@ -41,7 +40,7 @@ defmodule CodeQA.Metrics.File.IdentifierLengthVariance do "mean" => Float.round(mean, 4), "variance" => Float.round(variance, 4), "std_dev" => Float.round(std_dev, 4), - "max" => Enum.max(lengths) + "max" => lengths |> Enum.max() } end end diff --git a/lib/codeqa/metrics/file/indentation.ex b/lib/codeqa/metrics/file/indentation.ex index 75923b98..3ef9c656 100644 --- a/lib/codeqa/metrics/file/indentation.ex +++ b/lib/codeqa/metrics/file/indentation.ex @@ -21,10 +21,10 @@ defmodule CodeQA.Metrics.File.Indentation do @spec analyze(map()) :: map() @impl true def analyze(%{lines: lines}) do - uses_tabs = Enum.any?(lines, &String.match?(&1, ~r/^\t/)) + uses_tabs = lines |> Enum.any?(&String.match?(&1, ~r/^\t/)) total_lines = length(lines) - blank_count = Enum.count(lines, &(String.trim(&1) == "")) + blank_count = lines |> Enum.count(&(String.trim(&1) == "")) blank_line_ratio = if total_lines > 0, do: Float.round(blank_count / total_lines, 4), else: 0.0 @@ -57,7 +57,7 @@ defmodule CodeQA.Metrics.File.Indentation do %{ "mean_depth" => Float.round(mean, 4), "variance" => Float.round(variance, 4), - "max_depth" => Enum.max(depths), + "max_depth" => depths |> Enum.max(), "uses_tabs" => uses_tabs, "blank_line_ratio" => blank_line_ratio } diff --git a/lib/codeqa/metrics/file/line_patterns.ex b/lib/codeqa/metrics/file/line_patterns.ex index e8b2b452..ae017d19 100644 --- a/lib/codeqa/metrics/file/line_patterns.ex +++ b/lib/codeqa/metrics/file/line_patterns.ex @@ -38,7 +38,7 @@ defmodule CodeQA.Metrics.File.LinePatterns do "string_literal_ratio" => 0.0 } else - blank_count = Enum.count(lines, &(String.trim(&1) == "")) + blank_count = lines |> Enum.count(&(String.trim(&1) == "")) blank_ratio = Float.round(blank_count / total_lines, 4) non_blank = lines |> Enum.map(&String.trim/1) |> Enum.reject(&(&1 == "")) @@ -46,7 +46,7 @@ defmodule CodeQA.Metrics.File.LinePatterns do unique_ratio = if non_blank == [], do: 1.0, - else: Float.round(length(Enum.uniq(non_blank)) / length(non_blank), 4) + else: Float.round(length(non_blank |> Enum.uniq()) / length(non_blank), 4) string_count = @string_literal |> Regex.scan(content) |> length() diff --git a/lib/codeqa/metrics/file/near_duplicate_blocks.ex b/lib/codeqa/metrics/file/near_duplicate_blocks.ex index e1e0c08a..1dbfa452 100644 --- a/lib/codeqa/metrics/file/near_duplicate_blocks.ex +++ b/lib/codeqa/metrics/file/near_duplicate_blocks.ex @@ -45,7 +45,8 @@ defmodule CodeQA.Metrics.File.NearDuplicateBlocks do @spec analyze([{String.t(), String.t()}], keyword()) :: map() def analyze(labeled_content, opts) do all_blocks = - Enum.flat_map(labeled_content, fn {path, content} -> + labeled_content + |> Enum.flat_map(fn {path, content} -> lang_mod = Language.detect(path) tokens = TokenNormalizer.normalize_structural(content) @@ -65,7 +66,7 @@ defmodule CodeQA.Metrics.File.NearDuplicateBlocks do @spec analyze_from_blocks([Node.t()], keyword()) :: map() def analyze_from_blocks(all_blocks, opts) do workers = Keyword.get(opts, :workers, System.schedulers_online()) - max_pairs = Keyword.get(opts, :max_pairs_per_bucket, nil) + max_pairs = Keyword.get(opts, :max_pairs_per_bucket) include_pairs = Keyword.get(opts, :include_pairs, false) block_count = length(all_blocks) @@ -84,20 +85,18 @@ defmodule CodeQA.Metrics.File.NearDuplicateBlocks do end result = - Map.merge(result, %{"block_count" => block_count, "sub_block_count" => sub_block_count}) + result |> Map.merge(%{"block_count" => block_count, "sub_block_count" => sub_block_count}) - case include_pairs do - true -> - pairs_result = - for d <- 0..@max_bucket, into: %{} do - {"near_dup_block_d#{d}_pairs", - Map.get(buckets, d, %{pairs: []}).pairs |> format_pairs()} - end - - Map.merge(result, pairs_result) + if include_pairs do + pairs_result = + for d <- 0..@max_bucket, into: %{} do + {"near_dup_block_d#{d}_pairs", + Map.get(buckets, d, %{pairs: []}).pairs |> format_pairs()} + end - false -> - result + Map.merge(result, pairs_result) + else + result end end @@ -110,7 +109,8 @@ defmodule CodeQA.Metrics.File.NearDuplicateBlocks do @doc false def label_blocks(blocks, path) do - Enum.map(blocks, fn block -> + blocks + |> Enum.map(fn block -> label = if block.start_line, do: "#{path}:#{block.start_line}", else: path %{block | label: label} end) @@ -124,7 +124,7 @@ defmodule CodeQA.Metrics.File.NearDuplicateBlocks do # analyze_from_blocks gets both without a redundant NodeProtocol.children pass. defp do_find_pairs(blocks, opts) do workers = Keyword.get(opts, :workers, System.schedulers_online()) - max_pairs = Keyword.get(opts, :max_pairs_per_bucket, nil) + max_pairs = Keyword.get(opts, :max_pairs_per_bucket) idf_max_freq = Keyword.get(opts, :idf_max_freq, 1.0) has_progress = Keyword.has_key?(opts, :on_progress) @@ -135,7 +135,7 @@ defmodule CodeQA.Metrics.File.NearDuplicateBlocks do # sub_block_count derived from the already-computed children_count in decorated. sub_block_count = - Enum.reduce(decorated, 0, fn {_, _, _, _, _, cc, _, _}, acc -> acc + cc end) + decorated |> Enum.sum_by(fn {_, _, _, _, _, cc, _, _} -> cc end) # IDF: prune bigrams that appear in more than idf_max_freq fraction of blocks. # These are structural noise (e.g. "end nil", "return false") that inflate the @@ -144,7 +144,7 @@ defmodule CodeQA.Metrics.File.NearDuplicateBlocks do decorated = if MapSet.size(pruned) > 0 do - Enum.map(decorated, &Candidates.prune_bigrams(&1, pruned)) + decorated |> Enum.map(&Candidates.prune_bigrams(&1, pruned)) else decorated end @@ -171,7 +171,8 @@ defmodule CodeQA.Metrics.File.NearDuplicateBlocks do end defp bucket_pairs(raw_pairs, max_pairs) do - Enum.reduce(raw_pairs, %{}, fn {bucket, pair}, acc -> + raw_pairs + |> Enum.reduce(%{}, fn {bucket, pair}, acc -> Map.update( acc, bucket, @@ -191,7 +192,8 @@ defmodule CodeQA.Metrics.File.NearDuplicateBlocks do defp maybe_append(list, pair, _max, _count), do: [pair | list] defp format_pairs(pairs) do - Enum.map(pairs, fn {label_a, label_b} -> + pairs + |> Enum.map(fn {label_a, label_b} -> %{"source_a" => label_a, "source_b" => label_b} end) end diff --git a/lib/codeqa/metrics/file/near_duplicate_blocks/candidates.ex b/lib/codeqa/metrics/file/near_duplicate_blocks/candidates.ex index 522f5481..8aca991e 100644 --- a/lib/codeqa/metrics/file/near_duplicate_blocks/candidates.ex +++ b/lib/codeqa/metrics/file/near_duplicate_blocks/candidates.ex @@ -11,7 +11,8 @@ defmodule CodeQA.Metrics.File.NearDuplicateBlocks.Candidates do """ alias CodeQA.AST.Classification.NodeProtocol - alias CodeQA.AST.Lexing.{NewlineToken, WhitespaceToken} + alias CodeQA.AST.Lexing.NewlineToken + alias CodeQA.AST.Lexing.WhitespaceToken alias CodeQA.Metrics.File.NearDuplicateBlocks.Distance # Pre-compute token kind strings to avoid repeated function calls in the hot path. @@ -31,8 +32,8 @@ defmodule CodeQA.Metrics.File.NearDuplicateBlocks.Candidates do |> Enum.map(fn {block, i} -> values = canonical_values(NodeProtocol.flat_tokens(block)) children_count = length(NodeProtocol.children(block)) - newline_count = Enum.count(values, &(&1 == @nl_kind)) - bigrams = Enum.chunk_every(values, 2, 1, :discard) + newline_count = values |> Enum.count(&(&1 == @nl_kind)) + bigrams = values |> Enum.chunk_every(2, 1, :discard) {i, block, values, :erlang.phash2(values), length(values), children_count, newline_count, bigrams} @@ -45,10 +46,11 @@ defmodule CodeQA.Metrics.File.NearDuplicateBlocks.Candidates do """ @spec build_indexes([tuple()]) :: {map(), map()} def build_indexes(decorated) do - Enum.reduce(decorated, {%{}, %{}}, fn {idx, _block, _values, hash, _len, _children, _newlines, - bigrams}, - {exact_acc, shingle_acc} -> - exact_acc = Map.update(exact_acc, hash, [idx], &[idx | &1]) + decorated + |> Enum.reduce({%{}, %{}}, fn {idx, _block, _values, hash, _len, _children, _newlines, + bigrams}, + {exact_acc, shingle_acc} -> + exact_acc = exact_acc |> Map.update(hash, [idx], &[idx | &1]) shingle_acc = bigrams @@ -87,9 +89,8 @@ defmodule CodeQA.Metrics.File.NearDuplicateBlocks.Candidates do @doc "Remove bigrams whose hash is in the pruned set from a decorated tuple." @spec prune_bigrams(tuple(), MapSet.t()) :: tuple() - def prune_bigrams({i, b, v, h, l, c, n, bigrams}, pruned) do - {i, b, v, h, l, c, n, Enum.reject(bigrams, &MapSet.member?(pruned, :erlang.phash2(&1)))} - end + def prune_bigrams({i, b, v, h, l, c, n, bigrams}, pruned), + do: {i, b, v, h, l, c, n, bigrams |> Enum.reject(&MapSet.member?(pruned, :erlang.phash2(&1)))} @doc """ Find all near-duplicate pairs for a single block against the full decorated array. @@ -135,7 +136,8 @@ defmodule CodeQA.Metrics.File.NearDuplicateBlocks.Candidates do # counter is O(1) (a single :counters.get), much cheaper than the previous # HAMT-based Map.update accumulator on a per-block basis. touched = - Enum.reduce(bigrams_a, [], fn bigram, touched_acc -> + bigrams_a + |> Enum.reduce([], fn bigram, touched_acc -> h = :erlang.phash2(bigram) shingle_index @@ -157,7 +159,8 @@ defmodule CodeQA.Metrics.File.NearDuplicateBlocks.Candidates do end near_pairs = - Enum.flat_map(touched, fn j -> + touched + |> Enum.flat_map(fn j -> count = :counters.get(counter, j + 1) if count >= min_shared and not in_exact?.(j) do @@ -187,7 +190,8 @@ defmodule CodeQA.Metrics.File.NearDuplicateBlocks.Candidates do # one drop_while (strip trailing), one :lists.reverse. defp canonical_values(tokens) do {reversed, _in_content} = - Enum.reduce(tokens, {[], false}, fn t, {acc, in_content} -> + tokens + |> Enum.reduce({[], false}, fn t, {acc, in_content} -> kind = t.kind is_skip = kind == @nl_kind or kind == @ws_kind @@ -212,9 +216,9 @@ defmodule CodeQA.Metrics.File.NearDuplicateBlocks.Candidates do if structure_compatible?(children_a, newlines_a, children_b, newlines_b) and abs(len_a - len_b) <= max_allowed do - ed = Distance.token_edit_distance_bounded(values_a, values_b, max_allowed) + edit_distance = Distance.token_edit_distance_bounded(values_a, values_b, max_allowed) - case Distance.percent_bucket(ed, min_count) do + case Distance.percent_bucket(edit_distance, min_count) do nil -> [] bucket when bucket > 0 -> [{bucket, {block_a.label, block_b.label}}] # ed=0 handled by exact_pairs above diff --git a/lib/codeqa/metrics/file/near_duplicate_blocks/distance.ex b/lib/codeqa/metrics/file/near_duplicate_blocks/distance.ex index 475aa3e2..48189641 100644 --- a/lib/codeqa/metrics/file/near_duplicate_blocks/distance.ex +++ b/lib/codeqa/metrics/file/near_duplicate_blocks/distance.ex @@ -32,7 +32,7 @@ defmodule CodeQA.Metrics.File.NearDuplicateBlocks.Distance do a_arr = List.to_tuple(a) b_arr = List.to_tuple(b) lb = tuple_size(b_arr) - init_row = List.to_tuple(Enum.to_list(0..lb)) + init_row = List.to_tuple(0..lb |> Enum.to_list()) result_row = levenshtein_rows(a_arr, b_arr, tuple_size(a_arr), lb, init_row, 1) elem(result_row, lb) end @@ -67,7 +67,7 @@ defmodule CodeQA.Metrics.File.NearDuplicateBlocks.Distance do a_arr = List.to_tuple(a) b_arr = List.to_tuple(b) lb = tuple_size(b_arr) - init_row = List.to_tuple(Enum.to_list(0..lb)) + init_row = List.to_tuple(0..lb |> Enum.to_list()) levenshtein_rows_bounded(a_arr, b_arr, tuple_size(a_arr), lb, init_row, max_distance, 1) end @@ -102,10 +102,10 @@ defmodule CodeQA.Metrics.File.NearDuplicateBlocks.Distance do def percent_bucket(0, _min_count), do: 0 def percent_bucket(ed, min_count) do - pct = ed / min_count + percent = ed / min_count @bucket_thresholds - |> Enum.find(fn {bucket, threshold} -> bucket > 0 and pct <= threshold end) + |> Enum.find(fn {bucket, threshold} -> bucket > 0 and percent <= threshold end) |> case do {bucket, _} -> bucket nil -> nil diff --git a/lib/codeqa/metrics/file/near_duplicate_blocks_file.ex b/lib/codeqa/metrics/file/near_duplicate_blocks_file.ex index 7a15e749..b1c0a045 100644 --- a/lib/codeqa/metrics/file/near_duplicate_blocks_file.ex +++ b/lib/codeqa/metrics/file/near_duplicate_blocks_file.ex @@ -22,7 +22,7 @@ defmodule CodeQA.Metrics.File.NearDuplicateBlocksFile do @impl true def analyze(%{blocks: nil}), do: Map.new(keys(), fn k -> {k, 0} end) - def analyze(%{path: path, blocks: blocks}) when is_list(blocks) do + def analyze(%{blocks: blocks, path: path}) when is_list(blocks) do NearDuplicateBlocks.analyze_from_blocks( NearDuplicateBlocks.label_blocks(blocks, path || "unknown"), [] diff --git a/lib/codeqa/metrics/file/ngram.ex b/lib/codeqa/metrics/file/ngram.ex index b100513c..5b842673 100644 --- a/lib/codeqa/metrics/file/ngram.ex +++ b/lib/codeqa/metrics/file/ngram.ex @@ -33,7 +33,7 @@ defmodule CodeQA.Metrics.File.Ngram do @spec analyze(map()) :: map() @impl true def analyze(ctx) do - tokens = Enum.map(ctx.tokens, & &1.content) + tokens = ctx.tokens |> Enum.map(& &1.content) bigram_stats = ngram_stats(tokens, 2) |> rename_keys("bigram") trigram_stats = ngram_stats(tokens, 3) |> rename_keys("trigram") @@ -53,7 +53,7 @@ defmodule CodeQA.Metrics.File.Ngram do defp ngram_stats(tokens, n) do grams = tokens |> Enum.chunk_every(n, 1, :discard) - counts = Enum.frequencies(grams) + counts = grams |> Enum.frequencies() total = length(grams) unique = map_size(counts) repeated = counts |> Map.values() |> Enum.filter(&(&1 > 1)) |> Enum.sum() @@ -70,6 +70,9 @@ defmodule CodeQA.Metrics.File.Ngram do end defp rename_keys(map, prefix) do - Map.new(map, fn {k, v} -> {"#{prefix}_#{k}", v} end) + for {k, v} <- map do + {"#{prefix}_#{k}", v} + end + |> Map.new() end end diff --git a/lib/codeqa/metrics/file/punctuation_density.ex b/lib/codeqa/metrics/file/punctuation_density.ex index 8b42ee41..af0b07de 100644 --- a/lib/codeqa/metrics/file/punctuation_density.ex +++ b/lib/codeqa/metrics/file/punctuation_density.ex @@ -13,8 +13,8 @@ defmodule CodeQA.Metrics.File.PunctuationDensity do def name, do: "punctuation_density" @impl true - def keys do - [ + def keys, + do: [ "question_mark_density", "exclamation_density", "dot_count", @@ -25,7 +25,6 @@ defmodule CodeQA.Metrics.File.PunctuationDensity do "arrow_density", "colon_suffix_density" ] - end # identifier-like token (starts with letter/underscore) ending with non-alphanumeric non-whitespace @id_nonalpha_suffix ~r/[a-zA-Z_]\w*[^\w\s]/ @@ -86,11 +85,7 @@ defmodule CodeQA.Metrics.File.PunctuationDensity do end end - defp count_char(content, char) do - content |> String.graphemes() |> Enum.count(&(&1 == char)) - end + defp count_char(content, char), do: content |> String.graphemes() |> Enum.count(&(&1 == char)) - defp count_matches(content, regex) do - regex |> Regex.scan(content) |> length() - end + defp count_matches(content, regex), do: regex |> Regex.scan(content) |> length() end diff --git a/lib/codeqa/metrics/file/readability.ex b/lib/codeqa/metrics/file/readability.ex index 3e1bd2c4..2a23763c 100644 --- a/lib/codeqa/metrics/file/readability.ex +++ b/lib/codeqa/metrics/file/readability.ex @@ -60,7 +60,7 @@ defmodule CodeQA.Metrics.File.Readability do {avg_sub_words, complex_fraction} = if words != [] do - sub_counts = Enum.map(words, &length(split_identifier(&1))) + sub_counts = words |> Enum.map(&length(split_identifier(&1))) avg = Enum.sum(sub_counts) / length(sub_counts) complex = Enum.count(sub_counts, &(&1 > 2)) / length(sub_counts) {avg, complex} @@ -96,7 +96,10 @@ defmodule CodeQA.Metrics.File.Readability do end defp split_camel_case([], []), do: [] - defp split_camel_case([], [current | rest]), do: Enum.reverse([Enum.reverse(current) | rest]) + + defp split_camel_case([], [current | rest]), + do: [Enum.reverse(current) | rest] |> Enum.reverse() + defp split_camel_case([char | rest], []), do: split_camel_case(rest, [[char]]) defp split_camel_case([char | rest], [current | acc_rest]) do diff --git a/lib/codeqa/metrics/file/rfc.ex b/lib/codeqa/metrics/file/rfc.ex index 5416c684..9fa69018 100644 --- a/lib/codeqa/metrics/file/rfc.ex +++ b/lib/codeqa/metrics/file/rfc.ex @@ -39,7 +39,7 @@ defmodule CodeQA.Metrics.File.RFC do @spec analyze(CodeQA.Engine.FileContext.t()) :: map() @impl true - def analyze(%{tokens: tokens, line_count: line_count}) do + def analyze(%{line_count: line_count, tokens: tokens}) do {func_def_count, call_targets} = scan_tokens(tokens) distinct_call_count = MapSet.size(call_targets) @@ -62,7 +62,7 @@ defmodule CodeQA.Metrics.File.RFC do # Uses a sliding window of two adjacent tokens. defp scan_tokens(tokens) do tokens - |> Enum.zip(Enum.drop(tokens, 1)) + |> Enum.zip(tokens |> Enum.drop(1)) |> Enum.reduce({0, MapSet.new()}, fn {tok, next}, {defs, calls} -> cond do # Function definition: keyword followed by an identifier diff --git a/lib/codeqa/metrics/file/separator_counts.ex b/lib/codeqa/metrics/file/separator_counts.ex index 62586560..381ac45c 100644 --- a/lib/codeqa/metrics/file/separator_counts.ex +++ b/lib/codeqa/metrics/file/separator_counts.ex @@ -17,28 +17,26 @@ defmodule CodeQA.Metrics.File.SeparatorCounts do @spec analyze(map()) :: map() @impl true - def analyze(%{content: content}) do - %{ + def analyze(%{content: content}), + do: %{ "underscore_count" => count(content, "_"), "hyphen_count" => count(content, "-"), "slash_count" => count(content, "/"), "dot_count" => count(content, ".") } - end @impl true - def analyze_loo(baseline, block_content) do - %{ + def analyze_loo(baseline, block_content), + do: %{ "underscore_count" => baseline["underscore_count"] - count(block_content, "_"), "hyphen_count" => baseline["hyphen_count"] - count(block_content, "-"), "slash_count" => baseline["slash_count"] - count(block_content, "/"), "dot_count" => baseline["dot_count"] - count(block_content, ".") } - end - defp count(content, char) do - content - |> String.graphemes() - |> Enum.count(&(&1 == char)) - end + defp count(content, char), + do: + content + |> String.graphemes() + |> Enum.count(&(&1 == char)) end diff --git a/lib/codeqa/metrics/file/vocabulary.ex b/lib/codeqa/metrics/file/vocabulary.ex index 496cc68a..d38cb4e2 100644 --- a/lib/codeqa/metrics/file/vocabulary.ex +++ b/lib/codeqa/metrics/file/vocabulary.ex @@ -55,14 +55,14 @@ defmodule CodeQA.Metrics.File.Vocabulary do end defp compute_mattr(identifiers, total) when total < @window_size do - length(Enum.uniq(identifiers)) / max(total, 1) + length(identifiers |> Enum.uniq()) / max(total, 1) end defp compute_mattr(identifiers, _total) do # Rolling frequency map optimization: O(N) instead of O(N*K) # Start with the first window - {first_window, rest} = Enum.split(identifiers, @window_size) - initial_freqs = Enum.frequencies(first_window) + {first_window, rest} = identifiers |> Enum.split(@window_size) + initial_freqs = first_window |> Enum.frequencies() initial_count = map_size(initial_freqs) # Use a recursive reducer to slide the window diff --git a/lib/codeqa/metrics/file/vowel_density.ex b/lib/codeqa/metrics/file/vowel_density.ex index f3f53de5..a99d79bf 100644 --- a/lib/codeqa/metrics/file/vowel_density.ex +++ b/lib/codeqa/metrics/file/vowel_density.ex @@ -29,9 +29,9 @@ defmodule CodeQA.Metrics.File.VowelDensity do else {vowels, chars} = list - |> Enum.reduce({0, 0}, fn id, {v, c} -> - id_chars = String.length(id) - id_vowels = id |> String.graphemes() |> Enum.count(&MapSet.member?(@vowels, &1)) + |> Enum.reduce({0, 0}, fn identifier, {v, c} -> + id_chars = String.length(identifier) + id_vowels = identifier |> String.graphemes() |> Enum.count(&MapSet.member?(@vowels, &1)) {v + id_vowels, c + id_chars} end) diff --git a/lib/codeqa/metrics/file/winnowing.ex b/lib/codeqa/metrics/file/winnowing.ex index d725a388..2af454f5 100644 --- a/lib/codeqa/metrics/file/winnowing.ex +++ b/lib/codeqa/metrics/file/winnowing.ex @@ -31,7 +31,5 @@ defmodule CodeQA.Metrics.File.Winnowing do # Hash the token list directly to preserve token boundaries. # Joining to a string first would allow hash collisions across different # token sequences that produce the same concatenated string. - defp hash_sequence(sequence) do - :erlang.phash2(sequence) - end + defp hash_sequence(sequence), do: sequence |> :erlang.phash2() end diff --git a/lib/codeqa/metrics/file/zipf.ex b/lib/codeqa/metrics/file/zipf.ex index b03a07c5..b27d71aa 100644 --- a/lib/codeqa/metrics/file/zipf.ex +++ b/lib/codeqa/metrics/file/zipf.ex @@ -1,4 +1,6 @@ defmodule CodeQA.Metrics.File.Zipf do + alias CodeQA.Math + @moduledoc """ Fits Zipf's law to the token frequency distribution. @@ -19,11 +21,10 @@ defmodule CodeQA.Metrics.File.Zipf do @spec analyze(map()) :: map() @impl true - def analyze(%{tokens: [], token_counts: _token_counts}) do - %{"exponent" => 0.0, "r_squared" => 0.0, "vocab_size" => 0, "total_tokens" => 0} - end + def analyze(%{token_counts: _token_counts, tokens: []}), + do: %{"exponent" => 0.0, "r_squared" => 0.0, "vocab_size" => 0, "total_tokens" => 0} - def analyze(%{tokens: tokens, token_counts: token_counts}) do + def analyze(%{token_counts: token_counts, tokens: tokens}) do frequencies = token_counts |> Map.values() |> Enum.sort(:desc) vocab_size = length(frequencies) total_tokens = length(tokens) @@ -54,7 +55,7 @@ defmodule CodeQA.Metrics.File.Zipf do log_ranks = Nx.log(ranks) log_freqs = Nx.log(freqs) - {slope, _intercept, r_squared} = CodeQA.Math.linear_regression(log_ranks, log_freqs) + {slope, _intercept, r_squared} = Math.linear_regression(log_ranks, log_freqs) # Zipf: freq ∝ rank^(-s), so slope is negative; negate to return the positive exponent s {Float.round(-Nx.to_number(slope), 4), Float.round(Nx.to_number(r_squared), 4)} diff --git a/lib/codeqa/metrics/post_processing/menzerath.ex b/lib/codeqa/metrics/post_processing/menzerath.ex index 4b5b10cf..5cc261d4 100644 --- a/lib/codeqa/metrics/post_processing/menzerath.ex +++ b/lib/codeqa/metrics/post_processing/menzerath.ex @@ -38,9 +38,10 @@ defmodule CodeQA.Metrics.PostProcessing.Menzerath do @impl true def analyze(pipeline_result, files_map, _opts) do file_scores = - Map.new(files_map, fn {path, content} -> + for {path, content} <- files_map do {path, %{"menzerath" => score_file(content)}} - end) + end + |> Map.new() codebase_score = compute_codebase_score(pipeline_result) @@ -52,28 +53,27 @@ defmodule CodeQA.Metrics.PostProcessing.Menzerath do # --- file-level scoring --- - defp score_file("") do - %{ + defp score_file(""), + do: %{ "blocks" => [], "mean_ratio" => 0.0, "max_ratio" => 0.0, "violation_count" => 0, "insight" => "Empty file." } - end defp score_file(content) do file_lines = content |> String.split("\n") |> length() root_tokens = TokenNormalizer.normalize_structural(content) top_nodes = Parser.detect_blocks(root_tokens, Unknown) - blocks = Enum.map(top_nodes, &score_node(&1, file_lines)) + blocks = top_nodes |> Enum.map(&score_node(&1, file_lines)) all_ratios = collect_ratios(blocks) n = length(all_ratios) mean_ratio = if(n == 0, do: 0.0, else: round4(Enum.sum(all_ratios) / n)) - max_ratio = if(n == 0, do: 0.0, else: round4(Enum.max(all_ratios))) - violation_count = Enum.count(all_ratios, &(&1 >= @violation_threshold)) + max_ratio = if(n == 0, do: 0.0, else: round4(all_ratios |> Enum.max())) + violation_count = all_ratios |> Enum.count(&(&1 >= @violation_threshold)) %{ "blocks" => blocks, @@ -105,7 +105,7 @@ defmodule CodeQA.Metrics.PostProcessing.Menzerath do defp score_node(node, parent_lines) do ratio = if parent_lines > 0, do: round4(node.line_count / parent_lines), else: 0.0 - children = Enum.map(node.children, &score_node(&1, node.line_count)) + children = node.children |> Enum.map(&score_node(&1, node.line_count)) base = %{ "start_line" => node.start_line, @@ -122,7 +122,7 @@ defmodule CodeQA.Metrics.PostProcessing.Menzerath do base kids -> - child_ratios = Enum.map(kids, & &1["ratio"]) + child_ratios = kids |> Enum.map(& &1["ratio"]) avg = round4(Enum.sum(child_ratios) / length(child_ratios)) base @@ -154,7 +154,8 @@ defmodule CodeQA.Metrics.PostProcessing.Menzerath do end defp collect_ratios(blocks) do - Enum.flat_map(blocks, fn block -> + blocks + |> Enum.flat_map(fn block -> [block["ratio"] | collect_ratios(block["children"])] end) end @@ -189,8 +190,8 @@ defmodule CodeQA.Metrics.PostProcessing.Menzerath do "Not enough files with function data to compute Menzerath conformance (need ≥ 3, got #{n})." } else - xs = Enum.map(pairs, &elem(&1, 0)) - ys = Enum.map(pairs, &elem(&1, 1)) + xs = pairs |> Enum.map(&elem(&1, 0)) + ys = pairs |> Enum.map(&elem(&1, 1)) correlation = round4(pearson(xs, ys)) {exponent, r_squared} = fit_power_law(xs, ys) @@ -221,11 +222,11 @@ defmodule CodeQA.Metrics.PostProcessing.Menzerath do defp pearson(xs, ys) do n = length(xs) - sum_x = Enum.sum(xs) - sum_y = Enum.sum(ys) - sum_xy = Enum.zip(xs, ys) |> Enum.reduce(0.0, fn {x, y}, acc -> acc + x * y end) - sum_x2 = Enum.reduce(xs, 0.0, fn x, acc -> acc + x * x end) - sum_y2 = Enum.reduce(ys, 0.0, fn y, acc -> acc + y * y end) + sum_x = xs |> Enum.sum() + sum_y = ys |> Enum.sum() + sum_xy = xs |> Enum.zip(ys) |> Enum.reduce(0.0, fn {x, y}, acc -> acc + x * y end) + sum_x2 = xs |> Enum.reduce(0.0, fn x, acc -> acc + x * x end) + sum_y2 = ys |> Enum.reduce(0.0, fn y, acc -> acc + y * y end) num = n * sum_xy - sum_x * sum_y den = :math.sqrt((n * sum_x2 - sum_x * sum_x) * (n * sum_y2 - sum_y * sum_y)) @@ -236,20 +237,21 @@ defmodule CodeQA.Metrics.PostProcessing.Menzerath do defp fit_power_law(xs, ys) do # Linearize: log(y) = log(a) + b * log(x), fit via OLS on log-log scale pairs = - Enum.zip(xs, ys) + xs + |> Enum.zip(ys) |> Enum.filter(fn {x, y} -> x > 0 and y > 0 end) if length(pairs) < 2 do {nil, nil} else - log_xs = Enum.map(pairs, fn {x, _} -> :math.log(x) end) - log_ys = Enum.map(pairs, fn {_, y} -> :math.log(y) end) + log_xs = pairs |> Enum.map(fn {x, _} -> :math.log(x) end) + log_ys = pairs |> Enum.map(fn {_, y} -> :math.log(y) end) n = length(pairs) - sum_lx = Enum.sum(log_xs) - sum_ly = Enum.sum(log_ys) - sum_lx2 = Enum.reduce(log_xs, 0.0, fn x, acc -> acc + x * x end) - sum_lxly = Enum.zip(log_xs, log_ys) |> Enum.reduce(0.0, fn {x, y}, acc -> acc + x * y end) + sum_lx = log_xs |> Enum.sum() + sum_ly = log_ys |> Enum.sum() + sum_lx2 = log_xs |> Enum.reduce(0.0, fn x, acc -> acc + x * x end) + sum_lxly = log_xs |> Enum.zip(log_ys) |> Enum.reduce(0.0, fn {x, y}, acc -> acc + x * y end) denom = n * sum_lx2 - sum_lx * sum_lx @@ -266,10 +268,11 @@ defmodule CodeQA.Metrics.PostProcessing.Menzerath do log_a = (sum_ly - b * sum_lx) / n mean_ly = sum_ly / n - ss_tot = Enum.reduce(log_ys, 0.0, fn ly, acc -> acc + (ly - mean_ly) ** 2 end) + ss_tot = log_ys |> Enum.reduce(0.0, fn ly, acc -> acc + (ly - mean_ly) ** 2 end) ss_res = - Enum.zip(log_xs, log_ys) + log_xs + |> Enum.zip(log_ys) |> Enum.reduce(0.0, fn {lx, ly}, acc -> acc + (ly - (log_a + b * lx)) ** 2 end) diff --git a/lib/codeqa/shared.ex b/lib/codeqa/shared.ex new file mode 100644 index 00000000..6cb85df6 --- /dev/null +++ b/lib/codeqa/shared.ex @@ -0,0 +1,29 @@ +defmodule CodeQA.Shared do + @moduledoc """ + Cross-cutting helpers shared between top-level pipelines. + + Extracted by `mix refactor --only ExtractParametricClone`. Both + `HealthReport`, `BlockImpactAnalyzer`, and `Diagnostics` were + reimplementing the same path-to-languages reduction; `SampleRunner` and + `Grader` were reimplementing the same slug-humanize. Consolidated here + to one place each. + """ + + alias CodeQA.Language + + @spec project_languages_shared(map()) :: [String.t()] + def project_languages_shared(path_keyed_map), + do: + path_keyed_map + |> Map.keys() + |> Enum.map(&Language.detect(&1).name()) + |> Enum.reject(&(&1 == "unknown")) + |> Enum.uniq() + + @spec humanize_category_shared(String.t()) :: String.t() + def humanize_category_shared(slug), + do: + slug + |> String.split("_") + |> Enum.map_join(" ", &String.capitalize/1) +end diff --git a/lib/mix/tasks/codeqa/sample_report.ex b/lib/mix/tasks/codeqa/sample_report.ex index 1bc5cf0d..812dc522 100644 --- a/lib/mix/tasks/codeqa/sample_report.ex +++ b/lib/mix/tasks/codeqa/sample_report.ex @@ -66,16 +66,19 @@ defmodule Mix.Tasks.Codeqa.SampleReport do if opts[:apply_scalars] do stats = SampleRunner.apply_scalars(opts) IO.puts("\nApplied scalars to YAML configs:") - Enum.each(stats, &print_scalar_stats/1) + stats |> Enum.each(&print_scalar_stats/1) end if opts[:apply_languages] do stats = SampleRunner.apply_languages(opts) IO.puts("\nApplied language coverage to YAML configs:") - Enum.each(stats, fn %{category: cat, behaviors_with_languages: n} -> - IO.puts(" #{cat}: #{n} behaviors with language coverage") - end) + stats + |> Enum.each( + &IO.puts( + " #{&1.category}: #{&1.behaviors_with_languages} behaviors with language coverage" + ) + ) end if path = opts[:file] do @@ -96,7 +99,7 @@ defmodule Mix.Tasks.Codeqa.SampleReport do "ok?" ) - Enum.each(results, &print_row(&1, opts)) + results |> Enum.each(&print_row(&1, opts)) end defp print_row(r, opts) do @@ -120,7 +123,7 @@ defmodule Mix.Tasks.Codeqa.SampleReport do ) if opts[:verbose] do - Enum.each(r.metric_detail, &print_metric_detail/1) + r.metric_detail |> Enum.each(&print_metric_detail/1) end end @@ -166,26 +169,25 @@ defmodule Mix.Tasks.Codeqa.SampleReport do IO.puts("\nTop #{top_n} likely issues (by cosine similarity):") IO.puts(String.duplicate("-", 75)) IO.puts(" " <> pad("behavior", 38) <> pad("cosine", 9) <> "score") - Enum.each(issues, &print_issue_row/1) + issues |> Enum.each(&print_issue_row/1) IO.puts("\nFull breakdown by category:") combined = SampleRunner.score_aggregate(aggregate) IO.puts("") - Enum.each(combined, &print_combined_category/1) + combined |> Enum.each(&print_combined_category/1) else IO.puts("\nNo supported files found at: #{path}") end end - defp print_issue_row(%{category: cat, behavior: b, cosine: cos, score: s, top_metrics: metrics}) do + defp print_issue_row(%{behavior: b, category: cat, cosine: cos, score: s, top_metrics: metrics}) do IO.puts(" " <> pad("#{cat}.#{b}", 38) <> pad(fmt(cos), 9) <> fmt(s)) - Enum.each(metrics, fn %{metric: m, contribution: c} -> - IO.puts(" " <> pad(m, 44) <> fmt(c)) - end) + metrics + |> Enum.each(&IO.puts(" " <> pad(&1.metric, 44) <> fmt(&1.contribution))) end - defp print_combined_category(%{name: name, behaviors: behaviors}) do + defp print_combined_category(%{behaviors: behaviors, name: name}) do IO.puts(name) IO.puts(String.duplicate("-", 60)) @@ -201,9 +203,8 @@ defmodule Mix.Tasks.Codeqa.SampleReport do IO.puts("") end - defp print_scalar_stats(%{category: cat, updated: u, deadzoned: d, skipped: s}) do - IO.puts(" #{pad(cat, 30)} #{u} written #{d} deadzoned #{s} skipped (no samples)") - end + defp print_scalar_stats(%{category: cat, deadzoned: d, skipped: s, updated: u}), + do: " #{pad(cat, 30)} #{u} written #{d} deadzoned #{s} skipped (no samples)" |> IO.puts() defp fmt(f), do: :erlang.float_to_binary(f / 1, decimals: 4) defp pad(s, n), do: String.pad_trailing(to_string(s), n) diff --git a/lib/mix/tasks/codeqa/signal_debug.ex b/lib/mix/tasks/codeqa/signal_debug.ex index 3852dec5..d4193b88 100644 --- a/lib/mix/tasks/codeqa/signal_debug.ex +++ b/lib/mix/tasks/codeqa/signal_debug.ex @@ -19,17 +19,15 @@ defmodule Mix.Tasks.Codeqa.SignalDebug do alias CodeQA.AST.Parsing.SignalStream alias CodeQA.Language - alias CodeQA.AST.Signals.Structural.{ - AccessModifierSignal, - BlankLineSignal, - BracketSignal, - BranchSplitSignal, - ColonIndentSignal, - CommentDividerSignal, - KeywordSignal, - SQLBlockSignal, - TripleQuoteSignal - } + alias CodeQA.AST.Signals.Structural.AccessModifierSignal + alias CodeQA.AST.Signals.Structural.BlankLineSignal + alias CodeQA.AST.Signals.Structural.BracketSignal + alias CodeQA.AST.Signals.Structural.BranchSplitSignal + alias CodeQA.AST.Signals.Structural.ColonIndentSignal + alias CodeQA.AST.Signals.Structural.CommentDividerSignal + alias CodeQA.AST.Signals.Structural.KeywordSignal + alias CodeQA.AST.Signals.Structural.SQLBlockSignal + alias CodeQA.AST.Signals.Structural.TripleQuoteSignal @switches [signal: :string, show_tokens: :boolean] @@ -77,7 +75,8 @@ defmodule Mix.Tasks.Codeqa.SignalDebug do emissions_per_signal = SignalStream.run(tokens, signals, lang_mod) - Enum.zip(signals, emissions_per_signal) + signals + |> Enum.zip(emissions_per_signal) |> Enum.each(fn {signal, emissions} -> print_signal_section(signal, emissions, tokens, lines) end) @@ -86,7 +85,8 @@ defmodule Mix.Tasks.Codeqa.SignalDebug do defp filter_signals(signals, nil), do: signals defp filter_signals(signals, name_filter) do - Enum.filter(signals, fn signal -> + signals + |> Enum.filter(fn signal -> module_name = signal.__struct__ |> Module.split() @@ -120,10 +120,11 @@ defmodule Mix.Tasks.Codeqa.SignalDebug do Mix.shell().info("Emissions: #{length(emissions)}") Mix.shell().info("") - if Enum.empty?(emissions) do + if emissions |> Enum.empty?() do Mix.shell().info(" (no emissions)") else - Enum.each(emissions, fn {_source, group, emission_name, value} -> + emissions + |> Enum.each(fn {_source, group, emission_name, value} -> print_emission(group, emission_name, value, tokens, lines) end) end @@ -132,7 +133,7 @@ defmodule Mix.Tasks.Codeqa.SignalDebug do end defp print_emission(:split, name, token_idx, tokens, lines) do - token = Enum.at(tokens, token_idx) + token = tokens |> Enum.at(token_idx) line_num = token && token.line line_src = line_num && Enum.at(lines, line_num - 1) @@ -151,8 +152,8 @@ defmodule Mix.Tasks.Codeqa.SignalDebug do end defp print_emission(:enclosure, name, {start_idx, end_idx}, tokens, lines) do - start_token = Enum.at(tokens, start_idx) - end_token = Enum.at(tokens, end_idx) + start_token = tokens |> Enum.at(start_idx) + end_token = tokens |> Enum.at(end_idx) start_line = start_token && start_token.line end_line = end_token && end_token.line @@ -163,13 +164,13 @@ defmodule Mix.Tasks.Codeqa.SignalDebug do if start_line do Mix.shell().info( - " open: #{inspect(Enum.at(lines, start_line - 1) |> String.trim_trailing())}" + " open: #{inspect(lines |> Enum.at(start_line - 1) |> String.trim_trailing())}" ) end if end_line && end_line != start_line do Mix.shell().info( - " close: #{inspect(Enum.at(lines, end_line - 1) |> String.trim_trailing())}" + " close: #{inspect(lines |> Enum.at(end_line - 1) |> String.trim_trailing())}" ) end diff --git a/mix.exs b/mix.exs index cb2f4133..7909fc86 100644 --- a/mix.exs +++ b/mix.exs @@ -5,7 +5,7 @@ defmodule CodeQA.MixProject do [ app: :codeqa, version: "0.1.0", - elixir: "~> 1.16", + elixir: "~> 1.18", start_permanent: Mix.env() == :prod, deps: deps(), escript: [main_module: CodeQA.CLI], @@ -54,7 +54,8 @@ defmodule CodeQA.MixProject do {:yaml_elixir, "~> 2.11"}, {:ex_doc, "~> 0.34", only: :dev, runtime: false}, {:credo, "~> 1.7", only: :dev, runtime: false}, - {:dialyxir, "~> 1.4", only: :dev, runtime: false} + {:dialyxir, "~> 1.4", only: :dev, runtime: false}, + {:number42_refactors, github: "num42/num42_refactors", only: [:dev], runtime: false} ] end end diff --git a/mix.lock b/mix.lock index b60b2161..29acb7ba 100644 --- a/mix.lock +++ b/mix.lock @@ -14,7 +14,9 @@ "makeup_elixir": {:hex, :makeup_elixir, "1.0.1", "e928a4f984e795e41e3abd27bfc09f51db16ab8ba1aebdba2b3a575437efafc2", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}, {:nimble_parsec, "~> 1.2.3 or ~> 1.3", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "7284900d412a3e5cfd97fdaed4f5ed389b8f2b4cb49efc0eb3bd10e2febf9507"}, "makeup_erlang": {:hex, :makeup_erlang, "1.0.3", "4252d5d4098da7415c390e847c814bad3764c94a814a0b4245176215615e1035", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}], "hexpm", "953297c02582a33411ac6208f2c6e55f0e870df7f80da724ed613f10e6706afd"}, "nimble_parsec": {:hex, :nimble_parsec, "1.4.2", "8efba0122db06df95bfaa78f791344a89352ba04baedd3849593bfce4d0dc1c6", [:mix], [], "hexpm", "4b21398942dda052b403bbe1da991ccd03a053668d147d53fb8c4e0efe09c973"}, + "number42_refactors": {:git, "https://github.com/num42/num42_refactors.git", "13a356b7f1159b2af344fe08c783f7b5bf191aa1", []}, "nx": {:hex, :nx, "0.11.0", "d37723dbd6cfa274a5def6d6664f5680c32e2eb8a1ce25ec6d91751967fa0abf", [:mix], [{:complex, "~> 0.6", [hex: :complex, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4.0 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "36157b21239aeb251d6cbac23eb0eb3495a5e1e0cbc2e6df16afd2ede1575205"}, + "sourceror": {:hex, :sourceror, "1.12.0", "da354c5f35aad3cc1132f5d5b0d8437d865e2661c263260480bab51b5eedb437", [:mix], [], "hexpm", "755703683bd014ebcd5de9acc24b68fb874a660a568d1d63f8f98cd8a6ef9cd0"}, "telemetry": {:hex, :telemetry, "1.4.1", "ab6de178e2b29b58e8256b92b382ea3f590a47152ca3651ea857a6cae05ac423", [:rebar3], [], "hexpm", "2172e05a27531d3d31dd9782841065c50dd5c3c7699d95266b2edd54c2dafa1c"}, "yamerl": {:hex, :yamerl, "0.10.0", "4ff81fee2f1f6a46f1700c0d880b24d193ddb74bd14ef42cb0bcf46e81ef2f8e", [:rebar3], [], "hexpm", "346adb2963f1051dc837a2364e4acf6eb7d80097c0f53cbdc3046ec8ec4b4e6e"}, "yaml_elixir": {:hex, :yaml_elixir, "2.12.1", "d74f2d82294651b58dac849c45a82aaea639766797359baff834b64439f6b3f4", [:mix], [{:yamerl, "~> 0.10", [hex: :yamerl, repo: "hexpm", optional: false]}], "hexpm", "d9ac16563c737d55f9bfeed7627489156b91268a3a21cd55c54eb2e335207fed"}, diff --git a/priv/combined_metrics/code_smells.yml b/priv/combined_metrics/code_smells.yml index cd4e559b..c629f13f 100644 --- a/priv/combined_metrics/code_smells.yml +++ b/priv/combined_metrics/code_smells.yml @@ -1,6 +1,5 @@ consistent_string_quote_style: _doc: "Files should use a single, consistent string quoting style throughout." - _languages: [elixir] _log_baseline: -18.9887 branching: mean_branching_density: 0.0243 @@ -105,7 +104,6 @@ consistent_string_quote_style: no_dead_code_after_return: _doc: "There should be no unreachable statements after a return or early exit." - _languages: [elixir] _log_baseline: -62.7495 branching: mean_branch_count: -2.0000 @@ -220,7 +218,6 @@ no_dead_code_after_return: no_debug_print_statements: _doc: "Debug output (`console.log`, `IO.inspect`, `fmt.Println`) must not be left in committed code." - _languages: [elixir, go, javascript, python, ruby] _log_baseline: -88.2885 branching: mean_branch_count: 0.2378 @@ -344,7 +341,6 @@ no_debug_print_statements: no_fixme_comments: _doc: "FIXME, XXX, and HACK comments indicate known problems that should be resolved before merging." - _languages: [elixir, go, javascript, python, ruby] _log_baseline: 11.3113 branching: mean_branch_count: 0.1713 @@ -472,7 +468,6 @@ no_fixme_comments: no_nested_ternary: _doc: "Nested conditional expressions (ternary-within-ternary) are harder to read than a plain if-else." - _languages: [elixir] _log_baseline: 8.0040 branching: mean_branch_count: -0.5662 diff --git a/priv/combined_metrics/dependencies.yml b/priv/combined_metrics/dependencies.yml index 93c0d128..c4e1d2b4 100644 --- a/priv/combined_metrics/dependencies.yml +++ b/priv/combined_metrics/dependencies.yml @@ -1,7 +1,6 @@ import_count_under_10: _doc: "Files should import fewer than 10 modules; high import counts signal excessive coupling." - _languages: [elixir] - _log_baseline: 7.1916 + _log_baseline: 7.2301 branching: mean_branch_count: 0.2110 mean_branching_density: -1.0683 @@ -96,6 +95,9 @@ import_count_under_10: mean_flesch_adapted: -0.0204 mean_fog_adapted: 0.2028 mean_total_lines: -0.0265 + separator_counts: + mean_dot_count: -0.1234 + mean_underscore_count: 0.1368 symbol_density: mean_density: 0.0223 mean_distinct_symbol_types: 0.0643 @@ -114,8 +116,7 @@ import_count_under_10: low_coupling: _doc: "Modules should depend on few external symbols — a low unique-operand count relative to total is a proxy for tight coupling." - _languages: [elixir] - _log_baseline: -38.2335 + _log_baseline: -39.3584 branching: mean_branch_count: 0.0745 mean_branching_density: 0.2097 @@ -210,6 +211,9 @@ low_coupling: mean_avg_tokens_per_line: -0.0248 mean_fog_adapted: 0.0082 mean_total_lines: -0.1353 + separator_counts: + mean_dot_count: -0.2609 + mean_underscore_count: -0.0495 symbol_density: mean_density: -0.0137 mean_distinct_symbol_types: -0.0960 @@ -229,8 +233,7 @@ low_coupling: no_wildcard_imports: _doc: "Wildcard imports (`import *`, `using Module`) pollute the local namespace and hide dependencies." - _languages: [elixir] - _log_baseline: -8.9685 + _log_baseline: -8.9040 branching: mean_branching_density: 0.0249 mean_non_blank_count: -0.0268 @@ -309,6 +312,9 @@ no_wildcard_imports: mean_flesch_adapted: -0.0142 mean_fog_adapted: 0.0290 mean_total_lines: -0.0268 + separator_counts: + mean_dot_count: -0.0137 + mean_underscore_count: 0.0302 symbol_density: mean_density: -0.0266 mean_distinct_symbol_types: -0.0817 diff --git a/priv/combined_metrics/documentation.yml b/priv/combined_metrics/documentation.yml index fba47a32..20119952 100644 --- a/priv/combined_metrics/documentation.yml +++ b/priv/combined_metrics/documentation.yml @@ -1,7 +1,6 @@ docstring_is_nonempty: _doc: "Docstrings must contain meaningful content, not just a placeholder or empty string." - _languages: [elixir] - _log_baseline: 28.4942 + _log_baseline: 29.7782 branching: mean_branch_count: 0.3089 mean_branching_density: 0.2652 @@ -93,6 +92,11 @@ docstring_is_nonempty: mean_avg_tokens_per_line: 0.0601 mean_fog_adapted: 0.0452 mean_total_lines: 0.0437 + separator_counts: + mean_dot_count: 0.1435 + mean_hyphen_count: 0.2418 + mean_slash_count: 0.1709 + mean_underscore_count: 0.0631 symbol_density: mean_density: -0.0578 mean_distinct_symbol_types: 0.0505 @@ -112,8 +116,7 @@ docstring_is_nonempty: file_has_license_header: _doc: "Source files should begin with a license or copyright header." - _languages: [elixir] - _log_baseline: 5.8777 + _log_baseline: 6.0103 branching: mean_branching_density: -0.0081 mean_non_blank_count: 0.0080 @@ -183,6 +186,8 @@ file_has_license_header: readability: mean_avg_tokens_per_line: 0.0091 mean_fog_adapted: 0.0060 + separator_counts: + mean_dot_count: 0.0423 symbol_density: mean_density: -0.0042 mean_symbol_count: 0.0065 @@ -200,8 +205,7 @@ file_has_license_header: file_has_module_docstring: _doc: "Files should have a module-level docstring explaining purpose and usage." - _languages: [elixir] - _log_baseline: 24.1681 + _log_baseline: 24.4917 branching: mean_branch_count: 0.3854 mean_branching_density: -2.0000 @@ -286,6 +290,8 @@ file_has_module_docstring: mean_flesch_adapted: 0.0205 mean_fog_adapted: -0.0266 mean_total_lines: 0.0908 + separator_counts: + mean_dot_count: 0.0777 symbol_density: mean_density: -0.0727 mean_distinct_symbol_types: 0.0618 @@ -304,8 +310,7 @@ file_has_module_docstring: file_has_no_commented_out_code: _doc: "Files should not contain commented-out code blocks left from development." - _languages: [elixir] - _log_baseline: -8.5677 + _log_baseline: -8.4785 branching: mean_branching_density: 0.0368 mean_non_blank_count: -0.0367 @@ -395,6 +400,10 @@ file_has_no_commented_out_code: mean_flesch_adapted: 0.0114 mean_fog_adapted: -0.0779 mean_total_lines: 0.0785 + separator_counts: + mean_dot_count: -0.0411 + mean_hyphen_count: 0.1390 + mean_underscore_count: -0.0181 symbol_density: mean_density: -0.0172 mean_symbol_count: -0.0237 @@ -412,8 +421,7 @@ file_has_no_commented_out_code: function_has_docstring: _doc: "Public functions should have a docstring describing behaviour, params, and return value." - _languages: [elixir] - _log_baseline: 41.6283 + _log_baseline: 43.3959 branching: mean_branch_count: 0.5279 mean_branching_density: 0.3832 @@ -513,6 +521,11 @@ function_has_docstring: mean_flesch_adapted: 0.0026 mean_fog_adapted: 0.0948 mean_total_lines: 0.1394 + separator_counts: + mean_dot_count: 0.1816 + mean_hyphen_count: 0.2506 + mean_slash_count: 0.3105 + mean_underscore_count: 0.1737 symbol_density: mean_density: -0.0353 mean_distinct_symbol_types: 0.0427 @@ -531,8 +544,7 @@ function_has_docstring: function_todo_comment_in_body: _doc: "Functions should not contain TODO/FIXME comments indicating unfinished work." - _languages: [elixir] - _log_baseline: 7.2394 + _log_baseline: 8.0649 branching: mean_branch_count: -0.0287 mean_branching_density: -0.0435 @@ -621,6 +633,11 @@ function_todo_comment_in_body: mean_flesch_adapted: -0.0053 mean_fog_adapted: -0.0109 mean_total_lines: 0.0381 + separator_counts: + mean_dot_count: 0.0485 + mean_hyphen_count: 0.0317 + mean_slash_count: 0.2758 + mean_underscore_count: 0.0645 symbol_density: mean_density: 0.0116 mean_distinct_symbol_types: -0.0140 diff --git a/priv/combined_metrics/error_handling.yml b/priv/combined_metrics/error_handling.yml index b09f542e..c3660502 100644 --- a/priv/combined_metrics/error_handling.yml +++ b/priv/combined_metrics/error_handling.yml @@ -1,7 +1,6 @@ does_not_swallow_errors: _doc: "Errors must be handled or re-raised — empty rescue/catch blocks silently hide failures." - _languages: [elixir] - _log_baseline: 86.0584 + _log_baseline: 88.4465 branching: mean_branch_count: -0.1041 mean_branching_density: -0.2095 @@ -94,6 +93,10 @@ does_not_swallow_errors: mean_flesch_adapted: -0.0373 mean_fog_adapted: 0.3019 mean_total_lines: 0.1054 + separator_counts: + mean_dot_count: 0.5172 + mean_hyphen_count: 0.2483 + mean_underscore_count: 0.0600 symbol_density: mean_density: 0.2563 mean_distinct_symbol_types: 0.0400 @@ -112,8 +115,7 @@ does_not_swallow_errors: error_message_is_descriptive: _doc: "Error values should carry a meaningful message, not just a bare atom or empty string." - _languages: [elixir] - _log_baseline: 52.7053 + _log_baseline: 54.3002 branching: mean_branch_count: 0.0664 mean_branching_density: -0.0540 @@ -201,6 +203,9 @@ error_message_is_descriptive: mean_flesch_adapted: -0.0175 mean_fog_adapted: 0.1420 mean_total_lines: 0.1204 + separator_counts: + mean_hyphen_count: 0.2417 + mean_underscore_count: 0.2177 symbol_density: mean_distinct_symbol_types: 0.0664 mean_symbol_count: 0.3056 @@ -218,8 +223,7 @@ error_message_is_descriptive: returns_typed_error: _doc: "Functions should signal failure via a typed return (e.g. `{:error, reason}`) rather than returning `nil` or `false`." - _languages: [elixir] - _log_baseline: 120.8554 + _log_baseline: 123.9515 branching: mean_branch_count: -0.1286 mean_branching_density: -0.1895 @@ -316,6 +320,9 @@ returns_typed_error: mean_flesch_adapted: -0.1272 mean_fog_adapted: 0.6637 mean_total_lines: 0.0608 + separator_counts: + mean_dot_count: 1.1292 + mean_underscore_count: 0.6279 symbol_density: mean_density: 0.5813 mean_distinct_symbol_types: 0.2134 diff --git a/priv/combined_metrics/function_design.yml b/priv/combined_metrics/function_design.yml index e34ba2ab..ababb9d6 100644 --- a/priv/combined_metrics/function_design.yml +++ b/priv/combined_metrics/function_design.yml @@ -1,6 +1,6 @@ boolean_function_has_question_mark: _doc: "Functions returning a boolean should end with `?` (Elixir/Ruby) or start with `is_`/`has_` (JS/Python)." - _log_baseline: -6.4663 + _log_baseline: -6.8487 brevity: mean_sample_size: 0.0127 casing_entropy: @@ -57,6 +57,8 @@ boolean_function_has_question_mark: mean_avg_tokens_per_line: 0.0248 mean_flesch_adapted: 0.0078 mean_fog_adapted: 0.0564 + separator_counts: + mean_underscore_count: -0.2039 symbol_density: mean_density: 0.0691 mean_distinct_symbol_types: 0.0138 @@ -75,7 +77,7 @@ boolean_function_has_question_mark: cyclomatic_complexity_under_10: _doc: "Functions should have a cyclomatic complexity under 10." - _log_baseline: -1.4896 + _log_baseline: -2.0665 branching: mean_branch_count: -0.2373 mean_branching_density: -0.1952 @@ -162,6 +164,10 @@ cyclomatic_complexity_under_10: mean_flesch_adapted: -0.0140 mean_fog_adapted: 0.1035 mean_total_lines: -0.0421 + separator_counts: + mean_dot_count: -0.3005 + mean_hyphen_count: -0.1769 + mean_underscore_count: 0.1482 symbol_density: mean_density: 0.0552 mean_distinct_symbol_types: -0.0172 @@ -180,7 +186,7 @@ cyclomatic_complexity_under_10: has_verb_in_name: _doc: "Function names should contain a verb describing the action performed." - _log_baseline: 14.8350 + _log_baseline: 18.0187 compression: mean_raw_bytes: 0.0816 mean_redundancy: -0.0390 @@ -198,6 +204,8 @@ has_verb_in_name: mean_avg_sub_words_per_id: 0.1330 mean_flesch_adapted: -0.1324 mean_fog_adapted: 1.3261 + separator_counts: + mean_underscore_count: 0.9417 symbol_density: mean_density: -0.0828 vowel_density: @@ -205,7 +213,7 @@ has_verb_in_name: is_less_than_20_lines: _doc: "Functions should be 20 lines or fewer." - _log_baseline: 23.9658 + _log_baseline: 24.3841 branching: mean_branch_count: -0.0820 mean_branching_density: -0.1010 @@ -299,6 +307,11 @@ is_less_than_20_lines: mean_flesch_adapted: -0.0039 mean_fog_adapted: 0.0868 mean_total_lines: 0.0188 + separator_counts: + mean_dot_count: 0.0717 + mean_hyphen_count: -0.2249 + mean_slash_count: 0.1457 + mean_underscore_count: 0.1129 symbol_density: mean_density: -0.0084 mean_distinct_symbol_types: 0.0127 @@ -318,7 +331,7 @@ is_less_than_20_lines: nesting_depth_under_4: _doc: "Code should not nest deeper than 4 levels." - _log_baseline: 1.0611 + _log_baseline: 1.2381 branching: mean_branch_count: -0.3267 mean_branching_density: -0.2061 @@ -411,6 +424,10 @@ nesting_depth_under_4: mean_flesch_adapted: -0.0374 mean_fog_adapted: 0.2252 mean_total_lines: -0.1206 + separator_counts: + mean_dot_count: -0.2520 + mean_hyphen_count: -0.1841 + mean_underscore_count: 0.3891 symbol_density: mean_density: 0.1426 mean_symbol_count: 0.1355 @@ -429,7 +446,7 @@ nesting_depth_under_4: no_boolean_parameter: _doc: "Functions should not take boolean parameters — a flag usually means the function does two things." - _log_baseline: 13.6290 + _log_baseline: 14.7569 branching: mean_branch_count: -2.0000 mean_branching_density: 1.0271 @@ -526,6 +543,9 @@ no_boolean_parameter: mean_flesch_adapted: -0.0254 mean_fog_adapted: 0.2928 mean_total_lines: -0.0383 + separator_counts: + mean_dot_count: 0.1538 + mean_underscore_count: 0.2112 symbol_density: mean_density: 0.0479 mean_symbol_count: 0.0916 @@ -543,7 +563,7 @@ no_boolean_parameter: no_magic_numbers: _doc: "Numeric literals should be named constants, not inline magic numbers." - _log_baseline: 45.8808 + _log_baseline: 49.2101 branching: mean_branch_count: -0.2708 mean_branching_density: -0.1682 @@ -630,6 +650,9 @@ no_magic_numbers: mean_flesch_adapted: -0.3819 mean_fog_adapted: 1.0656 mean_total_lines: -0.1029 + separator_counts: + mean_hyphen_count: -0.5119 + mean_underscore_count: 1.2283 symbol_density: mean_density: -0.0314 mean_distinct_symbol_types: 0.0644 @@ -649,7 +672,7 @@ no_magic_numbers: parameter_count_under_4: _doc: "Functions should take fewer than 4 parameters." - _log_baseline: 1.9637 + _log_baseline: 3.4096 branching: mean_non_blank_count: 0.0967 brevity: @@ -739,6 +762,9 @@ parameter_count_under_4: mean_flesch_adapted: 0.0271 mean_fog_adapted: -0.1290 mean_total_lines: 0.0967 + separator_counts: + mean_dot_count: 0.9099 + mean_underscore_count: 0.0550 symbol_density: mean_density: 0.0124 mean_distinct_symbol_types: 0.1042 @@ -757,7 +783,7 @@ parameter_count_under_4: uses_ternary_expression: _doc: "Simple conditional assignments should use inline expressions rather than full if-blocks." - _log_baseline: -4.5289 + _log_baseline: -5.0838 branching: mean_branch_count: -0.4160 mean_branching_density: 0.1134 @@ -843,6 +869,9 @@ uses_ternary_expression: mean_flesch_adapted: -0.0682 mean_fog_adapted: 0.5360 mean_total_lines: -0.5296 + separator_counts: + mean_dot_count: -1.1679 + mean_underscore_count: 0.2249 symbol_density: mean_density: 0.3167 mean_distinct_symbol_types: 0.0364 diff --git a/priv/combined_metrics/naming_conventions.yml b/priv/combined_metrics/naming_conventions.yml index 83ce0781..b553e342 100644 --- a/priv/combined_metrics/naming_conventions.yml +++ b/priv/combined_metrics/naming_conventions.yml @@ -1,6 +1,5 @@ class_name_is_noun: _doc: "Class and module names should be nouns describing what they represent, not verbs or gerunds." - _languages: [elixir] _log_baseline: 2.9861 brevity: mean_sample_size: 0.7106 @@ -67,8 +66,7 @@ file_name_matches_primary_export: function_name_is_not_single_word: _doc: "Single-word function names like `run`, `process`, or `handle` are too vague to convey intent." - _languages: [elixir] - _log_baseline: 17.8470 + _log_baseline: 23.5472 compression: mean_raw_bytes: 0.2434 mean_redundancy: 0.0776 @@ -87,6 +85,8 @@ function_name_is_not_single_word: mean_avg_sub_words_per_id: 0.3083 mean_flesch_adapted: -0.3181 mean_fog_adapted: 1.3258 + separator_counts: + mean_underscore_count: 1.8529 symbol_density: mean_density: -0.2431 vowel_density: @@ -94,8 +94,7 @@ function_name_is_not_single_word: function_name_matches_return_type: _doc: "Functions prefixed with `get_`, `fetch_`, or `find_` should return the thing they name." - _languages: [elixir] - _log_baseline: 7.5638 + _log_baseline: 7.7877 branching: mean_max_nesting_depth: 0.1335 brevity: @@ -171,6 +170,10 @@ function_name_matches_return_type: mean_avg_tokens_per_line: 0.0030 mean_flesch_adapted: -0.0107 mean_fog_adapted: 0.0058 + separator_counts: + mean_dot_count: 0.1335 + mean_slash_count: 0.2282 + mean_underscore_count: 0.0064 symbol_density: mean_density: 0.0633 mean_distinct_symbol_types: 0.0639 @@ -190,7 +193,6 @@ function_name_matches_return_type: test_name_starts_with_verb: _doc: "Test descriptions should start with a verb: `creates`, `raises`, `returns`, not a noun phrase." - _languages: [elixir] _log_baseline: 7.8915 branching: mean_branch_count: 1.9977 diff --git a/priv/combined_metrics/testing.yml b/priv/combined_metrics/testing.yml index 1d036f93..1f27f4fb 100644 --- a/priv/combined_metrics/testing.yml +++ b/priv/combined_metrics/testing.yml @@ -1,7 +1,6 @@ reasonable_test_to_code_ratio: _doc: "There should be an adequate number of test cases relative to the code being tested." - _languages: [elixir] - _log_baseline: 11.2157 + _log_baseline: 12.6188 branching: mean_branch_count: 0.1869 mean_branching_density: 0.0352 @@ -98,6 +97,10 @@ reasonable_test_to_code_ratio: mean_flesch_adapted: 0.0106 mean_fog_adapted: -0.0829 mean_total_lines: 0.1973 + separator_counts: + mean_dot_count: 0.2776 + mean_hyphen_count: 0.2470 + mean_underscore_count: 0.0907 symbol_density: mean_density: -0.0353 mean_distinct_symbol_types: -0.0284 @@ -117,8 +120,7 @@ reasonable_test_to_code_ratio: test_has_assertion: _doc: "Every test body must contain at least one assertion — a test without assertions proves nothing." - _languages: [elixir] - _log_baseline: -10.8081 + _log_baseline: -10.1975 branching: mean_branch_count: 0.0918 mean_branching_density: 0.1642 @@ -208,6 +210,11 @@ test_has_assertion: mean_flesch_adapted: -0.0065 mean_fog_adapted: 0.0284 mean_total_lines: -0.0284 + separator_counts: + mean_dot_count: 0.0108 + mean_hyphen_count: -0.1294 + mean_slash_count: 0.5719 + mean_underscore_count: 0.0918 symbol_density: mean_density: 0.0263 mean_distinct_symbol_types: -0.0194 @@ -226,8 +233,7 @@ test_has_assertion: test_name_describes_behavior: _doc: "Test names should describe the expected behaviour, not just the method under test." - _languages: [elixir] - _log_baseline: 57.2080 + _log_baseline: 59.1394 branching: mean_branch_count: 2.0000 mean_branching_density: -1.5965 @@ -306,6 +312,10 @@ test_name_describes_behavior: mean_avg_line_length: 0.1184 mean_avg_tokens_per_line: 0.0316 mean_total_lines: 0.2388 + separator_counts: + mean_dot_count: 0.0701 + mean_slash_count: 1.2619 + mean_underscore_count: 0.2442 symbol_density: mean_density: -0.1391 mean_distinct_symbol_types: 0.0729 @@ -324,8 +334,7 @@ test_name_describes_behavior: test_single_concept: _doc: "Each test should verify a single concept — tests covering multiple things are harder to diagnose when they fail." - _languages: [elixir] - _log_baseline: 37.2588 + _log_baseline: 39.5531 branching: mean_branch_count: 0.3696 mean_branching_density: -2.0000 @@ -423,6 +432,10 @@ test_single_concept: mean_flesch_adapted: 0.0098 mean_fog_adapted: -0.1758 mean_total_lines: 0.3391 + separator_counts: + mean_dot_count: 0.0317 + mean_slash_count: 0.9554 + mean_underscore_count: 0.4518 symbol_density: mean_density: -0.0634 mean_symbol_count: 0.1338 diff --git a/priv/combined_metrics/variable_naming.yml b/priv/combined_metrics/variable_naming.yml index f07e7891..d2dff3e4 100644 --- a/priv/combined_metrics/variable_naming.yml +++ b/priv/combined_metrics/variable_naming.yml @@ -1,6 +1,5 @@ boolean_has_is_has_prefix: _doc: "Boolean variables should be prefixed with `is_`, `has_`, or `can_`." - _languages: [elixir, javascript, ruby] _log_baseline: 22.4319 brevity: mean_sample_size: 0.0752 @@ -65,7 +64,6 @@ boolean_has_is_has_prefix: collection_name_is_plural: _doc: "Variables holding a collection should use a plural name." - _languages: [elixir, javascript, ruby] _log_baseline: 24.0478 brevity: mean_sample_size: -0.5320 @@ -129,7 +127,6 @@ collection_name_is_plural: loop_var_is_single_letter: _doc: "Loop index variables (`i`, `j`, `k`) are acceptable inside loop bodies." - _languages: [elixir, javascript, ruby] _log_baseline: -32.9785 brevity: mean_sample_size: -0.1049 @@ -212,7 +209,6 @@ loop_var_is_single_letter: name_contains_and: _doc: "Variable names containing `and` signal a variable that holds two concerns." - _languages: [elixir, javascript, ruby] _log_baseline: -2.9877 branching: mean_branch_count: -0.3666 @@ -334,7 +330,6 @@ name_contains_and: name_contains_type_suffix: _doc: "Type suffixes in names (`userString`, `nameList`) are redundant noise." - _languages: [elixir, javascript, ruby] _log_baseline: -33.1356 branching: mean_branch_count: -0.4150 @@ -422,7 +417,6 @@ name_contains_type_suffix: name_is_abbreviation: _doc: "Abbreviated names (`usr`, `cfg`, `mgr`) reduce readability." - _languages: [elixir, javascript, ruby] _log_baseline: 9.2985 brevity: mean_sample_size: -0.1542 @@ -513,7 +507,6 @@ name_is_abbreviation: name_is_generic: _doc: "Generic names (`data`, `result`, `tmp`, `val`, `obj`) convey no domain meaning." - _languages: [elixir, javascript, ruby] _log_baseline: 43.6270 branching: mean_branch_count: 0.5193 @@ -633,7 +626,6 @@ name_is_generic: name_is_number_like: _doc: "Number-suffixed names (`var1`, `thing2`) signal a missing abstraction." - _languages: [elixir, javascript, ruby] _log_baseline: 4.1505 brevity: mean_sample_size: -0.0262 @@ -715,7 +707,6 @@ name_is_number_like: name_is_single_letter: _doc: "Single-letter names outside loop indices are too opaque." - _languages: [elixir, javascript, ruby] _log_baseline: 30.8986 branching: mean_branching_density: -0.0445 @@ -816,7 +807,6 @@ name_is_single_letter: name_is_too_long: _doc: "Names longer than ~30 characters harm readability." - _languages: [elixir, javascript, ruby] _log_baseline: -10.5110 branching: mean_branch_count: 0.0340 @@ -942,7 +932,6 @@ name_is_too_long: name_is_too_short: _doc: "Names shorter than 3 characters (outside loops) are too opaque." - _languages: [elixir, javascript, ruby] _log_baseline: -3.8620 branching: mean_branch_count: -0.2327 @@ -1033,7 +1022,6 @@ name_is_too_short: negated_boolean_name: _doc: "Negated boolean names (`isNotValid`, `notActive`) are harder to reason about." - _languages: [elixir, javascript, ruby] _log_baseline: -6.4001 brevity: mean_sample_size: -0.0998 @@ -1106,7 +1094,6 @@ negated_boolean_name: no_hungarian_notation: _doc: "Hungarian notation prefixes (`strName`, `bFlag`) add noise without type safety." - _languages: [elixir, javascript, ruby] _log_baseline: -8.4371 brevity: mean_sample_size: -0.0295 @@ -1198,7 +1185,6 @@ no_hungarian_notation: screaming_snake_for_constants: _doc: "Module-level constants should use SCREAMING_SNAKE_CASE." - _languages: [elixir, javascript, ruby] _log_baseline: -4.4685 branching: mean_branching_density: 0.0176 diff --git a/test/codeqa/analysis/behavior_config_server_test.exs b/test/codeqa/analysis/behavior_config_server_test.exs index ebcc31bb..34fa02a3 100644 --- a/test/codeqa/analysis/behavior_config_server_test.exs +++ b/test/codeqa/analysis/behavior_config_server_test.exs @@ -13,12 +13,14 @@ defmodule CodeQA.Analysis.BehaviorConfigServerTest do assert is_map(behaviors) assert map_size(behaviors) > 0 - Enum.each(behaviors, fn {category, list} -> + behaviors + |> Enum.each(fn {category, list} -> assert is_binary(category) assert is_list(list) assert list != [] - Enum.each(list, fn {behavior, data} -> + list + |> Enum.each(fn {behavior, data} -> assert is_binary(behavior) assert is_map(data) end) @@ -31,7 +33,9 @@ defmodule CodeQA.Analysis.BehaviorConfigServerTest do {:ok, files} = File.ls(yaml_dir) - Enum.each(files |> Enum.filter(&String.ends_with?(&1, ".yml")), fn yml_file -> + files + |> Enum.filter(&String.ends_with?(&1, ".yml")) + |> Enum.each(fn yml_file -> category = String.trim_trailing(yml_file, ".yml") {:ok, data} = YamlElixir.read_from_file(Path.join(yaml_dir, yml_file)) @@ -45,12 +49,13 @@ defmodule CodeQA.Analysis.BehaviorConfigServerTest do test "get_scalars/3 returns a map of {group, key} => scalar", %{pid: pid} do behaviors = BehaviorConfigServer.get_all_behaviors(pid) - {category, [{behavior, _data} | _]} = Enum.at(behaviors, 0) + {category, [{behavior, _data} | _]} = behaviors |> Enum.at(0) scalars = BehaviorConfigServer.get_scalars(pid, category, behavior) assert is_map(scalars) - Enum.each(scalars, fn {{group, key}, scalar} -> + scalars + |> Enum.each(fn {{group, key}, scalar} -> assert is_binary(group) assert is_binary(key) assert is_float(scalar) @@ -63,7 +68,7 @@ defmodule CodeQA.Analysis.BehaviorConfigServerTest do test "get_log_baseline/3 returns a float", %{pid: pid} do behaviors = BehaviorConfigServer.get_all_behaviors(pid) - {category, [{behavior, _data} | _]} = Enum.at(behaviors, 0) + {category, [{behavior, _data} | _]} = behaviors |> Enum.at(0) baseline = BehaviorConfigServer.get_log_baseline(pid, category, behavior) assert is_float(baseline) diff --git a/test/codeqa/analysis/file_context_server_test.exs b/test/codeqa/analysis/file_context_server_test.exs index 660bd9a3..c43420a7 100644 --- a/test/codeqa/analysis/file_context_server_test.exs +++ b/test/codeqa/analysis/file_context_server_test.exs @@ -2,7 +2,8 @@ defmodule CodeQA.Analysis.FileContextServerTest do use ExUnit.Case, async: true alias CodeQA.Analysis.FileContextServer - alias CodeQA.Engine.{FileContext, Pipeline} + alias CodeQA.Engine.FileContext + alias CodeQA.Engine.Pipeline setup do {:ok, pid} = FileContextServer.start_link() diff --git a/test/codeqa/analysis/file_metrics_server_test.exs b/test/codeqa/analysis/file_metrics_server_test.exs index b68f4b37..a77e368c 100644 --- a/test/codeqa/analysis/file_metrics_server_test.exs +++ b/test/codeqa/analysis/file_metrics_server_test.exs @@ -4,9 +4,7 @@ defmodule CodeQA.Analysis.FileMetricsServerTest do alias CodeQA.Analysis.FileMetricsServer alias CodeQA.Engine.Analyzer - defp build_registry do - Analyzer.build_registry() - end + defp build_registry, do: Analyzer.build_registry() setup do {:ok, pid} = FileMetricsServer.start_link() diff --git a/test/codeqa/ast/classification/node_classifier_test.exs b/test/codeqa/ast/classification/node_classifier_test.exs index 990a35d1..2d762213 100644 --- a/test/codeqa/ast/classification/node_classifier_test.exs +++ b/test/codeqa/ast/classification/node_classifier_test.exs @@ -7,15 +7,13 @@ defmodule CodeQA.AST.NodeClassifierTest do alias CodeQA.AST.Lexing.TokenNormalizer alias CodeQA.AST.Parsing.Parser - alias CodeQA.AST.Nodes.{ - AttributeNode, - CodeNode, - DocNode, - FunctionNode, - ImportNode, - ModuleNode, - TestNode - } + alias CodeQA.AST.Nodes.AttributeNode + alias CodeQA.AST.Nodes.CodeNode + alias CodeQA.AST.Nodes.DocNode + alias CodeQA.AST.Nodes.FunctionNode + alias CodeQA.AST.Nodes.ImportNode + alias CodeQA.AST.Nodes.ModuleNode + alias CodeQA.AST.Nodes.TestNode alias CodeQA.Languages.Code.Native.Go alias CodeQA.Languages.Code.Native.Rust @@ -39,13 +37,12 @@ defmodule CodeQA.AST.NodeClassifierTest do NodeClassifier.classify(block, lang_mod) end - defp node_with_tokens(tokens) do - %Node{ - tokens: tokens, + defp node_with_tokens(tokens), + do: %Node{ + children: [], line_count: 1, - children: [] + tokens: tokens } - end describe "classify/1 — function detection" do test "def → FunctionNode" do @@ -153,8 +150,8 @@ defmodule CodeQA.AST.NodeClassifierTest do end test "direct token in node → DocNode" do - doc_token = %Token{kind: "", content: ~s("""), line: 1, col: 0} - nl = %Token{kind: "", content: "\n", line: 2, col: 0} + doc_token = %Token{col: 0, content: ~s("""), kind: "", line: 1} + nl = %Token{col: 0, content: "\n", kind: "", line: 2} node = node_with_tokens([doc_token, nl]) assert %DocNode{} = NodeClassifier.classify(node, Unknown) end @@ -204,7 +201,7 @@ defmodule CodeQA.AST.NodeClassifierTest do end test "empty-like node with only whitespace tokens → CodeNode" do - nl = %Token{kind: "", content: "\n", line: 1, col: 0} + nl = %Token{col: 0, content: "\n", kind: "", line: 1} node = node_with_tokens([nl]) assert %CodeNode{} = @@ -304,9 +301,9 @@ defmodule CodeQA.AST.NodeClassifierTest do sub_first = List.first(sub_block.tokens) parent.tokens - |> Enum.take_while(fn t -> t != sub_first end) + |> Enum.take_while(&(&1 != sub_first)) |> Enum.reverse() - |> Enum.take_while(fn t -> t.kind != :"" end) + |> Enum.take_while(&(&1.kind != :"")) |> Enum.reverse() end end diff --git a/test/codeqa/ast/classification/node_protocol_test.exs b/test/codeqa/ast/classification/node_protocol_test.exs index 5e79a00d..57c4f421 100644 --- a/test/codeqa/ast/classification/node_protocol_test.exs +++ b/test/codeqa/ast/classification/node_protocol_test.exs @@ -1,5 +1,5 @@ defmodule CodeQA.AST.NodeProtocolTest.FakeNode do - defstruct [:tokens, :line_count, :children, :start_line, :end_line, :label] + defstruct [:children, :end_line, :label, :line_count, :start_line, :tokens] defimpl CodeQA.AST.Classification.NodeProtocol do alias CodeQA.AST.Classification.NodeProtocol @@ -12,9 +12,9 @@ defmodule CodeQA.AST.NodeProtocolTest.FakeNode do def label(n), do: n.label def flat_tokens(n) do - if Enum.empty?(n.children), + if n.children |> Enum.empty?(), do: n.tokens, - else: Enum.flat_map(n.children, &NodeProtocol.flat_tokens/1) + else: n.children |> Enum.flat_map(&NodeProtocol.flat_tokens/1) end end end @@ -27,12 +27,12 @@ defmodule CodeQA.AST.NodeProtocolTest do alias CodeQA.AST.NodeProtocolTest.FakeNode @node %FakeNode{ - tokens: [:a, :b], - line_count: 3, children: [], - start_line: 1, end_line: 3, - label: "foo.ex:1" + label: "foo.ex:1", + line_count: 3, + start_line: 1, + tokens: [:a, :b] } test "tokens/1" do @@ -61,21 +61,21 @@ defmodule CodeQA.AST.NodeProtocolTest do describe "flat_tokens/1" do test "leaf node returns own tokens" do - leaf = %Node{tokens: [:a, :b], line_count: 1, children: []} + leaf = %Node{children: [], line_count: 1, tokens: [:a, :b]} assert NodeProtocol.flat_tokens(leaf) == [:a, :b] end test "non-leaf node returns flattened descendant tokens" do - child_a = %Node{tokens: [:a], line_count: 1, children: []} - child_b = %Node{tokens: [:b, :c], line_count: 1, children: []} - parent = %Node{tokens: [:x], line_count: 2, children: [child_a, child_b]} + child_a = %Node{children: [], line_count: 1, tokens: [:a]} + child_b = %Node{children: [], line_count: 1, tokens: [:b, :c]} + parent = %Node{children: [child_a, child_b], line_count: 2, tokens: [:x]} assert NodeProtocol.flat_tokens(parent) == [:a, :b, :c] end test "deeply nested node returns all leaf tokens" do - leaf = %Node{tokens: [:z], line_count: 1, children: []} - mid = %Node{tokens: [:y], line_count: 1, children: [leaf]} - root = %Node{tokens: [:x], line_count: 2, children: [mid]} + leaf = %Node{children: [], line_count: 1, tokens: [:z]} + mid = %Node{children: [leaf], line_count: 1, tokens: [:y]} + root = %Node{children: [mid], line_count: 2, tokens: [:x]} assert NodeProtocol.flat_tokens(root) == [:z] end end @@ -83,12 +83,12 @@ defmodule CodeQA.AST.NodeProtocolTest do describe "Node implements NodeProtocol" do setup do node = %Node{ - tokens: [:x, :y], - line_count: 3, children: [], - start_line: 1, end_line: 3, - label: "f.ex:1" + label: "f.ex:1", + line_count: 3, + start_line: 1, + tokens: [:x, :y] } %{node: node} diff --git a/test/codeqa/ast/classification/node_type_detector_test.exs b/test/codeqa/ast/classification/node_type_detector_test.exs index f4c97530..81ddef5b 100644 --- a/test/codeqa/ast/classification/node_type_detector_test.exs +++ b/test/codeqa/ast/classification/node_type_detector_test.exs @@ -4,17 +4,20 @@ defmodule CodeQA.AST.Classification.NodeTypeDetectorTest do alias CodeQA.AST.Enrichment.Node alias CodeQA.AST.Lexing.Token alias CodeQA.AST.Lexing.TokenNormalizer - alias CodeQA.AST.Nodes.{AttributeNode, CodeNode, DocNode, FunctionNode} + alias CodeQA.AST.Nodes.AttributeNode + alias CodeQA.AST.Nodes.CodeNode + alias CodeQA.AST.Nodes.DocNode + alias CodeQA.AST.Nodes.FunctionNode alias CodeQA.AST.Parsing.Parser alias CodeQA.Languages.Code.Vm.Elixir, as: ElixirLang alias CodeQA.Languages.Unknown - defp detect_types(code, lang_mod \\ ElixirLang) do - code - |> TokenNormalizer.normalize_structural() - |> Parser.detect_blocks(lang_mod) - |> NodeTypeDetector.detect_types(lang_mod) - end + defp detect_types(code, lang_mod \\ ElixirLang), + do: + code + |> TokenNormalizer.normalize_structural() + |> Parser.detect_blocks(lang_mod) + |> NodeTypeDetector.detect_types(lang_mod) describe "detect_types/1" do test "block with gets type :doc" do @@ -50,8 +53,9 @@ defmodule CodeQA.AST.Classification.NodeTypeDetectorTest do blocks = detect_types(code) code_block = - Enum.find(blocks, fn b -> - Enum.any?(b.tokens, &(&1.kind == "" and &1.content == "def")) + blocks + |> Enum.find(fn b -> + b.tokens |> Enum.any?(&(&1.kind == "" and &1.content == "def")) end) assert is_struct(code_block, FunctionNode) @@ -80,15 +84,15 @@ defmodule CodeQA.AST.Classification.NodeTypeDetectorTest do describe "detect_types/1 — typed struct output" do test "returns DocNode for doc blocks" do - doc_token = %Token{kind: "", content: ~s("""), line: 1, col: 0} - nl = %Token{kind: "", content: "\n", line: 2, col: 0} + doc_token = %Token{col: 0, content: ~s("""), kind: "", line: 1} + nl = %Token{col: 0, content: "\n", kind: "", line: 2} node = %Node{ - tokens: [doc_token, nl], - line_count: 2, children: [], + end_line: 2, + line_count: 2, start_line: 1, - end_line: 2 + tokens: [doc_token, nl] } [result] = @@ -101,16 +105,16 @@ defmodule CodeQA.AST.Classification.NodeTypeDetectorTest do end test "returns AttributeNode for typespec blocks" do - at = %Token{kind: "@", content: "@", line: 1, col: 0} - spec = %Token{kind: "", content: "spec", line: 1, col: 1} - nl = %Token{kind: "", content: "\n", line: 1, col: 5} + at = %Token{col: 0, content: "@", kind: "@", line: 1} + spec = %Token{col: 1, content: "spec", kind: "", line: 1} + nl = %Token{col: 5, content: "\n", kind: "", line: 1} node = %Node{ - tokens: [at, spec, nl], - line_count: 1, children: [], + end_line: 1, + line_count: 1, start_line: 1, - end_line: 1 + tokens: [at, spec, nl] } [result] = @@ -124,15 +128,15 @@ defmodule CodeQA.AST.Classification.NodeTypeDetectorTest do end test "returns CodeNode for unclassified blocks" do - id = %Token{kind: "", content: "foo", line: 1, col: 0} - nl = %Token{kind: "", content: "\n", line: 1, col: 3} + id = %Token{col: 0, content: "foo", kind: "", line: 1} + nl = %Token{col: 3, content: "\n", kind: "", line: 1} node = %Node{ - tokens: [id, nl], - line_count: 1, children: [], + end_line: 1, + line_count: 1, start_line: 1, - end_line: 1 + tokens: [id, nl] } [result] = diff --git a/test/codeqa/ast/classification/typed_node_kind_test.exs b/test/codeqa/ast/classification/typed_node_kind_test.exs index 84149cd6..87ed8c50 100644 --- a/test/codeqa/ast/classification/typed_node_kind_test.exs +++ b/test/codeqa/ast/classification/typed_node_kind_test.exs @@ -3,15 +3,13 @@ defmodule CodeQA.AST.Classification.TypedNodeKindTest do alias CodeQA.AST.Classification.TypedNodeKind - alias CodeQA.AST.Nodes.{ - AttributeNode, - CodeNode, - DocNode, - FunctionNode, - ImportNode, - ModuleNode, - TestNode - } + alias CodeQA.AST.Nodes.AttributeNode + alias CodeQA.AST.Nodes.CodeNode + alias CodeQA.AST.Nodes.DocNode + alias CodeQA.AST.Nodes.FunctionNode + alias CodeQA.AST.Nodes.ImportNode + alias CodeQA.AST.Nodes.ModuleNode + alias CodeQA.AST.Nodes.TestNode test "maps each typed node struct to its kind atom" do assert TypedNodeKind.of(%DocNode{}) == :doc diff --git a/test/codeqa/ast/enrichment/compound_node_assertions_languages_test.exs b/test/codeqa/ast/enrichment/compound_node_assertions_languages_test.exs index 3a6adbb6..fad06d1c 100644 --- a/test/codeqa/ast/enrichment/compound_node_assertions_languages_test.exs +++ b/test/codeqa/ast/enrichment/compound_node_assertions_languages_test.exs @@ -99,7 +99,8 @@ defmodule CodeQA.AST.Enrichment.CompoundNodeAssertionsLanguagesTest do none_of = Map.get(unquote(Macro.escape(block_assertion)), :none_of, []) all_of = unquote(Macro.escape(block_assertion)).all_of - assert Enum.any?(compounds, fn compound -> + assert compounds + |> Enum.any?(fn compound -> tokens = all_tokens(compound) compound_satisfies?(tokens, all_of, none_of) end), @@ -107,22 +108,20 @@ defmodule CodeQA.AST.Enrichment.CompoundNodeAssertionsLanguagesTest do end end - defp compound_nodes(code) do - code - |> TokenNormalizer.normalize_structural() - |> Parser.detect_blocks(Unknown) - |> NodeTypeDetector.detect_types(Unknown) - |> CompoundNodeBuilder.build() - end + defp compound_nodes(code), + do: + code + |> TokenNormalizer.normalize_structural() + |> Parser.detect_blocks(Unknown) + |> NodeTypeDetector.detect_types(Unknown) + |> CompoundNodeBuilder.build() - defp all_tokens(%CompoundNode{docs: docs, typespecs: typespecs, code: code}) do - (docs ++ typespecs ++ code) - |> Enum.flat_map(&node_tokens/1) - end + defp all_tokens(%CompoundNode{code: code, docs: docs, typespecs: typespecs}), + do: + (docs ++ typespecs ++ code) + |> Enum.flat_map(&node_tokens/1) - defp node_tokens(node) do - NodeProtocol.tokens(node) - end + defp node_tokens(node), do: node |> NodeProtocol.tokens() defp matches?({:exact, field, value}, token), do: Map.get(token, field) == value @@ -130,7 +129,7 @@ defmodule CodeQA.AST.Enrichment.CompoundNodeAssertionsLanguagesTest do do: String.contains?(Map.get(token, field, ""), value) defp compound_satisfies?(tokens, all_of, none_of) do - Enum.all?(all_of, fn matcher -> Enum.any?(tokens, &matches?(matcher, &1)) end) and + Enum.all?(all_of, fn matcher -> tokens |> Enum.any?(&matches?(matcher, &1)) end) and Enum.all?(none_of, fn matcher -> not Enum.any?(tokens, &matches?(matcher, &1)) end) end end diff --git a/test/codeqa/ast/enrichment/compound_node_builder_test.exs b/test/codeqa/ast/enrichment/compound_node_builder_test.exs index 00a10065..7e4ca8ee 100644 --- a/test/codeqa/ast/enrichment/compound_node_builder_test.exs +++ b/test/codeqa/ast/enrichment/compound_node_builder_test.exs @@ -5,7 +5,9 @@ defmodule CodeQA.AST.Enrichment.CompoundNodeBuilderTest do alias CodeQA.AST.Enrichment.CompoundNode alias CodeQA.AST.Enrichment.CompoundNodeBuilder alias CodeQA.AST.Lexing.TokenNormalizer - alias CodeQA.AST.Nodes.{AttributeNode, CodeNode, DocNode} + alias CodeQA.AST.Nodes.AttributeNode + alias CodeQA.AST.Nodes.CodeNode + alias CodeQA.AST.Nodes.DocNode alias CodeQA.AST.Parsing.Parser defp build(code) do @@ -108,8 +110,8 @@ defmodule CodeQA.AST.Enrichment.CompoundNodeBuilderTest do describe "build/1 with typed node structs" do test "routes DocNode to docs bucket" do - doc = %DocNode{tokens: [:d], line_count: 1, children: [], start_line: 1, end_line: 1} - code = %CodeNode{tokens: [:c], line_count: 2, children: [], start_line: 2, end_line: 3} + doc = %DocNode{children: [], end_line: 1, line_count: 1, start_line: 1, tokens: [:d]} + code = %CodeNode{children: [], end_line: 3, line_count: 2, start_line: 2, tokens: [:c]} [compound] = CompoundNodeBuilder.build([doc, code]) assert length(compound.docs) == 1 @@ -118,15 +120,15 @@ defmodule CodeQA.AST.Enrichment.CompoundNodeBuilderTest do test "routes AttributeNode to typespecs bucket" do attr = %AttributeNode{ - tokens: [:a], - line_count: 1, children: [], - start_line: 1, end_line: 1, - kind: :typespec + kind: :typespec, + line_count: 1, + start_line: 1, + tokens: [:a] } - code = %CodeNode{tokens: [:c], line_count: 2, children: [], start_line: 2, end_line: 3} + code = %CodeNode{children: [], end_line: 3, line_count: 2, start_line: 2, tokens: [:c]} [compound] = CompoundNodeBuilder.build([attr, code]) assert length(compound.typespecs) == 1 diff --git a/test/codeqa/ast/lexing/string_token_test.exs b/test/codeqa/ast/lexing/string_token_test.exs index 0a99e9e0..68504b56 100644 --- a/test/codeqa/ast/lexing/string_token_test.exs +++ b/test/codeqa/ast/lexing/string_token_test.exs @@ -7,11 +7,11 @@ defmodule CodeQA.AST.StringTokenTest do describe "StringToken struct" do test "has kind, content, line, col, interpolations, multiline, and quotes fields" do tok = %StringToken{ - kind: "", - content: ~s("hello"), - line: 1, col: 0, - interpolations: nil + content: ~s("hello"), + interpolations: nil, + kind: "", + line: 1 } assert tok.kind == "" @@ -24,22 +24,22 @@ defmodule CodeQA.AST.StringTokenTest do end test "interpolations defaults to nil" do - tok = %StringToken{kind: "", content: ~s("hello")} + tok = %StringToken{content: ~s("hello"), kind: ""} assert tok.interpolations == nil end test "multiline defaults to false" do - tok = %StringToken{kind: "", content: ~s("hello")} + tok = %StringToken{content: ~s("hello"), kind: ""} assert tok.multiline == false end test "quotes defaults to :double" do - tok = %StringToken{kind: "", content: ~s("hello")} + tok = %StringToken{content: ~s("hello"), kind: ""} assert tok.quotes == :double end test "multiline triple-quote struct" do - tok = %StringToken{kind: "", content: ~s("""), multiline: true, quotes: :double} + tok = %StringToken{content: ~s("""), kind: "", multiline: true, quotes: :double} assert tok.multiline == true assert tok.quotes == :double end @@ -104,7 +104,7 @@ defmodule CodeQA.AST.StringTokenTest do test "non-string tokens are still plain Token structs" do tokens = TokenNormalizer.normalize_structural("foo = 42") - id = Enum.find(tokens, &(&1.kind == "")) + id = tokens |> Enum.find(&(&1.kind == "")) refute match?(%StringToken{}, id) end end diff --git a/test/codeqa/ast/lexing/token_normalizer_test.exs b/test/codeqa/ast/lexing/token_normalizer_test.exs index 19a886ae..5074ac36 100644 --- a/test/codeqa/ast/lexing/token_normalizer_test.exs +++ b/test/codeqa/ast/lexing/token_normalizer_test.exs @@ -4,7 +4,7 @@ defmodule CodeQA.AST.TokenNormalizerTest do alias CodeQA.AST.Lexing.Token alias CodeQA.AST.Lexing.TokenNormalizer - defp kinds(tokens), do: Enum.map(tokens, & &1.kind) + defp kinds(tokens), do: tokens |> Enum.map(& &1.kind) describe "normalize_structural/1" do test "emits between lines" do @@ -21,7 +21,7 @@ defmodule CodeQA.AST.TokenNormalizerTest do |> Enum.filter(fn [h | _] -> h.kind == "" end) |> Enum.map(&length/1) - assert Enum.any?(nl_runs, &(&1 >= 2)) + assert nl_runs |> Enum.any?(&(&1 >= 2)) end test "emits one token per 2 leading spaces" do @@ -62,7 +62,7 @@ defmodule CodeQA.AST.TokenNormalizerTest do test "tokens carry line numbers" do result = TokenNormalizer.normalize_structural("foo\nbar") - lines = Enum.map(result, & &1.line) + lines = result |> Enum.map(& &1.line) assert 1 in lines assert 2 in lines end @@ -82,13 +82,13 @@ defmodule CodeQA.AST.TokenNormalizerTest do test "keyword content is preserved (not normalized away)" do result = TokenNormalizer.normalize_structural("def foo") - contents = Enum.map(result, & &1.content) + contents = result |> Enum.map(& &1.content) assert "def" in contents end test "string token content is the original literal" do result = TokenNormalizer.normalize_structural(~s("hello")) - tok = Enum.find(result, &(&1.kind == "")) + tok = result |> Enum.find(&(&1.kind == "")) assert tok.content == ~s("hello") end @@ -193,7 +193,7 @@ defmodule CodeQA.AST.TokenNormalizerTest do test "multi-char operator value equals content (no normalization)" do result = TokenNormalizer.normalize_structural("x >= y") - tok = Enum.find(result, &(&1.kind == ">=")) + tok = result |> Enum.find(&(&1.kind == ">=")) assert tok.content == ">=" end end @@ -300,28 +300,28 @@ defmodule CodeQA.AST.TokenNormalizerTest do test "triple double-quotes emits a StringToken with kind " do tokens = TokenNormalizer.normalize_structural(~s(""")) - assert [%StringToken{kind: "", content: ~s("""), multiline: true, quotes: :double}] = + assert [%StringToken{content: ~s("""), kind: "", multiline: true, quotes: :double}] = tokens end test "triple single-quotes emits a StringToken with kind " do tokens = TokenNormalizer.normalize_structural("'''") - assert [%StringToken{kind: "", content: "'''", multiline: true, quotes: :single}] = + assert [%StringToken{content: "'''", kind: "", multiline: true, quotes: :single}] = tokens end test "triple-quote is not consumed as empty string + bare quote" do tokens = TokenNormalizer.normalize_structural(~s(""")) - refute Enum.any?(tokens, &(&1.kind == "")) + refute tokens |> Enum.any?(&(&1.kind == "")) end test "content between triple-quotes is tokenized normally" do code = ~s("""\nhello world\n""") tokens = TokenNormalizer.normalize_structural(code) - trip_count = Enum.count(tokens, &(&1.kind == "")) + trip_count = tokens |> Enum.count(&(&1.kind == "")) assert trip_count == 2 - assert Enum.any?(tokens, &(&1.kind == "" and &1.content == "hello")) + assert tokens |> Enum.any?(&(&1.kind == "" and &1.content == "hello")) end test "regular double-quoted string still works" do diff --git a/test/codeqa/ast/lexing/token_protocol_test.exs b/test/codeqa/ast/lexing/token_protocol_test.exs index 340d94a9..0d4c190f 100644 --- a/test/codeqa/ast/lexing/token_protocol_test.exs +++ b/test/codeqa/ast/lexing/token_protocol_test.exs @@ -7,7 +7,7 @@ defmodule CodeQA.AST.Lexing.TokenProtocolTest do describe "Token implementation" do setup do - {:ok, token: %Token{kind: "", content: "foo", line: 3, col: 7}} + {:ok, token: %Token{col: 7, content: "foo", kind: "", line: 3}} end test "kind/1", %{token: t} do @@ -27,7 +27,7 @@ defmodule CodeQA.AST.Lexing.TokenProtocolTest do end test "nil location fields are preserved" do - t = %Token{kind: "", content: "\n", line: nil, col: nil} + t = %Token{col: nil, content: "\n", kind: "", line: nil} assert TokenProtocol.line(t) == nil assert TokenProtocol.col(t) == nil end @@ -37,11 +37,11 @@ defmodule CodeQA.AST.Lexing.TokenProtocolTest do setup do {:ok, token: %StringToken{ - kind: "", - content: "\"hello\"", - line: 10, col: 2, - interpolations: nil + content: "\"hello\"", + interpolations: nil, + kind: "", + line: 10 }} end @@ -63,11 +63,11 @@ defmodule CodeQA.AST.Lexing.TokenProtocolTest do test "works with interpolated string token" do t = %StringToken{ - kind: "", - content: "\"\#{x}\"", - line: 5, col: 0, - interpolations: ["x"] + content: "\"\#{x}\"", + interpolations: ["x"], + kind: "", + line: 5 } assert TokenProtocol.kind(t) == "" @@ -79,10 +79,10 @@ defmodule CodeQA.AST.Lexing.TokenProtocolTest do setup do {:ok, token: %StringToken{ - kind: "", + col: 0, content: ~s("""), + kind: "", line: 2, - col: 0, multiline: true, quotes: :double }} @@ -106,10 +106,10 @@ defmodule CodeQA.AST.Lexing.TokenProtocolTest do test "single-quote variant" do t = %StringToken{ - kind: "", + col: 0, content: "'''", + kind: "", line: 5, - col: 0, multiline: true, quotes: :single } @@ -122,20 +122,20 @@ defmodule CodeQA.AST.Lexing.TokenProtocolTest do describe "polymorphic use" do test "mixed token list can be processed uniformly" do tokens = [ - %Token{kind: "", content: "x", line: 1, col: 0}, - %StringToken{kind: "", content: "\"hi\"", line: 1, col: 4}, + %Token{col: 0, content: "x", kind: "", line: 1}, + %StringToken{col: 4, content: "\"hi\"", kind: "", line: 1}, %StringToken{ - kind: "", + col: 0, content: ~s("""), + kind: "", line: 2, - col: 0, multiline: true, quotes: :double }, - %Token{kind: "", content: "\n", line: 2, col: 3} + %Token{col: 3, content: "\n", kind: "", line: 2} ] - kinds = Enum.map(tokens, &TokenProtocol.kind/1) + kinds = tokens |> Enum.map(&TokenProtocol.kind/1) assert kinds == ["", "", "", ""] end end diff --git a/test/codeqa/ast/nodes/code_node_test.exs b/test/codeqa/ast/nodes/code_node_test.exs index 20082f0c..c380431d 100644 --- a/test/codeqa/ast/nodes/code_node_test.exs +++ b/test/codeqa/ast/nodes/code_node_test.exs @@ -2,19 +2,20 @@ defmodule CodeQA.AST.Nodes.CodeNodeTest do use ExUnit.Case, async: true alias CodeQA.AST.Classification.NodeProtocol - alias CodeQA.AST.Nodes.{CodeNode, DocNode} + alias CodeQA.AST.Nodes.CodeNode + alias CodeQA.AST.Nodes.DocNode @tokens [:a, :b, :c] describe "CodeNode" do setup do node = %CodeNode{ - tokens: @tokens, - line_count: 2, children: [], - start_line: 1, end_line: 2, - label: "f.ex:1" + label: "f.ex:1", + line_count: 2, + start_line: 1, + tokens: @tokens } %{node: node} @@ -30,7 +31,7 @@ defmodule CodeQA.AST.Nodes.CodeNodeTest do end test "all common fields default to nil except children" do - node = %CodeNode{tokens: [], line_count: 0, children: []} + node = %CodeNode{children: [], line_count: 0, tokens: []} assert NodeProtocol.start_line(node) == nil assert NodeProtocol.end_line(node) == nil assert NodeProtocol.label(node) == nil @@ -40,12 +41,12 @@ defmodule CodeQA.AST.Nodes.CodeNodeTest do describe "DocNode" do test "implements NodeProtocol" do node = %DocNode{ - tokens: @tokens, - line_count: 1, children: [], - start_line: 5, end_line: 5, - label: nil + label: nil, + line_count: 1, + start_line: 5, + tokens: @tokens } assert NodeProtocol.tokens(node) == @tokens diff --git a/test/codeqa/ast/nodes/function_node_test.exs b/test/codeqa/ast/nodes/function_node_test.exs index a1770bce..10b89138 100644 --- a/test/codeqa/ast/nodes/function_node_test.exs +++ b/test/codeqa/ast/nodes/function_node_test.exs @@ -2,19 +2,20 @@ defmodule CodeQA.AST.Nodes.FunctionNodeTest do use ExUnit.Case, async: true alias CodeQA.AST.Classification.NodeProtocol - alias CodeQA.AST.Nodes.{FunctionNode, ModuleNode} + alias CodeQA.AST.Nodes.FunctionNode + alias CodeQA.AST.Nodes.ModuleNode describe "FunctionNode" do setup do node = %FunctionNode{ - tokens: [:a], - line_count: 5, + arity: 2, children: [], - start_line: 10, end_line: 14, label: "foo.ex:10", + line_count: 5, name: "calculate", - arity: 2, + start_line: 10, + tokens: [:a], visibility: :public } @@ -34,7 +35,7 @@ defmodule CodeQA.AST.Nodes.FunctionNodeTest do end test "specific fields default to nil" do - node = %FunctionNode{tokens: [], line_count: 0, children: []} + node = %FunctionNode{children: [], line_count: 0, tokens: []} assert node.name == nil assert node.arity == nil assert node.visibility == nil @@ -44,14 +45,14 @@ defmodule CodeQA.AST.Nodes.FunctionNodeTest do describe "ModuleNode" do test "implements NodeProtocol" do node = %ModuleNode{ - tokens: [:m], - line_count: 20, children: [], - start_line: 1, end_line: 20, + kind: :module, label: nil, + line_count: 20, name: "MyApp.Foo", - kind: :module + start_line: 1, + tokens: [:m] } assert NodeProtocol.tokens(node) == [:m] @@ -60,7 +61,7 @@ defmodule CodeQA.AST.Nodes.FunctionNodeTest do end test "specific fields default to nil" do - node = %ModuleNode{tokens: [], line_count: 0, children: []} + node = %ModuleNode{children: [], line_count: 0, tokens: []} assert node.name == nil assert node.kind == nil end diff --git a/test/codeqa/ast/nodes/import_node_test.exs b/test/codeqa/ast/nodes/import_node_test.exs index 53c4a989..2eb6e865 100644 --- a/test/codeqa/ast/nodes/import_node_test.exs +++ b/test/codeqa/ast/nodes/import_node_test.exs @@ -2,18 +2,20 @@ defmodule CodeQA.AST.Nodes.ImportNodeTest do use ExUnit.Case, async: true alias CodeQA.AST.Classification.NodeProtocol - alias CodeQA.AST.Nodes.{AttributeNode, ImportNode, TestNode} + alias CodeQA.AST.Nodes.AttributeNode + alias CodeQA.AST.Nodes.ImportNode + alias CodeQA.AST.Nodes.TestNode describe "ImportNode" do test "implements NodeProtocol" do node = %ImportNode{ - tokens: [:i], - line_count: 1, children: [], - start_line: 3, end_line: 3, label: nil, - target: "MyApp.Repo" + line_count: 1, + start_line: 3, + target: "MyApp.Repo", + tokens: [:i] } assert NodeProtocol.tokens(node) == [:i] @@ -21,7 +23,7 @@ defmodule CodeQA.AST.Nodes.ImportNodeTest do end test "target defaults to nil" do - node = %ImportNode{tokens: [], line_count: 0, children: []} + node = %ImportNode{children: [], line_count: 0, tokens: []} assert node.target == nil end end @@ -29,14 +31,14 @@ defmodule CodeQA.AST.Nodes.ImportNodeTest do describe "AttributeNode" do test "implements NodeProtocol" do node = %AttributeNode{ - tokens: [:a], - line_count: 1, children: [], - start_line: 2, end_line: 2, + kind: :annotation, label: nil, + line_count: 1, name: "moduledoc", - kind: :annotation + start_line: 2, + tokens: [:a] } assert NodeProtocol.tokens(node) == [:a] @@ -45,7 +47,7 @@ defmodule CodeQA.AST.Nodes.ImportNodeTest do end test "supports :typespec kind" do - node = %AttributeNode{tokens: [], line_count: 0, children: [], kind: :typespec} + node = %AttributeNode{children: [], kind: :typespec, line_count: 0, tokens: []} assert node.kind == :typespec end end @@ -53,13 +55,13 @@ defmodule CodeQA.AST.Nodes.ImportNodeTest do describe "TestNode" do test "implements NodeProtocol" do node = %TestNode{ - tokens: [:t], - line_count: 4, children: [], - start_line: 10, + description: "returns the sum", end_line: 13, label: nil, - description: "returns the sum" + line_count: 4, + start_line: 10, + tokens: [:t] } assert NodeProtocol.tokens(node) == [:t] @@ -67,7 +69,7 @@ defmodule CodeQA.AST.Nodes.ImportNodeTest do end test "description defaults to nil" do - node = %TestNode{tokens: [], line_count: 0, children: []} + node = %TestNode{children: [], line_count: 0, tokens: []} assert node.description == nil end end diff --git a/test/codeqa/ast/parsing/parser_languages_test.exs b/test/codeqa/ast/parsing/parser_languages_test.exs index 5526d10b..89153960 100644 --- a/test/codeqa/ast/parsing/parser_languages_test.exs +++ b/test/codeqa/ast/parsing/parser_languages_test.exs @@ -118,18 +118,18 @@ defmodule CodeQA.AST.Parsing.ParserLanguagesTest do code end, block_assertions} - defp blocks(code, lang_mod \\ Unknown) do - code - |> TokenNormalizer.normalize_structural() - |> Parser.detect_blocks(lang_mod) - end - - defp children(code, lang_mod \\ Unknown) do - code - |> TokenNormalizer.normalize_structural() - |> Parser.detect_blocks(lang_mod) - |> Enum.flat_map(& &1.children) - end + defp blocks(code, lang_mod \\ Unknown), + do: + code + |> TokenNormalizer.normalize_structural() + |> Parser.detect_blocks(lang_mod) + + defp children(code, lang_mod \\ Unknown), + do: + code + |> TokenNormalizer.normalize_structural() + |> Parser.detect_blocks(lang_mod) + |> Enum.flat_map(& &1.children) describe "blocks/2" do for {language, code, _block_assertions} <- @normalized_fixtures do diff --git a/test/codeqa/ast/parsing/parser_test.exs b/test/codeqa/ast/parsing/parser_test.exs index 51ead52e..fb945e82 100644 --- a/test/codeqa/ast/parsing/parser_test.exs +++ b/test/codeqa/ast/parsing/parser_test.exs @@ -69,16 +69,18 @@ defmodule CodeQA.AST.Parsing.ParserTest do # depth 1 — the outer argument list args = - Enum.find(block.children, fn b -> - Enum.any?(b.tokens, &(&1.content == "bar")) + block.children + |> Enum.find(fn b -> + b.tokens |> Enum.any?(&(&1.content == "bar")) end) assert args != nil, "expected an arg-list sub-block containing 'bar'" # depth 2 — the inner call (x, y) inside bar(...) inner = - Enum.find(args.children, fn b -> - Enum.any?(b.tokens, &(&1.content == "x")) + args.children + |> Enum.find(fn b -> + b.tokens |> Enum.any?(&(&1.content == "x")) end) assert inner != nil, "expected a sub-block for the inner call (x, y)" @@ -93,24 +95,27 @@ defmodule CodeQA.AST.Parsing.ParserTest do # depth 1: (inner(deep(value))) d1 = - Enum.find(block.children, fn b -> - Enum.any?(b.tokens, &(&1.content == "inner")) + block.children + |> Enum.find(fn b -> + b.tokens |> Enum.any?(&(&1.content == "inner")) end) assert d1 != nil # depth 2: (deep(value)) d2 = - Enum.find(d1.children, fn b -> - Enum.any?(b.tokens, &(&1.content == "deep")) + d1.children + |> Enum.find(fn b -> + b.tokens |> Enum.any?(&(&1.content == "deep")) end) assert d2 != nil # depth 3: (value) — leaf d3 = - Enum.find(d2.children, fn b -> - Enum.any?(b.tokens, &(&1.content == "value")) + d2.children + |> Enum.find(fn b -> + b.tokens |> Enum.any?(&(&1.content == "value")) end) assert d3 != nil @@ -137,14 +142,15 @@ defmodule CodeQA.AST.Parsing.ParserTest do blocks = Parser.detect_blocks(tokens, Unknown) # The heredoc (including its blank line) should be ONE block, not split heredoc_block = - Enum.find(blocks, fn b -> - Enum.any?(b.tokens, &(&1.kind == "")) + blocks + |> Enum.find(fn b -> + b.tokens |> Enum.any?(&(&1.kind == "")) end) assert heredoc_block != nil # Ensure no split happened inside — the heredoc block contains both "Some" and "More" - contents = Enum.filter(heredoc_block.tokens, &(&1.kind == "")) - names = Enum.map(contents, & &1.content) + contents = heredoc_block.tokens |> Enum.filter(&(&1.kind == "")) + names = contents |> Enum.map(& &1.content) assert "Some" in names assert "More" in names end @@ -170,9 +176,9 @@ defmodule CodeQA.AST.Parsing.ParserTest do blocks = Parser.detect_blocks(tokens, Unknown) # Expect exactly 3 blocks: code-before, heredoc, code-after assert length(blocks) == 3 - assert Enum.any?(Enum.at(blocks, 0).tokens, &(&1.content == "foo")) - assert Enum.any?(Enum.at(blocks, 1).tokens, &(&1.kind == "")) - assert Enum.any?(Enum.at(blocks, 2).tokens, &(&1.content == "bar")) + assert Enum.at(blocks, 0).tokens |> Enum.any?(&(&1.content == "foo")) + assert Enum.at(blocks, 1).tokens |> Enum.any?(&(&1.kind == "")) + assert Enum.at(blocks, 2).tokens |> Enum.any?(&(&1.content == "bar")) end end diff --git a/test/codeqa/ast/parsing/signal_stream_test.exs b/test/codeqa/ast/parsing/signal_stream_test.exs index 69cfcaf2..40dd9eef 100644 --- a/test/codeqa/ast/parsing/signal_stream_test.exs +++ b/test/codeqa/ast/parsing/signal_stream_test.exs @@ -5,7 +5,7 @@ defmodule CodeQA.AST.SignalStreamTest do alias CodeQA.AST.Parsing.SignalStream alias CodeQA.Support.CounterSignal - defp tok(kind, content), do: %Token{kind: kind, content: content, line: 1, col: 0} + defp tok(kind, content), do: %Token{col: 0, content: content, kind: kind, line: 1} test "returns one emission list per signal" do tokens = [tok("", "foo"), tok("", "\n"), tok("", "bar")] diff --git a/test/codeqa/ast/parsing/signal_test.exs b/test/codeqa/ast/parsing/signal_test.exs index 47d72ad6..e4395a2f 100644 --- a/test/codeqa/ast/parsing/signal_test.exs +++ b/test/codeqa/ast/parsing/signal_test.exs @@ -42,14 +42,14 @@ defmodule CodeQA.AST.SignalTest do end test "emit returns {MapSet of {name, value} pairs, new_state}" do - token = %CodeQA.AST.Lexing.Token{kind: "", content: "foo", line: 1, col: 0} + token = %CodeQA.AST.Lexing.Token{col: 0, content: "foo", kind: "", line: 1} {emissions, new_state} = Signal.emit(%TestSignal{}, token, %{count: 0}) assert MapSet.member?(emissions, {:tick, 0}) assert new_state == %{count: 1} end test "emit may return empty MapSet for no emission" do - token = %CodeQA.AST.Lexing.Token{kind: "", content: "\n", line: 1, col: 0} + token = %CodeQA.AST.Lexing.Token{col: 0, content: "\n", kind: "", line: 1} {emissions, _state} = Signal.emit(%SilentSignal{}, token, %{}) assert MapSet.size(emissions) == 0 end diff --git a/test/codeqa/ast/signals/classification/comment_density_signal_test.exs b/test/codeqa/ast/signals/classification/comment_density_signal_test.exs index 374b191a..d884b7ae 100644 --- a/test/codeqa/ast/signals/classification/comment_density_signal_test.exs +++ b/test/codeqa/ast/signals/classification/comment_density_signal_test.exs @@ -8,19 +8,19 @@ defmodule CodeQA.AST.Signals.Classification.CommentDensitySignalTest do defp run(tokens, lang_mod \\ Unknown), do: SignalStream.run(tokens, [%CommentDensitySignal{}], lang_mod) |> List.flatten() - defp t(content, kind \\ ""), do: %{kind: kind, content: content, line: 1, col: 0} - defp nl, do: %{kind: "", content: "\n", line: 1, col: 0} - defp on_line(tokens, line), do: Enum.map(tokens, &%{&1 | line: line}) + defp token(content, kind \\ ""), do: %{col: 0, content: content, kind: kind, line: 1} + defp nl, do: %{col: 0, content: "\n", kind: "", line: 1} + defp on_line(tokens, line), do: tokens |> Enum.map(&%{&1 | line: line}) test "votes comment when >60% of lines start with #" do tokens = - on_line([t("#"), t("license")], 1) ++ + on_line([token("#"), token("license")], 1) ++ [nl()] ++ - on_line([t("#"), t("copyright")], 2) ++ + on_line([token("#"), token("copyright")], 2) ++ [nl()] ++ - on_line([t("#"), t("author")], 3) ++ + on_line([token("#"), token("author")], 3) ++ [nl()] ++ - on_line([t("def"), t("foo")], 4) + on_line([token("def"), token("foo")], 4) emissions = run(tokens, Python) assert [{CommentDensitySignal, :classification, :comment_vote, _}] = emissions @@ -28,18 +28,18 @@ defmodule CodeQA.AST.Signals.Classification.CommentDensitySignalTest do test "does not vote when comment density is low" do tokens = - on_line([t("def"), t("foo")], 1) ++ + on_line([token("def"), token("foo")], 1) ++ [nl()] ++ - on_line([t("#"), t("note")], 2) + on_line([token("#"), token("note")], 2) assert run(tokens, Python) == [] end test "does not vote when no comment_prefixes provided" do tokens = - on_line([t("#"), t("comment")], 1) ++ + on_line([token("#"), token("comment")], 1) ++ [nl()] ++ - on_line([t("#"), t("comment")], 2) + on_line([token("#"), token("comment")], 2) assert run(tokens, Unknown) == [] end diff --git a/test/codeqa/ast/signals/classification/config_signal_test.exs b/test/codeqa/ast/signals/classification/config_signal_test.exs index da510c2b..875c92a6 100644 --- a/test/codeqa/ast/signals/classification/config_signal_test.exs +++ b/test/codeqa/ast/signals/classification/config_signal_test.exs @@ -4,25 +4,25 @@ defmodule CodeQA.AST.Signals.Classification.ConfigSignalTest do alias CodeQA.AST.Signals.Classification.ConfigSignal defp run(tokens), do: SignalStream.run(tokens, [%ConfigSignal{}], []) |> List.flatten() - defp t(content, kind \\ ""), do: %{kind: kind, content: content, line: 1, col: 0} + defp token(content, kind \\ ""), do: %{col: 0, content: content, kind: kind, line: 1} test "emits config_vote for 'config' keyword at indent 0" do - emissions = run([t("config"), t(":app"), t(","), t("key:"), t("val")]) + emissions = run([token("config"), token(":app"), token(","), token("key:"), token("val")]) assert [{ConfigSignal, :classification, :config_vote, 3}] = emissions end test "emits config_vote for 'configure' keyword" do - emissions = run([t("configure")]) + emissions = run([token("configure")]) assert [{ConfigSignal, :classification, :config_vote, 3}] = emissions end test "does not emit when indented" do - emissions = run([t("", ""), t("config")]) + emissions = run([token("", ""), token("config")]) assert emissions == [] end test "does not emit for 'config' inside brackets" do - tokens = [t("(", "("), t("config"), t(")", ")")] + tokens = [token("(", "("), token("config"), token(")", ")")] assert run(tokens) == [] end end diff --git a/test/codeqa/ast/signals/classification/data_signal_test.exs b/test/codeqa/ast/signals/classification/data_signal_test.exs index 852067bc..c9913a17 100644 --- a/test/codeqa/ast/signals/classification/data_signal_test.exs +++ b/test/codeqa/ast/signals/classification/data_signal_test.exs @@ -5,24 +5,24 @@ defmodule CodeQA.AST.Signals.Classification.DataSignalTest do defp run(tokens), do: SignalStream.run(tokens, [%DataSignal{}], []) |> List.flatten() - defp t(content, kind), do: %{kind: kind, content: content, line: 1, col: 0} - defp str(v), do: t(v, "") - defp num(v), do: t(v, "") - defp id(v), do: t(v, "") + defp token(content, kind), do: %{col: 0, content: content, kind: kind, line: 1} + defp string_kind_token(v), do: token(v, "") + defp num(v), do: token(v, "") + defp id(v), do: token(v, "") test "votes data for high-literal token stream" do - tokens = [str("foo"), str("bar"), num("1"), num("2"), id("key")] + tokens = [string_kind_token("foo"), string_kind_token("bar"), num("1"), num("2"), id("key")] emissions = run(tokens) assert [{DataSignal, :classification, :data_vote, _}] = emissions end test "does not vote when control-flow keyword present" do - tokens = [str("foo"), id("if"), str("bar")] + tokens = [string_kind_token("foo"), id("if"), string_kind_token("bar")] assert run(tokens) == [] end test "does not vote when literal ratio is low" do - tokens = [id("foo"), id("bar"), id("baz"), str("one")] + tokens = [id("foo"), id("bar"), id("baz"), string_kind_token("one")] assert run(tokens) == [] end end diff --git a/test/codeqa/ast/signals/classification/type_signal_test.exs b/test/codeqa/ast/signals/classification/type_signal_test.exs index aa400d38..e970047e 100644 --- a/test/codeqa/ast/signals/classification/type_signal_test.exs +++ b/test/codeqa/ast/signals/classification/type_signal_test.exs @@ -5,35 +5,44 @@ defmodule CodeQA.AST.Signals.Classification.TypeSignalTest do defp run(tokens), do: SignalStream.run(tokens, [%TypeSignal{}], []) |> List.flatten() - defp t(content, kind \\ ""), do: %{kind: kind, content: content, line: 1, col: 0} + defp token(content, kind \\ ""), do: %{col: 0, content: content, kind: kind, line: 1} test "emits type_vote weight 3 for @type at indent 0" do - emissions = run([t("@", "@"), t("type"), t("t"), t("::"), t("integer")]) + emissions = run([token("@", "@"), token("type"), token("t"), token("::"), token("integer")]) assert [{TypeSignal, :classification, :type_vote, 3}] = emissions end test "emits type_vote for @typep" do - emissions = run([t("@", "@"), t("typep"), t("t"), t("::")]) + emissions = run([token("@", "@"), token("typep"), token("t"), token("::")]) assert [{TypeSignal, :classification, :type_vote, 3}] = emissions end test "emits type_vote for @opaque" do - emissions = run([t("@", "@"), t("opaque"), t("t"), t("::")]) + emissions = run([token("@", "@"), token("opaque"), token("t"), token("::")]) assert [{TypeSignal, :classification, :type_vote, 3}] = emissions end test "does not emit for @spec" do - emissions = run([t("@", "@"), t("spec"), t("foo"), t("()")]) + emissions = run([token("@", "@"), token("spec"), token("foo"), token("()")]) assert emissions == [] end test "does not emit for @type inside indented block" do - emissions = run([t("", ""), t("@", "@"), t("type"), t("t")]) + emissions = run([token("", ""), token("@", "@"), token("type"), token("t")]) assert emissions == [] end test "emits at most one vote" do - tokens = [t("@", "@"), t("type"), t("a"), t("", ""), t("@", "@"), t("typep"), t("b")] + tokens = [ + token("@", "@"), + token("type"), + token("a"), + token("", ""), + token("@", "@"), + token("typep"), + token("b") + ] + emissions = run(tokens) assert length(emissions) == 1 end diff --git a/test/codeqa/ast/signals/structural/branch_split_signal_test.exs b/test/codeqa/ast/signals/structural/branch_split_signal_test.exs index 320390c9..55b12a4f 100644 --- a/test/codeqa/ast/signals/structural/branch_split_signal_test.exs +++ b/test/codeqa/ast/signals/structural/branch_split_signal_test.exs @@ -2,7 +2,8 @@ defmodule CodeQA.AST.Signals.Structural.BranchSplitSignalTest do use ExUnit.Case, async: true alias CodeQA.AST.Lexing.TokenNormalizer - alias CodeQA.AST.Parsing.{Signal, SignalStream} + alias CodeQA.AST.Parsing.Signal + alias CodeQA.AST.Parsing.SignalStream alias CodeQA.AST.Signals.Structural.BranchSplitSignal alias CodeQA.Languages.Code.Scripting.PHP alias CodeQA.Languages.Code.Scripting.Python diff --git a/test/codeqa/block_impact/codebase_impact_test.exs b/test/codeqa/block_impact/codebase_impact_test.exs index 55ef4b44..848694ca 100644 --- a/test/codeqa/block_impact/codebase_impact_test.exs +++ b/test/codeqa/block_impact/codebase_impact_test.exs @@ -42,7 +42,7 @@ defmodule CodeQA.BlockImpact.CodebaseImpactTest do assert is_map(result) # Should have at least one group with mean_ keys all_keys = result |> Map.values() |> Enum.flat_map(&Map.keys/1) - assert Enum.any?(all_keys, &String.starts_with?(&1, "mean_")) + assert all_keys |> Enum.any?(&String.starts_with?(&1, "mean_")) end test "produces a different aggregate than the baseline when a large node is removed" do diff --git a/test/codeqa/block_impact/file_impact_test.exs b/test/codeqa/block_impact/file_impact_test.exs index b44f0a9d..661003c0 100644 --- a/test/codeqa/block_impact/file_impact_test.exs +++ b/test/codeqa/block_impact/file_impact_test.exs @@ -43,7 +43,7 @@ defmodule CodeQA.BlockImpact.FileImpactTest do tokens = TokenNormalizer.normalize_structural(tiny_content) nodes = Parser.detect_blocks(tokens, Unknown) # Find or construct a node with < 10 tokens - small_nodes = Enum.filter(nodes, fn n -> length(n.tokens) < 10 end) + small_nodes = nodes |> Enum.filter(&(length(&1.tokens) < 10)) if small_nodes != [] do node = List.first(small_nodes) diff --git a/test/codeqa/block_impact/refactoring_potentials_test.exs b/test/codeqa/block_impact/refactoring_potentials_test.exs index 8593dfc3..538e9a64 100644 --- a/test/codeqa/block_impact/refactoring_potentials_test.exs +++ b/test/codeqa/block_impact/refactoring_potentials_test.exs @@ -6,11 +6,11 @@ defmodule CodeQA.BlockImpact.RefactoringPotentialsTest do alias CodeQA.CombinedMetrics.SampleRunner alias CodeQA.Engine.Analyzer - defp file_cosines(fm) do - fm - |> FileScorer.file_to_aggregate() - |> SampleRunner.diagnose_aggregate(top: 99_999) - end + defp file_cosines(fm), + do: + fm + |> FileScorer.file_to_aggregate() + |> SampleRunner.diagnose_aggregate(top: 99_999) describe "compute/5" do test "returns a list of maps with category, behavior, cosine_delta" do @@ -49,7 +49,8 @@ defmodule CodeQA.BlockImpact.RefactoringPotentialsTest do assert is_list(result) - Enum.each(result, fn item -> + result + |> Enum.each(fn item -> assert Map.has_key?(item, "category") assert Map.has_key?(item, "behavior") assert Map.has_key?(item, "cosine_delta") @@ -102,7 +103,7 @@ defmodule CodeQA.BlockImpact.RefactoringPotentialsTest do top: 99 ) - deltas = Enum.map(result, & &1["cosine_delta"]) + deltas = result |> Enum.map(& &1["cosine_delta"]) assert deltas == Enum.sort(deltas, :desc) end @@ -170,7 +171,8 @@ defmodule CodeQA.BlockImpact.RefactoringPotentialsTest do ) excluded_present? = fn result -> - Enum.any?(result, fn p -> + result + |> Enum.any?(fn p -> p["category"] == "function_design" and p["behavior"] == "cyclomatic_complexity_under_10" end) end diff --git a/test/codeqa/block_impact_analyzer_test.exs b/test/codeqa/block_impact_analyzer_test.exs index e793f088..dd70a018 100644 --- a/test/codeqa/block_impact_analyzer_test.exs +++ b/test/codeqa/block_impact_analyzer_test.exs @@ -40,7 +40,8 @@ defmodule CodeQA.BlockImpactAnalyzerTest do nodes = result["files"]["lib/my_module.ex"]["nodes"] - Enum.each(nodes, fn node -> + nodes + |> Enum.each(fn node -> assert Map.has_key?(node, "start_line") assert Map.has_key?(node, "end_line") assert Map.has_key?(node, "column_start") @@ -60,7 +61,7 @@ defmodule CodeQA.BlockImpactAnalyzerTest do result = BlockImpactAnalyzer.analyze(pipeline_result, files) nodes = result["files"]["lib/my_module.ex"]["nodes"] - start_lines = Enum.map(nodes, & &1["start_line"]) + start_lines = nodes |> Enum.map(& &1["start_line"]) assert start_lines == Enum.sort(start_lines) end @@ -80,9 +81,8 @@ defmodule CodeQA.BlockImpactAnalyzerTest do nodes = result["files"]["lib/my_module.ex"]["nodes"] - Enum.each(nodes, fn node -> - assert length(node["refactoring_potentials"]) <= 1 - end) + nodes + |> Enum.each(&assert length(&1["refactoring_potentials"]) <= 1) end test "node['type'] reflects classified block kind, not the always-:code default" do diff --git a/test/codeqa/cli_test.exs b/test/codeqa/cli_test.exs index 9abd9911..9fa6e279 100644 --- a/test/codeqa/cli_test.exs +++ b/test/codeqa/cli_test.exs @@ -1,14 +1,16 @@ defmodule CodeQA.CLITest do use ExUnit.Case, async: false + alias CodeQA.CLI + alias CodeQA.Config setup do - CodeQA.Config.reset() + Config.reset() tmp_dir = Path.join(System.tmp_dir!(), "codeqa_test_#{System.unique_integer([:positive])}") File.mkdir_p!(Path.join(tmp_dir, "lib")) File.write!(Path.join(tmp_dir, "lib/app.ex"), "defmodule App do\nend\n") on_exit(fn -> - CodeQA.Config.reset() + Config.reset() File.rm_rf!(tmp_dir) end) @@ -25,7 +27,7 @@ defmodule CodeQA.CLITest do - ignored/** """) - json = CodeQA.CLI.main(["analyze", dir, "--show-files"]) + json = CLI.main(["analyze", dir, "--show-files"]) report = Jason.decode!(json) # total_files == 1 proves the ignored file was excluded (setup has exactly 2 files) @@ -35,7 +37,7 @@ defmodule CodeQA.CLITest do end test "works normally when .codeqa.yml is absent", %{dir: dir} do - json = CodeQA.CLI.main(["analyze", dir]) + json = CLI.main(["analyze", dir]) report = Jason.decode!(json) assert report["metadata"]["total_files"] == 1 @@ -52,7 +54,7 @@ defmodule CodeQA.CLITest do - ignored_by_config/** """) - json = CodeQA.CLI.main(["analyze", dir, "--ignore-paths", "ignored_by_flag/**"]) + json = CLI.main(["analyze", dir, "--ignore-paths", "ignored_by_flag/**"]) report = Jason.decode!(json) # Only lib/app.ex should be analyzed — both ignore sources must apply diff --git a/test/codeqa/collector_test.exs b/test/codeqa/collector_test.exs index f2aeb599..38dedd12 100644 --- a/test/codeqa/collector_test.exs +++ b/test/codeqa/collector_test.exs @@ -1,11 +1,12 @@ defmodule CodeQA.CollectorTest do use ExUnit.Case, async: false + alias CodeQA.Config alias CodeQA.Engine.Collector setup do - CodeQA.Config.reset() - on_exit(&CodeQA.Config.reset/0) + Config.reset() + on_exit(&Config.reset/0) end describe "ignored?/2" do @@ -165,7 +166,7 @@ defmodule CodeQA.CollectorTest do File.write!(Path.join(tmp_dir, "generated/schema.ex"), "defmodule Schema do\nend") File.write!(Path.join(tmp_dir, ".codeqa.yml"), "ignore_paths:\n - generated/**\n") - CodeQA.Config.load(tmp_dir) + Config.load(tmp_dir) files = Collector.collect_files(tmp_dir) assert Map.has_key?(files, "lib/app.ex") diff --git a/test/codeqa/combined_metrics/file_scorer_test.exs b/test/codeqa/combined_metrics/file_scorer_test.exs index 55ef9334..8047925a 100644 --- a/test/codeqa/combined_metrics/file_scorer_test.exs +++ b/test/codeqa/combined_metrics/file_scorer_test.exs @@ -79,7 +79,7 @@ defmodule CodeQA.CombinedMetrics.FileScorerTest do result = FileScorer.worst_files_per_behavior(files_map, combined_top: 99) for {_key, entries} <- result do - cosines = Enum.map(entries, & &1.cosine) + cosines = entries |> Enum.map(& &1.cosine) assert cosines == Enum.sort(cosines) end end @@ -93,7 +93,7 @@ defmodule CodeQA.CombinedMetrics.FileScorerTest do result = FileScorer.worst_files_per_behavior(files_map) for {_key, entries} <- result do - file_paths = Enum.map(entries, & &1.file) + file_paths = entries |> Enum.map(& &1.file) refute "lib/empty.ex" in file_paths refute "lib/nokey.ex" in file_paths end @@ -165,8 +165,10 @@ defmodule CodeQA.CombinedMetrics.FileScorerTest do test "top_nodes is [] when file_data nodes is nil" do files_map = - build_files_map() - |> Map.new(fn {path, data} -> {path, Map.put(data, "nodes", nil)} end) + for {path, data} <- build_files_map() do + {path, Map.put(data, "nodes", nil)} + end + |> Map.new() result = FileScorer.worst_files_per_behavior(files_map) @@ -177,8 +179,10 @@ defmodule CodeQA.CombinedMetrics.FileScorerTest do test "top_nodes is [] when file_data nodes is []" do files_map = - build_files_map() - |> Map.new(fn {path, data} -> {path, Map.put(data, "nodes", [])} end) + for {path, data} <- build_files_map() do + {path, Map.put(data, "nodes", [])} + end + |> Map.new() result = FileScorer.worst_files_per_behavior(files_map) @@ -197,7 +201,8 @@ defmodule CodeQA.CombinedMetrics.FileScorerTest do # Any behavior that only applies to rust should not have this .ex file in results rust_only_keys = - Enum.filter(results, fn {key, entries} -> + results + |> Enum.filter(fn {key, entries} -> [cat, beh] = String.split(key, ".", parts: 2) yaml_path = "priv/combined_metrics/#{cat}.yml" @@ -218,8 +223,8 @@ defmodule CodeQA.CombinedMetrics.FileScorerTest do # Build a realistic files_map using a real project file so diagnose_aggregate # has real metric values to work with. We use a small fixed map rather than # running the full analyzer to keep tests fast. - defp build_files_map do - %{ + defp build_files_map, + do: %{ "lib/example_a.ex" => %{ "metrics" => %{ "halstead" => %{ @@ -291,5 +296,4 @@ defmodule CodeQA.CombinedMetrics.FileScorerTest do "bytes" => 8192 } } - end end diff --git a/test/codeqa/combined_metrics/sample_runner_test.exs b/test/codeqa/combined_metrics/sample_runner_test.exs index 692c306a..e851eb0e 100644 --- a/test/codeqa/combined_metrics/sample_runner_test.exs +++ b/test/codeqa/combined_metrics/sample_runner_test.exs @@ -26,14 +26,15 @@ defmodule CodeQA.CombinedMetrics.SampleRunnerTest do langs = get_in(data, ["name_is_generic", "_languages"]) assert is_list(langs) assert langs != [] - assert Enum.all?(langs, &is_binary/1) + assert langs |> Enum.all?(&is_binary/1) end test "behaviors without sample dirs get no _languages key" do SampleRunner.apply_languages(category: "variable_naming") {:ok, data} = YamlElixir.read_from_file("priv/combined_metrics/variable_naming.yml") - Enum.each(data, fn {_behavior, groups} -> + data + |> Enum.each(fn {_behavior, groups} -> if is_map(groups) do case Map.get(groups, "_languages") do nil -> :ok @@ -93,7 +94,7 @@ defmodule CodeQA.CombinedMetrics.SampleRunnerTest do test "accepts :languages option without crashing" do result = SampleRunner.score_aggregate(%{}, languages: ["elixir"]) assert is_list(result) - assert Enum.all?(result, &Map.has_key?(&1, :behaviors)) + assert result |> Enum.all?(&Map.has_key?(&1, :behaviors)) end test "with languages option returns fewer behaviors than unfiltered" do @@ -134,7 +135,7 @@ defmodule CodeQA.CombinedMetrics.SampleRunnerTest do end test "name_is_generic result has good_score > bad_score", %{results: results} do - generic = Enum.find(results, &(&1.behavior == "name_is_generic")) + generic = results |> Enum.find(&(&1.behavior == "name_is_generic")) assert generic != nil assert generic.good_score > generic.bad_score end diff --git a/test/codeqa/diagnostics_test.exs b/test/codeqa/diagnostics_test.exs index 4e5db617..cea59974 100644 --- a/test/codeqa/diagnostics_test.exs +++ b/test/codeqa/diagnostics_test.exs @@ -1,11 +1,12 @@ defmodule CodeQA.DiagnosticsTest do use ExUnit.Case, async: true + alias CodeQA.Diagnostics @small_path Path.expand("../../lib/codeqa/health_report/formatter", __DIR__) describe "run/1 aggregate mode" do test "plain format output structure" do - output = CodeQA.Diagnostics.run(path: @small_path, mode: :aggregate, top: 5, format: :plain) + output = Diagnostics.run(path: @small_path, mode: :aggregate, top: 5, format: :plain) assert output =~ "## Diagnose: aggregate" assert output =~ "| Behavior | Cosine | Score |" @@ -13,7 +14,7 @@ defmodule CodeQA.DiagnosticsTest do end test "json format returns valid JSON with issues and categories keys" do - output = CodeQA.Diagnostics.run(path: @small_path, mode: :aggregate, top: 5, format: :json) + output = Diagnostics.run(path: @small_path, mode: :aggregate, top: 5, format: :json) decoded = Jason.decode!(output) assert Map.has_key?(decoded, "issues") @@ -24,21 +25,21 @@ defmodule CodeQA.DiagnosticsTest do describe "run/1 per-file mode" do @tag timeout: 120_000 test "runs without error on a small directory" do - output = CodeQA.Diagnostics.run(path: @small_path, mode: :per_file, top: 3, format: :plain) + output = Diagnostics.run(path: @small_path, mode: :per_file, top: 3, format: :plain) assert output =~ "## Diagnose: per-file" end @tag timeout: 120_000 test "output contains per-file table header" do - output = CodeQA.Diagnostics.run(path: @small_path, mode: :per_file, top: 3, format: :plain) + output = Diagnostics.run(path: @small_path, mode: :per_file, top: 3, format: :plain) assert output =~ "| File | Behavior | Cosine | Score |" end @tag timeout: 120_000 test "json format returns valid JSON with files key" do - output = CodeQA.Diagnostics.run(path: @small_path, mode: :per_file, top: 3, format: :json) + output = Diagnostics.run(path: @small_path, mode: :per_file, top: 3, format: :json) decoded = Jason.decode!(output) assert Map.has_key?(decoded, "files") diff --git a/test/codeqa/engine/analyzer_test.exs b/test/codeqa/engine/analyzer_test.exs index 38886c6c..0d50feb8 100644 --- a/test/codeqa/engine/analyzer_test.exs +++ b/test/codeqa/engine/analyzer_test.exs @@ -1,6 +1,7 @@ defmodule CodeQA.Engine.AnalyzerTest do use ExUnit.Case, async: true + alias CodeQA.CombinedMetrics.Scorer alias CodeQA.Engine.Analyzer describe "analyze_file/2" do @@ -10,7 +11,8 @@ defmodule CodeQA.Engine.AnalyzerTest do assert is_map(result) assert map_size(result) > 0 # Each value should be a map of metric keys to numbers - Enum.each(result, fn {_group, keys} -> + result + |> Enum.each(fn {_group, keys} -> assert is_map(keys) end) end @@ -26,8 +28,10 @@ defmodule CodeQA.Engine.AnalyzerTest do agg = Analyzer.analyze_codebase_aggregate(files) assert is_map(agg) # At least one group should have mean_ keys - Enum.each(agg, fn {_group, keys} -> - Enum.each(keys, fn {key, val} -> + agg + |> Enum.each(fn {_group, keys} -> + keys + |> Enum.each(fn {key, val} -> assert String.starts_with?(key, "mean_") or String.starts_with?(key, "std_") or String.starts_with?(key, "min_") or String.starts_with?(key, "max_") @@ -58,7 +62,7 @@ defmodule CodeQA.Engine.AnalyzerTest do test "result matches analyze_file_for_loo/2 for referenced metrics" do baseline = Analyzer.analyze_file_for_loo("lib/foo.ex", @sample) partial = Analyzer.analyze_file_for_loo_partial("lib/foo.ex", @sample, baseline) - referenced = CodeQA.CombinedMetrics.Scorer.referenced_file_metric_names() + referenced = Scorer.referenced_file_metric_names() for name <- referenced, Map.has_key?(baseline, name) do assert Map.get(partial, name) == Map.get(baseline, name), @@ -71,8 +75,9 @@ defmodule CodeQA.Engine.AnalyzerTest do sentinel = %{"sentinel_key" => 99.0} tampered_baseline = - Enum.reduce(baseline, %{}, fn {name, _val}, acc -> - if name in CodeQA.CombinedMetrics.Scorer.referenced_file_metric_names() do + baseline + |> Enum.reduce(%{}, fn {name, _val}, acc -> + if name in Scorer.referenced_file_metric_names() do Map.put(acc, name, baseline[name]) else Map.put(acc, name, sentinel) @@ -83,7 +88,7 @@ defmodule CodeQA.Engine.AnalyzerTest do Analyzer.analyze_file_for_loo_partial("lib/foo.ex", @sample, tampered_baseline) for {name, value} <- partial, - name not in CodeQA.CombinedMetrics.Scorer.referenced_file_metric_names() do + name not in Scorer.referenced_file_metric_names() do assert value == sentinel, "non-referenced metric #{name} was recomputed instead of inherited" end diff --git a/test/codeqa/git_test.exs b/test/codeqa/git_test.exs index f1a800cb..dc50405b 100644 --- a/test/codeqa/git_test.exs +++ b/test/codeqa/git_test.exs @@ -54,7 +54,7 @@ defmodule CodeQA.GitTest do in_tmp_git_repo(fn repo -> File.write!(Path.join(repo, ".gitignore"), "ignored.ex\n") - paths = Enum.map(1..1200, fn i -> "file_#{i}.ex" end) ++ ["ignored.ex"] + paths = Enum.map(1..1200, &"file_#{&1}.ex") ++ ["ignored.ex"] ignored = Git.gitignored_files(repo, paths) @@ -136,7 +136,7 @@ defmodule CodeQA.GitTest do test "handles multiple hunks in same file" do in_tmp_git_repo(fn repo -> - lines = Enum.map_join(1..20, "\n", &"line#{&1}") + lines = 1..20 |> Enum.map_join("\n", &"line#{&1}") File.write!(Path.join(repo, "foo.ex"), lines <> "\n") {_, 0} = System.cmd("git", ["add", "."], cd: repo) {_, 0} = System.cmd("git", ["commit", "-m", "initial"], cd: repo) @@ -144,12 +144,11 @@ defmodule CodeQA.GitTest do # Change line 2 and line 15 new_lines = 1..20 - |> Enum.map(fn + |> Enum.map_join("\n", fn 2 -> "changed2" 15 -> "changed15" n -> "line#{n}" end) - |> Enum.join("\n") File.write!(Path.join(repo, "foo.ex"), new_lines <> "\n") {_, 0} = System.cmd("git", ["add", "."], cd: repo) @@ -253,7 +252,7 @@ defmodule CodeQA.GitTest do test "returns ranges in ascending order" do in_tmp_git_repo(fn repo -> - lines = Enum.map_join(1..20, "\n", &"line#{&1}") + lines = 1..20 |> Enum.map_join("\n", &"line#{&1}") File.write!(Path.join(repo, "foo.ex"), lines <> "\n") {_, 0} = System.cmd("git", ["add", "."], cd: repo) {_, 0} = System.cmd("git", ["commit", "-m", "initial"], cd: repo) @@ -261,13 +260,12 @@ defmodule CodeQA.GitTest do # Change lines 2, 10, and 18 new_lines = 1..20 - |> Enum.map(fn + |> Enum.map_join("\n", fn 2 -> "changed2" 10 -> "changed10" 18 -> "changed18" n -> "line#{n}" end) - |> Enum.join("\n") File.write!(Path.join(repo, "foo.ex"), new_lines <> "\n") {_, 0} = System.cmd("git", ["add", "."], cd: repo) diff --git a/test/codeqa/health_report/categories_test.exs b/test/codeqa/health_report/categories_test.exs index 0912d46b..6b4e2821 100644 --- a/test/codeqa/health_report/categories_test.exs +++ b/test/codeqa/health_report/categories_test.exs @@ -7,9 +7,10 @@ defmodule CodeQA.HealthReport.CategoriesTest do test "all metrics have fix_hint field" do categories = Categories.defaults() - metrics = Enum.flat_map(categories, & &1.metrics) + metrics = categories |> Enum.flat_map(& &1.metrics) - Enum.each(metrics, fn metric -> + metrics + |> Enum.each(fn metric -> assert Map.has_key?(metric, :fix_hint), "Metric #{metric.name} missing :fix_hint field" @@ -24,7 +25,8 @@ defmodule CodeQA.HealthReport.CategoriesTest do test "all categories have expected keys" do categories = Categories.defaults() - Enum.each(categories, fn category -> + categories + |> Enum.each(fn category -> assert Map.has_key?(category, :key) assert Map.has_key?(category, :name) assert Map.has_key?(category, :metrics) @@ -34,9 +36,10 @@ defmodule CodeQA.HealthReport.CategoriesTest do test "all metrics have required threshold keys" do categories = Categories.defaults() - metrics = Enum.flat_map(categories, & &1.metrics) + metrics = categories |> Enum.flat_map(& &1.metrics) - Enum.each(metrics, fn metric -> + metrics + |> Enum.each(fn metric -> assert Map.has_key?(metric, :name) assert Map.has_key?(metric, :source) assert Map.has_key?(metric, :weight) @@ -49,9 +52,10 @@ defmodule CodeQA.HealthReport.CategoriesTest do test "fix_hint is accessible via Map.get" do categories = Categories.defaults() - metrics = Enum.flat_map(categories, & &1.metrics) + metrics = categories |> Enum.flat_map(& &1.metrics) - Enum.each(metrics, fn metric -> + metrics + |> Enum.each(fn metric -> hint = Map.get(metric, :fix_hint) assert is_binary(hint) assert String.length(hint) > 0 @@ -63,7 +67,7 @@ defmodule CodeQA.HealthReport.CategoriesTest do assert length(categories) == 6 - metrics = Enum.flat_map(categories, & &1.metrics) + metrics = categories |> Enum.flat_map(& &1.metrics) assert length(metrics) == 24 end diff --git a/test/codeqa/health_report/delta_test.exs b/test/codeqa/health_report/delta_test.exs index 6932e0c0..21a2e1b6 100644 --- a/test/codeqa/health_report/delta_test.exs +++ b/test/codeqa/health_report/delta_test.exs @@ -3,9 +3,7 @@ defmodule CodeQA.HealthReport.DeltaTest do alias CodeQA.HealthReport.Delta - defp make_results(aggregate) do - %{"codebase" => %{"aggregate" => aggregate}} - end + defp make_results(aggregate), do: %{"codebase" => %{"aggregate" => aggregate}} test "returns base, head, and delta aggregates" do base = make_results(%{"entropy" => %{"mean_value" => 5.0}}) diff --git a/test/codeqa/health_report/formatter_test.exs b/test/codeqa/health_report/formatter_test.exs index 8518d945..55c922c4 100644 --- a/test/codeqa/health_report/formatter_test.exs +++ b/test/codeqa/health_report/formatter_test.exs @@ -4,110 +4,104 @@ defmodule CodeQA.HealthReport.FormatterTest do alias CodeQA.HealthReport.Formatter @sample_report %{ - metadata: %{path: "/home/user/project", timestamp: "2026-03-11T00:00:00Z", total_files: 42}, - overall_score: 79, - overall_grade: "B+", categories: [ %{ - type: :threshold, - name: "Readability", - key: :readability, - score: 100, grade: "A", impact: 3, - summary: "Excellent", + key: :readability, metric_scores: [ %{ + good: :high, name: "flesch_adapted", + score: 100, source: "readability", - weight: 0.4, - good: :high, value: 102.5, - score: 100 + weight: 0.4 } ], + name: "Readability", + score: 100, + summary: "Excellent", + type: :threshold, worst_offenders: [ %{ - path: "lib/foo.ex", - score: 75, + bytes: 3840, grade: "B+", lines: 120, - bytes: 3840, metric_scores: [ %{ + good: :high, name: "flesch_adapted", + score: 75, source: "readability", - good: :high, - value: 65.0, - score: 75 + value: 65.0 } - ] + ], + path: "lib/foo.ex", + score: 75 } ] }, %{ - type: :threshold, - name: "Complexity", - key: :complexity, - score: 35, grade: "D", impact: 5, - summary: "Critical — requires attention", + key: :complexity, metric_scores: [ - %{name: "difficulty", source: "halstead", weight: 0.35, value: 24.01, score: 65} + %{name: "difficulty", score: 65, source: "halstead", value: 24.01, weight: 0.35} ], + name: "Complexity", + score: 35, + summary: "Critical — requires attention", + type: :threshold, worst_offenders: [] } - ] + ], + metadata: %{path: "/home/user/project", timestamp: "2026-03-11T00:00:00Z", total_files: 42}, + overall_grade: "B+", + overall_score: 79 } @cosine_category %{ - type: :cosine, - key: "function_design", - name: "Function Design", - score: 64, - grade: "C", - impact: 1, behaviors: [ %{ behavior: "no_boolean_parameter", cosine: 0.12, - score: 56, grade: "C", + score: 56, worst_offenders: [ - %{file: "lib/foo/bar.ex", cosine: -0.71} + %{cosine: -0.71, file: "lib/foo/bar.ex"} ] }, %{ behavior: "single_responsibility", cosine: 0.45, - score: 78, grade: "B+", + score: 78, worst_offenders: [] } - ] - } - - @enriched_cosine_category %{ - type: :cosine, + ], + grade: "C", + impact: 1, key: "function_design", name: "Function Design", score: 64, - grade: "C", - impact: 1, + type: :cosine + } + + @enriched_cosine_category %{ behaviors: [ %{ behavior: "no_boolean_parameter", cosine: -0.65, - score: 42, grade: "D+", + score: 42, worst_offenders: [ %{ - file: "lib/codeqa/formatter.ex", cosine: -0.65, + file: "lib/codeqa/formatter.ex", top_metrics: [ - %{metric: "branching.mean_depth", contribution: -4.10}, - %{metric: "halstead.effort", contribution: -3.22} + %{contribution: -4.10, metric: "branching.mean_depth"}, + %{contribution: -3.22, metric: "halstead.effort"} ], top_nodes: [ %{"start_line" => 89, "type" => "block"}, @@ -116,30 +110,36 @@ defmodule CodeQA.HealthReport.FormatterTest do } ] } - ] + ], + grade: "C", + impact: 1, + key: "function_design", + name: "Function Design", + score: 64, + type: :cosine } @enriched_threshold_category %{ - type: :threshold, - name: "Complexity", - key: :complexity, - score: 32, grade: "F", impact: 5, - summary: "Critical", + key: :complexity, metric_scores: [ - %{name: "difficulty", source: "halstead", weight: 0.35, good: :low, value: 39.0, score: 32} + %{good: :low, name: "difficulty", score: 32, source: "halstead", value: 39.0, weight: 0.35} ], + name: "Complexity", + score: 32, + summary: "Critical", + type: :threshold, worst_offenders: [ %{ - path: "lib/foo.ex", - score: 32, + bytes: 15872, grade: "F", lines: 491, - bytes: 15_872, metric_scores: [ - %{name: "difficulty", source: "halstead", good: :low, value: 99.0, score: 0} + %{good: :low, name: "difficulty", score: 0, source: "halstead", value: 99.0} ], + path: "lib/foo.ex", + score: 32, top_nodes: [ %{"start_line" => 201, "type" => "block"}, %{"start_line" => 312, "type" => "block"} @@ -210,15 +210,15 @@ defmodule CodeQA.HealthReport.FormatterTest do describe "plain formatter: PR summary section" do @sample_report_with_pr Map.put(@sample_report, :pr_summary, %{ - base_score: 85, - head_score: 77, - score_delta: -8, base_grade: "B+", - head_grade: "C+", + base_score: 85, blocks_flagged: 6, - files_changed: 3, files_added: 1, - files_modified: 2 + files_changed: 3, + files_modified: 2, + head_grade: "C+", + head_score: 77, + score_delta: -8 }) test "renders PR summary line when pr_summary present" do @@ -271,24 +271,24 @@ defmodule CodeQA.HealthReport.FormatterTest do describe "plain formatter: block section" do @block_potential %{ - category: "function_design", behavior: "cyclomatic_complexity_under_10", + category: "function_design", cosine_delta: 0.41, - severity: :critical, - fix_hint: "Reduce branching" + fix_hint: "Reduce branching", + severity: :critical } @top_blocks [ %{ + end_line: 67, + language: "elixir", path: "lib/foo.ex", - status: "modified", + potentials: [@block_potential], + source: "def foo do\n :bar\nend", start_line: 42, - end_line: 67, - type: "code", + status: "modified", token_count: 84, - source: "def foo do\n :bar\nend", - language: "elixir", - potentials: [@block_potential] + type: "code" } ] @@ -426,24 +426,24 @@ defmodule CodeQA.HealthReport.FormatterTest do describe "github formatter: block section" do @block_potential %{ - category: "function_design", behavior: "cyclomatic_complexity_under_10", + category: "function_design", cosine_delta: 0.41, - severity: :critical, - fix_hint: "Reduce branching" + fix_hint: "Reduce branching", + severity: :critical } @top_blocks_gh [ %{ + end_line: 67, + language: "elixir", path: "lib/foo.ex", - status: "modified", + potentials: [@block_potential], + source: "def foo do\n :bar\nend", start_line: 42, - end_line: 67, - type: "code", + status: "modified", token_count: 84, - source: "def foo do\n :bar\nend", - language: "elixir", - potentials: [@block_potential] + type: "code" } ] @@ -472,15 +472,15 @@ defmodule CodeQA.HealthReport.FormatterTest do describe "github formatter: PR summary and delta" do @pr_summary_gh %{ - base_score: 85, - head_score: 77, - score_delta: -8, base_grade: "B+", - head_grade: "C+", + base_score: 85, blocks_flagged: 6, - files_changed: 3, files_added: 1, - files_modified: 2 + files_changed: 3, + files_modified: 2, + head_grade: "C+", + head_score: 77, + score_delta: -8 } @delta_gh %{ @@ -514,7 +514,8 @@ defmodule CodeQA.HealthReport.FormatterTest do test "each part ends with sentinel comment" do parts = Formatter.render_parts(@sample_report) - Enum.with_index(parts, 1) + parts + |> Enum.with_index(1) |> Enum.each(fn {part, n} -> assert part =~ "" end) @@ -560,27 +561,28 @@ defmodule CodeQA.HealthReport.FormatterTest do test "returns single part with blocks (top 10 limit means no slicing needed)" do blocks = - Enum.map(1..10, fn i -> - %{ - path: "lib/file_#{i}.ex", - status: "modified", - start_line: 10, + 1..10 + |> Enum.map( + &%{ end_line: 30, - type: "function", - token_count: 150, - source: "def foo, do: :bar", language: "elixir", + path: "lib/file_#{&1}.ex", potentials: [ %{ - category: "function_design", behavior: "single_responsibility", + category: "function_design", cosine_delta: 0.35, - severity: :high, - fix_hint: "Consider extracting helper function" + fix_hint: "Consider extracting helper function", + severity: :high } - ] + ], + source: "def foo, do: :bar", + start_line: 10, + status: "modified", + token_count: 150, + type: "function" } - end) + ) report = Map.put(@sample_report, :top_blocks, blocks) parts = Github.render_parts_3(report) @@ -592,23 +594,23 @@ defmodule CodeQA.HealthReport.FormatterTest do test "part ends with sentinel" do blocks = [ %{ - path: "lib/foo.ex", - status: nil, - start_line: 1, end_line: 10, - type: "code", - token_count: 50, - source: "def foo, do: :bar", language: "elixir", + path: "lib/foo.ex", potentials: [ %{ - category: "function_design", behavior: "single_responsibility", + category: "function_design", cosine_delta: 0.35, - severity: :high, - fix_hint: nil + fix_hint: nil, + severity: :high } - ] + ], + source: "def foo, do: :bar", + start_line: 1, + status: nil, + token_count: 50, + type: "code" } ] @@ -620,23 +622,23 @@ defmodule CodeQA.HealthReport.FormatterTest do test "renders source code in fenced block" do blocks = [ %{ - path: "lib/foo.ex", - status: nil, - start_line: 1, end_line: 10, - type: "code", - token_count: 50, - source: "def hello do\n :world\nend", language: "elixir", + path: "lib/foo.ex", potentials: [ %{ - category: "function_design", behavior: "single_responsibility", + category: "function_design", cosine_delta: 0.35, - severity: :high, - fix_hint: nil + fix_hint: nil, + severity: :high } - ] + ], + source: "def hello do\n :world\nend", + start_line: 1, + status: nil, + token_count: 50, + type: "code" } ] diff --git a/test/codeqa/health_report/grader_test.exs b/test/codeqa/health_report/grader_test.exs index 6f9ea544..4dcd0a16 100644 --- a/test/codeqa/health_report/grader_test.exs +++ b/test/codeqa/health_report/grader_test.exs @@ -164,7 +164,7 @@ defmodule CodeQA.HealthReport.GraderTest do result = Analyzer.analyze_codebase(files) aggregate = get_in(result, ["codebase", "aggregate"]) all_cosines = SampleRunner.diagnose_aggregate(aggregate, top: 99_999) - cosines_by_category = Enum.group_by(all_cosines, & &1.category) + cosines_by_category = all_cosines |> Enum.group_by(& &1.category) {:ok, cosines_by_category: cosines_by_category} end @@ -251,7 +251,7 @@ defmodule CodeQA.HealthReport.GraderTest do end test "worst_offenders uses worst_files lookup", %{cosines_by_category: cosines_by_category} do - sentinel = [%{file: "lib/sentinel.ex", cosine: -0.99}] + sentinel = [%{cosine: -0.99, file: "lib/sentinel.ex"}] # Get one real behavior key to inject into worst_files [first_cat | _] = Grader.grade_cosine_categories(cosines_by_category, %{}, @default_scale) first_behavior = hd(first_cat.behaviors) @@ -260,8 +260,8 @@ defmodule CodeQA.HealthReport.GraderTest do worst_files = %{lookup_key => sentinel} result = Grader.grade_cosine_categories(cosines_by_category, worst_files, @default_scale) - found_cat = Enum.find(result, &(&1.key == first_cat.key)) - found_behavior = Enum.find(found_cat.behaviors, &(&1.behavior == first_behavior.behavior)) + found_cat = result |> Enum.find(&(&1.key == first_cat.key)) + found_behavior = found_cat.behaviors |> Enum.find(&(&1.behavior == first_behavior.behavior)) assert found_behavior.worst_offenders == sentinel end @@ -270,9 +270,9 @@ defmodule CodeQA.HealthReport.GraderTest do } do sentinel = [ %{ - file: "lib/sentinel.ex", cosine: -0.99, - top_metrics: [%{metric: "foo.bar", contribution: -1.5}], + file: "lib/sentinel.ex", + top_metrics: [%{contribution: -1.5, metric: "foo.bar"}], top_nodes: [%{"start_line" => 42, "type" => "block"}] } ] @@ -284,8 +284,8 @@ defmodule CodeQA.HealthReport.GraderTest do worst_files = %{lookup_key => sentinel} result = Grader.grade_cosine_categories(cosines_by_category, worst_files, @default_scale) - found_cat = Enum.find(result, &(&1.key == first_cat.key)) - found_behavior = Enum.find(found_cat.behaviors, &(&1.behavior == first_behavior.behavior)) + found_cat = result |> Enum.find(&(&1.key == first_cat.key)) + found_behavior = found_cat.behaviors |> Enum.find(&(&1.behavior == first_behavior.behavior)) assert found_behavior.worst_offenders == sentinel end @@ -304,16 +304,16 @@ defmodule CodeQA.HealthReport.GraderTest do describe "worst_offenders/4 top_nodes" do @category %{ key: :function_design, - name: "Function Design", metrics: [ %{ - source: "halstead", - name: "tokens", - weight: 1.0, good: :low, - thresholds: %{a: 10, b: 20, c: 30, d: 40} + name: "tokens", + source: "halstead", + thresholds: %{a: 10, b: 20, c: 30, d: 40}, + weight: 1.0 } - ] + ], + name: "Function Design" } test "returns top_nodes: [] when file_data has no nodes key" do @@ -488,7 +488,7 @@ defmodule CodeQA.HealthReport.GraderTest do # child_node is not top-level, so only top-level nodes are considered assert length(entry.top_nodes) == 3 - start_lines = Enum.map(entry.top_nodes, & &1["start_line"]) + start_lines = entry.top_nodes |> Enum.map(& &1["start_line"]) refute 11 in start_lines end end diff --git a/test/codeqa/health_report/top_blocks_test.exs b/test/codeqa/health_report/top_blocks_test.exs index 4bef28b2..d2a47f3f 100644 --- a/test/codeqa/health_report/top_blocks_test.exs +++ b/test/codeqa/health_report/top_blocks_test.exs @@ -5,8 +5,8 @@ defmodule CodeQA.HealthReport.TopBlocksTest do alias CodeQA.HealthReport.TopBlocks # A node with cosine_delta 0.60 — will be :critical when codebase_cosine = 0.0 (gap=1.0, ratio=0.60) - defp make_node(cosine_delta, token_count \\ 20) do - %{ + defp make_node(cosine_delta, token_count \\ 20), + do: %{ "start_line" => 1, "end_line" => 10, "type" => "code", @@ -20,15 +20,12 @@ defmodule CodeQA.HealthReport.TopBlocksTest do ], "children" => [] } - end - defp make_results(nodes) do - %{"files" => %{"lib/foo.ex" => %{"nodes" => nodes}}, "metadata" => %{"path" => "/tmp"}} - end + defp make_results(nodes), + do: %{"files" => %{"lib/foo.ex" => %{"nodes" => nodes}}, "metadata" => %{"path" => "/tmp"}} - defp lookup(cosine \\ 0.0) do - %{{"function_design", "cyclomatic_complexity_under_10"} => cosine} - end + defp lookup(cosine \\ 0.0), + do: %{{"function_design", "cyclomatic_complexity_under_10"} => cosine} describe "severity classification" do test ":critical when severity_ratio > 0.50" do @@ -109,7 +106,7 @@ defmodule CodeQA.HealthReport.TopBlocksTest do } blocks = TopBlocks.build(results, [], lookup()) - deltas = Enum.map(blocks, fn b -> hd(b.potentials).cosine_delta end) + deltas = blocks |> Enum.map(&hd(&1.potentials).cosine_delta) assert deltas == Enum.sort(deltas, :desc) end diff --git a/test/codeqa/health_report_test.exs b/test/codeqa/health_report_test.exs index 80f8575f..13ae5ebb 100644 --- a/test/codeqa/health_report_test.exs +++ b/test/codeqa/health_report_test.exs @@ -11,7 +11,7 @@ defmodule CodeQA.HealthReportTest do test "without base_results: pr_summary and codebase_delta are nil" do files = %{"lib/foo.ex" => "defmodule Foo do\n def bar, do: :ok\nend\n"} results = Analyzer.analyze_codebase(files) - results = BlockImpactAnalyzer.analyze(results, files) + results = results |> BlockImpactAnalyzer.analyze(files) report = HealthReport.generate(results) @@ -28,14 +28,15 @@ defmodule CodeQA.HealthReportTest do test "without base_results: top_blocks shows top 10 blocks by impact" do files = %{"lib/foo.ex" => "defmodule Foo do\n def bar, do: :ok\nend\n"} results = Analyzer.analyze_codebase(files) - results = BlockImpactAnalyzer.analyze(results, files) + results = results |> BlockImpactAnalyzer.analyze(files) report = HealthReport.generate(results) # top_blocks is a flat list of blocks (may be empty if no blocks above threshold) assert is_list(report.top_blocks) - Enum.each(report.top_blocks, fn block -> + report.top_blocks + |> Enum.each(fn block -> assert Map.has_key?(block, :path) assert Map.has_key?(block, :status) assert Map.has_key?(block, :potentials) @@ -48,13 +49,12 @@ defmodule CodeQA.HealthReportTest do test "worst_offenders is always empty in categories" do files = %{"lib/foo.ex" => "defmodule Foo do\n def bar, do: :ok\nend\n"} results = Analyzer.analyze_codebase(files) - results = BlockImpactAnalyzer.analyze(results, files) + results = results |> BlockImpactAnalyzer.analyze(files) report = HealthReport.generate(results) - Enum.each(report.categories, fn cat -> - assert Map.get(cat, :worst_offenders, []) == [] - end) + report.categories + |> Enum.each(&assert Map.get(&1, :worst_offenders, []) == []) end end @@ -63,7 +63,7 @@ defmodule CodeQA.HealthReportTest do test "pr_summary is populated" do files = %{"lib/foo.ex" => "defmodule Foo do\n def bar, do: :ok\nend\n"} head_results = Analyzer.analyze_codebase(files) - head_results = BlockImpactAnalyzer.analyze(head_results, files) + head_results = head_results |> BlockImpactAnalyzer.analyze(files) base_results = Analyzer.analyze_codebase(files) changed = [%ChangedFile{path: "lib/foo.ex", status: "modified"}] @@ -75,15 +75,15 @@ defmodule CodeQA.HealthReportTest do ) assert %{ - base_score: base_score, - head_score: head_score, - score_delta: delta, base_grade: _, - head_grade: _, + base_score: base_score, blocks_flagged: flagged, - files_changed: 1, files_added: 0, - files_modified: 1 + files_changed: 1, + files_modified: 1, + head_grade: _, + head_score: head_score, + score_delta: delta } = report.pr_summary assert is_integer(base_score) @@ -96,12 +96,12 @@ defmodule CodeQA.HealthReportTest do test "codebase_delta is populated" do files = %{"lib/foo.ex" => "defmodule Foo do\n def bar, do: :ok\nend\n"} head_results = Analyzer.analyze_codebase(files) - head_results = BlockImpactAnalyzer.analyze(head_results, files) + head_results = head_results |> BlockImpactAnalyzer.analyze(files) base_results = Analyzer.analyze_codebase(files) report = HealthReport.generate(head_results, base_results: base_results) - assert %{base: %{aggregate: _}, head: %{aggregate: _}, delta: %{aggregate: _}} = + assert %{base: %{aggregate: _}, delta: %{aggregate: _}, head: %{aggregate: _}} = report.codebase_delta end @@ -113,7 +113,7 @@ defmodule CodeQA.HealthReportTest do } head_results = Analyzer.analyze_codebase(files) - head_results = BlockImpactAnalyzer.analyze(head_results, files) + head_results = head_results |> BlockImpactAnalyzer.analyze(files) base_results = Analyzer.analyze_codebase(files) changed = [%ChangedFile{path: "lib/foo.ex", status: "modified"}] @@ -124,7 +124,7 @@ defmodule CodeQA.HealthReportTest do changed_files: changed ) - paths = Enum.map(report.top_blocks, & &1.path) + paths = report.top_blocks |> Enum.map(& &1.path) refute "lib/bar.ex" in paths end end diff --git a/test/codeqa/metrics/codebase/near_duplicate_blocks_codebase_test.exs b/test/codeqa/metrics/codebase/near_duplicate_blocks_codebase_test.exs index 1c797761..f6d934e2 100644 --- a/test/codeqa/metrics/codebase/near_duplicate_blocks_codebase_test.exs +++ b/test/codeqa/metrics/codebase/near_duplicate_blocks_codebase_test.exs @@ -3,7 +3,7 @@ defmodule CodeQA.Metrics.Codebase.NearDuplicateBlocksCodebaseTest do alias CodeQA.Analysis.FileContextServer alias CodeQA.Metrics.Codebase.NearDuplicateBlocksCodebase - defp files(pairs), do: Map.new(pairs) + defp files(pairs), do: pairs |> Map.new() defp with_pid(fun) do {:ok, pid} = FileContextServer.start_link() @@ -91,7 +91,7 @@ defmodule CodeQA.Metrics.Codebase.NearDuplicateBlocksCodebaseTest do ) pairs_lists = result |> Map.values() |> Enum.filter(&is_list/1) - assert Enum.all?(pairs_lists, &(length(&1) <= 2)) + assert pairs_lists |> Enum.all?(&(length(&1) <= 2)) end) end end diff --git a/test/codeqa/metrics/codebase/similarity_test.exs b/test/codeqa/metrics/codebase/similarity_test.exs index d20dbf13..58aef269 100644 --- a/test/codeqa/metrics/codebase/similarity_test.exs +++ b/test/codeqa/metrics/codebase/similarity_test.exs @@ -64,7 +64,7 @@ defmodule CodeQA.Metrics.Codebase.SimilarityTest do pairs = result["ncd_pairs"] scores = pairs |> Map.values() |> List.flatten() |> Enum.map(& &1["score"]) - assert Enum.all?(scores, &(&1 < 0.2)) + assert scores |> Enum.all?(&(&1 < 0.2)) end test "ncd_paths restricts which files are compared" do diff --git a/test/codeqa/metrics/file/bradford_test.exs b/test/codeqa/metrics/file/bradford_test.exs index db948d9e..5e797fda 100644 --- a/test/codeqa/metrics/file/bradford_test.exs +++ b/test/codeqa/metrics/file/bradford_test.exs @@ -44,7 +44,7 @@ defmodule CodeQA.Metrics.File.BradfordTest do # k2 = 3/3 = 1.0 — tail needs the same number of lines as the middle # k_ratio = 1.0 — perfectly symmetric: no zone is more stretched than another test "uniform file has k = 1" do - code = Enum.map_join(1..9, "\n", fn _ -> "a b c" end) + code = 1..9 |> Enum.map_join("\n", fn _ -> "a b c" end) assert result(code) == %{"k1" => 1.0, "k2" => 1.0, "k_ratio" => 1.0} end end @@ -62,9 +62,9 @@ defmodule CodeQA.Metrics.File.BradfordTest do # meaning extreme concentration is at the very top, not spread across zones test "concentrated file produces k1=4.0, k2=2.0, k_ratio=0.5" do dense = "a b c d e f g h i j" - medium = Enum.map_join(1..3, "\n", fn _ -> "a b c" end) - sparse = Enum.map_join(1..9, "\n", fn _ -> "a" end) - code = Enum.join([dense, medium, sparse], "\n") + medium = 1..3 |> Enum.map_join("\n", fn _ -> "a b c" end) + sparse = 1..9 |> Enum.map_join("\n", fn _ -> "a" end) + code = [dense, medium, sparse] |> Enum.join("\n") assert result(code) == %{ # 1 dense line does the work of 4 middle lines — extreme core @@ -79,12 +79,12 @@ defmodule CodeQA.Metrics.File.BradfordTest do test "concentrated file has higher k1 than uniform" do # k1 is the primary concentration signal: how many times more lines the # middle zone needs compared to the core. A uniform file scores 1.0 here. - uniform = Enum.map_join(1..9, "\n", fn _ -> "a b c" end) + uniform = 1..9 |> Enum.map_join("\n", fn _ -> "a b c" end) dense = "a b c d e f g h i j" - medium = Enum.map_join(1..3, "\n", fn _ -> "a b c" end) - sparse = Enum.map_join(1..9, "\n", fn _ -> "a" end) - concentrated = Enum.join([dense, medium, sparse], "\n") + medium = 1..3 |> Enum.map_join("\n", fn _ -> "a b c" end) + sparse = 1..9 |> Enum.map_join("\n", fn _ -> "a" end) + concentrated = [dense, medium, sparse] |> Enum.join("\n") assert result(concentrated)["k1"] > result(uniform)["k1"] end @@ -97,24 +97,22 @@ defmodule CodeQA.Metrics.File.BradfordTest do # k_ratio > 1 → k2 > k1 → the tail is more stretched than the core jump, # typical of many medium lines plus a huge sparse tail code = - Enum.join( - [ - "a b c d e f g h i j", - "a b c", - "a b c", - "a b c", - "a", - "a", - "a", - "a", - "a", - "a", - "a", - "a", - "a" - ], - "\n" - ) + [ + "a b c d e f g h i j", + "a b c", + "a b c", + "a b c", + "a", + "a", + "a", + "a", + "a", + "a", + "a", + "a", + "a" + ] + |> Enum.join("\n") assert result(code)["k_ratio"] < 1.0 end diff --git a/test/codeqa/metrics/file/near_duplicate_blocks_file_test.exs b/test/codeqa/metrics/file/near_duplicate_blocks_file_test.exs index cb10540c..6a819ea5 100644 --- a/test/codeqa/metrics/file/near_duplicate_blocks_file_test.exs +++ b/test/codeqa/metrics/file/near_duplicate_blocks_file_test.exs @@ -3,9 +3,7 @@ defmodule CodeQA.Metrics.File.NearDuplicateBlocksFileTest do alias CodeQA.Engine.Pipeline alias CodeQA.Metrics.File.NearDuplicateBlocksFile - defp ctx(code, path \\ "test.ex") do - Pipeline.build_file_context(code, path: path) - end + defp ctx(code, path \\ "test.ex"), do: code |> Pipeline.build_file_context(path: path) describe "name/0" do test "returns near_duplicate_blocks_file" do @@ -45,7 +43,7 @@ defmodule CodeQA.Metrics.File.NearDuplicateBlocksFileTest do test "no _pairs keys in output" do result = NearDuplicateBlocksFile.analyze(ctx("x = 1\n")) - refute Enum.any?(Map.keys(result), &String.ends_with?(&1, "_pairs")) + refute Map.keys(result) |> Enum.any?(&String.ends_with?(&1, "_pairs")) end test "detects exact duplicate blocks at d0" do diff --git a/test/codeqa/metrics/file/near_duplicate_blocks_test.exs b/test/codeqa/metrics/file/near_duplicate_blocks_test.exs index a65e201e..e25d1897 100644 --- a/test/codeqa/metrics/file/near_duplicate_blocks_test.exs +++ b/test/codeqa/metrics/file/near_duplicate_blocks_test.exs @@ -18,19 +18,18 @@ defmodule CodeQA.Metrics.File.NearDuplicateBlocksTest do end describe "find_pairs/2 idf_max_freq option" do - defp make_block(tokens, label) do - %CodeQA.AST.Enrichment.Node{ + defp make_block(tokens, label), + do: %CodeQA.AST.Enrichment.Node{ + children: [], label: label, - tokens: Enum.map(tokens, &%{kind: &1}), line_count: length(tokens), - children: [] + tokens: tokens |> Enum.map(&%{kind: &1}) } - end test "exact duplicates are still detected when all bigrams are high-frequency" do # 30 blocks all sharing bigram [end, nil] → pruned by IDF # Two additional identical blocks → should still match via exact hash index (d0) - common = Enum.map(1..30, fn i -> make_block(~w[end nil common_#{i}], "file:#{i}") end) + common = 1..30 |> Enum.map(&make_block(~w[end nil common_#{&1}], "file:#{&1}")) dup = make_block(~w[end nil special unique_token], "dup:1") dup2 = make_block(~w[end nil special unique_token], "dup:2") @@ -42,7 +41,7 @@ defmodule CodeQA.Metrics.File.NearDuplicateBlocksTest do test "near-duplicates are detected via non-pruned unique bigrams" do # 50 blocks all sharing [end, nil] → pruned # Two near-duplicates sharing unique bigrams [nil, special], [special, alpha] → not pruned - common = Enum.map(1..50, fn i -> make_block(~w[end nil common_#{i}], "common:#{i}") end) + common = 1..50 |> Enum.map(&make_block(~w[end nil common_#{&1}], "common:#{&1}")) near_a = make_block(~w[end nil special alpha beta gamma], "near:1") near_b = make_block(~w[end nil special alpha beta delta], "near:2") @@ -58,10 +57,10 @@ defmodule CodeQA.Metrics.File.NearDuplicateBlocksTest do child = make_block(["x"], "child:1") parent = %CodeQA.AST.Enrichment.Node{ + children: [child, child], label: "a:1", - tokens: Enum.map(["def", "", "end"], &%{kind: &1}), line_count: 3, - children: [child, child] + tokens: ["def", "", "end"] |> Enum.map(&%{kind: &1}) } solo = make_block(["y", "z", "w", "v", "u"], "b:1") @@ -192,7 +191,12 @@ defmodule CodeQA.Metrics.File.NearDuplicateBlocksTest do # one identifier differs block_b = "def bar\n x = 1\nend\n" result = NDB.analyze([{"a.ex", block_a <> "\n\n" <> block_b}], []) - near_dup_total = Enum.sum(for d <- 0..8, do: result["near_dup_block_d#{d}"]) + + near_dup_total = + 0..8 + |> Enum.map(&result["near_dup_block_d#{&1}"]) + |> Enum.sum() + assert near_dup_total >= 1 end @@ -204,7 +208,7 @@ defmodule CodeQA.Metrics.File.NearDuplicateBlocksTest do test "returns only count keys (no pairs keys)" do result = NDB.analyze([{"a.ex", "x = 1\n"}], []) - refute Enum.any?(Map.keys(result), &String.ends_with?(&1, "_pairs")) + refute Map.keys(result) |> Enum.any?(&String.ends_with?(&1, "_pairs")) end test "find_pairs/2 with include_pairs option returns pair data" do