# Source code for reducto.reports

"""Module storing the different reports presented by the package. """

from __future__ import annotations

import pathlib
from typing import Dict, Union, List, cast, Any
from enum import Enum
import statistics

try:
    from tabulate import tabulate
except ModuleNotFoundError:  # pragma: no cover, check for library installation
    import warnings

    warnings.warn(
        "tabulate package is not installed and may raise errors if the format is called."
    )
    tabulate = None  # type: ignore[assignment]


# This is done to avoid circular imports.
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    from .src import SourceFile  # pragma: no cover, only to avoid circular imports
    from .package import Package  # pragma: no cover, only to avoid circular imports

import os


# Type aliases describing the shapes of the reports built in this module.
#
# The values in GroupedReportType may be either str or int. They are typed
# as Any because annotating them more precisely made mypy complain.
GroupedReportType = Dict[str, Any]
# One extra level of nesting: a name mapped to a GroupedReportType.
UnGroupedReportType = Dict[str, GroupedReportType]
# Return type of SourceReport.report: a str or a nested dict.
SourceReportType = Union[str, UnGroupedReportType]
# Return type of PackageReport.report: a str or either dict shape.
PackageReportType = Union[str, GroupedReportType, UnGroupedReportType]


class ReportFormat(Enum):
    """Output formats a report can be requested in.

    ``JSON`` stands for the plain dict form of a report; every other member
    carries the name of a table format defined by the tabulate package.
    """

    JSON = "json"

    # Table formats (names as understood by tabulate):
    SIMPLE = "simple"
    PLAIN = "plain"
    GRID = "grid"
    FANCY_GRID = "fancy_grid"
    GITHUB = "github"
    PIPE = "pipe"
    ORGTBL = "orgtbl"
    JIRA = "jira"
    PRESTO = "presto"
    PRETTY = "pretty"
    PSQL = "psql"
    RST = "rst"
    MEDIAWIKI = "mediawiki"
    MOINMOIN = "moinmoin"
    YOUTRACK = "youtrack"
    HTML = "html"
    UNSAFEHTML = "unsafehtml"
    LATEX = "latex"
    LATEX_RAW = "latex_raw"
    LATEX_BOOKTABS = "latex_booktabs"
    LATEX_LONGTABLE = "latex_longtable"
    TSV = "tsv"
    TEXTILE = "textile"

    def __str__(self) -> str:
        # The string form of a member is its raw value, e.g. "grid".
        return str(self.value)
class ReportFormatError(Exception):
    """Raised when a report is requested in a format that is not defined."""

    def __init__(self, fmt: ReportFormat) -> None:
        # List every valid format name so the message tells the caller
        # what could have been passed instead.
        allowed = [str(member) for member in ReportFormat]
        super().__init__(
            f"Report format not defined: {fmt}. "
            f"Must be one defined in {allowed}."
        )
class SourceReport:
    """Reporting class per a source (.py) file.

    Contains a report method to obtain the proper report format
    from a SourceFile object.

    Methods
    -------
    report

    See Also
    --------
    SourceFile
    """

    def __init__(self, src_file: SourceFile) -> None:
        """
        Parameters
        ----------
        src_file : src.SourceFile
            File the data is gathered from to obtain a report.
        """
        self._src_file: SourceFile = src_file

    def __repr__(self) -> str:
        return type(self).__name__

    @property
    def source_file(self) -> SourceFile:
        """Returns the source file.

        Returns
        -------
        src_file : src.SourceFile.
        """
        return self._src_file

    def report(
        self,
        fmt: ReportFormat = ReportFormat.JSON,
        is_package: bool = False,
        percentage: bool = False,
    ) -> SourceReportType:
        """Report of a source file.

        Parameters
        ----------
        fmt : ReportFormat
            Must be one of ReportFormat. Defaults to ReportFormat.JSON.
        is_package : bool
            Bool to determine if a SourceFile is the single entry point for
            an app. Only in that case a table format is rendered to a str;
            otherwise the dict is returned whatever the format is.
            Defaults to False.
        percentage : bool
            Whether to report the lines as percentage or not.
            Defaults to False

        Returns
        -------
        report : SourceReportType
            The dict report, or its table rendering as a str.

        Raises
        ------
        ReportFormatError
            When the reporting required is not defined in ReportFormat enum.
        """
        # Validate up front: an isinstance check also copes with unhashable
        # arguments, which a membership test against a set cannot.
        if not isinstance(fmt, ReportFormat):
            raise ReportFormatError(fmt)

        report_ = self._as_dict(percentage=percentage)
        if is_package and fmt is not ReportFormat.JSON:
            return self._table(report_, fmt=str(fmt))
        return report_

    @staticmethod
    def _percentage(count: int, total: int) -> str:
        """Format count as a rounded percentage of total, e.g. ``'25%'``.

        A total of 0 (an empty file) yields ``'0%'`` instead of dividing
        by zero.
        """
        if total == 0:
            return "0%"
        return str(round(count / total * 100)) + "%"

    def _as_dict(self, percentage: bool = False) -> GroupedReportType:
        """Report of a file with a dict format.

        The reporting is a dict with the source file name as a key,
        and an inner dict with the following data:
        lines (total lines of the file), number of functions,
        average function length, docstring lines, comment lines,
        blank lines and source lines.

        Parameters
        ----------
        percentage : bool
            Whether to report the lines as percentage or not.
            Defaults to False

        Returns
        -------
        dict_report : GroupedReportType
        """
        src = self.source_file
        functions = src.functions
        number_of_functions = len(functions)

        # statistics.mean raises on empty data, so a file without
        # functions reports an average length of 0.
        if number_of_functions == 0:
            avg_func_length = 0
        else:
            avg_func_length = round(
                statistics.mean([func.source_lines for func in functions])
            )

        lines = len(src)
        counts: Dict[str, int] = {
            "docstring_lines": src.total_docstrings,
            "comment_lines": src.comment_lines,
            "blank_lines": src.blank_lines,
            "source_lines": src.source_lines,
        }

        data: Dict[str, Union[int, str]] = {
            "lines": lines,
            "number_of_functions": number_of_functions,
            "average_function_length": avg_func_length,
        }
        for key, count in counts.items():
            data[key] = self._percentage(count, lines) if percentage else count

        return {src.name: data}

    def _table(
        self, report: GroupedReportType, fmt: str = "grid"
    ) -> str:  # pragma: no cover, proxy
        """Creates the report from tabulate.

        Proxy method for tabulate_report"""
        columns: List[str] = [
            "lines",
            "number_of_functions",
            "source_lines",
            "docstring_lines",
            "comment_lines",
            "blank_lines",
            "average_function_length",
        ]
        return tabulate_report(
            self.source_file.name, report, columns, grouped=True, fmt=fmt
        )
class PackageReport:
    """Define report for a package, gets a pkg.Package as input.

    Contains a report method to obtain the proper report format
    from a Package object.

    See Also
    --------
    Package
    """

    def __init__(self, package: Package) -> None:
        """
        Parameters
        ----------
        package : Package
            Package containing the data to be reported
        """
        self._package: Package = package
        # Columns shown in the table reports, in display order.
        self.columns: List[str] = [
            "lines",
            "number_of_functions",
            "source_lines",
            "docstring_lines",
            "comment_lines",
            "blank_lines",
            "average_function_length",
            "source_files",
        ]

    def __repr__(self) -> str:
        return type(self).__name__ + f"({self.package.name})"

    @property
    def package(self) -> Package:
        """Returns the package given as input.

        Returns
        -------
        package : Package
        """
        return self._package

    @property
    def name(self) -> str:
        # Redirect name to simplify testing
        return self.package.name

    def report(
        self,
        fmt: ReportFormat = ReportFormat.JSON,
        grouped: bool = False,
        percentage: bool = False,
    ) -> PackageReportType:
        """Report method for a package.

        Generates the report for a Package made of source files.
        Initially gets the info either grouped or ungrouped; if the
        format chosen is json it is returned directly, otherwise it is
        rendered as a table.

        Parameters
        ----------
        fmt : ReportFormat
            Format to return the information.
            Defaults to ReportFormat.JSON.
        grouped : bool
            Whether to return the information by source files,
            or grouped at the package level (summarizes the package).
            Defaults to False, returns the information per source file.
        percentage : bool
            Whether to report the lines as percentage or not.
            Defaults to False

        Returns
        -------
        report : PackageReportType

        Raises
        ------
        ReportFormatError
            When a report format is not defined
        """
        # Validate up front: an isinstance check also copes with unhashable
        # arguments, which a membership test against a set cannot.
        if not isinstance(fmt, ReportFormat):
            raise ReportFormatError(fmt)

        report: Union[GroupedReportType, UnGroupedReportType]
        if grouped:
            report = self._report_grouped(percentage=percentage)
        else:
            report = self._report_ungrouped(percentage=percentage)

        if fmt is ReportFormat.JSON:
            return report  # The dict report is returned untouched.
        return self._table(report, fmt=str(fmt), grouped=grouped)

    @staticmethod
    def _percentage(count: int, total: int) -> str:
        """Format count as a rounded percentage of total, e.g. ``'25%'``.

        A total of 0 (a package without lines) yields ``'0%'`` instead of
        dividing by zero.
        """
        if total == 0:
            return "0%"
        return str(round(count / total * 100)) + "%"

    def _report_grouped(self, percentage: bool = False) -> GroupedReportType:
        """Obtain the reporting information grouped for the whole package.

        Parameters
        ----------
        percentage : bool
            Whether to report the lines as percentage. Defaults to False.

        Returns
        -------
        report : GroupedReportType
            Dict ordered as: {package_name: {package_report}}.
        """
        # Raw counts are needed to add the files up, so the per-file report
        # is always requested without percentages.
        per_file: UnGroupedReportType = self._report_ungrouped()
        package_lines: int = len(self.package)

        lines: int = 0
        number_of_functions: int = 0
        average_function_length: float = 0.0
        counts: Dict[str, int] = {
            "docstring_lines": 0,
            "comment_lines": 0,
            "blank_lines": 0,
            "source_lines": 0,
        }

        for reporting in per_file[self.package.name].values():
            file_lines: int = reporting["lines"]
            lines += file_lines
            number_of_functions += reporting["number_of_functions"]
            # Each file's average is weighted by its share of the package
            # lines; a package without lines contributes nothing.
            weight: float = file_lines / package_lines if package_lines else 0.0
            average_function_length += reporting["average_function_length"] * weight
            for key in counts:
                counts[key] += reporting[key]

        shown: Dict[str, Union[int, str]] = {
            key: self._percentage(count, lines) if percentage else count
            for key, count in counts.items()
        }

        report_grouped: Dict[str, Union[int, str]] = {
            "lines": lines,
            "number_of_functions": number_of_functions,
            "average_function_length": round(average_function_length),
            "docstring_lines": shown["docstring_lines"],
            "comment_lines": shown["comment_lines"],
            "blank_lines": shown["blank_lines"],
            "source_files": len(self.package.source_files),
            "source_lines": shown["source_lines"],
        }

        return {self.package.name: report_grouped}

    def _report_ungrouped(self, percentage: bool = False) -> UnGroupedReportType:
        """Obtain the reporting information per source file.

        Parameters
        ----------
        percentage : bool
            Whether to report the lines as percentage or not.
            Passed to SourceReport.

        Returns
        -------
        report : UnGroupedReportType
            Dict ordered as:
            {package_name:
                {source_file_1: {source_file_report},
                 source_file_2: {source_file_report}
                 }
            }
        """
        report: GroupedReportType = {}
        for source_file in self.package.source_files:
            file_report = SourceReport(source_file).report(
                fmt=ReportFormat.JSON, percentage=percentage
            )
            # SourceReport keys its report by the bare file name; here it is
            # stored again under the path relative to the package.
            report[self._get_relname(str(source_file))] = file_report[
                source_file.name  # type: ignore[index]
            ]

        return {self.package.name: report}

    def _get_relname(self, file: str) -> str:
        """Obtain the relative name of a file in the package.

        Parameters
        ----------
        file : str
            Name of the file.

        Returns
        -------
        relname : str
            Path of the file relative to the package path, prefixed
            with the package name.

        Examples
        --------
        For an ``__init__.py`` file located at the top of a package named
        ``my_package`` the result is ``my_package/__init__.py`` (using the
        path separator of the platform).
        """
        relname: str = os.path.relpath(file, start=self.package.path)
        return str(pathlib.Path(self.package.name) / relname)

    def _table(
        self,
        report: Union[GroupedReportType, UnGroupedReportType],
        fmt: str = "grid",
        grouped: bool = True,
    ) -> str:  # pragma: no cover, proxy
        """Creates the report from tabulate.

        Proxy method for tabulate_report"""
        return tabulate_report(
            self.name, report, self.columns, grouped=grouped, fmt=fmt
        )
def tabulate_report(
    name: str,
    report: Union[GroupedReportType, UnGroupedReportType],
    columns: List[str],
    grouped: bool = True,
    fmt: str = "grid",
) -> str:
    """Generates a table report using tabulate.

    Parameters
    ----------
    name : str
        Top level key of the report (name of the package or module).
    report : Union[GroupedReportType, UnGroupedReportType]
        Json report obtained internally from SourceReport or PackageReport.
    columns : List[str]
        Column names. The list passed in is never modified.
    grouped : bool
        Whether to create a table with the modules collapsed into a
        single row or not (one row per file).
    fmt : str
        Format of the table. Passed to tabulate.

    Returns
    -------
    table : str
        str representation of the table.

    Raises
    ------
    ModuleNotFoundError
        When the tabulate package is not installed.
    """
    # The module level import falls back to None when tabulate is missing;
    # fail with an explicit message instead of "NoneType is not callable".
    if tabulate is None:
        raise ModuleNotFoundError(
            "tabulate package is required to generate table reports."
        )

    headers: List[str]
    table: List[List[Union[str, int]]]

    if grouped:
        # A single row holding the values of the whole package/module.
        inner_col: Dict[str, Union[str, int]] = report[name]
        headers = ["package", *column_split(columns, fmt=fmt)]
        table = [[name, *(inner_col[col] for col in columns)]]
    else:
        # One row per file. "source_files" only makes sense for grouped
        # reports, so it is dropped when present (its absence is fine).
        inner_filename_col: Dict[str, Dict[str, Union[str, int]]] = report[name]
        file_columns = [col for col in columns if col != "source_files"]
        headers = ["filename", *column_split(file_columns, fmt=fmt)]
        table = [
            [filename, *(values[col] for col in file_columns)]
            for filename, values in inner_filename_col.items()
        ]

    return tabulate(table, headers=headers, tablefmt=fmt)


def column_split(columns: List[str], fmt: str = "rst") -> List[str]:
    r"""Splits the columns to avoid longer formats for tabulate.

    Replaces every `_` by `\n`.
    Currently an error in tabulate when `\n` characters are present
    makes the tables to be parsed wrongly in the github format, so in
    that case the columns are not split.

    Parameters
    ----------
    columns : List[str]
    fmt : str
        Format of the table. Information used on tabulate.

    Returns
    -------
    split : List[str]
        Always a new list, the input is left untouched.
    """
    if fmt == "github":
        # Return a copy so both branches hand back an independent list.
        return list(columns)
    return [column.replace("_", "\n") for column in columns]