Вопрос или проблема
Существует ли способ преобразовать приведенные ниже вложенные списки в датафрейм в R? Списки ниже имеют неравные размеры, поэтому я хотел бы узнать, можем ли мы преобразовать это в датафрейм. Если значение отсутствует, оно должно быть заменено на NA.
Буду признателен, если кто-то сможет помочь мне здесь.
# Sample input from the question: a nested list describing one stock symbol.
# Note that `df` is a plain list here, not a data frame, despite its name.
# Top-level elements:
#   profile, metrics    - flat named lists of scalar fields; several profile
#                         fields are NULL (cik, cusip, phone, address, state,
#                         zip)
#   ratios, rating      - unnamed lists holding a single record each
#   splitsHistory       - unnamed list of 2 records
#   stockDividend       - unnamed list of 5 records
#   insideTrades, keyExecutives, stockNews - empty lists
#   financialsAnnual    - named list with `income` (5 records) and
#                         `balance` (2 records)
df = list(profile = list(symbol = "GUJTHEM.BO", price = 327.15, beta = 0.74051946,
volAvg = 28661, mktCap = 37012635091, lastDiv = 0.91667,
range = "72.45-329.25", changes = 7.15, companyName = "Gujarat Themis Biosyn Limited",
currency = "INR", cik = NULL, isin = "INE942C01045", cusip = NULL,
exchange = "Bombay Stock Exchange", exchangeShortName = "BSE",
industry = "Medical - Pharmaceuticals", website = "http://www.gtbl.in",
description = "Gujarat Themis Biosyn Ltd. работает как фармацевтическая компания. Фирма занимается производством лекарств. Ее продукты включают Рифамицин и Ловастатин. Компания была основана 11 декабря 1981 года и находится в Мумбаи, Индия.",
ceo = "Tapas B. Guhathakurata", sector = "Healthcare", country = "IN",
fullTimeEmployees = "109", phone = NULL, address = NULL,
city = "Mumbai", state = NULL, zip = NULL, dcfDiff = 269.86597,
dcf = 40.0840275267633, image = "https://images.financialmodelingprep.com/symbol/GUJTHEM.BO.png",
ipoDate = "2002-01-02", defaultImage = FALSE, isEtf = FALSE,
isActivelyTrading = TRUE, isAdr = FALSE, isFund = FALSE),
# metrics is a flat list; ratios wraps one record in an unnamed list
metrics = list(dividendYielTTM = 0.00280198685618218, volume = 65232,
yearHigh = 329.25, yearLow = 72.45), ratios = list(list(
dividendYielTTM = 0.00280198685618218, dividendYielPercentageTTM = 0.280198685618218,
peRatioTTM = 45.1462462181789, pegRatioTTM = -4.47304255293272,
payoutRatioTTM = 0, currentRatioTTM = 2.52653313861836,
quickRatioTTM = 2.30226114372479, cashRatioTTM = 0.405047437596108,
daysOfSalesOutstandingTTM = 67.5415263243447, daysOfInventoryOutstandingTTM = 22.4513726790451,
operatingCycleTTM = 89.9928990033898, daysOfPayablesOutstandingTTM = 61.5881830238727,
cashConversionCycleTTM = 28.4047159795172, grossProfitMarginTTM = 0.511702623011759,
operatingProfitMarginTTM = 0.46029151482928, pretaxProfitMarginTTM = 0.457815704653914,
netProfitMarginTTM = 0.341295257363155, effectiveTaxRateTTM = 0.254512456643638,
returnOnAssetsTTM = 0.212371777951953, returnOnEquityTTM = 0.262134869576203,
returnOnCapitalEmployedTTM = 0.312455901495426, netIncomePerEBTTTM = 0.745486128793151,
ebtPerEbitTTM = 0.994621212654149, ebitPerRevenueTTM = 0.46029151482928,
debtRatioTTM = 0.0054941800135803, debtEquityRatioTTM = 0.00610190549162543,
longTermDebtToCapitalizationTTM = 0.00498889599913074,
totalDebtToCapitalizationTTM = 0.00606489805686609, interestCoverageTTM = 235.817850033179,
cashFlowToDebtRatioTTM = 2.73155346926801, companyEquityMultiplierTTM = 1.11061258942062,
receivablesTurnoverTTM = 5.40408278970798, payablesTurnoverTTM = 5.92646157232012,
inventoryTurnoverTTM = 16.2573578559262, fixedAssetTurnoverTTM = 0.864508988056981,
assetTurnoverTTM = 0.622252355900682, operatingCashFlowPerShareTTM = 0.512083835117145,
freeCashFlowPerShareTTM = 0.512083835117145, cashPerShareTTM = 2.49001830463895,
operatingCashFlowSalesRatioTTM = 0.0241182638114006,
freeCashFlowOperatingCashFlowRatioTTM = 1, cashFlowCoverageRatiosTTM = 2.73155346926801,
shortTermCoverageRatiosTTM = 15.3196215549157, capitalExpenditureCoverageRatioTTM = 0,
dividendPaidAndCapexCoverageRatioTTM = 0, priceBookValueRatioTTM = 10.6483186985541,
priceToBookRatioTTM = 10.6483186985541, priceToSalesRatioTTM = 23.9697249739499,
priceEarningsRatioTTM = 45.1462462181789, priceToFreeCashFlowsRatioTTM = 993.841230089684,
priceToOperatingCashFlowsRatioTTM = 638.860236478975,
priceCashFlowRatioTTM = 638.860236478975, priceEarningsToGrowthRatioTTM = -4.47304255293272,
priceSalesRatioTTM = 23.9697249739499, enterpriseValueMultipleTTM = 50.651759310777,
priceFairValueTTM = 10.6483186985541, dividendPerShareTTM = 0.91667)),
# insideTrades and keyExecutives are empty lists; splitsHistory holds two records
insideTrades = list(), keyExecutives = list(), splitsHistory = list(
list(date = "2024-08-09", label = "August 09, 24", numerator = 3,
denominator = 2), list(date = "2023-10-10", label = "October 10, 23",
numerator = 5, denominator = 1)), stockDividend = list(
list(date = "2024-07-15", label = "July 15, 24", adjDividend = 0.16667,
dividend = 0.25, recordDate = "2024-07-15", paymentDate = "2024-08-22",
declarationDate = ""), list(date = "2024-02-22",
label = "February 22, 24", adjDividend = 0.75, dividend = 0.75,
recordDate = "", paymentDate = "", declarationDate = ""),
list(date = "2023-09-01", label = "September 01, 23",
adjDividend = 0.2, dividend = 1, recordDate = "2023-09-01",
paymentDate = "2023-10-09", declarationDate = ""),
list(date = "2022-12-02", label = "December 02, 22",
adjDividend = 0.88, dividend = 4.4, recordDate = "2022-12-02",
paymentDate = "2022-12-21", declarationDate = ""),
list(date = "2022-08-29", label = "August 29, 22", adjDividend = 0.8,
dividend = 4, recordDate = "2022-08-30", paymentDate = "2022-10-07",
declarationDate = "")), stockNews = list(), rating = list(
list(symbol = "GUJTHEM.BO", date = "2024-11-08", rating = "B+",
ratingScore = 3, ratingRecommendation = "Neutral",
ratingDetailsDCFScore = 3, ratingDetailsDCFRecommendation = "Neutral",
ratingDetailsROEScore = 5, ratingDetailsROERecommendation = "Strong Buy",
ratingDetailsROAScore = 5, ratingDetailsROARecommendation = "Strong Buy",
ratingDetailsDEScore = 4, ratingDetailsDERecommendation = "Buy",
ratingDetailsPEScore = 1, ratingDetailsPERecommendation = "Strong Sell",
ratingDetailsPBScore = 1, ratingDetailsPBRecommendation = "Strong Sell")),
# financialsAnnual$income: five annual records (2024-03-31 back to 2020-03-31)
financialsAnnual = list(income = list(list(date = "2024-03-31",
symbol = "GUJTHEM.BO", reportedCurrency = "INR", cik = "0000000000",
fillingDate = "2024-03-31", acceptedDate = "2024-03-29 20:00:00",
calendarYear = "2023", period = "FY", revenue = 1698219000,
costOfRevenue = 818880000, grossProfit = 879339000, grossProfitRatio = 0.5178007077,
researchAndDevelopmentExpenses = 38925000, generalAndAdministrativeExpenses = 13933000,
sellingAndMarketingExpenses = 3622000, sellingGeneralAndAdministrativeExpenses = 127360000,
otherExpenses = 43657000, operatingExpenses = 127360000,
costAndExpenses = 946240000, interestIncome = 40351000,
interestExpense = 2292000, depreciationAndAmortization = 35359000,
ebitda = 787338000, ebitdaratio = 0.4636257161, operatingIncome = 751979000,
operatingIncomeRatio = 0.4428044911, totalOtherIncomeExpensesNet = 41364000,
incomeBeforeTax = 793343000, incomeBeforeTaxRatio = 0.4671617736,
incomeTaxExpense = 201711000, netIncome = 591632000,
netIncomeRatio = 0.3483838068, eps = 8.14, epsdiluted = 8.14,
weightedAverageShsOut = 72682072, weightedAverageShsOutDil = 72643510,
link = "", finalLink = ""), list(date = "2023-03-31",
symbol = "GUJTHEM.BO", reportedCurrency = "INR", cik = "0000000000",
fillingDate = "2023-03-31", acceptedDate = "2023-03-30 20:00:00",
calendarYear = "2022", period = "FY", revenue = 1489729000,
costOfRevenue = 307810000, grossProfit = 1181919000,
grossProfitRatio = 0.7933785272, researchAndDevelopmentExpenses = 29324000,
generalAndAdministrativeExpenses = 14405000, sellingAndMarketingExpenses = 10075000,
sellingGeneralAndAdministrativeExpenses = 37726000, otherExpenses = 60238000,
operatingExpenses = 465500000, costAndExpenses = 773310000,
interestIncome = 50444000, interestExpense = 2007000,
depreciationAndAmortization = 25757000, ebitda = 802412000,
ebitdaratio = 0.5386295091, operatingIncome = 802411742,
operatingIncomeRatio = 0.5386293359, totalOtherIncomeExpensesNet = -27763742.4299999,
incomeBeforeTax = 774648000, incomeBeforeTaxRatio = 0.5199925624,
incomeTaxExpense = 194960000, netIncome = 579688000,
netIncomeRatio = 0.3891231224, eps = 7.98, epsdiluted = 7.98,
weightedAverageShsOut = 72643516, weightedAverageShsOutDil = 72643510,
link = "", finalLink = ""), list(date = "2022-03-31",
symbol = "GUJTHEM.BO", reportedCurrency = "INR", cik = "0000000000",
fillingDate = "2022-03-31", acceptedDate = "2022-03-30 20:00:00",
calendarYear = "2021", period = "FY", revenue = 1141917000,
costOfRevenue = 208921000, grossProfit = 932996000, grossProfitRatio = 0.8170436205,
researchAndDevelopmentExpenses = 4500000, generalAndAdministrativeExpenses = 8279000,
sellingAndMarketingExpenses = 2924000, sellingGeneralAndAdministrativeExpenses = 11203000,
otherExpenses = 713000, operatingExpenses = 367252000,
costAndExpenses = 576173000, interestIncome = 32610000,
interestExpense = 7328000, depreciationAndAmortization = 22330000,
ebitda = 620541000, ebitdaratio = 0.5434204062, operatingIncome = 598211000,
operatingIncomeRatio = 0.5238655699, totalOtherIncomeExpensesNet = -7866000,
incomeBeforeTax = 590345000, incomeBeforeTaxRatio = 0.5169771533,
incomeTaxExpense = 154103000, netIncome = 436242000,
netIncomeRatio = 0.3820260141, eps = 6.01, epsdiluted = 6.01,
weightedAverageShsOut = 72643521, weightedAverageShsOutDil = 72643510,
link = "", finalLink = ""), list(date = "2021-03-31",
symbol = "GUJTHEM.BO", reportedCurrency = "INR", cik = "0000000000",
fillingDate = "2021-03-31", acceptedDate = "2021-03-30 20:00:00",
calendarYear = "2020", period = "FY", revenue = 904250000,
costOfRevenue = 211075000, grossProfit = 693175000, grossProfitRatio = 0.7665745093,
researchAndDevelopmentExpenses = 0, generalAndAdministrativeExpenses = 6609000,
sellingAndMarketingExpenses = 1924000, sellingGeneralAndAdministrativeExpenses = 8533000,
otherExpenses = 1590000, operatingExpenses = 297682000,
costAndExpenses = 508757000, interestIncome = 21509000,
interestExpense = 7895000, depreciationAndAmortization = 17325000,
ebitda = 433983000, ebitdaratio = 0.4799369643, operatingIncome = 416658000,
operatingIncomeRatio = 0.4607774399, totalOtherIncomeExpensesNet = -7839000,
incomeBeforeTax = 408819000, incomeBeforeTaxRatio = 0.4521083771,
incomeTaxExpense = 107023000, netIncome = 301797000,
netIncomeRatio = 0.3337539397, eps = 4.15, epsdiluted = 4.15,
weightedAverageShsOut = 72652142, weightedAverageShsOutDil = 72643510,
link = "", finalLink = ""), list(date = "2020-03-31",
symbol = "GUJTHEM.BO", reportedCurrency = "INR", cik = "0000000000",
fillingDate = "2020-03-31", acceptedDate = "2020-03-30 20:00:00",
calendarYear = "2019", period = "FY", revenue = 849769000,
costOfRevenue = 154691000, grossProfit = 695078000, grossProfitRatio = 0.8179611165,
researchAndDevelopmentExpenses = 0, generalAndAdministrativeExpenses = 4904000,
sellingAndMarketingExpenses = 5062000, sellingGeneralAndAdministrativeExpenses = 9966000,
otherExpenses = 1142000, operatingExpenses = 389782000,
costAndExpenses = 544473000, interestIncome = 15714000,
interestExpense = 8572000, depreciationAndAmortization = 13620000,
ebitda = 335110000, ebitdaratio = 0.3943542304, operatingIncome = 321490000,
operatingIncomeRatio = 0.3783263452, totalOtherIncomeExpensesNet = -8924000,
incomeBeforeTax = 312566000, incomeBeforeTaxRatio = 0.3678246676,
incomeTaxExpense = 75820000, netIncome = 236746000, netIncomeRatio = 0.2786004196,
eps = 3.26, epsdiluted = 3.26, weightedAverageShsOut = 72643510,
weightedAverageShsOutDil = 72643510, link = "", finalLink = "")),
# financialsAnnual$balance: two annual records (2024-03-31 and 2023-03-31)
balance = list(list(date = "2024-03-31", symbol = "GUJTHEM.BO",
reportedCurrency = "INR", cik = "0000000000", fillingDate = "2024-03-31",
acceptedDate = "2024-03-29 20:00:00", calendarYear = "2023",
period = "FY", cashAndCashEquivalents = 81568000,
shortTermInvestments = 98663000, cashAndShortTermInvestments = 81568000,
netReceivables = 270278000, inventory = 33268000,
otherCurrentAssets = 90507000, totalCurrentAssets = 475621000,
propertyPlantEquipmentNet = 1292399000, goodwill = 0,
intangibleAssets = 0, goodwillAndIntangibleAssets = 0,
longTermInvestments = 351394000, taxAssets = 0, otherNonCurrentAssets = 91718999,
totalNonCurrentAssets = 1735511999, otherAssets = 1,
totalAssets = 2211133000, accountPayables = 99084000,
shortTermDebt = 15437000, taxPayables = 0, deferredRevenue = 0,
otherCurrentLiabilities = 47071000, totalCurrentLiabilities = 161592000,
longTermDebt = 12247000, deferredRevenueNonCurrent = 0,
deferredTaxLiabilitiesNonCurrent = 16643999, otherNonCurrentLiabilities = 6890000,
totalNonCurrentLiabilities = 35780999, otherLiabilities = 1,
capitalLeaseObligations = 12247000, totalLiabilities = 197373000,
preferredStock = 0, commonStock = 72644000, retainedEarnings = 1917904000,
accumulatedOtherComprehensiveIncomeLoss = 1569000,
othertotalStockholdersEquity = 21643000, totalStockholdersEquity = 2013760000,
totalEquity = 2013760000, totalLiabilitiesAndStockholdersEquity = 2211133000,
minorityInterest = 0, totalLiabilitiesAndTotalEquity = 2211133000,
totalInvestments = 351394000, totalDebt = 27684000,
netDebt = -53884000, link = "", finalLink = ""),
list(date = "2023-03-31", symbol = "GUJTHEM.BO",
reportedCurrency = "INR", cik = "0000000000",
fillingDate = "2023-03-31", acceptedDate = "2023-03-30 20:00:00",
calendarYear = "2022", period = "FY", cashAndCashEquivalents = 60550000,
shortTermInvestments = 41762000, cashAndShortTermInvestments = 102312000,
netReceivables = 354053000, inventory = 146188000,
otherCurrentAssets = 5912000, totalCurrentAssets = 608465000,
propertyPlantEquipmentNet = 532557000, goodwill = 0,
intangibleAssets = 0, goodwillAndIntangibleAssets = 0,
longTermInvestments = 373566000, taxAssets = 0,
otherNonCurrentAssets = 148693000, totalNonCurrentAssets = 1054816000,
otherAssets = 0, totalAssets = 1663281000, accountPayables = 95445000,
shortTermDebt = 4191000, taxPayables = 5280000,
deferredRevenue = 37897000, otherCurrentLiabilities = 15249000,
totalCurrentLiabilities = 152782000, longTermDebt = 0,
deferredRevenueNonCurrent = 0, deferredTaxLiabilitiesNonCurrent = 1.3e+07,
otherNonCurrentLiabilities = 6040000, totalNonCurrentLiabilities = 19040000,
otherLiabilities = 0, capitalLeaseObligations = 4191000,
totalLiabilities = 171822000, preferredStock = 0,
commonStock = 72644000, retainedEarnings = 1395602000,
accumulatedOtherComprehensiveIncomeLoss = 1569000,
othertotalStockholdersEquity = 21644000, totalStockholdersEquity = 1491459000,
totalEquity = 1491459000, totalLiabilitiesAndStockholdersEquity = 1663281000,
minorityInterest = 0, totalLiabilitiesAndTotalEquity = 1663281000,
totalInvestments = 415328000, totalDebt = 4191000,
netDebt = -56359000, link = "", finalLink = ""))))
Я пробовал следующий подход, но он не сработал
# Attempt from the question (kept as posted; it fails with the error shown
# right after this snippet).
# Intended to replace the NULL elements of list `x` with NA and return `x`.
# NOTE(review): for an empty list, sapply() returns list() instead of a
# logical vector, so the subscript below is a list and the assignment fails
# with "invalid subscript type 'list'".
nullToNA <- function(x) {
x[sapply(x, is.null)] <- NA
return(x)
}
# Applies nullToNA to each top-level element of `df` and tries to stack the
# results. `df` contains empty lists (e.g. insideTrades = list()), which
# trigger the failure described above.
df_1 = as.data.frame(rbindlist(lapply(df, nullToNA), fill = TRUE))
Error in x[sapply(x, is.null)] <- NA : invalid subscript type 'list'
Ответ или решение
Чтобы преобразовать вложенные списки в `data.frame` в R, особенно когда списки имеют разный размер и часть значений отсутствует, нужно выполнить несколько шагов. Задача — привести структуру данных к формату, в котором информацию удобно обрабатывать и анализировать.
Шаги для преобразования вложенных списков в `data.frame` в R
1. Устранение значений NULL: необходимо заменить все значения `NULL` на `NA`, чтобы избежать ошибок при дальнейшей обработке данных.
2. Функции для обработки списков: мы будем использовать функции `lapply` и `sapply` для рекурсивного обхода вложенных списков и преобразования их в `data.frame`. Учтите, что для пустого списка `sapply` возвращает `list()`, а не логический вектор, — именно это приводит к ошибке `invalid subscript type 'list'`; надёжнее использовать `vapply`.
3. Разделение данных на отдельные части: структура вашего списка может содержать разные уровни вложенности, поэтому необходимо обработать каждый уровень отдельно.
4. Использование `bind_rows` из `dplyr` или `rbindlist` из `data.table`: эти функции поддерживают объединение списков разной длины и заполнение недостающих значений `NA`.
Пример кода
Ниже представлен код для выполнения вышеуказанных действий:
# Install data.table only when it is missing, then attach it.
# An unconditional install.packages() call would re-download the package
# (and require network access) on every run of the script.
if (!requireNamespace("data.table", quietly = TRUE)) {
  install.packages("data.table")
}
library(data.table)
# Replace NULL elements of a list with NA, descending into nested lists.
#
# Arguments:
#   x - a list (possibly nested, possibly empty); any other object is
#       returned unchanged.
#
# Returns `x` with every NULL element, at any depth, replaced by a logical NA.
# Names and the order of elements are preserved.
nullToNA <- function(x) {
  # Non-lists and empty lists contain nothing to replace. Returning early
  # also avoids indexing an empty list, which is what raised
  # "invalid subscript type 'list'" when sapply() was used here.
  if (!is.list(x) || length(x) == 0L) {
    return(x)
  }
  # vapply() always yields a logical vector, whatever the input looks like.
  is_missing <- vapply(x, is.null, logical(1))
  x[is_missing] <- NA
  # Recurse so that NULLs inside nested lists are replaced as well;
  # `x[] <-` keeps the names and attributes of `x`.
  x[] <- lapply(x, nullToNA)
  x
}
# Flatten a nested list into a single data.table.
#
# The list is walked recursively. At every level the non-list elements
# (scalars and NULLs) form one record, i.e. one row, while list elements are
# descended into. Empty lists contribute no rows. All rows are then stacked
# with rbindlist(fill = TRUE), so columns absent from a record are filled
# with NA.
#
# Arguments:
#   lst   - a (possibly nested) list.
#   idcol - name of the column recording where each row came from, given as
#           the dot-separated names on the path to the record (for example
#           "financialsAnnual.income"). Use NULL to omit the column. Choose
#           a name that does not clash with a field name in the data.
#
# Returns a data.table with one row per record, or an empty data.table when
# `lst` contains no records.
#
# Requires data.table to be attached and nullToNA() to be defined.
list_to_dataframe <- function(lst, idcol = "section") {
  if (!is.list(lst)) {
    stop("`lst` must be a list", call. = FALSE)
  }

  # Join a parent path and a child name, skipping empty parts.
  join_path <- function(parent, child) {
    parts <- c(parent, child)
    paste(parts[nzchar(parts)], collapse = ".")
  }

  # Depth-first walk returning a list of one-record data.tables, each named
  # by the path of the node it was built from.
  collect_rows <- function(node, path) {
    if (!is.list(node) || length(node) == 0L) {
      return(list())
    }
    is_child <- vapply(node, is.list, logical(1), USE.NAMES = FALSE)
    rows <- list()
    if (!all(is_child)) {
      # Only non-empty, flat lists reach nullToNA() here, so NULL fields
      # become NA and every field ends up as a column of the row.
      rows <- list(as.data.table(nullToNA(node[!is_child])))
      names(rows) <- path
    }
    keys <- names(node)
    if (is.null(keys)) {
      keys <- rep("", length(node))
    }
    nested <- lapply(which(is_child), function(i) {
      collect_rows(node[[i]], join_path(path, keys[i]))
    })
    # do.call(c, ...) concatenates the per-child lists and keeps their names.
    c(rows, do.call(c, nested))
  }

  rows <- collect_rows(lst, "")
  if (length(rows) == 0L) {
    return(data.table())
  }
  rbindlist(rows, fill = TRUE, idcol = idcol)
}
# Flatten the nested `df` list from the question into one table.
final_df <- list_to_dataframe(lst = df)
# Display the combined table.
print(x = final_df)
Объяснение кода
- Функция `nullToNA` заменяет все значения `NULL` в списке на `NA`.
- `list_to_dataframe`: эта функция принимает список в качестве аргумента, преобразует его и в конце объединяет все элементы в единый `data.frame`. Внутри функции мы используем `lapply` для обхода каждого элемента списка.
- `rbindlist` из пакета `data.table` объединяет все преобразованные элементы в один `data.frame`, при этом недостающие значения заполняются `NA`.
Заключение
При использовании данного подхода вы сможете без труда преобразовать вложенные списки в `data.frame`. Это обеспечит гибкость для дальнейшего анализа и визуализации ваших данных. Убедитесь, что у вас установлены необходимые библиотеки, и следуйте коду для достижения желаемого результата.