Amazon FBA ofrece extracción usando VBA
excel-vba web-scraping (1)
Estoy usando el código mencionado a continuación para extraer datos de Amazon.
Sub Macro1()
'' Macro1 Macro
With ActiveSheet.QueryTables.Add(Connection:= _
"URL;http://www.amazon.com/gp/offer-listing/B00N41UTWG/ref=olp_f_new?ie=UTF8&f_new=true" _
, Destination:=Range("$A$1"))
.Name = "oldOfferPrice" _
'' "its_details_value_node.html?nsc=true&listId=www_s201_b9233&tsId=BBK01.ED0439"
.FieldNames = True
.RowNumbers = True
.FillAdjacentFormulas = False
.PreserveFormatting = True
.RefreshOnFileOpen = False
.BackgroundQuery = True
.RefreshStyle = xlInsertDeleteCells
.SavePassword = False
.SaveData = True
.AdjustColumnWidth = True
.RefreshPeriod = 0
.WebSelectionType = xlEntirePage
.WebFormatting = xlWebFormattingNone
.WebPreFormattedTextToColumns = True
.WebConsecutiveDelimitersAsOne = True
.WebSingleBlockTextImport = True
.WebDisableDateRecognition = False
.WebDisableRedirections = False
.Refresh BackgroundQuery:=False
End With
End Sub
El código anterior está extrayendo datos completos de la página, pero Mi requisito es extraer el valor específico de los datos incompletos. Solo deseo extraer los precios y los precios de las páginas están en este formato.
<div class="a-row a-spacing-mini olpOffer">
<div class="a-column a-span2">
<span class="a-size-large a-color-price olpOfferPrice a-text-bold"> $171.99 </span>
<span class="a-color-price">
<span class="supersaver"><i class="a-icon a-icon-prime" aria-label="Amazon Prime TM"><span class="a-icon-alt">Amazon Prime TM</span></i></span>
</span>
Ayude a resolver este problema para poder extraer solo los precios. Deseo extraer dos valores, por ejemplo, $ 171.99 y Amazon Prime TM. Tenga en cuenta que existen varios valores de precio y vendedor en una página y deseo extraerlos todos.
Aquí hay un ejemplo que muestra cómo puede recuperar las ofertas de Amazon para ciertos ASIN utilizando XHR
y Split
, y los resultados de salida a la hoja:
Sub TestExtractAmazonOffers()
Dim arrList() As Variant
'' clear sheet
Sheets("Sheet1").Cells.Delete
'' retrieve offers for certain ASIN
arrList = ExtractAmazonOffers("B00N41UTWG")
'' output data
Output Sheets(1), 1, 1, arrList
End Sub
Function ExtractAmazonOffers(strASIN As String)
Dim strUrl As String
Dim arrTmp() As String
Dim strTmp As String
Dim arrItems() As String
Dim i As Long
Dim arrCols() As String
Dim strSellerName As String
Dim strOfferPrice As String
Dim strAmazonPrime As String
Dim strShippingPrice As String
Dim arrResults() As Variant
Dim arrCells() As Variant
'' init
arrResults = Array(Array("Offer Price", "Amazon Prime TM", "Shipping Price", "Seller Name"))
strUrl = "http://www.amazon.com/gp/offer-listing/" & strASIN & "/ref=olp_f_new?ie=UTF8&f_new=true"
Do
'' http get request of the search result page
With CreateObject("MSXML2.XMLHttp")
.Open "GET", strUrl, False
.Send
strResp = .ResponseText
End With
arrTmp = Split(strResp, "id=""olpOfferList""", 2)
If UBound(arrTmp) = 1 Then
arrItems = Split(arrTmp(1), "<div class=""a-row a-spacing-mini olpOffer"">")
For i = 1 To UBound(arrItems)
'' get item columns
arrCols = Split(arrItems(i), "<div class=""a-column", 6)
'' retrieve seller name from column 4
strTmp = Split(arrCols(4), "olpSellerName", 2)(1)
arrTmp = Split(strTmp, "alt=""", 2)
If UBound(arrTmp) = 1 Then '' from image alt
strTmp = Split(arrTmp(1), """", 2)(0)
strSellerName = Trim(strTmp)
Else '' from link
strTmp = Split(strTmp, "<a", 2)(1)
strTmp = Split(strTmp, ">", 2)(1)
strTmp = Split(strTmp, "<", 2)(0)
strSellerName = Trim(strTmp)
End If
'' retrieve offer price from column 1
strTmp = Split(arrCols(1), "olpOfferPrice", 2)(1)
strTmp = Split(strTmp, ">", 2)(1)
strTmp = Split(strTmp, "<", 2)(0)
strOfferPrice = Trim(strTmp)
'' retrieve amazon prime
arrTmp = Split(arrCols(1), "olpShippingInfo", 2)
strAmazonPrime = IIf(InStr(arrTmp(0), "Amazon Prime") > 0, "Amazon Prime", "-")
'' retrieve shipping info
arrTmp = Split(arrTmp(1), "olpShippingPrice", 2)
If UBound(arrTmp) = 1 Then
strTmp = Split(arrTmp(1), ">", 2)(1)
strTmp = Split(strTmp, "<", 2)(0)
strShippingPrice = Trim(strTmp)
Else
strShippingPrice = "Free"
End If
'' store data
ReDim Preserve arrResults(UBound(arrResults) + 1)
arrResults(UBound(arrResults)) = Array(strOfferPrice, strAmazonPrime, strShippingPrice, strSellerName)
Next
End If
'' search for next page link
arrTmp = Split(strResp, "class=""a-last""", 2)
If UBound(arrTmp) = 0 Then Exit Do
strTmp = Split(arrTmp(1), "href=""", 2)(1)
strUrl = Split(strTmp, """", 2)(0)
If Left(strUrl, 1) = "/" Then strUrl = "http://www.amazon.com" & strUrl
Loop
'' convert nested array to 2-dimensional array
ReDim arrCells(UBound(arrResults), 3)
For i = 0 To UBound(arrCells, 1)
For j = 0 To UBound(arrCells, 2)
arrCells(i, j) = arrResults(i)(j)
Next
Next
ExtractAmazonOffers = arrCells
End Function
Sub Output(objSheet As Worksheet, lngTop As Long, lngLeft As Long, arrCells As Variant)
With objSheet
.Select
With .Range(.Cells(lngTop, lngLeft), .Cells( _
UBound(arrCells, 1) - LBound(arrCells, 1) + lngTop, _
UBound(arrCells, 2) - LBound(arrCells, 2) + lngLeft))
.NumberFormat = "@"
.Value = arrCells
.Columns.AutoFit
End With
End With
End Sub
La hoja resultante es la siguiente: